diff options
| author | Sophie Forrest <git@sophieforrest.com> | 2024-09-11 18:14:35 +1200 |
|---|---|---|
| committer | Sophie Forrest <git@sophieforrest.com> | 2024-09-11 18:14:35 +1200 |
| commit | 2acec90f5c1576e3fbceff218f781225ee6efdb7 (patch) | |
| tree | d21184b54484a93bb6573ad7869e63b734dd437e /src | |
| parent | c368412ffd6be8332f12d0a4d8418faf70448fbc (diff) | |
feat: initial work on nom parsers
Diffstat (limited to '')
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/parser.rs | 74 |
2 files changed, 76 insertions, 0 deletions
```diff
diff --git a/src/lib.rs b/src/lib.rs
index 1246a76..279000a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,8 @@
 //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly
 //! parse prerequisites, however.
 
+mod parser;
+
 use std::collections::{HashMap, HashSet};
 
 use scraper::{CaseSensitivity, ElementRef, Html, Selector};
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..c722bfc
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+//! Nom parsers used within the parsing steps.
+
+use nom::{
+    bytes::complete::{tag, take_while},
+    character::complete::multispace0,
+    combinator::map_res,
+    sequence::{pair, preceded},
+    IResult,
+};
+
+/// Determines if the provided character is an ascii digit.
+const fn is_decimal_digit(c: char) -> bool {
+    c.is_ascii_digit()
+}
+
+/// Parses a string slice into a [`u16`].
+///
+/// # Errors
+///
+/// This function will return an error if the string cannot be parsed.
+fn from_decimal(input: &str) -> Result<u16, std::num::ParseIntError> {
+    input.parse::<u16>()
+}
+
+/// Retrieves all the digits from a CRN and maps them to a [`u16`].
+///
+/// # Errors
+///
+/// This function will return an error if nom cannot parse the input.
+fn crn_digits(input: &str) -> IResult<&str, u16> {
+    map_res(take_while(is_decimal_digit), from_decimal)(input)
+}
+
+/// Parses a course reference number.
+///
+/// # Errors
+///
+/// This function will return an error if nom cannot parse the input.
+pub fn course_reference_number(input: &str) -> IResult<&str, u16> {
+    preceded(pair(tag("CRN"), multispace0), crn_digits)(input)
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn crn_parser_basic() {
+        assert_eq!(course_reference_number("CRN 5912").unwrap().1, 5912);
+        assert_eq!(course_reference_number("CRN 17146").unwrap().1, 17146);
+    }
+
+    #[test]
+    fn crn_parser_postfix() {
+        assert_eq!(
+            course_reference_number("CRN 331 [Distance]").unwrap().1,
+            331
+        );
+    }
+
+    #[test]
+    fn crn_parser_extra_whitespace() {
+        assert_eq!(course_reference_number("CRN 8913 ").unwrap().1, 8913);
+        assert_eq!(course_reference_number("CRN 61151").unwrap().1, 61151);
+    }
+
+    #[test]
+    fn crn_parser_no_whitespace() {
+        assert_eq!(course_reference_number("CRN615").unwrap().1, 615);
+    }
+}
```