diff options
| -rw-r--r-- | Cargo.lock | 17 | ||||
| -rw-r--r-- | Cargo.toml | 1 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/parser.rs | 74 |
4 files changed, 94 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock index cb0869c..af77657 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -692,6 +692,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] name = "miniz_oxide" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -736,6 +742,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] name = "nu-ansi-term" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1772,6 +1788,7 @@ name = "vuw_course_scraper" version = "0.1.0" dependencies = [ "futures-util", + "nom", "reqwest", "scraper", "serde", diff --git a/Cargo.toml b/Cargo.toml index 0937cad..28f9cb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] futures-util = "0.3.30" +nom = "7.1.3" reqwest = "0.12.7" scraper = "0.20.0" serde = { version = "1.0.209", features = ["derive"] } diff --git a/src/lib.rs b/src/lib.rs index 1246a76..279000a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,8 @@ //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly //! parse prerequisites, however. +mod parser; + use std::collections::{HashMap, HashSet}; use scraper::{CaseSensitivity, ElementRef, Html, Selector}; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..c722bfc --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +//! Nom parsers used within the parsing steps. + +use nom::{ + bytes::complete::{tag, take_while}, + character::complete::multispace0, + combinator::map_res, + sequence::{pair, preceded}, + IResult, +}; + +/// Determines if the provided character is an ascii digit. +const fn is_decimal_digit(c: char) -> bool { + c.is_ascii_digit() +} + +/// Parses a string slice into a [`u16`]. +/// +/// # Errors +/// +/// This function will return an error if the string cannot be parsed. +fn from_decimal(input: &str) -> Result<u16, std::num::ParseIntError> { + input.parse::<u16>() +} + +/// Retrieves all the digits from a CRN and maps them to a [`u16`]. +/// +/// # Errors +/// +/// This function will return an error if nom cannot parse the input. +fn crn_digits(input: &str) -> IResult<&str, u16> { + map_res(take_while(is_decimal_digit), from_decimal)(input) +} + +/// Parses a course reference number. +/// +/// # Errors +/// +/// This function will return an error if nom cannot parse the input. +pub fn course_reference_number(input: &str) -> IResult<&str, u16> { + preceded(pair(tag("CRN"), multispace0), crn_digits)(input) +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + fn crn_parser_basic() { + assert_eq!(course_reference_number("CRN 5912").unwrap().1, 5912); + assert_eq!(course_reference_number("CRN 17146").unwrap().1, 17146); + } + + #[test] + fn crn_parser_postfix() { + assert_eq!( + course_reference_number("CRN 331 [Distance]").unwrap().1, + 331 + ); + } + + #[test] + fn crn_parser_extra_whitespace() { + assert_eq!(course_reference_number("CRN 8913 ").unwrap().1, 8913); + assert_eq!(course_reference_number("CRN 61151").unwrap().1, 61151); + } + + #[test] + fn crn_parser_no_whitespace() { + assert_eq!(course_reference_number("CRN615").unwrap().1, 615); + } +} |