summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 17
-rw-r--r-- Cargo.toml 1
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/parser.rs 74
4 files changed, 94 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index cb0869c..af77657 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -692,6 +692,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
 [[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
 name = "miniz_oxide"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -736,6 +742,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
 
 [[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
 name = "nu-ansi-term"
 version = "0.46.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1772,6 +1788,7 @@ name = "vuw_course_scraper"
 version = "0.1.0"
 dependencies = [
  "futures-util",
+ "nom",
  "reqwest",
  "scraper",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 0937cad..28f9cb3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ edition = "2021"
 
 [dependencies]
 futures-util = "0.3.30"
+nom = "7.1.3"
 reqwest = "0.12.7"
 scraper = "0.20.0"
 serde = { version = "1.0.209", features = ["derive"] }
diff --git a/src/lib.rs b/src/lib.rs
index 1246a76..279000a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,8 @@
 //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly
 //! parse prerequisites, however.
 
+mod parser;
+
 use std::collections::{HashMap, HashSet};
 
 use scraper::{CaseSensitivity, ElementRef, Html, Selector};
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..c722bfc
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+//! Nom parsers used within the parsing steps.
+
+use nom::{
+	bytes::complete::{tag, take_while},
+	character::complete::multispace0,
+	combinator::map_res,
+	sequence::{pair, preceded},
+	IResult,
+};
+
+/// Returns `true` if `c` is an ASCII decimal digit (`'0'..='9'`), per [`char::is_ascii_digit`].
+const fn is_decimal_digit(c: char) -> bool {
+	c.is_ascii_digit()
+}
+
+/// Parses a string slice into a [`u16`] using [`str::parse`].
+///
+/// # Errors
+///
+/// Returns a [`std::num::ParseIntError`] if `input` is empty, malformed, or out of range for [`u16`].
+fn from_decimal(input: &str) -> Result<u16, std::num::ParseIntError> {
+	input.parse::<u16>()
+}
+
+/// Consumes the leading run of ASCII digits from `input` and converts it to a [`u16`].
+///
+/// # Errors
+///
+/// Returns a nom error if [`from_decimal`] rejects the digits, e.g. none are present or they overflow.
+fn crn_digits(input: &str) -> IResult<&str, u16> {
+	map_res(take_while(is_decimal_digit), from_decimal)(input)
+}
+
+/// Parses a course reference number: the literal `CRN`, optional whitespace, then decimal digits.
+///
+/// # Errors
+///
+/// Returns a nom error if `input` does not start with `CRN` or no valid [`u16`] follows it.
+pub fn course_reference_number(input: &str) -> IResult<&str, u16> {
+	preceded(pair(tag("CRN"), multispace0), crn_digits)(input)
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)] // a failed parse should panic and thereby fail the test
+mod tests {
+	use super::*;
+
+	#[test]
+	fn crn_parser_basic() {
+		assert_eq!(course_reference_number("CRN 5912").unwrap().1, 5912);
+		assert_eq!(course_reference_number("CRN 17146").unwrap().1, 17146);
+	}
+
+	#[test]
+	fn crn_parser_postfix() {
+		assert_eq!(
+			course_reference_number("CRN 331 [Distance]").unwrap().1, // only the parsed number is checked
+			331
+		);
+	}
+
+	#[test]
+	fn crn_parser_extra_whitespace() {
+		assert_eq!(course_reference_number("CRN 8913 ").unwrap().1, 8913); // trailing space after the digits
+		assert_eq!(course_reference_number("CRN  61151").unwrap().1, 61151); // two spaces before the digits
+	}
+
+	#[test]
+	fn crn_parser_no_whitespace() {
+		assert_eq!(course_reference_number("CRN615").unwrap().1, 615);
+	}
+}