summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Sophie Forrest <git@sophieforrest.com> 2024-09-11 18:14:35 +1200
committer Sophie Forrest <git@sophieforrest.com> 2024-09-11 18:14:35 +1200
commit 2acec90f5c1576e3fbceff218f781225ee6efdb7 (patch)
tree d21184b54484a93bb6573ad7869e63b734dd437e /src
parent c368412ffd6be8332f12d0a4d8418faf70448fbc (diff)
feat: initial work on nom parsers
Diffstat (limited to 'src')
-rw-r--r-- src/lib.rs 2
-rw-r--r-- src/parser.rs 74
2 files changed, 76 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 1246a76..279000a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,8 @@
 //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly
 //! parse prerequisites, however.
 
+mod parser;
+
 use std::collections::{HashMap, HashSet};
 
 use scraper::{CaseSensitivity, ElementRef, Html, Selector};
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..c722bfc
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+//! Nom parsers used within the parsing steps.
+
+use nom::{
+	bytes::complete::{tag, take_while},
+	character::complete::multispace0,
+	combinator::map_res,
+	sequence::{pair, preceded},
+	IResult,
+};
+
+/// Returns `true` when `c` is one of the ASCII digits `0` through `9`.
+const fn is_decimal_digit(c: char) -> bool {
+	char::is_ascii_digit(&c)
+}
+
+/// Converts the decimal text in `input` into a [`u16`].
+///
+/// # Errors
+///
+/// Returns a [`std::num::ParseIntError`] if `input` is not a valid [`u16`].
+fn from_decimal(input: &str) -> Result<u16, std::num::ParseIntError> {
+	std::str::FromStr::from_str(input)
+}
+
+/// Reads the leading run of ASCII decimal digits from `input` and converts it to a [`u16`].
+///
+/// # Errors
+///
+/// Returns an error if the digits read (possibly none) cannot be converted by [`from_decimal`].
+fn crn_digits(input: &str) -> IResult<&str, u16> {
+	map_res(take_while(is_decimal_digit), from_decimal)(input)
+}
+
+/// Parses a course reference number written as `CRN`, optional whitespace, then digits.
+///
+/// # Errors
+///
+/// Returns an error if `input` does not start with `CRN` or the digits fail to parse.
+pub fn course_reference_number(input: &str) -> IResult<&str, u16> {
+	preceded(pair(tag("CRN"), multispace0), crn_digits)(input)
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)]
+mod tests {
+	/// Parses `input` as a CRN and returns only the number, panicking on a parse failure.
+	fn crn(input: &str) -> u16 {
+		super::course_reference_number(input).unwrap().1
+	}
+
+	#[test]
+	fn crn_parser_basic() {
+		assert_eq!(crn("CRN 5912"), 5912);
+		assert_eq!(crn("CRN 17146"), 17146);
+	}
+
+	#[test]
+	fn crn_parser_postfix() {
+		assert_eq!(crn("CRN 331 [Distance]"), 331);
+	}
+
+	#[test]
+	fn crn_parser_extra_whitespace() {
+		assert_eq!(crn("CRN 8913 "), 8913);
+		assert_eq!(crn("CRN  61151"), 61151);
+	}
+
+	#[test]
+	fn crn_parser_no_whitespace() {
+		assert_eq!(crn("CRN615"), 615);
+	}
+}