summary refs log tree commit diff
path: root/src/lib.rs
diff options
context:
space:
mode:
author: Sophie Forrest <git@sophieforrest.com> 2024-12-04 17:53:46 +1300
committer: Sophie Forrest <git@sophieforrest.com> 2024-12-04 17:53:46 +1300
commit: 3725fe07e58f459bb7ab9fcbc10775cf4b138ec8 (patch)
tree: e3c07e8bf7ba53a164538973787deb3e6693ea3e /src/lib.rs
parent: f20503aa26ec2e91fb585defa338993985dac2e5 (diff)
feat(parser): finish nom rewrite with coursepoints parser
This parser can correctly parse course prerequisites, corequisites, and
restrictions, which the previous parser could not do. These cannot be
split into a truly computer readable format yet, and I believe this
would be out of scope for this project.
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- src/lib.rs 83
1 files changed, 23 insertions, 60 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 25f6ea6..ee08cbc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,13 +9,10 @@ mod parser;
 
 use std::collections::{HashMap, HashSet};
 
-use parser::{offering, subtitle, title};
+use parser::{course_offering, offering, subtitle, title};
 use scraper::{CaseSensitivity, ElementRef, Html, Selector};
 use serde::{Deserialize, Serialize};
-use tracing::{debug, info};
-
-/// Slice used for splitting requirements for parsing.
-const SPLIT_SLICE: &[char] = &[';', ','];
+use tracing::info;
 
 /// Alias to the nom error type.
 type NomError<'a> = nom::Err<nom::error::Error<&'a str>>;
@@ -78,74 +75,38 @@ impl<'a> Course<'a> {
 
 	/// Parses the course points, prerequisites, and restrictions from the given element.
 	///
-	/// # Panics
+	/// # Errors
 	///
-	/// Panics if parsing fails, or a slice is made in the middle of a character.
-	pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) {
+	/// Returns a [`NomError`] if the nom parser fails to parse the course details.
+	pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) -> Result<(), NomError> {
 		// Parse course points, prerequisites, and exclusions.
 		let details = elem
 			.first_child()
 			.and_then(|el| el.first_child()?.value().as_text());
 
 		if let Some(details) = details {
-			let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect();
-
-			info!("{:#?}", &details_split);
-
-			// Occasionally there is extra whitespace here, so this needs to be trimmed.
-			let points = details_split.first().expect("split should exist").trim();
-			debug!("{:?}", points);
-
-			let points_slice = &points.get(..points.len() - 4).expect("should be at indice");
-			info!("{:?}", points_slice);
-
-			let points = points_slice
-				.parse::<f32>()
-				.expect("should correctly parse points");
-			info!("{:?}", points);
+			// Parse the info from our nom parser.
+			let (_, (points, requirements)) = course_offering(details)?;
 
 			self.points = points;
 
-			if let Some(requirements) = details_split.last().map(|s| s.trim()) {
-				if requirements.starts_with("(X)") {
-					self.restrictions = requirements
-						.get(4..)
-						.expect("should be at indice")
-						.split(SPLIT_SLICE)
-						.map(str::trim)
-						.collect::<Vec<&str>>();
-				} else if requirements.starts_with("(P)") {
-					let requirements = &requirements
-						.get(4..)
-						.expect("should be at indice")
-						.split(" (X) ")
-						.collect::<Vec<&str>>();
-
-					self.prerequisites = requirements
-						.first()
-						.map(|s| {
-							s.split(SPLIT_SLICE)
-								.map(str::trim)
-								.filter(|s| !s.is_empty())
-								.collect::<Vec<&str>>()
-						})
-						.unwrap_or_default();
-
-					if requirements.len() > 1 {
-						self.restrictions = requirements
-							.last()
-							.map(|s| s.split(SPLIT_SLICE).map(str::trim).collect::<Vec<&str>>())
-							.unwrap_or_default();
-					}
-				} else if details_split.len() > 1 {
-					// Prevent the points from being dumped into requirements if they're the only
-					// item.
-					self.prerequisites = vec![requirements];
+			if let Some((prerequisites, corequisites, restrictions)) = requirements {
+				// None of these are guaranteed to exist, so we need to use let Some for these.
+				if let Some(prerequisites) = prerequisites {
+					self.prerequisites.push(prerequisites);
 				}
 
-				info!("{requirements}");
+				if let Some(corequisites) = corequisites {
+					self.corequisites.push(corequisites);
+				}
+
+				if let Some(restrictions) = restrictions {
+					self.restrictions.push(restrictions);
+				}
 			}
 		}
+
+		Ok(())
 	}
 
 	/// Parses the course timetable.
@@ -310,7 +271,9 @@ pub fn parse_document(document: &Html) -> HashMap<&str, Course<'_>> {
 				.parse_timetable(elem)
 				.expect("could not parse timetable");
 		} else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
-			working_course.parse_coursepoints(elem);
+			working_course
+				.parse_coursepoints(elem)
+				.expect("could not parse coursepoints");
 		}
 	}