diff options
| author | Sophie Forrest <git@sophieforrest.com> | 2024-12-04 17:53:46 +1300 |
|---|---|---|
| committer | Sophie Forrest <git@sophieforrest.com> | 2024-12-04 17:53:46 +1300 |
| commit | 3725fe07e58f459bb7ab9fcbc10775cf4b138ec8 (patch) | |
| tree | e3c07e8bf7ba53a164538973787deb3e6693ea3e /src/lib.rs | |
| parent | f20503aa26ec2e91fb585defa338993985dac2e5 (diff) | |
feat(parser): finish nom rewrite with coursepoints parser
This parser can correctly parse course prerequisites, corequisites, and restrictions, which the previous parser could not do. These cannot be split into a truly computer-readable format yet, and I believe this would be out of scope for this project.
Diffstat (limited to '')
| -rw-r--r-- | src/lib.rs | 83 |
1 files changed, 23 insertions, 60 deletions
diff --git a/src/lib.rs b/src/lib.rs index 25f6ea6..ee08cbc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,13 +9,10 @@ mod parser; use std::collections::{HashMap, HashSet}; -use parser::{offering, subtitle, title}; +use parser::{course_offering, offering, subtitle, title}; use scraper::{CaseSensitivity, ElementRef, Html, Selector}; use serde::{Deserialize, Serialize}; -use tracing::{debug, info}; - -/// Slice used for splitting requirements for parsing. -const SPLIT_SLICE: &[char] = &[';', ',']; +use tracing::info; /// Alias to the nom error type. type NomError<'a> = nom::Err<nom::error::Error<&'a str>>; @@ -78,74 +75,38 @@ impl<'a> Course<'a> { /// Parses the course points, prerequisites, and restrictions from the given element. /// - /// # Panics + /// # Errors /// - /// Panics if parsing fails, or a slice is made in the middle of a character. - pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) { + /// Panics if parsing fails. + pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) -> Result<(), NomError> { // Parse course points, prerequisites, and exclusions. let details = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()); if let Some(details) = details { - let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect(); - - info!("{:#?}", &details_split); - - // Occasionally there is extra whitespace here, so this needs to be trimmed. - let points = details_split.first().expect("split should exist").trim(); - debug!("{:?}", points); - - let points_slice = &points.get(..points.len() - 4).expect("should be at indice"); - info!("{:?}", points_slice); - - let points = points_slice - .parse::<f32>() - .expect("should correctly parse points"); - info!("{:?}", points); + // Parse the info from our nom parser. 
+ let (_, (points, requirements)) = course_offering(details)?; self.points = points; - if let Some(requirements) = details_split.last().map(|s| s.trim()) { - if requirements.starts_with("(X)") { - self.restrictions = requirements - .get(4..) - .expect("should be at indice") - .split(SPLIT_SLICE) - .map(str::trim) - .collect::<Vec<&str>>(); - } else if requirements.starts_with("(P)") { - let requirements = &requirements - .get(4..) - .expect("should be at indice") - .split(" (X) ") - .collect::<Vec<&str>>(); - - self.prerequisites = requirements - .first() - .map(|s| { - s.split(SPLIT_SLICE) - .map(str::trim) - .filter(|s| !s.is_empty()) - .collect::<Vec<&str>>() - }) - .unwrap_or_default(); - - if requirements.len() > 1 { - self.restrictions = requirements - .last() - .map(|s| s.split(SPLIT_SLICE).map(str::trim).collect::<Vec<&str>>()) - .unwrap_or_default(); - } - } else if details_split.len() > 1 { - // Prevent the points from being dumped into requirements if they're the only - // item. - self.prerequisites = vec![requirements]; + if let Some((prerequisites, corequisites, restrictions)) = requirements { + // None of these are guaranteed to exist, so we need to use let Some for these. + if let Some(prerequisites) = prerequisites { + self.prerequisites.push(prerequisites); } - info!("{requirements}"); + if let Some(corequisites) = corequisites { + self.corequisites.push(corequisites); + } + + if let Some(restrictions) = restrictions { + self.restrictions.push(restrictions); + } } } + + Ok(()) } /// Parses the course timetable. @@ -310,7 +271,9 @@ pub fn parse_document(document: &Html) -> HashMap<&str, Course<'_>> { .parse_timetable(elem) .expect("could not parse timetable"); } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) { - working_course.parse_coursepoints(elem); + working_course + .parse_coursepoints(elem) + .expect("could not parse coursepoints"); } } |