diff options
Diffstat (limited to '')
| -rw-r--r-- | src/lib.rs | 78 |
1 files changed, 29 insertions, 49 deletions
diff --git a/src/lib.rs b/src/lib.rs index 279000a..10a1c39 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,13 +9,17 @@ mod parser; use std::collections::{HashMap, HashSet}; +use parser::{offering, subtitle, title}; use scraper::{CaseSensitivity, ElementRef, Html, Selector}; use serde::{Deserialize, Serialize}; -use tracing::{debug, info}; +use tracing::{debug, info, warn}; /// Slice used for splitting requirements for parsing. const SPLIT_SLICE: &[char] = &[';', ',']; +/// Alias to the nom error type. +type NomError<'a> = nom::Err<nom::error::Error<&'a str>>; + /// A VUW course, along with all relevant data. #[derive(Clone, Debug, Deserialize, Serialize)] #[non_exhaustive] @@ -49,30 +53,24 @@ pub struct Course<'a> { } impl<'a> Course<'a> { - /// Parses the courde id. + /// Parses the course id. /// - /// # Panics + /// # Errors /// - /// Panics if string is sliced in the middle of a character. - pub fn parse_courseid(&mut self, elem: ElementRef<'a>) { - elem.children().for_each(|child| { - child.children().for_each(|c| { - if let Some(text) = c.value().as_text() { - // The actual text we're looking for - let text: &str = text.trim(); - - self.title = text; - } else if let Some(text) = c - .first_child() - .and_then(|node| node.value().as_text().map(|text| &**text)) - { - if let Some((indice, _char)) = text.char_indices().nth(1) { - // Skip over "-" - self.subtitle = text.get(indice..).expect("indice should be valid").trim(); - } - } - }); - }); + /// This function will return an error if nom fails to parse the course title or subtitle. + pub fn parse_courseid(&mut self, elem: ElementRef<'a>) -> Result<(), NomError> { + for child in elem.children().flat_map(|child| child.children()) { + if let Some(text) = child.value().as_text() { + self.title = title(text)?.1; + } else if let Some(text) = child + .first_child() + .and_then(|node| node.value().as_text().map(|text| &**text)) + { + self.subtitle = subtitle(text)?.1; + } + } + + Ok(()) } /// Parses the course points, prerequisites, and restrictions from the given element. @@ -158,30 +156,10 @@ impl<'a> Course<'a> { .and_then(|el| el.first_child()?.value().as_text()); if let Some(details) = details { - let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect(); - - info!("{:#?}", &details_split); - - let offering = CourseOffering::new( - details_split - .last() - .expect("course reference number should exist") - .get(4..) - .expect("course reference number digits should start at this indice") - .split_whitespace() - .next() - .expect("course reference number should exist") - .parse::<u16>() - .expect("course reference number should be parseable"), - Trimester::try_from( - *details_split - .first() - .expect("trimester element should exist"), - ) - .expect("should be parseable into a trimester"), - ); + info!("{:#?}", &details); - self.timetable.push(offering); + self.timetable + .push(offering(details).expect("cannot parse course offering").1); } } } @@ -256,7 +234,7 @@ pub enum Trimester { } impl TryFrom<&str> for Trimester { - type Error = String; + type Error = &'static str; fn try_from(value: &str) -> Result<Self, Self::Error> { match value { @@ -269,7 +247,7 @@ impl TryFrom<&str> for Trimester { "2+3/3" => Ok(Self::TwoThree), "3+1/3" => Ok(Self::ThreeOne), "1+2+3/3" | "2+3+1/3" | "full year" => Ok(Self::FullYear), - _ => Err(String::from("Invalid trimester.")), + _ => Err("Invalid trimester."), } } } @@ -300,7 +278,9 @@ pub fn parse_document(document: &Html) -> HashMap<&str, Course<'_>> { working_course = Course::default(); working_course.subject_areas.insert(subject_area); - working_course.parse_courseid(elem); + working_course + .parse_courseid(elem) + .expect("could not parse courseid"); } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) { working_course.offered = false; } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) { |