// SPDX-License-Identifier: AGPL-3.0-or-later //! # VUW Course scraper //! //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly //! parse prerequisites, however. use std::{collections::HashSet, fmt}; use scraper::ElementRef; use serde::{Deserialize, Serialize}; use tracing::{debug, info}; /// Slice used for splitting requirements for parsing. const SPLIT_SLICE: &[char] = &[';', ',']; /// A VUW course, along with all relevant data. #[derive(Clone, Debug, Deserialize, Serialize)] #[non_exhaustive] pub struct Course<'a> { /// Description of the course. pub description: Option<&'a str>, /// Whether this course is offered in the upcoming year. pub offered: bool, /// Amount of points this course is worth. pub points: u8, /// Courses that must be taken before this course. pub prerequisites: Vec<&'a str>, /// Courses that cannot be taken if you take this course. pub restrictions: Vec<&'a str>, /// Subject areas this course belongs to. pub subject_areas: HashSet<&'a str>, /// Subtitle of this course, its longer name. pub subtitle: &'a str, /// Timetable of this course, includes trimesters and CRNs. pub timetable: Vec, /// Title of this course, also known as the code. pub title: &'a str, } impl<'a> Course<'a> { /// . /// /// # Panics /// /// Panics if . pub fn parse_courseid(&mut self, elem: ElementRef<'a>) { elem.children().for_each(|child| { child.children().for_each(|c| { if let Some(text) = c.value().as_text() { // The actual text we're looking for let text: &str = text.trim(); self.title = text; } else if let Some(text) = c .first_child() .and_then(|node| node.value().as_text().map(|text| &**text)) { if let Some((indice, _char)) = text.char_indices().nth(1) { // Skip over "-" self.subtitle = text.get(indice..).expect("indice should be valid").trim(); } } }); }); } /// Parses the course points, prerequisites, and restrictions from the given element. /// /// # Panics /// /// Panics if . pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) { // Parse course points, prerequisites, and exclusions. let details = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()); if let Some(details) = details { let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect(); info!("{:#?}", &details_split); // Occasionally there is extra whitespace here, so this needs to be trimmed. let points = details_split.first().expect("split should exist").trim(); debug!("{:?}", points); let points_slice = &points.get(..points.len() - 4).expect("should be at indice"); info!("{:?}", points_slice); let points = points_slice .parse::() .expect("should correctly parse points"); info!("{:?}", points); self.points = points; if let Some(requirements) = details_split.last().map(|s| s.trim()) { if requirements.starts_with("(X)") { self.restrictions = requirements .get(4..) .expect("should be at indice") .split(SPLIT_SLICE) .map(str::trim) .collect::>(); } else if requirements.starts_with("(P)") { let requirements = &requirements .get(4..) .expect("should be at indice") .split(" (X) ") .collect::>(); self.prerequisites = requirements .first() .map(|s| { s.split(SPLIT_SLICE) .map(str::trim) .filter(|s| !s.is_empty()) .collect::>() }) .unwrap_or_default(); if requirements.len() > 1 { self.restrictions = requirements .last() .map(|s| s.split(SPLIT_SLICE).map(str::trim).collect::>()) .unwrap_or_default(); } } else { self.prerequisites = vec![requirements]; } info!("{requirements}"); } } } /// . /// /// # Panics /// /// Panics if . pub fn parse_timetable(&mut self, elem: ElementRef<'a>) { // Parse timetable / CRNs. let details = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()); if let Some(details) = details { let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect(); info!("{:#?}", &details_split); let offering = CourseOffering::new( details_split .last() .expect("course reference number should exist") .get(4..) .expect("course reference number digits should start at this indice") .parse::() .expect("course reference number should be parseable"), Trimester::try_from( *details_split .first() .expect("trimester element should exist"), ) .expect("should be parseable into a trimester"), ); self.timetable.push(offering); } } } impl Default for Course<'_> { fn default() -> Self { Self { description: Option::default(), offered: true, points: u8::default(), prerequisites: Vec::default(), restrictions: Vec::default(), subject_areas: HashSet::default(), subtitle: "", timetable: Vec::default(), title: "", } } } /// A course offering, includes the CRN and [`Trimester`]. #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] #[non_exhaustive] pub struct CourseOffering { /// Reference number for this coursem e.g. 11723. pub course_reference_number: u16, /// Trimester this course is offered in. pub trimester: Trimester, } impl CourseOffering { /// Creates a new [`CourseOffering`]. #[must_use] pub const fn new(course_reference_number: u16, trimester: Trimester) -> Self { Self { course_reference_number, trimester, } } } /// Trimester information Victoria University of Wellington offers. #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)] pub enum Trimester { /// Trimester one. One, /// Trimester two. Two, /// Trimester three. Three, /// Block dates. See course page for more information. BlockDates, /// Part year. See course page for more information. PartYear, /// Trimesters one and two. OneTwo, /// Trimesters two and three. TwoThree, /// Trimesters three and one. ThreeOne, /// Trimesters one, two, and three. FullYear, } impl TryFrom<&str> for Trimester { type Error = String; fn try_from(value: &str) -> Result { match value { "1/3" => Ok(Self::One), "2/3" => Ok(Self::Two), "3/3" => Ok(Self::Three), "block dates/3" => Ok(Self::BlockDates), "part year/3" => Ok(Self::PartYear), "1+2/3" => Ok(Self::OneTwo), "2+3/3" => Ok(Self::TwoThree), "3+1/3" => Ok(Self::ThreeOne), "1+2+3/3" => Ok(Self::FullYear), _ => Err(String::from("Invalid trimester.")), } } } impl fmt::Display for Course<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}", self.title, self.subtitle, self.offered, self.subject_areas .iter() // Necessary as Rust refuses to build Vec<&str> with &String. .map(|s| &**s) .collect::>() .join(", "), ) } }