From c4ce297ff951583c9ffe3a88aa22933577b329da Mon Sep 17 00:00:00 2001 From: Sophie Forrest Date: Fri, 6 Sep 2024 13:55:19 +1200 Subject: refactor: make clippy happy + no cloning --- src/lib.rs | 287 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 src/lib.rs (limited to 'src/lib.rs') diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..6fbf7dc --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +//! # VUW Course scraper +//! +//! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly +//! parse prerequisites, however. + +use std::{collections::HashSet, fmt}; + +use scraper::ElementRef; +use serde::{Deserialize, Serialize}; +use tracing::{debug, info}; + +/// Slice used for splitting requirements for parsing. +const SPLIT_SLICE: &[char] = &[';', ',']; + +/// A VUW course, along with all relevant data. +#[derive(Clone, Debug, Deserialize, Serialize)] +#[non_exhaustive] +pub struct Course<'a> { + /// Description of the course. + pub description: Option<&'a str>, + + /// Whether this course is offered in the upcoming year. + pub offered: bool, + + /// Amount of points this course is worth. + pub points: u8, + + /// Courses that must be taken before this course. + pub prerequisites: Vec<&'a str>, + + /// Courses that cannot be taken if you take this course. + pub restrictions: Vec<&'a str>, + + /// Subject areas this course belongs to. + pub subject_areas: HashSet<&'a str>, + + /// Subtitle of this course, its longer name. + pub subtitle: &'a str, + + /// Timetable of this course, includes trimesters and CRNs. + pub timetable: Vec, + + /// Title of this course, also known as the code. + pub title: &'a str, +} + +impl<'a> Course<'a> { + /// . + /// + /// # Panics + /// + /// Panics if . + pub fn parse_courseid(&mut self, elem: ElementRef<'a>) { + elem.children().for_each(|child| { + child.children().for_each(|c| { + if let Some(text) = c.value().as_text() { + // The actual text we're looking for + let text: &str = text.trim(); + + self.title = text; + } else if let Some(text) = c + .first_child() + .and_then(|node| node.value().as_text().map(|text| &**text)) + { + if let Some((indice, _char)) = text.char_indices().nth(1) { + // Skip over "-" + self.subtitle = text.get(indice..).expect("indice should be valid").trim(); + } + } + }); + }); + } + + /// Parses the course points, prerequisites, and restrictions from the given element. + /// + /// # Panics + /// + /// Panics if . + pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) { + // Parse course points, prerequisites, and exclusions. + let details = elem + .first_child() + .and_then(|el| el.first_child()?.value().as_text()); + + if let Some(details) = details { + let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect(); + + info!("{:#?}", &details_split); + + // Occasionally there is extra whitespace here, so this needs to be trimmed. + let points = details_split.first().expect("split should exist").trim(); + debug!("{:?}", points); + + let points_slice = &points.get(..points.len() - 4).expect("should be at indice"); + info!("{:?}", points_slice); + + let points = points_slice + .parse::() + .expect("should correctly parse points"); + info!("{:?}", points); + + self.points = points; + + if let Some(requirements) = details_split.last().map(|s| s.trim()) { + if requirements.starts_with("(X)") { + self.restrictions = requirements + .get(4..) + .expect("should be at indice") + .split(SPLIT_SLICE) + .map(str::trim) + .collect::>(); + } else if requirements.starts_with("(P)") { + let requirements = &requirements + .get(4..) + .expect("should be at indice") + .split(" (X) ") + .collect::>(); + + self.prerequisites = requirements + .first() + .map(|s| { + s.split(SPLIT_SLICE) + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::>() + }) + .unwrap_or_default(); + + if requirements.len() > 1 { + self.restrictions = requirements + .last() + .map(|s| s.split(SPLIT_SLICE).map(str::trim).collect::>()) + .unwrap_or_default(); + } + } else { + self.prerequisites = vec![requirements]; + } + + info!("{requirements}"); + } + } + } + + /// . + /// + /// # Panics + /// + /// Panics if . + pub fn parse_timetable(&mut self, elem: ElementRef<'a>) { + // Parse timetable / CRNs. + let details = elem + .first_child() + .and_then(|el| el.first_child()?.value().as_text()); + + if let Some(details) = details { + let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect(); + + info!("{:#?}", &details_split); + + let offering = CourseOffering::new( + details_split + .last() + .expect("course reference number should exist") + .get(4..) + .expect("course reference number digits should start at this indice") + .parse::() + .expect("course reference number should be parseable"), + Trimester::try_from( + *details_split + .first() + .expect("trimester element should exist"), + ) + .expect("should be parseable into a trimester"), + ); + + self.timetable.push(offering); + } + } +} + +impl Default for Course<'_> { + fn default() -> Self { + Self { + description: Option::default(), + offered: true, + points: u8::default(), + prerequisites: Vec::default(), + restrictions: Vec::default(), + subject_areas: HashSet::default(), + subtitle: "", + timetable: Vec::default(), + title: "", + } + } +} + +/// A course offering, includes the CRN and [`Trimester`]. +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] +#[non_exhaustive] +pub struct CourseOffering { + /// Reference number for this coursem e.g. 11723. + pub course_reference_number: u16, + + /// Trimester this course is offered in. + pub trimester: Trimester, +} + +impl CourseOffering { + /// Creates a new [`CourseOffering`]. + #[must_use] + pub const fn new(course_reference_number: u16, trimester: Trimester) -> Self { + Self { + course_reference_number, + trimester, + } + } +} + +/// Trimester information Victoria University of Wellington offers. +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)] +pub enum Trimester { + /// Trimester one. + One, + + /// Trimester two. + Two, + + /// Trimester three. + Three, + + /// Block dates. See course page for more information. + BlockDates, + + /// Part year. See course page for more information. + PartYear, + + /// Trimesters one and two. + OneTwo, + + /// Trimesters two and three. + TwoThree, + + /// Trimesters three and one. + ThreeOne, + + /// Trimesters one, two, and three. + FullYear, +} + +impl TryFrom<&str> for Trimester { + type Error = String; + + fn try_from(value: &str) -> Result { + match value { + "1/3" => Ok(Self::One), + "2/3" => Ok(Self::Two), + "3/3" => Ok(Self::Three), + "block dates/3" => Ok(Self::BlockDates), + "part year/3" => Ok(Self::PartYear), + "1+2/3" => Ok(Self::OneTwo), + "2+3/3" => Ok(Self::TwoThree), + "3+1/3" => Ok(Self::ThreeOne), + "1+2+3/3" => Ok(Self::FullYear), + _ => Err(String::from("Invalid trimester.")), + } + } +} + +impl fmt::Display for Course<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}", + self.title, + self.subtitle, + self.offered, + self.subject_areas + .iter() + // Necessary as Rust refuses to build Vec<&str> with &String. + .map(|s| &**s) + .collect::>() + .join(", "), + ) + } +} -- cgit 1.4.1