summary refs log tree commit diff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs287
1 files changed, 287 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..6fbf7dc
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+//! # VUW Course scraper
+//!
+//! This is a simple program capable of parsing VUW's courses from the registry. Note that it
+//! cannot yet parse prerequisites correctly.
+
+use std::{collections::HashSet, fmt};
+
+use scraper::ElementRef;
+use serde::{Deserialize, Serialize};
+use tracing::{debug, info};
+
+/// Separator characters (`;` and `,`) on which requirement lists are split before each entry is
+/// trimmed and stored.
+const SPLIT_SLICE: &[char] = &[';', ','];
+
+/// A VUW course, along with all relevant data.
+///
+/// All string data is borrowed for `'a` instead of being owned; the `parse_*` methods store
+/// slices of the text reachable from the [`ElementRef`] they are given.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+#[non_exhaustive]
+pub struct Course<'a> {
+	/// Description of the course, if one is known.
+	pub description: Option<&'a str>,
+
+	/// Whether this course is offered in the upcoming year.
+	pub offered: bool,
+
+	/// Amount of points this course is worth.
+	pub points: u8,
+
+	/// Courses that must be taken before this course.
+	///
+	/// These are text fragments split out of the requirements line; unrecognised requirement text
+	/// is stored as a single entry.
+	pub prerequisites: Vec<&'a str>,
+
+	/// Courses that cannot be taken if you take this course.
+	pub restrictions: Vec<&'a str>,
+
+	/// Subject areas this course belongs to.
+	pub subject_areas: HashSet<&'a str>,
+
+	/// Subtitle of this course, its longer name.
+	pub subtitle: &'a str,
+
+	/// Timetable of this course, includes trimesters and CRNs.
+	pub timetable: Vec<CourseOffering>,
+
+	/// Title of this course, also known as the code.
+	pub title: &'a str,
+}
+
+impl<'a> Course<'a> {
+	/// Extracts the course title (its code) and subtitle from the given element.
+	///
+	/// Every grandchild node of `elem` is visited. A text node sets [`Course::title`] to its
+	/// trimmed text. Any other node whose first child is a text node sets [`Course::subtitle`] to
+	/// that text with its first character (the leading dash) removed and the rest trimmed; text
+	/// shorter than two characters is ignored. Later matches overwrite earlier ones.
+	///
+	/// # Panics
+	///
+	/// Panics if the subtitle text cannot be sliced at the offset of its second character. That
+	/// offset is produced by [`str::char_indices`], so this is not expected to happen.
+	pub fn parse_courseid(&mut self, elem: ElementRef<'a>) {
+		for child in elem.children() {
+			for grandchild in child.children() {
+				if let Some(code) = grandchild.value().as_text() {
+					// A bare text node is the actual text we're looking for: the course code.
+					let code: &str = code.trim();
+					self.title = code;
+					continue;
+				}
+
+				let nested = grandchild
+					.first_child()
+					.and_then(|node| node.value().as_text().map(|text| &**text));
+
+				if let Some(text) = nested {
+					// The offset of the second character is where the text after the dash starts.
+					let after_dash = text.char_indices().nth(1).map(|(indice, _char)| indice);
+
+					if let Some(indice) = after_dash {
+						self.subtitle = text.get(indice..).expect("indice should be valid").trim();
+					}
+				}
+			}
+		}
+	}
+
+	/// Parses the course points, prerequisites, and restrictions from the given element.
+	///
+	/// The text node found at the first child of `elem`'s first child is split on a bullet
+	/// (U+2022) surrounded by spaces. The first segment holds the points value followed by a
+	/// four-byte suffix. The second segment, when present, holds the requirements:
+	///
+	/// - `(X) ...` lists restrictions only;
+	/// - `(P) ... (X) ...` lists prerequisites, optionally followed by restrictions;
+	/// - anything else is stored verbatim as a single prerequisite.
+	///
+	/// Lists are split on `;` and `,`, each entry is trimmed, and empty entries are dropped.
+	/// Nothing is changed when the element has no such text node.
+	///
+	/// # Panics
+	///
+	/// Panics if the points segment is shorter than its four-byte suffix, or if the text before
+	/// that suffix does not parse as a [`u8`].
+	pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) {
+		/// Splits a requirement list on [`SPLIT_SLICE`], trimming entries and dropping empty ones.
+		fn split_list(list: &str) -> Vec<&str> {
+			list.split(SPLIT_SLICE)
+				.map(str::trim)
+				.filter(|entry| !entry.is_empty())
+				.collect()
+		}
+
+		// Parse course points, prerequisites, and exclusions.
+		let details = elem
+			.first_child()
+			.and_then(|el| el.first_child()?.value().as_text());
+
+		if let Some(details) = details {
+			let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect();
+
+			info!("{:#?}", &details_split);
+
+			// Occasionally there is extra whitespace here, so this needs to be trimmed.
+			let points = details_split.first().expect("split should exist").trim();
+			debug!("{:?}", points);
+
+			// Drop the four-byte suffix; `checked_sub` keeps short input from underflowing.
+			let points_slice = points
+				.len()
+				.checked_sub(4)
+				.and_then(|end| points.get(..end))
+				.expect("should be at indice");
+			info!("{:?}", points_slice);
+
+			let points = points_slice
+				.parse::<u8>()
+				.expect("should correctly parse points");
+			info!("{:?}", points);
+
+			self.points = points;
+
+			// Only a second segment can hold requirements. Taking the last segment instead would
+			// hand back the points text whenever the line contains no bullet.
+			if let Some(requirements) = details_split.get(1).copied().map(str::trim) {
+				if let Some(restricted) = requirements.strip_prefix("(X)") {
+					self.restrictions = split_list(restricted);
+				} else if let Some(listed) = requirements.strip_prefix("(P)") {
+					// Prerequisites come first; restrictions may follow after an "(X)" marker.
+					let sections: Vec<&str> = listed.split(" (X) ").collect();
+
+					self.prerequisites = sections
+						.first()
+						.copied()
+						.map(split_list)
+						.unwrap_or_default();
+
+					if sections.len() > 1 {
+						self.restrictions = sections
+							.last()
+							.copied()
+							.map(split_list)
+							.unwrap_or_default();
+					}
+				} else {
+					// Unrecognised requirement text is kept whole rather than guessed at.
+					self.prerequisites = vec![requirements];
+				}
+
+				info!("{requirements}");
+			}
+		}
+	}
+
+	/// Parses one timetable entry from the given element and appends it to [`Course::timetable`].
+	///
+	/// The text node found at the first child of `elem`'s first child is split on a bullet
+	/// (U+2022) surrounded by spaces, keeping at most two segments. The first segment is converted
+	/// into a [`Trimester`]; the last segment, minus its first four bytes, is parsed as the course
+	/// reference number. Nothing is changed when the element has no such text node.
+	///
+	/// # Panics
+	///
+	/// Panics if the last segment cannot be sliced from byte offset four, if the remainder does
+	/// not parse as a [`u16`], or if the first segment is not a recognised trimester string.
+	pub fn parse_timetable(&mut self, elem: ElementRef<'a>) {
+		// Parse timetable / CRNs.
+		let text_node = elem
+			.first_child()
+			.and_then(|el| el.first_child()?.value().as_text());
+
+		if let Some(details) = text_node {
+			let segments: Vec<&str> = details.split(" \u{2022} ").take(2).collect();
+
+			info!("{:#?}", &segments);
+
+			// The reference number is resolved before the trimester.
+			let reference_number = segments
+				.last()
+				.expect("course reference number should exist")
+				.get(4..)
+				.expect("course reference number digits should start at this indice")
+				.parse::<u16>()
+				.expect("course reference number should be parseable");
+
+			let trimester_text = *segments.first().expect("trimester element should exist");
+			let trimester =
+				Trimester::try_from(trimester_text).expect("should be parseable into a trimester");
+
+			self.timetable
+				.push(CourseOffering::new(reference_number, trimester));
+		}
+	}
+}
+
+impl Default for Course<'_> {
+	/// Returns a course with empty text and collections, zero points, no description, and
+	/// `offered` set to `true`.
+	fn default() -> Self {
+		Self {
+			title: "",
+			subtitle: "",
+			description: None,
+			offered: true,
+			points: 0,
+			prerequisites: Vec::new(),
+			restrictions: Vec::new(),
+			subject_areas: HashSet::new(),
+			timetable: Vec::new(),
+		}
+	}
+}
+
+/// A course offering, includes the CRN and [`Trimester`].
+#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
+#[non_exhaustive]
+pub struct CourseOffering {
+	/// Reference number (CRN) for this course, e.g. 11723.
+	pub course_reference_number: u16,
+
+	/// Trimester this course is offered in.
+	pub trimester: Trimester,
+}
+
+impl CourseOffering {
+	/// Builds a [`CourseOffering`] from a course reference number and the [`Trimester`] it runs in.
+	///
+	/// Both arguments are stored unchanged.
+	#[must_use]
+	pub const fn new(course_reference_number: u16, trimester: Trimester) -> Self {
+		CourseOffering {
+			trimester,
+			course_reference_number,
+		}
+	}
+}
+
+/// Teaching periods in which Victoria University of Wellington offers a course.
+///
+/// Values are produced from the timetable text through the [`TryFrom<&str>`] implementation.
+#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)]
+pub enum Trimester {
+	/// Trimester one.
+	One,
+
+	/// Trimester two.
+	Two,
+
+	/// Trimester three.
+	Three,
+
+	/// Block dates. See course page for more information.
+	BlockDates,
+
+	/// Part year. See course page for more information.
+	PartYear,
+
+	/// Trimesters one and two.
+	OneTwo,
+
+	/// Trimesters two and three.
+	TwoThree,
+
+	/// Trimesters three and one.
+	ThreeOne,
+
+	/// Trimesters one, two, and three.
+	FullYear,
+}
+
+impl TryFrom<&str> for Trimester {
+	type Error = String;
+
+	/// Converts the timetable's trimester text (for example `"1/3"` or `"1+2/3"`) into a
+	/// [`Trimester`].
+	///
+	/// The match is exact: surrounding whitespace or different casing is rejected.
+	///
+	/// # Errors
+	///
+	/// Returns `"Invalid trimester."` when `value` is not one of the recognised strings.
+	fn try_from(value: &str) -> Result<Self, Self::Error> {
+		let trimester = match value {
+			"1/3" => Self::One,
+			"2/3" => Self::Two,
+			"3/3" => Self::Three,
+			"block dates/3" => Self::BlockDates,
+			"part year/3" => Self::PartYear,
+			"1+2/3" => Self::OneTwo,
+			"2+3/3" => Self::TwoThree,
+			"3+1/3" => Self::ThreeOne,
+			"1+2+3/3" => Self::FullYear,
+			_ => return Err(String::from("Invalid trimester.")),
+		};
+
+		Ok(trimester)
+	}
+}
+
+impl fmt::Display for Course<'_> {
+	/// Writes a one-line summary containing the title, subtitle, offered flag, and subject areas.
+	///
+	/// The subject areas are listed in sorted order so that the same course always formats to the
+	/// same string.
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		// `HashSet` iteration order is unspecified, so sort before joining.
+		let mut areas: Vec<&str> = self.subject_areas.iter().copied().collect();
+		areas.sort_unstable();
+
+		write!(
+			f,
+			"Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}",
+			self.title,
+			self.subtitle,
+			self.offered,
+			areas.join(", "),
+		)
+	}
+}