summary refs log tree commit diff
path: root/src/main.rs
diff options
context:
space:
mode:
author Sophie Forrest <git@sophieforrest.com> 2024-09-06 13:55:19 +1200
committer Sophie Forrest <git@sophieforrest.com> 2024-09-06 13:55:19 +1200
commit c4ce297ff951583c9ffe3a88aa22933577b329da (patch)
tree 93f55666b0ba0d74be87e10380dcc25a84a17677 /src/main.rs
parent 151ab2e8a837242f9654be1280286dc9514fe49c (diff)
refactor: make clippy happy + no cloning
Diffstat (limited to 'src/main.rs')
-rw-r--r-- src/main.rs 331
1 files changed, 73 insertions, 258 deletions
diff --git a/src/main.rs b/src/main.rs
index ee7686e..05f3976 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,270 +1,85 @@
 // SPDX-License-Identifier: AGPL-3.0-or-later
 
-use serde::{Deserialize, Serialize};
-use std::{
-    collections::{HashMap, HashSet},
-    fmt, fs,
-};
+//! # VUW Course scraper
+//!
+//! This is a simple program capable of parsing VUW's courses from the registry. It cannot correctly
+//! parse prerequisites, however.
 
-use tracing::{debug, info, level_filters::LevelFilter};
+use std::{collections::HashMap, fs};
 
 use scraper::{CaseSensitivity, Html, Selector};
+use serde::{Deserialize, Serialize};
+use tracing::level_filters::LevelFilter;
+use vuw_course_scraper::Course;
 
-// TODO: Use string slices to avoid clones?
-#[derive(Clone, Debug, Deserialize, Serialize)]
-struct Course<'a> {
-    description: Option<&'a str>,
-    offered: bool,
-    points: u8,
-    prerequisites: Vec<String>,
-    restrictions: Vec<String>,
-    subject_areas: HashSet<String>,
-    subtitle: &'a str,
-    timetable: Vec<CourseOffering>,
-    title: &'a str,
-}
-
-#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
-struct CourseOffering {
-    course_reference_number: u16,
-    trimester: Trimester,
-}
-
-#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)]
-enum Trimester {
-    One,
-    Two,
-    Three,
-    BlockDates,
-    PartYear,
-    OneTwo,
-    TwoThree,
-    ThreeOne,
-    FullYear,
-}
-
-impl TryFrom<&str> for Trimester {
-    type Error = String;
-
-    fn try_from(value: &str) -> Result<Self, Self::Error> {
-        match value {
-            "1/3" => Ok(Self::One),
-            "2/3" => Ok(Self::Two),
-            "3/3" => Ok(Self::Three),
-            "block dates/3" => Ok(Self::BlockDates),
-            "part year/3" => Ok(Self::PartYear),
-            "1+2/3" => Ok(Self::OneTwo),
-            "2+3/3" => Ok(Self::TwoThree),
-            "3+1/3" => Ok(Self::ThreeOne),
-            "1+2+3/3" => Ok(Self::FullYear),
-            _ => Err(String::from("Invalid trimester.")),
-        }
-    }
-}
-
+/// Utility struct for exporting to JSON.
 #[derive(Clone, Deserialize, Serialize)]
 struct JsonExport<'a> {
-    #[serde(borrow)]
-    courses: HashMap<&'a str, Course<'a>>,
-}
-
-impl Default for Course<'_> {
-    fn default() -> Self {
-        Self {
-            description: Option::default(),
-            offered: true,
-            points: u8::default(),
-            prerequisites: Vec::default(),
-            restrictions: Vec::default(),
-            subject_areas: HashSet::default(),
-            subtitle: "",
-            timetable: Vec::default(),
-            title: "",
-        }
-    }
-}
-
-impl fmt::Display for Course<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}",
-            self.title,
-            self.subtitle,
-            self.offered,
-            self.subject_areas
-                .iter()
-                // Necessary as Rust refuses to build Vec<&str> with &String.
-                .map(|s| &s[..])
-                .collect::<Vec<&str>>()
-                .join(", "),
-        )
-    }
+	/// [`HashMap`] of all courses.
+	#[serde(borrow)]
+	courses: HashMap<&'a str, Course<'a>>,
 }
 
 fn main() {
-    tracing_subscriber::fmt()
-        .with_max_level(LevelFilter::INFO)
-        .init();
-
-    let html = include_str!("../courses.html");
-
-    let document = Html::parse_document(html);
-
-    let mut course_map: HashMap<&str, Course> = HashMap::new();
-
-    let mut subject_area = String::new();
-    let mut working_course = Course::default();
-
-    for elem in document.select(&Selector::parse("p").unwrap()) {
-        let elem_value = elem.value();
-
-        if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
-            course_map
-                .entry(working_course.title)
-                .and_modify(|c| {
-                    c.subject_areas.insert(subject_area.clone());
-                })
-                .or_insert(working_course);
-            working_course = Course::default();
-            working_course.subject_areas.insert(subject_area.clone());
-
-            elem.children().for_each(|child| {
-                child.children().for_each(|c| {
-                    if c.value().is_text() {
-                        let working = &c.value().as_text().unwrap()[..];
-
-                        // Skip over space.
-                        working_course.title = &working[..working.len() - 1];
-                    } else {
-                        working_course.subtitle = &c
-                            .first_child()
-                            .unwrap()
-                            .value()
-                            .as_text()
-                            .unwrap()
-                            // Skip over "- ".
-                            [4..];
-                    }
-                });
-            });
-        } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
-            working_course.offered = false;
-        } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
-            subject_area = elem
-                .first_child()
-                .unwrap()
-                .first_child()
-                .unwrap()
-                .value()
-                .as_text()
-                .unwrap()
-                .to_string();
-        } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
-            let description = elem
-                .first_child()
-                .and_then(|el| el.first_child()?.value().as_text())
-                .map(|t| &t[..]);
-
-            working_course.description = description;
-
-            // println!("{}", working_course.description);
-        } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
-            // Parse timetable / CRNs.
-            let details = elem
-                .first_child()
-                .and_then(|el| el.first_child()?.value().as_text())
-                .map(|t| t.to_string());
-
-            if let Some(details) = details {
-                let details_split: Vec<&str> = details.split(" • ").take(2).collect();
-
-                info!("{:#?}", &details_split);
-
-                let offering = CourseOffering {
-                    course_reference_number: details_split.last().unwrap()[4..]
-                        .parse::<u16>()
-                        .unwrap(),
-                    trimester: Trimester::try_from(*details_split.first().unwrap()).unwrap(),
-                };
-
-                working_course.timetable.push(offering);
-            }
-        } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
-            // Parse course points, prerequisites, and exclusions.
-            let details = elem
-                .first_child()
-                .and_then(|el| el.first_child()?.value().as_text())
-                .map(|t| t.to_string());
-
-            if let Some(details) = details {
-                let details_split: Vec<&str> = details.split(" • ").take(2).collect();
-
-                info!("{:#?}", &details_split);
-
-                // Occasionally there is extra whitespace here, so this needs to be trimmed.
-                let points = details_split.first().unwrap().trim();
-                debug!("{:?}", points);
-
-                let points_slice = &points[..points.len() - 4];
-                info!("{:?}", points_slice);
-
-                let points = points_slice.parse::<u8>().unwrap();
-                info!("{:?}", points);
-
-                working_course.points = points;
-
-                if let Some(requirements) = details_split.last().map(|s| s.trim()) {
-                    if requirements.starts_with("(X)") {
-                        working_course.restrictions = requirements[4..]
-                            .split(&[';', ','])
-                            .map(str::trim)
-                            .map(str::to_owned)
-                            .collect::<Vec<String>>();
-                    } else {
-                        let requirements = &requirements[4..].split(" (X) ").collect::<Vec<&str>>();
-
-                        working_course.prerequisites = requirements
-                            .first()
-                            .unwrap()
-                            .split(&[',', ';'])
-                            .map(str::trim)
-                            .filter(|s| !s.is_empty())
-                            .map(str::to_owned)
-                            .collect();
-
-                        if requirements.len() > 1 {
-                            working_course.restrictions = requirements
-                                .last()
-                                .unwrap()
-                                .split(&[',', ';'])
-                                .map(str::trim)
-                                .map(str::to_owned)
-                                .collect();
-                        }
-                    }
-
-                    info!("{requirements}");
-                }
-            }
-        }
-    }
-
-    debug!("{:?}", course_map.get("COMP 102"));
-
-    course_map.remove("");
-
-    fs::write(
-        "./export.json",
-        simd_json::serde::to_string(&JsonExport {
-            courses: course_map,
-        })
-        .unwrap(),
-    )
-    .unwrap();
-
-    // course_map
-    //     .values()
-    //     .for_each(|c| println!("{:#?}", c.subject_areas));
-    // course_map.values().for_each(|c| println!("{c}"));
+	tracing_subscriber::fmt()
+		.with_max_level(LevelFilter::INFO)
+		.init();
+
+	let html = &fs::read_to_string("./courses.html").expect("file does not exist");
+
+	let document = Html::parse_document(html);
+
+	let mut course_map: HashMap<&str, Course> = HashMap::new();
+
+	let mut subject_area = "";
+	let mut working_course = Course::default();
+
+	for elem in document.select(&Selector::parse("p").expect("selector should always be valid")) {
+		let elem_value = elem.value();
+
+		if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
+			course_map
+				.entry(working_course.title)
+				.and_modify(|c| {
+					c.subject_areas.insert(subject_area);
+				})
+				.or_insert(working_course);
+			working_course = Course::default();
+			working_course.subject_areas.insert(subject_area);
+
+			working_course.parse_courseid(elem);
+		} else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
+			working_course.offered = false;
+		} else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
+			if let Some(subject_area_name) = elem.first_child().and_then(|child| {
+				child
+					.first_child()
+					.and_then(|nexted_child| nexted_child.value().as_text())
+			}) {
+				subject_area = &**subject_area_name;
+			}
+		} else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
+			let description = elem
+				.first_child()
+				.and_then(|el| el.first_child()?.value().as_text())
+				.map(|t| &**t);
+
+			working_course.description = description;
+		} else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
+			working_course.parse_timetable(elem);
+		} else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
+			working_course.parse_coursepoints(elem);
+		}
+	}
+
+	course_map.remove("");
+
+	fs::write(
+		"./export.json",
+		simd_json::serde::to_string(&JsonExport {
+			courses: course_map,
+		})
+		.expect("json should parse correctly"),
+	)
+	.expect("file should be writable");
 }