// SPDX-License-Identifier: AGPL-3.0-or-later //! # VUW Course scraper //! //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly //! parse prerequisites, however. use std::{collections::HashMap, fs}; use scraper::{CaseSensitivity, Html, Selector}; use serde::{Deserialize, Serialize}; use tracing::level_filters::LevelFilter; use vuw_course_scraper::Course; /// Utility struct for exporting to JSON. #[derive(Clone, Deserialize, Serialize)] struct JsonExport<'a> { /// [`HashMap`] of all courses. #[serde(borrow)] courses: HashMap<&'a str, Course<'a>>, } fn main() { tracing_subscriber::fmt() .with_max_level(LevelFilter::INFO) .init(); let html = &fs::read_to_string("./courses.html").expect("file does not exist"); let document = Html::parse_document(html); let mut course_map: HashMap<&str, Course> = HashMap::new(); let mut subject_area = ""; let mut working_course = Course::default(); for elem in document.select(&Selector::parse("p").expect("selector should always be valid")) { let elem_value = elem.value(); if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) { course_map .entry(working_course.title) .and_modify(|c| { c.subject_areas.insert(subject_area); }) .or_insert(working_course); working_course = Course::default(); working_course.subject_areas.insert(subject_area); working_course.parse_courseid(elem); } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) { working_course.offered = false; } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) { if let Some(subject_area_name) = elem.first_child().and_then(|child| { child .first_child() .and_then(|nexted_child| nexted_child.value().as_text()) }) { subject_area = &**subject_area_name; } } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) { let description = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()) .map(|t| &**t); working_course.description = description; } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) { working_course.parse_timetable(elem); } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) { working_course.parse_coursepoints(elem); } } course_map.remove(""); fs::write( "./export.json", simd_json::serde::to_string(&JsonExport { courses: course_map, }) .expect("json should parse correctly"), ) .expect("file should be writable"); }