use serde::{Deserialize, Serialize}; use std::{ collections::{HashMap, HashSet}, fmt, fs, }; use tracing::{debug, info, level_filters::LevelFilter}; use scraper::{CaseSensitivity, Html, Selector}; // TODO: Use string slices to avoid clones? #[derive(Clone, Debug, Deserialize, Serialize)] struct Course { description: Option, offered: bool, points: u8, prerequisites: Vec, restrictions: Vec, subject_areas: HashSet, subtitle: String, timetable: Vec, title: String, } #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] struct CourseOffering { course_reference_number: u16, trimester: Trimester, } #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)] enum Trimester { One, Two, Three, BlockDates, PartYear, OneTwo, TwoThree, ThreeOne, FullYear, } impl TryFrom<&str> for Trimester { type Error = String; fn try_from(value: &str) -> Result { match value { "1/3" => Ok(Self::One), "2/3" => Ok(Self::Two), "3/3" => Ok(Self::Three), "block dates/3" => Ok(Self::BlockDates), "part year/3" => Ok(Self::PartYear), "1+2/3" => Ok(Self::OneTwo), "2+3/3" => Ok(Self::TwoThree), "3+1/3" => Ok(Self::ThreeOne), "1+2+3/3" => Ok(Self::FullYear), _ => Err(String::from("Invalid trimester.")), } } } #[derive(Clone, Deserialize, Serialize)] struct JsonExport { courses: HashMap, } impl Default for Course { fn default() -> Self { Self { description: Option::default(), offered: true, points: u8::default(), prerequisites: Vec::default(), restrictions: Vec::default(), subject_areas: HashSet::default(), subtitle: String::default(), timetable: Vec::default(), title: String::default(), } } } impl fmt::Display for Course { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}", self.title, self.subtitle, self.offered, self.subject_areas .iter() // Necessary as Rust refuses to build Vec<&str> with &String. .map(|s| &s[..]) .collect::>() .join(", "), ) } } fn main() { tracing_subscriber::fmt() .with_max_level(LevelFilter::INFO) .init(); let html = include_str!("../courses.html"); let document = Html::parse_document(html); let mut course_map: HashMap = HashMap::new(); let mut subject_area = String::new(); let mut working_course = Course::default(); for elem in document.select(&Selector::parse("p").unwrap()) { let elem_value = elem.value(); if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) { course_map .entry(working_course.title.clone()) .and_modify(|c| { c.subject_areas.insert(subject_area.clone()); }) .or_insert(working_course.clone()); working_course = Course::default(); working_course.subject_areas.insert(subject_area.clone()); elem.children().for_each(|child| { child.children().for_each(|c| { if c.value().is_text() { let working = c.value().as_text().unwrap().to_string(); // Skip over space. working_course.title = working[..working.len() - 1].to_owned(); } else { working_course.subtitle = c .first_child() .unwrap() .value() .as_text() .unwrap() // Skip over "- ". .to_string()[4..] .to_owned(); } }); }); } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) { working_course.offered = false; } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) { subject_area = elem .first_child() .unwrap() .first_child() .unwrap() .value() .as_text() .unwrap() .to_string(); } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) { let description = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()) .map(|t| t.to_string().replace('\n', "")); working_course.description = description; // println!("{}", working_course.description); } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) { // Parse timetable / CRNs. let details = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()) .map(|t| t.to_string()); if let Some(details) = details { let details_split: Vec<&str> = details.split(" • ").take(2).collect(); info!("{:#?}", &details_split); let offering = CourseOffering { course_reference_number: details_split.last().unwrap()[4..] .parse::() .unwrap(), trimester: Trimester::try_from(*details_split.first().unwrap()).unwrap(), }; working_course.timetable.push(offering); } } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) { // Parse course points, prerequisites, and exclusions. let details = elem .first_child() .and_then(|el| el.first_child()?.value().as_text()) .map(|t| t.to_string()); if let Some(details) = details { let details_split: Vec<&str> = details.split(" • ").take(2).collect(); info!("{:#?}", &details_split); // Occasionally there is extra whitespace here, so this needs to be trimmed. let points = details_split.first().unwrap().trim(); debug!("{:?}", points); let points_slice = &points[..points.len() - 4]; info!("{:?}", points_slice); let points = points_slice.parse::().unwrap(); info!("{:?}", points); working_course.points = points; // if let Some(requirements) = details_split.last() { // requirements.split(|c|) // } } } } debug!("{:?}", course_map.get("COMP 102")); course_map.remove(&String::new()); fs::write( "./export.json", simd_json::serde::to_string(&JsonExport { courses: course_map, }) .unwrap(), ) .unwrap(); // course_map // .values() // .for_each(|c| println!("{:#?}", c.subject_areas)); // course_map.values().for_each(|c| println!("{c}")); }