diff options
Diffstat (limited to 'src/main.rs')
| -rw-r--r-- | src/main.rs | 239 |
1 files changed, 239 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..908fdd5 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,239 @@ +use serde::{Deserialize, Serialize}; +use std::{ + collections::{HashMap, HashSet}, + fmt, fs, +}; + +use tracing::{debug, info, level_filters::LevelFilter}; + +use scraper::{CaseSensitivity, Html, Selector}; + +// TODO: Use string slices to avoid clones? +#[derive(Clone, Debug, Deserialize, Serialize)] +struct Course { + description: Option<String>, + offered: bool, + points: u8, + prerequisites: Vec<String>, + restrictions: Vec<String>, + subject_areas: HashSet<String>, + subtitle: String, + timetable: Vec<CourseOffering>, + title: String, +} + +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] +struct CourseOffering { + course_reference_number: u16, + trimester: Trimester, +} + +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)] +enum Trimester { + One, + Two, + Three, + BlockDates, + PartYear, + OneTwo, + TwoThree, + ThreeOne, + FullYear, +} + +impl TryFrom<&str> for Trimester { + type Error = String; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + match value { + "1/3" => Ok(Self::One), + "2/3" => Ok(Self::Two), + "3/3" => Ok(Self::Three), + "block dates/3" => Ok(Self::BlockDates), + "part year/3" => Ok(Self::PartYear), + "1+2/3" => Ok(Self::OneTwo), + "2+3/3" => Ok(Self::TwoThree), + "3+1/3" => Ok(Self::ThreeOne), + "1+2+3/3" => Ok(Self::FullYear), + _ => Err(String::from("Invalid trimester.")), + } + } +} + +#[derive(Clone, Deserialize, Serialize)] +struct JsonExport { + courses: HashMap<String, Course>, +} + +impl Default for Course { + fn default() -> Self { + Self { + description: Option::default(), + offered: true, + points: u8::default(), + prerequisites: Vec::default(), + restrictions: Vec::default(), + subject_areas: HashSet::default(), + subtitle: String::default(), + timetable: Vec::default(), + title: String::default(), + } + } +} + +impl fmt::Display for Course { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}", + self.title, + self.subtitle, + self.offered, + self.subject_areas + .iter() + // Necessary as Rust refuses to build Vec<&str> with &String. + .map(|s| &s[..]) + .collect::<Vec<&str>>() + .join(", "), + ) + } +} + +fn main() { + tracing_subscriber::fmt() + .with_max_level(LevelFilter::INFO) + .init(); + + let html = include_str!("../courses.html"); + + let document = Html::parse_document(html); + + let mut course_map: HashMap<String, Course> = HashMap::new(); + + let mut subject_area = String::new(); + let mut working_course = Course::default(); + + for elem in document.select(&Selector::parse("p").unwrap()) { + let elem_value = elem.value(); + + if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) { + course_map + .entry(working_course.title.clone()) + .and_modify(|c| { + c.subject_areas.insert(subject_area.clone()); + }) + .or_insert(working_course.clone()); + working_course = Course::default(); + working_course.subject_areas.insert(subject_area.clone()); + + elem.children().for_each(|child| { + child.children().for_each(|c| { + if c.value().is_text() { + let working = c.value().as_text().unwrap().to_string(); + + // Skip over space. + working_course.title = working[..working.len() - 1].to_owned(); + } else { + working_course.subtitle = c + .first_child() + .unwrap() + .value() + .as_text() + .unwrap() + // Skip over "- ". + .to_string()[4..] + .to_owned(); + } + }); + }); + } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) { + working_course.offered = false; + } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) { + subject_area = elem + .first_child() + .unwrap() + .first_child() + .unwrap() + .value() + .as_text() + .unwrap() + .to_string(); + } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) { + let description = elem + .first_child() + .and_then(|el| el.first_child()?.value().as_text()) + .map(|t| t.to_string().replace('\n', "")); + + working_course.description = description; + + // println!("{}", working_course.description); + } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) { + // Parse timetable / CRNs. + let details = elem + .first_child() + .and_then(|el| el.first_child()?.value().as_text()) + .map(|t| t.to_string()); + + if let Some(details) = details { + let details_split: Vec<&str> = details.split(" • ").take(2).collect(); + + info!("{:#?}", &details_split); + + let offering = CourseOffering { + course_reference_number: details_split.last().unwrap()[4..] + .parse::<u16>() + .unwrap(), + trimester: Trimester::try_from(*details_split.first().unwrap()).unwrap(), + }; + + working_course.timetable.push(offering); + } + } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) { + // Parse course points, prerequisites, and exclusions. + let details = elem + .first_child() + .and_then(|el| el.first_child()?.value().as_text()) + .map(|t| t.to_string()); + + if let Some(details) = details { + let details_split: Vec<&str> = details.split(" • ").take(2).collect(); + + info!("{:#?}", &details_split); + + // Occasionally there is extra whitespace here, so this needs to be trimmed. + let points = details_split.first().unwrap().trim(); + debug!("{:?}", points); + + let points_slice = &points[..points.len() - 4]; + info!("{:?}", points_slice); + + let points = points_slice.parse::<u8>().unwrap(); + info!("{:?}", points); + + working_course.points = points; + + // if let Some(requirements) = details_split.last() { + // requirements.split(|c|) + // } + } + } + } + + debug!("{:?}", course_map.get("COMP 102")); + + course_map.remove(&String::new()); + + fs::write( + "./export.json", + simd_json::serde::to_string(&JsonExport { + courses: course_map, + }) + .unwrap(), + ) + .unwrap(); + + // course_map + // .values() + // .for_each(|c| println!("{:#?}", c.subject_areas)); + // course_map.values().for_each(|c| println!("{c}")); +} |