summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/main.rs 239
1 files changed, 239 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..908fdd5
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,239 @@
+use serde::{Deserialize, Serialize};
+use std::{
+    collections::{HashMap, HashSet},
+    fmt, fs,
+};
+
+use tracing::{debug, info, level_filters::LevelFilter};
+
+use scraper::{CaseSensitivity, Html, Selector};
+
+// TODO: Use string slices to avoid clones?
+/// A single course as assembled by `main` from the catalogue HTML and written to the JSON export.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+struct Course {
+    /// Text of the course's `subjectsbody` paragraph with newlines removed, if one was found.
+    description: Option<String>,
+    /// `true` by default; cleared when a `notoffered` paragraph is seen for the course.
+    offered: bool,
+    /// Points value parsed from the first segment of the `coursepoints` paragraph.
+    points: u8,
+    /// Not populated by the visible parsing code; stays empty.
+    prerequisites: Vec<String>,
+    /// Not populated by the visible parsing code; stays empty.
+    restrictions: Vec<String>,
+    /// Subject-area headings the course was recorded under.
+    subject_areas: HashSet<String>,
+    /// Subtitle text taken from the `courseid` paragraph, with its leading four bytes skipped.
+    subtitle: String,
+    /// One entry per `timetable` paragraph that yielded text for the course.
+    timetable: Vec<CourseOffering>,
+    /// Title text taken from the `courseid` paragraph (final byte dropped); also the export map key.
+    title: String,
+}
+
+/// One timetable entry of a course: a reference number paired with the trimester it runs in.
+#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
+struct CourseOffering {
+    /// Number parsed from the second " • "-separated segment of a `timetable` paragraph,
+    /// after skipping its first four bytes (presumably a "CRN " label; confirm against the HTML).
+    course_reference_number: u16,
+    /// Trimester parsed from the first segment of the same paragraph.
+    trimester: Trimester,
+}
+
+/// Teaching period of a course offering.
+///
+/// The derived `PartialOrd`/`Ord` follow the declaration order of the variants below.
+#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)]
+enum Trimester {
+    /// Parsed from `"1/3"`.
+    One,
+    /// Parsed from `"2/3"`.
+    Two,
+    /// Parsed from `"3/3"`.
+    Three,
+    /// Parsed from `"block dates/3"`.
+    BlockDates,
+    /// Parsed from `"part year/3"`.
+    PartYear,
+    /// Parsed from `"1+2/3"`.
+    OneTwo,
+    /// Parsed from `"2+3/3"`.
+    TwoThree,
+    /// Parsed from `"3+1/3"`.
+    ThreeOne,
+    /// Parsed from `"1+2+3/3"`.
+    FullYear,
+}
+
+impl TryFrom<&str> for Trimester {
+    type Error = String;
+
+    /// Converts a catalogue trimester label such as `"1/3"` or `"1+2/3"` into a [`Trimester`].
+    ///
+    /// # Errors
+    ///
+    /// Returns `"Invalid trimester."` when `value` is not exactly one of the known labels.
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        // Every label the parser recognises, paired with the variant it denotes.
+        const LABELS: [(&str, Trimester); 9] = [
+            ("1/3", Trimester::One),
+            ("2/3", Trimester::Two),
+            ("3/3", Trimester::Three),
+            ("block dates/3", Trimester::BlockDates),
+            ("part year/3", Trimester::PartYear),
+            ("1+2/3", Trimester::OneTwo),
+            ("2+3/3", Trimester::TwoThree),
+            ("3+1/3", Trimester::ThreeOne),
+            ("1+2+3/3", Trimester::FullYear),
+        ];
+
+        LABELS
+            .iter()
+            .find(|(label, _)| *label == value)
+            .map(|&(_, trimester)| trimester)
+            .ok_or_else(|| String::from("Invalid trimester."))
+    }
+}
+
+/// Top-level shape of the document that `main` serialises to `./export.json`.
+#[derive(Clone, Deserialize, Serialize)]
+struct JsonExport {
+    /// Parsed courses, keyed by course title.
+    courses: HashMap<String, Course>,
+}
+
+impl Default for Course {
+    /// Creates a blank course: every text and collection field is empty, `points` is zero,
+    /// and the course counts as offered until the parser learns otherwise.
+    fn default() -> Self {
+        Self {
+            description: None,
+            offered: true,
+            points: 0,
+            prerequisites: Vec::new(),
+            restrictions: Vec::new(),
+            subject_areas: HashSet::new(),
+            subtitle: String::new(),
+            timetable: Vec::new(),
+            title: String::new(),
+        }
+    }
+}
+
+impl fmt::Display for Course {
+    /// Writes a one-line summary of the course: title, subtitle, offered flag and subject areas.
+    ///
+    /// The areas are listed in the iteration order of the underlying `HashSet`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // `join` is called on a slice of `&str`, so borrow each owned area as a string slice first.
+        let area_names: Vec<&str> = self.subject_areas.iter().map(String::as_str).collect();
+        let areas = area_names.join(", ");
+
+        write!(
+            f,
+            "Course {{ title: {}, subtitle: {}, offered: {}, areas: [{}] }}",
+            self.title, self.subtitle, self.offered, areas,
+        )
+    }
+}
+
+/// Moves a finished course into `course_map`, keyed by its title.
+///
+/// A course with an empty title is discarded: that is the placeholder `main` holds before it
+/// has seen the first `courseid` paragraph. When the title is already present, only the
+/// subject areas recorded on `course` are merged into the stored entry; its other fields
+/// are dropped in favour of the entry that was stored first.
+fn store_course(course_map: &mut HashMap<String, Course>, course: Course) {
+    if course.title.is_empty() {
+        return;
+    }
+
+    if let Some(existing) = course_map.get_mut(&course.title) {
+        // Merge the areas captured when the course was started, not whatever heading the
+        // parser has reached since then.
+        existing.subject_areas.extend(course.subject_areas);
+    } else {
+        course_map.insert(course.title.clone(), course);
+    }
+}
+
+/// Parses the bundled `courses.html` catalogue and writes every course to `./export.json`.
+///
+/// The document is walked one `<p>` element at a time. The class of each paragraph decides
+/// what it contributes: `subjectarea` sets the current subject area, `courseid` starts a new
+/// course, and `notoffered`, `subjectsbody`, `timetable` and `coursepoints` fill in details
+/// of the course currently being built.
+///
+/// # Panics
+///
+/// Panics if a paragraph does not have the expected child structure, if a course reference
+/// number, trimester label or points value cannot be parsed, or if the export cannot be
+/// serialised or written.
+fn main() {
+    tracing_subscriber::fmt()
+        .with_max_level(LevelFilter::INFO)
+        .init();
+
+    let html = include_str!("../courses.html");
+    let document = Html::parse_document(html);
+    let paragraphs = Selector::parse("p").unwrap();
+
+    let mut course_map: HashMap<String, Course> = HashMap::new();
+
+    let mut subject_area = String::new();
+    let mut working_course = Course::default();
+
+    for elem in document.select(&paragraphs) {
+        let elem_value = elem.value();
+
+        if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
+            // A new course heading means the previous course is complete.
+            let finished = std::mem::take(&mut working_course);
+            store_course(&mut course_map, finished);
+            working_course.subject_areas.insert(subject_area.clone());
+
+            for child in elem.children() {
+                for c in child.children() {
+                    if c.value().is_text() {
+                        let working = c.value().as_text().unwrap().to_string();
+
+                        // Skip over the trailing space (the final byte of the text).
+                        working_course.title = working[..working.len() - 1].to_owned();
+                    } else {
+                        working_course.subtitle = c
+                            .first_child()
+                            .unwrap()
+                            .value()
+                            .as_text()
+                            .unwrap()
+                            // Skip the four-byte separator that precedes the subtitle text.
+                            .to_string()[4..]
+                            .to_owned();
+                    }
+                }
+            }
+        } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
+            working_course.offered = false;
+        } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
+            subject_area = elem
+                .first_child()
+                .unwrap()
+                .first_child()
+                .unwrap()
+                .value()
+                .as_text()
+                .unwrap()
+                .to_string();
+        } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
+            // The description is the text of the first grandchild, flattened onto one line.
+            let description = elem
+                .first_child()
+                .and_then(|el| el.first_child()?.value().as_text())
+                .map(|t| t.to_string().replace('\n', ""));
+
+            working_course.description = description;
+        } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
+            // Parse timetable / CRNs.
+            let details = elem
+                .first_child()
+                .and_then(|el| el.first_child()?.value().as_text())
+                .map(|t| t.to_string());
+
+            if let Some(details) = details {
+                // First segment is the trimester label, second one carries the reference number.
+                let details_split: Vec<&str> = details.split(" • ").take(2).collect();
+
+                info!("{:#?}", &details_split);
+
+                let offering = CourseOffering {
+                    // The number starts after a four-byte prefix.
+                    course_reference_number: details_split.last().unwrap()[4..]
+                        .parse::<u16>()
+                        .unwrap(),
+                    trimester: Trimester::try_from(*details_split.first().unwrap()).unwrap(),
+                };
+
+                working_course.timetable.push(offering);
+            }
+        } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
+            // Parse course points, prerequisites, and exclusions.
+            let details = elem
+                .first_child()
+                .and_then(|el| el.first_child()?.value().as_text())
+                .map(|t| t.to_string());
+
+            if let Some(details) = details {
+                let details_split: Vec<&str> = details.split(" • ").take(2).collect();
+
+                info!("{:#?}", &details_split);
+
+                // Occasionally there is extra whitespace here, so this needs to be trimmed.
+                let points = details_split.first().unwrap().trim();
+                debug!("{:?}", points);
+
+                // Drop the four-byte suffix that follows the number.
+                let points_slice = &points[..points.len() - 4];
+                info!("{:?}", points_slice);
+
+                let points = points_slice.parse::<u8>().unwrap();
+                info!("{:?}", points);
+
+                working_course.points = points;
+
+                // TODO: parse prerequisites and restrictions from the remaining segment.
+            }
+        }
+    }
+
+    // No further heading follows the last course, so it has to be stored explicitly.
+    store_course(&mut course_map, working_course);
+
+    debug!("{:?}", course_map.get("COMP 102"));
+
+    fs::write(
+        "./export.json",
+        simd_json::serde::to_string(&JsonExport {
+            courses: course_map,
+        })
+        .unwrap(),
+    )
+    .unwrap();
+}