diff options
| | | |
|---|---|---|
| author | Sophie Forrest <git@sophieforrest.com> | 2024-09-10 08:53:41 +1200 |
| committer | Sophie Forrest <git@sophieforrest.com> | 2024-09-10 08:53:41 +1200 |
| commit | 08d060660f2f6bd678770026e109e0cd7429e6f1 (patch) | |
| tree | 0769a5b614be7c1b5a1a3c44da7c00999c759f63 /src/main.rs | |
| parent | d0256057ac4e2d18f30a7cd5845d315f3167ac67 (diff) | |
refactor: move parsing into separate function
Preparation for adding download capabilities.
Diffstat (limited to 'src/main.rs')
| -rw-r--r-- | src/main.rs | 49 |
1 file changed, 3 insertions, 46 deletions
```diff
diff --git a/src/main.rs b/src/main.rs
index 05f3976..17a95c7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,10 +7,10 @@
 
 use std::{collections::HashMap, fs};
 
-use scraper::{CaseSensitivity, Html, Selector};
+use scraper::Html;
 use serde::{Deserialize, Serialize};
 use tracing::level_filters::LevelFilter;
-use vuw_course_scraper::Course;
+use vuw_course_scraper::{parse_document, Course};
 
 /// Utility struct for exporting to JSON.
 #[derive(Clone, Deserialize, Serialize)]
@@ -29,50 +29,7 @@ fn main() {
 
     let document = Html::parse_document(html);
 
-    let mut course_map: HashMap<&str, Course> = HashMap::new();
-
-    let mut subject_area = "";
-    let mut working_course = Course::default();
-
-    for elem in document.select(&Selector::parse("p").expect("selector should always be valid")) {
-        let elem_value = elem.value();
-
-        if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
-            course_map
-                .entry(working_course.title)
-                .and_modify(|c| {
-                    c.subject_areas.insert(subject_area);
-                })
-                .or_insert(working_course);
-            working_course = Course::default();
-            working_course.subject_areas.insert(subject_area);
-
-            working_course.parse_courseid(elem);
-        } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
-            working_course.offered = false;
-        } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
-            if let Some(subject_area_name) = elem.first_child().and_then(|child| {
-                child
-                    .first_child()
-                    .and_then(|nexted_child| nexted_child.value().as_text())
-            }) {
-                subject_area = &**subject_area_name;
-            }
-        } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
-            let description = elem
-                .first_child()
-                .and_then(|el| el.first_child()?.value().as_text())
-                .map(|t| &**t);
-
-            working_course.description = description;
-        } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
-            working_course.parse_timetable(elem);
-        } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
-            working_course.parse_coursepoints(elem);
-        }
-    }
-
-    course_map.remove("");
+    let course_map = parse_document(&document);
 
     fs::write(
         "./export.json",
```