1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
// SPDX-License-Identifier: AGPL-3.0-or-later
//! # VUW Course scraper
//!
//! This is a simple program that parses VUW's courses from the registry. It cannot, however,
//! parse prerequisites correctly.
use std::{collections::HashMap, fs};
use scraper::{CaseSensitivity, Html, Selector};
use serde::{Deserialize, Serialize};
use tracing::level_filters::LevelFilter;
use vuw_course_scraper::Course;
/// Top-level shape of the JSON document this program writes out.
///
/// The struct only borrows its string data (hence the `'a` lifetime and
/// `#[serde(borrow)]`), so it must not outlive the text the courses were
/// parsed from.
#[derive(Serialize, Deserialize, Clone)]
struct JsonExport<'a> {
    /// Every collected course, stored in a [`HashMap`] keyed by course title.
    #[serde(borrow)]
    courses: HashMap<&'a str, Course<'a>>,
}
/// Scrapes `./courses.html` and writes every course found to `./export.json`.
///
/// The document is walked one `<p>` element at a time. A paragraph with the
/// `courseid` class starts a new course; the paragraphs that follow
/// (`notoffered`, `subjectsbody`, `timetable`, `coursepoints`) fill in that
/// course until the next `courseid` paragraph appears. `subjectarea`
/// paragraphs update the subject area that subsequently started courses are
/// tagged with.
///
/// Courses are keyed by title. When the same title is encountered again, the
/// first stored course is kept and only gains the subject area of the later
/// occurrence.
///
/// # Panics
///
/// Panics if `./courses.html` cannot be read, if serialisation fails, or if
/// `./export.json` cannot be written.
fn main() {
    tracing_subscriber::fmt()
        .with_max_level(LevelFilter::INFO)
        .init();

    let html = &fs::read_to_string("./courses.html").expect("file does not exist");
    let document = Html::parse_document(html);

    let mut course_map: HashMap<&str, Course> = HashMap::new();
    // Text of the most recently seen `subjectarea` heading.
    let mut subject_area = "";
    // Subject area that was current when `working_course` was started. It is
    // tracked separately because a `subjectarea` heading may appear between a
    // course's paragraphs and the next `courseid`, at which point
    // `subject_area` already refers to the *following* area.
    let mut working_subject_area = subject_area;
    let mut working_course = Course::default();

    let paragraphs = Selector::parse("p").expect("selector should always be valid");
    for elem in document.select(&paragraphs) {
        let elem_value = elem.value();

        if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
            // A new course begins: store the one assembled so far. On the very
            // first `courseid` this stores the empty default course, which is
            // discarded again via `remove("")` below.
            store_course(&mut course_map, working_course, working_subject_area);

            working_course = Course::default();
            working_subject_area = subject_area;
            working_course.subject_areas.insert(subject_area);
            working_course.parse_courseid(elem);
        } else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
            working_course.offered = false;
        } else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
            // The heading text is read from the first grandchild node; if that
            // node is missing or is not text, the previous area is kept.
            if let Some(subject_area_name) = elem.first_child().and_then(|child| {
                child
                    .first_child()
                    .and_then(|nested_child| nested_child.value().as_text())
            }) {
                subject_area = &**subject_area_name;
            }
        } else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
            // Same first-grandchild lookup as above; `None` when there is no
            // such text node.
            let description = elem
                .first_child()
                .and_then(|el| el.first_child()?.value().as_text())
                .map(|t| &**t);
            working_course.description = description;
        } else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
            working_course.parse_timetable(elem);
        } else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
            working_course.parse_coursepoints(elem);
        }
    }

    // Courses are only stored when the *next* `courseid` is seen, so the final
    // course of the document is still pending here and must be stored
    // explicitly or it would be missing from the export.
    store_course(&mut course_map, working_course, working_subject_area);

    // Drop the placeholder entry created from the untitled default course.
    course_map.remove("");

    fs::write(
        "./export.json",
        simd_json::serde::to_string(&JsonExport {
            courses: course_map,
        })
        .expect("json should parse correctly"),
    )
    .expect("file should be writable");
}

/// Stores `course` in `courses` under its title.
///
/// If a course with the same title is already present, the existing entry is
/// kept and `area` is added to its subject areas; otherwise `course` is
/// inserted as-is.
fn store_course<'a>(
    courses: &mut HashMap<&'a str, Course<'a>>,
    course: Course<'a>,
    area: &'a str,
) {
    courses
        .entry(course.title)
        .and_modify(|existing| {
            existing.subject_areas.insert(area);
        })
        .or_insert(course);
}
|