summary refs log tree commit diff
path: root/src/main.rs
blob: 05f397676b6efac4105a2c18f542559cedf62ca2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// SPDX-License-Identifier: AGPL-3.0-or-later

//! # VUW Course scraper
//!
//! This is a simple program capable of parsing VUW's courses from the registry. It cannot
//! correctly parse prerequisites, however.

use std::{collections::HashMap, fs};

use scraper::{CaseSensitivity, Html, Selector};
use serde::{Deserialize, Serialize};
use tracing::level_filters::LevelFilter;
use vuw_course_scraper::Course;

/// Utility struct for exporting to JSON.
///
/// `main` wraps the finished course map in this struct and serializes it to `./export.json`, so
/// the exported document is a single object with a `courses` field.
#[derive(Clone, Deserialize, Serialize)]
struct JsonExport<'a> {
	/// [`HashMap`] of all courses; `main` keys each entry by the course's `title`.
	// `borrow` lets serde deserialize this field's `&'a str` data by borrowing from the input
	// instead of copying it.
	#[serde(borrow)]
	courses: HashMap<&'a str, Course<'a>>,
}

/// Stores `course` in `course_map` under its title.
///
/// If a course with the same title is already present, the existing entry is kept and
/// `subject_area` is added to its subject areas instead; `course` itself is then discarded.
fn store_course<'a>(
	course_map: &mut HashMap<&'a str, Course<'a>>,
	course: Course<'a>,
	subject_area: &'a str,
) {
	course_map
		.entry(course.title)
		.and_modify(|existing| {
			existing.subject_areas.insert(subject_area);
		})
		.or_insert(course);
}

/// Reads `./courses.html`, collects the courses described by its `<p>` elements and writes them
/// to `./export.json`.
///
/// The paragraphs are walked in document order. A `courseid` paragraph starts a new course; the
/// paragraphs that follow (`notoffered`, `subjectsbody`, `timetable`, `coursepoints`) fill in
/// that course until the next `courseid` appears. A `subjectarea` paragraph changes the subject
/// area applied to the courses that start after it.
///
/// # Panics
///
/// Panics if `./courses.html` cannot be read, if serialization to JSON fails, or if
/// `./export.json` cannot be written.
fn main() {
	tracing_subscriber::fmt()
		.with_max_level(LevelFilter::INFO)
		.init();

	let html = &fs::read_to_string("./courses.html").expect("file does not exist");

	let document = Html::parse_document(html);
	let paragraphs = Selector::parse("p").expect("selector should always be valid");

	let mut course_map: HashMap<&str, Course> = HashMap::new();

	// Most recently seen subject area heading.
	let mut subject_area = "";
	// Course currently being assembled, together with the subject area that was in force when
	// it started. The latter is tracked separately because a new `subjectarea` heading may be
	// read before the course is finally stored.
	let mut working_course = Course::default();
	let mut working_subject_area = subject_area;

	for elem in document.select(&paragraphs) {
		let elem_value = elem.value();

		if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
			// A new course begins: store the one assembled so far, then start afresh.
			store_course(&mut course_map, working_course, working_subject_area);

			working_course = Course::default();
			working_subject_area = subject_area;
			working_course.subject_areas.insert(subject_area);

			working_course.parse_courseid(elem);
		} else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
			working_course.offered = false;
		} else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
			// The name is the text node nested one element deep inside the paragraph.
			if let Some(subject_area_name) = elem.first_child().and_then(|child| {
				child
					.first_child()
					.and_then(|nested_child| nested_child.value().as_text())
			}) {
				subject_area = &**subject_area_name;
			}
		} else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
			// Same nesting as the subject area name; `None` when the structure differs.
			let description = elem
				.first_child()
				.and_then(|el| el.first_child()?.value().as_text())
				.map(|t| &**t);

			working_course.description = description;
		} else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
			working_course.parse_timetable(elem);
		} else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
			working_course.parse_coursepoints(elem);
		}
	}

	// The loop only stores a course when the next one starts, so the final course still has to
	// be stored here; otherwise it would be missing from the export.
	store_course(&mut course_map, working_course, working_subject_area);

	// Discard any entry keyed by the empty title — presumably the `Course::default()`
	// placeholder that is stored before the first real course.
	course_map.remove("");

	fs::write(
		"./export.json",
		simd_json::serde::to_string(&JsonExport {
			courses: course_map,
		})
		.expect("json should parse correctly"),
	)
	.expect("file should be writable");
}