summary refs log tree commit diff
path: root/src/main.rs
blob: 68e378aea2827c8f11590bf8e9cab46e9dbf5878 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// SPDX-License-Identifier: AGPL-3.0-or-later

//! # VUW Course scraper
//!
//! This is a simple program capable of parsing VUW's courses from the registry. It cannot correctly
//! parse prerequisites, however.

use std::collections::HashMap;

use futures_util::future::join_all;
use scraper::Html;
use serde::{Deserialize, Serialize};
use tokio::{fs, task};
use tracing::level_filters::LevelFilter;
use vuw_course_scraper::{parse_document, Course};

/// Utility struct for exporting to JSON.
///
/// Wraps the merged course map so that it is emitted under a single top-level `courses` field.
/// The struct only borrows its data for the lifetime `'a`; it does not own the course strings.
#[derive(Clone, Deserialize, Serialize)]
struct JsonExport<'a> {
	/// [`HashMap`] of all courses.
	///
	/// `#[serde(borrow)]` lets the derived `Deserialize` impl borrow from the input for `'a`
	/// instead of requiring owned data.
	#[serde(borrow)]
	courses: HashMap<&'a str, Course<'a>>,
}

/// Fetches `st` with a GET request and parses the response body as an HTML document.
///
/// # Panics
///
/// Panics if the request fails (the website can't be reached), or if the response body can't be
/// read as text.
async fn into_document(st: String) -> Html {
	// Issue the request first; a failure here means the site was unreachable.
	let response = reqwest::get(st)
		.await
		.expect("website should be available");

	// Read the whole body as text before handing it to the HTML parser.
	let body = response
		.text()
		.await
		.expect("text form should be available");

	Html::parse_document(&body)
}

/// Scrapes the 2025 course catalogue pages and exports every parsed course as JSON.
///
/// The four catalogue pages (`t=d2025`, `t=u2025`, `t=p2025` and `t=o2025`) are fetched
/// concurrently, parsed with [`parse_document`], merged into a single map and written to
/// `./export-dl.json`.
///
/// # Panics
///
/// Panics if any page can't be fetched (see [`into_document`]), if the courses can't be
/// serialised, or if the export file can't be written.
#[tokio::main]
async fn main() {
	tracing_subscriber::fmt()
		.with_max_level(LevelFilter::ERROR)
		.init();

	// Fetch all catalogue pages concurrently; `join_all` keeps the results in input order.
	let documents: Vec<Html> =
		join_all(
			["d", "u", "p", "o"]
				.iter()
				.map(|directory| {
					format!("https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}2025")
				})
				.map(into_document),
		)
		.await;

	// Parsing is synchronous, CPU-bound work, so it runs inside `block_in_place`.
	let all_courses: HashMap<&str, Course> = task::block_in_place(|| {
		documents
			.iter()
			.map(parse_document)
			// The accumulator is owned, so extend it in place rather than cloning it on every
			// step. Entries from later documents replace earlier ones with the same key.
			.reduce(|mut merged, next| {
				merged.extend(next);

				merged
			})
			.expect("maps should reduce into one correctly")
	});

	// Serialisation is likewise synchronous, so it also runs inside `block_in_place`.
	let json = task::block_in_place(|| {
		simd_json::serde::to_string(&JsonExport {
			courses: all_courses,
		})
		.expect("json should serialize correctly")
	});

	fs::write("./export-dl.json", json)
		.await
		.expect("file should be writable");
}