summary refs log tree commit diff
path: root/src/main.rs
blob: f5dd05fc55beaf2fdcd64c03227940c1a5471e48 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// SPDX-License-Identifier: AGPL-3.0-or-later

//! # VUW Course scraper
//!
//! Program capable of parsing VUWs courses from the registry.

use std::{
	collections::{BTreeMap, HashMap},
	path::PathBuf,
};

use clap::Parser;
use futures_util::future::join_all;
use scraper::Html;
use serde::{Deserialize, Serialize};
use tokio::{fs, task};
use tracing::level_filters::LevelFilter;
use vuw_course_scraper::{parse_document, Course};

/// Utility struct for exporting to JSON.
///
/// Wraps the final course map so the output file has a top-level
/// `{"courses": …}` object rather than a bare map.
#[derive(Clone, Deserialize, Serialize)]
struct JsonExport<'a> {
	/// [`BTreeMap`] of all courses, keyed by course code; the ordered map
	/// keeps the serialised JSON deterministic.
	#[serde(borrow)]
	courses: BTreeMap<&'a str, Course<'a>>,
}

/// Arguments for command line interface.
#[derive(Clone, Parser)]
#[command(about, author, long_about = None, version)]
struct Args {
	/// Year of the files to download from the website.
	///
	/// Defaults to next year (current UTC year + 1), presumably because the
	/// upcoming catalogue is the one of interest — confirm with maintainers.
	#[arg(short, long, default_value_t = time::OffsetDateTime::now_utc().year() + 1)]
	pub year: i32,

	/// File path of the output file.
	pub path: PathBuf,
}

/// Fetches the URI `st` with an HTTP GET request and parses the response
/// body into an [`Html`] document.
///
/// # Panics
///
/// Panics if the website can't be reached, or the text form isn't available.
async fn into_document(st: String) -> Html {
	let response = reqwest::get(st)
		.await
		.expect("website should be available");

	let body = response
		.text()
		.await
		.expect("text form should be available");

	Html::parse_document(&body)
}

#[tokio::main]
async fn main() {
	// Only surface ERROR-level events from tracing.
	tracing_subscriber::fmt()
		.with_max_level(LevelFilter::ERROR)
		.init();

	let args = Args::parse();

	// Fetch the four catalogue variants concurrently. NOTE(review): the
	// meaning of "d"/"u"/"p"/"o" is not visible here — presumably catalogue
	// categories understood by catprint.aspx; confirm against the service.
	let documents: Vec<Html> = join_all(
		["d", "u", "p", "o"]
			.iter()
			.map(|directory| {
				format!(
					"https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}{}",
					args.year
				)
			})
			.map(into_document),
	)
	.await;

	// HTML parsing is CPU-bound, so run it via `block_in_place` to avoid
	// stalling the async runtime. Merge the per-document maps by extending
	// the accumulator in place — the previous version cloned the whole
	// accumulator map on every reduction step for no benefit.
	let all_courses: HashMap<&str, Course> = task::block_in_place(|| {
		documents
			.iter()
			.map(parse_document)
			.reduce(|mut acc, next| {
				// Later documents overwrite earlier entries on key collision,
				// matching `HashMap::extend` semantics.
				acc.extend(next);
				acc
			})
			.expect("maps should reduce into one correctly")
	});

	// Re-collect into a BTreeMap so the exported JSON is sorted by course key.
	let sorted_courses: BTreeMap<&str, Course> = all_courses.into_iter().collect();

	fs::write(
		args.path,
		// Serialisation is also CPU-bound; keep it off the async worker.
		task::block_in_place(|| {
			simd_json::serde::to_string(&JsonExport {
				courses: sorted_courses,
			})
			// Fixed message: this step serialises to JSON, it does not parse.
			.expect("courses should serialise to json correctly")
		}),
	)
	.await
	.expect("file should be writable");
}