// SPDX-License-Identifier: AGPL-3.0-or-later //! # VUW Course scraper //! //! Program capable of parsing VUWs courses from the registry. use std::{ collections::{BTreeMap, HashMap}, path::PathBuf, }; use clap::Parser; use futures_util::future::join_all; use scraper::Html; use serde::{Deserialize, Serialize}; use tokio::{fs, task}; use tracing::level_filters::LevelFilter; use vuw_course_scraper::{parse_document, Course}; /// Utility struct for exporting to JSON. #[derive(Clone, Deserialize, Serialize)] struct JsonExport<'a> { /// [`HashMap`] of all courses. #[serde(borrow)] courses: BTreeMap<&'a str, Course<'a>>, } /// Arguments for command line interface. #[derive(Clone, Parser)] #[command(about, author, long_about = None, version)] struct Args { /// Year of the files to download from the website. #[arg(short, long, default_value_t = time::OffsetDateTime::now_utc().year() + 1)] pub year: i32, /// File path of the output file. pub path: PathBuf, } /// Transforms a URI into a documents text form through a get request. /// /// # Panics /// /// Panics if the website can't be reached, or the text form isn't available. async fn into_document(st: String) -> Html { Html::parse_document( &reqwest::get(st) .await .expect("website should be available") .text() .await .expect("text form should be available"), ) } #[tokio::main] async fn main() { tracing_subscriber::fmt() .with_max_level(LevelFilter::ERROR) .init(); let args = Args::parse(); let documents: Vec = join_all( ["d", "u", "p", "o"] .iter() .map(|directory| { format!( "https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}{}", args.year ) }) .map(into_document), ) .await; let all_courses: HashMap<&str, Course> = task::block_in_place(|| { documents .iter() .map(parse_document) .reduce(|m1, m2| { let mut mx = m1.clone(); mx.extend(m2); mx }) .expect("maps should reduce into one correctly") }); let sorted_courses: BTreeMap<&str, Course> = all_courses.into_iter().collect(); fs::write( args.path, task::block_in_place(|| { simd_json::serde::to_string(&JsonExport { courses: sorted_courses, }) .expect("json should parse correctly") }), ) .await .expect("file should be writable"); }