// SPDX-License-Identifier: AGPL-3.0-or-later

//! # VUW Course scraper
//!
//! This is a simple program capable of parsing VUW's courses from the registry. It cannot
//! correctly parse prerequisites, however.

use std::collections::HashMap;

use futures_util::future::join_all;
use scraper::Html;
use serde::{Deserialize, Serialize};
use tokio::{fs, task};
use tracing::level_filters::LevelFilter;
use vuw_course_scraper::{parse_document, Course};

/// Utility struct for exporting to JSON.
#[derive(Clone, Deserialize, Serialize)]
struct JsonExport<'a> {
    /// [`HashMap`] of all courses.
    #[serde(borrow)]
    courses: HashMap<&'a str, Course<'a>>,
}

/// Fetches a URI with a GET request and parses the response body into an [`Html`] document.
///
/// # Panics
///
/// Panics if the website can't be reached, or the text form isn't available.
async fn into_document(st: String) -> Html {
    Html::parse_document(
        &reqwest::get(st)
            .await
            .expect("website should be available")
            .text()
            .await
            .expect("text form should be available"),
    )
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt()
        .with_max_level(LevelFilter::ERROR)
        .init();

    // Fetch the catalogue page for each directory concurrently.
    let documents: Vec<Html> = join_all(
        ["d", "u", "p", "o"]
            .iter()
            .map(|directory| {
                format!("https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}2025")
            })
            .map(into_document),
    )
    .await;

    // Parsing is CPU-bound, so run it on the blocking pool and merge the per-document maps.
    let all_courses: HashMap<&str, Course> = task::block_in_place(|| {
        documents
            .iter()
            .map(parse_document)
            .reduce(|m1, m2| {
                let mut mx = m1.clone();
                mx.extend(m2);
                mx
            })
            .expect("maps should reduce into one correctly")
    });

    // Serialise the merged map and write it out as JSON.
    fs::write(
        "./export-dl.json",
        task::block_in_place(|| {
            simd_json::serde::to_string(&JsonExport {
                courses: all_courses,
            })
            .expect("json should serialise correctly")
        }),
    )
    .await
    .expect("file should be writable");
}
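
// A minimal sanity-check sketch: it only exercises the plain `format!` construction of the
// catalogue URLs used in `main`, and assumes nothing about the remote service or the
// `vuw_course_scraper` parsing API.
#[cfg(test)]
mod tests {
    #[test]
    fn catalogue_urls_are_formatted_per_directory() {
        let urls: Vec<String> = ["d", "u", "p", "o"]
            .iter()
            .map(|directory| {
                format!("https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}2025")
            })
            .collect();

        assert_eq!(urls.len(), 4);
        assert!(urls[0].ends_with("t=d2025"));
    }
}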