| | | |
|---|---|---|
| author | Sophie Forrest <git@sophieforrest.com> | 2024-09-10 12:41:44 +1200 |
| committer | Sophie Forrest <git@sophieforrest.com> | 2024-09-10 12:41:44 +1200 |
| commit | c37c398a30f42da130142f0b2271d74b2c0465e1 (patch) | |
| tree | 74c716dac8db7dc6d64d389097fefa0dcc076544 /src/main.rs | |
| parent | e8406706852969e8e6a3194efb4c069b5ffdbe21 (diff) | |
feat: downloading
Downloading is now done with reqwest.
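
As a rough sketch of the approach (not the commit's exact code), the snippet below shows the pattern the diff introduces, assuming the same crates the commit pulls in (`reqwest`, `tokio`, `futures-util`): issue each GET with `reqwest`, await the in-flight requests together with `futures_util::future::join_all`, and drive everything from the tokio runtime. The `fetch` helper and the example URLs are placeholders added here for illustration only.

```rust
use futures_util::future::join_all;

/// Fetches one page and returns its body as text.
/// Illustrative helper; the commit's own version parses the body into `scraper::Html`.
async fn fetch(url: &str) -> Result<String, reqwest::Error> {
    reqwest::get(url).await?.text().await
}

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // Placeholder URLs; the real program builds four course-catalogue URLs.
    let urls = ["https://example.com/a", "https://example.com/b"];

    // Start every request, then await them all at once instead of one by one.
    let bodies = join_all(urls.into_iter().map(fetch)).await;

    for body in bodies {
        println!("fetched {} bytes", body?.len());
    }

    Ok(())
}
```

Starting all the requests before awaiting any of them is what lets the four catalogue pages in the diff below download concurrently rather than sequentially.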
Diffstat (limited to 'src/main.rs')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/main.rs | 63 |

1 file changed, 52 insertions, 11 deletions
diff --git a/src/main.rs b/src/main.rs
index 17a95c7..68e378a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,10 +5,12 @@
 //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly
 //! parse prerequisites, however.
 
-use std::{collections::HashMap, fs};
+use std::collections::HashMap;
 
+use futures_util::future::join_all;
 use scraper::Html;
 use serde::{Deserialize, Serialize};
+use tokio::{fs, task};
 use tracing::level_filters::LevelFilter;
 use vuw_course_scraper::{parse_document, Course};
 
@@ -20,23 +22,62 @@ struct JsonExport<'a> {
     courses: HashMap<&'a str, Course<'a>>,
 }
 
-fn main() {
+/// Transforms a URI into a documents text form through a get request.
+///
+/// # Panics
+///
+/// Panics if the website can't be reached, or the text form isn't available.
+async fn into_document(st: String) -> Html {
+    Html::parse_document(
+        &reqwest::get(st)
+            .await
+            .expect("website should be available")
+            .text()
+            .await
+            .expect("text form should be available"),
+    )
+}
+
+#[tokio::main]
+async fn main() {
     tracing_subscriber::fmt()
-        .with_max_level(LevelFilter::INFO)
+        .with_max_level(LevelFilter::ERROR)
         .init();
 
-    let html = &fs::read_to_string("./courses.html").expect("file does not exist");
+    let documents: Vec<Html> =
+        join_all(
+            ["d", "u", "p", "o"]
+                .iter()
+                .map(|directory| {
+                    format!("https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}2025")
+                })
+                .map(into_document),
+        )
+        .await;
+
+    let all_courses: HashMap<&str, Course> = task::block_in_place(|| {
+        documents
+            .iter()
+            .map(parse_document)
+            .reduce(|m1, m2| {
+                let mut mx = m1.clone();
 
-    let document = Html::parse_document(html);
+                mx.extend(m2);
 
-    let course_map = parse_document(&document);
+                mx
+            })
+            .expect("maps should reduce into one correctly")
+    });
 
     fs::write(
-        "./export.json",
-        simd_json::serde::to_string(&JsonExport {
-            courses: course_map,
-        })
-        .expect("json should parse correctly"),
+        "./export-dl.json",
+        task::block_in_place(|| {
+            simd_json::serde::to_string(&JsonExport {
+                courses: all_courses,
+            })
+            .expect("json should parse correctly")
+        }),
     )
+    .await
     .expect("file should be writable");
 }