From c37c398a30f42da130142f0b2271d74b2c0465e1 Mon Sep 17 00:00:00 2001 From: Sophie Forrest Date: Tue, 10 Sep 2024 12:41:44 +1200 Subject: feat: downloading Downloading is now done with reqwest. --- src/lib.rs | 16 ++++++++-------- src/main.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/lib.rs b/src/lib.rs index d156c42..1246a76 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,10 +5,7 @@ //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly //! parse prerequisites, however. -use std::{ - collections::{HashMap, HashSet}, - fmt, -}; +use std::collections::{HashMap, HashSet}; use scraper::{CaseSensitivity, ElementRef, Html, Selector}; use serde::{Deserialize, Serialize}; @@ -28,7 +25,7 @@ pub struct Course<'a> { pub offered: bool, /// Amount of points this course is worth. - pub points: u8, + pub points: f32, /// Courses that must be taken before this course. pub prerequisites: Vec<&'a str>, @@ -100,7 +97,7 @@ impl<'a> Course<'a> { info!("{:?}", points_slice); let points = points_slice - .parse::<u8>() + .parse::<f32>() .expect("should correctly parse points"); info!("{:?}", points); @@ -169,6 +166,9 @@ impl<'a> Course<'a> { .expect("course reference number should exist") .get(4..) 
.expect("course reference number digits should start at this indice") + .split_whitespace() + .next() + .expect("course reference number should exist") .parse::() .expect("course reference number should be parseable"), Trimester::try_from( @@ -189,7 +189,7 @@ impl Default for Course<'_> { Self { description: Option::default(), offered: true, - points: u8::default(), + points: f32::default(), prerequisites: Vec::default(), restrictions: Vec::default(), subject_areas: HashSet::default(), @@ -266,7 +266,7 @@ impl TryFrom<&str> for Trimester { "1+2/3" => Ok(Self::OneTwo), "2+3/3" => Ok(Self::TwoThree), "3+1/3" => Ok(Self::ThreeOne), - "1+2+3/3" => Ok(Self::FullYear), + "1+2+3/3" | "2+3+1/3" | "full year" => Ok(Self::FullYear), _ => Err(String::from("Invalid trimester.")), } } diff --git a/src/main.rs b/src/main.rs index 17a95c7..68e378a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,10 +5,12 @@ //! This is a simple program capable of parsing VUWs courses from the registry. It cannot correctly //! parse prerequisites, however. -use std::{collections::HashMap, fs}; +use std::collections::HashMap; +use futures_util::future::join_all; use scraper::Html; use serde::{Deserialize, Serialize}; +use tokio::{fs, task}; use tracing::level_filters::LevelFilter; use vuw_course_scraper::{parse_document, Course}; @@ -20,23 +22,62 @@ struct JsonExport<'a> { courses: HashMap<&'a str, Course<'a>>, } -fn main() { +/// Transforms a URI into a documents text form through a get request. +/// +/// # Panics +/// +/// Panics if the website can't be reached, or the text form isn't available. 
+async fn into_document(st: String) -> Html { + Html::parse_document( + &reqwest::get(st) + .await + .expect("website should be available") + .text() + .await + .expect("text form should be available"), + ) +} + +#[tokio::main] +async fn main() { tracing_subscriber::fmt() - .with_max_level(LevelFilter::INFO) + .with_max_level(LevelFilter::ERROR) .init(); - let html = &fs::read_to_string("./courses.html").expect("file does not exist"); + let documents: Vec<Html> = + join_all( + ["d", "u", "p", "o"] + .iter() + .map(|directory| { + format!("https://service-web.wgtn.ac.nz/dotnet2/catprint.aspx?d=all&t={directory}2025") + }) + .map(into_document), + ) + .await; + + let all_courses: HashMap<&str, Course> = task::block_in_place(|| { + documents + .iter() + .map(parse_document) + .reduce(|m1, m2| { + let mut mx = m1.clone(); - let document = Html::parse_document(html); + mx.extend(m2); - let course_map = parse_document(&document); + mx + }) + .expect("maps should reduce into one correctly") + }); fs::write( - "./export.json", - simd_json::serde::to_string(&JsonExport { - courses: course_map, - }) - .expect("json should parse correctly"), + "./export-dl.json", + task::block_in_place(|| { + simd_json::serde::to_string(&JsonExport { + courses: all_courses, + }) + .expect("json should parse correctly") + }), ) + .await .expect("file should be writable"); } -- cgit 1.4.1