// SPDX-License-Identifier: AGPL-3.0-or-later

//! # VUW Course scraper
//!
//! This is a simple program capable of parsing VUW's courses from the registry. It cannot
//! correctly parse prerequisites, however.

use std::collections::{HashMap, HashSet};

use scraper::{CaseSensitivity, ElementRef, Html, Selector};
use serde::{Deserialize, Serialize};
use tracing::{debug, info};

/// Separator characters (`;` and `,`) used to split a requirements list into individual entries.
const SPLIT_SLICE: &[char] = &[';', ','];

/// A VUW course, along with all relevant data.
///
/// Every string in this struct is borrowed for `'a`, so a [`Course`] cannot outlive the data it
/// was parsed from.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
pub struct Course<'a> {
	/// Description of the course, if one was found.
	pub description: Option<&'a str>,

	/// Whether this course is offered in the upcoming year.
	pub offered: bool,

	/// Number of points this course is worth.
	pub points: f32,

	/// Courses that must be taken before this course.
	///
	/// Entries are stored as raw text and are not interpreted any further.
	pub prerequisites: Vec<&'a str>,

	/// Courses that cannot be taken if you take this course.
	///
	/// Entries are stored as raw text and are not interpreted any further.
	pub restrictions: Vec<&'a str>,

	/// Subject areas this course belongs to.
	pub subject_areas: HashSet<&'a str>,

	/// Subtitle of this course, its longer name.
	pub subtitle: &'a str,

	/// Timetable of this course, includes trimesters and CRNs.
	pub timetable: Vec<CourseOffering>,

	/// Title of this course, also known as the code.
	pub title: &'a str,
}

impl<'a> Course<'a> {
	/// Parses the course id element into the course's title and subtitle.
	///
	/// Every grandchild node of `elem` is inspected in document order:
	///
	/// - a grandchild that is itself a text node becomes the title, after trimming;
	/// - otherwise, if the grandchild's first child is a text node, that text without its first
	///   character (the separating "-") becomes the subtitle, after trimming.
	///
	/// Text shorter than two characters leaves the subtitle untouched. When several nodes match,
	/// the last one wins.
	pub fn parse_courseid(&mut self, elem: ElementRef<'a>) {
		for node in elem.children().flat_map(|child| child.children()) {
			match node.value().as_text() {
				// The actual text we're looking for.
				Some(code) => self.title = code.trim(),
				None => {
					let label: Option<&'a str> = node
						.first_child()
						.and_then(|inner| inner.value().as_text())
						.map(|label| &**label);

					// Skip over "-". The offset comes from the text's own character boundaries,
					// so the slice always exists when a second character does.
					let remainder = label.and_then(|label| {
						let (start, _) = label.char_indices().nth(1)?;
						label.get(start..)
					});

					if let Some(remainder) = remainder {
						self.subtitle = remainder.trim();
					}
				}
			}
		}
	}

	/// Parses the course points, prerequisites, and restrictions from the given element.
	///
	/// The text of the first grandchild of `elem` is split on `" • "`. The first part holds the
	/// points: a number followed by a four byte suffix that is cut off before parsing. The second
	/// part, when present, holds the requirements:
	///
	/// - `(P) …` lists prerequisites, optionally followed by ` (X) …` restrictions;
	/// - `(X) …` lists restrictions only;
	/// - anything else is stored verbatim as a single prerequisite.
	///
	/// Lists are split on [`SPLIT_SLICE`]; entries are trimmed and empty entries are dropped.
	/// When there is no second part the prerequisites and restrictions are left untouched, and
	/// when `elem` has no such text node nothing is changed at all.
	///
	/// # Panics
	///
	/// Panics if the points text is shorter than its four byte suffix, if the suffix does not
	/// start on a character boundary, or if the remaining text is not a valid number.
	pub fn parse_coursepoints(&mut self, elem: ElementRef<'a>) {
		// Splits one requirements list into its trimmed, non-empty entries.
		fn split_list(list: &str) -> Vec<&str> {
			list.split(SPLIT_SLICE)
				.map(str::trim)
				.filter(|entry| !entry.is_empty())
				.collect()
		}

		// Parse course points, prerequisites, and exclusions.
		let details = elem
			.first_child()
			.and_then(|el| el.first_child()?.value().as_text());

		if let Some(details) = details {
			let details_split: Vec<&str> = details.split(" \u{2022} ").take(2).collect();

			info!("{:#?}", &details_split);

			// Occasionally there is extra whitespace here, so this needs to be trimmed.
			let points = details_split.first().expect("split should exist").trim();
			debug!("{:?}", points);

			// Cut off the four byte suffix. `checked_sub` routes too-short input to the `expect`
			// below instead of an arithmetic overflow.
			let points_slice = points
				.len()
				.checked_sub(4)
				.and_then(|end| points.get(..end))
				.expect("should be at indice");
			info!("{:?}", points_slice);

			let points = points_slice
				.parse::<f32>()
				.expect("should correctly parse points");
			info!("{:?}", points);

			self.points = points;

			// Only a second part holds requirements. Without a separator the sole part is the
			// points text, which must not be mistaken for a prerequisite.
			if let Some(requirements) = details_split.get(1).map(|s| s.trim()) {
				if let Some(restrictions) = requirements.strip_prefix("(X)") {
					self.restrictions = split_list(restrictions);
				} else if let Some(rest) = requirements.strip_prefix("(P)") {
					let mut sections = rest.split(" (X) ");

					self.prerequisites = sections.next().map(split_list).unwrap_or_default();

					// Anything after an " (X) " marker is the restrictions list.
					if let Some(restrictions) = sections.last() {
						self.restrictions = split_list(restrictions);
					}
				} else {
					self.prerequisites = vec![requirements];
				}

				info!("{requirements}");
			}
		}
	}

	/// Parses one timetable entry and appends it to the course's timetable.
	///
	/// The text of the first grandchild of `elem` is split on `" • "`. The first part is
	/// converted into a [`Trimester`]; the last part has its first four bytes skipped and its
	/// next whitespace-delimited token parsed as the course reference number. Nothing happens
	/// when `elem` has no such text node.
	///
	/// # Panics
	///
	/// Panics if the last part is shorter than four bytes or is sliced in the middle of a
	/// character, if no token follows, if the token is not a valid `u16`, or if the first part is
	/// not a known trimester.
	pub fn parse_timetable(&mut self, elem: ElementRef<'a>) {
		// Parse timetable / CRNs.
		let text = match elem
			.first_child()
			.and_then(|el| el.first_child()?.value().as_text())
		{
			Some(text) => text,
			None => return,
		};

		let parts: Vec<&str> = text.split(" \u{2022} ").take(2).collect();

		info!("{:#?}", &parts);

		// The reference number is resolved before the trimester, so a malformed number is
		// reported first.
		let reference = parts
			.last()
			.expect("course reference number should exist");
		let digits = reference
			.get(4..)
			.expect("course reference number digits should start at this indice")
			.split_whitespace()
			.next()
			.expect("course reference number should exist");
		let course_reference_number = digits
			.parse::<u16>()
			.expect("course reference number should be parseable");

		let period = parts.first().expect("trimester element should exist");
		let trimester =
			Trimester::try_from(*period).expect("should be parseable into a trimester");

		self.timetable
			.push(CourseOffering::new(course_reference_number, trimester));
	}
}

impl Default for Course<'_> {
	/// Creates an empty course: no text, no points, no requirements, and marked as offered.
	fn default() -> Self {
		Self {
			title: "",
			subtitle: "",
			description: None,
			// A course counts as offered until something says otherwise.
			offered: true,
			points: 0.0,
			prerequisites: Vec::new(),
			restrictions: Vec::new(),
			subject_areas: HashSet::new(),
			timetable: Vec::new(),
		}
	}
}

/// A course offering: the course reference number (CRN) together with its [`Trimester`].
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
pub struct CourseOffering {
	/// Reference number for this course, e.g. 11723.
	pub course_reference_number: u16,

	/// Trimester this course is offered in.
	pub trimester: Trimester,
}

impl CourseOffering {
	/// Builds a [`CourseOffering`] from a course reference number and the [`Trimester`] it runs
	/// in.
	///
	/// Both arguments are stored unchanged.
	#[must_use]
	pub const fn new(course_reference_number: u16, trimester: Trimester) -> Self {
		Self {
			trimester,
			course_reference_number,
		}
	}
}

/// Teaching periods in which Victoria University of Wellington offers a course.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, PartialOrd, Ord, Serialize)]
pub enum Trimester {
	/// Trimester one.
	One,

	/// Trimester two.
	Two,

	/// Trimester three.
	Three,

	/// Block dates. See course page for more information.
	BlockDates,

	/// Part year. See course page for more information.
	PartYear,

	/// Trimesters one and two.
	OneTwo,

	/// Trimesters two and three.
	TwoThree,

	/// Trimesters three and one.
	ThreeOne,

	/// Trimesters one, two, and three.
	FullYear,
}

impl TryFrom<&str> for Trimester {
	type Error = String;

	/// Converts the trimester text of a timetable entry into a [`Trimester`].
	///
	/// The comparison is exact: the text is neither trimmed nor case-folded.
	///
	/// # Errors
	///
	/// Returns `"Invalid trimester."` when `value` is not one of the recognised spellings.
	fn try_from(value: &str) -> Result<Self, Self::Error> {
		let trimester = match value {
			"1/3" => Self::One,
			"2/3" => Self::Two,
			"3/3" => Self::Three,
			"block dates/3" => Self::BlockDates,
			"part year/3" => Self::PartYear,
			"1+2/3" => Self::OneTwo,
			"2+3/3" => Self::TwoThree,
			"3+1/3" => Self::ThreeOne,
			"1+2+3/3" | "2+3+1/3" | "full year" => Self::FullYear,
			_ => return Err(String::from("Invalid trimester.")),
		};

		Ok(trimester)
	}
}

/// Parses a [`Html`] document into a [`HashMap`] of courses.
///
/// # Panics
///
/// Panics if [`Selector`] fails to parse.
#[must_use]
pub fn parse_document(document: &Html) -> HashMap<&str, Course<'_>> {
	let mut course_map: HashMap<&str, Course> = HashMap::new();

	let mut subject_area = "";
	let mut working_course = Course::default();

	// PERF: Could we gain a meaningful speed boost by splitting this into chunks of each course?
	for elem in document.select(&Selector::parse("p").expect("selector should always be valid")) {
		let elem_value = elem.value();

		if elem_value.has_class("courseid", CaseSensitivity::AsciiCaseInsensitive) {
			course_map
				.entry(working_course.title)
				.and_modify(|c| {
					c.subject_areas.insert(subject_area);
				})
				.or_insert(working_course);
			working_course = Course::default();
			working_course.subject_areas.insert(subject_area);

			working_course.parse_courseid(elem);
		} else if elem_value.has_class("notoffered", CaseSensitivity::CaseSensitive) {
			working_course.offered = false;
		} else if elem_value.has_class("subjectarea", CaseSensitivity::CaseSensitive) {
			if let Some(subject_area_name) = elem.first_child().and_then(|child| {
				child
					.first_child()
					.and_then(|nexted_child| nexted_child.value().as_text())
			}) {
				subject_area = &**subject_area_name;
			}
		} else if elem_value.has_class("subjectsbody", CaseSensitivity::CaseSensitive) {
			let description = elem
				.first_child()
				.and_then(|el| el.first_child()?.value().as_text())
				.map(|t| &**t);

			working_course.description = description;
		} else if elem_value.has_class("timetable", CaseSensitivity::CaseSensitive) {
			working_course.parse_timetable(elem);
		} else if elem_value.has_class("coursepoints", CaseSensitivity::CaseSensitive) {
			working_course.parse_coursepoints(elem);
		}
	}

	course_map.remove("");

	course_map
}