author     Sophie Forrest <git@sophieforrest.com>  2024-08-30 23:35:45 +1200
committer  Sophie Forrest <git@sophieforrest.com>  2024-08-30 23:35:45 +1200
commit     3c163eabc78ddbd26bb250ef5ad6da28cd61adc6 (patch)
tree       58e17534e1db18813554d4fb6e67020f898b655d /crates
parent     17b78f8cb127817b93f7e6ced7e55d8748806a80 (diff)
feat: split engine into crates
Diffstat (limited to 'crates')
-rw-r--r--  crates/brainf_lexer/Cargo.toml | 8
-rw-r--r--  crates/brainf_lexer/src/lexer.rs (renamed from src/lexer.rs) | 58
-rw-r--r--  crates/brainf_lexer/src/lib.rs | 105
-rw-r--r--  crates/brainf_rs/Cargo.toml | 22
-rw-r--r--  crates/brainf_rs/src/engine.rs (renamed from src/engine.rs) | 28
-rw-r--r--  crates/brainf_rs/src/executor.rs (renamed from src/executor.rs) | 0
-rw-r--r--  crates/brainf_rs/src/lib.rs (renamed from src/lib.rs) | 7
-rw-r--r--  crates/brainf_rs/src/main.rs (renamed from src/main.rs) | 0
-rw-r--r--  crates/brainf_rs/src/parser.rs (renamed from src/parser.rs) | 39
-rw-r--r--  crates/brainf_rs/src/utility.rs (renamed from src/utility.rs) | 8
10 files changed, 194 insertions, 81 deletions
diff --git a/crates/brainf_lexer/Cargo.toml b/crates/brainf_lexer/Cargo.toml
new file mode 100644
index 0000000..58be7a5
--- /dev/null
+++ b/crates/brainf_lexer/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "brainf_lexer"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+logos = "0.13.0"
+thiserror = "1.0.44"
diff --git a/src/lexer.rs b/crates/brainf_lexer/src/lexer.rs
index 786c873..b95cd87 100644
--- a/src/lexer.rs
+++ b/crates/brainf_lexer/src/lexer.rs
@@ -1,40 +1,60 @@
-//! Lexer for Brainfuck
+//! Lexer implementation using logos.
+
+#![expect(clippy::indexing_slicing)]
+
+use logos::{Lexer, Logos};
 
 /// List of operator codes for the lexer
 /// Note: Any input symbol that is not in this list is a comment
-#[derive(Clone, Copy, Debug)]
-pub enum OperatorCode {
+
+fn loop_callback(lex: &Lexer<Token>) -> (usize, usize) {
+	let span = lex.span();
+
+	(span.start, span.len())
+}
+
+/// List of Tokens for the lexer
+/// Note: Any input symbol that is not in this list is a comment
+#[derive(Clone, Copy, Debug, Logos, PartialEq, Eq)]
+#[logos(skip r"[^<>+\-.,\[\]]+")]
+pub enum Token {
 	/// `>`
 	///
 	/// Increment the data pointer by one (to point to the next cell to the
 	/// right).
+	#[token(">")]
 	IncrementPointer,
 
 	/// `<`
 	///
 	/// Decrement the data pointer by one (to point to the next cell to the
 	/// left).
+	#[token("<")]
 	DecrementPointer,
 
 	/// `+`
 	///
 	/// Increment the byte at the data pointer by one.
+	#[token("+")]
 	IncrementByte,
 
 	/// `-`
 	///
 	/// Decrement the byte at the data pointer by one.
+	#[token("-")]
 	DecrementByte,
 
 	/// `.`
 	///
 	/// Output the byte at the data pointer.
+	#[token(".")]
 	OutputByte,
 
 	/// `,`
 	///
 	/// Accept one byte of input, storing its value in the byte at the data
 	/// pointer.
+	#[token(",")]
 	InputByte,
 
 	/// `[`
@@ -42,38 +62,14 @@ pub enum OperatorCode {
 	/// If the byte at the data pointer is zero, then instead of moving the
 	/// instruction pointer forward to the next command, jump it forward to the
 	/// command after the matching ] command.
-	StartLoop {
-		/// Offset of the bracket in the source.
-		offset: usize,
-	},
+	#[token("[", loop_callback)]
+	StartLoop((usize, usize)),
 
 	/// `]`
 	///
 	/// If the byte at the data pointer is nonzero, then instead of moving the
 	/// instruction pointer forward to the next command, jump it back to the
 	/// command after the matching [ command.
-	EndLoop {
-		/// Offset of the bracket in the source.
-		offset: usize,
-	},
-}
-
-/// Perform lexical analysis on the input brainfuck code
-#[must_use]
-pub fn lex(input: &str) -> Vec<OperatorCode> {
-	input
-		.char_indices()
-		.filter_map(|(i, symbol)| match symbol {
-			'>' => Some(OperatorCode::IncrementPointer),
-			'<' => Some(OperatorCode::DecrementPointer),
-			'+' => Some(OperatorCode::IncrementByte),
-			'-' => Some(OperatorCode::DecrementByte),
-			'.' => Some(OperatorCode::OutputByte),
-			',' => Some(OperatorCode::InputByte),
-			'[' => Some(OperatorCode::StartLoop { offset: i }),
-			']' => Some(OperatorCode::EndLoop { offset: i }),
-			// Any symbol that does not match one of the above is a comment
-			_ => None,
-		})
-		.collect()
+	#[token("]", loop_callback)]
+	EndLoop((usize, usize)),
 }
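A rough illustration of the new logos-based interface (not part of the commit; it assumes a consumer depending on both brainf_lexer and logos 0.13): iterating the derived lexer yields Result items, non-operator characters disappear via the skip pattern, and each loop token carries the (offset, length) pair produced by loop_callback.

use brainf_lexer::Token;
use logos::Logos;

fn main() {
	// "a" and "b" match the skip pattern and never surface as tokens.
	let mut tokens = Token::lexer("a[-]b");

	assert_eq!(tokens.next(), Some(Ok(Token::StartLoop((1, 1)))));
	assert_eq!(tokens.next(), Some(Ok(Token::DecrementByte)));
	assert_eq!(tokens.next(), Some(Ok(Token::EndLoop((3, 1)))));
	assert_eq!(tokens.next(), None);
}

Storing (start, len) rather than a bare offset lets the parser hand a full span straight to miette, as the parser changes further down show.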
diff --git a/crates/brainf_lexer/src/lib.rs b/crates/brainf_lexer/src/lib.rs
new file mode 100644
index 0000000..7f6e5be
--- /dev/null
+++ b/crates/brainf_lexer/src/lib.rs
@@ -0,0 +1,105 @@
+#![feature(lint_reasons)]
+#![deny(clippy::complexity)]
+#![deny(clippy::nursery)]
+#![deny(clippy::pedantic)]
+#![deny(clippy::perf)]
+#![deny(clippy::suspicious)]
+#![deny(clippy::alloc_instead_of_core)]
+#![deny(clippy::as_underscore)]
+#![deny(clippy::clone_on_ref_ptr)]
+#![deny(clippy::create_dir)]
+#![warn(clippy::dbg_macro)]
+#![deny(clippy::default_numeric_fallback)]
+#![deny(clippy::default_union_representation)]
+#![deny(clippy::deref_by_slicing)]
+#![deny(clippy::empty_structs_with_brackets)]
+#![deny(clippy::exit)]
+#![deny(clippy::expect_used)]
+#![deny(clippy::filetype_is_file)]
+#![deny(clippy::fn_to_numeric_cast)]
+#![deny(clippy::format_push_string)]
+#![deny(clippy::get_unwrap)]
+#![deny(clippy::if_then_some_else_none)]
+#![allow(
+	clippy::implicit_return,
+	reason = "returns should be done implicitly, not explicitly"
+)]
+#![deny(clippy::indexing_slicing)]
+#![deny(clippy::large_include_file)]
+#![deny(clippy::let_underscore_must_use)]
+#![deny(clippy::lossy_float_literal)]
+#![deny(clippy::map_err_ignore)]
+#![deny(clippy::mem_forget)]
+#![deny(clippy::missing_docs_in_private_items)]
+#![deny(clippy::missing_trait_methods)]
+#![deny(clippy::mod_module_files)]
+#![deny(clippy::multiple_inherent_impl)]
+#![deny(clippy::mutex_atomic)]
+#![deny(clippy::needless_return)]
+#![deny(clippy::non_ascii_literal)]
+#![deny(clippy::panic_in_result_fn)]
+#![deny(clippy::pattern_type_mismatch)]
+#![deny(clippy::rc_buffer)]
+#![deny(clippy::rc_mutex)]
+#![deny(clippy::rest_pat_in_fully_bound_structs)]
+#![deny(clippy::same_name_method)]
+#![deny(clippy::separated_literal_suffix)]
+#![deny(clippy::str_to_string)]
+#![deny(clippy::string_add)]
+#![deny(clippy::string_slice)]
+#![deny(clippy::string_to_string)]
+#![allow(
+	clippy::tabs_in_doc_comments,
+	reason = "tabs are preferred for this project"
+)]
+#![deny(clippy::try_err)]
+#![deny(clippy::undocumented_unsafe_blocks)]
+#![deny(clippy::unnecessary_self_imports)]
+#![deny(clippy::unneeded_field_pattern)]
+#![deny(clippy::unwrap_in_result)]
+#![deny(clippy::unwrap_used)]
+#![warn(clippy::use_debug)]
+#![deny(clippy::verbose_file_reads)]
+#![deny(clippy::wildcard_dependencies)]
+#![deny(clippy::wildcard_enum_match_arm)]
+#![deny(missing_copy_implementations)]
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(single_use_lifetimes)]
+#![deny(unsafe_code)]
+#![deny(unused)]
+
+//! # `brainf_lexer`
+//!
+//! Implementation of a Brainfuck lexer in Rust.
+
+mod lexer;
+
+pub use lexer::Token;
+use logos::Logos;
+use thiserror::Error;
+
+/// Error type for lexer.
+#[derive(Clone, Copy, Debug, Error)]
+pub enum Error {
+	/// Logos was unable to lex part of the input.
+	#[error("lexer was unable to lex input")]
+	LexingError,
+}
+
+/// Lexes the Brainfuck input, returning a Vec of Tokens.
+///
+/// # Errors
+///
+/// This function will return an error if the lexer is unable to lex one or more
+/// of the input characters.
+pub fn lex(input: &str) -> Result<Vec<Token>, Error> {
+	lexer::Token::lexer(input).try_fold(Vec::new(), |mut arr, result| {
+		result
+			.map_or(Err(()), |token| {
+				arr.push(token);
+				Ok(arr)
+			})
+			.map_err(|_err| Error::LexingError)
+	})
+}
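Downstream callers now get a Result instead of an infallible Vec. A minimal sketch of the new entry point (the input string is made up for illustration):

use brainf_lexer::{lex, Error, Token};

fn main() -> Result<(), Error> {
	// "Hello " is comment text as far as Brainfuck is concerned, so only
	// the seven operator characters become tokens.
	let tokens = lex("Hello +[>.<-]")?;

	assert_eq!(tokens.len(), 7);
	assert_eq!(tokens.first(), Some(&Token::IncrementByte));

	Ok(())
}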
diff --git a/crates/brainf_rs/Cargo.toml b/crates/brainf_rs/Cargo.toml
new file mode 100644
index 0000000..5520652
--- /dev/null
+++ b/crates/brainf_rs/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "brainf_rs"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+brainf_lexer = { path = "../brainf_lexer" }
+byteorder = { optional = true, version = "1.4.3" }
+clap = { features = ["derive"], version = "4.3.21" }
+num-traits = "0.2.16"
+fs-err = "2.9.0"
+logos = "0.13.0"
+miette = { features = ["fancy"], version = "5.10.0" }
+thiserror = "1.0.44"
+widestring = { default-features = false, optional = true, version = "1.0.2" }
+
+[features]
+default = ["engine-u16", "engine-u32", "utilities"]
+bigint-engine = ["dep:byteorder", "dep:widestring"]
+engine-u16 = ["bigint-engine"]
+engine-u32 = ["bigint-engine"]
+utilities = []
diff --git a/src/engine.rs b/crates/brainf_rs/src/engine.rs
index 3269dff..e60acaa 100644
--- a/src/engine.rs
+++ b/crates/brainf_rs/src/engine.rs
@@ -3,11 +3,7 @@
 //! This predominantly allows implementation of a [`u16`] executor.
 
 #[cfg(feature = "bigint-engine")]
-use std::io::Cursor;
-use std::io::Read;
-
-#[cfg(feature = "bigint-engine")]
-use byteorder::{BigEndian, ReadBytesExt};
+use byteorder::{NativeEndian, ReadBytesExt};
 use num_traits::{One, Unsigned, WrappingAdd, WrappingSub, Zero};
 use thiserror::Error;
 
@@ -55,11 +51,7 @@ impl Engine for executor::U8 {
 	type TapeInner = u8;
 
 	fn read_byte() -> Result<u8, Error> {
-		let mut input: [u8; 1] = [0; 1];
-
-		std::io::stdin().read_exact(&mut input)?;
-
-		Ok(input[0])
+		Ok(std::io::stdin().read_u8()?)
 	}
 
 	fn write_byte(byte: u8) -> Result<(), Error> {
@@ -74,13 +66,7 @@ impl Engine for executor::U16 {
 	type TapeInner = u16;
 
 	fn read_byte() -> Result<u16, Error> {
-		let mut input: [u8; 2] = [0; 2];
-
-		std::io::stdin().read_exact(&mut input)?;
-
-		let mut reader = Cursor::new(input);
-
-		Ok(reader.read_u16::<BigEndian>()?)
+		Ok(std::io::stdin().read_u16::<NativeEndian>()?)
 	}
 
 	fn write_byte(byte: u16) -> Result<(), Error> {
@@ -98,13 +84,7 @@ impl Engine for executor::U32 {
 	type TapeInner = u32;
 
 	fn read_byte() -> Result<u32, Error> {
-		let mut input: [u8; 4] = [0; 4];
-
-		std::io::stdin().read_exact(&mut input)?;
-
-		let mut reader = Cursor::new(input);
-
-		Ok(reader.read_u32::<BigEndian>()?)
+		Ok(std::io::stdin().read_u32::<NativeEndian>()?)
 	}
 
 	fn write_byte(byte: u32) -> Result<(), Error> {
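The Cursor round-trips go away because byteorder's ReadBytesExt is blanket-implemented for anything that implements Read, so stdin can be read from directly. A small sketch of that pattern, using an in-memory buffer instead of stdin so it runs without input (read_cell is a hypothetical helper; note that NativeEndian resolves to the target's byte order):

use std::io::{self, Read};

use byteorder::{NativeEndian, ReadBytesExt};

// Accepts Stdin, a file, or an in-memory slice alike.
fn read_cell<R: Read>(reader: &mut R) -> io::Result<u16> {
	reader.read_u16::<NativeEndian>()
}

fn main() -> io::Result<()> {
	let mut buffer: &[u8] = &[0x01, 0x02];

	// Prints 0x0201 on little-endian targets, 0x0102 on big-endian ones.
	println!("{:#06x}", read_cell(&mut buffer)?);

	Ok(())
}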
diff --git a/src/executor.rs b/crates/brainf_rs/src/executor.rs
index c5fff93..c5fff93 100644
--- a/src/executor.rs
+++ b/crates/brainf_rs/src/executor.rs
diff --git a/src/lib.rs b/crates/brainf_rs/src/lib.rs
index 71c5a99..f5c8987 100644
--- a/src/lib.rs
+++ b/crates/brainf_rs/src/lib.rs
@@ -83,13 +83,12 @@ extern crate test;
 
 mod engine;
 pub mod executor;
-pub mod lexer;
 pub mod parser;
 #[cfg(feature = "utilities")]
 pub mod utility;
 
+pub use brainf_lexer::{lex, Token};
 pub use executor::{U16 as ExecutorU16, U32 as ExecutorU32, U8 as ExecutorU8};
-pub use lexer::{lex, OperatorCode};
 use miette::Diagnostic;
 pub use parser::{parse, Instruction};
 use thiserror::Error;
@@ -101,6 +100,10 @@ pub enum Error {
 	#[error(transparent)]
 	Io(#[from] std::io::Error),
 
+	/// Error occurred while lexing the input.
+	#[error(transparent)]
+	Lexer(#[from] brainf_lexer::Error),
+
 	/// An error that occurred while parsing Brainfuck code.
 	#[diagnostic(transparent)]
 	#[error(transparent)]
diff --git a/src/main.rs b/crates/brainf_rs/src/main.rs
index 13868a6..13868a6 100644
--- a/src/main.rs
+++ b/crates/brainf_rs/src/main.rs
diff --git a/src/parser.rs b/crates/brainf_rs/src/parser.rs
index e639516..c9b3246 100644
--- a/src/parser.rs
+++ b/crates/brainf_rs/src/parser.rs
@@ -1,11 +1,10 @@
 //! Parser implementation for Brainfuck. Parses operator codes into instruction
 //! sets.
 
+use brainf_lexer::Token;
 use miette::{Diagnostic, SourceSpan};
 use thiserror::Error;
 
-use crate::lexer::OperatorCode;
-
 /// Parsed instructions for Brainfuck.
 #[derive(Clone, Debug)]
 pub enum Instruction {
@@ -102,49 +101,49 @@ pub enum Error {
 /// This function will return an error if a loop is encountered with no
 /// beginning, a loop is encountered with no ending, or if the parser attempts
 /// to slice out of bounds.
-pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instruction>, Error> {
+pub fn parse(src: &str, tokens: &[Token]) -> Result<Vec<Instruction>, Error> {
 	let mut program: Vec<Instruction> = Vec::new();
 	let mut loop_stack: i32 = 0;
 	let mut loop_start = 0;
-	let mut loop_source_offset: usize = 0;
+	let mut loop_span: (usize, usize) = (0, 0);
 
-	operator_codes
+	tokens
 		.iter()
 		.enumerate()
 		.try_for_each(|(i, operator_code)| -> Result<(), Error> {
 			match (loop_stack, *operator_code) {
-				(0i32, OperatorCode::StartLoop { offset }) => {
+				(0i32, Token::StartLoop(span)) => {
 					loop_start = i;
-					loop_source_offset = offset;
+					loop_span = span;
 					loop_stack += 1i32;
 				}
 				(0i32, _) => {
 					if let Some(instruction) = match *operator_code {
-						OperatorCode::IncrementPointer => Some(Instruction::IncrementPointer),
-						OperatorCode::DecrementPointer => Some(Instruction::DecrementPointer),
-						OperatorCode::IncrementByte => Some(Instruction::IncrementByte),
-						OperatorCode::DecrementByte => Some(Instruction::DecrementByte),
-						OperatorCode::OutputByte => Some(Instruction::OutputByte),
-						OperatorCode::InputByte => Some(Instruction::InputByte),
-						OperatorCode::EndLoop { offset } => {
+						Token::IncrementPointer => Some(Instruction::IncrementPointer),
+						Token::DecrementPointer => Some(Instruction::DecrementPointer),
+						Token::IncrementByte => Some(Instruction::IncrementByte),
+						Token::DecrementByte => Some(Instruction::DecrementByte),
+						Token::OutputByte => Some(Instruction::OutputByte),
+						Token::InputByte => Some(Instruction::InputByte),
+						Token::EndLoop(span) => {
 							return Err(Error::LoopWithNoBeginning {
 								input: src.to_owned(),
-								loop_src: (offset, 1).into(),
+								loop_src: span.into(),
 							})
 						}
 						// We don't care about this variant as it is handled in a subsequent arm
-						OperatorCode::StartLoop { .. } => None,
+						Token::StartLoop { .. } => None,
 					} {
 						program.push(instruction);
 					}
 				}
-				(_, OperatorCode::StartLoop { .. }) => loop_stack += 1i32,
-				(_, OperatorCode::EndLoop { .. }) => {
+				(_, Token::StartLoop { .. }) => loop_stack += 1i32,
+				(_, Token::EndLoop { .. }) => {
 					loop_stack -= 1i32;
 					if loop_stack == 0i32 {
 						let loop_program = parse(
 							src,
-							match operator_codes.get(loop_start + 1..i) {
+							match tokens.get(loop_start + 1..i) {
 								Some(value) => value,
 								None => return Err(Error::SliceOutOfBounds(loop_start + 1..i)),
 							},
@@ -164,7 +163,7 @@ pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instructi
 	} else {
 		Err(Error::LoopWithNoEnding {
 			input: src.to_owned(),
-			loop_src: (loop_source_offset, 1).into(),
+			loop_src: loop_span.into(),
 		})
 	}
 }
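With the lexer split out, consumers still reach everything through the brainf_rs facade thanks to the re-exports in lib.rs, and errors from both stages fold into the crate's Error type, as utility.rs below relies on. A rough sketch of the pipeline from a consumer's point of view (the source snippet is illustrative only):

use brainf_rs::{lex, parse};

fn main() -> Result<(), brainf_rs::Error> {
	let src = "++[>+<-]";

	let tokens = lex(src)?;
	let instructions = parse(src, &tokens)?;

	println!("parsed {} top-level instructions", instructions.len());

	Ok(())
}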
diff --git a/src/utility.rs b/crates/brainf_rs/src/utility.rs
index 514343c..c83dd42 100644
--- a/src/utility.rs
+++ b/crates/brainf_rs/src/utility.rs
@@ -18,9 +18,9 @@ pub fn execute_from_file<E: Engine>(
 ) -> Result<(), Error> {
 	let input = fs_err::read_to_string(path.as_ref())?;
 
-	let operator_codes = lex(&input);
+	let tokens = lex(&input)?;
 
-	let instructions = parse(&input, &operator_codes)?;
+	let instructions = parse(&input, &tokens)?;
 
 	let mut data_pointer = 0;
 
@@ -38,9 +38,9 @@ pub fn execute_from_file<E: Engine>(
 /// execution fails. See documentation for [`crate::parser::parse`] and
 /// [`crate::executor::execute`].
 pub fn execute_from_str<E: Engine>(input: &str, tape: &mut [E::TapeInner]) -> Result<(), Error> {
-	let operator_codes = lex(input);
+	let tokens = lex(input)?;
 
-	let instructions = parse(input, &operator_codes)?;
+	let instructions = parse(input, &tokens)?;
 
 	let mut data_pointer = 0;