summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- crates/brainf_lexer/src/lexer.rs (renamed from src/lexer.rs) 58
-rw-r--r-- crates/brainf_rs/src/engine.rs (renamed from src/engine.rs) 28
-rw-r--r-- crates/brainf_rs/src/executor.rs (renamed from src/executor.rs) 0
-rw-r--r-- crates/brainf_rs/src/lib.rs (renamed from src/lib.rs) 7
-rw-r--r-- crates/brainf_rs/src/main.rs (renamed from src/main.rs) 0
-rw-r--r-- crates/brainf_rs/src/parser.rs (renamed from src/parser.rs) 39
-rw-r--r-- crates/brainf_rs/src/utility.rs (renamed from src/utility.rs) 8
7 files changed, 59 insertions, 81 deletions
diff --git a/src/lexer.rs b/crates/brainf_lexer/src/lexer.rs
index 786c873..b95cd87 100644
--- a/src/lexer.rs
+++ b/crates/brainf_lexer/src/lexer.rs
@@ -1,40 +1,60 @@
-//! Lexer for Brainfuck
+//! Lexer implementation using logos.
+
+#![expect(clippy::indexing_slicing)]
+
+use logos::{Lexer, Logos};
 
 /// List of operator codes for the lexer
 /// Note: Any input symbol that is not in this list is a comment
-#[derive(Clone, Copy, Debug)]
-pub enum OperatorCode {
+
+fn loop_callback(lex: &Lexer<Token>) -> (usize, usize) {
+	let span = lex.span();
+
+	(span.start, span.len())
+}
+
+/// List of Tokens for the lexer
+/// Note: Any input symbol that is not in this list is a comment
+#[derive(Clone, Copy, Debug, Logos, PartialEq, Eq)]
+#[logos(skip r"[^<>+\-.,\[\]]+")]
+pub enum Token {
 	/// `>`
 	///
 	/// Increment the data pointer by one (to point to the next cell to the
 	/// right).
+	#[token(">")]
 	IncrementPointer,
 
 	/// `<`
 	///
 	/// Decrement the data pointer by one (to point to the next cell to the
 	/// left).
+	#[token("<")]
 	DecrementPointer,
 
 	/// `+`
 	///
 	/// Increment the byte at the data pointer by one.
+	#[token("+")]
 	IncrementByte,
 
 	/// `-`
 	///
 	/// Decrement the byte at the data pointer by one.
+	#[token("-")]
 	DecrementByte,
 
 	/// `.`
 	///
 	/// Output the byte at the data pointer.
+	#[token(".")]
 	OutputByte,
 
 	/// `,`
 	///
 	/// Accept one byte of input, storing its value in the byte at the data
 	/// pointer.
+	#[token(",")]
 	InputByte,
 
 	/// `[`
@@ -42,38 +62,14 @@ pub enum OperatorCode {
 	/// If the byte at the data pointer is zero, then instead of moving the
 	/// instruction pointer forward to the next command, jump it forward to the
 	/// command after the matching ] command.
-	StartLoop {
-		/// Offset of the bracket in the source.
-		offset: usize,
-	},
+	#[token("[", loop_callback)]
+	StartLoop((usize, usize)),
 
 	/// `]`
 	///
 	/// If the byte at the data pointer is nonzero, then instead of moving the
 	/// instruction pointer forward to the next command, jump it back to the
 	/// command after the matching [ command.
-	EndLoop {
-		/// Offset of the bracket in the source.
-		offset: usize,
-	},
-}
-
-/// Perform lexical analysis on the input brainfuck code
-#[must_use]
-pub fn lex(input: &str) -> Vec<OperatorCode> {
-	input
-		.char_indices()
-		.filter_map(|(i, symbol)| match symbol {
-			'>' => Some(OperatorCode::IncrementPointer),
-			'<' => Some(OperatorCode::DecrementPointer),
-			'+' => Some(OperatorCode::IncrementByte),
-			'-' => Some(OperatorCode::DecrementByte),
-			'.' => Some(OperatorCode::OutputByte),
-			',' => Some(OperatorCode::InputByte),
-			'[' => Some(OperatorCode::StartLoop { offset: i }),
-			']' => Some(OperatorCode::EndLoop { offset: i }),
-			// Any symbol that does not match one of the above is a comment
-			_ => None,
-		})
-		.collect()
+	#[token("]", loop_callback)]
+	EndLoop((usize, usize)),
 }
diff --git a/src/engine.rs b/crates/brainf_rs/src/engine.rs
index 3269dff..e60acaa 100644
--- a/src/engine.rs
+++ b/crates/brainf_rs/src/engine.rs
@@ -3,11 +3,7 @@
 //! This predominantly allows implementation of a [`u16`] executor.
 
 #[cfg(feature = "bigint-engine")]
-use std::io::Cursor;
-use std::io::Read;
-
-#[cfg(feature = "bigint-engine")]
-use byteorder::{BigEndian, ReadBytesExt};
+use byteorder::{NativeEndian, ReadBytesExt};
 use num_traits::{One, Unsigned, WrappingAdd, WrappingSub, Zero};
 use thiserror::Error;
 
@@ -55,11 +51,7 @@ impl Engine for executor::U8 {
 	type TapeInner = u8;
 
 	fn read_byte() -> Result<u8, Error> {
-		let mut input: [u8; 1] = [0; 1];
-
-		std::io::stdin().read_exact(&mut input)?;
-
-		Ok(input[0])
+		Ok(std::io::stdin().read_u8()?)
 	}
 
 	fn write_byte(byte: u8) -> Result<(), Error> {
@@ -74,13 +66,7 @@ impl Engine for executor::U16 {
 	type TapeInner = u16;
 
 	fn read_byte() -> Result<u16, Error> {
-		let mut input: [u8; 2] = [0; 2];
-
-		std::io::stdin().read_exact(&mut input)?;
-
-		let mut reader = Cursor::new(input);
-
-		Ok(reader.read_u16::<BigEndian>()?)
+		Ok(std::io::stdin().read_u16::<NativeEndian>()?)
 	}
 
 	fn write_byte(byte: u16) -> Result<(), Error> {
@@ -98,13 +84,7 @@ impl Engine for executor::U32 {
 	type TapeInner = u32;
 
 	fn read_byte() -> Result<u32, Error> {
-		let mut input: [u8; 4] = [0; 4];
-
-		std::io::stdin().read_exact(&mut input)?;
-
-		let mut reader = Cursor::new(input);
-
-		Ok(reader.read_u32::<BigEndian>()?)
+		Ok(std::io::stdin().read_u32::<NativeEndian>()?)
 	}
 
 	fn write_byte(byte: u32) -> Result<(), Error> {
diff --git a/src/executor.rs b/crates/brainf_rs/src/executor.rs
index c5fff93..c5fff93 100644
--- a/src/executor.rs
+++ b/crates/brainf_rs/src/executor.rs
diff --git a/src/lib.rs b/crates/brainf_rs/src/lib.rs
index 71c5a99..f5c8987 100644
--- a/src/lib.rs
+++ b/crates/brainf_rs/src/lib.rs
@@ -83,13 +83,12 @@ extern crate test;
 
 mod engine;
 pub mod executor;
-pub mod lexer;
 pub mod parser;
 #[cfg(feature = "utilities")]
 pub mod utility;
 
+pub use brainf_lexer::{lex, Token};
 pub use executor::{U16 as ExecutorU16, U32 as ExecutorU32, U8 as ExecutorU8};
-pub use lexer::{lex, OperatorCode};
 use miette::Diagnostic;
 pub use parser::{parse, Instruction};
 use thiserror::Error;
@@ -101,6 +100,10 @@ pub enum Error {
 	#[error(transparent)]
 	Io(#[from] std::io::Error),
 
+	/// Error occurred while lexing the input.
+	#[error(transparent)]
+	Lexer(#[from] brainf_lexer::Error),
+
 	/// An error that occurred while parsing Brainfuck code.
 	#[diagnostic(transparent)]
 	#[error(transparent)]
diff --git a/src/main.rs b/crates/brainf_rs/src/main.rs
index 13868a6..13868a6 100644
--- a/src/main.rs
+++ b/crates/brainf_rs/src/main.rs
diff --git a/src/parser.rs b/crates/brainf_rs/src/parser.rs
index e639516..c9b3246 100644
--- a/src/parser.rs
+++ b/crates/brainf_rs/src/parser.rs
@@ -1,11 +1,10 @@
 //! Parser implementation for Brainfuck. Parses operator codes into instruction
 //! sets.
 
+use brainf_lexer::Token;
 use miette::{Diagnostic, SourceSpan};
 use thiserror::Error;
 
-use crate::lexer::OperatorCode;
-
 /// Parsed instructions for Brainfuck.
 #[derive(Clone, Debug)]
 pub enum Instruction {
@@ -102,49 +101,49 @@ pub enum Error {
 /// This function will return an error if a loop is encountered with no
 /// beginning, a loop is encountered with no ending, or if the parser attempts
 /// to slice out of bounds.
-pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instruction>, Error> {
+pub fn parse(src: &str, tokens: &[Token]) -> Result<Vec<Instruction>, Error> {
 	let mut program: Vec<Instruction> = Vec::new();
 	let mut loop_stack: i32 = 0;
 	let mut loop_start = 0;
-	let mut loop_source_offset: usize = 0;
+	let mut loop_span: (usize, usize) = (0, 0);
 
-	operator_codes
+	tokens
 		.iter()
 		.enumerate()
 		.try_for_each(|(i, operator_code)| -> Result<(), Error> {
 			match (loop_stack, *operator_code) {
-				(0i32, OperatorCode::StartLoop { offset }) => {
+				(0i32, Token::StartLoop(span)) => {
 					loop_start = i;
-					loop_source_offset = offset;
+					loop_span = span;
 					loop_stack += 1i32;
 				}
 				(0i32, _) => {
 					if let Some(instruction) = match *operator_code {
-						OperatorCode::IncrementPointer => Some(Instruction::IncrementPointer),
-						OperatorCode::DecrementPointer => Some(Instruction::DecrementPointer),
-						OperatorCode::IncrementByte => Some(Instruction::IncrementByte),
-						OperatorCode::DecrementByte => Some(Instruction::DecrementByte),
-						OperatorCode::OutputByte => Some(Instruction::OutputByte),
-						OperatorCode::InputByte => Some(Instruction::InputByte),
-						OperatorCode::EndLoop { offset } => {
+						Token::IncrementPointer => Some(Instruction::IncrementPointer),
+						Token::DecrementPointer => Some(Instruction::DecrementPointer),
+						Token::IncrementByte => Some(Instruction::IncrementByte),
+						Token::DecrementByte => Some(Instruction::DecrementByte),
+						Token::OutputByte => Some(Instruction::OutputByte),
+						Token::InputByte => Some(Instruction::InputByte),
+						Token::EndLoop(span) => {
 							return Err(Error::LoopWithNoBeginning {
 								input: src.to_owned(),
-								loop_src: (offset, 1).into(),
+								loop_src: span.into(),
 							})
 						}
 						// We don't care about this variant as it is handled in a subsequent arm
-						OperatorCode::StartLoop { .. } => None,
+						Token::StartLoop { .. } => None,
 					} {
 						program.push(instruction);
 					}
 				}
-				(_, OperatorCode::StartLoop { .. }) => loop_stack += 1i32,
-				(_, OperatorCode::EndLoop { .. }) => {
+				(_, Token::StartLoop { .. }) => loop_stack += 1i32,
+				(_, Token::EndLoop { .. }) => {
 					loop_stack -= 1i32;
 					if loop_stack == 0i32 {
 						let loop_program = parse(
 							src,
-							match operator_codes.get(loop_start + 1..i) {
+							match tokens.get(loop_start + 1..i) {
 								Some(value) => value,
 								None => return Err(Error::SliceOutOfBounds(loop_start + 1..i)),
 							},
@@ -164,7 +163,7 @@ pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instructi
 	} else {
 		Err(Error::LoopWithNoEnding {
 			input: src.to_owned(),
-			loop_src: (loop_source_offset, 1).into(),
+			loop_src: loop_span.into(),
 		})
 	}
 }
diff --git a/src/utility.rs b/crates/brainf_rs/src/utility.rs
index 514343c..c83dd42 100644
--- a/src/utility.rs
+++ b/crates/brainf_rs/src/utility.rs
@@ -18,9 +18,9 @@ pub fn execute_from_file<E: Engine>(
 ) -> Result<(), Error> {
 	let input = fs_err::read_to_string(path.as_ref())?;
 
-	let operator_codes = lex(&input);
+	let tokens = lex(&input)?;
 
-	let instructions = parse(&input, &operator_codes)?;
+	let instructions = parse(&input, &tokens)?;
 
 	let mut data_pointer = 0;
 
@@ -38,9 +38,9 @@ pub fn execute_from_file<E: Engine>(
 /// execution fails. See documentation for [`crate::parser::parse`] and
 /// [`crate::executor::execute`].
 pub fn execute_from_str<E: Engine>(input: &str, tape: &mut [E::TapeInner]) -> Result<(), Error> {
-	let operator_codes = lex(input);
+	let tokens = lex(input)?;
 
-	let instructions = parse(input, &operator_codes)?;
+	let instructions = parse(input, &tokens)?;
 
 	let mut data_pointer = 0;