diff options
| author | Sophie Forrest <git@sophieforrest.com> | 2024-08-30 23:35:45 +1200 |
|---|---|---|
| committer | Sophie Forrest <git@sophieforrest.com> | 2024-08-30 23:35:45 +1200 |
| commit | 3c163eabc78ddbd26bb250ef5ad6da28cd61adc6 (patch) | |
| tree | 58e17534e1db18813554d4fb6e67020f898b655d /src | |
| parent | 17b78f8cb127817b93f7e6ced7e55d8748806a80 (diff) | |
feat: split engine into crates
Diffstat (limited to '')
| -rw-r--r-- | crates/brainf_lexer/src/lexer.rs (renamed from src/lexer.rs) | 58 | ||||
| -rw-r--r-- | crates/brainf_rs/src/engine.rs (renamed from src/engine.rs) | 28 | ||||
| -rw-r--r-- | crates/brainf_rs/src/executor.rs (renamed from src/executor.rs) | 0 | ||||
| -rw-r--r-- | crates/brainf_rs/src/lib.rs (renamed from src/lib.rs) | 7 | ||||
| -rw-r--r-- | crates/brainf_rs/src/main.rs (renamed from src/main.rs) | 0 | ||||
| -rw-r--r-- | crates/brainf_rs/src/parser.rs (renamed from src/parser.rs) | 39 | ||||
| -rw-r--r-- | crates/brainf_rs/src/utility.rs (renamed from src/utility.rs) | 8 |
7 files changed, 59 insertions, 81 deletions
diff --git a/src/lexer.rs b/crates/brainf_lexer/src/lexer.rs index 786c873..b95cd87 100644 --- a/src/lexer.rs +++ b/crates/brainf_lexer/src/lexer.rs @@ -1,40 +1,60 @@ -//! Lexer for Brainfuck +//! Lexer implementation using logos. + +#![expect(clippy::indexing_slicing)] + +use logos::{Lexer, Logos}; /// List of operator codes for the lexer /// Note: Any input symbol that is not in this list is a comment -#[derive(Clone, Copy, Debug)] -pub enum OperatorCode { + +fn loop_callback(lex: &Lexer<Token>) -> (usize, usize) { + let span = lex.span(); + + (span.start, span.len()) +} + +/// List of Tokens for the lexer +/// Note: Any input symbol that is not in this list is a comment +#[derive(Clone, Copy, Debug, Logos, PartialEq, Eq)] +#[logos(skip r"[^<>+\-.,\[\]]+")] +pub enum Token { /// `>` /// /// Increment the data pointer by one (to point to the next cell to the /// right). + #[token(">")] IncrementPointer, /// `<` /// /// Decrement the data pointer by one (to point to the next cell to the /// left). + #[token("<")] DecrementPointer, /// `+` /// /// Increment the byte at the data pointer by one. + #[token("+")] IncrementByte, /// `-` /// /// Decrement the byte at the data pointer by one. + #[token("-")] DecrementByte, /// `.` /// /// Output the byte at the data pointer. + #[token(".")] OutputByte, /// `,` /// /// Accept one byte of input, storing its value in the byte at the data /// pointer. + #[token(",")] InputByte, /// `[` @@ -42,38 +62,14 @@ pub enum OperatorCode { /// If the byte at the data pointer is zero, then instead of moving the /// instruction pointer forward to the next command, jump it forward to the /// command after the matching ] command. - StartLoop { - /// Offset of the bracket in the source. - offset: usize, - }, + #[token("[", loop_callback)] + StartLoop((usize, usize)), /// `]` /// /// If the byte at the data pointer is nonzero, then instead of moving the /// instruction pointer forward to the next command, jump it back to the /// command after the matching [ command. - EndLoop { - /// Offset of the bracket in the source. - offset: usize, - }, -} - -/// Perform lexical analysis on the input brainfuck code -#[must_use] -pub fn lex(input: &str) -> Vec<OperatorCode> { - input - .char_indices() - .filter_map(|(i, symbol)| match symbol { - '>' => Some(OperatorCode::IncrementPointer), - '<' => Some(OperatorCode::DecrementPointer), - '+' => Some(OperatorCode::IncrementByte), - '-' => Some(OperatorCode::DecrementByte), - '.' => Some(OperatorCode::OutputByte), - ',' => Some(OperatorCode::InputByte), - '[' => Some(OperatorCode::StartLoop { offset: i }), - ']' => Some(OperatorCode::EndLoop { offset: i }), - // Any symbol that does not match one of the above is a comment - _ => None, - }) - .collect() + #[token("]", loop_callback)] + EndLoop((usize, usize)), } diff --git a/src/engine.rs b/crates/brainf_rs/src/engine.rs index 3269dff..e60acaa 100644 --- a/src/engine.rs +++ b/crates/brainf_rs/src/engine.rs @@ -3,11 +3,7 @@ //! This predominantly allows implementation of a [`u16`] executor. #[cfg(feature = "bigint-engine")] -use std::io::Cursor; -use std::io::Read; - -#[cfg(feature = "bigint-engine")] -use byteorder::{BigEndian, ReadBytesExt}; +use byteorder::{NativeEndian, ReadBytesExt}; use num_traits::{One, Unsigned, WrappingAdd, WrappingSub, Zero}; use thiserror::Error; @@ -55,11 +51,7 @@ impl Engine for executor::U8 { type TapeInner = u8; fn read_byte() -> Result<u8, Error> { - let mut input: [u8; 1] = [0; 1]; - - std::io::stdin().read_exact(&mut input)?; - - Ok(input[0]) + Ok(std::io::stdin().read_u8()?) } fn write_byte(byte: u8) -> Result<(), Error> { @@ -74,13 +66,7 @@ impl Engine for executor::U16 { type TapeInner = u16; fn read_byte() -> Result<u16, Error> { - let mut input: [u8; 2] = [0; 2]; - - std::io::stdin().read_exact(&mut input)?; - - let mut reader = Cursor::new(input); - - Ok(reader.read_u16::<BigEndian>()?) + Ok(std::io::stdin().read_u16::<NativeEndian>()?) } fn write_byte(byte: u16) -> Result<(), Error> { @@ -98,13 +84,7 @@ impl Engine for executor::U32 { type TapeInner = u32; fn read_byte() -> Result<u32, Error> { - let mut input: [u8; 4] = [0; 4]; - - std::io::stdin().read_exact(&mut input)?; - - let mut reader = Cursor::new(input); - - Ok(reader.read_u32::<BigEndian>()?) + Ok(std::io::stdin().read_u32::<NativeEndian>()?) } fn write_byte(byte: u32) -> Result<(), Error> { diff --git a/src/executor.rs b/crates/brainf_rs/src/executor.rs index c5fff93..c5fff93 100644 --- a/src/executor.rs +++ b/crates/brainf_rs/src/executor.rs diff --git a/src/lib.rs b/crates/brainf_rs/src/lib.rs index 71c5a99..f5c8987 100644 --- a/src/lib.rs +++ b/crates/brainf_rs/src/lib.rs @@ -83,13 +83,12 @@ extern crate test; mod engine; pub mod executor; -pub mod lexer; pub mod parser; #[cfg(feature = "utilities")] pub mod utility; +pub use brainf_lexer::{lex, Token}; pub use executor::{U16 as ExecutorU16, U32 as ExecutorU32, U8 as ExecutorU8}; -pub use lexer::{lex, OperatorCode}; use miette::Diagnostic; pub use parser::{parse, Instruction}; use thiserror::Error; @@ -101,6 +100,10 @@ pub enum Error { #[error(transparent)] Io(#[from] std::io::Error), + /// Error occurred while lexing the input. + #[error(transparent)] + Lexer(#[from] brainf_lexer::Error), + /// An error that occurred while parsing Brainfuck code. #[diagnostic(transparent)] #[error(transparent)] diff --git a/src/main.rs b/crates/brainf_rs/src/main.rs index 13868a6..13868a6 100644 --- a/src/main.rs +++ b/crates/brainf_rs/src/main.rs diff --git a/src/parser.rs b/crates/brainf_rs/src/parser.rs index e639516..c9b3246 100644 --- a/src/parser.rs +++ b/crates/brainf_rs/src/parser.rs @@ -1,11 +1,10 @@ //! Parser implementation for Brainfuck. Parses operator codes into instruction //! sets. +use brainf_lexer::Token; use miette::{Diagnostic, SourceSpan}; use thiserror::Error; -use crate::lexer::OperatorCode; - /// Parsed instructions for Brainfuck. #[derive(Clone, Debug)] pub enum Instruction { @@ -102,49 +101,49 @@ pub enum Error { /// This function will return an error if a loop is encountered with no /// beginning, a loop is encountered with no ending, or if the parser attempts /// to slice out of bounds. -pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instruction>, Error> { +pub fn parse(src: &str, tokens: &[Token]) -> Result<Vec<Instruction>, Error> { let mut program: Vec<Instruction> = Vec::new(); let mut loop_stack: i32 = 0; let mut loop_start = 0; - let mut loop_source_offset: usize = 0; + let mut loop_span: (usize, usize) = (0, 0); - operator_codes + tokens .iter() .enumerate() .try_for_each(|(i, operator_code)| -> Result<(), Error> { match (loop_stack, *operator_code) { - (0i32, OperatorCode::StartLoop { offset }) => { + (0i32, Token::StartLoop(span)) => { loop_start = i; - loop_source_offset = offset; + loop_span = span; loop_stack += 1i32; } (0i32, _) => { if let Some(instruction) = match *operator_code { - OperatorCode::IncrementPointer => Some(Instruction::IncrementPointer), - OperatorCode::DecrementPointer => Some(Instruction::DecrementPointer), - OperatorCode::IncrementByte => Some(Instruction::IncrementByte), - OperatorCode::DecrementByte => Some(Instruction::DecrementByte), - OperatorCode::OutputByte => Some(Instruction::OutputByte), - OperatorCode::InputByte => Some(Instruction::InputByte), - OperatorCode::EndLoop { offset } => { + Token::IncrementPointer => Some(Instruction::IncrementPointer), + Token::DecrementPointer => Some(Instruction::DecrementPointer), + Token::IncrementByte => Some(Instruction::IncrementByte), + Token::DecrementByte => Some(Instruction::DecrementByte), + Token::OutputByte => Some(Instruction::OutputByte), + Token::InputByte => Some(Instruction::InputByte), + Token::EndLoop(span) => { return Err(Error::LoopWithNoBeginning { input: src.to_owned(), - loop_src: (offset, 1).into(), + loop_src: span.into(), }) } // We don't care about this variant as it is handled in a subsequent arm - OperatorCode::StartLoop { .. } => None, + Token::StartLoop { .. } => None, } { program.push(instruction); } } - (_, OperatorCode::StartLoop { .. }) => loop_stack += 1i32, - (_, OperatorCode::EndLoop { .. }) => { + (_, Token::StartLoop { .. }) => loop_stack += 1i32, + (_, Token::EndLoop { .. }) => { loop_stack -= 1i32; if loop_stack == 0i32 { let loop_program = parse( src, - match operator_codes.get(loop_start + 1..i) { + match tokens.get(loop_start + 1..i) { Some(value) => value, None => return Err(Error::SliceOutOfBounds(loop_start + 1..i)), }, @@ -164,7 +163,7 @@ pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instructi } else { Err(Error::LoopWithNoEnding { input: src.to_owned(), - loop_src: (loop_source_offset, 1).into(), + loop_src: loop_span.into(), }) } } diff --git a/src/utility.rs b/crates/brainf_rs/src/utility.rs index 514343c..c83dd42 100644 --- a/src/utility.rs +++ b/crates/brainf_rs/src/utility.rs @@ -18,9 +18,9 @@ pub fn execute_from_file<E: Engine>( ) -> Result<(), Error> { let input = fs_err::read_to_string(path.as_ref())?; - let operator_codes = lex(&input); + let tokens = lex(&input)?; - let instructions = parse(&input, &operator_codes)?; + let instructions = parse(&input, &tokens)?; let mut data_pointer = 0; @@ -38,9 +38,9 @@ pub fn execute_from_file<E: Engine>( /// execution fails. See documentation for [`crate::parser::parse`] and /// [`crate::executor::execute`]. pub fn execute_from_str<E: Engine>(input: &str, tape: &mut [E::TapeInner]) -> Result<(), Error> { - let operator_codes = lex(input); + let tokens = lex(input)?; - let instructions = parse(input, &operator_codes)?; + let instructions = parse(input, &tokens)?; let mut data_pointer = 0; |