diff options
Diffstat (limited to '')
| -rw-r--r-- | crates/brainf_lexer/Cargo.toml | 8 | ||||
| -rw-r--r-- | crates/brainf_lexer/src/lexer.rs (renamed from src/lexer.rs) | 58 | ||||
| -rw-r--r-- | crates/brainf_lexer/src/lib.rs | 105 | ||||
| -rw-r--r-- | crates/brainf_rs/Cargo.toml | 22 | ||||
| -rw-r--r-- | crates/brainf_rs/src/engine.rs (renamed from src/engine.rs) | 28 | ||||
| -rw-r--r-- | crates/brainf_rs/src/executor.rs (renamed from src/executor.rs) | 0 | ||||
| -rw-r--r-- | crates/brainf_rs/src/lib.rs (renamed from src/lib.rs) | 7 | ||||
| -rw-r--r-- | crates/brainf_rs/src/main.rs (renamed from src/main.rs) | 0 | ||||
| -rw-r--r-- | crates/brainf_rs/src/parser.rs (renamed from src/parser.rs) | 39 | ||||
| -rw-r--r-- | crates/brainf_rs/src/utility.rs (renamed from src/utility.rs) | 8 |
10 files changed, 194 insertions, 81 deletions
diff --git a/crates/brainf_lexer/Cargo.toml b/crates/brainf_lexer/Cargo.toml new file mode 100644 index 0000000..58be7a5 --- /dev/null +++ b/crates/brainf_lexer/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "brainf_lexer" +version = "0.1.0" +edition = "2021" + +[dependencies] +logos = "0.13.0" +thiserror = "1.0.44" diff --git a/src/lexer.rs b/crates/brainf_lexer/src/lexer.rs index 786c873..b95cd87 100644 --- a/src/lexer.rs +++ b/crates/brainf_lexer/src/lexer.rs @@ -1,40 +1,60 @@ -//! Lexer for Brainfuck +//! Lexer implementation using logos. + +#![expect(clippy::indexing_slicing)] + +use logos::{Lexer, Logos}; /// List of operator codes for the lexer /// Note: Any input symbol that is not in this list is a comment -#[derive(Clone, Copy, Debug)] -pub enum OperatorCode { + +fn loop_callback(lex: &Lexer<Token>) -> (usize, usize) { + let span = lex.span(); + + (span.start, span.len()) +} + +/// List of Tokens for the lexer +/// Note: Any input symbol that is not in this list is a comment +#[derive(Clone, Copy, Debug, Logos, PartialEq, Eq)] +#[logos(skip r"[^<>+\-.,\[\]]+")] +pub enum Token { /// `>` /// /// Increment the data pointer by one (to point to the next cell to the /// right). + #[token(">")] IncrementPointer, /// `<` /// /// Decrement the data pointer by one (to point to the next cell to the /// left). + #[token("<")] DecrementPointer, /// `+` /// /// Increment the byte at the data pointer by one. + #[token("+")] IncrementByte, /// `-` /// /// Decrement the byte at the data pointer by one. + #[token("-")] DecrementByte, /// `.` /// /// Output the byte at the data pointer. + #[token(".")] OutputByte, /// `,` /// /// Accept one byte of input, storing its value in the byte at the data /// pointer. + #[token(",")] InputByte, /// `[` @@ -42,38 +62,14 @@ pub enum OperatorCode { /// If the byte at the data pointer is zero, then instead of moving the /// instruction pointer forward to the next command, jump it forward to the /// command after the matching ] command. - StartLoop { - /// Offset of the bracket in the source. - offset: usize, - }, + #[token("[", loop_callback)] + StartLoop((usize, usize)), /// `]` /// /// If the byte at the data pointer is nonzero, then instead of moving the /// instruction pointer forward to the next command, jump it back to the /// command after the matching [ command. - EndLoop { - /// Offset of the bracket in the source. - offset: usize, - }, -} - -/// Perform lexical analysis on the input brainfuck code -#[must_use] -pub fn lex(input: &str) -> Vec<OperatorCode> { - input - .char_indices() - .filter_map(|(i, symbol)| match symbol { - '>' => Some(OperatorCode::IncrementPointer), - '<' => Some(OperatorCode::DecrementPointer), - '+' => Some(OperatorCode::IncrementByte), - '-' => Some(OperatorCode::DecrementByte), - '.' => Some(OperatorCode::OutputByte), - ',' => Some(OperatorCode::InputByte), - '[' => Some(OperatorCode::StartLoop { offset: i }), - ']' => Some(OperatorCode::EndLoop { offset: i }), - // Any symbol that does not match one of the above is a comment - _ => None, - }) - .collect() + #[token("]", loop_callback)] + EndLoop((usize, usize)), } diff --git a/crates/brainf_lexer/src/lib.rs b/crates/brainf_lexer/src/lib.rs new file mode 100644 index 0000000..7f6e5be --- /dev/null +++ b/crates/brainf_lexer/src/lib.rs @@ -0,0 +1,105 @@ +#![feature(lint_reasons)] +#![deny(clippy::complexity)] +#![deny(clippy::nursery)] +#![deny(clippy::pedantic)] +#![deny(clippy::perf)] +#![deny(clippy::suspicious)] +#![deny(clippy::alloc_instead_of_core)] +#![deny(clippy::as_underscore)] +#![deny(clippy::clone_on_ref_ptr)] +#![deny(clippy::create_dir)] +#![warn(clippy::dbg_macro)] +#![deny(clippy::default_numeric_fallback)] +#![deny(clippy::default_union_representation)] +#![deny(clippy::deref_by_slicing)] +#![deny(clippy::empty_structs_with_brackets)] +#![deny(clippy::exit)] +#![deny(clippy::expect_used)] +#![deny(clippy::filetype_is_file)] +#![deny(clippy::fn_to_numeric_cast)] +#![deny(clippy::format_push_string)] +#![deny(clippy::get_unwrap)] +#![deny(clippy::if_then_some_else_none)] +#![allow( + clippy::implicit_return, + reason = "returns should be done implicitly, not explicitly" +)] +#![deny(clippy::indexing_slicing)] +#![deny(clippy::large_include_file)] +#![deny(clippy::let_underscore_must_use)] +#![deny(clippy::lossy_float_literal)] +#![deny(clippy::map_err_ignore)] +#![deny(clippy::mem_forget)] +#![deny(clippy::missing_docs_in_private_items)] +#![deny(clippy::missing_trait_methods)] +#![deny(clippy::mod_module_files)] +#![deny(clippy::multiple_inherent_impl)] +#![deny(clippy::mutex_atomic)] +#![deny(clippy::needless_return)] +#![deny(clippy::non_ascii_literal)] +#![deny(clippy::panic_in_result_fn)] +#![deny(clippy::pattern_type_mismatch)] +#![deny(clippy::rc_buffer)] +#![deny(clippy::rc_mutex)] +#![deny(clippy::rest_pat_in_fully_bound_structs)] +#![deny(clippy::same_name_method)] +#![deny(clippy::separated_literal_suffix)] +#![deny(clippy::str_to_string)] +#![deny(clippy::string_add)] +#![deny(clippy::string_slice)] +#![deny(clippy::string_to_string)] +#![allow( + clippy::tabs_in_doc_comments, + reason = "tabs are preferred for this project" +)] +#![deny(clippy::try_err)] +#![deny(clippy::undocumented_unsafe_blocks)] +#![deny(clippy::unnecessary_self_imports)] +#![deny(clippy::unneeded_field_pattern)] +#![deny(clippy::unwrap_in_result)] +#![deny(clippy::unwrap_used)] +#![warn(clippy::use_debug)] +#![deny(clippy::verbose_file_reads)] +#![deny(clippy::wildcard_dependencies)] +#![deny(clippy::wildcard_enum_match_arm)] +#![deny(missing_copy_implementations)] +#![deny(missing_debug_implementations)] +#![deny(missing_docs)] +#![deny(single_use_lifetimes)] +#![deny(unsafe_code)] +#![deny(unused)] + +//! # `brainf_lexer` +//! +//! Implementation of a Brainfuck lexer in Rust. + +mod lexer; + +pub use lexer::Token; +use logos::Logos; +use thiserror::Error; + +/// Error type for lexer. +#[derive(Clone, Copy, Debug, Error)] +pub enum Error { + /// Logos was unable to lex part of the input. + #[error("lexer was unable to lex input")] + LexingError, +} + +/// Lexes the Brainfuck input, returning a Vec of Tokens. +/// +/// # Errors +/// +/// This function will return an error if the lexer is unable to lex one or more +/// of the input characters. +pub fn lex(input: &str) -> Result<Vec<Token>, Error> { + lexer::Token::lexer(input).try_fold(Vec::new(), |mut arr, result| { + result + .map_or(Err(()), |token| { + arr.push(token); + Ok(arr) + }) + .map_err(|_err| Error::LexingError) + }) +} diff --git a/crates/brainf_rs/Cargo.toml b/crates/brainf_rs/Cargo.toml new file mode 100644 index 0000000..5520652 --- /dev/null +++ b/crates/brainf_rs/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "brainf_rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +brainf_lexer = { path = "../brainf_lexer" } +byteorder = { optional = true, version = "1.4.3" } +clap = { features = ["derive"], version = "4.3.21" } +num-traits = "0.2.16" +fs-err = "2.9.0" +logos = "0.13.0" +miette = { features = ["fancy"], version = "5.10.0" } +thiserror = "1.0.44" +widestring = { default-features = false, optional = true, version = "1.0.2" } + +[features] +default = ["engine-u16", "engine-u32", "utilities"] +bigint-engine = ["dep:byteorder", "dep:widestring"] +engine-u16 = ["bigint-engine"] +engine-u32 = ["bigint-engine"] +utilities = [] diff --git a/src/engine.rs b/crates/brainf_rs/src/engine.rs index 3269dff..e60acaa 100644 --- a/src/engine.rs +++ b/crates/brainf_rs/src/engine.rs @@ -3,11 +3,7 @@ //! This predominantly allows implementation of a [`u16`] executor. #[cfg(feature = "bigint-engine")] -use std::io::Cursor; -use std::io::Read; - -#[cfg(feature = "bigint-engine")] -use byteorder::{BigEndian, ReadBytesExt}; +use byteorder::{NativeEndian, ReadBytesExt}; use num_traits::{One, Unsigned, WrappingAdd, WrappingSub, Zero}; use thiserror::Error; @@ -55,11 +51,7 @@ impl Engine for executor::U8 { type TapeInner = u8; fn read_byte() -> Result<u8, Error> { - let mut input: [u8; 1] = [0; 1]; - - std::io::stdin().read_exact(&mut input)?; - - Ok(input[0]) + Ok(std::io::stdin().read_u8()?) } fn write_byte(byte: u8) -> Result<(), Error> { @@ -74,13 +66,7 @@ impl Engine for executor::U16 { type TapeInner = u16; fn read_byte() -> Result<u16, Error> { - let mut input: [u8; 2] = [0; 2]; - - std::io::stdin().read_exact(&mut input)?; - - let mut reader = Cursor::new(input); - - Ok(reader.read_u16::<BigEndian>()?) + Ok(std::io::stdin().read_u16::<NativeEndian>()?) } fn write_byte(byte: u16) -> Result<(), Error> { @@ -98,13 +84,7 @@ impl Engine for executor::U32 { type TapeInner = u32; fn read_byte() -> Result<u32, Error> { - let mut input: [u8; 4] = [0; 4]; - - std::io::stdin().read_exact(&mut input)?; - - let mut reader = Cursor::new(input); - - Ok(reader.read_u32::<BigEndian>()?) + Ok(std::io::stdin().read_u32::<NativeEndian>()?) } fn write_byte(byte: u32) -> Result<(), Error> { diff --git a/src/executor.rs b/crates/brainf_rs/src/executor.rs index c5fff93..c5fff93 100644 --- a/src/executor.rs +++ b/crates/brainf_rs/src/executor.rs diff --git a/src/lib.rs b/crates/brainf_rs/src/lib.rs index 71c5a99..f5c8987 100644 --- a/src/lib.rs +++ b/crates/brainf_rs/src/lib.rs @@ -83,13 +83,12 @@ extern crate test; mod engine; pub mod executor; -pub mod lexer; pub mod parser; #[cfg(feature = "utilities")] pub mod utility; +pub use brainf_lexer::{lex, Token}; pub use executor::{U16 as ExecutorU16, U32 as ExecutorU32, U8 as ExecutorU8}; -pub use lexer::{lex, OperatorCode}; use miette::Diagnostic; pub use parser::{parse, Instruction}; use thiserror::Error; @@ -101,6 +100,10 @@ pub enum Error { #[error(transparent)] Io(#[from] std::io::Error), + /// Error occurred while lexing the input. + #[error(transparent)] + Lexer(#[from] brainf_lexer::Error), + /// An error that occurred while parsing Brainfuck code. #[diagnostic(transparent)] #[error(transparent)] diff --git a/src/main.rs b/crates/brainf_rs/src/main.rs index 13868a6..13868a6 100644 --- a/src/main.rs +++ b/crates/brainf_rs/src/main.rs diff --git a/src/parser.rs b/crates/brainf_rs/src/parser.rs index e639516..c9b3246 100644 --- a/src/parser.rs +++ b/crates/brainf_rs/src/parser.rs @@ -1,11 +1,10 @@ //! Parser implementation for Brainfuck. Parses operator codes into instruction //! sets. +use brainf_lexer::Token; use miette::{Diagnostic, SourceSpan}; use thiserror::Error; -use crate::lexer::OperatorCode; - /// Parsed instructions for Brainfuck. #[derive(Clone, Debug)] pub enum Instruction { @@ -102,49 +101,49 @@ pub enum Error { /// This function will return an error if a loop is encountered with no /// beginning, a loop is encountered with no ending, or if the parser attempts /// to slice out of bounds. -pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instruction>, Error> { +pub fn parse(src: &str, tokens: &[Token]) -> Result<Vec<Instruction>, Error> { let mut program: Vec<Instruction> = Vec::new(); let mut loop_stack: i32 = 0; let mut loop_start = 0; - let mut loop_source_offset: usize = 0; + let mut loop_span: (usize, usize) = (0, 0); - operator_codes + tokens .iter() .enumerate() .try_for_each(|(i, operator_code)| -> Result<(), Error> { match (loop_stack, *operator_code) { - (0i32, OperatorCode::StartLoop { offset }) => { + (0i32, Token::StartLoop(span)) => { loop_start = i; - loop_source_offset = offset; + loop_span = span; loop_stack += 1i32; } (0i32, _) => { if let Some(instruction) = match *operator_code { - OperatorCode::IncrementPointer => Some(Instruction::IncrementPointer), - OperatorCode::DecrementPointer => Some(Instruction::DecrementPointer), - OperatorCode::IncrementByte => Some(Instruction::IncrementByte), - OperatorCode::DecrementByte => Some(Instruction::DecrementByte), - OperatorCode::OutputByte => Some(Instruction::OutputByte), - OperatorCode::InputByte => Some(Instruction::InputByte), - OperatorCode::EndLoop { offset } => { + Token::IncrementPointer => Some(Instruction::IncrementPointer), + Token::DecrementPointer => Some(Instruction::DecrementPointer), + Token::IncrementByte => Some(Instruction::IncrementByte), + Token::DecrementByte => Some(Instruction::DecrementByte), + Token::OutputByte => Some(Instruction::OutputByte), + Token::InputByte => Some(Instruction::InputByte), + Token::EndLoop(span) => { return Err(Error::LoopWithNoBeginning { input: src.to_owned(), - loop_src: (offset, 1).into(), + loop_src: span.into(), }) } // We don't care about this variant as it is handled in a subsequent arm - OperatorCode::StartLoop { .. } => None, + Token::StartLoop { .. } => None, } { program.push(instruction); } } - (_, OperatorCode::StartLoop { .. }) => loop_stack += 1i32, - (_, OperatorCode::EndLoop { .. }) => { + (_, Token::StartLoop { .. }) => loop_stack += 1i32, + (_, Token::EndLoop { .. }) => { loop_stack -= 1i32; if loop_stack == 0i32 { let loop_program = parse( src, - match operator_codes.get(loop_start + 1..i) { + match tokens.get(loop_start + 1..i) { Some(value) => value, None => return Err(Error::SliceOutOfBounds(loop_start + 1..i)), }, @@ -164,7 +163,7 @@ pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instructi } else { Err(Error::LoopWithNoEnding { input: src.to_owned(), - loop_src: (loop_source_offset, 1).into(), + loop_src: loop_span.into(), }) } } diff --git a/src/utility.rs b/crates/brainf_rs/src/utility.rs index 514343c..c83dd42 100644 --- a/src/utility.rs +++ b/crates/brainf_rs/src/utility.rs @@ -18,9 +18,9 @@ pub fn execute_from_file<E: Engine>( ) -> Result<(), Error> { let input = fs_err::read_to_string(path.as_ref())?; - let operator_codes = lex(&input); + let tokens = lex(&input)?; - let instructions = parse(&input, &operator_codes)?; + let instructions = parse(&input, &tokens)?; let mut data_pointer = 0; @@ -38,9 +38,9 @@ pub fn execute_from_file<E: Engine>( /// execution fails. See documentation for [`crate::parser::parse`] and /// [`crate::executor::execute`]. pub fn execute_from_str<E: Engine>(input: &str, tape: &mut [E::TapeInner]) -> Result<(), Error> { - let operator_codes = lex(input); + let tokens = lex(input)?; - let instructions = parse(input, &operator_codes)?; + let instructions = parse(input, &tokens)?; let mut data_pointer = 0; |