diff options
| author | Sophie Forrest <git@sophieforrest.com> | 2024-08-30 23:35:45 +1200 |
|---|---|---|
| committer | Sophie Forrest <git@sophieforrest.com> | 2024-08-30 23:35:45 +1200 |
| commit | 3c163eabc78ddbd26bb250ef5ad6da28cd61adc6 (patch) | |
| tree | 58e17534e1db18813554d4fb6e67020f898b655d /crates/brainf_lexer/src/lexer.rs | |
| parent | 17b78f8cb127817b93f7e6ced7e55d8748806a80 (diff) | |
feat: split engine into crates
Diffstat (limited to '')
| -rw-r--r-- | crates/brainf_lexer/src/lexer.rs (renamed from src/lexer.rs) | 58 |
1 files changed, 27 insertions, 31 deletions
```diff
diff --git a/src/lexer.rs b/crates/brainf_lexer/src/lexer.rs
index 786c873..b95cd87 100644
--- a/src/lexer.rs
+++ b/crates/brainf_lexer/src/lexer.rs
@@ -1,40 +1,60 @@
-//! Lexer for Brainfuck
+//! Lexer implementation using logos.
+
+#![expect(clippy::indexing_slicing)]
+
+use logos::{Lexer, Logos};
 
 /// List of operator codes for the lexer
 /// Note: Any input symbol that is not in this list is a comment
-#[derive(Clone, Copy, Debug)]
-pub enum OperatorCode {
+
+fn loop_callback(lex: &Lexer<Token>) -> (usize, usize) {
+    let span = lex.span();
+
+    (span.start, span.len())
+}
+
+/// List of Tokens for the lexer
+/// Note: Any input symbol that is not in this list is a comment
+#[derive(Clone, Copy, Debug, Logos, PartialEq, Eq)]
+#[logos(skip r"[^<>+\-.,\[\]]+")]
+pub enum Token {
     /// `>`
     ///
     /// Increment the data pointer by one (to point to the next cell to the
     /// right).
+    #[token(">")]
     IncrementPointer,
 
     /// `<`
     ///
     /// Decrement the data pointer by one (to point to the next cell to the
     /// left).
+    #[token("<")]
     DecrementPointer,
 
     /// `+`
     ///
     /// Increment the byte at the data pointer by one.
+    #[token("+")]
     IncrementByte,
 
     /// `-`
     ///
     /// Decrement the byte at the data pointer by one.
+    #[token("-")]
     DecrementByte,
 
     /// `.`
     ///
     /// Output the byte at the data pointer.
+    #[token(".")]
     OutputByte,
 
     /// `,`
     ///
     /// Accept one byte of input, storing its value in the byte at the data
     /// pointer.
+    #[token(",")]
     InputByte,
 
     /// `[`
@@ -42,38 +62,14 @@ pub enum OperatorCode {
     /// If the byte at the data pointer is zero, then instead of moving the
     /// instruction pointer forward to the next command, jump it forward to the
     /// command after the matching ] command.
-    StartLoop {
-        /// Offset of the bracket in the source.
-        offset: usize,
-    },
+    #[token("[", loop_callback)]
+    StartLoop((usize, usize)),
 
     /// `]`
     ///
     /// If the byte at the data pointer is nonzero, then instead of moving the
     /// instruction pointer forward to the next command, jump it back to the
     /// command after the matching [ command.
-    EndLoop {
-        /// Offset of the bracket in the source.
-        offset: usize,
-    },
-}
-
-/// Perform lexical analysis on the input brainfuck code
-#[must_use]
-pub fn lex(input: &str) -> Vec<OperatorCode> {
-    input
-        .char_indices()
-        .filter_map(|(i, symbol)| match symbol {
-            '>' => Some(OperatorCode::IncrementPointer),
-            '<' => Some(OperatorCode::DecrementPointer),
-            '+' => Some(OperatorCode::IncrementByte),
-            '-' => Some(OperatorCode::DecrementByte),
-            '.' => Some(OperatorCode::OutputByte),
-            ',' => Some(OperatorCode::InputByte),
-            '[' => Some(OperatorCode::StartLoop { offset: i }),
-            ']' => Some(OperatorCode::EndLoop { offset: i }),
-            // Any symbol that does not match one of the above is a comment
-            _ => None,
-        })
-        .collect()
+    #[token("]", loop_callback)]
+    EndLoop((usize, usize)),
 }
```