//! Lexer for Brainfuck

// Provenance: src/lexer.rs as introduced by commit
// 5126c9ed83fe6169463566e74f966a4a63e57ca0
// ("feat: initial commit of brainf interpreter", Sophie Forrest, 2024-08-30).

/// List of operator codes for the lexer.
///
/// Note: Any input symbol that is not in this list is a comment.
#[derive(Clone, Copy, Debug)]
pub enum OperatorCode {
    /// `>`
    ///
    /// Increment the data pointer by one (to point to the next cell to the
    /// right).
    IncrementPointer,

    /// `<`
    ///
    /// Decrement the data pointer by one (to point to the next cell to the
    /// left).
    DecrementPointer,

    /// `+`
    ///
    /// Increment the byte at the data pointer by one.
    IncrementByte,

    /// `-`
    ///
    /// Decrement the byte at the data pointer by one.
    DecrementByte,

    /// `.`
    ///
    /// Output the byte at the data pointer.
    OutputByte,

    /// `,`
    ///
    /// Accept one byte of input, storing its value in the byte at the data
    /// pointer.
    InputByte,

    /// `[`
    ///
    /// If the byte at the data pointer is zero, then instead of moving the
    /// instruction pointer forward to the next command, jump it forward to the
    /// command after the matching `]` command.
    StartLoop {
        /// Byte offset of the bracket in the source, as produced by
        /// [`str::char_indices`].
        offset: usize,
    },

    /// `]`
    ///
    /// If the byte at the data pointer is nonzero, then instead of moving the
    /// instruction pointer forward to the next command, jump it back to the
    /// command after the matching `[` command.
    EndLoop {
        /// Byte offset of the bracket in the source, as produced by
        /// [`str::char_indices`].
        offset: usize,
    },
}

/// Perform lexical analysis on the input brainfuck code.
///
/// Walks `input` character by character and returns one [`OperatorCode`] per
/// recognised command, in source order. Every other character is treated as a
/// comment and dropped, so an empty or comment-only input yields an empty
/// vector.
///
/// Loop brackets record their byte offset within `input`. This function does
/// not check that brackets are balanced; it only tokenises.
#[must_use]
pub fn lex(input: &str) -> Vec<OperatorCode> {
    input
        .char_indices()
        .filter_map(|(i, symbol)| match symbol {
            '>' => Some(OperatorCode::IncrementPointer),
            '<' => Some(OperatorCode::DecrementPointer),
            '+' => Some(OperatorCode::IncrementByte),
            '-' => Some(OperatorCode::DecrementByte),
            '.' => Some(OperatorCode::OutputByte),
            ',' => Some(OperatorCode::InputByte),
            '[' => Some(OperatorCode::StartLoop { offset: i }),
            ']' => Some(OperatorCode::EndLoop { offset: i }),
            // Any symbol that does not match one of the above is a comment
            _ => None,
        })
        .collect()
}