diff options
Diffstat (limited to '')
| -rw-r--r-- | src/lexer.rs | 79 |
1 files changed, 79 insertions, 0 deletions
//! Lexer for Brainfuck
//!
//! Turns Brainfuck source text into a flat list of [`OperatorCode`]s. The
//! lexer does not check that brackets are balanced; it only records where
//! each bracket was found.

// Recovered from: diff --git a/src/lexer.rs b/src/lexer.rs
// (new file mode 100644, index 0000000..786c873)

/// List of operator codes for the lexer
///
/// Note: Any input symbol that is not in this list is a comment
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum OperatorCode {
    /// `>`
    ///
    /// Increment the data pointer by one (to point to the next cell to the
    /// right).
    IncrementPointer,

    /// `<`
    ///
    /// Decrement the data pointer by one (to point to the next cell to the
    /// left).
    DecrementPointer,

    /// `+`
    ///
    /// Increment the byte at the data pointer by one.
    IncrementByte,

    /// `-`
    ///
    /// Decrement the byte at the data pointer by one.
    DecrementByte,

    /// `.`
    ///
    /// Output the byte at the data pointer.
    OutputByte,

    /// `,`
    ///
    /// Accept one byte of input, storing its value in the byte at the data
    /// pointer.
    InputByte,

    /// `[`
    ///
    /// If the byte at the data pointer is zero, then instead of moving the
    /// instruction pointer forward to the next command, jump it forward to the
    /// command after the matching `]` command.
    StartLoop {
        /// Byte offset of the bracket in the source.
        offset: usize,
    },

    /// `]`
    ///
    /// If the byte at the data pointer is nonzero, then instead of moving the
    /// instruction pointer forward to the next command, jump it back to the
    /// command after the matching `[` command.
    EndLoop {
        /// Byte offset of the bracket in the source.
        offset: usize,
    },
}

/// Perform lexical analysis on the input brainfuck code
///
/// Walks `input` one character at a time and emits one [`OperatorCode`] per
/// recognised command character, in source order. Every other character is
/// treated as a comment and produces no output, so an empty or comment-only
/// input yields an empty vector.
///
/// The `offset` stored in [`OperatorCode::StartLoop`] and
/// [`OperatorCode::EndLoop`] is the bracket's byte offset into `input` (as
/// reported by `str::char_indices`), not a count of characters or of emitted
/// operators. Bracket pairing is not validated here.
#[must_use]
pub fn lex(input: &str) -> Vec<OperatorCode> {
    input
        .char_indices()
        .filter_map(|(i, symbol)| match symbol {
            '>' => Some(OperatorCode::IncrementPointer),
            '<' => Some(OperatorCode::DecrementPointer),
            '+' => Some(OperatorCode::IncrementByte),
            '-' => Some(OperatorCode::DecrementByte),
            '.' => Some(OperatorCode::OutputByte),
            ',' => Some(OperatorCode::InputByte),
            '[' => Some(OperatorCode::StartLoop { offset: i }),
            ']' => Some(OperatorCode::EndLoop { offset: i }),
            // Any symbol that does not match one of the above is a comment
            _ => None,
        })
        .collect()
}