summary refs log tree commit diff
path: root/src/lexer.rs
diff options
context:
space:
mode:
author Sophie Forrest <git@sophieforrest.com> 2024-08-30 23:35:45 +1200
committer Sophie Forrest <git@sophieforrest.com> 2024-08-30 23:35:45 +1200
commit 5126c9ed83fe6169463566e74f966a4a63e57ca0 (patch)
tree 99be8b093f6736eba59c529300519985d9c1c5c6 /src/lexer.rs
feat: initial commit of brainf interpreter
Diffstat (limited to '')
-rw-r--r-- src/lexer.rs 79
1 files changed, 79 insertions, 0 deletions
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..786c873
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,79 @@
+//! Lexer for Brainfuck
+
+/// The set of operator codes produced by the lexer, one per Brainfuck command.
+/// Note: Any input symbol that is not in this list is treated as a comment.
+#[derive(Clone, Copy, Debug)]
+pub enum OperatorCode {
+	/// `>`
+	///
+	/// Increment the data pointer by one (to point to the next cell to the
+	/// right).
+	IncrementPointer,
+
+	/// `<`
+	///
+	/// Decrement the data pointer by one (to point to the next cell to the
+	/// left).
+	DecrementPointer,
+
+	/// `+`
+	///
+	/// Increment the byte at the data pointer by one.
+	IncrementByte,
+
+	/// `-`
+	///
+	/// Decrement the byte at the data pointer by one.
+	DecrementByte,
+
+	/// `.`
+	///
+	/// Output the byte at the data pointer.
+	OutputByte,
+
+	/// `,`
+	///
+	/// Accept one byte of input, storing its value in the byte at the data
+	/// pointer.
+	InputByte,
+
+	/// `[`
+	///
+	/// If the byte at the data pointer is zero, then instead of moving the
+	/// instruction pointer forward to the next command, jump it forward to the
+	/// command after the matching `]` command.
+	StartLoop {
+		/// Byte offset of the `[` in the source, as recorded by [`lex`].
+		offset: usize,
+	},
+
+	/// `]`
+	///
+	/// If the byte at the data pointer is nonzero, then instead of moving the
+	/// instruction pointer forward to the next command, jump it back to the
+	/// command after the matching `[` command.
+	EndLoop {
+		/// Byte offset of the `]` in the source, as recorded by [`lex`].
+		offset: usize,
+	},
+}
+
+/// Lex Brainfuck source into operator codes in source order; comments are dropped and brackets are not matched here.
+#[must_use]
+pub fn lex(input: &str) -> Vec<OperatorCode> {
+	input
+		.char_indices() // yields (byte offset, char) pairs, so loop offsets are byte offsets
+		.filter_map(|(i, symbol)| match symbol {
+			'>' => Some(OperatorCode::IncrementPointer),
+			'<' => Some(OperatorCode::DecrementPointer),
+			'+' => Some(OperatorCode::IncrementByte),
+			'-' => Some(OperatorCode::DecrementByte),
+			'.' => Some(OperatorCode::OutputByte),
+			',' => Some(OperatorCode::InputByte),
+			'[' => Some(OperatorCode::StartLoop { offset: i }),
+			']' => Some(OperatorCode::EndLoop { offset: i }),
+			// Any other symbol is a Brainfuck comment, so it produces no operator code
+			_ => None,
+		})
+		.collect()
+}