summary refs log tree commit diff
path: root/src/parser.rs
diff options
context:
space:
mode:
authorSophie Forrest <git@sophieforrest.com>2024-08-30 23:35:45 +1200
committerSophie Forrest <git@sophieforrest.com>2024-08-30 23:35:45 +1200
commit5126c9ed83fe6169463566e74f966a4a63e57ca0 (patch)
tree99be8b093f6736eba59c529300519985d9c1c5c6 /src/parser.rs
feat: initial commit of brainf interpreter
Diffstat (limited to '')
-rw-r--r--src/parser.rs164
1 files changed, 164 insertions, 0 deletions
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..cffa6db
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,164 @@
+//! Parser implementation for Brainfuck. Parses operator codes into instruction
+//! sets.
+
+use miette::{Diagnostic, SourceSpan};
+use thiserror::Error;
+
+use crate::lexer::OperatorCode;
+
+/// Parsed instructions for Brainfuck.
+#[derive(Clone, Debug)]
+pub enum Instruction {
+	/// `>`
+	///
+	/// Increment the data pointer by one (to point to the next cell to the
+	/// right).
+	IncrementPointer,
+
+	/// `<`
+	///
+	/// Decrement the data pointer by one (to point to the next cell to the
+	/// left).
+	DecrementPointer,
+
+	/// `+`
+	///
+	/// Increment the byte at the data pointer by one.
+	IncrementByte,
+
+	/// `-`
+	///
+	/// Decrement the byte at the data pointer by one.
+	DecrementByte,
+
+	/// `.`
+	///
+	/// Output the byte at the data pointer.
+	OutputByte,
+
+	/// `,`
+	///
+	/// Accept one byte of input, storing its value in the byte at the data
+	/// pointer.
+	InputByte,
+
+	/// `[]`
+	///
+	/// Loops through the inner instructions.
+	Loop(Vec<Instruction>),
+}
+
+/// Error type for errors that occur during parsing from [`OperatorCode`]s to
+/// [`Instruction`]s.
+#[derive(Debug, Diagnostic, Error)]
+pub enum Error {
+	/// Parser encountered a loop with no beginning.
+	#[diagnostic(help("try closing the loop"))]
+	#[error("loop closed at {loop_src:?} has no beginning")]
+	LoopWithNoBeginning {
+		/// Source code associated with diagnostic
+		#[source_code]
+		input: String,
+
+		/// SourceSpan of the loop bracket.
+		#[label("loop ending")]
+		loop_src: SourceSpan,
+	},
+
+	/// Parser encountered a loop with no ending.
+	#[diagnostic(help("try closing the loop"))]
+	#[error("loop beginning at {loop_src:?} has no ending")]
+	LoopWithNoEnding {
+		/// Source code associated with diagnostic
+		#[source_code]
+		input: String,
+
+		/// SourceSpan of the loop bracket.
+		#[label("loop beginning")]
+		loop_src: SourceSpan,
+	},
+
+	/// Parser sliced out of bounds.
+	#[error("parser sliced out of bounds")]
+	SliceOutOfBounds(std::ops::Range<usize>),
+}
+
+/// Parses the operator codes into instruction codes.
+///
+/// # Parameters
+///
+/// * `src` - The source the operator codes originate from. This is used for
+///   error reporting.
+/// * `operator_codes` - The operator codes reveiced from the lexer.
+///
+/// # Errors
+///
+/// This function will return an error if a loop is encountered with no
+/// beginning, a loop is encountered with no ending, or if the parser attempts
+/// to slice out of bounds.
+pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instruction>, Error> {
+	let mut program: Vec<Instruction> = Vec::new();
+	let mut loop_stack: i32 = 0;
+	let mut loop_start = 0;
+	let mut loop_source_offset: usize = 0;
+
+	operator_codes
+		.iter()
+		.enumerate()
+		.try_for_each(|(i, operator_code)| -> Result<(), Error> {
+			match (loop_stack, *operator_code) {
+				(0i32, OperatorCode::StartLoop { offset }) => {
+					loop_start = i;
+					loop_source_offset = offset;
+					loop_stack += 1i32;
+				}
+				(0i32, _) => {
+					if let Some(instruction) = match *operator_code {
+						OperatorCode::IncrementPointer => Some(Instruction::IncrementPointer),
+						OperatorCode::DecrementPointer => Some(Instruction::DecrementPointer),
+						OperatorCode::IncrementByte => Some(Instruction::IncrementByte),
+						OperatorCode::DecrementByte => Some(Instruction::DecrementByte),
+						OperatorCode::OutputByte => Some(Instruction::OutputByte),
+						OperatorCode::InputByte => Some(Instruction::InputByte),
+						OperatorCode::EndLoop { offset } => {
+							return Err(Error::LoopWithNoBeginning {
+								input: src.to_owned(),
+								loop_src: (offset, 1).into(),
+							})
+						}
+						// We don't care about this variant as it is handled in a subsequent arm
+						OperatorCode::StartLoop { .. } => None,
+					} {
+						program.push(instruction);
+					}
+				}
+				(_, OperatorCode::StartLoop { .. }) => loop_stack += 1i32,
+				(_, OperatorCode::EndLoop { .. }) => {
+					loop_stack -= 1i32;
+					if loop_stack == 0i32 {
+						let loop_program = parse(
+							src,
+							match operator_codes.get(loop_start + 1..i) {
+								Some(value) => value,
+								None => return Err(Error::SliceOutOfBounds(loop_start + 1..i)),
+							},
+						)?;
+
+						program.push(Instruction::Loop(loop_program));
+					}
+				}
+				_ => (),
+			};
+
+			Ok(())
+		})?;
+
+	if loop_stack == 0i32 {
+		Ok(program)
+	} else {
+		Err(Error::LoopWithNoEnding {
+			input: src.to_owned(),
+			loop_src: (loop_source_offset, 1).into(),
+		})
+	}
+}