summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorSophie Forrest <git@sophieforrest.com>2024-08-30 23:35:45 +1200
committerSophie Forrest <git@sophieforrest.com>2024-08-30 23:35:45 +1200
commit5126c9ed83fe6169463566e74f966a4a63e57ca0 (patch)
tree99be8b093f6736eba59c529300519985d9c1c5c6 /src
feat: initial commit of brainf interpreter
Diffstat (limited to '')
-rw-r--r--src/constants.rs4
-rw-r--r--src/executor.rs89
-rw-r--r--src/lexer.rs79
-rw-r--r--src/lib.rs154
-rw-r--r--src/main.rs125
-rw-r--r--src/parser.rs164
-rw-r--r--src/utility.rs47
7 files changed, 662 insertions, 0 deletions
diff --git a/src/constants.rs b/src/constants.rs
new file mode 100644
index 0000000..6651b22
--- /dev/null
+++ b/src/constants.rs
@@ -0,0 +1,4 @@
+//! Constants and types used throughout the Brainfuck interpreter.
+
+/// The inner type used by a Brainfuck tape.
+pub type TapeInner = u8;
diff --git a/src/executor.rs b/src/executor.rs
new file mode 100644
index 0000000..f368ea9
--- /dev/null
+++ b/src/executor.rs
@@ -0,0 +1,89 @@
+//! Executor implementation for Brainfuck.
+
+use std::io::Read;
+
+use miette::Diagnostic;
+use thiserror::Error;
+
+use crate::{constants::TapeInner, parser::Instruction};
+
+/// Runtime errors that can occur in brainfuck executor.
+#[derive(Debug, Diagnostic, Error)]
+pub enum Error {
+	/// Brainfuck code performed an out of bounds index on the tape during
+	/// runtime.
+	#[error("tape indexed out of bounds, attempted index at `{0}`")]
+	IndexOutOfBounds(usize),
+
+	/// Executor was unable to read an input byte from stdin.
+	#[error("could not read input from stdin")]
+	ReadInput(#[from] std::io::Error),
+}
+
+/// Executes the provided instruction set, utilising the provided tape..
+///
+/// # Errors
+///
+/// This function will return an error if the Brainfuck code indexes out of
+/// bounds of the tape, or if the executor cannot read an input byte from stdin.
+pub fn execute(
+	instructions: &[Instruction],
+	tape: &mut [TapeInner],
+	data_pointer: &mut usize,
+) -> Result<(), Error> {
+	for instruction in instructions {
+		match *instruction {
+			Instruction::IncrementPointer => {
+				let tape_len: usize = tape.len() - 1;
+
+				if *data_pointer == tape_len {
+					*data_pointer = 0;
+				} else {
+					*data_pointer += 1;
+				}
+			}
+			Instruction::DecrementPointer => {
+				*data_pointer = match *data_pointer {
+					0 => tape.len() - 1,
+					_ => *data_pointer - 1,
+				};
+			}
+			Instruction::IncrementByte => match tape.get_mut(*data_pointer) {
+				Some(value) => *value = value.overflowing_add(1).0,
+				None => return Err(Error::IndexOutOfBounds(*data_pointer)),
+			},
+			Instruction::DecrementByte => match tape.get_mut(*data_pointer) {
+				Some(value) => *value = value.overflowing_sub(1).0,
+				None => return Err(Error::IndexOutOfBounds(*data_pointer)),
+			},
+			Instruction::OutputByte => print!(
+				"{}",
+				char::from(match tape.get(*data_pointer) {
+					Some(value) => *value,
+					None => return Err(Error::IndexOutOfBounds(*data_pointer)),
+				})
+			),
+			Instruction::InputByte => {
+				let mut input: [TapeInner; 1] = [0; 1];
+
+				std::io::stdin().read_exact(&mut input)?;
+
+				match tape.get_mut(*data_pointer) {
+					Some(value) => *value = input[0],
+					None => return Err(Error::IndexOutOfBounds(*data_pointer)),
+				};
+			}
+			Instruction::Loop(ref instructions) => {
+				while match tape.get(*data_pointer) {
+					Some(value) => *value,
+					None => return Err(Error::IndexOutOfBounds(*data_pointer)),
+				} != 0
+				{
+					execute(instructions, tape, data_pointer)?;
+				}
+			}
+		}
+	}
+
+	Ok(())
+}
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..786c873
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,79 @@
+//! Lexer for Brainfuck
+
+/// List of operator codes for the lexer
+/// Note: Any input symbol that is not in this list is a comment
+#[derive(Clone, Copy, Debug)]
+pub enum OperatorCode {
+	/// `>`
+	///
+	/// Increment the data pointer by one (to point to the next cell to the
+	/// right).
+	IncrementPointer,
+
+	/// `<`
+	///
+	/// Decrement the data pointer by one (to point to the next cell to the
+	/// left).
+	DecrementPointer,
+
+	/// `+`
+	///
+	/// Increment the byte at the data pointer by one.
+	IncrementByte,
+
+	/// `-`
+	///
+	/// Decrement the byte at the data pointer by one.
+	DecrementByte,
+
+	/// `.`
+	///
+	/// Output the byte at the data pointer.
+	OutputByte,
+
+	/// `,`
+	///
+	/// Accept one byte of input, storing its value in the byte at the data
+	/// pointer.
+	InputByte,
+
+	/// `[`
+	///
+	/// If the byte at the data pointer is zero, then instead of moving the
+	/// instruction pointer forward to the next command, jump it forward to the
+	/// command after the matching ] command.
+	StartLoop {
+		/// Offset of the bracket in the source.
+		offset: usize,
+	},
+
+	/// `]`
+	///
+	/// If the byte at the data pointer is nonzero, then instead of moving the
+	/// instruction pointer forward to the next command, jump it back to the
+	/// command after the matching [ command.
+	EndLoop {
+		/// Offset of the bracket in the source.
+		offset: usize,
+	},
+}
+
+/// Perform lexical analysis on the input brainfuck code
+#[must_use]
+pub fn lex(input: &str) -> Vec<OperatorCode> {
+	input
+		.char_indices()
+		.filter_map(|(i, symbol)| match symbol {
+			'>' => Some(OperatorCode::IncrementPointer),
+			'<' => Some(OperatorCode::DecrementPointer),
+			'+' => Some(OperatorCode::IncrementByte),
+			'-' => Some(OperatorCode::DecrementByte),
+			'.' => Some(OperatorCode::OutputByte),
+			',' => Some(OperatorCode::InputByte),
+			'[' => Some(OperatorCode::StartLoop { offset: i }),
+			']' => Some(OperatorCode::EndLoop { offset: i }),
+			// Any symbol that does not match one of the above is a comment
+			_ => None,
+		})
+		.collect()
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..725084a
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,154 @@
+#![allow(incomplete_features)]
+#![feature(async_fn_in_trait)]
+#![feature(custom_inner_attributes)]
+#![feature(lint_reasons)]
+#![feature(never_type)]
+#![feature(once_cell)]
+#![feature(test)]
+#![deny(clippy::complexity)]
+#![deny(clippy::nursery)]
+#![deny(clippy::pedantic)]
+#![deny(clippy::perf)]
+#![deny(clippy::suspicious)]
+#![deny(clippy::alloc_instead_of_core)]
+#![deny(clippy::as_underscore)]
+#![deny(clippy::clone_on_ref_ptr)]
+#![deny(clippy::create_dir)]
+#![warn(clippy::dbg_macro)]
+#![deny(clippy::default_numeric_fallback)]
+#![deny(clippy::default_union_representation)]
+#![deny(clippy::deref_by_slicing)]
+#![deny(clippy::empty_structs_with_brackets)]
+#![deny(clippy::exit)]
+#![deny(clippy::expect_used)]
+#![deny(clippy::filetype_is_file)]
+#![deny(clippy::fn_to_numeric_cast)]
+#![deny(clippy::format_push_string)]
+#![deny(clippy::get_unwrap)]
+#![deny(clippy::if_then_some_else_none)]
+#![allow(
+	clippy::implicit_return,
+	reason = "returns should be done implicitly, not explicitly"
+)]
+#![deny(clippy::indexing_slicing)]
+#![deny(clippy::large_include_file)]
+#![deny(clippy::let_underscore_must_use)]
+#![deny(clippy::lossy_float_literal)]
+#![deny(clippy::map_err_ignore)]
+#![deny(clippy::mem_forget)]
+#![deny(clippy::missing_docs_in_private_items)]
+#![deny(clippy::missing_trait_methods)]
+#![deny(clippy::mod_module_files)]
+#![deny(clippy::multiple_inherent_impl)]
+#![deny(clippy::mutex_atomic)]
+#![deny(clippy::needless_return)]
+#![deny(clippy::non_ascii_literal)]
+#![deny(clippy::panic_in_result_fn)]
+#![deny(clippy::pattern_type_mismatch)]
+#![deny(clippy::rc_buffer)]
+#![deny(clippy::rc_mutex)]
+#![deny(clippy::rest_pat_in_fully_bound_structs)]
+#![deny(clippy::same_name_method)]
+#![deny(clippy::separated_literal_suffix)]
+#![deny(clippy::str_to_string)]
+#![deny(clippy::string_add)]
+#![deny(clippy::string_slice)]
+#![deny(clippy::string_to_string)]
+#![allow(
+	clippy::tabs_in_doc_comments,
+	reason = "tabs are preferred for this project"
+)]
+#![deny(clippy::try_err)]
+#![deny(clippy::undocumented_unsafe_blocks)]
+#![deny(clippy::unnecessary_self_imports)]
+#![deny(clippy::unneeded_field_pattern)]
+#![deny(clippy::unwrap_in_result)]
+#![deny(clippy::unwrap_used)]
+#![warn(clippy::use_debug)]
+#![deny(clippy::verbose_file_reads)]
+#![deny(clippy::wildcard_dependencies)]
+#![deny(clippy::wildcard_enum_match_arm)]
+#![deny(missing_copy_implementations)]
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(single_use_lifetimes)]
+#![deny(unsafe_code)]
+#![deny(unused)]
+
+//! Brainfuck RS
+//!
+//! Implementation of a Brainfuck interpreter written in Rust.
+
+#[cfg(test)]
+extern crate test;
+
+mod constants;
+pub mod executor;
+pub mod lexer;
+pub mod parser;
+#[cfg(feature = "utilities")]
+pub mod utility;
+
+pub use executor::execute;
+pub use lexer::{lex, OperatorCode};
+use miette::Diagnostic;
+pub use parser::{parse, Instruction};
+use thiserror::Error;
+
+/// Top-level error type for Brainfuck interpreter.
+#[derive(Debug, Diagnostic, Error)]
+pub enum Error {
+	/// Error occurred when reading input from a file.
+	#[error(transparent)]
+	Io(#[from] std::io::Error),
+
+	/// An error that occurred while parsing Brainfuck code.
+	#[diagnostic(transparent)]
+	#[error(transparent)]
+	Parser(#[from] parser::Error),
+
+	/// An error that occurred during runtime.
+	#[error(transparent)]
+	Runtime(#[from] executor::Error),
+}
+
+#[cfg(test)]
+mod tests {
+	use test::Bencher;
+
+	use super::*;
+	use crate::constants::TapeInner;
+
+	#[test]
+	fn hello_world() -> Result<(), Error> {
+		let mut tape: Vec<TapeInner> = vec![0; 1024];
+
+		utility::execute_from_file("./test_programs/hello_world.bf", &mut tape)?;
+
+		Ok(())
+	}
+
+	#[bench]
+	fn hello_world_from_hell(b: &mut Bencher) {
+		b.iter(|| {
+			let mut tape: Vec<TapeInner> = vec![0; 1024];
+
+			#[allow(clippy::expect_used)]
+			utility::execute_from_file("./test_programs/hello_world.bf", &mut tape)
+				.expect("failed to run");
+		});
+	}
+
+	#[test]
+	fn hello_world_short() -> Result<(), Error> {
+		let mut tape: Vec<TapeInner> = vec![0; 1024];
+
+		utility::execute_from_str(
+			"--[+++++++<---->>-->+>+>+<<<<]<.>++++[-<++++>>->--<<]>>-.>--..>+.<<<.<<-.>>+>->>.\
+			 +++[.<]",
+			&mut tape,
+		)?;
+
+		Ok(())
+	}
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..5412735
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,125 @@
+#![allow(incomplete_features)]
+#![feature(async_fn_in_trait)]
+#![feature(custom_inner_attributes)]
+#![feature(lint_reasons)]
+#![feature(never_type)]
+#![feature(once_cell)]
+#![deny(clippy::complexity)]
+#![deny(clippy::nursery)]
+#![deny(clippy::pedantic)]
+#![deny(clippy::perf)]
+#![deny(clippy::suspicious)]
+#![deny(clippy::alloc_instead_of_core)]
+#![deny(clippy::as_underscore)]
+#![deny(clippy::clone_on_ref_ptr)]
+#![deny(clippy::create_dir)]
+#![warn(clippy::dbg_macro)]
+#![deny(clippy::default_numeric_fallback)]
+#![deny(clippy::default_union_representation)]
+#![deny(clippy::deref_by_slicing)]
+#![deny(clippy::empty_structs_with_brackets)]
+#![deny(clippy::exit)]
+#![deny(clippy::filetype_is_file)]
+#![deny(clippy::fn_to_numeric_cast)]
+#![deny(clippy::format_push_string)]
+#![deny(clippy::get_unwrap)]
+#![deny(clippy::if_then_some_else_none)]
+#![allow(
+	clippy::implicit_return,
+	reason = "returns should be done implicitly, not explicitly"
+)]
+#![deny(clippy::indexing_slicing)]
+#![deny(clippy::large_include_file)]
+#![deny(clippy::let_underscore_must_use)]
+#![deny(clippy::lossy_float_literal)]
+#![deny(clippy::map_err_ignore)]
+#![deny(clippy::mem_forget)]
+#![deny(clippy::missing_docs_in_private_items)]
+#![deny(clippy::missing_trait_methods)]
+#![deny(clippy::mod_module_files)]
+#![deny(clippy::multiple_inherent_impl)]
+#![deny(clippy::mutex_atomic)]
+#![deny(clippy::needless_return)]
+#![deny(clippy::non_ascii_literal)]
+#![deny(clippy::panic_in_result_fn)]
+#![deny(clippy::pattern_type_mismatch)]
+#![deny(clippy::rc_buffer)]
+#![deny(clippy::rc_mutex)]
+#![deny(clippy::rest_pat_in_fully_bound_structs)]
+#![deny(clippy::same_name_method)]
+#![deny(clippy::separated_literal_suffix)]
+#![deny(clippy::str_to_string)]
+#![deny(clippy::string_add)]
+#![deny(clippy::string_slice)]
+#![deny(clippy::string_to_string)]
+#![allow(
+	clippy::tabs_in_doc_comments,
+	reason = "tabs are preferred for this project"
+)]
+#![deny(clippy::try_err)]
+#![deny(clippy::undocumented_unsafe_blocks)]
+#![deny(clippy::unnecessary_self_imports)]
+#![deny(clippy::unneeded_field_pattern)]
+#![deny(clippy::unwrap_in_result)]
+#![deny(clippy::unwrap_used)]
+#![warn(clippy::use_debug)]
+#![deny(clippy::verbose_file_reads)]
+#![deny(clippy::wildcard_dependencies)]
+#![deny(clippy::wildcard_enum_match_arm)]
+#![deny(missing_copy_implementations)]
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(single_use_lifetimes)]
+#![deny(unsafe_code)]
+#![deny(unused)]
+
+//! Brainfuck RS
+//!
+//! Brainfuck interpreter written in Rust
+
+use std::path::PathBuf;
+
+use brainf_rs::utility::{execute_from_file, execute_from_str};
+use clap::{Parser, Subcommand};
+use miette::Context;
+
+/// Representation of command line application for the Brainfuck interpreter.
+#[derive(Parser)]
+#[command(author, version, about, long_about = None)]
+#[command(propagate_version = true)]
+struct App {
+	/// Subcommands supported by the Brainfuck interpreter
+	#[command(subcommand)]
+	command: Command,
+}
+
+/// Implementation of subcommands for the Brainfuck interpreter.
+#[derive(Subcommand)]
+enum Command {
+	/// Interprets Brainfuck code from a provided file.
+	File {
+		/// Path to the file to interpret with Brainfuck.
+		path: PathBuf,
+	},
+
+	/// Interprets Brainfuck code from a text input on the command line.
+	Text {
+		/// Text input to be interpreted as Brainfuck.
+		input: String,
+	},
+}
+
+fn main() -> miette::Result<()> {
+	let app = App::parse();
+
+	let mut tape = vec![0u8; 1024];
+
+	match app.command {
+		Command::File { ref path } => {
+			execute_from_file(path, &mut tape).wrap_err("when executing from file")?;
+		}
+		Command::Text { ref input } => execute_from_str(input, &mut tape)?,
+	};
+
+	Ok(())
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..cffa6db
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,164 @@
+//! Parser implementation for Brainfuck. Parses operator codes into instruction
+//! sets.
+
+use miette::{Diagnostic, SourceSpan};
+use thiserror::Error;
+
+use crate::lexer::OperatorCode;
+
+/// Parsed instructions for Brainfuck.
+#[derive(Clone, Debug)]
+pub enum Instruction {
+	/// `>`
+	///
+	/// Increment the data pointer by one (to point to the next cell to the
+	/// right).
+	IncrementPointer,
+
+	/// `<`
+	///
+	/// Decrement the data pointer by one (to point to the next cell to the
+	/// left).
+	DecrementPointer,
+
+	/// `+`
+	///
+	/// Increment the byte at the data pointer by one.
+	IncrementByte,
+
+	/// `-`
+	///
+	/// Decrement the byte at the data pointer by one.
+	DecrementByte,
+
+	/// `.`
+	///
+	/// Output the byte at the data pointer.
+	OutputByte,
+
+	/// `,`
+	///
+	/// Accept one byte of input, storing its value in the byte at the data
+	/// pointer.
+	InputByte,
+
+	/// `[]`
+	///
+	/// Loops through the inner instructions.
+	Loop(Vec<Instruction>),
+}
+
+/// Error type for errors that occur during parsing from [`OperatorCode`]s to
+/// [`Instruction`]s.
+#[derive(Debug, Diagnostic, Error)]
+pub enum Error {
+	/// Parser encountered a loop with no beginning.
+	#[diagnostic(help("try closing the loop"))]
+	#[error("loop closed at {loop_src:?} has no beginning")]
+	LoopWithNoBeginning {
+		/// Source code associated with diagnostic
+		#[source_code]
+		input: String,
+
+		/// SourceSpan of the loop bracket.
+		#[label("loop ending")]
+		loop_src: SourceSpan,
+	},
+
+	/// Parser encountered a loop with no ending.
+	#[diagnostic(help("try closing the loop"))]
+	#[error("loop beginning at {loop_src:?} has no ending")]
+	LoopWithNoEnding {
+		/// Source code associated with diagnostic
+		#[source_code]
+		input: String,
+
+		/// SourceSpan of the loop bracket.
+		#[label("loop beginning")]
+		loop_src: SourceSpan,
+	},
+
+	/// Parser sliced out of bounds.
+	#[error("parser sliced out of bounds")]
+	SliceOutOfBounds(std::ops::Range<usize>),
+}
+
+/// Parses the operator codes into instruction codes.
+///
+/// # Parameters
+///
+/// * `src` - The source the operator codes originate from. This is used for
+///   error reporting.
+/// * `operator_codes` - The operator codes reveiced from the lexer.
+///
+/// # Errors
+///
+/// This function will return an error if a loop is encountered with no
+/// beginning, a loop is encountered with no ending, or if the parser attempts
+/// to slice out of bounds.
+pub fn parse(src: &str, operator_codes: &[OperatorCode]) -> Result<Vec<Instruction>, Error> {
+	let mut program: Vec<Instruction> = Vec::new();
+	let mut loop_stack: i32 = 0;
+	let mut loop_start = 0;
+	let mut loop_source_offset: usize = 0;
+
+	operator_codes
+		.iter()
+		.enumerate()
+		.try_for_each(|(i, operator_code)| -> Result<(), Error> {
+			match (loop_stack, *operator_code) {
+				(0i32, OperatorCode::StartLoop { offset }) => {
+					loop_start = i;
+					loop_source_offset = offset;
+					loop_stack += 1i32;
+				}
+				(0i32, _) => {
+					if let Some(instruction) = match *operator_code {
+						OperatorCode::IncrementPointer => Some(Instruction::IncrementPointer),
+						OperatorCode::DecrementPointer => Some(Instruction::DecrementPointer),
+						OperatorCode::IncrementByte => Some(Instruction::IncrementByte),
+						OperatorCode::DecrementByte => Some(Instruction::DecrementByte),
+						OperatorCode::OutputByte => Some(Instruction::OutputByte),
+						OperatorCode::InputByte => Some(Instruction::InputByte),
+						OperatorCode::EndLoop { offset } => {
+							return Err(Error::LoopWithNoBeginning {
+								input: src.to_owned(),
+								loop_src: (offset, 1).into(),
+							})
+						}
+						// We don't care about this variant as it is handled in a subsequent arm
+						OperatorCode::StartLoop { .. } => None,
+					} {
+						program.push(instruction);
+					}
+				}
+				(_, OperatorCode::StartLoop { .. }) => loop_stack += 1i32,
+				(_, OperatorCode::EndLoop { .. }) => {
+					loop_stack -= 1i32;
+					if loop_stack == 0i32 {
+						let loop_program = parse(
+							src,
+							match operator_codes.get(loop_start + 1..i) {
+								Some(value) => value,
+								None => return Err(Error::SliceOutOfBounds(loop_start + 1..i)),
+							},
+						)?;
+
+						program.push(Instruction::Loop(loop_program));
+					}
+				}
+				_ => (),
+			};
+
+			Ok(())
+		})?;
+
+	if loop_stack == 0i32 {
+		Ok(program)
+	} else {
+		Err(Error::LoopWithNoEnding {
+			input: src.to_owned(),
+			loop_src: (loop_source_offset, 1).into(),
+		})
+	}
+}
diff --git a/src/utility.rs b/src/utility.rs
new file mode 100644
index 0000000..a903273
--- /dev/null
+++ b/src/utility.rs
@@ -0,0 +1,47 @@
+//! Utility functions for working with the Brainfuck interpreter.
+
+use std::path::Path;
+
+use crate::{constants::TapeInner, execute, lex, parse, Error};
+
+/// Utility function to execute a Brainfuck file. Lexes, parses and executes the
+/// input file.
+///
+/// # Errors
+///
+/// This function will return an error if reading the input file, parsing or
+/// execution fails. See documentation for [`crate::parser::parse`] and
+/// [`crate::executor::execute`].
+pub fn execute_from_file(path: impl AsRef<Path>, tape: &mut [TapeInner]) -> Result<(), Error> {
+	let input = fs_err::read_to_string(path.as_ref())?;
+
+	let operator_codes = lex(&input);
+
+	let instructions = parse(&input, &operator_codes)?;
+
+	let mut data_pointer = 0;
+
+	execute(&instructions, tape, &mut data_pointer)?;
+
+	Ok(())
+}
+
+/// Utility function to execute Brainfuck code. Lexes, parses and executes the
+/// input.
+///
+/// # Errors
+///
+/// This function will return an error if parsing or
+/// execution fails. See documentation for [`crate::parser::parse`] and
+/// [`crate::executor::execute`].
+pub fn execute_from_str(input: &str, tape: &mut [TapeInner]) -> Result<(), Error> {
+	let operator_codes = lex(input);
+
+	let instructions = parse(input, &operator_codes)?;
+
+	let mut data_pointer = 0;
+
+	execute(&instructions, tape, &mut data_pointer)?;
+
+	Ok(())
+}