summary refs log tree commit diff
path: root/crates/brainf_lexer
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- crates/brainf_lexer/Cargo.toml 8
-rw-r--r-- crates/brainf_lexer/src/lexer.rs (renamed from src/lexer.rs) 58
-rw-r--r-- crates/brainf_lexer/src/lib.rs 105
3 files changed, 140 insertions, 31 deletions
diff --git a/crates/brainf_lexer/Cargo.toml b/crates/brainf_lexer/Cargo.toml
new file mode 100644
index 0000000..58be7a5
--- /dev/null
+++ b/crates/brainf_lexer/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "brainf_lexer"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+logos = "0.13.0"
+thiserror = "1.0.44"
diff --git a/src/lexer.rs b/crates/brainf_lexer/src/lexer.rs
index 786c873..b95cd87 100644
--- a/src/lexer.rs
+++ b/crates/brainf_lexer/src/lexer.rs
@@ -1,40 +1,60 @@
-//! Lexer for Brainfuck
+//! Lexer implementation using logos.
+
+#![expect(clippy::indexing_slicing)]
+
+use logos::{Lexer, Logos};
 
 /// List of operator codes for the lexer
 /// Note: Any input symbol that is not in this list is a comment
-#[derive(Clone, Copy, Debug)]
-pub enum OperatorCode {
+
+fn loop_callback(lex: &Lexer<Token>) -> (usize, usize) {
+	// Capture where the matched token starts and how long its span is.
+	let matched = lex.span();
+	let offset = matched.start;
+	(offset, matched.len())
+}
+
+/// List of Tokens for the lexer
+/// Note: Any input symbol that is not in this list is a comment
+#[derive(Clone, Copy, Debug, Logos, PartialEq, Eq)]
+#[logos(skip r"[^<>+\-.,\[\]]+")]
+pub enum Token {
 	/// `>`
 	///
 	/// Increment the data pointer by one (to point to the next cell to the
 	/// right).
+	#[token(">")]
 	IncrementPointer,
 
 	/// `<`
 	///
 	/// Decrement the data pointer by one (to point to the next cell to the
 	/// left).
+	#[token("<")]
 	DecrementPointer,
 
 	/// `+`
 	///
 	/// Increment the byte at the data pointer by one.
+	#[token("+")]
 	IncrementByte,
 
 	/// `-`
 	///
 	/// Decrement the byte at the data pointer by one.
+	#[token("-")]
 	DecrementByte,
 
 	/// `.`
 	///
 	/// Output the byte at the data pointer.
+	#[token(".")]
 	OutputByte,
 
 	/// `,`
 	///
 	/// Accept one byte of input, storing its value in the byte at the data
 	/// pointer.
+	#[token(",")]
 	InputByte,
 
 	/// `[`
@@ -42,38 +62,14 @@ pub enum OperatorCode {
 	/// If the byte at the data pointer is zero, then instead of moving the
 	/// instruction pointer forward to the next command, jump it forward to the
 	/// command after the matching ] command.
-	StartLoop {
-		/// Offset of the bracket in the source.
-		offset: usize,
-	},
+	#[token("[", loop_callback)]
+	StartLoop((usize, usize)),
 
 	/// `]`
 	///
 	/// If the byte at the data pointer is nonzero, then instead of moving the
 	/// instruction pointer forward to the next command, jump it back to the
 	/// command after the matching [ command.
-	EndLoop {
-		/// Offset of the bracket in the source.
-		offset: usize,
-	},
-}
-
-/// Perform lexical analysis on the input brainfuck code
-#[must_use]
-pub fn lex(input: &str) -> Vec<OperatorCode> {
-	input
-		.char_indices()
-		.filter_map(|(i, symbol)| match symbol {
-			'>' => Some(OperatorCode::IncrementPointer),
-			'<' => Some(OperatorCode::DecrementPointer),
-			'+' => Some(OperatorCode::IncrementByte),
-			'-' => Some(OperatorCode::DecrementByte),
-			'.' => Some(OperatorCode::OutputByte),
-			',' => Some(OperatorCode::InputByte),
-			'[' => Some(OperatorCode::StartLoop { offset: i }),
-			']' => Some(OperatorCode::EndLoop { offset: i }),
-			// Any symbol that does not match one of the above is a comment
-			_ => None,
-		})
-		.collect()
+	#[token("]", loop_callback)]
+	EndLoop((usize, usize)),
 }
diff --git a/crates/brainf_lexer/src/lib.rs b/crates/brainf_lexer/src/lib.rs
new file mode 100644
index 0000000..7f6e5be
--- /dev/null
+++ b/crates/brainf_lexer/src/lib.rs
@@ -0,0 +1,105 @@
+#![feature(lint_reasons)]
+#![deny(clippy::complexity)]
+#![deny(clippy::nursery)]
+#![deny(clippy::pedantic)]
+#![deny(clippy::perf)]
+#![deny(clippy::suspicious)]
+#![deny(clippy::alloc_instead_of_core)]
+#![deny(clippy::as_underscore)]
+#![deny(clippy::clone_on_ref_ptr)]
+#![deny(clippy::create_dir)]
+#![warn(clippy::dbg_macro)]
+#![deny(clippy::default_numeric_fallback)]
+#![deny(clippy::default_union_representation)]
+#![deny(clippy::deref_by_slicing)]
+#![deny(clippy::empty_structs_with_brackets)]
+#![deny(clippy::exit)]
+#![deny(clippy::expect_used)]
+#![deny(clippy::filetype_is_file)]
+#![deny(clippy::fn_to_numeric_cast)]
+#![deny(clippy::format_push_string)]
+#![deny(clippy::get_unwrap)]
+#![deny(clippy::if_then_some_else_none)]
+#![allow(
+	clippy::implicit_return,
+	reason = "returns should be done implicitly, not explicitly"
+)]
+#![deny(clippy::indexing_slicing)]
+#![deny(clippy::large_include_file)]
+#![deny(clippy::let_underscore_must_use)]
+#![deny(clippy::lossy_float_literal)]
+#![deny(clippy::map_err_ignore)]
+#![deny(clippy::mem_forget)]
+#![deny(clippy::missing_docs_in_private_items)]
+#![deny(clippy::missing_trait_methods)]
+#![deny(clippy::mod_module_files)]
+#![deny(clippy::multiple_inherent_impl)]
+#![deny(clippy::mutex_atomic)]
+#![deny(clippy::needless_return)]
+#![deny(clippy::non_ascii_literal)]
+#![deny(clippy::panic_in_result_fn)]
+#![deny(clippy::pattern_type_mismatch)]
+#![deny(clippy::rc_buffer)]
+#![deny(clippy::rc_mutex)]
+#![deny(clippy::rest_pat_in_fully_bound_structs)]
+#![deny(clippy::same_name_method)]
+#![deny(clippy::separated_literal_suffix)]
+#![deny(clippy::str_to_string)]
+#![deny(clippy::string_add)]
+#![deny(clippy::string_slice)]
+#![deny(clippy::string_to_string)]
+#![allow(
+	clippy::tabs_in_doc_comments,
+	reason = "tabs are preferred for this project"
+)]
+#![deny(clippy::try_err)]
+#![deny(clippy::undocumented_unsafe_blocks)]
+#![deny(clippy::unnecessary_self_imports)]
+#![deny(clippy::unneeded_field_pattern)]
+#![deny(clippy::unwrap_in_result)]
+#![deny(clippy::unwrap_used)]
+#![warn(clippy::use_debug)]
+#![deny(clippy::verbose_file_reads)]
+#![deny(clippy::wildcard_dependencies)]
+#![deny(clippy::wildcard_enum_match_arm)]
+#![deny(missing_copy_implementations)]
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(single_use_lifetimes)]
+#![deny(unsafe_code)]
+#![deny(unused)]
+
+//! # `brainf_lexer`
+//!
+//! Implementation of a Brainfuck lexer in Rust.
+
+mod lexer;
+
+pub use lexer::Token;
+use logos::Logos;
+use thiserror::Error;
+
+/// Error type for lexer.
+#[derive(Clone, Copy, Debug, Error)]
+pub enum Error {
+	/// Logos was unable to lex part of the input.
+	#[error("lexer was unable to lex input")]
+	LexingError,
+}
+
+/// Lexes the Brainfuck input, returning a Vec of Tokens.
+///
+/// Any input symbol that is not a Brainfuck operator is treated as a comment
+/// and skipped.
+///
+/// # Errors
+///
+/// This function will return an error if the lexer is unable to lex one or more
+/// of the input characters.
+pub fn lex(input: &str) -> Result<Vec<Token>, Error> {
+	// Collecting into a `Result` short-circuits on the first lexing failure,
+	// so no tokens are returned once an error is seen.
+	lexer::Token::lexer(input)
+		.collect::<Result<Vec<_>, _>>()
+		.map_err(|_err| Error::LexingError)
+}