summary refs log tree commit diff
path: root/src/lexer.rs
blob: 786c873710f0e53a51b28e13fc36a82d4e221e9c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
//! Lexer for Brainfuck

/// Operator codes (tokens) produced by the lexer.
///
/// Each variant corresponds to exactly one of the eight Brainfuck command
/// characters. Any input symbol that is not in this list is a comment and
/// has no operator code.
///
/// Values compare by variant and, for the loop brackets, by `offset`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OperatorCode {
	/// `>`
	///
	/// Increment the data pointer by one (to point to the next cell to the
	/// right).
	IncrementPointer,

	/// `<`
	///
	/// Decrement the data pointer by one (to point to the next cell to the
	/// left).
	DecrementPointer,

	/// `+`
	///
	/// Increment the byte at the data pointer by one.
	IncrementByte,

	/// `-`
	///
	/// Decrement the byte at the data pointer by one.
	DecrementByte,

	/// `.`
	///
	/// Output the byte at the data pointer.
	OutputByte,

	/// `,`
	///
	/// Accept one byte of input, storing its value in the byte at the data
	/// pointer.
	InputByte,

	/// `[`
	///
	/// If the byte at the data pointer is zero, then instead of moving the
	/// instruction pointer forward to the next command, jump it forward to the
	/// command after the matching `]` command.
	StartLoop {
		/// Byte offset of the `[` bracket in the source text.
		offset: usize,
	},

	/// `]`
	///
	/// If the byte at the data pointer is nonzero, then instead of moving the
	/// instruction pointer forward to the next command, jump it back to the
	/// command after the matching `[` command.
	EndLoop {
		/// Byte offset of the `]` bracket in the source text.
		offset: usize,
	},
}

/// Tokenize Brainfuck source text into a list of operator codes.
///
/// Walks `input` character by character and emits one [`OperatorCode`] per
/// command character, preserving source order. Characters that are not one
/// of the eight commands are comments and produce no output. The loop
/// brackets record the index reported by `char_indices` for their position,
/// i.e. a byte offset into `input`.
#[must_use]
pub fn lex(input: &str) -> Vec<OperatorCode> {
	let mut operators = Vec::new();

	for (position, character) in input.char_indices() {
		let operator = match character {
			'>' => OperatorCode::IncrementPointer,
			'<' => OperatorCode::DecrementPointer,
			'+' => OperatorCode::IncrementByte,
			'-' => OperatorCode::DecrementByte,
			'.' => OperatorCode::OutputByte,
			',' => OperatorCode::InputByte,
			'[' => OperatorCode::StartLoop { offset: position },
			']' => OperatorCode::EndLoop { offset: position },
			// Everything else is a comment character: skip it.
			_ => continue,
		};
		operators.push(operator);
	}

	operators
}