use std::fs;
use Token::*;

/// A lexical token produced by [`Lexer::next`].
///
/// Every variant except `EOF` carries the exact source text that was matched.
// The upper-case variant names are part of the public interface and are kept
// as-is for existing callers; the lint is silenced instead of renaming them.
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
    /// The keyword `if`.
    IF(String),
    /// The keyword `else`.
    ELSE(String),
    /// The keyword `int`.
    INT(String),
    /// The keyword `float`.
    FLOAT(String),
    /// The keyword `string`.
    STRING(String),
    /// `+`
    PLUS(String),
    /// `-`
    MINUS(String),
    /// `*`
    MULTIPLY(String),
    /// `/`
    DIVIDE(String),
    /// `=`
    ASSIGN(String),
    /// `==`
    EQUALS(String),
    /// `!=`
    NOT_EQUALS(String),
    /// `>`
    GREATER_THAN(String),
    /// `<`
    LESS_THAN(String),
    /// `>=`
    GREATER_THAN_OR_EQUAL(String),
    /// `<=`
    LESS_THAN_OR_EQUAL(String),
    /// `{`
    LBRACE(String),
    /// `}`
    RBRACE(String),
    /// `[`
    LBRACKET(String),
    /// `]`
    RBRACKET(String),
    /// `(`
    LPAR(String),
    /// `)`
    RPAR(String),
    /// The keyword `true`.
    TRUE(String),
    /// The keyword `false`.
    FALSE(String),
    /// An ASCII letter followed by any number of ASCII letters or digits,
    /// when the text is not one of the keywords above.
    IDENT(String),
    /// A run of one or more ASCII digits.
    NUMBER(String),
    /// `;`
    SEMI(String),
    /// End of input.
    EOF,
}

/// Error returned by [`Lexer::next`] when no token can be recognised at the
/// current position.
#[derive(Debug, PartialEq, Eq)]
pub struct TokenErr {
    /// Message containing the offending source line followed by a caret line.
    pub error: String,
}

/// A DFA-driven lexer over an in-memory character buffer.
pub struct Lexer {
    /// The complete input, one entry per `char`.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    pos: usize,
    /// Start index of every line entered so far; the last entry is the
    /// current line. Always holds at least the initial `0`.
    begins: Vec<usize>,
    /// Column each consumed tab started at, so `retract` can undo it.
    tabs: Vec<usize>,
    /// Tab-expanded column of `pos` within the current line.
    column: usize,
}

impl Lexer {
    /// DFA state in which scanning of a token begins.
    const START: usize = 0;
    /// DFA state meaning "no token can continue with this character".
    const DEAD: usize = 1;
    /// DFA state for a plain identifier (no keyword prefix being tracked).
    const IDENT_STATE: usize = 14;
    /// Tabs advance the column to the next multiple of this width.
    const TAB_WIDTH: usize = 4;

    /// Token code accepted in each DFA state; `0` marks a non-accepting
    /// state. The codes are decoded by [`Lexer::token_for`].
    const ACCEPTS: [usize; 50] = [
        0, 0, 0, 21, 22, 8, 6, 7, 9, 26, // states 0-9
        27, 14, 10, 13, 25, 19, 20, 25, 25, 25, // states 10-19
        25, 25, 17, 18, 25, 25, 23, 25, 25, 25, // states 20-29
        25, 5, 1, 25, 3, 25, 25, 25, 25, 4, // states 30-39
        25, 25, 24, 25, 25, 2, 15, 11, 16, 12, // states 40-49
    ];

    /// Creates a lexer over the contents of the file `fname`.
    ///
    /// # Errors
    ///
    /// Returns the I/O error (boxed) if the file cannot be read as UTF-8 text.
    // NOTE(review): the generic arguments of this return type were lost in the
    // reviewed copy of the file; `Box<dyn Error>` is reconstructed from the
    // `?` on `fs::read_to_string` - confirm against callers.
    pub fn new(fname: &str) -> Result<Lexer, Box<dyn std::error::Error>> {
        Ok(Lexer::from_source(&fs::read_to_string(fname)?))
    }

    /// Creates a lexer over `source` directly, without touching the file system.
    pub fn from_source(source: &str) -> Lexer {
        Lexer {
            chars: source.chars().collect(),
            pos: 0,
            begins: vec![0],
            tabs: Vec::new(),
            column: 0,
        }
    }

    /// Returns the next token, or `Ok(EOF)` once only whitespace (or nothing)
    /// remains.
    ///
    /// The longest possible match wins, so `>=` is one token and `iffy` is an
    /// identifier rather than the keyword `if`.
    ///
    /// # Errors
    ///
    /// Returns a [`TokenErr`] when no token starts at the current position.
    /// The lexer is left at that position, so calling `next` again yields the
    /// same error.
    pub fn next(&mut self) -> Result<Token, TokenErr> {
        // Skipping whitespace up front (instead of inside the DFA) lets input
        // that ends in whitespace report EOF rather than a lex failure.
        self.skip_whitespace();
        if self.pos == self.chars.len() {
            return Ok(EOF);
        }

        let token_start = self.pos;
        let mut visited: Vec<usize> = Vec::new();
        let mut state = Self::START;
        while self.pos < self.chars.len() {
            let c = self.advance();
            state = Self::step(state, c);
            if state == Self::DEAD {
                // `c` cannot extend the token: hand it back and stop.
                self.retract();
                break;
            }
            visited.push(state);
        }

        // Back up to the longest prefix that ended in an accepting state.
        while let Some(&top) = visited.last() {
            if Self::ACCEPTS[top] != 0 {
                break;
            }
            visited.pop();
            self.retract();
        }

        match visited.last() {
            Some(&accepting) => {
                let word: String = self.chars[token_start..self.pos].iter().collect();
                Ok(Self::token_for(Self::ACCEPTS[accepting], word))
            }
            None => Err(self.lex_error()),
        }
    }

    /// Consumes and returns the character at `pos`, updating line and column
    /// bookkeeping.
    fn advance(&mut self) -> char {
        let c = self.chars[self.pos];
        match c {
            '\n' => {
                self.column = 0;
                self.begins.push(self.pos + 1);
            }
            '\t' => {
                self.tabs.push(self.column);
                self.column += Self::TAB_WIDTH - self.column % Self::TAB_WIDTH;
            }
            _ => self.column += 1,
        }
        self.pos += 1;
        c
    }

    /// Undoes the most recent [`Lexer::advance`].
    fn retract(&mut self) {
        self.pos -= 1;
        match self.chars[self.pos] {
            '\n' => {
                self.begins.pop();
                // Recompute the column with tab expansion; a plain
                // `pos - line_start` is wrong on lines that contain tabs.
                let line_start = self.line_start();
                self.column = self.chars[line_start..self.pos]
                    .iter()
                    .fold(0, |col, &ch| {
                        if ch == '\t' {
                            col + Self::TAB_WIDTH - col % Self::TAB_WIDTH
                        } else {
                            col + 1
                        }
                    });
            }
            '\t' => {
                self.column = self
                    .tabs
                    .pop()
                    .expect("advance records a column for every tab it consumes");
            }
            _ => self.column -= 1,
        }
    }

    /// Consumes tabs, newlines, carriage returns and spaces at `pos`.
    fn skip_whitespace(&mut self) {
        while self.pos < self.chars.len()
            && matches!(self.chars[self.pos], '\t' | '\n' | '\r' | ' ')
        {
            self.advance();
        }
    }

    /// Index in `chars` at which the current line starts.
    fn line_start(&self) -> usize {
        *self
            .begins
            .last()
            .expect("begins always holds the start of the first line")
    }

    /// Builds the error for a failure at the current position: the current
    /// source line, then a caret indented by the current column.
    fn lex_error(&self) -> TokenErr {
        let error_line: String = self.chars[self.line_start()..]
            .iter()
            .take_while(|&&c| c != '\n')
            .collect();
        TokenErr {
            error: format!(
                "Failed to lex from: \n{}\n{}^",
                error_line,
                " ".repeat(self.column)
            ),
        }
    }

    /// DFA transition function: the state reached from `state` on reading `c`.
    ///
    /// Whitespace is never passed in for the start state because
    /// [`Lexer::next`] skips it beforehand.
    ///
    /// # Panics
    ///
    /// Panics if `state` is not one of the 50 DFA states.
    fn step(state: usize, c: char) -> usize {
        match state {
            0 => match c {
                '!' => 2,
                '(' => 3,
                ')' => 4,
                '*' => 5,
                '+' => 6,
                '-' => 7,
                '/' => 8,
                '0'..='9' => 9,
                ';' => 10,
                '<' => 11,
                '=' => 12,
                '>' => 13,
                '[' => 15,
                ']' => 16,
                '{' => 22,
                '}' => 23,
                // First letters of keywords get their own states ...
                'e' => 17,
                'f' => 18,
                'i' => 19,
                's' => 20,
                't' => 21,
                // ... every other letter starts a plain identifier.
                'A'..='Z' | 'a'..='z' => Self::IDENT_STATE,
                _ => Self::DEAD,
            },
            1 => Self::DEAD,
            // Operators that may be followed by `=`.
            2 => Self::equals_step(c, 49),  // "!"  -> "!="
            11 => Self::equals_step(c, 48), // "<"  -> "<="
            12 => Self::equals_step(c, 47), // "="  -> "=="
            13 => Self::equals_step(c, 46), // ">"  -> ">="
            9 => {
                if c.is_ascii_digit() {
                    9
                } else {
                    Self::DEAD
                }
            }
            // Complete single- and double-character tokens: nothing may follow.
            3..=8 | 10 | 15 | 16 | 22 | 23 | 46..=49 => Self::DEAD,
            // Identifier, or a complete keyword that any further letter or
            // digit turns back into an identifier.
            14 | 26 | 31 | 32 | 34 | 39 | 42 | 45 => Self::ident_step(c, &[]),
            // Keyword prefixes; the comment shows the text read so far.
            17 => Self::ident_step(c, &[('l', 43)]),            // e
            18 => Self::ident_step(c, &[('a', 35), ('l', 36)]), // f
            19 => Self::ident_step(c, &[('f', 32), ('n', 33)]), // i
            20 => Self::ident_step(c, &[('t', 27)]),            // s
            21 => Self::ident_step(c, &[('r', 24)]),            // t
            24 => Self::ident_step(c, &[('u', 25)]),            // tr
            25 => Self::ident_step(c, &[('e', 26)]),            // tru
            27 => Self::ident_step(c, &[('r', 28)]),            // st
            28 => Self::ident_step(c, &[('i', 29)]),            // str
            29 => Self::ident_step(c, &[('n', 30)]),            // stri
            30 => Self::ident_step(c, &[('g', 31)]),            // strin
            33 => Self::ident_step(c, &[('t', 34)]),            // in
            35 => Self::ident_step(c, &[('l', 40)]),            // fa
            36 => Self::ident_step(c, &[('o', 37)]),            // fl
            37 => Self::ident_step(c, &[('a', 38)]),            // flo
            38 => Self::ident_step(c, &[('t', 39)]),            // floa
            40 => Self::ident_step(c, &[('s', 41)]),            // fal
            41 => Self::ident_step(c, &[('e', 42)]),            // fals
            43 => Self::ident_step(c, &[('s', 44)]),            // el
            44 => Self::ident_step(c, &[('e', 45)]),            // els
            _ => panic!("Invalid State!"),
        }
    }

    /// Transition for an operator state: `=` leads to `on_equals`, anything
    /// else is dead.
    fn equals_step(c: char, on_equals: usize) -> usize {
        if c == '=' {
            on_equals
        } else {
            Self::DEAD
        }
    }

    /// Transition for an identifier-like state: a character listed in
    /// `keyword_edges` continues a keyword, any other ASCII letter or digit
    /// falls back to the plain identifier state, and everything else is dead.
    fn ident_step(c: char, keyword_edges: &[(char, usize)]) -> usize {
        if let Some(&(_, target)) = keyword_edges.iter().find(|&&(edge, _)| edge == c) {
            target
        } else if c.is_ascii_alphanumeric() {
            Self::IDENT_STATE
        } else {
            Self::DEAD
        }
    }

    /// Wraps `word` in the token variant identified by an accept code taken
    /// from [`Lexer::ACCEPTS`].
    ///
    /// # Panics
    ///
    /// Panics if `code` is not in `1..=27`; this cannot happen for codes read
    /// from the accept table of an accepting state.
    fn token_for(code: usize, word: String) -> Token {
        match code {
            1 => IF(word),
            2 => ELSE(word),
            3 => INT(word),
            4 => FLOAT(word),
            5 => STRING(word),
            6 => PLUS(word),
            7 => MINUS(word),
            8 => MULTIPLY(word),
            9 => DIVIDE(word),
            10 => ASSIGN(word),
            11 => EQUALS(word),
            12 => NOT_EQUALS(word),
            13 => GREATER_THAN(word),
            14 => LESS_THAN(word),
            15 => GREATER_THAN_OR_EQUAL(word),
            16 => LESS_THAN_OR_EQUAL(word),
            17 => LBRACE(word),
            18 => RBRACE(word),
            19 => LBRACKET(word),
            20 => RBRACKET(word),
            21 => LPAR(word),
            22 => RPAR(word),
            23 => TRUE(word),
            24 => FALSE(word),
            25 => IDENT(word),
            26 => NUMBER(word),
            27 => SEMI(word),
            _ => unreachable!("Invalid Accepting State"),
        }
    }
}