#include "Lexer.h"
#include <map>

using namespace lexer;

static const std::map<char, lexer::Lexer::TokenType> tokens = {
    {'(', Lexer::TokenType::TOK_LEFT_PARENTHESIS},
    {')', Lexer::TokenType::TOK_RIGHT_PARENTHESIS},
    {'[', Lexer::TokenType::TOK_LEFT_BRACKET},
    {']', Lexer::TokenType::TOK_RIGHT_BRACKET},
    {'{', Lexer::TokenType::TOK_LEFT_CURLY_BRACKET},
    {'}', Lexer::TokenType::TOK_RIGHT_CURLY_BRACKET},
};

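// Produce the next token: skip leading whitespace, remember where the token
// starts, classify it with InternalLex(), and record the consumed characters
// as the token's value.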
const Lexer::Token& Lexer::Lex() {
  SkipWhiteSpace();
  char *start = m_start;
  m_last_token.m_type = InternalLex();
  m_last_token.m_value = llvm::StringRef(start, m_start - start);
  return m_last_token;
}

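// Advance past spaces and tabs; stop at m_end so we never read past the
// input buffer.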
void Lexer::SkipWhiteSpace() {
  while (m_start < m_end && (*m_start == ' ' || *m_start == '\t'))
    m_start++;
}

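// Look at the character under m_start and dispatch to the handler for the
// token class it begins.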
Lexer::TokenType Lexer::InternalLex() {
  if (m_start >= m_end)
    return TOK_EOF;
  /**
   * ascii
   * 000 nul  001 soh  002 stx  003 etx  004 eot  005 enq  006 ack  007 bel
   * 010 bs   011 ht   012 nl   013 vt   014 np   015 cr   016 so   017 si
   * 020 dle  021 dc1  022 dc2  023 dc3  024 dc4  025 nak  026 syn  027 etb
   * 030 can  031 em   032 sub  033 esc  034 fs   035 gs   036 rs   037 us
   * 040 sp   041 !    042 "    043 #    044 $    045 %    046 &    047 '
   * 050 (    051 )    052 *    053 +    054 ,    055 -    056 .    057 /
   * 060 0    061 1    062 2    063 3    064 4    065 5    066 6    067 7
   * 070 8    071 9    072 :    073 ;    074 <    075 =    076 >    077 ?
   * 100 @    101 A    102 B    103 C    104 D    105 E    106 F    107 G
   * 110 H    111 I    112 J    113 K    114 L    115 M    116 N    117 O
   * 120 P    121 Q    122 R    123 S    124 T    125 U    126 V    127 W
   * 130 X    131 Y    132 Z    133 [    134 \    135 ]    136 ^    137 _
   * 140 `    141 a    142 b    143 c    144 d    145 e    146 f    147 g
   * 150 h    151 i    152 j    153 k    154 l    155 m    156 n    157 o
   * 160 p    161 q    162 r    163 s    164 t    165 u    166 v    167 w
   * 170 x    171 y    172 z    173 {    174 |    175 }    176 ~    177 del
   */
  switch (*m_start) {
  case '!':
  case '#'...'&':
  case '('...'/':
  case ':'...'@':
  case '['...'^':
  case '`':
  case '{'...'~':
    return DoOperator();
  case '"':
  case '\'':
    return DoStringLiteral();
  case '0'...'9':
    return DoNumberLiteral();
  case '_':
  case 'A'...'Z':
  case 'a'...'z':
    return DoIdentifier();
  default:
    return TOK_INVALID;
  }
}

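// Single-character punctuation: look the character up in the bracket table
// defined at the top of this file and consume it on a match.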
Lexer::TokenType Lexer::DoOperator() {
  auto it = tokens.find(*m_start);
  if (it != tokens.end()) {
    m_start++;
    return it->second;
  }
  return TOK_INVALID;
}

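// The literal and identifier handlers below are not implemented yet: they do
// not consume any input and only return a fixed token type for now.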
Lexer::TokenType Lexer::DoNumberLiteral() {
  return TOK_INVALID;
}

Lexer::TokenType Lexer::DoIdentifier() {
  return TOK_INVALID;
}

Lexer::TokenType Lexer::DoStringLiteral() {
  return TOK_EOL;
}