-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.h
90 lines (80 loc) · 3.1 KB
/
lexer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "utils.h"
class Lexer {
vector<Token> tokens;
bool identifier_chain = false;
bool number_chain = false;
string chain_tmp;
int lines = 0;
void clearNumberChain(){
if(number_chain){
tokens.emplace_back(Token(chain_tmp, NUMBER_LITERAL));
number_chain = false;
chain_tmp = "";
}
}
void clearIdentifierChain(){
if(identifier_chain){
tokens.emplace_back(Token(chain_tmp, IDENTIFIER));
identifier_chain = false;
chain_tmp = "";
}
}
public:
vector<Token> tokenize(string source) {
bool noConsider = false;
for (int i=0; i < int(source.length()); i++) {
if(noConsider){
noConsider = false;
continue;
}
char token = source[i];
if(token == ' ' || token == '\n'){
clearIdentifierChain();
clearNumberChain();
}
if (token == ' ') continue;
else if (token == '\n') {
lines++;
tokens.emplace_back(Token("\\n", NEWLINE));
} else if (isdigit(token)) {
if(number_chain) chain_tmp.append(string(1, token));
else if(isdigit(source[i+1])){
number_chain = true;
chain_tmp.append(string(1, token));
} else tokens.emplace_back(Token(token, NUMBER_LITERAL));
} else if (token == '+') {
clearNumberChain();
tokens.emplace_back(Token(token, PLUS));
} else if (token == '-') {
clearNumberChain();
if((source[i-1] == '=' || Utils::isOperator(string(1, source[i-1]))) && isdigit(source[i+1])){
tokens.emplace_back(Token(source[i+1], NEGATIVE_NUMBER_LITERAL));
noConsider = true;
} else {
tokens.emplace_back(Token(token, MINUS));
}
} else if (token == '*') {
clearNumberChain();
tokens.emplace_back(Token(token, MULTIPLY));
} else if (token == '/') {
clearNumberChain();
tokens.emplace_back(Token(token, DIVIDE));
} else if (token == '=') {
tokens.emplace_back(Token(token, ASSIGNMENT));
} else if(token == '('){
tokens.emplace_back(Token(token, OPENING_BRACKET));
} else if(token == ')'){
tokens.emplace_back(Token(token, CLOSING_BRACKET));
} else if(token == '#'){
tokens.emplace_back(Token("#", HASHTAG));
} else if(regex_match(string(1, token), regex("[a-zA-Z]"))) {
if(identifier_chain) chain_tmp.append(string(1, token));
else if(regex_match(string(1, source[i+1]), regex("[a-zA-Z]"))){
identifier_chain = true;
chain_tmp.append(string(1, token));
} else tokens.emplace_back(Token(token, IDENTIFIER));
}
}
return tokens;
}
};