Skip to content

Commit 93273d7

Browse files
committed
[lexer] lexer place holder
1 parent 3ae9983 commit 93273d7

File tree

4 files changed

+201
-0
lines changed

4 files changed

+201
-0
lines changed

CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# LLVM component libraries the tool links against.
set(LLVM_LINK_COMPONENTS Support)

# The postfix tool is built from the command-line driver and the lexer.
add_llvm_tool(postfix
  postfix.cpp
  Lexer.cpp
  )

Lexer.cpp

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#include "Lexer.h"
2+
#include <map>
3+
4+
using namespace lexer;
5+
6+
static const std::map<char, lexer::Lexer::TokenType> tokens = {
7+
{'(', Lexer::TokenType::TOK_LEFT_PARENTHESIS},
8+
{')', Lexer::TokenType::TOK_RIGHT_PARENTHESIS},
9+
{'[', Lexer::TokenType::TOK_LEFT_BRACKET},
10+
{']', Lexer::TokenType::TOK_RIGHT_BRACKET},
11+
{'{', Lexer::TokenType::TOK_LEFT_CURLY_BRACKET},
12+
{'}', Lexer::TokenType::TOK_RIGHT_CURLY_BRACKET},
13+
};
14+
15+
/// Lexes the next token.
///
/// Leading spaces and tabs are skipped first. The token's value is the slice
/// of the input between the position before lexing and the position after it.
///
/// @return a reference to the lexer's single internal token, which the next
///         call to Lex() overwrites.
const Lexer::Token& Lexer::Lex() {
    SkipWhiteSpace();

    char *const token_begin = m_start;
    const TokenType type = InternalLex();

    m_last_token.m_type = type;
    m_last_token.m_value = llvm::StringRef(token_begin, m_start - token_begin);
    return m_last_token;
}
22+
23+
void Lexer::SkipWhiteSpace() {
24+
while(*m_start == ' ' || *m_start == '\t')
25+
m_start++;
26+
}
27+
28+
/// Classifies the character at m_start and dispatches to the matching handler.
///
/// Written with plain comparisons rather than `case 'a'...'z'` labels: case
/// ranges are a GNU extension and are not accepted by every C++ compiler.
/// Like the case ranges they replace, the comparisons assume an ASCII
/// execution character set.
///
/// @return TOK_EOF when the input is exhausted, TOK_INVALID for any character
///         outside the printable ASCII range, otherwise whatever the selected
///         handler returns.
Lexer::TokenType Lexer::InternalLex() {
    if (m_start >= m_end)
        return TOK_EOF;
    /**
     * ascii (octal codes)
     * 000 nul 001 soh 002 stx 003 etx 004 eot 005 enq 006 ack 007 bel
     * 010 bs  011 ht  012 nl  013 vt  014 np  015 cr  016 so  017 si
     * 020 dle 021 dc1 022 dc2 023 dc3 024 dc4 025 nak 026 syn 027 etb
     * 030 can 031 em  032 sub 033 esc 034 fs  035 gs  036 rs  037 us
     * 040 sp  041 !   042 "   043 #   044 $   045 %   046 &   047 '
     * 050 (   051 )   052 *   053 +   054 ,   055 -   056 .   057 /
     * 060 0   061 1   062 2   063 3   064 4   065 5   066 6   067 7
     * 070 8   071 9   072 :   073 ;   074 <   075 =   076 >   077 ?
     * 100 @   101 A   102 B   103 C   104 D   105 E   106 F   107 G
     * 110 H   111 I   112 J   113 K   114 L   115 M   116 N   117 O
     * 120 P   121 Q   122 R   123 S   124 T   125 U   126 V   127 W
     * 130 X   131 Y   132 Z   133 [   134 \   135 ]   136 ^   137 _
     * 140 `   141 a   142 b   143 c   144 d   145 e   146 f   147 g
     * 150 h   151 i   152 j   153 k   154 l   155 m   156 n   157 o
     * 160 p   161 q   162 r   163 s   164 t   165 u   166 v   167 w
     * 170 x   171 y   172 z   173 {   174 |   175 }   176 ~   177 del
     */
    const char c = *m_start;

    if (c == '"' || c == '\'')
        return DoStringLiteral();

    if (c >= '0' && c <= '9')
        return DoNumberLiteral();

    if (c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
        return DoIdentifier();

    // With quotes, digits, letters and '_' handled above, every printable
    // non-space character left in '!'..'~' is punctuation:
    // ! # $ % & ( ) * + , - . / : ; < = > ? @ [ \ ] ^ ` { | } ~
    if (c >= '!' && c <= '~')
        return DoOperator();

    return TOK_INVALID;
}
72+
73+
/// Lexes a single-character operator.
///
/// If the current character is a key of `tokens`, it is consumed and the
/// mapped token type is returned. Otherwise the position is left unchanged
/// and TOK_INVALID is returned.
Lexer::TokenType Lexer::DoOperator() {
    const auto entry = tokens.find(*m_start);
    if (entry == tokens.end())
        return TOK_INVALID;

    m_start++;
    return entry->second;
}
81+
82+
/// Placeholder for number-literal lexing: consumes nothing and always
/// reports TOK_INVALID.
Lexer::TokenType Lexer::DoNumberLiteral() {
    return TokenType::TOK_INVALID;
}
85+
86+
/// Placeholder for identifier lexing: consumes nothing and always reports
/// TOK_INVALID.
Lexer::TokenType Lexer::DoIdentifier() {
    return TokenType::TOK_INVALID;
}
89+
90+
/// Placeholder for string-literal lexing: consumes nothing.
///
/// Reports TOK_INVALID, as the other unimplemented handlers do. A quote
/// character is not an end of line, so returning TOK_EOL mislabelled the
/// input.
Lexer::TokenType Lexer::DoStringLiteral() {
    return TOK_INVALID;
}

Lexer.h

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
2+
#ifndef LLVM_LEXER_H
3+
#define LLVM_LEXER_H
4+
5+
#include <cstring>

#include <llvm/ADT/StringRef.h>
6+
7+
namespace lexer {
8+
class Lexer {
9+
public:
10+
Lexer(char *src): m_start(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, llvm::StringRef()){}
11+
12+
enum TokenType {
13+
TOK_INVALID,
14+
TOK_EOF,
15+
TOK_LEFT_PARENTHESIS,
16+
TOK_RIGHT_PARENTHESIS,
17+
TOK_LEFT_BRACKET,
18+
TOK_RIGHT_BRACKET,
19+
TOK_LEFT_CURLY_BRACKET,
20+
TOK_RIGHT_CURLY_BRACKET,
21+
TOK_EOL
22+
};
23+
24+
struct Token {
25+
Token(TokenType type, llvm::StringRef value):m_type(type),m_value(value){}
26+
TokenType m_type;
27+
llvm::StringRef m_value;
28+
};
29+
30+
const Token& Lex();
31+
32+
private:
33+
char *m_start;
34+
char *m_end;
35+
Token m_last_token;
36+
37+
void SkipWhiteSpace();
38+
39+
TokenType InternalLex();
40+
41+
Lexer::TokenType DoOperator();
42+
43+
Lexer::TokenType DoNumberLiteral();
44+
45+
Lexer::TokenType DoIdentifier();
46+
47+
Lexer::TokenType DoStringLiteral();
48+
};
49+
}
50+
51+
52+
#endif //LLVM_LEXER_H

postfix.cpp

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include <llvm/Support/raw_ostream.h>
2+
#include <iostream>
3+
#include "Lexer.h"
4+
5+
using lexer::Lexer;
6+
7+
int
8+
main() {
9+
char c[1024];
10+
std::string text;
11+
llvm::raw_string_ostream ostream(text);
12+
while (std::cin >> c) {
13+
std::cout << ">>" << c;
14+
ostream << c;
15+
}
16+
Lexer lex((char *) ostream.str().c_str());
17+
while(true) {
18+
const Lexer::Token& t = lex.Lex();
19+
switch (t.m_type) {
20+
case Lexer::TokenType::TOK_INVALID:
21+
std::cout << "Unsupported token: " << t.m_value.str() << std::endl;
22+
return 1;
23+
case Lexer::TokenType::TOK_LEFT_PARENTHESIS:
24+
std::cout << "( -> " << t.m_value.str() << std::endl;
25+
break;
26+
case Lexer::TokenType::TOK_RIGHT_PARENTHESIS:
27+
std::cout << ") -> " << t.m_value.str() << std::endl;
28+
break;
29+
case Lexer::TokenType::TOK_LEFT_BRACKET:
30+
std::cout << "[ -> " << t.m_value.str() << std::endl;
31+
break;
32+
case Lexer::TokenType::TOK_RIGHT_BRACKET:
33+
std::cout << "] -> " << t.m_value.str() << std::endl;
34+
break;
35+
case Lexer::TokenType::TOK_LEFT_CURLY_BRACKET:
36+
std::cout << "{ -> " << t.m_value.str() << std::endl;
37+
break;
38+
case Lexer::TokenType::TOK_RIGHT_CURLY_BRACKET:
39+
std::cout << "} -> " << t.m_value.str() << std::endl;
40+
break;
41+
case Lexer::TokenType::TOK_EOF:
42+
return 0;
43+
default:
44+
std::cout << "Unknown token: " << t.m_value.str() << std::endl;
45+
return 1;
46+
47+
}
48+
}
49+
}

0 commit comments

Comments
 (0)