Skip to content

Commit 7b53fe7

Browse files
committed
Save token positions and print them on errors
1 parent fc18363 commit 7b53fe7

File tree

7 files changed

+164
-131
lines changed

7 files changed

+164
-131
lines changed

src/em/Lexer.cpp

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,52 +10,54 @@ std::vector<Token> Lexer::scanTokens() {
1010
while (mPosition < mProgram.size()) {
1111
switch (auto c = mProgram[mPosition]) {
1212
case '{':
13-
tokens.emplace_back(TokenType::LEFT_BRACE, c);
13+
tokens.emplace_back(TokenType::LEFT_BRACE, c, mLocation);
1414
break;
1515
case '}':
16-
tokens.emplace_back(TokenType::RIGHT_BRACE, c);
16+
tokens.emplace_back(TokenType::RIGHT_BRACE, c, mLocation);
1717
break;
1818
case '(':
19-
tokens.emplace_back(TokenType::LEFT_PAREN, c);
19+
tokens.emplace_back(TokenType::LEFT_PAREN, c, mLocation);
2020
break;
2121
case ')':
22-
tokens.emplace_back(TokenType::RIGHT_PAREN, c);
22+
tokens.emplace_back(TokenType::RIGHT_PAREN, c, mLocation);
2323
break;
2424
case ',':
25-
tokens.emplace_back(TokenType::COMMA, c);
25+
tokens.emplace_back(TokenType::COMMA, c, mLocation);
2626
break;
2727
case '|':
28-
tokens.emplace_back(TokenType::VERTICAL_BAR, c);
28+
tokens.emplace_back(TokenType::VERTICAL_BAR, c, mLocation);
2929
break;
3030
case ':':
31-
if (mProgram[++mPosition] == '=') {
32-
tokens.emplace_back(TokenType::ASSIGN, L":=");
31+
if (mProgram[updatePosition()] == '=') {
32+
tokens.emplace_back(TokenType::ASSIGN, L":=", mLocation);
3333
break;
3434
}
35-
throw std::logic_error("Expected = after :");
35+
throw std::logic_error("Expected '=' after ':' at " + mLocation.str());
3636
case '=':
37-
tokens.emplace_back(TokenType::EQUAL, c);
37+
tokens.emplace_back(TokenType::EQUAL, c, mLocation);
3838
break;
3939
case u'≠':
40-
tokens.emplace_back(TokenType::NOT_EQUAL, c);
40+
tokens.emplace_back(TokenType::NOT_EQUAL, c, mLocation);
4141
break;
4242
case u'∪':
43-
tokens.emplace_back(TokenType::UNION, c);
43+
tokens.emplace_back(TokenType::UNION, c, mLocation);
4444
break;
4545
case u'∩':
46-
tokens.emplace_back(TokenType::INTERSECTION, c);
46+
tokens.emplace_back(TokenType::INTERSECTION, c, mLocation);
4747
break;
4848
case u'⊂':
49-
tokens.emplace_back(TokenType::SUBSET, c);
49+
tokens.emplace_back(TokenType::SUBSET, c, mLocation);
5050
break;
5151
case u'⊄':
52-
tokens.emplace_back(TokenType::NOT_SUBSET, c);
52+
tokens.emplace_back(TokenType::NOT_SUBSET, c, mLocation);
5353
break;
5454
case u'∈':
55-
tokens.emplace_back(TokenType::ELEMENT_OF, c);
55+
tokens.emplace_back(TokenType::ELEMENT_OF, c, mLocation);
5656
break;
5757
case '\n':
58-
tokens.emplace_back(TokenType::LINE_BREAK, L"\\n");
58+
tokens.emplace_back(TokenType::LINE_BREAK, L"\\n", mLocation);
59+
mLocation.line++;
60+
mLocation.column = 0;
5961
break;
6062
default:
6163
if (std::isdigit(c)) {
@@ -65,13 +67,13 @@ std::vector<Token> Lexer::scanTokens() {
6567
} else if (!std::isspace(c)) {
6668
throw std::invalid_argument("Character " +
6769
utils::string::wStringToString({c}) +
68-
" is not valid");
70+
" is not valid at " + mLocation.str());
6971
}
7072
break;
7173
}
72-
mPosition++;
74+
updatePosition();
7375
}
74-
tokens.emplace_back(TokenType::END_OF_FILE, L"🔚");
76+
tokens.emplace_back(TokenType::END_OF_FILE, L"🔚", mLocation);
7577
return tokens;
7678
}
7779

@@ -81,12 +83,13 @@ Token Lexer::scanNumber() {
8183
if (!std::isdigit(mProgram[mPosition])) {
8284
break;
8385
}
84-
mPosition++;
86+
updatePosition();
8587
}
8688
auto len = mPosition - start;
87-
mPosition--;
89+
updatePosition(-1);
8890
auto substr = mProgram.substr(start, len);
89-
return {TokenType::NUMBER, std::wstring(substr.cbegin(), substr.cend())};
91+
return {TokenType::NUMBER, std::wstring(substr.cbegin(), substr.cend()),
92+
mLocation};
9093
}
9194

9295
Token Lexer::scanIdentifier() {
@@ -95,12 +98,19 @@ Token Lexer::scanIdentifier() {
9598
if (!isValidForIdentifier(mProgram[mPosition])) {
9699
break;
97100
}
98-
mPosition++;
101+
updatePosition();
99102
}
100103
auto len = mPosition - start;
101-
mPosition--;
104+
updatePosition(-1);
102105
auto substr = mProgram.substr(start, len);
103-
return {TokenType::IDENTIFIER, std::wstring(substr.cbegin(), substr.cend())};
106+
return {TokenType::IDENTIFIER, std::wstring(substr.cbegin(), substr.cend()),
107+
mLocation};
108+
}
109+
110+
unsigned int Lexer::updatePosition(int offset) {
111+
mPosition += offset;
112+
mLocation.column += offset;
113+
return mPosition;
104114
}
105115

106116
bool Lexer::isValidForIdentifier(wchar_t character) {

src/em/Lexer.h

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,29 @@
11
#pragma once
22

33
#include <vector>
4+
45
#include "Token.h"
56

67
namespace em {
78

8-
class Lexer {
9-
public:
10-
explicit Lexer(std::wstring mProgram);
9+
class Lexer {
10+
public:
11+
explicit Lexer(std::wstring mProgram);
12+
13+
std::vector<Token> scanTokens();
1114

12-
std::vector<Token> scanTokens();
15+
private:
16+
Token scanNumber();
1317

14-
private:
15-
Token scanNumber();
18+
Token scanIdentifier();
1619

17-
Token scanIdentifier();
20+
unsigned int updatePosition(int offset = 1);
1821

19-
static bool isValidForIdentifier(wchar_t character);
22+
static bool isValidForIdentifier(wchar_t character);
2023

21-
std::wstring mProgram;
22-
unsigned int mPosition{0};
23-
};
24+
std::wstring mProgram;
25+
unsigned int mPosition{0};
26+
Token::Location mLocation{1, 1};
27+
};
2428

2529
} // namespace em

src/em/Parser.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ Token Parser::consume(TokenType tokenType) {
184184
if (match(tokenType)) {
185185
return previous();
186186
}
187-
throw std::logic_error("Expected " + TokenTypeToString(tokenType));
187+
throw std::logic_error("Expected " + TokenTypeToString(tokenType) + " at " +
188+
mTokens[mPosition].location().str());
188189
}
189190

190191
bool Parser::match(TokenType tokenType) { return match({tokenType}); }

src/em/Token.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,22 @@
33
#include "utils/StringUtils.h"
44

55
namespace em {
6-
Token::Token(TokenType type, std::wstring text)
7-
: mType(type), mText(std::move(text)) {}
6+
Token::Token(TokenType type, std::wstring text, Location location)
7+
: mType(type), mText(std::move(text)), mLocation(location) {}
88

9-
Token::Token(TokenType type, wchar_t text)
10-
: mType(type), mText(std::wstring{text}) {}
9+
Token::Token(TokenType type, wchar_t text, Location location)
10+
: mType(type), mText(std::wstring{text}), mLocation(location) {}
1111

1212
TokenType Token::type() const { return mType; }
1313

1414
std::wstring Token::text() const { return mText; }
1515

16+
Token::Location Token::location() const { return mLocation; }
17+
1618
std::ostream& operator<<(std::ostream& os, const Token& token) {
1719
os << "(" << TokenTypeToString(token.mType) << ", '"
18-
<< utils::string::wStringToString(token.mText) << "')";
20+
<< utils::string::wStringToString(token.mText) << "', ' "
21+
<< token.mLocation.str().c_str() << "')";
1922
return os;
2023
}
2124
} // namespace em

src/em/Token.h

Lines changed: 88 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -6,89 +6,101 @@
66

77
namespace em {
88

9-
enum class TokenType {
10-
LEFT_BRACE,
11-
RIGHT_BRACE,
12-
LEFT_PAREN,
13-
RIGHT_PAREN,
14-
COMMA,
15-
VERTICAL_BAR,
16-
17-
ASSIGN,
18-
19-
EQUAL,
20-
NOT_EQUAL,
21-
UNION,
22-
INTERSECTION,
23-
SUBSET,
24-
NOT_SUBSET,
25-
ELEMENT_OF,
26-
27-
NUMBER,
28-
IDENTIFIER,
29-
30-
LINE_BREAK,
31-
END_OF_FILE
32-
};
9+
enum class TokenType {
10+
LEFT_BRACE,
11+
RIGHT_BRACE,
12+
LEFT_PAREN,
13+
RIGHT_PAREN,
14+
COMMA,
15+
VERTICAL_BAR,
3316

34-
inline std::string TokenTypeToString(TokenType tokenType) {
35-
switch (tokenType) {
36-
case TokenType::LEFT_BRACE:
37-
return "LEFT_BRACKET";
38-
case TokenType::RIGHT_BRACE:
39-
return "RIGHT_BRACKET";
40-
case TokenType::LEFT_PAREN:
41-
return "LEFT_PAREN";
42-
case TokenType::RIGHT_PAREN:
43-
return "RIGHT_PAREN";
44-
case TokenType::COMMA:
45-
return "COMMA";
46-
case TokenType::VERTICAL_BAR:
47-
return "VERTICAL_BAR";
48-
case TokenType::ASSIGN:
49-
return "ASSIGN";
50-
case TokenType::NUMBER:
51-
return "NUMBER";
52-
case TokenType::EQUAL:
53-
return "EQUAL";
54-
case TokenType::NOT_EQUAL:
55-
return "NOT_EQUAL";
56-
case TokenType::UNION:
57-
return "UNION";
58-
case TokenType::INTERSECTION:
59-
return "INTERSECTION";
60-
case TokenType::SUBSET:
61-
return "SUBSET";
62-
case TokenType::NOT_SUBSET:
63-
return "NOT_SUBSET";
64-
case TokenType::ELEMENT_OF:
65-
return "ELEMENT_OF";
66-
case TokenType::IDENTIFIER:
67-
return "IDENTIFIER";
68-
case TokenType::LINE_BREAK:
69-
return "LINE_BREAK";
70-
case TokenType::END_OF_FILE:
71-
return "END_OF_FILE";
72-
default:
73-
assert(false);
74-
}
75-
}
17+
ASSIGN,
7618

77-
class Token {
78-
public:
79-
Token(TokenType type, std::wstring text);
19+
EQUAL,
20+
NOT_EQUAL,
21+
UNION,
22+
INTERSECTION,
23+
SUBSET,
24+
NOT_SUBSET,
25+
ELEMENT_OF,
8026

81-
Token(TokenType type, wchar_t text);
27+
NUMBER,
28+
IDENTIFIER,
8229

83-
[[nodiscard]] TokenType type() const;
30+
LINE_BREAK,
31+
END_OF_FILE
32+
};
8433

85-
[[nodiscard]] std::wstring text() const;
34+
inline std::string TokenTypeToString(TokenType tokenType) {
35+
switch (tokenType) {
36+
case TokenType::LEFT_BRACE:
37+
return "LEFT_BRACKET";
38+
case TokenType::RIGHT_BRACE:
39+
return "RIGHT_BRACKET";
40+
case TokenType::LEFT_PAREN:
41+
return "LEFT_PAREN";
42+
case TokenType::RIGHT_PAREN:
43+
return "RIGHT_PAREN";
44+
case TokenType::COMMA:
45+
return "COMMA";
46+
case TokenType::VERTICAL_BAR:
47+
return "VERTICAL_BAR";
48+
case TokenType::ASSIGN:
49+
return "ASSIGN";
50+
case TokenType::NUMBER:
51+
return "NUMBER";
52+
case TokenType::EQUAL:
53+
return "EQUAL";
54+
case TokenType::NOT_EQUAL:
55+
return "NOT_EQUAL";
56+
case TokenType::UNION:
57+
return "UNION";
58+
case TokenType::INTERSECTION:
59+
return "INTERSECTION";
60+
case TokenType::SUBSET:
61+
return "SUBSET";
62+
case TokenType::NOT_SUBSET:
63+
return "NOT_SUBSET";
64+
case TokenType::ELEMENT_OF:
65+
return "ELEMENT_OF";
66+
case TokenType::IDENTIFIER:
67+
return "IDENTIFIER";
68+
case TokenType::LINE_BREAK:
69+
return "LINE_BREAK";
70+
case TokenType::END_OF_FILE:
71+
return "END_OF_FILE";
72+
default:
73+
assert(false);
74+
}
75+
}
8676

87-
friend std::ostream& operator<<(std::ostream& os, const Token& token);
77+
class Token {
78+
public:
79+
struct Location {
80+
unsigned int line, column;
8881

89-
private:
90-
TokenType mType;
91-
std::wstring mText;
82+
[[nodiscard]] std::string str() const {
83+
return "line: " + std::to_string(line) +
84+
", col:" + std::to_string(column);
85+
}
9286
};
9387

88+
Token(TokenType type, std::wstring text, Location location);
89+
90+
Token(TokenType type, wchar_t text, Location location);
91+
92+
[[nodiscard]] TokenType type() const;
93+
94+
[[nodiscard]] std::wstring text() const;
95+
96+
[[nodiscard]] Location location() const;
97+
98+
friend std::ostream& operator<<(std::ostream& os, const Token& token);
99+
100+
private:
101+
TokenType mType;
102+
std::wstring mText;
103+
Location mLocation;
104+
};
105+
94106
} // namespace em

0 commit comments

Comments
 (0)