Skip to content

Commit 51bd234

Browse files
committed
Rewrite error diagnostic mechanism
1 parent fa37711 commit 51bd234

File tree

3 files changed

+129
-104
lines changed

3 files changed

+129
-104
lines changed

src/globals.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1652,24 +1652,40 @@ void fatal(char *msg)
16521652
abort();
16531653
}
16541654

1655+
/* Reports an error and prints the source context at the position where it occurred;
1656+
* if the given location is NULL or source file is missing,
1657+
* then falls back to fatal(char *).
1658+
*/
16551659
void error_at(char *msg, source_location_t *loc)
16561660
{
1657-
int offset, start_idx, i = 0, len = loc->len, pos = loc->pos;
1661+
int offset, start_idx, i = 0, len, pos;
16581662
char diagnostic[MAX_LINE_LEN];
1663+
1664+
if (!loc)
1665+
fatal(msg);
1666+
1667+
len = loc->len;
1668+
pos = loc->pos;
1669+
16591670
strbuf_t *src = hashmap_get(SRC_FILE_MAP, loc->filename);
16601671

1672+
if (!src)
1673+
fatal(msg);
1674+
16611675
if (len < 1)
16621676
len = 1;
16631677

16641678
printf("%s:%d:%d: [Error]: %s\n", loc->filename, loc->line, loc->column,
16651679
msg);
16661680
printf("%6d | ", loc->line);
16671681

1682+
/* Finds line's start position */
16681683
for (offset = pos; offset >= 0 && src->elements[offset] != '\n'; offset--)
16691684
;
16701685

16711686
start_idx = offset + 1;
16721687

1688+
/* Copies whole line to diagnostic buffer */
16731689
for (offset = start_idx;
16741690
offset < src->capacity && src->elements[offset] != '\n' &&
16751691
src->elements[offset] != '\0';

src/lexer.c

Lines changed: 54 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ token_t *new_token(token_kind_t kind, source_location_t *loc, int len)
202202
return token;
203203
}
204204

205-
token_t *lex_token(strbuf_t *buf, source_location_t *loc, token_t *prev)
205+
token_t *lex_token(strbuf_t *buf, source_location_t *loc)
206206
{
207207
token_t *token;
208208
char token_buffer[MAX_TOKEN_LEN], ch = peek_char(buf, 0);
@@ -274,7 +274,7 @@ token_t *lex_token(strbuf_t *buf, source_location_t *loc, token_t *prev)
274274
pos++;
275275
loc->column += 2;
276276
buf->size = pos;
277-
return lex_token(buf, loc, prev);
277+
return lex_token(buf, loc);
278278
}
279279
}
280280

@@ -297,7 +297,7 @@ token_t *lex_token(strbuf_t *buf, source_location_t *loc, token_t *prev)
297297
} while (ch && !is_newline(ch));
298298
loc->column += pos - buf->size + 1;
299299
buf->size = pos;
300-
return lex_token(buf, loc, prev);
300+
return lex_token(buf, loc);
301301
}
302302

303303
if (ch == '=') {
@@ -920,7 +920,8 @@ token_stream_t *lex_token_by_file(char *filename)
920920
/* FIXME: We should normalize the filename first to make the cache work as expected
921921
*/
922922

923-
token_t *head = NULL, *tail = NULL, *cur = NULL, *prev = NULL;
923+
token_t head;
924+
token_t *cur = &head;
924925
token_stream_t *tks;
925926
/* initialize source location with the following configuration:
926927
* pos is at 0,
@@ -942,47 +943,35 @@ token_stream_t *lex_token_by_file(char *filename)
942943
buf->size = 0;
943944

944945
while (buf->size < buf->capacity) {
945-
cur = lex_token(buf, &loc, prev);
946-
947-
if (cur->kind != T_whitespace && cur->kind != T_tab &&
948-
cur->kind != T_eof)
949-
prev = cur;
950-
951-
/* Append token to token stream */
952-
if (!head) {
953-
/* Token stream unintialized */
954-
head = cur;
955-
tail = head;
956-
}
957-
958-
tail->next = cur;
959-
tail = cur;
946+
cur->next = lex_token(buf, &loc);
947+
cur = cur->next;
960948

961949
if (cur->kind == T_eof)
962950
break;
963951
}
964952

965-
if (!head) {
966-
head = arena_calloc(TOKEN_ARENA, 1, sizeof(token_t));
967-
head->kind = T_eof;
968-
memcpy(&head->location, &loc, sizeof(source_location_t));
969-
tail = head;
953+
if (!head.next) {
954+
head.next = arena_calloc(TOKEN_ARENA, 1, sizeof(token_t));
955+
head.next->kind = T_eof;
956+
memcpy(&head.next->location, &loc, sizeof(source_location_t));
957+
cur = head.next;
970958
}
971959

972-
if (tail->kind != T_eof)
960+
if (cur->kind != T_eof)
973961
error_at("Internal error, expected eof at the end of file",
974-
&tail->location);
962+
&cur->location);
975963

976964
tks = malloc(sizeof(token_stream_t));
977-
tks->head = head;
978-
tks->tail = tail;
965+
tks->head = head.next;
966+
tks->tail = cur;
979967
hashmap_put(TOKEN_CACHE, filename, tks);
980968
return tks;
981969
}
982970

983971
token_stream_t *include_libc()
984972
{
985-
token_t *head = NULL, *tail = NULL, *cur = NULL, *prev = NULL;
973+
token_t head;
974+
token_t *cur = &head, *tk;
986975
token_stream_t *tks;
987976
char *filename = "lib/c.c";
988977
strbuf_t *buf = LIBC_SRC;
@@ -1000,41 +989,29 @@ token_stream_t *include_libc()
1000989
buf->size = 0;
1001990

1002991
while (buf->size < buf->capacity) {
1003-
cur = lex_token(buf, &loc, prev);
1004-
1005-
if (cur->kind != T_whitespace && cur->kind != T_tab &&
1006-
cur->kind != T_eof)
1007-
prev = cur;
1008-
1009-
/* Append token to token stream */
1010-
if (!head) {
1011-
/* Token stream unintialized */
1012-
head = cur;
1013-
tail = head;
1014-
}
1015-
1016-
tail->next = cur;
1017-
tail = cur;
992+
tk = lex_token(buf, &loc);
1018993

1019-
if (cur->kind == T_eof)
994+
/* Early break to discard eof token, so later
995+
* we can concat libc token stream with actual
996+
* input file's token stream.
997+
*/
998+
if (tk->kind == T_eof)
1020999
break;
1000+
1001+
cur->next = tk;
1002+
cur = cur->next;
10211003
}
10221004

1023-
if (!head)
1005+
if (!head.next)
10241006
fatal("Unable to include libc");
10251007

1026-
if (tail->kind != T_eof)
1008+
if (tk->kind != T_eof)
10271009
error_at("Internal error, expected eof at the end of file",
1028-
&tail->location);
1029-
1030-
/* Discard eof so later input file's token stream can join at correct
1031-
* position */
1032-
prev->next = NULL;
1033-
tail = prev;
1010+
&cur->location);
10341011

10351012
tks = malloc(sizeof(token_stream_t));
1036-
tks->head = head;
1037-
tks->tail = tail;
1013+
tks->head = head.next;
1014+
tks->tail = cur;
10381015
hashmap_put(TOKEN_CACHE, filename, tks);
10391016
return tks;
10401017
}
@@ -1051,6 +1028,25 @@ void skip_unused_token(void)
10511028
}
10521029
}
10531030

1031+
/* Fetches current token's location. */
1032+
source_location_t *cur_token_loc()
1033+
{
1034+
return &cur_token->location;
1035+
}
1036+
1037+
/* Finds next token's location (whitespace, tab, and newline tokens are skipped);
1038+
* if current token is eof, then returns eof token's location instead.
1039+
*/
1040+
source_location_t *next_token_loc()
1041+
{
1042+
skip_unused_token();
1043+
1044+
if (cur_token->kind == T_eof)
1045+
return &cur_token->location;
1046+
1047+
return &cur_token->next->location;
1048+
}
1049+
10541050
/* Lex next token with aliasing enabled */
10551051
token_kind_t lex_next(void)
10561052
{
@@ -1063,9 +1059,7 @@ token_kind_t lex_next(void)
10631059
return cur_token->kind;
10641060
}
10651061

1066-
/* Accepts next token if token types are matched. To disable aliasing on next
1067-
* token, use 'lex_accept_internal'.
1068-
*/
1062+
/* Accepts next token if token types are matched. */
10691063
bool lex_accept(token_kind_t kind)
10701064
{
10711065
skip_unused_token();
@@ -1092,7 +1086,7 @@ bool lex_peek(token_kind_t kind, char *value)
10921086
}
10931087

10941088
/* Strictly match next token with given token type and copy token's literal to
1095-
* value. To disable aliasing on next token, use 'lex_ident_internal'.
1089+
* value.
10961090
*/
10971091
void lex_ident(token_kind_t token, char *value)
10981092
{
@@ -1107,8 +1101,7 @@ void lex_ident(token_kind_t token, char *value)
11071101
error_at("Unexpected token", &tk->location);
11081102
}
11091103

1110-
/* Strictly match next token with given token type. To disable aliasing on next
1111-
* token, use 'lex_expect_internal'.
1104+
/* Strictly match next token with given token type.
11121105
*/
11131106
void lex_expect(token_kind_t token)
11141107
{

0 commit comments

Comments
 (0)