Skip to content

Commit

Permalink
feat(scanner): store uint16 in String
Browse files Browse the repository at this point in the history
  • Loading branch information
calebdw committed Jan 28, 2024
1 parent 7ef25b2 commit 45488df
Showing 1 changed file with 30 additions and 10 deletions.
40 changes: 30 additions & 10 deletions common/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,12 @@ enum TokenType {
typedef struct {
uint32_t len;
uint32_t cap;
char *data;
uint16_t *data;
} String;

static String string_new() { return (String){.cap = 16, .len = 0, .data = calloc(1, sizeof(char) * 17)}; }
static String string_new() {
return (String){.cap = 16, .len = 0, .data = calloc(17, sizeof(uint16_t))};
}

typedef struct {
String word;
Expand All @@ -114,6 +116,20 @@ typedef struct {

// Two-valued result code (Error or End). Presumably the return type of the
// content-scanning helpers — usage is not visible here; confirm against callers.
typedef enum { Error, End } ScanContentResult;

/*
 * Three-way comparison of two String values.
 *
 * Ordering is length-first, NOT lexicographic: a shorter string always sorts
 * before a longer one. Only strings of equal length are compared element by
 * element, and the first differing element decides the result.
 *
 * Returns -1 if a sorts before b, 1 if a sorts after b, and 0 when both have
 * the same length and identical elements.
 */
static inline signed string_cmp(String a, String b) {
    if (a.len < b.len) {
        return -1;
    }
    if (a.len > b.len) {
        return 1;
    }

    // Lengths match from here on, so a.len bounds both buffers.
    uint32_t pos = 0;
    while (pos < a.len) {
        if (a.data[pos] < b.data[pos]) {
            return -1;
        }
        if (a.data[pos] > b.data[pos]) {
            return 1;
        }
        pos++;
    }

    return 0;
}

// Invoke the lexer's advance callback with its second argument set to false.
// (Presumably this consumes the lookahead as part of the current token, in
// contrast to skip() — confirm against the tree-sitter TSLexer documentation.)
static inline void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}

// Invoke the lexer's advance callback with its second argument set to true.
// (Presumably this moves past the lookahead without including it in the
// current token — confirm against the tree-sitter TSLexer documentation.)
static inline void skip(TSLexer *lexer) {
    lexer->advance(lexer, true);
}
Expand All @@ -124,13 +140,14 @@ static unsigned serialize(Scanner *scanner, char *buffer) {
buffer[size++] = (char)scanner->open_heredocs.len;
for (unsigned j = 0; j < scanner->open_heredocs.len; j++) {
Heredoc *heredoc = &scanner->open_heredocs.data[j];
if (size + 2 + heredoc->word.len >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
unsigned word_bytes = heredoc->word.len * sizeof(heredoc->word.data[0]);
if (size + 2 + word_bytes >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
return 0;
}
buffer[size++] = (char)heredoc->end_word_indentation_allowed;
buffer[size++] = (char)heredoc->word.len;
memcpy(&buffer[size], heredoc->word.data, heredoc->word.len);
size += heredoc->word.len;
memcpy(&buffer[size], heredoc->word.data, word_bytes);
size += word_bytes;
}

return size;
Expand All @@ -151,10 +168,11 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
heredoc.end_word_indentation_allowed = buffer[size++];
heredoc.word = string_new();
uint8_t word_length = buffer[size++];
unsigned word_bytes = word_length * sizeof(heredoc.word.data[0]);
STRING_GROW(heredoc.word, word_length);
memcpy(heredoc.word.data, buffer + size, word_length);
memcpy(heredoc.word.data, buffer + size, word_bytes);
heredoc.word.len = word_length;
size += word_length;
size += word_bytes;
VEC_PUSH(scanner->open_heredocs, heredoc);
}
}
Expand Down Expand Up @@ -182,7 +200,9 @@ static inline bool scan_whitespace(TSLexer *lexer) {
}
}

static inline bool is_valid_name_char(TSLexer *lexer) { return iswalnum(lexer->lookahead) || lexer->lookahead == '_'; }
// Report whether the lexer's current lookahead may appear in a name.
// Accepted: anything iswalnum() classifies as alphanumeric, the underscore,
// and every lookahead value at or above 0x80 (i.e. outside 7-bit ASCII).
static inline bool is_valid_name_char(TSLexer *lexer) {
    if (iswalnum(lexer->lookahead)) {
        return true;
    }
    if (lexer->lookahead == '_') {
        return true;
    }
    return lexer->lookahead >= 0x80;
}

static inline bool is_escapable_sequence(TSLexer *lexer) {
// Note: remember to also update the escape_sequence rule in the
Expand Down Expand Up @@ -346,7 +366,7 @@ static bool scan_encapsed_part_string(Scanner *scanner, TSLexer *lexer, bool is_
case '\\':
advance(lexer);

// \{ should not be interprented as an escape sequence, but both
// \{ should not be interpreted as an escape sequence, but both
// should be consumed as normal characters
if (lexer->lookahead == '{') {
advance(lexer);
Expand Down Expand Up @@ -499,7 +519,7 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
}

String word = scan_heredoc_word(lexer);
if (strcmp(word.data, heredoc.word.data) != 0) {
if (string_cmp(word, heredoc.word) != 0) {
STRING_FREE(word);
return false;
}
Expand Down

0 comments on commit 45488df

Please sign in to comment.