From f0a8ff2b7dadbbc4e32fc2a3417791842d13d79f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Tup=C3=BD?= Date: Sat, 20 Jan 2024 22:27:30 +0100 Subject: [PATCH] Added support for char literals. --- source/abstract_syntax_tree/node.cpp | 1 + source/abstract_syntax_tree/node.h | 1 + source/compiler/compiler/diagnostics.h | 2 ++ .../compiler/type_system/semantic_context.cpp | 1 + source/compiler/test/main.s | 7 ++----- source/ir_translator/ir_translator.cpp | 14 ++++++++++++- source/ir_translator/ir_translator.h | 1 + source/parser/data_type.cpp | 1 + source/parser/parser.cpp | 17 ++++++++++++++++ source/parser/parser.h | 1 + source/tests/main.cpp | 20 ++++++++++--------- source/tokenizer/token.cpp | 3 +++ source/tokenizer/token.h | 2 ++ source/tokenizer/tokenizer.cpp | 20 ++++++++++++++++++- source/tokenizer/tokenizer.h | 5 ++++- source/type_checker/type_checker.cpp | 7 +++++++ source/type_checker/type_checker.h | 1 + source/utility/filesystem/filepath.h | 4 ++++ tests/literals/char.s | 4 ++++ tests/literals/i16.s | 5 +++++ tests/literals/i32.s | 5 +++-- tests/literals/i64.s | 5 +++++ tests/literals/i8.s | 5 +++++ tests/literals/string.s | 5 +++++ tests/literals/u16.s | 5 +++++ tests/literals/u32.s | 5 +++++ tests/literals/u64.s | 5 +++++ tests/literals/u8.s | 5 +++++ 28 files changed, 138 insertions(+), 19 deletions(-) create mode 100644 tests/literals/char.s create mode 100644 tests/literals/i16.s create mode 100644 tests/literals/i64.s create mode 100644 tests/literals/i8.s create mode 100644 tests/literals/string.s create mode 100644 tests/literals/u16.s create mode 100644 tests/literals/u32.s create mode 100644 tests/literals/u64.s create mode 100644 tests/literals/u8.s diff --git a/source/abstract_syntax_tree/node.cpp b/source/abstract_syntax_tree/node.cpp index 2dc66d28..2eb63821 100644 --- a/source/abstract_syntax_tree/node.cpp +++ b/source/abstract_syntax_tree/node.cpp @@ -28,6 +28,7 @@ namespace sigma { case OPERATOR_MODULO: return "OPERATOR_MODULO"; case NUMERICAL_LITERAL: return "NUMERICAL_LITERAL"; + case CHARACTER_LITERAL: return "CHARACTER_LITERAL"; case STRING_LITERAL: return "STRING_LITERAL"; case BOOL_LITERAL: return "BOOL_LITERAL"; diff --git a/source/abstract_syntax_tree/node.h b/source/abstract_syntax_tree/node.h index 32c8c70e..7084015d 100644 --- a/source/abstract_syntax_tree/node.h +++ b/source/abstract_syntax_tree/node.h @@ -42,6 +42,7 @@ namespace sigma { OPERATOR_MODULO, NUMERICAL_LITERAL, + CHARACTER_LITERAL, STRING_LITERAL, BOOL_LITERAL }; diff --git a/source/compiler/compiler/diagnostics.h b/source/compiler/compiler/diagnostics.h index 418d45eb..35c39eed 100644 --- a/source/compiler/compiler/diagnostics.h +++ b/source/compiler/compiler/diagnostics.h @@ -19,6 +19,7 @@ namespace sigma { // tokenizer (2000 - 2999) INVALID_STRING_TERMINATOR = 2000, + INVALID_CHAR_TERMINATOR, NUMERICAL_LITERAL_FP_WITHOUT_DOT, NUMERICAL_LITERAL_UNSIGNED_WITH_DOT, NUMERICAL_LITERAL_MORE_THAN_ONE_DOT, @@ -85,6 +86,7 @@ namespace sigma { // tokenizer { code::INVALID_STRING_TERMINATOR, "invalid string literal terminator detected" }, + { code::INVALID_CHAR_TERMINATOR, "invalid char literal terminator detected" }, { code::NUMERICAL_LITERAL_FP_WITHOUT_DOT, "numerical floating-point literal without '.' character detected" }, { code::NUMERICAL_LITERAL_UNSIGNED_WITH_DOT, "unsigned numerical literal with '.' character detected" }, { code::NUMERICAL_LITERAL_MORE_THAN_ONE_DOT, "numerical literal with more than one '.' character detected" }, diff --git a/source/compiler/compiler/type_system/semantic_context.cpp b/source/compiler/compiler/type_system/semantic_context.cpp index 7f636813..48c05e38 100644 --- a/source/compiler/compiler/type_system/semantic_context.cpp +++ b/source/compiler/compiler/type_system/semantic_context.cpp @@ -400,6 +400,7 @@ namespace sigma { case data_type::U8: return I8_TYPE; case data_type::I16: case data_type::U16: return I16_TYPE; + case data_type::CHAR: case data_type::I32: case data_type::U32: return I32_TYPE; case data_type::I64: diff --git a/source/compiler/test/main.s b/source/compiler/test/main.s index f07c7294..43996bf6 100644 --- a/source/compiler/test/main.s +++ b/source/compiler/test/main.s @@ -5,13 +5,10 @@ // - check for memory oversteps in the parser // - add namespaces to error messages, whenever applicable (ie. x::y::test) // - namespace directives should probably be a part of the function signature? -// - handle literal overflow in the typechecker, instead of the IR translator i32 main() { - u64 size = 20; - i32* value = malloc(size); - printf("%d\n", value); - void x; + char c = 'x'; + printf("test %c\n", c); ret 0; } diff --git a/source/ir_translator/ir_translator.cpp b/source/ir_translator/ir_translator.cpp index 9849e538..4c570dca 100644 --- a/source/ir_translator/ir_translator.cpp +++ b/source/ir_translator/ir_translator.cpp @@ -35,6 +35,7 @@ namespace sigma { // literals case node_type::NUMERICAL_LITERAL: return translate_numerical_literal(ast_node); + case node_type::CHARACTER_LITERAL: return translate_character_literal(ast_node); case node_type::STRING_LITERAL: return translate_string_literal(ast_node); case node_type::BOOL_LITERAL: return translate_bool_literal(ast_node); default: PANIC("irgen for node '{}' is not implemented", ast_node->type.to_string()); @@ -169,6 +170,12 @@ namespace sigma { return literal_to_ir(numerical_literal_node->get()); } + auto ir_translator::translate_character_literal(handle character_literal_node) const -> handle { + const std::string& value = m_context.strings.get(character_literal_node->get().value_key); + ASSERT(value.size() == 1, "invalid char literal length"); + return m_context.builder.create_signed_integer(value[0], 32); + } + auto ir_translator::translate_string_literal(handle string_literal_node) const -> handle { const std::string& value = m_context.strings.get(string_literal_node->get().value_key); return m_context.builder.create_string(value); @@ -238,10 +245,15 @@ namespace sigma { NOT_IMPLEMENTED(); } - bool overflow = false; + bool overflow; // ignored switch (literal.type.base_type) { + case data_type::I8: return m_context.builder.create_signed_integer(utility::detail::from_string(value, overflow), 8); + case data_type::I16: return m_context.builder.create_signed_integer(utility::detail::from_string(value, overflow), 16); case data_type::I32: return m_context.builder.create_signed_integer(utility::detail::from_string(value, overflow), 32); + case data_type::I64: return m_context.builder.create_signed_integer(utility::detail::from_string(value, overflow), 64); + case data_type::U8: return m_context.builder.create_unsigned_integer(utility::detail::from_string(value, overflow), 8); + case data_type::U16: return m_context.builder.create_unsigned_integer(utility::detail::from_string(value, overflow), 16); case data_type::U32: return m_context.builder.create_unsigned_integer(utility::detail::from_string(value, overflow), 32); case data_type::U64: return m_context.builder.create_unsigned_integer(utility::detail::from_string(value, overflow), 64); default: NOT_IMPLEMENTED(); diff --git a/source/ir_translator/ir_translator.h b/source/ir_translator/ir_translator.h index 4ea4572c..70f6a955 100644 --- a/source/ir_translator/ir_translator.h +++ b/source/ir_translator/ir_translator.h @@ -36,6 +36,7 @@ namespace sigma { void translate_branch(handle branch_node, handle exit_control); auto translate_numerical_literal(handle numerical_literal_node) const->handle; + auto translate_character_literal(handle character_literal_node) const->handle; auto translate_string_literal(handle string_literal_node) const->handle; auto translate_bool_literal(handle bool_literal_node) const->handle; diff --git a/source/parser/data_type.cpp b/source/parser/data_type.cpp index 86a463fe..3e61eeae 100644 --- a/source/parser/data_type.cpp +++ b/source/parser/data_type.cpp @@ -89,6 +89,7 @@ namespace sigma { case token_type::U64: return U64; case token_type::BOOL: return BOOL; case token_type::VOID: return VOID; + case token_type::CHAR: return CHAR; default: PANIC("undefined token -> type conversion for token '{}'", token.to_string()); } diff --git a/source/parser/parser.cpp b/source/parser/parser.cpp index 1317590c..44624048 100644 --- a/source/parser/parser.cpp +++ b/source/parser/parser.cpp @@ -496,6 +496,7 @@ namespace sigma { switch (m_tokens.get_current_token()) { case token_type::IDENTIFIER: return parse_identifier_statement(); case token_type::STRING_LITERAL: return parse_string_literal(); + case token_type::CHARACTER_LITERAL: return parse_character_literal(); case token_type::MINUS_SIGN: return parse_negative_expression(); case token_type::BOOL_LITERAL_TRUE: case token_type::BOOL_LITERAL_FALSE: return parse_bool_literal(); @@ -556,6 +557,22 @@ namespace sigma { return literal_node; } + auto parser::parse_character_literal() const -> utility::result> { + EXPECT_CURRENT_TOKEN(token_type::CHARACTER_LITERAL); + const handle location = m_tokens.get_current_token_location(); + + // create the string node + const handle char_node = create_node(node_type::CHARACTER_LITERAL, 0); + + // initialize the literal + auto& literal = char_node->get(); + literal.value_key = m_tokens.get_current().symbol_key; + literal.location = location; + literal.type = { data_type::CHAR, 0 }; // char + + return char_node; + } + auto parser::parse_string_literal() const -> utility::result> { EXPECT_CURRENT_TOKEN(token_type::STRING_LITERAL); const handle location = m_tokens.get_current_token_location(); diff --git a/source/parser/parser.h b/source/parser/parser.h index f0a08ec9..d0f1e8a0 100644 --- a/source/parser/parser.h +++ b/source/parser/parser.h @@ -51,6 +51,7 @@ namespace sigma { // literals auto parse_numerical_literal() const-> utility::result>; + auto parse_character_literal() const->utility::result>; auto parse_string_literal() const-> utility::result>; auto parse_bool_literal() const -> utility::result>; diff --git a/source/tests/main.cpp b/source/tests/main.cpp index 453accb9..e73784d1 100644 --- a/source/tests/main.cpp +++ b/source/tests/main.cpp @@ -4,23 +4,25 @@ using namespace utility::types; -#define STOUD_FILE "STDOUT.txt" -#define STERR_FILE "STDERR.txt" +#define STDOUT_FILE "STDOUT.txt" +#define STDERR_FILE "STDERR.txt" bool run_test(const filepath& path, const filepath& compiler_path) { - const std::string command = std::format("{} compile {} -e none > {} 2> {}", compiler_path, path, STOUD_FILE, STERR_FILE); + const std::string command = std::format("{} compile {} -e none > {} 2> {}", compiler_path, path, STDOUT_FILE, STDERR_FILE); + const filepath& pretty_path = path.get_parent_path().get_filename() / path.get_filename(); + const i32 return_code = utility::shell::execute(command); if(return_code == 0) { - utility::console::print("{:<30} OK\n", path.get_filename().to_string()); + utility::console::print("{:<40} OK\n", pretty_path.to_string()); return false; } - utility::console::printerr("{:<30} ERROR\n", path.get_filename().to_string()); + utility::console::printerr("{:<40} ERROR\n", pretty_path.to_string()); - const auto file_result = utility::fs::file::load(STERR_FILE); + const auto file_result = utility::fs::file::load(STDERR_FILE); if(file_result.has_error()) { - throw std::runtime_error(std::format("cannot open file {}", STERR_FILE).c_str()); + throw std::runtime_error(std::format("cannot open file {}", STDERR_FILE).c_str()); } utility::console::printerr("'{}'\n", file_result.get_value()); @@ -57,8 +59,8 @@ i32 run_all_tests(const parametric::parameters& params) { // cleanup try { - utility::fs::remove(STOUD_FILE); - utility::fs::remove(STERR_FILE); + utility::fs::remove(STDOUT_FILE); + utility::fs::remove(STDERR_FILE); } catch(const std::exception& exception) { utility::console::printerr("error: {}\n", exception.what()); encountered_error = true; diff --git a/source/tokenizer/token.cpp b/source/tokenizer/token.cpp index 88baa100..2cbbcdcc 100644 --- a/source/tokenizer/token.cpp +++ b/source/tokenizer/token.cpp @@ -16,6 +16,7 @@ namespace sigma { case token_type::U64: case token_type::BOOL: case token_type::VOID: + case token_type::CHAR: return true; default: return false; @@ -67,6 +68,7 @@ namespace sigma { case token_type::BOOL: return "BOOL"; case token_type::VOID: return "VOID"; + case token_type::CHAR: return "CHAR"; // control flow case token_type::RET: return "RET"; @@ -84,6 +86,7 @@ namespace sigma { case token_type::HEXADECIMAL_LITERAL: return "HEXADECIMAL_LITERAL"; case token_type::BINARY_LITERAL: return "BINARY_LITERAL"; case token_type::STRING_LITERAL: return "STRING_LITERAL"; + case token_type::CHARACTER_LITERAL: return "CHARACTER_LITERAL"; case token_type::BOOL_LITERAL_TRUE: return "BOOL_LITERAL_TRUE"; case token_type::BOOL_LITERAL_FALSE: return "BOOL_LITERAL_FALSE"; diff --git a/source/tokenizer/token.h b/source/tokenizer/token.h index f0dcb7c0..e99823e1 100644 --- a/source/tokenizer/token.h +++ b/source/tokenizer/token.h @@ -41,6 +41,7 @@ namespace sigma { BOOL, VOID, + CHAR, // control flow RET, // ret @@ -58,6 +59,7 @@ namespace sigma { HEXADECIMAL_LITERAL, // 0x / 0X BINARY_LITERAL, // 0b / 0B STRING_LITERAL, // "text" + CHARACTER_LITERAL, // 'x' BOOL_LITERAL_TRUE, // true BOOL_LITERAL_FALSE, // false diff --git a/source/tokenizer/tokenizer.cpp b/source/tokenizer/tokenizer.cpp index 1b8857ca..e07d836f 100644 --- a/source/tokenizer/tokenizer.cpp +++ b/source/tokenizer/tokenizer.cpp @@ -59,7 +59,7 @@ namespace sigma { // single quote characters are interpreted as character literals if(m_last_character == '\'') { - NOT_IMPLEMENTED(); + return get_char_literal_token(); } // double quote characters are interpreted as string literals @@ -255,6 +255,24 @@ namespace sigma { }; } + auto tokenizer::get_char_literal_token() -> utility::result { + get_next_char(); // read the character after the opening quote + m_current_section = get_escaped_character(); + get_next_char(); // read the closing quote + + if(m_last_character != '\'') { + return error::emit(error::code::INVALID_CHAR_TERMINATOR); + } + + get_next_char(); + + return token_info{ + .tok = { token_type::CHARACTER_LITERAL }, + .location = m_context.allocator.emplace(m_token_start_location), + .symbol_key = m_context.strings.insert(m_current_section) + }; + } + auto tokenizer::get_special_token() -> utility::result { m_current_section = m_last_character; diff --git a/source/tokenizer/tokenizer.h b/source/tokenizer/tokenizer.h index 39c10657..e8287e1c 100644 --- a/source/tokenizer/tokenizer.h +++ b/source/tokenizer/tokenizer.h @@ -16,6 +16,7 @@ namespace sigma { [[nodiscard]] auto get_alphabetical_token() -> utility::result; [[nodiscard]] auto get_numerical_token() -> utility::result; [[nodiscard]] auto get_string_literal_token() -> utility::result; + [[nodiscard]] auto get_char_literal_token() -> utility::result; [[nodiscard]] auto get_special_token() -> utility::result; void consume_spaces(); @@ -52,9 +53,11 @@ namespace sigma { { "u32", token_type::U32 }, { "u64", token_type::U64 }, { "bool", token_type::BOOL }, + { "void", token_type::VOID }, + { "char", token_type::CHAR }, + { "true", token_type::BOOL_LITERAL_TRUE }, { "false", token_type::BOOL_LITERAL_FALSE }, - { "void", token_type::VOID }, }; const std::unordered_map m_special_tokens = { diff --git a/source/type_checker/type_checker.cpp b/source/type_checker/type_checker.cpp index adafacff..ae7b5ac3 100644 --- a/source/type_checker/type_checker.cpp +++ b/source/type_checker/type_checker.cpp @@ -48,6 +48,7 @@ namespace sigma { // literals { node_type::NUMERICAL_LITERAL, &type_checker::type_check_numerical_literal }, + { node_type::CHARACTER_LITERAL, &type_checker::type_check_character_literal }, { node_type::STRING_LITERAL, &type_checker::type_check_string_literal }, { node_type::BOOL_LITERAL, &type_checker::type_check_bool_literal }, }; @@ -269,6 +270,12 @@ namespace sigma { return literal.type; } + auto type_checker::type_check_character_literal(handle literal_node, data_type expected) -> utility::result { + auto& literal = literal_node->get(); + apply_expected_data_type(literal.type, expected); + return literal.type; + } + auto type_checker::type_check_string_literal(handle literal_node, data_type expected) -> utility::result { auto& literal = literal_node->get(); apply_expected_data_type(literal.type, expected); diff --git a/source/type_checker/type_checker.h b/source/type_checker/type_checker.h index 60f43d5f..074b37fb 100644 --- a/source/type_checker/type_checker.h +++ b/source/type_checker/type_checker.h @@ -30,6 +30,7 @@ namespace sigma { auto type_check_variable_assignment(handle assignment_node, data_type expected)->utility::result; auto type_check_numerical_literal(handle literal_node, data_type expected) -> utility::result; + auto type_check_character_literal(handle literal_node, data_type expected) -> utility::result; auto type_check_string_literal(handle literal_node, data_type expected) -> utility::result; auto type_check_bool_literal(handle literal_node, data_type expected) -> utility::result; diff --git a/source/utility/filesystem/filepath.h b/source/utility/filesystem/filepath.h index 76f658b4..22632d5b 100644 --- a/source/utility/filesystem/filepath.h +++ b/source/utility/filesystem/filepath.h @@ -58,6 +58,10 @@ namespace utility::types { auto get_filename() const -> filepath { return m_path.filename(); } + + auto operator/(const filepath& other) { + return m_path / other.m_path; + } private: path_type m_path; }; diff --git a/tests/literals/char.s b/tests/literals/char.s new file mode 100644 index 00000000..9bf1107b --- /dev/null +++ b/tests/literals/char.s @@ -0,0 +1,4 @@ +i32 main() { + char c = 'x'; + ret 0; +} diff --git a/tests/literals/i16.s b/tests/literals/i16.s new file mode 100644 index 00000000..07c127e6 --- /dev/null +++ b/tests/literals/i16.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = -32768; + i32 max = 32767; + ret 0; +} \ No newline at end of file diff --git a/tests/literals/i32.s b/tests/literals/i32.s index 195559d2..92ba5948 100644 --- a/tests/literals/i32.s +++ b/tests/literals/i32.s @@ -1,4 +1,5 @@ i32 main() { - i32 value = 1000; + i32 min = -2147483648; + i32 max = 2147483647; ret 0; -} +} \ No newline at end of file diff --git a/tests/literals/i64.s b/tests/literals/i64.s new file mode 100644 index 00000000..a0d4595c --- /dev/null +++ b/tests/literals/i64.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = -9223372036854775808; + i32 max = 9223372036854775807; + ret 0; +} \ No newline at end of file diff --git a/tests/literals/i8.s b/tests/literals/i8.s new file mode 100644 index 00000000..e5132e70 --- /dev/null +++ b/tests/literals/i8.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = -128; + i32 max = 127; + ret 0; +} diff --git a/tests/literals/string.s b/tests/literals/string.s new file mode 100644 index 00000000..b95dd484 --- /dev/null +++ b/tests/literals/string.s @@ -0,0 +1,5 @@ +i32 main() { + char* str1 = "test"; + char* str2 = "\n\n\n"; + ret 0; +} diff --git a/tests/literals/u16.s b/tests/literals/u16.s new file mode 100644 index 00000000..08483e7f --- /dev/null +++ b/tests/literals/u16.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = 0; + i32 max = 65535; + ret 0; +} diff --git a/tests/literals/u32.s b/tests/literals/u32.s new file mode 100644 index 00000000..8ed858d1 --- /dev/null +++ b/tests/literals/u32.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = 0; + i32 max = 4294967295; + ret 0; +} diff --git a/tests/literals/u64.s b/tests/literals/u64.s new file mode 100644 index 00000000..2631fd98 --- /dev/null +++ b/tests/literals/u64.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = 0; + i32 max = 18446744073709551615; + ret 0; +} diff --git a/tests/literals/u8.s b/tests/literals/u8.s new file mode 100644 index 00000000..7a29ebb7 --- /dev/null +++ b/tests/literals/u8.s @@ -0,0 +1,5 @@ +i32 main() { + i32 min = 0; + i32 max = 255; + ret 0; +}