diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 2a8585cda..6a6c83586 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -31,7 +31,8 @@ jobs: run: | cmake -Bbuild -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=clang-16 -DCMAKE_CXX_COMPILER=clang++-16 \ - -DARK_SANITIZERS=Off -DARK_BUILD_EXE=On -DARK_BUILD_MODULES=Off + -DARK_SANITIZERS=Off -DARK_BUILD_EXE=On -DARK_BUILD_MODULES=Off \ + -DARK_UNITY_BUILD=On cmake --build build --config Release -- -j 4 - uses: actions/setup-node@v4 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39d1c55f0..c0aba7f69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -274,7 +274,8 @@ jobs: -DCMAKE_CXX_COMPILER=/AFLplusplus/afl-c++ \ -DCMAKE_BUILD_TYPE=Release \ -DARK_BUILD_EXE=On \ - -DARK_SANITIZERS=On + -DARK_SANITIZERS=On \ + -DARK_UNITY_BUILD=On cmake --build ${BUILD_FOLDER} --config Release -j 4 - name: Fuzz diff --git a/.github/workflows/setup-compilers/action.yaml b/.github/workflows/setup-compilers/action.yaml index 12e52f2c5..78e2f9101 100644 --- a/.github/workflows/setup-compilers/action.yaml +++ b/.github/workflows/setup-compilers/action.yaml @@ -102,7 +102,8 @@ runs: -DARK_COVERAGE=${{ inputs.coverage }} \ -DARK_BUILD_EXE=On \ -DARK_BUILD_MODULES=$ToggleModules -DARK_MOD_ALL=$ToggleModules -DARK_MOD_DRAFT=$ToggleModules \ - -DARK_TESTS=On + -DARK_TESTS=On \ + -DARK_UNITY_BUILD=On cmake --build build --config $BUILD_TYPE -j $(nproc) - name: Configure CMake Ark @@ -121,5 +122,6 @@ runs: -DARK_BUILD_EXE=On \ -DARK_BUILD_MODULES=$ToggleModules -DARK_MOD_ALL=$ToggleModules -DARK_MOD_DRAFT=$ToggleModules \ -DARK_TESTS=On \ + -DARK_UNITY_BUILD=On \ $additional_args cmake --build build --config $BUILD_TYPE -j $(nproc) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfb0c5a02..606c5845e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -104,6 +104,10 @@ - loops have their own scope: variables created inside a loop won't leak outside it - upgraded fmtlib to 11.1.3-13 - allow capture in nested scope (before it was targeting only the current scope) +- `-bcr` option can be given a source file, it will then be compiled before its bytecode is shown +- magic numbers for tables start in bytecode files have been changed from 0x01, 0x02, 0x03 to 0xA1, 0xA2, 0xA3 (symbols, values, code) to make them stand out in hex editors +- magic numbers for value types in bytecode files have been changed from 0x01, 0x02, 0x03 to 0xF1, 0xF2, 0xF3 (number, string, function) +- numbers in the values table in bytecode files are no longer stringified but their IEEE754 representation is now encoded on 12 bytes (4 for the exponent, 8 for the mantissa) ### Removed - removed unused `NodeType::Closure` diff --git a/CMakeLists.txt b/CMakeLists.txt index ab7d39545..10e8e6414 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.16) project(ark CXX) @@ -22,6 +22,7 @@ option(ARK_SANITIZERS "Enable ASAN and UBSAN" Off) option(ARK_TESTS "Build ArkScript unit tests" Off) option(ARK_BENCHMARKS "Build ArkScript benchmarks" Off) option(ARK_COVERAGE "Enable coverage while building (clang, gcc) (requires ARK_TESTS to be On)" Off) +option(ARK_UNITY_BUILD "Enable unity build" Off) include(cmake/link_time_optimization.cmake) include(cmake/sanitizers.cmake) @@ -53,6 +54,11 @@ target_include_directories(ArkReactor ${ark_SOURCE_DIR}/include) target_compile_features(ArkReactor PRIVATE cxx_std_20) +if (ARK_UNITY_BUILD) + set_target_properties(ArkReactor PROPERTIES UNITY_BUILD ON UNITY_BUILD_MODE BATCH UNITY_BUILD_BATCH_SIZE 16) + set_source_files_properties(src/arkreactor/VM/VM.cpp PROPERTIES SKIP_UNITY_BUILD_INCLUSION true) +endif () + if (CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG OR APPLE) message(STATUS "Enabling computed gotos") target_compile_definitions(ArkReactor PRIVATE ARK_USE_COMPUTED_GOTOS=1) @@ -194,6 +200,10 @@ if (ARK_TESTS) target_compile_features(unittests PRIVATE cxx_std_20) target_compile_definitions(unittests PRIVATE ARK_TESTS_ROOT="${CMAKE_CURRENT_SOURCE_DIR}/") + if (ARK_UNITY_BUILD) + set_target_properties(unittests PROPERTIES UNITY_BUILD ON UNITY_BUILD_MODE BATCH UNITY_BUILD_BATCH_SIZE 16) + endif () + if (ARK_COVERAGE AND CMAKE_COMPILER_IS_CLANG) target_compile_options(unittests PRIVATE -coverage -fcoverage-mapping -fprofile-instr-generate) target_link_options(unittests PRIVATE -coverage -fcoverage-mapping -fprofile-instr-generate) @@ -250,6 +260,10 @@ if (ARK_BUILD_EXE) target_link_libraries(arkscript PUBLIC ArkReactor replxx clipp) target_compile_features(arkscript PRIVATE cxx_std_20) + if (ARK_UNITY_BUILD) + set_target_properties(arkscript PROPERTIES UNITY_BUILD ON UNITY_BUILD_MODE BATCH UNITY_BUILD_BATCH_SIZE 16) + endif () + enable_lto(arkscript) # Installs the arkscript executable. diff --git a/Dockerfile b/Dockerfile index 986ecb0ec..0a7e95cc5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,7 @@ COPY --from=submodule-initializor /out . COPY --from=submodule-initializor /rev . RUN cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DARK_BUILD_EXE=On -DARK_COMMIT="$(cat rev)" \ + -DARK_BUILD_EXE=On -DARK_COMMIT="$(cat rev)" -DARK_UNITY_BUILD=On \ && cmake --build build --target arkscript -- -j $(nproc) FROM alpine:3.19 AS organizer diff --git a/README.md b/README.md index af43fda63..072dc528b 100644 --- a/README.md +++ b/README.md @@ -172,23 +172,25 @@ DESCRIPTION ArkScript programming language SYNOPSIS - arkscript -h - arkscript -v - arkscript --dev-info - arkscript -e + arkscript -h + arkscript -v + arkscript --dev-info + arkscript -e arkscript -c [-d] [-f(importsolver|no-importsolver)] [-f(macroprocessor|no-macroprocessor)] [-f(optimizer|no-optimizer)] + [-f(iroptimizer|no-iroptimizer)] [-fdump-ir] arkscript [-d] [-L ] [-f(importsolver|no-importsolver)] [-f(macroprocessor|no-macroprocessor)] [-f(optimizer|no-optimizer)] + [-f(iroptimizer|no-iroptimizer)] [-fdump-ir] - arkscript -f [--(dry-run|check)] - arkscript --ast [-d] [-L ] - arkscript -bcr -on - arkscript -bcr -a [-s ] - arkscript -bcr -st [-s ] - arkscript -bcr -vt [-s ] - arkscript -bcr [-cs] [-p ] [-s ] + arkscript -f [--(dry-run|check)] + arkscript --ast [-d] [-L ] + arkscript -bcr -on + arkscript -bcr -a [-s ] + arkscript -bcr -st [-s ] + arkscript -bcr -vt [-s ] + arkscript -bcr [-cs] [-p ] [-s ] OPTIONS -h, --help Display this message @@ -206,6 +208,10 @@ OPTIONS Toggle on and off the macro processor pass -f(optimizer|no-optimizer) Toggle on and off the optimizer pass + -f(iroptimizer|no-iroptimizer) + Toggle on and off the IR optimizer pass + + -fdump-ir Dump IR to file.ark.ir -d, --debug... Increase debug level (default: 0) -L, --lib Set the location of the ArkScript standard library. Paths can be @@ -218,6 +224,10 @@ OPTIONS Toggle on and off the macro processor pass -f(optimizer|no-optimizer) Toggle on and off the optimizer pass + -f(iroptimizer|no-iroptimizer) + Toggle on and off the IR optimizer pass + + -fdump-ir Dump IR to file.ark.ir -f, --format Format the given source file in place --dry-run Do not modify the file, only print out the changes --check Check if a file formating is correctly, without modifying it. @@ -230,8 +240,8 @@ OPTIONS delimited by ';' -bcr, --bytecode-reader Launch the bytecode reader - If file isn't a bytecode file, the cached compiled will be - loaded ; if there are none, it will be compiled first + .arkc bytecode file or .ark source file that will be compiled + first -on, --only-names Display only the bytecode segments names and sizes -a, --all Display all the bytecode segments (default) @@ -242,7 +252,7 @@ OPTIONS -s, --slice Select a slice of instructions in the bytecode VERSION - 4.0.0-ff04fd55 + 4.0.0-c24c8f22 LICENSE Mozilla Public License 2.0 diff --git a/harden.dockerfile b/harden.dockerfile index 904eb3ce1..23835fcd0 100644 --- a/harden.dockerfile +++ b/harden.dockerfile @@ -33,7 +33,7 @@ COPY --from=submodule-initializor /out . COPY --from=submodule-initializor /rev . RUN cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DARK_BUILD_EXE=On -DARK_ENABLE_SYSTEM=Off -DARK_COMMIT="$(cat rev)" \ + -DARK_BUILD_EXE=On -DARK_ENABLE_SYSTEM=Off -DARK_COMMIT="$(cat rev)" -DARK_UNITY_BUILD=On \ && cmake --build build --target arkscript -- -j $(nproc) FROM alpine:3.19 AS organizer diff --git a/include/Ark/Compiler/Common.hpp b/include/Ark/Compiler/Common.hpp index 10e740991..a1e8a9b1f 100644 --- a/include/Ark/Compiler/Common.hpp +++ b/include/Ark/Compiler/Common.hpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace Ark { @@ -24,6 +25,21 @@ namespace Ark namespace Ark::internal { + namespace bytecode + { + constexpr std::array Magic = { 'a', 'r', 'k', '\0' }; + constexpr std::array Version = { + ARK_VERSION_MAJOR & 0xff00, + ARK_VERSION_MAJOR & 0x00ff, + ARK_VERSION_MINOR & 0xff00, + ARK_VERSION_MINOR & 0x00ff, + ARK_VERSION_PATCH & 0xff00, + ARK_VERSION_PATCH & 0x00ff + }; + constexpr std::size_t TimestampLength = 8; + constexpr std::size_t HeaderSize = Magic.size() + Version.size() + TimestampLength; + } + /// The different node types available enum class NodeType { diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Compiler.hpp index 6797674f7..7e9f1ee2e 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Compiler.hpp @@ -85,7 +85,6 @@ namespace Ark::internal std::vector m_temp_pages; ///< we need temporary code pages for some compilations passes IR::label_t m_current_label = 0; - unsigned m_debug; ///< the debug level of the compiler Logger m_logger; /** diff --git a/include/Ark/Compiler/Instructions.hpp b/include/Ark/Compiler/Instructions.hpp index b06a76b56..aede62115 100644 --- a/include/Ark/Compiler/Instructions.hpp +++ b/include/Ark/Compiler/Instructions.hpp @@ -28,12 +28,12 @@ namespace Ark::internal { // @role Does nothing, useful for padding NOP = 0x00, - SYM_TABLE_START = 0x01, - VAL_TABLE_START = 0x02, - NUMBER_TYPE = 0x01, - STRING_TYPE = 0x02, - FUNC_TYPE = 0x03, - CODE_SEGMENT_START = 0x03, + SYM_TABLE_START = 0xA1, + VAL_TABLE_START = 0xA2, + CODE_SEGMENT_START = 0xA3, + NUMBER_TYPE = 0xF1, + STRING_TYPE = 0xF2, + FUNC_TYPE = 0xF3, // @args symbol id // @role Load a symbol from its ID onto the stack diff --git a/include/Ark/Compiler/Serialization/IEEE754Serializer.hpp b/include/Ark/Compiler/Serialization/IEEE754Serializer.hpp new file mode 100644 index 000000000..351cb1359 --- /dev/null +++ b/include/Ark/Compiler/Serialization/IEEE754Serializer.hpp @@ -0,0 +1,60 @@ +#ifndef ARK_COMPILER_SERIALIZATION_IEEE754SERIALIZER_HPP +#define ARK_COMPILER_SERIALIZATION_IEEE754SERIALIZER_HPP + +#include +#include +#include +#include +#include + +namespace Ark::internal::ieee754 +{ + // Narrowing conversion from long long to double, 9223372036854775807 becomes 9223372036854775808. + // This gives us an error margin of 1.08420217248550443400745280086994171142578125 * 10^-19, + // which is acceptable. + static constexpr auto MaxLong = static_cast(std::numeric_limits::max()); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + template + constexpr T bswap(T i) + { + return i; + } +#else + // https://stackoverflow.com/a/36937049/21584 + template + constexpr T bswap_impl(T i, std::index_sequence) + { + return (((i >> N * CHAR_BIT & static_cast(-1)) << (sizeof(T) - 1 - N) * CHAR_BIT) | ...); + } + template > + constexpr T bswap(T i) + { + return std::bit_cast(bswap_impl(std::bit_cast(i), std::make_index_sequence {})); + } +#endif + + struct DecomposedDouble + { + int32_t exponent; + int64_t mantissa; + }; + + [[nodiscard]] inline DecomposedDouble serialize(const double n) + { + int exp = 0; + const auto mant = static_cast(MaxLong * std::frexp(n, &exp)); + + return DecomposedDouble { + .exponent = std::bit_cast(bswap(exp)), + .mantissa = bswap(mant) + }; + } + + [[nodiscard]] inline double deserialize(const DecomposedDouble d) + { + return std::ldexp(static_cast(bswap(d.mantissa)) / MaxLong, std::bit_cast(bswap(d.exponent))); + } +} + +#endif // ARK_COMPILER_SERIALIZATION_IEEE754SERIALIZER_HPP diff --git a/include/Ark/Compiler/Serialization/IntegerSerializer.hpp b/include/Ark/Compiler/Serialization/IntegerSerializer.hpp new file mode 100644 index 000000000..84a1fa851 --- /dev/null +++ b/include/Ark/Compiler/Serialization/IntegerSerializer.hpp @@ -0,0 +1,67 @@ +#ifndef ARK_COMPILER_SERIALIZATION_INTEGERSERIALIZER_HPP +#define ARK_COMPILER_SERIALIZATION_INTEGERSERIALIZER_HPP + +#include +#include +#include + +namespace Ark::internal +{ + void serializeToVecLE(std::integral auto number, std::vector& out) + { + constexpr auto mask = static_cast(0xff); + + for (std::size_t i = 0; i < sizeof(decltype(number)); ++i) + out.push_back(static_cast((number & (mask << (8 * i))) >> (8 * i))); + } + + void serializeToVecBE(std::integral auto number, std::vector& out) + { + constexpr auto pad = sizeof(decltype(number)) - 1; + constexpr auto mask = static_cast(0xff); + + for (std::size_t i = 0; i < sizeof(decltype(number)); ++i) + { + const auto shift = 8 * (pad - i); + out.push_back(static_cast((number & (mask << shift)) >> shift)); + } + } + + void serializeOn2BytesToVecLE(std::integral auto number, std::vector& out) + { + constexpr auto mask = static_cast(0xff); + for (std::size_t i = 0; i < 2; ++i) + out.push_back(static_cast((number & (mask << (8 * i))) >> (8 * i))); + } + + void serializeOn2BytesToVecBE(std::integral auto number, std::vector& out) + { + constexpr auto mask = static_cast(0xff); + for (std::size_t i = 0; i < 2; ++i) + out.push_back(static_cast((number & (mask << (8 * (1 - i)))) >> (8 * (1 - i)))); + } + + template + T deserializeLE(std::vector::const_iterator begin, std::vector::const_iterator end) + { + constexpr std::size_t length = sizeof(T); + T result {}; + for (std::size_t i = 0; i < length && begin != end; ++i, ++begin) + result += static_cast(*begin) << (8 * i); + + return result; + } + + template + T deserializeBE(std::vector::const_iterator begin, std::vector::const_iterator end) + { + constexpr std::size_t length = sizeof(T) - 1; + T result {}; + for (std::size_t i = 0; i < length && begin != end; ++i, ++begin) + result += static_cast(*begin) << (8 * (length - i)); + + return result; + } +} + +#endif // ARK_COMPILER_SERIALIZATION_INTEGERSERIALIZER_HPP diff --git a/src/arkreactor/Builtins/Builtins.cpp b/src/arkreactor/Builtins/Builtins.cpp index 4166787f2..82541b446 100644 --- a/src/arkreactor/Builtins/Builtins.cpp +++ b/src/arkreactor/Builtins/Builtins.cpp @@ -1,6 +1,6 @@ -#define _USE_MATH_DEFINES -#include #include +#include +#include #include @@ -12,10 +12,10 @@ namespace Ark::internal::Builtins namespace Mathematics { - extern const Value pi_ = Value(M_PI); + extern const Value pi_ = Value(std::numbers::pi); extern const Value e_ = Value(std::exp(1.0)); - extern const Value tau_ = Value(M_PI * 2.0); - extern const Value inf_ = Value(HUGE_VAL); + extern const Value tau_ = Value(std::numbers::pi * 2.0); + extern const Value inf_ = Value(std::numeric_limits::infinity()); extern const Value nan_ = Value(std::numeric_limits::signaling_NaN()); } diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp index 88aa19495..780c7bfae 100644 --- a/src/arkreactor/Compiler/BytecodeReader.cpp +++ b/src/arkreactor/Compiler/BytecodeReader.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -38,9 +40,11 @@ namespace Ark bool BytecodeReader::checkMagic() const { - return m_bytecode.size() >= 4 && m_bytecode[0] == 'a' && - m_bytecode[1] == 'r' && m_bytecode[2] == 'k' && - m_bytecode[3] == internal::Instruction::NOP; + return m_bytecode.size() >= bytecode::Magic.size() && + m_bytecode[0] == bytecode::Magic[0] && + m_bytecode[1] == bytecode::Magic[1] && + m_bytecode[2] == bytecode::Magic[2] && + m_bytecode[3] == bytecode::Magic[3]; } const bytecode_t& BytecodeReader::bytecode() noexcept @@ -50,7 +54,7 @@ namespace Ark Version BytecodeReader::version() const { - if (!checkMagic() || m_bytecode.size() < 10) + if (!checkMagic() || m_bytecode.size() < bytecode::Magic.size() + bytecode::Version.size()) return Version { 0, 0, 0 }; return Version { @@ -63,7 +67,7 @@ namespace Ark unsigned long long BytecodeReader::timestamp() const { // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes - if (!checkMagic() || m_bytecode.size() < 18) + if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize) return 0; // reading the timestamp in big endian @@ -80,27 +84,27 @@ namespace Ark std::vector BytecodeReader::sha256() const { - if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size) + if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size) return {}; std::vector sha(picosha2::k_digest_size); for (std::size_t i = 0; i < picosha2::k_digest_size; ++i) - sha[i] = m_bytecode[18 + i]; + sha[i] = m_bytecode[bytecode::HeaderSize + i]; return sha; } Symbols BytecodeReader::symbols() const { - if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size || - m_bytecode[18 + picosha2::k_digest_size] != SYM_TABLE_START) + if (!checkMagic() || m_bytecode.size() < bytecode::HeaderSize + picosha2::k_digest_size || + m_bytecode[bytecode::HeaderSize + picosha2::k_digest_size] != SYM_TABLE_START) return {}; - std::size_t i = 18 + picosha2::k_digest_size + 1; + std::size_t i = bytecode::HeaderSize + picosha2::k_digest_size + 1; const uint16_t size = readNumber(i); i++; Symbols block; - block.start = 18 + picosha2::k_digest_size; + block.start = bytecode::HeaderSize + picosha2::k_digest_size; block.symbols.reserve(size); for (uint16_t j = 0; j < size; ++j) @@ -140,10 +144,16 @@ namespace Ark if (type == NUMBER_TYPE) { - std::string val; - while (m_bytecode[i] != 0) - val.push_back(static_cast(m_bytecode[i++])); - block.values.emplace_back(std::stod(val)); + auto exp = deserializeLE( + m_bytecode.begin() + static_cast::difference_type>(i), m_bytecode.end()); + i += sizeof(decltype(exp)); + auto mant = deserializeLE( + m_bytecode.begin() + static_cast::difference_type>(i), m_bytecode.end()); + i += sizeof(decltype(mant)); + + const ieee754::DecomposedDouble d { exp, mant }; + double val = ieee754::deserialize(d); + block.values.emplace_back(val); } else if (type == STRING_TYPE) { @@ -201,7 +211,7 @@ namespace Ark { if (!checkMagic()) { - fmt::print("Invalid format"); + fmt::println("Invalid format"); return; } diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index efb09b39f..91308f9ad 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -19,7 +19,7 @@ namespace Ark::internal using namespace literals; Compiler::Compiler(const unsigned debug) : - m_debug(debug), m_logger("Compiler", debug) + m_logger("Compiler", debug) {} void Compiler::process(const Node& ast) diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp index ae54cf7d1..4c6f95ff8 100644 --- a/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IRCompiler.cpp @@ -8,6 +8,8 @@ #include #include +#include +#include namespace Ark::internal { @@ -39,12 +41,10 @@ namespace Ark::internal m_bytecode.push_back(0_u8); } - constexpr std::size_t header_size = 18; - // generate a hash of the tables + bytecode std::vector hash_out(picosha2::k_digest_size); - picosha2::hash256(m_bytecode.begin() + header_size, m_bytecode.end(), hash_out); - m_bytecode.insert(m_bytecode.begin() + header_size, hash_out.begin(), hash_out.end()); + picosha2::hash256(m_bytecode.begin() + bytecode::HeaderSize, m_bytecode.end(), hash_out); + m_bytecode.insert(m_bytecode.begin() + bytecode::HeaderSize, hash_out.begin(), hash_out.end()); m_logger.traceEnd(); } @@ -186,10 +186,7 @@ namespace Ark::internal // push version for (const int n : std::array { ARK_VERSION_MAJOR, ARK_VERSION_MINOR, ARK_VERSION_PATCH }) - { - m_bytecode.push_back(static_cast((n & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(n & 0x00ff)); - } + serializeOn2BytesToVecBE(n, m_bytecode); // push timestamp const long long timestamp = std::chrono::duration_cast( @@ -210,8 +207,7 @@ namespace Ark::internal throw std::overflow_error(fmt::format("Too many symbols: {}, exceeds the maximum size of 2^16 - 1", symbol_size)); m_bytecode.push_back(SYM_TABLE_START); - m_bytecode.push_back(static_cast((symbol_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(symbol_size & 0x00ff)); + serializeOn2BytesToVecBE(symbol_size, m_bytecode); for (const auto& sym : symbols) { @@ -227,8 +223,7 @@ namespace Ark::internal throw std::overflow_error(fmt::format("Too many values: {}, exceeds the maximum size of 2^16 - 1", value_size)); m_bytecode.push_back(VAL_TABLE_START); - m_bytecode.push_back(static_cast((value_size & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(value_size & 0x00ff)); + serializeOn2BytesToVecBE(value_size, m_bytecode); for (const ValTableElem& val : values) { @@ -238,10 +233,9 @@ namespace Ark::internal { m_bytecode.push_back(NUMBER_TYPE); const auto n = std::get(val.value); - std::string t = std::to_string(n); - std::ranges::transform(t, std::back_inserter(m_bytecode), [](const char i) { - return static_cast(i); - }); + const auto [exponent, mantissa] = ieee754::serialize(n); + serializeToVecLE(exponent, m_bytecode); + serializeToVecLE(mantissa, m_bytecode); break; } @@ -259,8 +253,7 @@ namespace Ark::internal { m_bytecode.push_back(FUNC_TYPE); const std::size_t addr = std::get(val.value); - m_bytecode.push_back(static_cast((addr & 0xff00) >> 8)); - m_bytecode.push_back(static_cast(addr & 0x00ff)); + serializeOn2BytesToVecBE(addr, m_bytecode); break; } } diff --git a/src/arkreactor/VM/State.cpp b/src/arkreactor/VM/State.cpp index 3b555ca69..3b31028f4 100644 --- a/src/arkreactor/VM/State.cpp +++ b/src/arkreactor/VM/State.cpp @@ -155,7 +155,7 @@ namespace Ark const auto bytecode_hash = bcr.sha256(); std::vector hash(picosha2::k_digest_size); - picosha2::hash256(m_bytecode.begin() + 18 + picosha2::k_digest_size, m_bytecode.end(), hash); + picosha2::hash256(m_bytecode.begin() + bytecode::HeaderSize + picosha2::k_digest_size, m_bytecode.end(), hash); // checking integrity for (std::size_t j = 0; j < picosha2::k_digest_size; ++j) { diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 46b857192..054e44a39 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -123,7 +123,7 @@ int main(int argc, char** argv) ) | ( required("-bcr", "--bytecode-reader").set(selected, mode::bytecode_reader).doc("Launch the bytecode reader") - & value("file", file).doc("If file isn't a bytecode file, the cached compiled will be loaded ; if there are none, it will be compiled first") + & value("file", file).doc(".arkc bytecode file or .ark source file that will be compiled first") , ( option("-on", "--only-names").set(segment, Ark::BytecodeSegment::HeadersOnly).doc("Display only the bytecode segments names and sizes") | ( @@ -294,6 +294,16 @@ int main(int argc, char** argv) { Ark::BytecodeReader bcr; bcr.feed(file); + if (!bcr.checkMagic()) + { + // we got a potentially non-compiled file + fmt::println("Compiling {}...", file); + + Ark::Welder welder(debug, lib_paths); + welder.computeASTFromFile(file); + welder.generateBytecode(); + bcr.feed(welder.bytecode()); + } if (bcr_page == max_uint16 && bcr_start == max_uint16) bcr.display(segment); diff --git a/tests/unittests/BytecodeReaderSuite.cpp b/tests/unittests/BytecodeReaderSuite.cpp index 4df8b1924..062e34030 100644 --- a/tests/unittests/BytecodeReaderSuite.cpp +++ b/tests/unittests/BytecodeReaderSuite.cpp @@ -1,38 +1,60 @@ #include #include +#include +#include #include +#include + +#include "TestsHelper.hpp" using namespace boost; ut::suite<"BytecodeReader"> bcr_suite = [] { using namespace ut; + Ark::Welder welder(0, { std::filesystem::path(ARK_TESTS_ROOT "/lib/") }); + + const auto time_start = + static_cast(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + + should("compile without error") = [&] { + expect(mut(welder).computeASTFromFile(get_resource_path("BytecodeReaderSuite/ackermann.ark"))); + expect(mut(welder).generateBytecode()); + }; + + const auto time_end = + static_cast(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + Ark::BytecodeReader bcr; - bcr.feed(ARK_TESTS_ROOT "tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc"); + const auto bytecode = welder.bytecode(); + bcr.feed(bytecode); - "bytecode"_test = [bcr] { + "bytecode"_test = [&] { should("find the version") = [bcr] { auto [major, minor, patch] = bcr.version(); - expect(that % major == 4); - expect(that % minor == 0); - expect(that % patch == 0); + expect(that % major == ARK_VERSION_MAJOR); + expect(that % minor == ARK_VERSION_MINOR); + expect(that % patch == ARK_VERSION_PATCH); }; - should("find the timestamp") = [bcr] { + should("find the timestamp") = [bcr, time_start, time_end] { const auto time = bcr.timestamp(); - expect(that % time == 1717523961ull); + expect(that % time >= time_start); + expect(that % time <= time_end); }; - should("find the sha256") = [bcr] { + should("find the sha256") = [bcr, bytecode] { const auto sha256 = bcr.sha256(); - const auto expected_sha = std::vector { - 0xcf, 0x79, 0x82, 0x6b, 0x81, 0x5c, 0xe4, 0x11, - 0xce, 0x25, 0xbe, 0xc3, 0x05, 0x91, 0x21, 0x7f, - 0x6c, 0x70, 0x54, 0x70, 0xd8, 0x8b, 0x2b, 0x90, - 0x82, 0xcd, 0x70, 0x2e, 0xeb, 0x51, 0xb2, 0x75 - }; + std::vector expected_sha(picosha2::k_digest_size); + // compute sha256 after header + sha + picosha2::hash256(bytecode.begin() + Ark::internal::bytecode::HeaderSize + 32, bytecode.end(), expected_sha); + expect(that % sha256 == expected_sha); }; @@ -61,18 +83,21 @@ ut::suite<"BytecodeReader"> bcr_suite = [] { Ark::Value(static_cast(1)), Ark::Value(0), Ark::Value(1), - Ark::Value(7), - Ark::Value(3) + Ark::Value("Ackermann-Péter function, m=3, n=6: "), + Ark::Value(3), + Ark::Value(6) }; expect(that % values_block.values.size() == expected_values.size()); expect(that % values_block.start == symbols_block.end); - // + 1 for the header - // + 2 for the size - // + 5 for the type tags - // + 2 for the pageaddr - // + 4*8 for the numbers represented as strings on 8 chars - // + 5 for the \0 at the end of each value - expect(that % values_block.end == values_block.start + 1 + 2 + 5 + 2 + 4 * 8 + 5); + expect( + that % values_block.end == values_block.start + 1 // header size + + 2 // size of the table + + 6 // number of type tags + + 2 // page addr length + + 4 * 12 // number represented as DecomposedDouble + + 37 // string length + + 6 // null terminator + ); }; should("list all code page") = [values_block, pages, start_code] { @@ -80,8 +105,8 @@ ut::suite<"BytecodeReader"> bcr_suite = [] { expect(that % pages.size() == 2ull); // 7 instructions on 4 bytes expect(that % pages[0].size() == 7 * 4ull); - // 32 instructions on 4 bytes - expect(that % pages[1].size() == 32 * 4ull); + // 24 instructions on 4 bytes + expect(that % pages[1].size() == 24 * 4ull); }; }; }; diff --git a/tests/unittests/CompilerSuite.cpp b/tests/unittests/CompilerSuite.cpp index 4c4a0c541..df78bc315 100644 --- a/tests/unittests/CompilerSuite.cpp +++ b/tests/unittests/CompilerSuite.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include "TestsHelper.hpp" @@ -10,6 +12,42 @@ using namespace boost; ut::suite<"Compiler"> compiler_suite = [] { using namespace ut; + const std::vector nums = { 0.11, 0.000000000011, 2, -2, 12, 6, 4, 0, 14657892.35, 3.141592653589, 4092.7984 }; + + "IEEE754 serialization"_test = [&] { + using namespace Ark::internal::ieee754; + + for (double original : nums) + { + const auto decomp = serialize(original); + auto recomp = deserialize(decomp); + expect(that % recomp == original); + } + }; + + "IEEE754 serialization via integer serialization Little Endian"_test = [&] { + using namespace Ark::internal; + + for (const double original : nums) + { + std::vector bytecode {}; + + const auto [exponent, mantissa] = ieee754::serialize(original); + serializeToVecLE(exponent, bytecode); + serializeToVecLE(mantissa, bytecode); + + ieee754::DecomposedDouble d { 0, 0 }; + d.exponent = deserializeLE(bytecode.begin(), bytecode.end()); + d.mantissa = deserializeLE( + bytecode.begin() + static_cast::difference_type>(sizeof(decltype(ieee754::DecomposedDouble::exponent))), + bytecode.end()); + + double val = ieee754::deserialize(d); + + expect(that % val == original); + } + }; + "Word construction"_test = [] { should("create a word with a single argument on 2 bytes") = [] { const auto word = Ark::internal::Word(12, 0x5678); diff --git a/tests/unittests/resources/BytecodeReaderSuite/ackermann.ark b/tests/unittests/resources/BytecodeReaderSuite/ackermann.ark new file mode 100644 index 000000000..879145214 --- /dev/null +++ b/tests/unittests/resources/BytecodeReaderSuite/ackermann.ark @@ -0,0 +1,21 @@ +# the Ackermann Peter function (see https://en.wikipedia.org/wiki/Ackermann_function) +# One of the simplest and earliest-discovered examples of a total computable function, +# that is not primitive. All primitive recursive functions are total and computable, +# but the Ackermann function illustrates that not all total computable functions +# are primitive recursive. +# Due to its definitions in terms of extremely deep recursion, it can be used as a +# benchmark of a compiler's ability to optimize recursion, which is the reason why +# we are using this function to benchmark the language. + +(let ackermann (fun (m n) { + (if (> m 0) + # then + (if (= 0 n) + # then + (ackermann (- m 1) 1) + # else + (ackermann (- m 1) (ackermann m (- n 1)))) + # else + (+ 1 n))})) + +(print "Ackermann-Péter function, m=3, n=6: " (ackermann 3 6)) diff --git a/tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc b/tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc deleted file mode 100644 index 5cc0a77a7..000000000 Binary files a/tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc and /dev/null differ