Skip to content

Commit

Permalink
Allow Inlining C++ Runtime code in Vectorized Primitives
Browse files Browse the repository at this point in the history
This commit allows inlining our C++ runtime code within vectorized
primitives.

We use the link-time optimization of clang for this: we compile a new
`inkfuse_runtime_static` library with `-flto`.

When generating our C code for the interpreter and compiling it to a
shared library, we now link the objects from the
`inkfuse_runtime_static` library into the generated fragments.

This effectively removes the C/C++ compile-time/runtime optimization
boundary that existed so far. Up until now we would always generate call
statements into the runtime system.
We can now effectively inline functions from our runtime system into the
generated primitives.
  • Loading branch information
wagjamin committed Nov 3, 2023
1 parent cb9a38e commit 2b92b11
Show file tree
Hide file tree
Showing 12 changed files with 367 additions and 181 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ jobs:
# Unfortunately we're running into https://github.com/llvm/llvm-project/issues/59432
# This is some Ubuntu packaging issue that causes alloc/dealloc mismatches when asan
# is enabled with libc++
run: ASAN_OPTIONS=alloc_dealloc_mismatch=0 ./tester
# alloc_dealloc_mismatch must stay disabled (0) because of the libc++ packaging issue described above.
# NOTE(review): detect_odr_violation=0 presumably silences reports caused by runtime objects being
# present both in the tester binary and in the generated shared objects - confirm.
run: ASAN_OPTIONS=detect_odr_violation=0,alloc_dealloc_mismatch=0 ./tester

85 changes: 76 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -stdlib=libc++ -gdwarf-4")
# Generate DWARF 4 in debug to work on older GDB versions
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -gdwarf-4")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O0 -fsanitize=address")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O3")

# ---------------------------------------------------------------------------
# Dependencies
Expand Down Expand Up @@ -56,6 +57,21 @@ include_directories(
# ---------------------------------------------------------------------------
# Sources
# ---------------------------------------------------------------------------

# Runtime system of inkfuse. These translation units are the ones that get
# linked directly against the generated code.

# Headers belonging to the runtime system.
set(RUNTIME_INCLUDE_C
  "${CMAKE_SOURCE_DIR}/src/runtime/ExternHashTableRuntime.h"
  "${CMAKE_SOURCE_DIR}/src/runtime/HashTables.h"
  "${CMAKE_SOURCE_DIR}/src/runtime/NewHashTables.h"
)

# Implementation files belonging to the runtime system.
set(RUNTIME_SRC_CC
  "${CMAKE_SOURCE_DIR}/src/runtime/ExternHashTableRuntime.cpp"
  "${CMAKE_SOURCE_DIR}/src/runtime/HashTables.cpp"
  "${CMAKE_SOURCE_DIR}/src/runtime/NewHashTables.cpp"
)

# Inkfuse C++ Files - the actual database system: executors, code generation logic, ...
set(SRC_CC
"${CMAKE_SOURCE_DIR}/src/algebra/Pipeline.cpp"
"${CMAKE_SOURCE_DIR}/src/algebra/Print.cpp"
Expand Down Expand Up @@ -132,9 +148,7 @@ set(SRC_CC
"${CMAKE_SOURCE_DIR}/src/interpreter/KeyPackingFragmentizer.cpp"
"${CMAKE_SOURCE_DIR}/src/runtime/HashRuntime.cpp"
"${CMAKE_SOURCE_DIR}/src/runtime/HashTableRuntime.cpp"
"${CMAKE_SOURCE_DIR}/src/runtime/HashTables.cpp"
"${CMAKE_SOURCE_DIR}/src/runtime/MemoryRuntime.cpp"
"${CMAKE_SOURCE_DIR}/src/runtime/NewHashTables.cpp"
"${CMAKE_SOURCE_DIR}/src/runtime/TupleMaterializer.cpp"
)

Expand Down Expand Up @@ -183,6 +197,47 @@ set(TOOLS_SRC
# ---------------------------------------------------------------------------
# Targets
# ---------------------------------------------------------------------------
# The inkfuse runtime: exposes functions that will be called from the generated
# code. The purpose of the runtime is to be able to write C++ code for e.g.
# hash tables, and make it easy to export these symbols to the generated
# C code.

# Object library with the runtime sources. Executables embed these objects
# through $<TARGET_OBJECTS:inkfuse_runtime> to expose the runtime functions.
add_library(inkfuse_runtime
  OBJECT
  ${RUNTIME_SRC_CC}
)

# Static library built from the same runtime sources, compiled with `-flto`
# and `-fPIC` so that the archive members are position independent LLVM
# bitcode rather than plain machine code.
# The archive is later copied to /tmp and unpacked into its object files
# (see the inkfuse_install_runtime target). When the interpreter's C code is
# generated, those object files are linked into the shared query object,
# which lets the compiler inline runtime C++ code into the generated C code.
add_library(inkfuse_runtime_static
  STATIC
  ${RUNTIME_SRC_CC}
)
target_compile_options(inkfuse_runtime_static
  PUBLIC
    -flto
    -fPIC
)

# Install: stage everything the interpreter code generation links against in /tmp.
#   1. Copy the LTO-enabled runtime archive to /tmp.
#   2. Compile xxhash with -flto (LLVM bitcode) and copy the object to /tmp.
#   3. Unpack the runtime archive into its individual object files in /tmp.
# The archive is located through $<TARGET_FILE:...> instead of assuming that it
# sits in the working directory, so a non-default archive output directory does
# not break the step. The relative xxhash source path is resolved against the
# build directory, which is now spelled out via WORKING_DIRECTORY.
# NOTE: a custom target is always considered out of date, so these commands run
# on every build.
add_custom_target(inkfuse_install_runtime
  DEPENDS inkfuse_runtime_static xxhash_static
  COMMAND
    ${CMAKE_COMMAND} -E copy $<TARGET_FILE:inkfuse_runtime_static> /tmp
  # Generate XXHash LLVM IR code
  COMMAND
    clang-14 -O3 -fPIC -flto vendor/xxhash/src/xxhash_src/xxhash.c -c -o xxhash_static.o
  COMMAND
    ${CMAKE_COMMAND} -E copy xxhash_static.o /tmp
  # Unpack the runtime objects
  COMMAND
    ar --output /tmp x $<TARGET_FILE:inkfuse_runtime_static>
  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
  VERBATIM
)

# Object files that the interpreter code generation links against to enable
# link time optimization. The list is handed to the C++ sources as the string
# literal macro _INKFUSE_OBJECT_DEPENDENCIES (space-separated paths).
string(CONCAT inkfuse_object_dependencies
  " /tmp/xxhash_static.o"
  " /tmp/ExternHashTableRuntime.cpp.o"
  " /tmp/HashTables.cpp.o"
  " /tmp/NewHashTables.cpp.o"
)
add_definitions("-D_INKFUSE_OBJECT_DEPENDENCIES=\"${inkfuse_object_dependencies}\"")

# Core inkfuse library, we have to declare it as a shared library
# in order to make runtime symbols visible during dlopen.
Expand All @@ -191,24 +246,36 @@ add_library(inkfuse STATIC ${SRC_CC})
# the compiled code will not find it.
# target_link_libraries(inkfuse PUBLIC )
# Need to link to dl in order to open compiled code at runtime
target_link_libraries(inkfuse PRIVATE xxhash_static gflags dl Threads::Threads)

target_link_libraries(inkfuse PRIVATE xxhash_static dl Threads::Threads)
# Every time we depend on inkfuse in some way we need to rebuild the runtime
add_dependencies(inkfuse inkfuse_install_runtime)

# inkfuse binary for running TPC-H queries in an interactive way
add_executable(inkfuse_runner tools/inkfuse_runner.cpp)
add_executable(inkfuse_runner
tools/inkfuse_runner.cpp
$<TARGET_OBJECTS:inkfuse_runtime>
)
set_property(TARGET inkfuse_runner PROPERTY ENABLE_EXPORTS 1)
target_link_libraries(inkfuse_runner PRIVATE inkfuse gflags Threads::Threads)

# inkfuse binary for benchmarking TPC-H queries in the reproducibility folder
add_executable(inkfuse_bench tools/inkfuse_bench.cpp)
add_executable(inkfuse_bench
tools/inkfuse_bench.cpp
$<TARGET_OBJECTS:inkfuse_runtime>
)
set_property(TARGET inkfuse_bench PROPERTY ENABLE_EXPORTS 1)
target_link_libraries(inkfuse_bench PRIVATE inkfuse gflags Threads::Threads)

# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------

add_executable(tester test/tester.cc ${TEST_CC})
add_executable(tester
test/tester.cc
${TEST_CC}
$<TARGET_OBJECTS:inkfuse_runtime>
)
set_property(TARGET tester PROPERTY ENABLE_EXPORTS 1)
target_include_directories(tester PRIVATE ${CMAKE_SOURCE_DIR}/test)
target_link_libraries(tester PRIVATE inkfuse gtest gmock Threads::Threads)
# Move the testdata into the binary tree for easy ingest tests.
Expand Down
2 changes: 1 addition & 1 deletion src/codegen/Backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct BackendProgram {
virtual void unlink() = 0;

/// Compile the backend program to actual machine code. The interrupt is used to stop compilation.
virtual void compileToMachinecode(InterruptableJob& interrupt) = 0;
virtual void compileToMachinecode(InterruptableJob& interrupt, bool compile_for_interpreter = false) = 0;

/// Get a function with the specified name from the compiled program.
virtual void* getFunction(std::string_view name) = 0;
Expand Down
131 changes: 113 additions & 18 deletions src/codegen/backend_c/BackendC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ namespace inkfuse {

namespace {

/// Object files generated by CMake that contain the inkfuse runtime code.
/// `_INKFUSE_OBJECT_DEPENDENCIES` is passed in as a compile definition by the build
/// (a string literal with space-separated object file paths under /tmp).
/// The `#ifndef` guard leaves an already defined `INKFUSE_OBJECT_DEPENDENCIES` untouched.
#ifndef INKFUSE_OBJECT_DEPENDENCIES
#define INKFUSE_OBJECT_DEPENDENCIES _INKFUSE_OBJECT_DEPENDENCIES
#endif

static constexpr bool debug_mode = false;

/// Generate the path for the c program.
Expand All @@ -18,6 +23,13 @@ std::string path(std::string_view program_name) {
return stream.str();
}

/// Build the location of the intermediate object file of a program: "/tmp/<program_name>.o".
std::string object_path(std::string_view program_name) {
   std::string result{"/tmp/"};
   result.append(program_name);
   result.append(".o");
   return result;
}

/// Generate the path for the so.
std::string so_path(std::string_view program_name) {
std::stringstream stream;
Expand All @@ -27,12 +39,64 @@ std::string so_path(std::string_view program_name) {

}

void BackendProgramC::compileToMachinecode(InterruptableJob& interrupt) {
void BackendProgramC::compileToMachinecode(InterruptableJob& interrupt, bool compile_for_interpreter) {
if (!was_compiled) {
// Dump.
dump();
// Invoke the compiler.
std::stringstream command;
if (compile_for_interpreter) {
compileInterpreter(interrupt);
} else {
compileJIT(interrupt);
}
}
was_compiled = true;
}

/// Fast compilation path: dump the program to a C file and turn it into a shared
/// object with a single compiler invocation.
/// The compiler is `clang-14` if WITH_JIT_CLANG_14 is defined, otherwise it is read
/// from the CUSTOM_JIT environment variable.
/// Throws std::runtime_error if CUSTOM_JIT is required but unset, or if the compiler
/// exits with a non-zero code. Returns silently, without registering the program,
/// if the job was interrupted.
void BackendProgramC::compileJIT(InterruptableJob& interrupt) {
// Dump to a C file.
dump();

// Invoke the compiler to generate the shared object file.
std::stringstream command;
#ifdef WITH_JIT_CLANG_14
command << "clang-14 ";
#else
// No compiler was fixed at build time: take it from the environment.
const char* env = std::getenv("CUSTOM_JIT");
if (!env) {
throw std::runtime_error("Custom compiler has to be set through CUSTOM_JIT env variable.");
}
command << env << " ";
#endif
command << path(program_name);
if constexpr (debug_mode) {
command << " -g -O0 -fPIC -gdwarf-4 ";
} else {
command << " -O3 -fPIC";
}
// Note: this path does not pass -flto and does not link the runtime object files;
// it compiles and links the shared object in one step.
command << " -shared -o ";
command << so_path(program_name);

auto command_str = command.str();

auto exit_code = Command::runShell(command_str, interrupt);
// An interrupted job is not an error: leave without registering the program.
if (interrupt.getResult() == InterruptableJob::Change::Interrupted) {
return;
}
if (exit_code != 0) {
throw std::runtime_error("Compilation failed. Command: " + command_str);
}

// Add to compiled programs.
backend->generated.insert(program_name);
}

void BackendProgramC::compileInterpreter(InterruptableJob& interrupt) {
// Dump to a C file.
dump();

std::stringstream command;

// Step 1: Invoke the compiler to generate an object file.
{
#ifdef WITH_JIT_CLANG_14
command << "clang-14 ";
#else
Expand All @@ -42,29 +106,58 @@ void BackendProgramC::compileToMachinecode(InterruptableJob& interrupt) {
}
command << env << " ";
#endif
command << path(program_name);
// Use link time optimizations.
command << " -flto";
if constexpr (debug_mode) {
command << " -g -O0 -fPIC -gdwarf-4 ";
} else {
command << " -O3 -fPIC";
}
command << " -shared";
command << " -c " << path(program_name);
// Add flto to generate LLVM bytecode that allows powerful link-time optimizations
command << " -o ";
command << so_path(program_name);
auto command_str = command.str();
command << object_path(program_name);
}

auto exit_code = Command::runShell(command_str, interrupt);
if (interrupt.getResult() == InterruptableJob::Change::Interrupted) {
return;
}
if (exit_code != 0) {
throw std::runtime_error("Compilation failed.");
command << " && ";

// Step 2: Link prebuilt inkfuse static runtime against the object file.
{
#ifdef WITH_JIT_CLANG_14
command << "clang-14 ";
#else
const char* env = std::getenv("CUSTOM_JIT");
if (!env) {
throw std::runtime_error("Custom compiler has to be set through CUSTOM_JIT env variable.");
}
command << env << " ";
#endif
if constexpr (debug_mode) {
command << " -g -O0 ";
} else {
command << " -O3 ";
}
// Use link time optimizations.
command << " -flto ";
command << object_path(program_name);
command << " " << INKFUSE_OBJECT_DEPENDENCIES << " ";
// Generate shared library that can be dynamically linked by the driver.
command << " -fPIC -shared -o ";
command << so_path(program_name);
}

auto command_str = command.str();

// Add to compiled programs.
backend->generated.insert(program_name);
auto exit_code = Command::runShell(command_str, interrupt);
if (interrupt.getResult() == InterruptableJob::Change::Interrupted) {
return;
}
was_compiled = true;
if (exit_code != 0) {
throw std::runtime_error("Compilation failed. Command: " + command_str);
}

// Add to compiled programs.
backend->generated.insert(program_name);
}

BackendProgramC::~BackendProgramC() {
Expand Down Expand Up @@ -457,4 +550,6 @@ void BackendC::compileValue(const IR::Value& value, ScopedWriter::Statement& str
str.stream() << value.str();
}

#undef INKFUSE_OBJECT_DEPENDENCIES

}
6 changes: 5 additions & 1 deletion src/codegen/backend_c/BackendC.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ struct BackendProgramC : public IR::BackendProgram {
void unlink() override;

/// Compile the backend program to actual machine code. The interrupt is used to stop compilation.
void compileToMachinecode(InterruptableJob& interrupt) override;
void compileToMachinecode(InterruptableJob& interrupt, bool compile_for_interpreter) override;
/// Fast compilation path to an SO used during JIT compilation.
void compileJIT(InterruptableJob& interrupt);
/// Slow compilation path to an SO used for generating the interpreter.
void compileInterpreter(InterruptableJob& interrupt);

/// Get a function with the specified name from the compiled program.
void* getFunction(std::string_view name) override;
Expand Down
2 changes: 1 addition & 1 deletion src/interpreter/FragmentCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ FragmentCache::FragmentCache()
BackendC backend;
program = backend.generate(*fragments);
InterruptableJob interrupt;
program->compileToMachinecode(interrupt);
program->compileToMachinecode(interrupt, /* compile_for_interpreter = */ true);
// And link directly to not slow down the first queries.
program->link();
}
Expand Down
Loading

0 comments on commit 2b92b11

Please sign in to comment.