Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Frontend fixes #355

Closed
wants to merge 36 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8f948b3
Add polygeist::UndefOp
ivanradanov Jul 10, 2023
0615795
More builtins
ivanradanov Jul 11, 2023
15ba3a5
Fall back to libc functions when we could not emit the builtin version
ivanradanov Jul 11, 2023
b9f3efb
Add option to disable block coarsening
ivanradanov Jul 12, 2023
e98a2a1
Add an option to do coalescing-friendly thread coarsening
ivanradanov Jul 13, 2023
89224a9
Preserve information about alternatives
ivanradanov Jul 22, 2023
f06fac3
Collect more statistics
ivanradanov Jul 23, 2023
1a23156
kernel statistics bug fix
ivanradanov Jul 24, 2023
bf12b47
Further fixes
ivanradanov Jul 24, 2023
35fe6ea
Fix bug
ivanradanov Jul 24, 2023
62ceb3f
bug
ivanradanov Jul 24, 2023
ec5b7e2
bug
ivanradanov Jul 24, 2023
db64c32
Const and device mem space
ivanradanov Jul 24, 2023
f4ef77f
Fix bug in parallel to gpu
ivanradanov Aug 2, 2023
9ddf93a
Move tools/cgeist/CollectKernelStatistics.cpp
ivanradanov Aug 2, 2023
53ac6cd
Handle if and for with results when parallel unrolling
ivanradanov Aug 2, 2023
5001236
Add a way to choose specific alt
ivanradanov Aug 4, 2023
ea80a66
Choose alt before compiling gpu binaries
ivanradanov Aug 4, 2023
094c698
Remove unused gpu funcs
ivanradanov Aug 4, 2023
a41108a
Restructure code so that we remove unneeded kernels from the gpu binary
ivanradanov Aug 4, 2023
f56f79c
Fix tagging alt ids
ivanradanov Aug 4, 2023
404eccb
clang-format
ivanradanov Aug 4, 2023
cba8034
Fix some pass ordering issues
ivanradanov Aug 4, 2023
fa2612d
Inline the relegated calls to gpu kernels we generate in the frontend
ivanradanov Aug 8, 2023
d4ce163
Bug fixes around gpu kernels
ivanradanov Aug 9, 2023
6a493ad
Fix pgo test
ivanradanov Aug 12, 2023
9f1bcae
Allow for granular block coarsening factors
ivanradanov Aug 12, 2023
9e067de
Output the accumulated runtime of kernels, not individual ones
ivanradanov Aug 14, 2023
ce747a4
Include full path in kernel ID
ivanradanov Aug 14, 2023
7533379
Fix upper bound calculations when raising scf for to affine
ivanradanov Aug 18, 2023
b9d94a0
Prevent device side get globals from getting hoisted by CSE
ivanradanov Aug 18, 2023
1da911f
Get global has an alloca mem effect and was wrongly identified as sha…
ivanradanov Aug 18, 2023
4f40922
Add new get _device_ global op
ivanradanov Aug 18, 2023
86d5331
More flexible specification of coarsening factors
ivanradanov Aug 21, 2023
1fa4d42
Fix copy paste error
ivanradanov Aug 26, 2023
1da7e62
clang-format
ivanradanov Sep 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/polygeist/Passes/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ std::unique_ptr<Pass> createParallelLowerPass(
// Pass factory declarations. Each returns an owning std::unique_ptr<Pass>.
std::unique_ptr<Pass> createConvertCudaRTtoCPUPass();
std::unique_ptr<Pass> createConvertCudaRTtoGPUPass();
std::unique_ptr<Pass> createConvertCudaRTtoHipRTPass();
std::unique_ptr<Pass> createFixGPUFuncPass();
// `unrollFactor` defaults to 2 when the caller does not supply one.
std::unique_ptr<Pass> createSCFParallelLoopUnrollPass(int unrollFactor = 2);
std::unique_ptr<Pass>
createConvertPolygeistToLLVMPass(const LowerToLLVMOptions &options,
Expand All @@ -49,6 +50,8 @@ createConvertParallelToGPUPass1(std::string arch = "sm_60");
// Pass factory declarations. Each returns an owning std::unique_ptr<Pass>.
// `emitGPUKernelLaunchBounds` defaults to true.
std::unique_ptr<Pass>
createConvertParallelToGPUPass2(bool emitGPUKernelLaunchBounds = true);
std::unique_ptr<Pass> createMergeGPUModulesPass();
std::unique_ptr<Pass> createLowerAlternativesPass();
std::unique_ptr<Pass> createCollectKernelStatisticsPass();
// All seven parameters are required; none has a default value.
std::unique_ptr<Pass> createGpuSerializeToCubinPass(
StringRef arch, StringRef features, int llvmOptLevel, int ptxasOptLevel,
std::string ptxasPath, std::string libDevicePath, bool outputIntermediate);
Expand Down
18 changes: 18 additions & 0 deletions include/polygeist/Passes/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,31 @@ def SCFParallelLoopUnroll : Pass<"scf-parallel-loop-unroll"> {
];
}

// Pass record for `collect-kernel-statistics`, anchored on mlir::ModuleOp.
// It declares no dependent dialects and is instantiated through
// mlir::polygeist::createCollectKernelStatisticsPass().
def CollectKernelStatistics : Pass<"collect-kernel-statistics", "mlir::ModuleOp"> {
  // The previous summary was copied from ConvertCudaRTtoCPU and described a
  // different pass.
  let summary = "Collect kernel statistics";
  let dependentDialects = [];
  let constructor = "mlir::polygeist::createCollectKernelStatisticsPass()";
}

// Pass record for `lower-alternatives`, anchored on mlir::ModuleOp.
// It declares no dependent dialects and is instantiated through
// mlir::polygeist::createLowerAlternativesPass().
def LowerAlternatives : Pass<"lower-alternatives", "mlir::ModuleOp"> {
let summary = "Lower alternatives if in opt mode";
let dependentDialects = [];
let constructor = "mlir::polygeist::createLowerAlternativesPass()";
}

// Pass record for `convert-cudart-to-cpu`, anchored on mlir::ModuleOp.
// Depends on the MemRef, Func and LLVM dialects and is instantiated through
// mlir::polygeist::createConvertCudaRTtoCPUPass().
def ConvertCudaRTtoCPU : Pass<"convert-cudart-to-cpu", "mlir::ModuleOp"> {
let summary = "Lower cudart functions to cpu versions";
let dependentDialects =
["memref::MemRefDialect", "func::FuncDialect", "LLVM::LLVMDialect"];
let constructor = "mlir::polygeist::createConvertCudaRTtoCPUPass()";
}

// Pass record for `fix-gpu-func`. Unlike the module-level passes in this file,
// it is anchored on mlir::gpu::GPUModuleOp. Depends on the Func, LLVM and GPU
// dialects and is instantiated through mlir::polygeist::createFixGPUFuncPass().
def FixGPUFunc : Pass<"fix-gpu-func", "mlir::gpu::GPUModuleOp"> {
let summary = "Fix nested calls to gpu functions we generate in the frontend";
let dependentDialects = ["func::FuncDialect", "LLVM::LLVMDialect", "gpu::GPUDialect"];
let constructor = "mlir::polygeist::createFixGPUFuncPass()";
}

def ConvertCudaRTtoGPU : Pass<"convert-cudart-to-gpu", "mlir::ModuleOp"> {
let summary = "Lower cudart functions to generic gpu versions";
let dependentDialects =
Expand Down
22 changes: 22 additions & 0 deletions include/polygeist/PolygeistOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@ include "mlir/IR/SymbolInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Dialect/LLVMIR/LLVMOpsInterfaces.td"

// `polygeist.undef`: an operand-less op marked Pure that produces a single
// result of any type.
def UndefOp
: Polygeist_Op<"undef", [Pure]> {
let summary = "More flexible undef op";
// Default builders are suppressed; the only builder is the one declared below.
let skipDefaultBuilders = 1;
let results = (outs AnyType:$result);
// The builder takes the desired result type and appends it to the operation
// state's result types.
let builders = [
OpBuilder<(ins "Type":$type), [{
$_state.types.push_back(type);
}]>];
// Canonicalization patterns are declared for this op; they are implemented
// outside this file.
let hasCanonicalizer = true;
}

def NoopOp
: Polygeist_Op<"noop",
[DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
Expand All @@ -29,6 +41,16 @@ def NoopOp
let description = [{}];
}

// `polygeist.get_device_global`: takes a flat symbol reference `name` and
// yields a statically shaped memref. The op declares the methods of
// MemoryEffectsOpInterface and SymbolUserOpInterface; their implementations
// live outside this file.
// NOTE(review): presumably `name` refers to a device-side global and the
// result is a memref view of it -- confirm against the op implementation.
// The summary and description below are still empty.
def GetDeviceGlobalOp
: Polygeist_Op<"get_device_global",
[DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
DeclareOpInterfaceMethods<SymbolUserOpInterface>]> {
let summary = "";
let arguments = (ins FlatSymbolRefAttr:$name);
let results = (outs AnyStaticShapeMemRef:$result);
let description = [{}];
}

def CacheLoad
: Polygeist_Op<"cacheload"> {

Expand Down
57 changes: 40 additions & 17 deletions lib/polygeist/ExecutionEngine/PGORuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
// PGO functions which should know whether the code in the alternatives op is
// GPU code - we can add an attrib to the alternatives op for that

#include <cstdlib>
#include <ctime>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
#include <numeric>
#include <vector>

extern "C" int32_t mgpurtDeviceSynchronizeErr(void);

Expand All @@ -26,26 +29,38 @@ class PGOState {
struct timespec start_clock;
};

// Holds the elapsed-time samples collected so far, keyed by a string (the
// kernel id). Destroying a Logger calls PGOState::writeResults().
struct Logger {
// Each key maps to the list of elapsed times recorded for it.
std::map<std::string, std::vector<double>> timings;
~Logger() { PGOState::writeResults(); }
};

inline static int alternative;
inline static std::string dirname;
inline thread_local static std::mutex mutex;
inline thread_local static std::map<std::string, State *> states;
inline static Logger logger;

std::string kernelId;
const char *kernelId_c;
int totalAlternatives;

PGOState(const char *kernelId_c, int totalAlternatives)
: totalAlternatives(totalAlternatives) {
kernelId = kernelId_c;
std::string getKernelId() {
std::string kernelId = kernelId_c;
for (char &c : kernelId)
if (c == '/')
c = '+';
return kernelId;
}

// Records the kernel id pointer and the number of alternatives. Only the
// pointer is stored, not a copy of the characters it points to.
PGOState(const char *kernelId_c, int totalAlternatives)
    : kernelId_c(kernelId_c), totalAlternatives(totalAlternatives) {}
void end() {
struct timespec end_clock;
mgpurtDeviceSynchronizeErr();
clock_gettime(CLOCK_MONOTONIC, &end_clock);

auto kernelId = getKernelId();
std::unique_lock<std::mutex> lock(mutex);
if (states.count(kernelId) == 0) {
std::cerr << "No kernel with id " << kernelId << "running" << std::endl;
Expand All @@ -59,21 +74,16 @@ class PGOState {
double elapsed =
(tmp_clock.tv_sec + ((double)tmp_clock.tv_nsec) * .000000001);

// Only write to file if we are profiling a valid alternative
if (0 <= alternative && alternative < totalAlternatives) {
// TODO error handling
std::ofstream ofile;
ofile.open(std::string(dirname) + "/" + kernelId,
std::ios::out | std::ios::app);
ofile << alternative << " " << elapsed << std::endl;
ofile.close();
}
if (states.count(kernelId) == 0)
logger.timings[kernelId] = {};
logger.timings[kernelId].push_back(elapsed);

delete state;
states.erase(states.find(kernelId));
}

void start() {
auto kernelId = getKernelId();
std::unique_lock<std::mutex> lock(mutex);
State *state = new State();
if (states.count(kernelId) == 1) {
Expand All @@ -87,6 +97,21 @@ class PGOState {
clock_gettime(CLOCK_MONOTONIC, &state->start_clock);
}

// Appends one line per recorded kernel to the file "<dirname>/<kernelId>".
// Each line holds the value of `alternative`, a space, and the sum of all
// timings stored for that kernel in `logger.timings`. No validity check is
// made on `alternative` here.
static void writeResults() {
  for (const auto &[kernelId, timings] : logger.timings) {
    // Seed with a double: a float seed (0.0f) makes std::accumulate keep the
    // running total in float and round every partial sum.
    const double elapsed =
        std::accumulate(timings.begin(), timings.end(), 0.0);

    const std::string path = dirname + "/" + kernelId;
    std::ofstream ofile(path, std::ios::out | std::ios::app);
    if (!ofile) {
      // Report the failure instead of silently dropping the measurement.
      std::cerr << "Could not open " << path << " for writing" << std::endl;
      continue;
    }
    ofile << alternative << " " << elapsed << std::endl;
    ofile.close();
  }
}

int getAlternative() {
static int init = [&] {
if (char *i = getenv(POLYGEIST_PGO_ALTERNATIVE_ENV_VAR)) {
Expand All @@ -102,12 +127,10 @@ class PGOState {
this->dirname = POLYGEIST_PGO_DEFAULT_DATA_DIR;
}
std::filesystem::create_directories(dirname);

return 0;
}();
if (0 <= alternative && alternative < totalAlternatives)
return alternative;
else
return 0;
return alternative % totalAlternatives;
}

// Empty destructor body: no cleanup is performed here.
~PGOState() {}
Expand Down
Loading
Loading