Skip to content

Commit

Permalink
GPU: Move debug starthit sorting to kernel instead of debug dump
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Feb 16, 2024
1 parent e90c7d2 commit 255082a
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 17 deletions.
3 changes: 3 additions & 0 deletions GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ int GPUChainTracking::RunTPCTrackingSlices_internal()
runKernel<GPUTPCStartHitsSorter>(GetGridAuto(useStream), {iSlice});
}
#endif
if (GetProcessingSettings().comparableDebutOutput) {
runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>(GetGrid(1, 1, useStream), {iSlice});
}
DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile);

if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
Expand Down
14 changes: 14 additions & 0 deletions GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,17 @@ GPUdii() void GPUTPCSectorDebugSortKernels::Thread<GPUTPCSectorDebugSortKernels:
}
}
}

template <>
GPUdii() void GPUTPCSectorDebugSortKernels::Thread<GPUTPCSectorDebugSortKernels::startHits>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker)
{
  // Debug kernel: sorts the tracker's start hits in place, ordered by row index
  // and, within a row, by hit index.
  // The sort is executed only by the invocation with iBlock == 0 and iThread == 0;
  // every other invocation returns without touching the data.
  if (iBlock != 0 || iThread != 0) {
    return;
  }
  // Range sorted: [TrackletStartHits(), TrackletStartHits() + *NStartHits()).
  GPUCommonAlgorithm::sortDeviceDynamic(tracker.TrackletStartHits(), tracker.TrackletStartHits() + *tracker.NStartHits(), [](const GPUTPCHitId& lhs, const GPUTPCHitId& rhs) {
    // Strict "less than": the row decides first, the hit index breaks ties.
    return (lhs.RowIndex() == rhs.RowIndex()) ? (lhs.HitIndex() < rhs.HitIndex()) : (lhs.RowIndex() < rhs.RowIndex());
  });
}
3 changes: 2 additions & 1 deletion GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class GPUTPCSectorDebugSortKernels : public GPUKernelTemplate
{
public:
enum K { defaultKernel = 0,
hitData = 0 };
hitData = 0,
startHits = 1 };
// Reports the reconstruction step this kernel class belongs to: always RecoStep::TPCSliceTracking.
GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; }
typedef GPUTPCTracker processorType;
// Returns the tpcTrackers member of the given GPUConstantMem as the processor (GPUTPCTracker) pointer for this kernel.
GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; }
Expand Down
3 changes: 2 additions & 1 deletion GPU/GPUTracking/SliceTracker/GPUTPCTracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ class GPUTPCTracker : public GPUProcessor
// Returns the address of the nStartHits counter stored in mCommonMem (writable even through a const tracker).
GPUhd() GPUglobalref() GPUAtomic(unsigned int) * NStartHits() const { return &mCommonMem->nStartHits; }

// Read-only reference to entry i of mTrackletStartHits; the index is not range-checked here.
GPUhd() GPUglobalref() const GPUTPCHitId& TrackletStartHit(int i) const { return mTrackletStartHits[i]; }
GPUhd() GPUglobalref() GPUTPCHitId* TrackletStartHits() const { return mTrackletStartHits; }
// Const overload: read-only pointer to the start-hit array (mTrackletStartHits).
GPUhd() GPUglobalref() const GPUTPCHitId* TrackletStartHits() const { return mTrackletStartHits; }
// Non-const overload: writable pointer to the start-hit array (mTrackletStartHits), for callers that modify the entries in place.
GPUhd() GPUglobalref() GPUTPCHitId* TrackletStartHits() { return mTrackletStartHits; }
// Returns the pointer held in mTrackletTmpStartHits; the pointee is writable even through a const tracker.
GPUhd() GPUglobalref() GPUTPCHitId* TrackletTmpStartHits() const { return mTrackletTmpStartHits; }
MEM_CLASS_PRE2()
GPUhd() GPUglobalref() const MEM_LG2(GPUTPCTracklet) & Tracklet(int i) const { return mTracklets[i]; }
Expand Down
15 changes: 0 additions & 15 deletions GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,10 @@ void GPUTPCTracker::DumpHitWeights(std::ostream& out)
}
}

int GPUTPCTracker::StarthitSortComparison(const void* a, const void* b)
{
// qsort helper function to sort start hits
const GPUTPCHitId* aa = reinterpret_cast<const GPUTPCHitId*>(a);
const GPUTPCHitId* bb = reinterpret_cast<const GPUTPCHitId*>(b);

if (aa->RowIndex() != bb->RowIndex()) {
return (aa->RowIndex() - bb->RowIndex());
}
return (aa->HitIndex() - bb->HitIndex());
}

void GPUTPCTracker::DumpStartHits(std::ostream& out)
{
// sort start hits and dump to file
out << "\nStart Hits: (Slice" << mISlice << ") (" << *NStartHits() << ")" << std::endl;
if (mRec->GetProcessingSettings().comparableDebutOutput) {
qsort(TrackletStartHits(), *NStartHits(), sizeof(GPUTPCHitId), StarthitSortComparison);
}
for (unsigned int i = 0; i < *NStartHits(); i++) {
out << TrackletStartHit(i).RowIndex() << "-" << TrackletStartHit(i).HitIndex() << std::endl;
}
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/kernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" NO_OCL1 single
o2_gpu_add_kernel("GPUTPCCreateSliceData" LB single)
o2_gpu_add_kernel("GPUTPCGlobalTracking" LB single)
o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" NO single)
o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" NO single)
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" LB simple GPUTPCClusterOccupancyMapBin* map)
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" LB simple GPUTPCClusterOccupancyMapBin* map)
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" LB simple int mode)
Expand Down

0 comments on commit 255082a

Please sign in to comment.