From 27b2ca280aaf10e42458b4c8695424a9b923d09d Mon Sep 17 00:00:00 2001 From: GNiendorf Date: Wed, 11 Mar 2026 18:24:29 -0400 Subject: [PATCH] various cpu improvements --- RecoTracker/LSTCore/src/alpaka/Kernels.h | 95 ++- .../LSTCore/src/alpaka/LSTEvent.dev.cc | 38 +- RecoTracker/LSTCore/src/alpaka/LSTEvent.h | 4 + RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 745 +++++++++--------- .../LSTCore/src/alpaka/NeuralNetwork.h | 3 +- .../LSTCore/src/alpaka/PixelQuintuplet.h | 72 +- RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 479 +++++------ RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 112 ++- RecoTracker/LSTCore/src/alpaka/Segment.h | 400 ++++++---- .../LSTCore/src/alpaka/TrackCandidate.h | 59 +- RecoTracker/LSTCore/src/alpaka/Triplet.h | 304 ++++--- 11 files changed, 1243 insertions(+), 1068 deletions(-) diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 49c700f48ecc6..3533b08fdb51e 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -242,12 +242,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int quintupletModuleIndices_lowmod2 = ranges.quintupletModuleIndices()[lowmod2]; + // Module-level eta pre-check: skip module pairs where Q5s are far apart in eta. + // Within a module, Q5 etas are tightly clustered (<0.1), so 0.3 margin is very safe. + if (alpaka::math::abs(acc, + __H2F(quintuplets.eta()[quintupletModuleIndices_lowmod1]) - + __H2F(quintuplets.eta()[quintupletModuleIndices_lowmod2])) > 0.3f) + continue; + for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) { unsigned int ix = quintupletModuleIndices_lowmod1 + ix1; if (quintuplets.isDup()[ix] & 1) continue; - bool isPT5_ix = quintuplets.partOfPT5()[ix]; + const bool isPT5_ix = quintuplets.partOfPT5()[ix]; + const float eta1 = __H2F(quintuplets.eta()[ix]); + const float phi1 = __H2F(quintuplets.phi()[ix]); + const float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { unsigned int jx = quintupletModuleIndices_lowmod2 + jx1; @@ -257,31 +267,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (quintuplets.isDup()[jx] & 1) continue; - bool isPT5_jx = quintuplets.partOfPT5()[jx]; + const bool isPT5_jx = quintuplets.partOfPT5()[jx]; if (isPT5_ix && isPT5_jx) continue; - float eta1 = __H2F(quintuplets.eta()[ix]); - float phi1 = __H2F(quintuplets.phi()[ix]); - float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); - - float eta2 = __H2F(quintuplets.eta()[jx]); - float phi2 = __H2F(quintuplets.phi()[jx]); - float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); - - float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); - + const float eta2 = __H2F(quintuplets.eta()[jx]); + const float dEta = alpaka::math::abs(acc, eta1 - eta2); if (dEta > 0.1f) continue; + const float phi2 = __H2F(quintuplets.phi()[jx]); + const float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); if (alpaka::math::abs(acc, dPhi) > 0.1f) continue; - float dR2 = dEta * dEta + dPhi * dPhi; - int nMatched = checkHitsT5(ix, jx, quintuplets); - const int minNHitsForDup_T5 = 5; + const float dR2 = dEta * dEta + dPhi * dPhi; + const int nMatched = checkHitsT5(ix, jx, quintuplets); + constexpr int minNHitsForDup_T5 = 5; float d2 = 0.f; CMS_UNROLL_LOOP @@ -291,12 +294,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } if (((dR2 < 0.001f || nMatched >= minNHitsForDup_T5) && d2 < 1.0f) || (dR2 < 0.02f && d2 < 0.1f)) { + const float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); if (isPT5_jx || score_rphisum1 > score_rphisum2) { rmQuintupletFromMemory(quintuplets, ix, true); + break; // ix is now dup, no need to compare further } else if (isPT5_ix || score_rphisum1 < score_rphisum2) { rmQuintupletFromMemory(quintuplets, jx, true); } else { rmQuintupletFromMemory(quintuplets, (ix < jx ? ix : jx), true); + if (ix < jx) + break; // ix was marked dup } } } @@ -458,11 +465,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const { unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) { + float eta1 = __H2F(pixelQuintuplets.eta()[ix]); + float phi1 = __H2F(pixelQuintuplets.phi()[ix]); float score1 = __H2F(pixelQuintuplets.score()[ix]); for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) { if (ix == jx) continue; + float eta2 = __H2F(pixelQuintuplets.eta()[jx]); + if (alpaka::math::abs(acc, eta1 - eta2) > 0.1f) + continue; + + float phi2 = __H2F(pixelQuintuplets.phi()[jx]); + if (alpaka::math::abs(acc, cms::alpakatools::deltaPhi(acc, phi1, phi2)) > 0.1f) + continue; + int nMatched = checkHitspT5(ix, jx, pixelQuintuplets); float score2 = __H2F(pixelQuintuplets.score()[jx]); const int minNHitsForDup_pT5 = 7; @@ -477,53 +494,64 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } }; + // pLS dedup using eta-sorted indices. On CPU the sorted order enables early termination + // via break; on GPU each thread that breaks simply exits its own iteration. struct CheckHitspLS { ALPAKA_FN_ACC void operator()(Acc2D const& acc, ModulesConst modules, SegmentsOccupancyConst segmentsOccupancy, PixelSeedsConst pixelSeeds, PixelSegments pixelSegments, - bool secondpass) const { + bool secondpass, + unsigned int const* __restrict__ etaOrder) const { int pixelModuleIndex = modules.nLowerModules(); unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex]; if (nPixelSegments > n_max_pixel_segments_per_module) nPixelSegments = n_max_pixel_segments_per_module; - for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) { + for (unsigned int si : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) { + unsigned int ix = etaOrder[si]; if (secondpass && (!pixelSeeds.isQuad()[ix] || (pixelSegments.isDup()[ix] & 1))) continue; - auto const& phits1 = pixelSegments.pLSHitsIdxs()[ix]; float eta_pix1 = pixelSeeds.eta()[ix]; float phi_pix1 = pixelSeeds.phi()[ix]; - for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, ix + 1, nPixelSegments)) { + for (unsigned int sj : cms::alpakatools::uniform_elements_x(acc, si + 1, nPixelSegments)) { + unsigned int jx = etaOrder[sj]; float eta_pix2 = pixelSeeds.eta()[jx]; - float phi_pix2 = pixelSeeds.phi()[jx]; - if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f) - continue; + if (eta_pix2 - eta_pix1 > 0.1f) + break; if (secondpass && (!pixelSeeds.isQuad()[jx] || (pixelSegments.isDup()[jx] & 1))) continue; - int8_t quad_diff = pixelSeeds.isQuad()[ix] - pixelSeeds.isQuad()[jx]; - float score_diff = pixelSegments.score()[ix] - pixelSegments.score()[jx]; - // Always keep quads over trips. If they are the same, we want the object with better score + float phi_pix2 = pixelSeeds.phi()[jx]; + + // Use min/max of original indices so hit comparison direction is consistent + // (lower index = phits1), since the eta-sorted iteration order doesn't + // guarantee ix < jx in original indices. + unsigned int lo = (ix < jx) ? ix : jx; + unsigned int hi = (ix < jx) ? jx : ix; + + int8_t quad_diff = pixelSeeds.isQuad()[lo] - pixelSeeds.isQuad()[hi]; + float score_diff = pixelSegments.score()[lo] - pixelSegments.score()[hi]; int idxToRemove; if (quad_diff > 0) - idxToRemove = jx; + idxToRemove = hi; else if (quad_diff < 0) - idxToRemove = ix; + idxToRemove = lo; else if (score_diff < 0) - idxToRemove = jx; + idxToRemove = hi; else if (score_diff > 0) - idxToRemove = ix; + idxToRemove = lo; else - idxToRemove = ix; + idxToRemove = lo; - auto const& phits2 = pixelSegments.pLSHitsIdxs()[jx]; + auto const& phits1 = pixelSegments.pLSHitsIdxs()[lo]; + auto const& phits2 = pixelSegments.pLSHitsIdxs()[hi]; int npMatched = 0; for (int i = 0; i < Params_pLS::kHits; i++) { @@ -536,7 +564,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } if (pmatched) { npMatched++; - // Only one hit is enough if (secondpass) break; } @@ -546,7 +573,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rmPixelSegmentFromMemory(pixelSegments, idxToRemove, secondpass); } if (secondpass) { - float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); + float dEta = alpaka::math::abs(acc, eta_pix2 - eta_pix1); float dPhi = cms::alpakatools::deltaPhi(acc, phi_pix1, phi_pix2); float dR2 = dEta * dEta + dPhi * dPhi; diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc index 111afe0a286db..9f35b7d2b54f1 100644 --- a/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc @@ -15,7 +15,9 @@ #include "Triplet.h" #include "Quadruplet.h" +#include #include +#include using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; @@ -75,6 +77,7 @@ void LSTEvent::resetEventSync() { pixelTripletsDC_.reset(); pixelQuintupletsDC_.reset(); quadrupletsDC_.reset(); + etaSortedPLSIndex_.reset(); lstInputHC_.reset(); hitsHC_.reset(); @@ -569,6 +572,7 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) if (!no_pls_dupclean) { auto const checkHitspLS_workDiv = cms::alpakatools::make_workdiv({max_blocks * 4, max_blocks / 4}, {16, 16}); + buildEtaSortedPLSIndex(); alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, @@ -576,7 +580,8 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) segmentsDC_->const_view().segmentsOccupancy(), lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->view(), - true); + true, + etaSortedPLSIndex_->data()); } // Counting kernel @@ -1005,10 +1010,38 @@ void LSTEvent::createQuintuplets() { } } +void LSTEvent::buildEtaSortedPLSIndex() { + if (etaSortedPLSIndex_) + return; // already built + + auto nSeg_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nSeg_view_d = cms::alpakatools::make_device_view( + queue_, segmentsDC_->const_view().segmentsOccupancy().nSegments()[pixelModuleIndex_]); + alpaka::memcpy(queue_, nSeg_buf_h, nSeg_view_d); + alpaka::wait(queue_); + unsigned int nPixelSegments = std::min(*nSeg_buf_h.data(), n_max_pixel_segments_per_module); + + auto eta_buf_h = cms::alpakatools::make_host_buffer(queue_, nPixelSegments); + auto eta_view_d = + cms::alpakatools::make_device_view(queue_, lstInputDC_->const_view().pixelSeeds().eta(), nPixelSegments); + alpaka::memcpy(queue_, eta_buf_h, eta_view_d, nPixelSegments); + alpaka::wait(queue_); + + auto order_buf_h = cms::alpakatools::make_host_buffer(queue_, nPixelSegments); + auto* order = order_buf_h.data(); + std::iota(order, order + nPixelSegments, 0u); + auto const* eta = eta_buf_h.data(); + std::sort(order, order + nPixelSegments, [eta](unsigned int a, unsigned int b) { return eta[a] < eta[b]; }); + + etaSortedPLSIndex_.emplace(cms::alpakatools::make_device_buffer(queue_, nPixelSegments)); + alpaka::memcpy(queue_, *etaSortedPLSIndex_, order_buf_h, nPixelSegments); +} + void LSTEvent::pixelLineSegmentCleaning(bool no_pls_dupclean) { if (!no_pls_dupclean) { auto const checkHitspLS_workDiv = cms::alpakatools::make_workdiv({max_blocks * 4, max_blocks / 4}, {16, 16}); + buildEtaSortedPLSIndex(); alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, @@ -1016,7 +1049,8 @@ void LSTEvent::pixelLineSegmentCleaning(bool no_pls_dupclean) { segmentsDC_->const_view().segmentsOccupancy(), lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->view(), - false); + false, + etaSortedPLSIndex_->data()); } } diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.h b/RecoTracker/LSTCore/src/alpaka/LSTEvent.h index ca94566cad404..0ae3a47378dd1 100644 --- a/RecoTracker/LSTCore/src/alpaka/LSTEvent.h +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.h @@ -35,6 +35,7 @@ #include "RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { @@ -98,6 +99,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { EndcapGeometryDevDeviceCollection const& endcapGeometry_; bool objectsStatistics_ = false; double memoryAllocatedMB_ = 0; + std::optional> etaSortedPLSIndex_; + + void buildEtaSortedPLSIndex(); public: // Constructor used for CMSSW integration. Uses an external queue. diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 806869522e062..e5b3e700d106e 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -13,15 +13,41 @@ #include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + // Pre-computed module-constant data for MiniDoublet kernels. + // Populated once per module to avoid redundant SoA loads in the inner hit-pair loop. + struct ModuleMDData { + uint16_t lowerModuleIndex; + short subdet; + short side; + short moduleType; + short moduleLayerType; + unsigned int iL; // layer - 1 + bool isTilted; + bool isEndcapTwoS; + bool isGloballyInner; + + // shiftStripHits constants + float slope; // dxdys + float drdz; // drdzs[lowerModuleIndex] + float moduleSep; // moduleGapSize result + + // dPhiThreshold pre-computed constants + float miniPVoff; + float miniMuls; + float miniTilt2; // 0 for non-tilted and endcap + float sqrtTermBase; // miniMuls^2 + miniPVoff^2 + float sqrtTermFlat; // sqrt(sqrtTermBase), valid for barrel flat + }; + template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, MiniDoublets mds, HitsBaseConst hitsBase, HitsExtendedConst hitsExtended, - ModulesConst modules, + ModuleMDData const& mod, unsigned int lowerHitIdx, unsigned int upperHitIdx, - uint16_t lowerModuleIdx, float dz, float dPhi, float dPhiChange, @@ -34,9 +60,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //the index into which this MD needs to be written will be computed in the kernel //nMDs variable will be incremented in the kernel, no need to worry about that here - mds.moduleIndices()[idx] = lowerModuleIdx; + mds.moduleIndices()[idx] = mod.lowerModuleIndex; unsigned int anchorHitIndex, outerHitIndex; - if (modules.moduleType()[lowerModuleIdx] == PS and modules.moduleLayerType()[lowerModuleIdx] == Strip) { + if (mod.moduleType == PS and mod.moduleLayerType == Strip) { mds.anchorHitIndices()[idx] = upperHitIdx; mds.outerHitIndices()[idx] = lowerHitIdx; @@ -72,8 +98,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorHighEdgeY()[idx] = hitsExtended.highEdgeYs()[anchorHitIndex]; mds.anchorLowEdgeX()[idx] = hitsExtended.lowEdgeXs()[anchorHitIndex]; mds.anchorLowEdgeY()[idx] = hitsExtended.lowEdgeYs()[anchorHitIndex]; - mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); - mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + // Edge phi values are only meaningful for TwoS endcap modules (where highEdgeX/Y are set in HitLoopKernel). + // Downstream readers (Segment.h, Quintuplet.h) only access these when outerLayerEndcapTwoS is true. + if (mod.isEndcapTwoS) { + mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); + mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + } mds.outerX()[idx] = hitsBase.xs()[outerHitIndex]; mds.outerY()[idx] = hitsBase.ys()[outerHitIndex]; @@ -89,6 +119,49 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #endif } + // Overload for callers (Segment.h) that still pass ModulesConst + moduleIndex. + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, + MiniDoublets mds, + HitsBaseConst hitsBase, + HitsExtendedConst hitsExtended, + ModulesConst modules, + unsigned int lowerHitIdx, + unsigned int upperHitIdx, + uint16_t lowerModuleIdx, + float dz, + float dPhi, + float dPhiChange, + float shiftedX, + float shiftedY, + float shiftedZ, + float noShiftedDphi, + float noShiftedDPhiChange, + unsigned int idx) { + ModuleMDData mod; + mod.lowerModuleIndex = lowerModuleIdx; + mod.moduleType = modules.moduleType()[lowerModuleIdx]; + mod.moduleLayerType = modules.moduleLayerType()[lowerModuleIdx]; + mod.subdet = modules.subdets()[lowerModuleIdx]; + mod.isEndcapTwoS = (mod.subdet == Endcap && mod.moduleType == TwoS); + addMDToMemory(acc, + mds, + hitsBase, + hitsExtended, + mod, + lowerHitIdx, + upperHitIdx, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDPhiChange, + idx); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules(ModulesConst modules, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" @@ -132,76 +205,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold(TAcc const& acc, - float rt, - ModulesConst modules, - uint16_t moduleIndex, - const float ptCut, - float dPhi = 0, - float dz = 0) { - // ================================================================= - // Various constants - // ================================================================= - //mean of the horizontal layer position in y; treat this as R below - - // ================================================================= - // Computing some components that make up the cut threshold - // ================================================================= - - unsigned int iL = modules.layers()[moduleIndex] - 1; + ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( + TAcc const& acc, float rt, ModuleMDData const& mod, const float ptCut, float dPhi = 0, float dz = 0) { const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - const float rLayNominal = - ((modules.subdets()[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); - const float miniPVoff = 0.1f / rLayNominal; - const float miniMuls = ((modules.subdets()[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); - const bool isTilted = modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] != Center; - //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly - - float drdz; - if (isTilted) { - if (modules.moduleType()[moduleIndex] == PS and modules.moduleLayerType()[moduleIndex] == Strip) { - drdz = modules.drdzs()[moduleIndex]; - } else { - drdz = modules.drdzs()[modules.partnerModuleIndices()[moduleIndex]]; - } + + if (mod.subdet == Barrel and mod.side == Center) { + return miniSlope + mod.sqrtTermFlat; + } else if (mod.subdet == Barrel) { + return miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + mod.miniTilt2 * miniSlope * miniSlope); } else { - drdz = 0; - } - const float miniTilt2 = ((isTilted) ? (0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) / - (1.f + drdz * drdz) / moduleGapSize(modules, moduleIndex) - : 0); - - // Compute luminous region requirement for endcap - const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); // Balaji's new error - - // ================================================================= - // Return the threshold value - // ================================================================= - // Following condition is met if the module is central and flatly lying - if (modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] == Center) { - return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); - } - // Following condition is met if the module is central and tilted - else if (modules.subdets()[moduleIndex] == Barrel and - modules.sides()[moduleIndex] != Center) //all types of tilted modules - { - return miniSlope + - alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); - } - // If not barrel, it is Endcap - else { - return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); + const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); + return miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + miniLum * miniLum); } } template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, + ModuleMDData const& mod, float* shiftedCoords, float xLower, float yLower, @@ -211,52 +231,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float yUpper, float zUpper, float rtUpper) { - // This is the strip shift scheme that is explained in http://uaf-10.t2.ucsd.edu/~phchang/talks/PhilipChang20190607_SDL_Update.pdf (see backup slides) - // The main feature of this shifting is that the strip hits are shifted to be "aligned" in the line of sight from interaction point to the the pixel hit. - // (since pixel hit is well defined in 3-d) - // The strip hit is shifted along the strip detector to be placed in a guessed position where we think they would have actually crossed - // The size of the radial direction shift due to module separation gap is computed in "radial" size, while the shift is done along the actual strip orientation - // This means that there may be very very subtle edge effects coming from whether the strip hit is center of the module or the at the edge of the module - // But this should be relatively minor effect - - // dependent variables for this if statement - // lowerModule - // lowerHit - // upperHit - // endcapGeometry - // tiltedGeometry - - // Some variables relevant to the function - float xp; // pixel x (pixel hit x) - float yp; // pixel y (pixel hit y) - float zp; // pixel y (pixel hit y) - float rtp; // pixel y (pixel hit y) - float xa; // "anchor" x (the anchor position on the strip module plane from pixel hit) - float ya; // "anchor" y (the anchor position on the strip module plane from pixel hit) - float xo; // old x (before the strip hit is moved up or down) - float yo; // old y (before the strip hit is moved up or down) - float xn; // new x (after the strip hit is moved up or down) - float yn; // new y (after the strip hit is moved up or down) - float abszn; // new z in absolute value - float zn; // new z with the sign (+/-) accounted - float angleA; // in r-z plane the theta of the pixel hit in polar coordinate is the angleA - float angleB; // this is the angle of tilted module in r-z plane ("drdz"), for endcap this is 90 degrees - bool isEndcap; // If endcap, drdz = infinity - float moduleSeparation; - float drprime; // The radial shift size in x-y plane projection - float drprime_x; // x-component of drprime - float drprime_y; // y-component of drprime - const float& slope = - modules.dxdys()[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane - float absArctanSlope; - float angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees - float absdzprime; // The distance between the two points after shifting - const float& drdz_ = modules.drdzs()[lowerModuleIndex]; - // Assign hit pointers based on their hit type + float xp, yp, zp, rtp; + float xo, yo; bool pHitInverted = false; - if (modules.moduleType()[lowerModuleIndex] == PS) { - // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + if (mod.moduleType == PS) { + if (mod.moduleLayerType == Pixel) { xo = xUpper; yo = yUpper; xp = xLower; @@ -281,82 +260,64 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rtp = rtLower; } - // If it is endcap some of the math gets simplified (and also computers don't like infinities) - isEndcap = modules.subdets()[lowerModuleIndex] == Endcap; + const bool isEndcap = (mod.subdet == Endcap); + + const float hypot_rz = alpaka::math::sqrt(acc, rtp * rtp + zp * zp); + const float sinA = rtp / hypot_rz; + const float cosA = alpaka::math::abs(acc, zp) / hypot_rz; - // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos - // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time - angleA = alpaka::math::abs(acc, alpaka::math::atan(acc, rtp / zp)); - angleB = - ((isEndcap) - ? kPi / 2.f - : alpaka::math::atan( - acc, - drdz_)); // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa + float sinApB; + if (isEndcap) { + sinApB = cosA; + } else { + const float inv_hypot_drdz = 1.f / alpaka::math::sqrt(acc, 1.f + mod.drdz * mod.drdz); + sinApB = sinA * inv_hypot_drdz + cosA * mod.drdz * inv_hypot_drdz; + } - moduleSeparation = moduleGapSize(modules, lowerModuleIndex); + float moduleSeparation = mod.moduleSep; - // Sign flips if the pixel is later layer - if (modules.isGloballyInner()[lowerModuleIndex] == pHitInverted) { + if (mod.isGloballyInner == pHitInverted) { moduleSeparation *= -1; } - drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); - - // Compute arctan of the slope and take care of the slope = infinity case - absArctanSlope = - ((slope != kVerticalModuleSlope && edm::isFinite(slope)) ? fabs(alpaka::math::atan(acc, slope)) : kPi / 2.f); + float drprime = moduleSeparation * sinA / sinApB; - // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently - if (xp > 0 and yp > 0) { - angleM = absArctanSlope; - } else if (xp > 0 and yp < 0) { - angleM = kPi - absArctanSlope; - } else if (xp < 0 and yp < 0) { - angleM = kPi + absArctanSlope; - } else // if (xp < 0 and yp > 0) - { - angleM = 2.f * kPi - absArctanSlope; + float drprime_x, drprime_y; + const float slope = mod.slope; + if (slope != kVerticalModuleSlope && edm::isFinite(slope)) { + const float inv_hypot_slope = 1.f / alpaka::math::sqrt(acc, 1.f + slope * slope); + drprime_x = drprime * ((xp > 0.f) - (xp < 0.f)) * alpaka::math::abs(acc, slope) * inv_hypot_slope; + drprime_y = drprime * ((yp > 0.f) - (yp < 0.f)) * inv_hypot_slope; + } else { + drprime_x = drprime * ((xp > 0.f) - (xp < 0.f)); + drprime_y = 0.f; } - // Then since the angleM sign is taken care of properly - drprime_x = drprime * alpaka::math::sin(acc, angleM); - drprime_y = drprime * alpaka::math::cos(acc, angleM); - - // The new anchor position is - xa = xp + drprime_x; - ya = yp + drprime_y; + float xa = xp + drprime_x; + float ya = yp + drprime_y; - // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) - if (slope == kVerticalModuleSlope || - edm::isNotFinite(slope)) // Designated for tilted module when the slope is infinity (module lying along y-axis) - { - xn = xa; // New x point is simply where the anchor is - yn = yo; // No shift in y + float xn, yn; + if (slope == kVerticalModuleSlope || edm::isNotFinite(slope)) { + xn = xa; + yn = yo; } else if (slope == 0) { - xn = xo; // New x point is simply where the anchor is - yn = ya; // No shift in y + xn = xo; + yn = ya; } else { - xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); // new xn - yn = (xn - xa) * slope + ya; // new yn + xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); + yn = (xn - xa) * slope + ya; } - // Computing new Z position - absdzprime = alpaka::math::abs( - acc, - moduleSeparation / alpaka::math::sin(acc, angleA + angleB) * - alpaka::math::cos( - acc, - angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later + float absdzprime = alpaka::math::abs(acc, moduleSeparation * cosA / sinApB); - // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + float abszn; + if (mod.moduleLayerType == Pixel) { abszn = alpaka::math::abs(acc, zp) + absdzprime; } else { abszn = alpaka::math::abs(acc, zp) - absdzprime; } - zn = abszn * ((zp > 0) ? 1 : -1); // Apply the sign of the zn + float zn = abszn * ((zp > 0) ? 1 : -1); shiftedCoords[0] = xn; shiftedCoords[1] = yn; @@ -364,282 +325,288 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& dz, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphiChange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper, - const float ptCut) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, + ModuleMDData const& mod, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper, + const float ptCut) { dz = zLower - zUpper; - const float dzCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; + const float dzCut = mod.moduleType == PS ? 2.f : 10.f; const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; - if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) + if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) { return false; + } - float miniCut = 0; - - miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel - ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, ptCut) - : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, ptCut); + float miniCut = mod.moduleLayerType == Pixel ? dPhiThreshold(acc, rtLower, mod, ptCut) + : dPhiThreshold(acc, rtUpper, mod, ptCut); + const float miniCutSq = miniCut * miniCut; + const float tanMiniCutSq = miniCutSq * (1.f + (2.f / 3.f) * miniCutSq); - // Cut #2: dphi difference - // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 - float xn = 0.f, yn = 0.f; + float x1, y1, x2, y2, r1sq, r2sq; float shiftedRt2 = 0.f; - if (modules.sides()[lowerModuleIndex] != Center) // If barrel and not center it is tilted - { - // Shift the hits and calculate new xn, yn position + + if (mod.isTilted) { float shiftedCoords[3]; - shiftStripHits(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - shiftedCoords, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); - xn = shiftedCoords[0]; - yn = shiftedCoords[1]; - - // Lower or the upper hit needs to be modified depending on which one was actually shifted - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + shiftStripHits(acc, mod, shiftedCoords, xLower, yLower, zLower, rtLower, xUpper, yUpper, zUpper, rtUpper); + float xn = shiftedCoords[0]; + float yn = shiftedCoords[1]; + shiftedRt2 = xn * xn + yn * yn; + + if (mod.moduleLayerType == Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - shiftedRt2 = xn * xn + yn * yn; - - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + x1 = xLower; + y1 = yLower; + x2 = xn; + y2 = yn; + r1sq = rtLower * rtLower; + r2sq = shiftedRt2; } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; - shiftedRt2 = xn * xn + yn * yn; - dPhi = cms::alpakatools::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + x1 = xn; + y1 = yn; + x2 = xUpper; + y2 = yUpper; + r1sq = shiftedRt2; + r2sq = rtUpper * rtUpper; } } else { shiftedX = 0.f; shiftedY = 0.f; shiftedZ = 0.f; - shiftedRt2 = 0.f; - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); - noShiftedDphi = dPhi; + x1 = xLower; + y1 = yLower; + x2 = xUpper; + y2 = yUpper; + r1sq = rtLower * rtLower; + r2sq = rtUpper * rtUpper; + } + + const float crossDP = x1 * y2 - x2 * y1; + const float dotDP = x1 * x2 + y1 * y2; + if (dotDP <= 0.f || crossDP * crossDP >= tanMiniCutSq * dotDP * dotDP) { + return false; } - if (alpaka::math::abs(acc, dPhi) >= miniCut) + const float rInnerSq = alpaka::math::min(acc, r1sq, r2sq); + const float dotDC = dotDP - rInnerSq; + if (dotDC <= 0.f || crossDP * crossDP >= tanMiniCutSq * dotDC * dotDC) { return false; + } + + dPhi = alpaka::math::atan2(acc, crossDP, dotDP); + if (r1sq < r2sq) { + dPhiChange = alpaka::math::atan2(acc, crossDP, dotDP - r1sq); + } else { + dPhiChange = alpaka::math::atan2(acc, -crossDP, dotDP - r2sq); + } - // Cut #3: The dphi change going from lower Hit to upper Hit - // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 - if (modules.sides()[lowerModuleIndex] != Center) { - // When it is tilted, use the new shifted positions - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { - // dPhi Change should be calculated so that the upper hit has higher rt. - // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. - // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) - // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) - // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); - - dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) - : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); - noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); +#ifdef CUT_VALUE_DEBUG + if (mod.isTilted) { + const float crossNS = xLower * yUpper - xUpper * yLower; + const float dotNS = xLower * xUpper + yLower * yUpper; + noShiftedDphi = alpaka::math::atan2(acc, crossNS, dotNS); + if (rtLower < rtUpper) { + noShiftedDphiChange = alpaka::math::atan2(acc, crossNS, dotNS - rtLower * rtLower); } else { - // dPhi Change should be calculated so that the upper hit has higher rt. - // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. - // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) - // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) - - dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) - : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); - noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + noShiftedDphiChange = alpaka::math::atan2(acc, -crossNS, dotNS - rtUpper * rtUpper); } } else { - // When it is flat lying module, whichever is the lowerSide will always have rt lower - dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphi = dPhi; noShiftedDphiChange = dPhiChange; } +#endif - return alpaka::math::abs(acc, dPhiChange) < miniCut; + return true; } template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& drt, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphichange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper, - const float ptCut) { - // There are series of cuts that applies to mini-doublet in a "endcap" region - // Cut #1 : dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) - // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3093 - // For PS module in case when it is tilted a different dz (after the strip hit shift) is calculated later. - - float dz = zLower - zUpper; // Not const since later it might change depending on the type of module + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, + ModuleMDData const& mod, + float& drt, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphichange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper, + const float ptCut) { + float dz = zLower - zUpper; const float dzCut = 1.f; - if (alpaka::math::abs(acc, dz) >= dzCut) + if (alpaka::math::abs(acc, dz) >= dzCut) { return false; - // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) - // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 - const float drtCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; + } + const float drtCut = mod.moduleType == PS ? 2.f : 10.f; drt = rtLower - rtUpper; - if (alpaka::math::abs(acc, drt) >= drtCut) + if (alpaka::math::abs(acc, drt) >= drtCut) { return false; - // The new scheme shifts strip hits to be "aligned" along the line of sight from interaction point to the pixel hit (if it is PS modules) + } float xn = 0, yn = 0, zn = 0; float shiftedCoords[3]; - shiftStripHits(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - shiftedCoords, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); + shiftStripHits(acc, mod, shiftedCoords, xLower, yLower, zLower, rtLower, xUpper, yUpper, zUpper, rtUpper); xn = shiftedCoords[0]; yn = shiftedCoords[1]; zn = shiftedCoords[2]; - if (modules.moduleType()[lowerModuleIndex] == PS) { - // Appropriate lower or upper hit is modified after checking which one was actually shifted - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + // Compute dPhi using cross/dot product with single atan2 (instead of 2 atan2 from deltaPhi). + // deltaPhi(x1,y1,x2,y2) = atan2(x1*y2 - x2*y1, x1*x2 + y1*y2). + float dpX1, dpY1, dpX2, dpY2; // the two points for dPhi + if (mod.moduleType == PS) { + if (mod.moduleLayerType == Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dpX1 = xLower; + dpY1 = yLower; + dpX2 = xn; + dpY2 = yn; } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; - dPhi = cms::alpakatools::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dpX1 = xn; + dpY1 = yn; + dpX2 = xUpper; + dpY2 = yUpper; } } else { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xn, yn); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dpX1 = xLower; + dpY1 = yLower; + dpX2 = xn; + dpY2 = yn; + } + + const float crossDP = dpX1 * dpY2 - dpX2 * dpY1; + const float dotDP = dpX1 * dpX2 + dpY1 * dpY2; + + // Algebraic dPhiChange pre-check: reject pairs guaranteed to fail the dPhiChange cut + // without computing atan2. Uses |tan(dPhi)| = |crossDP/dotDP| as an overestimate of + // |dPhi| for the miniLum term (since tan(x) >= x for x in [0, pi/2)), making this + // strictly looser than the actual cut -- physics-safe (never rejects a pair that would pass). + { + float rt = mod.moduleLayerType == Pixel ? rtLower : rtUpper; + float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + if (dotDP > 0.f) { + // Use |tan(dPhi)| = |crossDP|/dotDP as overestimate of |dPhi| for miniLum + float absCrossDP = alpaka::math::abs(acc, crossDP); + float tanDPhiOverDz = absCrossDP / (dotDP * alpaka::math::abs(acc, dz)); + float miniLum = tanDPhiOverDz * kDeltaZLum; // overestimates actual miniLum + float miniCut = miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + miniLum * miniLum); + // dPhiChange = dPhi * (|zLower| + |dz|) / |dz|, so the dPhiChange cut + // |dPhiChange| >= miniCut becomes |dPhi| >= miniCut * |dz| / (|zLower| + |dz|) + // Using crossDP/dotDP: |crossDP/dotDP| >= miniCut * |dz| / (|zLower| + |dz|) + // => |crossDP| * (|zLower| + |dz|) >= miniCut * dotDP * |dz| + float absDz = alpaka::math::abs(acc, dz); + float lhs = absCrossDP * (alpaka::math::abs(acc, zLower) + absDz); + float rhs = miniCut * dotDP * absDz; + if (lhs >= rhs) + return false; + } else { + // dotDP <= 0 means |dPhi| > pi/2 -- guaranteed to fail any reasonable cut + return false; + } } + dPhi = alpaka::math::atan2(acc, crossDP, dotDP); + // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" // if it was an endcap it will have zero effect - if (modules.moduleType()[lowerModuleIndex] == PS) { - dz = modules.moduleLayerType()[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; + if (mod.moduleType == PS) { + dz = mod.moduleLayerType == Pixel ? zLower - zn : zUpper - zn; } - float miniCut = 0; - miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel - ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, ptCut, dPhi, dz) - : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, ptCut, dPhi, dz); + float miniCut = mod.moduleLayerType == Pixel ? dPhiThreshold(acc, rtLower, mod, ptCut, dPhi, dz) + : dPhiThreshold(acc, rtUpper, mod, ptCut, dPhi, dz); - if (alpaka::math::abs(acc, dPhi) >= miniCut) + if (alpaka::math::abs(acc, dPhi) >= miniCut) { return false; + } // Cut #4: Another cut on the dphi after some modification // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3119-L3124 float dzFrac = alpaka::math::abs(acc, dz) / alpaka::math::abs(acc, zLower); dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + + if (alpaka::math::abs(acc, dPhiChange) >= miniCut) { + return false; + } + + // Passed all cuts -- compute noShifted values for storage (only ~2% of pairs reach here). +#ifdef CUT_VALUE_DEBUG + const float crossNS = xLower * yUpper - xUpper * yLower; + const float dotNS = xLower * xUpper + yLower * yUpper; + noShiftedDphi = alpaka::math::atan2(acc, crossNS, dotNS); noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); +#endif - return alpaka::math::abs(acc, dPhiChange) < miniCut; + return true; } template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& dz, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphiChange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper, - const float ptCut, - uint16_t clustSizeLower, - uint16_t clustSizeUpper, - const uint16_t clustSizeCut) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgo(TAcc const& acc, + ModuleMDData const& mod, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper, + const float ptCut, + uint16_t clustSizeLower, + uint16_t clustSizeUpper, + const uint16_t clustSizeCut) { if (clustSizeLower > clustSizeCut or clustSizeUpper > clustSizeCut) { return false; } - if (modules.subdets()[lowerModuleIndex] == Barrel) { + if (mod.subdet == Barrel) { return runMiniDoubletDefaultAlgoBarrel(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, + mod, dz, dPhi, dPhiChange, @@ -659,11 +626,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ptCut); } else { return runMiniDoubletDefaultAlgoEndcap(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, + mod, dz, dPhi, dPhiChange, @@ -696,7 +659,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float ptCut, const uint16_t clustSizeCut) const { for (uint16_t lowerModuleIndex : cms::alpakatools::uniform_elements_y(acc, modules.nLowerModules())) { - uint16_t upperModuleIndex = modules.partnerModuleIndices()[lowerModuleIndex]; int nLowerHits = hitsRanges.hitRangesnLower()[lowerModuleIndex]; int nUpperHits = hitsRanges.hitRangesnUpper()[lowerModuleIndex]; if (hitsRanges.hitRangesLower()[lowerModuleIndex] == -1) @@ -705,6 +667,42 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int loHitArrayIndex = hitsRanges.hitRangesLower()[lowerModuleIndex]; int limit = nUpperHits * nLowerHits; + // Hoist module-constant data once per module to avoid redundant SoA loads per hit pair. + ModuleMDData mod; + mod.lowerModuleIndex = lowerModuleIndex; + mod.subdet = modules.subdets()[lowerModuleIndex]; + mod.side = modules.sides()[lowerModuleIndex]; + mod.moduleType = modules.moduleType()[lowerModuleIndex]; + mod.moduleLayerType = modules.moduleLayerType()[lowerModuleIndex]; + mod.iL = modules.layers()[lowerModuleIndex] - 1; + mod.isTilted = (mod.subdet == Barrel && mod.side != Center); + mod.isEndcapTwoS = (mod.subdet == Endcap && mod.moduleType == TwoS); + mod.isGloballyInner = modules.isGloballyInner()[lowerModuleIndex]; + mod.slope = modules.dxdys()[lowerModuleIndex]; + mod.drdz = modules.drdzs()[lowerModuleIndex]; + mod.moduleSep = moduleGapSize(modules, lowerModuleIndex); + + // Pre-compute dPhiThreshold module-constant parts + float rLayNominal = (mod.subdet == Barrel) ? kMiniRminMeanBarrel[mod.iL] : kMiniRminMeanEndcap[mod.iL]; + mod.miniPVoff = 0.1f / rLayNominal; + mod.miniMuls = (mod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[mod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[mod.iL] * 3.f / ptCut; + mod.sqrtTermBase = mod.miniMuls * mod.miniMuls + mod.miniPVoff * mod.miniPVoff; + mod.sqrtTermFlat = alpaka::math::sqrt(acc, mod.sqrtTermBase); + + if (mod.isTilted) { + float drdzThresh; + if (mod.moduleType == PS and mod.moduleLayerType == Strip) { + drdzThresh = modules.drdzs()[lowerModuleIndex]; + } else { + drdzThresh = modules.drdzs()[modules.partnerModuleIndices()[lowerModuleIndex]]; + } + mod.miniTilt2 = 0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (drdzThresh * drdzThresh) / + (1.f + drdzThresh * drdzThresh) / mod.moduleSep; + } else { + mod.miniTilt2 = 0.f; + } + for (int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) { int lowerHitIndex = hitIndex / nUpperHits; int upperHitIndex = hitIndex % nUpperHits; @@ -727,11 +725,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, noShiftedDphi, noShiftedDphiChange; bool success = runMiniDoubletDefaultAlgo(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitArrayIndex, - upperHitArrayIndex, + mod, dz, dphi, dphichange, @@ -769,10 +763,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds, hitsBase, hitsExtended, - modules, + mod, lowerHitArrayIndex, upperHitArrayIndex, - lowerModuleIndex, dz, dphi, dphichange, diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index d58917c81dee3..8e12327f5bcb6 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -43,7 +43,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float sigmoid_activation(TAcc const& acc, const float x) { - return alpaka::math::exp(acc, x) / (alpaka::math::exp(acc, x) + 1.f); + const float e = alpaka::math::exp(acc, x); + return e / (e + 1.f); } template diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index e81b3bff105a8..dc525da4fbf99 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -493,15 +493,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp, centerYTemp, pixelRadiusErrorTemp; + // Construct PixelSeedData for the pixel segment (shared with pT3 path). + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + PixelSeedData pix; + pix.ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; + pix.px = pixelSeeds.px()[pixelSegmentArrayIndex]; + pix.py = pixelSeeds.py()[pixelSegmentArrayIndex]; + pix.pz = pixelSeeds.pz()[pixelSegmentArrayIndex]; + pix.ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; + pix.etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex]; + pix.eta = pixelSeeds.eta()[pixelSegmentArrayIndex]; + pix.phi = pixelSeeds.phi()[pixelSegmentArrayIndex]; + pix.charge = pixelSeeds.charge()[pixelSegmentArrayIndex]; + pix.circleCenterX = pixelSegments.circleCenterX()[pixelSegmentArrayIndex]; + pix.circleCenterY = pixelSegments.circleCenterY()[pixelSegmentArrayIndex]; + pix.circleRadius = pixelSegments.circleRadius()[pixelSegmentArrayIndex]; + pix.x_InLo = mds.anchorX()[pixelInnerMDIndex]; + pix.y_InLo = mds.anchorY()[pixelInnerMDIndex]; + pix.z_InLo = mds.anchorZ()[pixelInnerMDIndex]; + pix.rt_InLo = mds.anchorRt()[pixelInnerMDIndex]; + pix.x_InUp = mds.anchorX()[pixelOuterMDIndex]; + pix.y_InUp = mds.anchorY()[pixelOuterMDIndex]; + pix.z_InUp = mds.anchorZ()[pixelOuterMDIndex]; + pix.rt_InUp = mds.anchorRt()[pixelOuterMDIndex]; + pix.alpha_InLo = __H2F(segments.dPhiChanges()[pixelSegmentIndex]); + if (not runPixelTripletDefaultAlgo(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelSegments, + pix, triplets, - pixelSegmentIndex, t5InnerT3Index, pixelRadiusTemp, tripletRadius, @@ -520,9 +543,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1]; unsigned int thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0]; unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1]; - - unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; - unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; @@ -538,10 +558,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndices[Params_T5::kLayers] = { lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; - float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; - float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; - float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; - float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {pix.rt_InLo, pix.rt_InUp}; + float xPix[Params_pLS::kLayers] = {pix.x_InLo, pix.x_InUp}; + float yPix[Params_pLS::kLayers] = {pix.y_InLo, pix.y_InUp}; + float zPix[Params_pLS::kLayers] = {pix.z_InLo, pix.z_InUp}; float zs[Params_T5::kLayers] = {mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex], @@ -553,31 +573,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorRt()[fourthMDIndex], mds.anchorRt()[fifthMDIndex]}; - float pixelSegmentPt = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; - float pixelSegmentPx = pixelSeeds.px()[pixelSegmentArrayIndex]; - float pixelSegmentPy = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pixelSegmentPz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - int pixelSegmentCharge = pixelSeeds.charge()[pixelSegmentArrayIndex]; - rzChiSquared = 0; //get the appropriate centers - pixelRadius = pixelSegments.circleRadius()[pixelSegmentArrayIndex]; - - rzChiSquared = computePT5RZChiSquared(acc, - modules, - lowerModuleIndices, - rtPix, - xPix, - yPix, - zPix, - rts, - zs, - pixelSegmentPt, - pixelSegmentPx, - pixelSegmentPy, - pixelSegmentPz, - pixelSegmentCharge); + pixelRadius = pix.circleRadius; + + rzChiSquared = computePT5RZChiSquared( + acc, modules, lowerModuleIndices, rtPix, xPix, yPix, zPix, rts, zs, pix.ptIn, pix.px, pix.py, pix.pz, pix.charge); if (pixelRadius < 5.0f * kR1GeVf) { //only apply r-z chi2 cuts for <5GeV tracks if (not passPT5RZChiSquaredCuts(modules, @@ -603,8 +605,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorY()[fifthMDIndex]}; //get the appropriate centers - centerX = pixelSegments.circleCenterX()[pixelSegmentArrayIndex]; - centerY = pixelSegments.circleCenterY()[pixelSegmentArrayIndex]; + centerX = pix.circleCenterX; + centerY = pix.circleCenterY; float T5CenterX = quintuplets.regressionCenterX()[quintupletIndex]; float T5CenterY = quintuplets.regressionCenterY()[quintupletIndex]; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 6dfa2358d568f..5c2fe8c8b787a 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -15,37 +15,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + // Pre-loaded pixel segment data, invariant across triplets for a given pixel segment. + // Hoisted to avoid redundant SoA loads through the pT3 function chain. + struct PixelSeedData { + float ptIn, px, py, pz, ptErr, etaErr; + float eta, phi; + int charge; + float circleCenterX, circleCenterY, circleRadius; + float x_InLo, y_InLo, z_InLo, rt_InLo; // pixel inner MD (pLSMD0) + float x_InUp, y_InUp, z_InUp, rt_InUp; // pixel outer MD (pLSMD1) + float alpha_InLo; // pixel segment dPhiChange + }; + template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pix, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut); template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pix, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut); @@ -124,59 +126,43 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelLowerModuleIndex, + const PixelSeedData& pix, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, - unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, const float ptCut) { short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; - unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; - - unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0]; - unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + unsigned int segmentMD0Index = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int segmentMD1Index = segments.mdIndices()[outerSegmentIndex][1]; if (outerInnerLowerModuleSubdet == Barrel and (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { return runTripletDefaultAlgoPPBB(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelLowerModuleIndex, + pix, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, - innerSegmentIndex, outerSegmentIndex, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - fourthMDIndex, + segmentMD0Index, + segmentMD1Index, ptCut); } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return runTripletDefaultAlgoPPEE(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelLowerModuleIndex, + pix, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, - innerSegmentIndex, outerSegmentIndex, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - fourthMDIndex, + segmentMD0Index, + segmentMD1Index, ptCut); } return false; @@ -537,13 +523,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - PixelSegmentsConst pixelSegments, + const PixelSeedData& pix, TripletsConst triplets, - unsigned int pixelSegmentIndex, unsigned int tripletIndex, float& pixelRadius, float& tripletRadius, @@ -556,63 +539,48 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float ptCut, bool runDNN = true, bool runChiSquaredCuts = true) { - //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet - uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex]; - uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0]; uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1]; uint16_t upperModuleIndex = triplets.lowerModuleIndices()[tripletIndex][2]; - { - // pixel segment vs inner segment of the triplet - if (not runPixelTrackletDefaultAlgopT3(acc, - modules, - ranges, - mds, - segments, - pixelSeeds, - pixelModuleIndex, - lowerModuleIndex, - middleModuleIndex, - pixelSegmentIndex, - triplets.segmentIndices()[tripletIndex][0], - ptCut)) - return false; - - //pixel segment vs outer segment of triplet - if (not runPixelTrackletDefaultAlgopT3(acc, - modules, - ranges, - mds, - segments, - pixelSeeds, - pixelModuleIndex, - middleModuleIndex, - upperModuleIndex, - pixelSegmentIndex, - triplets.segmentIndices()[tripletIndex][1], - ptCut)) - return false; - } + // Radius criterion FIRST (cheap check before expensive tracklet computations) + pixelRadius = pix.ptIn * kR1GeVf; + pixelRadiusError = pix.ptErr * kR1GeVf; + tripletRadius = triplets.radius()[tripletIndex]; - //pt matching between the pixel ptin and the triplet circle pt - unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - float pixelSegmentPt = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; - float pixelSegmentPtError = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; - float pixelSegmentPx = pixelSeeds.px()[pixelSegmentArrayIndex]; - float pixelSegmentPy = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pixelSegmentPz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - int pixelSegmentCharge = pixelSeeds.charge()[pixelSegmentArrayIndex]; + if (not passRadiusCriterion(acc, + modules, + pixelRadius, + pixelRadiusError, + tripletRadius, + lowerModuleIndex, + middleModuleIndex, + upperModuleIndex)) + return false; - float pixelG = pixelSegments.circleCenterX()[pixelSegmentArrayIndex]; - float pixelF = pixelSegments.circleCenterY()[pixelSegmentArrayIndex]; - float pixelRadiusPCA = pixelSegments.circleRadius()[pixelSegmentArrayIndex]; + // Expensive pT4 compatibility checks (pixel segment vs inner/outer segments of the triplet) + if (not runPixelTrackletDefaultAlgopT3(acc, + modules, + mds, + segments, + pix, + lowerModuleIndex, + middleModuleIndex, + triplets.segmentIndices()[tripletIndex][0], + ptCut)) + return false; - unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; - unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + if (not runPixelTrackletDefaultAlgopT3(acc, + modules, + mds, + segments, + pix, + middleModuleIndex, + upperModuleIndex, + triplets.segmentIndices()[tripletIndex][1], + ptCut)) + return false; - pixelRadius = pixelSegmentPt * kR1GeVf; - pixelRadiusError = pixelSegmentPtError * kR1GeVf; unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0]; unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1]; @@ -625,20 +593,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float ys[Params_T3::kLayers] = { mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]}; - float g, f; - tripletRadius = triplets.radius()[tripletIndex]; - g = triplets.centerX()[tripletIndex]; - f = triplets.centerY()[tripletIndex]; - - if (not passRadiusCriterion(acc, - modules, - pixelRadius, - pixelRadiusError, - tripletRadius, - lowerModuleIndex, - middleModuleIndex, - upperModuleIndex)) - return false; + float g = triplets.centerX()[tripletIndex]; + float f = triplets.centerY()[tripletIndex]; uint16_t lowerModuleIndices[Params_T3::kLayers] = {lowerModuleIndex, middleModuleIndex, upperModuleIndex}; @@ -647,10 +603,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorRt()[firstMDIndex], mds.anchorRt()[secondMDIndex], mds.anchorRt()[thirdMDIndex]}; float zs[Params_T3::kLayers] = { mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex]}; - float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; - float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; - float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; - float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {pix.rt_InLo, pix.rt_InUp}; + float xPix[Params_pLS::kLayers] = {pix.x_InLo, pix.x_InUp}; + float yPix[Params_pLS::kLayers] = {pix.y_InLo, pix.y_InUp}; + float zPix[Params_pLS::kLayers] = {pix.z_InLo, pix.z_InUp}; rzChiSquared = computePT3RZChiSquared(acc, modules, @@ -663,18 +619,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { xs, ys, zs, - pixelSegmentPt, - pixelSegmentPx, - pixelSegmentPy, - pixelSegmentPz, - pixelSegmentCharge); - if (runChiSquaredCuts && pixelSegmentPt < 5.0f) { + pix.ptIn, + pix.px, + pix.py, + pix.pz, + pix.charge); + if (runChiSquaredCuts && pix.ptIn < 5.0f) { if (!passPT3RZChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared)) return false; } - rPhiChiSquared = - computePT3RPhiChiSquared(acc, modules, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys); + rPhiChiSquared = computePT3RPhiChiSquared( + acc, modules, lowerModuleIndices, pix.circleCenterX, pix.circleCenterY, pix.circleRadius, xs, ys); rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix); } @@ -691,8 +647,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelRadius, pixelRadiusError, rzChiSquared, - pixelSeeds.eta()[pixelSegmentArrayIndex], - pixelSegmentPt, + pix.eta, + pix.ptIn, module_type_3)) { return false; } @@ -719,6 +675,41 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int i_pLS : cms::alpakatools::uniform_elements_z(acc, nPixelSegments)) { auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS]; + if (pixelSegments.isDup()[i_pLS]) + continue; + if (pixelSegments.partOfPT5()[i_pLS]) + continue; + + // Hoist pixel segment data: loaded once per pixel segment instead of + // redundantly through the runPixelTripletDefaultAlgo call chain. + uint16_t pixelModuleIndex = modules.nLowerModules(); + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; + unsigned int pixelMD0 = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelMD1 = segments.mdIndices()[pixelSegmentIndex][1]; + + PixelSeedData pix; + pix.ptIn = pixelSeeds.ptIn()[i_pLS]; + pix.px = pixelSeeds.px()[i_pLS]; + pix.py = pixelSeeds.py()[i_pLS]; + pix.pz = pixelSeeds.pz()[i_pLS]; + pix.ptErr = pixelSeeds.ptErr()[i_pLS]; + pix.etaErr = pixelSeeds.etaErr()[i_pLS]; + pix.eta = pixelSeeds.eta()[i_pLS]; + pix.phi = pixelSeeds.phi()[i_pLS]; + pix.charge = pixelSeeds.charge()[i_pLS]; + pix.circleCenterX = pixelSegments.circleCenterX()[i_pLS]; + pix.circleCenterY = pixelSegments.circleCenterY()[i_pLS]; + pix.circleRadius = pixelSegments.circleRadius()[i_pLS]; + pix.x_InLo = mds.anchorX()[pixelMD0]; + pix.y_InLo = mds.anchorY()[pixelMD0]; + pix.z_InLo = mds.anchorZ()[pixelMD0]; + pix.rt_InLo = mds.anchorRt()[pixelMD0]; + pix.x_InUp = mds.anchorX()[pixelMD1]; + pix.y_InUp = mds.anchorY()[pixelMD1]; + pix.z_InUp = mds.anchorZ()[pixelMD1]; + pix.rt_InUp = mds.anchorRt()[pixelMD1]; + pix.alpha_InLo = __H2F(segments.dPhiChanges()[pixelSegmentIndex]); + for (unsigned int iLSModule : cms::alpakatools::uniform_elements_y(acc, connectedPixelIndex[i_pLS], iLSModule_max)) { uint16_t tripletLowerModuleIndex = @@ -736,18 +727,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (modules.moduleType()[tripletLowerModuleIndex] == TwoS) continue; - uint16_t pixelModuleIndex = modules.nLowerModules(); unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex]; if (nOuterTriplets == 0) continue; - unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; - - if (pixelSegments.isDup()[i_pLS]) - continue; - if (pixelSegments.partOfPT5()[i_pLS]) - continue; //don't make pT3s for those pixels that are part of pT5 - short layer2_adjustment; if (modules.layers()[tripletLowerModuleIndex] == 1) { layer2_adjustment = 1; @@ -773,13 +756,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelRadiusError; bool success = runPixelTripletDefaultAlgo(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelSegments, + pix, triplets, - pixelSegmentIndex, outerTripletIndex, pixelRadius, tripletRadius, @@ -798,9 +778,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float eta = mds.anchorEta()[segments .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; - float eta_pix = pixelSeeds.eta()[i_pLS]; - float phi_pix = pixelSeeds.phi()[i_pLS]; - float pt = pixelSeeds.ptIn()[i_pLS]; float score = rPhiChiSquared + rPhiChiSquaredInwards; unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); @@ -825,11 +802,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rPhiChiSquaredInwards, rzChiSquared, pixelTripletIndex, - pt, + pix.ptIn, eta, phi, - eta_pix, - phi_pix, + pix.eta, + pix.phi, pixelRadiusError, score); triplets.partOfPT3()[outerTripletIndex] = true; @@ -844,17 +821,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pix, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut) { @@ -862,44 +834,43 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bool isPS_OutLo = (modules.moduleType()[segmentInnerModuleIndex] == PS); - float rt_InLo = mds.anchorRt()[pLSMD0Index]; - float rt_InUp = mds.anchorRt()[pLSMD1Index]; + const float rt_InLo = pix.rt_InLo; + const float rt_InUp = pix.rt_InUp; float rt_OutLo = mds.anchorRt()[segmentMD0Index]; float rt_OutUp = mds.anchorRt()[segmentMD1Index]; - float z_InUp = mds.anchorZ()[pLSMD1Index]; + const float z_InUp = pix.z_InUp; float z_OutLo = mds.anchorZ()[segmentMD0Index]; - float x_InLo = mds.anchorX()[pLSMD0Index]; - float x_InUp = mds.anchorX()[pLSMD1Index]; + const float x_InLo = pix.x_InLo; + const float x_InUp = pix.x_InUp; float x_OutLo = mds.anchorX()[segmentMD0Index]; float x_OutUp = mds.anchorX()[segmentMD1Index]; - float y_InLo = mds.anchorY()[pLSMD0Index]; - float y_InUp = mds.anchorY()[pLSMD1Index]; + const float y_InLo = pix.y_InLo; + const float y_InUp = pix.y_InUp; float y_OutLo = mds.anchorY()[segmentMD0Index]; float y_OutUp = mds.anchorY()[segmentMD1Index]; float rt_InOut = rt_InUp; - unsigned int pixelSegmentArrayIndex = pLSIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - float ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; + float ptIn = pix.ptIn; float ptSLo = ptIn; - float px = pixelSeeds.px()[pixelSegmentArrayIndex]; - float py = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - float ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; - float etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex]; + float px = pix.px; + float py = pix.py; + float pz = pix.pz; + float ptErr = pix.ptErr; + float etaErr = pix.etaErr; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); - float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + // For the typical range rt_OutLo * k2Rinv1GeVf / ptCut < 0.04, asin(x) ~ x + // and tan(x)/x ~ 1 + x^2/3. + float alpha1GeV_OutLo = alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax); const float rtRatio_OutLoInOut = rt_OutLo / rt_InOut; // Outer segment beginning rt divided by inner segment beginning rt; - float dzDrtScale = - alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float dzDrtScale = 1.f + alpha1GeV_OutLo * alpha1GeV_OutLo / 3.f; // The track can bend in r-z plane slightly const float zpitch_InLo = 0.05f; const float zpitch_InOut = 0.05f; float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); @@ -953,21 +924,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float diffX = x_OutLo - x_InLo; float diffY = y_OutLo - y_InLo; - dPhi = cms::alpakatools::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + dPhi = fastDeltaPhi(acc, midPointX, midPointY, diffX, diffY); if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; //lots of array accesses below this... - float alpha_InLo = __H2F(segments.dPhiChanges()[pLSIndex]); + float alpha_InLo = pix.alpha_InLo; float alpha_OutLo = __H2F(segments.dPhiChanges()[segmentIndex]); bool isEC_lastLayer = modules.subdets()[segmentOuterModuleIndex] == Endcap and modules.moduleType()[segmentOuterModuleIndex] == TwoS; float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = cms::alpakatools::deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp = fastDeltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -981,42 +952,42 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - betaIn = -cms::alpakatools::deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + betaIn = -fastDeltaPhi(acc, px, py, tl_axis_x, tl_axis_y); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + cms::alpakatools::deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + betaOut = -alpha_OutUp + fastDeltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = cms::alpakatools::deltaPhi(acc, - mds.anchorHighEdgeX()[segmentMD1Index], - mds.anchorHighEdgeY()[segmentMD1Index], - mds.anchorHighEdgeX()[segmentMD1Index] - x_OutLo, - mds.anchorHighEdgeY()[segmentMD1Index] - y_OutLo); - alpha_OutUp_lowEdge = cms::alpakatools::deltaPhi(acc, - mds.anchorLowEdgeX()[segmentMD1Index], - mds.anchorLowEdgeY()[segmentMD1Index], - mds.anchorLowEdgeX()[segmentMD1Index] - x_OutLo, - mds.anchorLowEdgeY()[segmentMD1Index] - y_OutLo); + alpha_OutUp_highEdge = fastDeltaPhi(acc, + mds.anchorHighEdgeX()[segmentMD1Index], + mds.anchorHighEdgeY()[segmentMD1Index], + mds.anchorHighEdgeX()[segmentMD1Index] - x_OutLo, + mds.anchorHighEdgeY()[segmentMD1Index] - y_OutLo); + alpha_OutUp_lowEdge = fastDeltaPhi(acc, + mds.anchorLowEdgeX()[segmentMD1Index], + mds.anchorLowEdgeY()[segmentMD1Index], + mds.anchorLowEdgeX()[segmentMD1Index] - x_OutLo, + mds.anchorLowEdgeY()[segmentMD1Index] - y_OutLo); tl_axis_highEdge_x = mds.anchorHighEdgeX()[segmentMD1Index] - x_InUp; tl_axis_highEdge_y = mds.anchorHighEdgeY()[segmentMD1Index] - y_InUp; tl_axis_lowEdge_x = mds.anchorLowEdgeX()[segmentMD1Index] - x_InUp; tl_axis_lowEdge_y = mds.anchorLowEdgeY()[segmentMD1Index] - y_InUp; - betaOutRHmin = -alpha_OutUp_highEdge + cms::alpakatools::deltaPhi(acc, - mds.anchorHighEdgeX()[segmentMD1Index], - mds.anchorHighEdgeY()[segmentMD1Index], - tl_axis_highEdge_x, - tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + cms::alpakatools::deltaPhi(acc, - mds.anchorLowEdgeX()[segmentMD1Index], - mds.anchorLowEdgeY()[segmentMD1Index], - tl_axis_lowEdge_x, - tl_axis_lowEdge_y); + betaOutRHmin = -alpha_OutUp_highEdge + fastDeltaPhi(acc, + mds.anchorHighEdgeX()[segmentMD1Index], + mds.anchorHighEdgeY()[segmentMD1Index], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + fastDeltaPhi(acc, + mds.anchorLowEdgeX()[segmentMD1Index], + mds.anchorLowEdgeY()[segmentMD1Index], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); } //beta computation @@ -1052,19 +1023,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float min_ptBeta_ptBetaMax = alpaka::math::min( acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); - const float alphaInAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, alpha_InLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); - const float alphaOutAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, alpha_OutLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + // asin(x) ~ x for small arguments (rt * k2Rinv1GeVf / 3.0 < 0.01) + const float alphaInAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, alpha_InLo), alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); + const float alphaOutAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, alpha_OutLo), alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = alpaka::math::sin(acc, dPhi); + // dPhi is small (passed dPhiCut), so sin(dPhi) ~ dPhi + const float sinDPhi = dPhi; const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; @@ -1081,9 +1050,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dBetaROut2 = dBetaROut * dBetaROut; - //FIXME: need faster version - betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + - (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + betaOutCut = alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax) + (0.02f / sdOut_d) + + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) @@ -1101,17 +1069,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pix, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut) { @@ -1119,36 +1082,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bool isPS_OutLo = (modules.moduleType()[segmentInnerModuleIndex] == PS); - float z_InUp = mds.anchorZ()[pLSMD1Index]; + const float z_InUp = pix.z_InUp; float z_OutLo = mds.anchorZ()[segmentMD0Index]; if (z_InUp * z_OutLo <= 0) return false; - float rt_InLo = mds.anchorRt()[pLSMD0Index]; - float rt_InUp = mds.anchorRt()[pLSMD1Index]; + const float rt_InLo = pix.rt_InLo; + const float rt_InUp = pix.rt_InUp; float rt_OutLo = mds.anchorRt()[segmentMD0Index]; float rt_OutUp = mds.anchorRt()[segmentMD1Index]; - float x_InLo = mds.anchorX()[pLSMD0Index]; - float x_InUp = mds.anchorX()[pLSMD1Index]; + const float x_InLo = pix.x_InLo; + const float x_InUp = pix.x_InUp; float x_OutLo = mds.anchorX()[segmentMD0Index]; float x_OutUp = mds.anchorX()[segmentMD1Index]; - float y_InLo = mds.anchorY()[pLSMD0Index]; - float y_InUp = mds.anchorY()[pLSMD1Index]; + const float y_InLo = pix.y_InLo; + const float y_InUp = pix.y_InUp; float y_OutLo = mds.anchorY()[segmentMD0Index]; float y_OutUp = mds.anchorY()[segmentMD1Index]; - unsigned int pixelSegmentArrayIndex = pLSIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - - float ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; + float ptIn = pix.ptIn; float ptSLo = ptIn; - float px = pixelSeeds.px()[pixelSegmentArrayIndex]; - float py = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - float ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; - float etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex]; + float px = pix.px; + float py = pix.py; + float pz = pix.pz; + float ptErr = pix.ptErr; + float etaErr = pix.etaErr; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -1157,8 +1118,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); float zGeom = zpitch_InLo + zpitch_OutLo; - const float slope = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - const float dzDrtScale = alpaka::math::tan(acc, slope) / slope; //FIXME: need approximate value + // asin(x) ~ x and tan(x)/x ~ 1 + x^2/3 for small x + const float alpha1GeV_OutLo_EE = alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax); + const float dzDrtScale = 1.f + alpha1GeV_OutLo_EE * alpha1GeV_OutLo_EE / 3.f; const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp); bool isOutSgInnerMDPS = modules.moduleType()[segmentInnerModuleIndex] == PS; @@ -1207,10 +1169,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((rt_OutLo < rtLo_point) || (rt_OutLo > rtHi_point)) return false; - const float alpha1GeV_OutLo = - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + // Reuse alpha1GeV_OutLo_EE computed above (same value as asin approximation) const float pvOffset = 0.1f / rt_OutLo; - dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + dPhiCut = alpha1GeV_OutLo_EE + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); float midPointX = 0.5f * (x_InLo + x_OutLo); float midPointY = 0.5f * (y_InLo + y_OutLo); @@ -1218,13 +1179,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float diffX = x_OutLo - x_InLo; float diffY = y_OutLo - y_InLo; - dPhi = cms::alpakatools::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + dPhi = fastDeltaPhi(acc, midPointX, midPointY, diffX, diffY); // Cut #5: deltaPhiChange if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float alpha_InLo = __H2F(segments.dPhiChanges()[pLSIndex]); + float alpha_InLo = pix.alpha_InLo; float alpha_OutLo = __H2F(segments.dPhiChanges()[segmentIndex]); bool isEC_lastLayer = @@ -1232,7 +1193,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; - alpha_OutUp = cms::alpakatools::deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp = fastDeltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); alpha_OutUp_highEdge = alpha_OutUp; alpha_OutUp_lowEdge = alpha_OutUp; @@ -1245,41 +1206,41 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float tl_axis_lowEdge_x = tl_axis_x; float tl_axis_lowEdge_y = tl_axis_y; - betaIn = -cms::alpakatools::deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + betaIn = -fastDeltaPhi(acc, px, py, tl_axis_x, tl_axis_y); float betaInRHmin = betaIn; float betaInRHmax = betaIn; - betaOut = -alpha_OutUp + cms::alpakatools::deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + betaOut = -alpha_OutUp + fastDeltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); float betaOutRHmin = betaOut; float betaOutRHmax = betaOut; if (isEC_lastLayer) { - alpha_OutUp_highEdge = cms::alpakatools::deltaPhi(acc, - mds.anchorHighEdgeX()[segmentMD1Index], - mds.anchorHighEdgeY()[segmentMD1Index], - mds.anchorHighEdgeX()[segmentMD1Index] - x_OutLo, - mds.anchorHighEdgeY()[segmentMD1Index] - y_OutLo); - alpha_OutUp_lowEdge = cms::alpakatools::deltaPhi(acc, - mds.anchorLowEdgeX()[segmentMD1Index], - mds.anchorLowEdgeY()[segmentMD1Index], - mds.anchorLowEdgeX()[segmentMD1Index] - x_OutLo, - mds.anchorLowEdgeY()[segmentMD1Index] - y_OutLo); + alpha_OutUp_highEdge = fastDeltaPhi(acc, + mds.anchorHighEdgeX()[segmentMD1Index], + mds.anchorHighEdgeY()[segmentMD1Index], + mds.anchorHighEdgeX()[segmentMD1Index] - x_OutLo, + mds.anchorHighEdgeY()[segmentMD1Index] - y_OutLo); + alpha_OutUp_lowEdge = fastDeltaPhi(acc, + mds.anchorLowEdgeX()[segmentMD1Index], + mds.anchorLowEdgeY()[segmentMD1Index], + mds.anchorLowEdgeX()[segmentMD1Index] - x_OutLo, + mds.anchorLowEdgeY()[segmentMD1Index] - y_OutLo); tl_axis_highEdge_x = mds.anchorHighEdgeX()[segmentMD1Index] - x_InUp; tl_axis_highEdge_y = mds.anchorHighEdgeY()[segmentMD1Index] - y_InUp; tl_axis_lowEdge_x = mds.anchorLowEdgeX()[segmentMD1Index] - x_InUp; tl_axis_lowEdge_y = mds.anchorLowEdgeY()[segmentMD1Index] - y_InUp; - betaOutRHmin = -alpha_OutUp_highEdge + cms::alpakatools::deltaPhi(acc, - mds.anchorHighEdgeX()[segmentMD1Index], - mds.anchorHighEdgeY()[segmentMD1Index], - tl_axis_highEdge_x, - tl_axis_highEdge_y); - betaOutRHmax = -alpha_OutUp_lowEdge + cms::alpakatools::deltaPhi(acc, - mds.anchorLowEdgeX()[segmentMD1Index], - mds.anchorLowEdgeY()[segmentMD1Index], - tl_axis_lowEdge_x, - tl_axis_lowEdge_y); + betaOutRHmin = -alpha_OutUp_highEdge + fastDeltaPhi(acc, + mds.anchorHighEdgeX()[segmentMD1Index], + mds.anchorHighEdgeY()[segmentMD1Index], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + fastDeltaPhi(acc, + mds.anchorLowEdgeX()[segmentMD1Index], + mds.anchorLowEdgeY()[segmentMD1Index], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); } //beta computation @@ -1314,19 +1275,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); - const float alphaInAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, alpha_InLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); - const float alphaOutAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, alpha_OutLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + // asin(x) ~ x for small arguments + const float alphaInAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, alpha_InLo), alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); + const float alphaOutAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, alpha_OutLo), alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = alpaka::math::sin(acc, dPhi); + // dPhi is small (passed dPhiCut), so sin(dPhi) ~ dPhi + const float sinDPhi = dPhi; const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; @@ -1343,10 +1302,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dBetaROut2 = dBetaROut * dBetaROut; - betaOutCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) //FIXME: need faster version - + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + betaOutCut = alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax) + (0.02f / sdOut_d) + + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h index 15b0aa1f7f405..0a9a8af533649 100644 --- a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -19,6 +19,14 @@ #include "NeuralNetwork.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + // Compute angle from vector (x1,y1) to vector (x2,y2) using a single atan2. + // Equivalent to cms::alpakatools::deltaPhi(acc, x1, y1, x2, y2) which uses 2 atan2 calls. + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float fastDeltaPhi(TAcc const& acc, float x1, float y1, float x2, float y2) { + return alpaka::math::atan2(acc, x1 * y2 - x2 * y1, x1 * x2 + y1 * y2); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(TripletsConst triplets, Quintuplets quintuplets, unsigned int innerTripletIndex, @@ -790,12 +798,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float sdOut_dr, float dr, float lIn) { + // All asin arguments below are sd_dr * k2Rinv1GeVf / pt (< 0.02), so asin(x) ~ x. if (lIn == 0) { betaOut += alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), - betaOut); + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax), betaOut); return; } @@ -805,19 +811,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap { const float betaInUpd = - betaIn + - alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + alpaka::math::copysign( + acc, + alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax), + betaIn); const float betaOutUpd = - betaOut + - alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), - betaOut); //FIXME: need a faster version + betaOut + alpaka::math::copysign( + acc, + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax), + betaOut); betaAv = 0.5f * (betaInUpd + betaOutUpd); //1st update @@ -825,13 +827,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { 1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv)); //get a better pt estimate betaIn += alpaka::math::copysign( - acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), - betaIn); //FIXME: need a faster version + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax), betaIn); betaOut += alpaka::math::copysign( - acc, - alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), - betaOut); //FIXME: need a faster version + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax), betaOut); //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update @@ -842,20 +840,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); const float betaInUpd = - betaIn + - alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), - betaIn); //FIXME: need a faster version + betaIn + alpaka::math::copysign( + acc, + alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax), + betaIn); const float betaOutUpd = betaOut + alpaka::math::copysign( acc, - alpaka::math::asin( - acc, - alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), - betaIn); //FIXME: need a faster version + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax), + betaIn); betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) ? (0.5f * (betaInUpd + betaOutUpd)) : betaInUpd; @@ -863,15 +857,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //1st update pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate betaIn += alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), - betaIn); //FIXME: need a faster version + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax), betaIn); betaOut += alpaka::math::copysign( - acc, - alpaka::math::asin( - acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), - betaIn); //FIXME: need a faster version + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax), betaIn); //update the av and pt betaAv = 0.5f * (betaIn + betaOut); //2nd update @@ -913,7 +901,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; - float dPhi = cms::alpakatools::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = fastDeltaPhi(acc, midPointX, midPointY, diffX, diffY); // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); @@ -1021,18 +1009,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confimm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, alpha_InLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); - const float alphaOutAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, alpha_OutLo), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaInAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, alpha_InLo), alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); + const float alphaOutAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, alpha_OutLo), alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float sinDPhi = dPhi; // dPhi is small (passed dPhiCut), so sin(dPhi) ~ dPhi float dBetaROut = 0; if (isEC_lastLayer) { @@ -1090,7 +1074,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; - float dPhi = cms::alpakatools::deltaPhi(acc, midPointX, midPointY, diffX, diffY); + float dPhi = fastDeltaPhi(acc, midPointX, midPointY, diffX, diffY); float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); float sdIn_alpha_min = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); @@ -1187,18 +1171,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, sdIn_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); - const float alphaOutAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, sdOut_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaInAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, sdIn_alpha), alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); + const float alphaOutAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, sdOut_alpha), alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); - const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float sinDPhi = dPhi; // dPhi is small (passed dPhiCut), so sin(dPhi) ~ dPhi const float dBetaRIn2 = 0; // TODO-RH float dBetaROut = 0; @@ -1332,14 +1312,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); - const float alphaInAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, sdIn_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); - const float alphaOutAbsReg = - alpaka::math::max(acc, - alpaka::math::abs(acc, sdOut_alpha), - alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaInAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, sdIn_alpha), alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); + const float alphaOutAbsReg = alpaka::math::max( + acc, alpaka::math::abs(acc, sdOut_alpha), alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)); const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6437625761950..1f2c41241ed59 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -85,10 +85,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return moduleSeparation; } + // Pre-loaded module data for segment creation, eliminating redundant SoA lookups + // in inner loops. Populated once per module (outer/middle loop level). + struct ModuleSegData { + short subdet; + short side; + short layer; + unsigned int iL; // layer - 1 + short moduleType; + float drdz; + float moduleGapSize; + bool isTilted; + float segMiniTilt2; // 0.25 * kPixelPSZpitch^2 * drdz^2 / (1+drdz^2) / gap^2; 0 if not tilted + float sdMuls; // kMiniMulsPtScale[iL] * 3 / ptCut + }; + template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, float* dAlphaThresholdValues, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, float xIn, float yIn, @@ -98,77 +114,52 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float yOut, float zOut, float rtOut, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, const float ptCut) { - float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) - ? kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; + const float sdMuls = innerMod.sdMuls; //more accurate then outer rt - inner rt float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn)); - const float dAlpha_Bfield = - alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - - bool isInnerTilted = - modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] != Center; - bool isOuterTilted = - modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] != Center; - - float drdzInner = modules.drdzs()[innerLowerModuleIndex]; - float drdzOuter = modules.drdzs()[outerLowerModuleIndex]; - float innerModuleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); - float outerModuleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); - const float innerminiTilt2 = isInnerTilted - ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / - (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) - : 0; - - const float outerminiTilt2 = isOuterTilted - ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzOuter * drdzOuter) / - (1.f + drdzOuter * drdzOuter) / (outerModuleGapSize * outerModuleGapSize)) - : 0; - - float miniDelta = innerModuleGapSize; + // For segments, segmentDr * k2Rinv1GeVf / ptCut is always small (<0.03), + // so asin(x) ~ x to better than 0.01% accuracy. + const float dAlpha_Bfield = alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax); float sdLumForInnerMini2; float sdLumForOuterMini2; - if (modules.subdets()[innerLowerModuleIndex] == Barrel) { - sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + if (innerMod.subdet == Barrel) { + sdLumForInnerMini2 = innerMod.segMiniTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForInnerMini2 = (mds.dphis()[innerMDIndex] * mds.dphis()[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / (mds.dzs()[innerMDIndex] * mds.dzs()[innerMDIndex]); } - if (modules.subdets()[outerLowerModuleIndex] == Barrel) { - sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + if (outerMod.subdet == Barrel) { + sdLumForOuterMini2 = outerMod.segMiniTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForOuterMini2 = (mds.dphis()[outerMDIndex] * mds.dphis()[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / (mds.dzs()[outerMDIndex] * mds.dzs()[outerMDIndex]); } // Unique stuff for the segment dudes alone + const float miniDelta = innerMod.moduleGapSize; float dAlpha_res_inner = - 0.02f / miniDelta * - (modules.subdets()[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + 0.02f / miniDelta * (innerMod.subdet == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); float dAlpha_res_outer = - 0.02f / miniDelta * - (modules.subdets()[outerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); + 0.02f / miniDelta * (outerMod.subdet == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; - if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] == Center) { + if (innerMod.subdet == Barrel and innerMod.side == Center) { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); } - if (modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] == Center) { + if (outerMod.subdet == Barrel and outerMod.side == Center) { dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[1] = @@ -335,15 +326,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dPhiChange = cms::alpakatools::reducePhiRange( acc, cms::alpakatools::phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMD]); - return alpaka::math::abs(acc, dPhiChange) < sdCut; + if (alpaka::math::abs(acc, dPhiChange) >= sdCut) + return false; + + return true; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passDeltaPhiCutsEndcap(TAcc const& acc, - ModulesConst modules, MiniDoubletsConst mds, - uint16_t innerLm, - uint16_t outerLm, unsigned int innerMD, unsigned int outerMD, const float dPhi, @@ -360,15 +351,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dzFrac = dz / zIn; const float dPhiChange = dPhi / dzFrac * (1.f + dzFrac); - return alpaka::math::abs(acc, dPhiChange) < sdSlope; + if (alpaka::math::abs(acc, dPhiChange) >= sdSlope) + return false; + + return true; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, float& dPhi, @@ -401,10 +394,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zOut = mds.anchorZ()[outerMDIndex]; rtOut = mds.anchorRt()[outerMDIndex]; - float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope; //FIXME: need appropriate value + const float sdSin = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax); + const float cosSlope = alpaka::math::sqrt(acc, 1.f - sdSin * sdSin); + // Use 1/cosSlope >= tan(asin(s))/asin(s) for z window (safe, wider window; defers asin) + const float dzDrtScale = 1.f / cosSlope; - const float zGeom = modules.layers()[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + const float zGeom = innerMod.layer <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; //slope-correction only on outer end zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - zGeom; @@ -415,26 +410,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); - if (!passDeltaPhiCutsBarrel(acc, - modules, - mds, - innerLowerModuleIndex, - outerLowerModuleIndex, - innerMDIndex, - outerMDIndex, - dPhi, - rtOut, - sdSlope, - ptCut)) + // dPhiChange cut using cross-product (subsumes old dPhi cut; no asin needed) + const float sdMuls = innerMod.sdMuls; + const float sdPVoff = 0.1f / rtOut; + const float eps = alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); + const float dx = xOut - xIn; + const float dy = yOut - yIn; + const float crossDC = dx * yIn - dy * xIn; + const float dotDC = dx * xIn + dy * yIn; + const float tanSlope = sdSin / cosSlope; + const float tanSdCut = tanSlope + eps * (1.f + tanSlope * tanSlope); + const float tanSdCutSq = tanSdCut * tanSdCut; + + if (dotDC <= 0.f || crossDC * crossDC >= tanSdCutSq * dotDC * dotDC) return false; - dPhiChange = cms::alpakatools::reducePhiRange( - acc, cms::alpakatools::phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMDIndex]); + // Only survivors: compute actual dPhiChange via single atan2 + dPhiChange = alpaka::math::atan2(acc, -crossDC, dotDC); float dAlphaThresholdValues[3]; dAlphaThreshold(acc, dAlphaThresholdValues, - modules, + innerMod, + outerMod, mds, xIn, yIn, @@ -444,8 +442,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { yOut, zOut, rtOut, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut); @@ -462,17 +458,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold) return false; + if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; - return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + + if (alpaka::math::abs(acc, dAlphaInnerMDOuterMD) >= dAlphaInnerMDOuterMDThreshold) + return false; + + return true; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, float& dPhi, @@ -505,10 +505,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zOut = mds.anchorZ()[outerMDIndex]; rtOut = mds.anchorRt()[outerMDIndex]; - bool outerLayerEndcapTwoS = - (modules.subdets()[outerLowerModuleIndex] == Endcap) && (modules.moduleType()[outerLowerModuleIndex] == TwoS); + bool outerLayerEndcapTwoS = (outerMod.subdet == Endcap) && (outerMod.moduleType == TwoS); - float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float sdSinEC = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax); + const float cosSlope = alpaka::math::sqrt(acc, 1.f - sdSinEC * sdSinEC); float disks2SMinRadius = 60.f; float rtGeom = ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius) @@ -522,7 +522,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dz = zOut - zIn; float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); - float drtDzScale = sdSlope / alpaka::math::tan(acc, sdSlope); + // Use cosSlope <= asin(s)*cos(asin(s))/s for rt window (safe, wider window; defers asin) + const float drtDzScale = cosSlope; //rt should increase rtLo = alpaka::math::max(acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom); @@ -533,19 +534,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; + // Algebraic phi pre-check: |dPhi| > asin(s) iff |tan(dPhi)| > s/sqrt(1-s^2) (exact for |dPhi| < pi/2) + // Uses already-loaded x,y; avoids asin + anchorPhi reads for rejected pairs. + const float crossPhi = xIn * yOut - xOut * yIn; + const float dotPhi = xIn * xOut + yIn * yOut; + const float tanSlopeSq = sdSinEC * sdSinEC / (1.f - sdSinEC * sdSinEC); + if (dotPhi <= 0.f || crossPhi * crossPhi > tanSlopeSq * dotPhi * dotPhi) + return false; + + // Compute asin only for pairs surviving the algebraic phi pre-check + const float sdSlope = alpaka::math::asin(acc, sdSinEC); + dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); - if (!passDeltaPhiCutsEndcap(acc, - modules, - mds, - innerLowerModuleIndex, - outerLowerModuleIndex, - innerMDIndex, - outerMDIndex, - dPhi, - rtOut, - sdSlope, - ptCut)) + if (!passDeltaPhiCutsEndcap(acc, mds, innerMDIndex, outerMDIndex, dPhi, rtOut, sdSlope, ptCut)) return false; if (outerLayerEndcapTwoS) { @@ -569,7 +571,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dAlphaThresholdValues[3]; dAlphaThreshold(acc, dAlphaThresholdValues, - modules, + innerMod, + outerMod, mds, xIn, yIn, @@ -579,8 +582,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { yOut, zOut, rtOut, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut); @@ -597,17 +598,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold) return false; + if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; - return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + + if (alpaka::math::abs(acc, dAlphaInnerMDOuterMD) >= dAlphaInnerMDOuterMDThreshold) + return false; + + return true; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, float& dPhi, @@ -626,16 +631,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& rtHi, #endif const float ptCut) { - if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.subdets()[outerLowerModuleIndex] == Barrel) { + if (innerMod.subdet == Barrel and outerMod.subdet == Barrel) { #ifdef CUT_VALUE_DEBUG rtLo = -999.f; rtHi = -999.f; #endif return runSegmentDefaultAlgoBarrel(acc, - modules, + innerMod, + outerMod, mds, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, dPhi, @@ -658,10 +662,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zHi = -999.f; #endif return runSegmentDefaultAlgoEndcap(acc, - modules, + innerMod, + outerMod, mds, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, dPhi, @@ -697,6 +700,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nInnerMDs == 0) continue; + // Pre-load inner module data once per module (outer loop) + ModuleSegData innerMod; + innerMod.subdet = modules.subdets()[innerLowerModuleIndex]; + innerMod.side = modules.sides()[innerLowerModuleIndex]; + innerMod.layer = modules.layers()[innerLowerModuleIndex]; + innerMod.iL = innerMod.layer - 1; + innerMod.moduleType = modules.moduleType()[innerLowerModuleIndex]; + innerMod.drdz = modules.drdzs()[innerLowerModuleIndex]; + innerMod.moduleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); + innerMod.isTilted = (innerMod.subdet == Barrel and innerMod.side != Center); + innerMod.segMiniTilt2 = + innerMod.isTilted + ? (0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (innerMod.drdz * innerMod.drdz) / + (1.f + innerMod.drdz * innerMod.drdz) / (innerMod.moduleGapSize * innerMod.moduleGapSize)) + : 0.f; + innerMod.sdMuls = (innerMod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[innerMod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[innerMod.iL] * 3.f / ptCut; + unsigned int nConnectedModules = modules.nConnectedModules()[innerLowerModuleIndex]; for (uint16_t outerLowerModuleArrayIdx : cms::alpakatools::uniform_elements_y(acc, nConnectedModules)) { @@ -708,6 +729,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (limit == 0) continue; + + // Pre-load outer module data once per module pair (middle loop) + ModuleSegData outerMod; + outerMod.subdet = modules.subdets()[outerLowerModuleIndex]; + outerMod.side = modules.sides()[outerLowerModuleIndex]; + outerMod.layer = modules.layers()[outerLowerModuleIndex]; + outerMod.iL = outerMod.layer - 1; + outerMod.moduleType = modules.moduleType()[outerLowerModuleIndex]; + outerMod.drdz = modules.drdzs()[outerLowerModuleIndex]; + outerMod.moduleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); + outerMod.isTilted = (outerMod.subdet == Barrel and outerMod.side != Center); + outerMod.segMiniTilt2 = + outerMod.isTilted + ? (0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (outerMod.drdz * outerMod.drdz) / + (1.f + outerMod.drdz * outerMod.drdz) / (outerMod.moduleGapSize * outerMod.moduleGapSize)) + : 0.f; + outerMod.sdMuls = (outerMod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[outerMod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[outerMod.iL] * 3.f / ptCut; + for (unsigned int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) { unsigned int innerMDArrayIdx = hitIndex / nOuterMDs; unsigned int outerMDArrayIdx = hitIndex % nOuterMDs; @@ -727,29 +767,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dPhiMax = 0; dPhiChangeMin = 0; dPhiChangeMax = 0; - if (runSegmentDefaultAlgo(acc, - modules, - mds, - innerLowerModuleIndex, - outerLowerModuleIndex, - innerMDIndex, - outerMDIndex, - dPhi, - dPhiMin, - dPhiMax, - dPhiChange, - dPhiChangeMin, - dPhiChangeMax, + bool pass = runSegmentDefaultAlgo(acc, + innerMod, + outerMod, + mds, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax, #ifdef CUT_VALUE_DEBUG - dAlphaInnerMDSegment, - dAlphaOuterMDSegment, - dAlphaInnerMDOuterMD, - zLo, - zHi, - rtLo, - rtHi, + dAlphaInnerMDSegment, + dAlphaOuterMDSegment, + dAlphaInnerMDOuterMD, + zLo, + zHi, + rtLo, + rtHi, #endif - ptCut)) { + ptCut); + + if (pass) { unsigned int totOccupancySegments = alpaka::atomicAdd(acc, &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex], @@ -799,21 +840,81 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passDeltaPhiCutsSelector(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLm, - uint16_t outerLm, unsigned int innerMD, unsigned int outerMD, const float ptCut) { - const float dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMD] - mds.anchorPhi()[innerMD]); + const float rtIn = mds.anchorRt()[innerMD]; const float rtOut = mds.anchorRt()[outerMD]; - const float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - - if (modules.subdets()[innerLm] == Barrel && modules.subdets()[outerLm] == Barrel) - return passDeltaPhiCutsBarrel(acc, modules, mds, innerLm, outerLm, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut); - else - return passDeltaPhiCutsEndcap(acc, modules, mds, innerLm, outerLm, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut); + const float sdSin = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax); + const float cosSlope = alpaka::math::sqrt(acc, 1.f - sdSin * sdSin); + + if (innerMod.subdet == Barrel && outerMod.subdet == Barrel) { + // Barrel: z-range pre-filter + cross-product dPhiChange test (no asin needed). + const float zIn = mds.anchorZ()[innerMD]; + const float zOut = mds.anchorZ()[outerMD]; + const float dzDrtScale = 1.f / cosSlope; + const float zGeom = innerMod.layer <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + const float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - zGeom; + const float zHi = zIn + (zIn + kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + zGeom; + if (zOut < zLo || zOut > zHi) + return false; + + // Cross-product dPhiChange test (equivalent to |tan(dPhiChange)| < tan(sdCut), no asin) + const float sdMuls = innerMod.sdMuls; + const float sdPVoff = 0.1f / rtOut; + const float eps = alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); + const float xIn = mds.anchorX()[innerMD]; + const float yIn = mds.anchorY()[innerMD]; + const float xOut = mds.anchorX()[outerMD]; + const float yOut = mds.anchorY()[outerMD]; + const float dx = xOut - xIn; + const float dy = yOut - yIn; + const float crossDC = dx * yIn - dy * xIn; + const float dotDC = dx * xIn + dy * yIn; + const float tanSlope = sdSin / cosSlope; + const float tanSdCut = tanSlope + eps * (1.f + tanSlope * tanSlope); + return dotDC > 0.f && crossDC * crossDC < tanSdCut * tanSdCut * dotDC * dotDC; + } else { + // Endcap/mixed: z-sign + rt-range pre-filter before asin (same as creation kernel). + const float zIn = mds.anchorZ()[innerMD]; + const float zOut = mds.anchorZ()[outerMD]; + if (zIn * zOut < 0.f) + return false; + + const float dz = zOut - zIn; + const float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + const float drtDzScale = cosSlope; + constexpr float disks2SMinRadius = 60.f; + const float rtGeom = + ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius) + ? (2.f * kPixelPSZpitch) + : ((rtIn < disks2SMinRadius || rtOut < disks2SMinRadius) ? (kPixelPSZpitch + kStrip2SZpitch) + : (2.f * kStrip2SZpitch))); + const float rtLo = + alpaka::math::max(acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom); + const float rtHi = rtIn * (zOut - dLum) / (zIn - dLum) + rtGeom; + if (rtOut < rtLo || rtOut > rtHi) + return false; + + // Algebraic phi pre-check: |dPhi| > asin(s) iff |tan(dPhi)| > s/sqrt(1-s^2) (exact for |dPhi| < pi/2) + const float xIn = mds.anchorX()[innerMD]; + const float yIn = mds.anchorY()[innerMD]; + const float xOut = mds.anchorX()[outerMD]; + const float yOut = mds.anchorY()[outerMD]; + const float crossPhi = xIn * yOut - xOut * yIn; + const float dotPhi = xIn * xOut + yIn * yOut; + const float tanSlopeSq = sdSin * sdSin / (1.f - sdSin * sdSin); + if (dotPhi <= 0.f || crossPhi * crossPhi > tanSlopeSq * dotPhi * dotPhi) + return false; + + // Survivors: compute asin + dPhi cuts + const float sdSlope = alpaka::math::asin(acc, sdSin); + const float dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMD] - mds.anchorPhi()[innerMD]); + return passDeltaPhiCutsEndcap(acc, mds, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut); + } } struct CountMiniDoubletConnections { @@ -837,12 +938,48 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nConnectedModules == 0) continue; + // Pre-load inner module data once per module (outer loop) + ModuleSegData innerMod; + innerMod.subdet = modules.subdets()[innerLowerModuleIndex]; + innerMod.side = modules.sides()[innerLowerModuleIndex]; + innerMod.layer = modules.layers()[innerLowerModuleIndex]; + innerMod.iL = innerMod.layer - 1; + innerMod.moduleType = modules.moduleType()[innerLowerModuleIndex]; + innerMod.drdz = modules.drdzs()[innerLowerModuleIndex]; + innerMod.moduleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); + innerMod.isTilted = (innerMod.subdet == Barrel and innerMod.side != Center); + innerMod.segMiniTilt2 = + innerMod.isTilted + ? (0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (innerMod.drdz * innerMod.drdz) / + (1.f + innerMod.drdz * innerMod.drdz) / (innerMod.moduleGapSize * innerMod.moduleGapSize)) + : 0.f; + innerMod.sdMuls = (innerMod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[innerMod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[innerMod.iL] * 3.f / ptCut; + for (uint16_t outerLowerModuleArrayIdx : cms::alpakatools::uniform_elements_y(acc, nConnectedModules)) { const uint16_t outerLowerModuleIndex = modules.moduleMap()[innerLowerModuleIndex][outerLowerModuleArrayIdx]; const unsigned int nOuterMDs = mdsOccupancy.nMDs()[outerLowerModuleIndex]; if (nOuterMDs == 0) continue; + // Pre-load outer module data once per module pair (middle loop) + ModuleSegData outerMod; + outerMod.subdet = modules.subdets()[outerLowerModuleIndex]; + outerMod.side = modules.sides()[outerLowerModuleIndex]; + outerMod.layer = modules.layers()[outerLowerModuleIndex]; + outerMod.iL = outerMod.layer - 1; + outerMod.moduleType = modules.moduleType()[outerLowerModuleIndex]; + outerMod.drdz = modules.drdzs()[outerLowerModuleIndex]; + outerMod.moduleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); + outerMod.isTilted = (outerMod.subdet == Barrel and outerMod.side != Center); + outerMod.segMiniTilt2 = + outerMod.isTilted + ? (0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (outerMod.drdz * outerMod.drdz) / + (1.f + outerMod.drdz * outerMod.drdz) / (outerMod.moduleGapSize * outerMod.moduleGapSize)) + : 0.f; + outerMod.sdMuls = (outerMod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[outerMod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[outerMod.iL] * 3.f / ptCut; + const unsigned int limit = nInnerMDs * nOuterMDs; for (unsigned int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) { @@ -853,8 +990,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const unsigned int outerMDIndex = mdRanges[outerLowerModuleIndex][0] + outerMDArrayIdx; // Increment the connected max if the LS passes the delta phi cuts. - if (passDeltaPhiCutsSelector( - acc, modules, mds, innerLowerModuleIndex, outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut)) { + if (passDeltaPhiCutsSelector(acc, innerMod, outerMod, mds, innerMDIndex, outerMDIndex, ptCut)) { alpaka::atomicAdd(acc, &mds.connectedMax()[innerMDIndex], 1u, alpaka::hierarchy::Threads{}); } } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 3768f021a0cfb..ab82be43506d7 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -328,18 +328,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int trackCandidateIndex : cms::alpakatools::uniform_elements_x(acc, nTrackCandidates)) { LSTObjType type = candsBase.trackCandidateType()[trackCandidateIndex]; unsigned int innerTrackletIdx = candsExtended.objectIndices()[trackCandidateIndex][0]; + + // Early spatial filter: compute TC eta and phi based on type and skip if far away. + float eta2, phi2; + if (type == LSTObjType::T5) { + eta2 = __H2F(quintuplets.eta()[innerTrackletIdx]); + phi2 = __H2F(quintuplets.phi()[innerTrackletIdx]); + } else if (type == LSTObjType::pT3) { + eta2 = __H2F(pixelTriplets.eta_pix()[innerTrackletIdx]); + phi2 = __H2F(pixelTriplets.phi_pix()[innerTrackletIdx]); + } else if (type == LSTObjType::pT5) { + eta2 = pixelSeeds.eta()[innerTrackletIdx - prefix]; + phi2 = pixelSeeds.phi()[innerTrackletIdx - prefix]; + } else { + continue; + } + + if (alpaka::math::abs(acc, eta1 - eta2) > 0.2f) + continue; + + float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); + if (alpaka::math::abs(acc, dPhi) > 0.2f) + continue; + + float dEta = eta1 - eta2; + float dR2 = dEta * dEta + dPhi * dPhi; + if (type == LSTObjType::T5) { - unsigned int quintupletIndex = innerTrackletIdx; // T5 index - float eta2 = __H2F(quintuplets.eta()[quintupletIndex]); - float phi2 = __H2F(quintuplets.phi()[quintupletIndex]); - float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); - float dR2 = dEta * dEta + dPhi * dPhi; // Cut on pLS-T5 embed distance. if (dR2 < 0.02f) { float d2 = 0.f; CMS_UNROLL_LOOP for (unsigned k = 0; k < Params_pLS::kEmbed; ++k) { - const float diff = plsEmbed[k] - quintuplets.t5Embed()[quintupletIndex][k]; + const float diff = plsEmbed[k] - quintuplets.t5Embed()[innerTrackletIdx][k]; d2 += diff * diff; } // Compare squared embedding distance to the cut value for the eta bin. @@ -347,36 +367,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelSegments.isDup()[pixelArrayIndex] = true; } } - } - if (type == LSTObjType::pT3) { + } else if (type == LSTObjType::pT3) { int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx]; int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsBase); if (npMatched > 0) pixelSegments.isDup()[pixelArrayIndex] = true; - int pT3Index = innerTrackletIdx; - float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]); - float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]); - float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); - - float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) pixelSegments.isDup()[pixelArrayIndex] = true; - } - if (type == LSTObjType::pT5) { - unsigned int pLSIndex = innerTrackletIdx; - int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsBase); - if (npMatched > 0) { + } else if (type == LSTObjType::pT5) { + int npMatched = checkPixelHits(prefix + pixelArrayIndex, innerTrackletIdx, mds, segments, hitsBase); + if (npMatched > 0) pixelSegments.isDup()[pixelArrayIndex] = true; - } - float eta2 = pixelSeeds.eta()[pLSIndex - prefix]; - float phi2 = pixelSeeds.phi()[pLSIndex - prefix]; - float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); - - float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) pixelSegments.isDup()[pixelArrayIndex] = true; } diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 81c166cd1f4f5..fe0fe442e2c04 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -14,6 +14,33 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + // Pre-loaded inner-segment-constant data for passPointingConstraint. + // Populated once per inner segment, reused across all outer segments in the inner loop. + struct T3InnerHitData { + float x1, y1; // first MD position (cm) + float x2, y2; // second MD position (shared MD, cm) + float rt1, rt2; // anchorRt for first two MDs (cm) + float drt_InSeg; // rt2 - rt1 + float rt_InSeg; // sqrt((x2-x1)^2 + (y2-y1)^2) + float sdIn_alpha; // dPhiChange of inner segment + float sdIn_alphaRHmin; // dPhiChangeMin (for endcap path) + float sdIn_alphaRHmax; // dPhiChangeMax (for endcap path) + // Precomputed sin/cos of alpha values for algebraic betaIn check (avoids per-candidate atan2). + float sin_alpha, cos_alpha; + float sin_alphaRHmin, cos_alphaRHmin; // for endcap EEE path + float sin_alphaRHmax, cos_alphaRHmax; // for endcap EEE path + short innerSubdet; // subdet of inner-inner module + short middleSubdet; // subdet of middle module + }; + + // Pre-loaded hit coordinates for passRZConstraint. + // All values in cm, passRZConstraint converts to meters internally. + struct T3HitCoords { + float x1, y1, z1, rt1; + float x2, y2, z2, rt2; + float x3, y3, z3, rt3; + }; + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(ModulesConst modules, MiniDoubletsConst mds, SegmentsConst segments, @@ -72,13 +99,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, ModulesConst modules, - MiniDoubletsConst mds, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, - unsigned int firstMDIndex, - unsigned int secondMDIndex, - unsigned int thirdMDIndex, + T3HitCoords const& h, float circleRadius, float circleCenterX, float circleCenterY, @@ -89,26 +113,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const short layer3 = modules.lstLayers()[outerOuterLowerModuleIndex]; //all the values are stored in the unit of cm, in the calculation below we need to be cautious if we want to use the meter unit - //get r and z - const float r1 = mds.anchorRt()[firstMDIndex] / 100; - const float r2 = mds.anchorRt()[secondMDIndex] / 100; - const float r3 = mds.anchorRt()[thirdMDIndex] / 100; + //get r and z (convert from cm to m) + const float r1 = h.rt1 / 100; + const float r2 = h.rt2 / 100; + const float r3 = h.rt3 / 100; - const float z1 = mds.anchorZ()[firstMDIndex] / 100; - const float z2 = mds.anchorZ()[secondMDIndex] / 100; - const float z3 = mds.anchorZ()[thirdMDIndex] / 100; + const float z1 = h.z1 / 100; + const float z2 = h.z2 / 100; + const float z3 = h.z3 / 100; //use linear approximation for regions 9 and 20-24 because it works better (see https://github.com/SegmentLinking/cmssw/pull/92) float residual = alpaka::math::abs(acc, z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1)); - //get the x,y position of each MD - const float x1 = mds.anchorX()[firstMDIndex] / 100; - const float x2 = mds.anchorX()[secondMDIndex] / 100; - const float x3 = mds.anchorX()[thirdMDIndex] / 100; + //get the x,y position of each MD (convert from cm to m) + const float x1 = h.x1 / 100; + const float x2 = h.x2 / 100; + const float x3 = h.x3 / 100; - const float y1 = mds.anchorY()[firstMDIndex] / 100; - const float y2 = mds.anchorY()[secondMDIndex] / 100; - const float y3 = mds.anchorY()[thirdMDIndex] / 100; + const float y1 = h.y1 / 100; + const float y2 = h.y2 / 100; + const float y3 = h.y3 / 100; float cross = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1); charge = -1 * ((int)copysignf(1.0f, cross)); @@ -186,13 +210,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { else if (y3 > y2 && y2 > y1) py = alpaka::math::abs(acc, py); - float AO = alpaka::math::sqrt( - acc, (x_other - x_center) * (x_other - x_center) + (y_other - y_center) * (y_other - y_center)); - float BO = - alpaka::math::sqrt(acc, (x_init - x_center) * (x_init - x_center) + (y_init - y_center) * (y_init - y_center)); + // All 3 hits lie on the fitted circle, so AO = BO = R = circleRadius/100. + // Law of Cosines: cos(dPhi) = (2R^2 - AB^2) / (2R^2) = 1 - AB^2/(2R^2) + float R = circleRadius / 100; float AB2 = (x_other - x_init) * (x_other - x_init) + (y_other - y_init) * (y_other - y_init); - float dPhi = alpaka::math::acos(acc, (AO * AO + BO * BO - AB2) / (2 * AO * BO)); //Law of Cosines - float ds = circleRadius / 100 * dPhi; + float dPhi = alpaka::math::acos(acc, 1 - AB2 / (2 * R * R)); + float ds = R * dPhi; float pz = dz / ds * pt; float p = alpaka::math::sqrt(acc, px * px + py * py + pz * pz); @@ -216,8 +239,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float A = paraB * paraB + paraC * paraC; float B = 2 * paraA * paraB; float C = paraA * paraA - paraC * paraC; - float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); - float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float disc = alpaka::math::sqrt(acc, B * B - 4 * A * C); + float sol1 = (-B + disc) / (2 * A); + float sol2 = (-B - disc) / (2 * A); float solz1 = alpaka::math::asin(acc, sol1) / rou * pz / p + z_init; float solz2 = alpaka::math::asin(acc, sol2) / rou * pz / p + z_init; float diffz1 = (solz1 - z_target) * 100; @@ -228,8 +252,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { : ((alpaka::math::abs(acc, diffz1) < alpaka::math::abs(acc, diffz2)) ? diffz1 : diffz2); } else { // for endcap float s = (z_target - z_init) * p / pz; - float x = x_init + px / a * alpaka::math::sin(acc, rou * s) - py / a * (1 - alpaka::math::cos(acc, rou * s)); - float y = y_init + py / a * alpaka::math::sin(acc, rou * s) + px / a * (1 - alpaka::math::cos(acc, rou * s)); + float sinRS = alpaka::math::sin(acc, rou * s); + float cosRS = alpaka::math::cos(acc, rou * s); + float x = x_init + px / a * sinRS - py / a * (1 - cosRS); + float y = y_init + py / a * sinRS + px / a * (1 - cosRS); residual = (r_target - alpaka::math::sqrt(acc, x * x + y * y)) * 100; } @@ -326,66 +352,45 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, - ModulesConst modules, - MiniDoubletsConst mds, - SegmentsConst segments, - unsigned int firstMDIndex, - unsigned int secondMDIndex, - unsigned int thirdMDIndex, - uint16_t innerInnerLowerModuleIndex, - uint16_t middleLowerModuleIndex, - uint16_t outerOuterLowerModuleIndex, - unsigned int innerSegmentIndex, - const float ptCut) { - const float x1 = mds.anchorX()[firstMDIndex]; - const float x2 = mds.anchorX()[secondMDIndex]; - const float x3 = mds.anchorX()[thirdMDIndex]; - const float y1 = mds.anchorY()[firstMDIndex]; - const float y2 = mds.anchorY()[secondMDIndex]; - const float y3 = mds.anchorY()[thirdMDIndex]; - - const short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; - const short middleLowerModuleSubdet = modules.subdets()[middleLowerModuleIndex]; - const short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - - const float rt_InLo = mds.anchorRt()[firstMDIndex]; - const float rt_InOut = mds.anchorRt()[secondMDIndex]; - const float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); - - const float drt_InSeg = rt_InOut - rt_InLo; - const float drt_tl_axis = alpaka::math::sqrt(acc, (x3 - x1) * (x3 - x1) + (y3 - y1) * (y3 - y1)); - - //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = alpaka::math::sqrt(acc, (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); - - const float betaIn = - sdIn_alpha - cms::alpakatools::reducePhiRange( - acc, cms::alpakatools::phi(acc, x3 - x1, y3 - y1) - mds.anchorPhi()[firstMDIndex]); + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint( + TAcc const& acc, T3InnerHitData const& inner, float x3, float y3, short outerSubdet, const float ptCut) { + const float dx = x3 - inner.x1; + const float dy = y3 - inner.y1; + const float drt_tl_axis = alpaka::math::sqrt(acc, dx * dx + dy * dy); + + // For T3, the argument (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut is always < 0.02, + // so asin(x) ~ x to better than 0.01% accuracy. const float betaInCut = - alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + - (0.02f / drt_InSeg); - - float betaInRHmin = betaIn; - - if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and - outerOuterLowerModuleSubdet == Endcap) { - float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); - - betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; - float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; - float swapTemp; - - if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { - swapTemp = betaInRHmin; - betaInRHmin = betaInRHmax; - betaInRHmax = swapTemp; + alpaka::math::min(acc, (-inner.rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax) + + (0.02f / inner.drt_InSeg); + + // Algebraic betaIn check: exact replacement of |betaIn| < betaInCut without atan2. + // betaIn = alpha - deltaPhi(dir_13, hit1), where deltaPhi = atan2(cross, dot). + // |alpha - atan2(cross, dot)| < cut <==> sin^2(alpha - atan2) < cut^2 when cos > 0. + // sin(alpha - atan2) = (sin_alpha * dot - cos_alpha * cross) / sqrt(cross^2 + dot^2). + // cross = x1*(y3-y1) - y1*(x3-x1) = x1*y3 - y1*x3 (angle from hit1 to dir_13). + const float crossPC = inner.x1 * y3 - inner.y1 * x3; + const float dotPC = x3 * inner.x1 + y3 * inner.y1 - inner.rt1 * inner.rt1; + const float r2 = crossPC * crossPC + dotPC * dotPC; + const float betaInCutSq = betaInCut * betaInCut; + + if (inner.innerSubdet == Endcap and inner.middleSubdet == Endcap and outerSubdet == Endcap) { + // EEE: check both alpha variants, pass if the one with smaller |betaIn| is within cut + const float sinValMin = inner.sin_alphaRHmin * dotPC - inner.cos_alphaRHmin * crossPC; + const float sinValMax = inner.sin_alphaRHmax * dotPC - inner.cos_alphaRHmax * crossPC; + const float sqMin = sinValMin * sinValMin; + const float sqMax = sinValMax * sinValMax; + + if (sqMin <= sqMax) { + return sqMin < betaInCutSq * r2 and (inner.cos_alphaRHmin * dotPC + inner.sin_alphaRHmin * crossPC > 0.f); + } else { + return sqMax < betaInCutSq * r2 and (inner.cos_alphaRHmax * dotPC + inner.sin_alphaRHmax * crossPC > 0.f); } } //Beta cut - return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + const float sinVal = inner.sin_alpha * dotPC - inner.cos_alpha * crossPC; + return sinVal * sinVal < betaInCutSq * r2 and (inner.cos_alpha * dotPC + inner.sin_alpha * crossPC > 0.f); } template @@ -410,46 +415,50 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; const unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; - const float x1 = mds.anchorX()[firstMDIndex]; - const float x2 = mds.anchorX()[secondMDIndex]; - const float x3 = mds.anchorX()[thirdMDIndex]; - const float y1 = mds.anchorY()[firstMDIndex]; - const float y2 = mds.anchorY()[secondMDIndex]; - const float y3 = mds.anchorY()[thirdMDIndex]; + // Load ALL hit coordinates ONCE - reused by circle fit, passRZConstraint, and betaIn computation + T3HitCoords h; + h.x1 = mds.anchorX()[firstMDIndex]; + h.y1 = mds.anchorY()[firstMDIndex]; + h.z1 = mds.anchorZ()[firstMDIndex]; + h.rt1 = mds.anchorRt()[firstMDIndex]; + h.x2 = mds.anchorX()[secondMDIndex]; + h.y2 = mds.anchorY()[secondMDIndex]; + h.z2 = mds.anchorZ()[secondMDIndex]; + h.rt2 = mds.anchorRt()[secondMDIndex]; + h.x3 = mds.anchorX()[thirdMDIndex]; + h.y3 = mds.anchorY()[thirdMDIndex]; + h.z3 = mds.anchorZ()[thirdMDIndex]; + h.rt3 = mds.anchorRt()[thirdMDIndex]; std::tie(circleRadius, circleCenterX, circleCenterY) = - computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3); + computeRadiusFromThreeAnchorHits(acc, h.x1, h.y1, h.x2, h.y2, h.x3, h.y3); if (not passRZConstraint(acc, modules, - mds, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, - firstMDIndex, - secondMDIndex, - thirdMDIndex, + h, circleRadius, circleCenterX, circleCenterY, charge)) return false; - const float rt_InLo = mds.anchorRt()[firstMDIndex]; - const float rt_InOut = mds.anchorRt()[secondMDIndex]; + // Reuse pre-loaded coordinates for betaIn computation (no SoA reloads) const float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); - const float drt_InSeg = rt_InOut - rt_InLo; - const float drt_tl_axis = alpaka::math::sqrt(acc, (x3 - x1) * (x3 - x1) + (y3 - y1) * (y3 - y1)); + const float drt_InSeg = h.rt2 - h.rt1; + const float drt_tl_axis = alpaka::math::sqrt(acc, (h.x3 - h.x1) * (h.x3 - h.x1) + (h.y3 - h.y1) * (h.y3 - h.y1)); //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = alpaka::math::sqrt(acc, (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); + const float rt_InSeg = alpaka::math::sqrt(acc, (h.x2 - h.x1) * (h.x2 - h.x1) + (h.y2 - h.y1) * (h.y2 - h.y1)); - betaIn = sdIn_alpha - cms::alpakatools::reducePhiRange( - acc, cms::alpakatools::phi(acc, x3 - x1, y3 - y1) - mds.anchorPhi()[firstMDIndex]); + betaIn = + sdIn_alpha - cms::alpakatools::reducePhiRange( + acc, cms::alpakatools::phi(acc, h.x3 - h.x1, h.y3 - h.y1) - mds.anchorPhi()[firstMDIndex]); betaInCut = - alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + - (0.02f / drt_InSeg); + alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax) + (0.02f / drt_InSeg); bool inference = lst::t3dnn::runInference(acc, mds, firstMDIndex, secondMDIndex, thirdMDIndex, circleRadius, betaIn, t3Scores); @@ -514,6 +523,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex]; int middleMDIndiceInner = segments.mdIndices()[innerSegmentIndex][1]; + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + + // Pre-load inner-segment-constant data (reused across all outer segments) + T3InnerHitData inner; + inner.x1 = mds.anchorX()[firstMDIndex]; + inner.y1 = mds.anchorY()[firstMDIndex]; + inner.x2 = mds.anchorX()[middleMDIndiceInner]; + inner.y2 = mds.anchorY()[middleMDIndiceInner]; + inner.rt1 = mds.anchorRt()[firstMDIndex]; + inner.rt2 = mds.anchorRt()[middleMDIndiceInner]; + inner.drt_InSeg = inner.rt2 - inner.rt1; + inner.rt_InSeg = alpaka::math::sqrt( + acc, (inner.x2 - inner.x1) * (inner.x2 - inner.x1) + (inner.y2 - inner.y1) * (inner.y2 - inner.y1)); + inner.sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + inner.sin_alpha = alpaka::math::sin(acc, inner.sdIn_alpha); + inner.cos_alpha = alpaka::math::cos(acc, inner.sdIn_alpha); + inner.innerSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + inner.middleSubdet = modules.subdets()[middleLowerModuleIndex]; + inner.sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + inner.sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + if (inner.innerSubdet == Endcap and inner.middleSubdet == Endcap) { + inner.sin_alphaRHmin = alpaka::math::sin(acc, inner.sdIn_alphaRHmin); + inner.cos_alphaRHmin = alpaka::math::cos(acc, inner.sdIn_alphaRHmin); + inner.sin_alphaRHmax = alpaka::math::sin(acc, inner.sdIn_alphaRHmax); + inner.cos_alphaRHmax = alpaka::math::cos(acc, inner.sdIn_alphaRHmax); + } unsigned int nOuterSegments = segmentsOccupancy.nSegments()[middleLowerModuleIndex]; for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) { @@ -524,22 +559,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex]; - unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; - unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + short outerSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - if (not passPointingConstraint(acc, - modules, - mds, - segments, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - innerInnerLowerModuleIndex, - middleLowerModuleIndex, - outerOuterLowerModuleIndex, - innerSegmentIndex, - ptCut)) + if (not passPointingConstraint(acc, inner, x3, y3, outerSubdet, ptCut)) continue; // Match inner Sg and Outer Sg @@ -672,29 +697,46 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nOuterSegments == 0) continue; + const unsigned int firstMDIndex = mdIndices[innerSegmentIndex][0]; + + // Pre-load inner-segment-constant data (reused across all outer segments) + T3InnerHitData inner; + inner.x1 = mds.anchorX()[firstMDIndex]; + inner.y1 = mds.anchorY()[firstMDIndex]; + inner.x2 = mds.anchorX()[mdShared]; + inner.y2 = mds.anchorY()[mdShared]; + inner.rt1 = mds.anchorRt()[firstMDIndex]; + inner.rt2 = mds.anchorRt()[mdShared]; + inner.drt_InSeg = inner.rt2 - inner.rt1; + inner.rt_InSeg = alpaka::math::sqrt( + acc, (inner.x2 - inner.x1) * (inner.x2 - inner.x1) + (inner.y2 - inner.y1) * (inner.y2 - inner.y1)); + inner.sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + inner.sin_alpha = alpaka::math::sin(acc, inner.sdIn_alpha); + inner.cos_alpha = alpaka::math::cos(acc, inner.sdIn_alpha); + inner.innerSubdet = modules.subdets()[innerLowerModuleArrayIdx]; + inner.middleSubdet = modules.subdets()[middleLowerModuleIndex]; + inner.sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + inner.sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + if (inner.innerSubdet == Endcap and inner.middleSubdet == Endcap) { + inner.sin_alphaRHmin = alpaka::math::sin(acc, inner.sdIn_alphaRHmin); + inner.cos_alphaRHmin = alpaka::math::cos(acc, inner.sdIn_alphaRHmin); + inner.sin_alphaRHmax = alpaka::math::sin(acc, inner.sdIn_alphaRHmax); + inner.cos_alphaRHmax = alpaka::math::cos(acc, inner.sdIn_alphaRHmax); + } + for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) { const unsigned int outerSegmentIndex = segmentRanges[middleLowerModuleIndex][0] + outerSegmentArrayIndex; if (mdIndices[outerSegmentIndex][0] != mdShared) continue; - unsigned int firstMDIndex = mdIndices[innerSegmentIndex][0]; - unsigned int secondMDIndex = mdShared; unsigned int thirdMDIndex = mdIndices[outerSegmentIndex][1]; uint16_t outerOuterLowerModuleIndex = outerLowerModuleIndices[outerSegmentIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + short outerSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - if (not passPointingConstraint(acc, - modules, - mds, - segments, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - innerLowerModuleArrayIdx, - middleLowerModuleIndex, - outerOuterLowerModuleIndex, - innerSegmentIndex, - ptCut)) + if (not passPointingConstraint(acc, inner, x3, y3, outerSubdet, ptCut)) continue; alpaka::atomicAdd(acc, &segments.connectedMax()[innerSegmentIndex], 1u, alpaka::hierarchy::Threads{});