diff --git a/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h b/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h index 2430375ebd64f..ec72d051030f4 100644 --- a/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h +++ b/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h @@ -22,7 +22,7 @@ namespace cms::alpakatools { template ALPAKA_FN_HOST_ACC inline T deltaPhi(TAcc const& acc, T x1, T y1, T x2, T y2) { - return reducePhiRange(acc, alpaka::math::atan2(acc, -y2, -x2) - alpaka::math::atan2(acc, -y1, -x1)); + return alpaka::math::atan2(acc, x1 * y2 - x2 * y1, x1 * x2 + y1 * y2); } template diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h index 49c700f48ecc6..33759e0b49004 100644 --- a/RecoTracker/LSTCore/src/alpaka/Kernels.h +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -247,7 +247,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (quintuplets.isDup()[ix] & 1) continue; - bool isPT5_ix = quintuplets.partOfPT5()[ix]; + const bool isPT5_ix = quintuplets.partOfPT5()[ix]; + const float eta1 = __H2F(quintuplets.eta()[ix]); + const float phi1 = __H2F(quintuplets.phi()[ix]); + const float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { unsigned int jx = quintupletModuleIndices_lowmod2 + jx1; @@ -257,31 +260,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (quintuplets.isDup()[jx] & 1) continue; - bool isPT5_jx = quintuplets.partOfPT5()[jx]; + const bool isPT5_jx = quintuplets.partOfPT5()[jx]; if (isPT5_ix && isPT5_jx) continue; - float eta1 = __H2F(quintuplets.eta()[ix]); - float phi1 = __H2F(quintuplets.phi()[ix]); - float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); - - float eta2 = __H2F(quintuplets.eta()[jx]); - float phi2 = __H2F(quintuplets.phi()[jx]); - float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); - - float dEta = alpaka::math::abs(acc, eta1 - eta2); - float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); - + const float eta2 = __H2F(quintuplets.eta()[jx]); + const float dEta = alpaka::math::abs(acc, eta1 - eta2); if (dEta > 0.1f) continue; + const float phi2 = __H2F(quintuplets.phi()[jx]); + const float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); if (alpaka::math::abs(acc, dPhi) > 0.1f) continue; - float dR2 = dEta * dEta + dPhi * dPhi; - int nMatched = checkHitsT5(ix, jx, quintuplets); - const int minNHitsForDup_T5 = 5; + const float dR2 = dEta * dEta + dPhi * dPhi; + const int nMatched = checkHitsT5(ix, jx, quintuplets); + constexpr int minNHitsForDup_T5 = 5; float d2 = 0.f; CMS_UNROLL_LOOP @@ -291,6 +287,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } if (((dR2 < 0.001f || nMatched >= minNHitsForDup_T5) && d2 < 1.0f) || (dR2 < 0.02f && d2 < 0.1f)) { + const float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); if (isPT5_jx || score_rphisum1 > score_rphisum2) { rmQuintupletFromMemory(quintuplets, ix, true); } else if (isPT5_ix || score_rphisum1 < score_rphisum2) { @@ -458,11 +455,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const { unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) { + float eta1 = __H2F(pixelQuintuplets.eta()[ix]); + float phi1 = __H2F(pixelQuintuplets.phi()[ix]); float score1 = __H2F(pixelQuintuplets.score()[ix]); for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) { if (ix == jx) continue; + float eta2 = __H2F(pixelQuintuplets.eta()[jx]); + if (alpaka::math::abs(acc, eta1 - eta2) > 0.2f) + continue; + + float phi2 = __H2F(pixelQuintuplets.phi()[jx]); + if (alpaka::math::abs(acc, cms::alpakatools::deltaPhi(acc, phi1, phi2)) > 0.2f) + continue; + int nMatched = checkHitspT5(ix, jx, pixelQuintuplets); float score2 = __H2F(pixelQuintuplets.score()[jx]); const int minNHitsForDup_pT5 = 7; diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h index 806869522e062..f89ec20988e76 100644 --- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -13,15 +13,41 @@ #include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + // Pre-computed module-constant data for MiniDoublet kernels. + // Populated once per module to avoid redundant SoA loads in the inner hit-pair loop. + struct ModuleMDData { + uint16_t lowerModuleIndex; + short subdet; + short side; + short moduleType; + short moduleLayerType; + unsigned int iL; // layer - 1 + bool isTilted; + bool isEndcapTwoS; + bool isGloballyInner; + + // shiftStripHits constants + float slope; // dxdys + float drdz; // drdzs[lowerModuleIndex] + float moduleSep; // moduleGapSize result + + // dPhiThreshold pre-computed constants + float miniPVoff; + float miniMuls; + float miniTilt2; // 0 for non-tilted and endcap + float sqrtTermBase; // miniMuls^2 + miniPVoff^2 + float sqrtTermFlat; // sqrt(sqrtTermBase), valid for barrel flat + }; + template ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, MiniDoublets mds, HitsBaseConst hitsBase, HitsExtendedConst hitsExtended, - ModulesConst modules, + ModuleMDData const& mod, unsigned int lowerHitIdx, unsigned int upperHitIdx, - uint16_t lowerModuleIdx, float dz, float dPhi, float dPhiChange, @@ -34,9 +60,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //the index into which this MD needs to be written will be computed in the kernel //nMDs variable will be incremented in the kernel, no need to worry about that here - mds.moduleIndices()[idx] = lowerModuleIdx; + mds.moduleIndices()[idx] = mod.lowerModuleIndex; unsigned int anchorHitIndex, outerHitIndex; - if (modules.moduleType()[lowerModuleIdx] == PS and modules.moduleLayerType()[lowerModuleIdx] == Strip) { + if (mod.moduleType == PS and mod.moduleLayerType == Strip) { mds.anchorHitIndices()[idx] = upperHitIdx; mds.outerHitIndices()[idx] = lowerHitIdx; @@ -72,8 +98,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorHighEdgeY()[idx] = hitsExtended.highEdgeYs()[anchorHitIndex]; mds.anchorLowEdgeX()[idx] = hitsExtended.lowEdgeXs()[anchorHitIndex]; mds.anchorLowEdgeY()[idx] = hitsExtended.lowEdgeYs()[anchorHitIndex]; - mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); - mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + // Edge phi only read downstream when outerLayerEndcapTwoS is true; skip atan2 for other modules. + if (mod.isEndcapTwoS) { + mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); + mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + } mds.outerX()[idx] = hitsBase.xs()[outerHitIndex]; mds.outerY()[idx] = hitsBase.ys()[outerHitIndex]; @@ -89,6 +118,49 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { #endif } + // Overload for callers (Segment.h) that still pass ModulesConst + moduleIndex. + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, + MiniDoublets mds, + HitsBaseConst hitsBase, + HitsExtendedConst hitsExtended, + ModulesConst modules, + unsigned int lowerHitIdx, + unsigned int upperHitIdx, + uint16_t lowerModuleIdx, + float dz, + float dPhi, + float dPhiChange, + float shiftedX, + float shiftedY, + float shiftedZ, + float noShiftedDphi, + float noShiftedDPhiChange, + unsigned int idx) { + ModuleMDData mod; + mod.lowerModuleIndex = lowerModuleIdx; + mod.moduleType = modules.moduleType()[lowerModuleIdx]; + mod.moduleLayerType = modules.moduleLayerType()[lowerModuleIdx]; + mod.subdet = modules.subdets()[lowerModuleIdx]; + mod.isEndcapTwoS = (mod.subdet == Endcap && mod.moduleType == TwoS); + addMDToMemory(acc, + mds, + hitsBase, + hitsExtended, + mod, + lowerHitIdx, + upperHitIdx, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDPhiChange, + idx); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules(ModulesConst modules, uint16_t moduleIndex) { // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing // This is the same as what was previously considered as"isNormalTiltedModules" @@ -132,76 +204,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold(TAcc const& acc, - float rt, - ModulesConst modules, - uint16_t moduleIndex, - const float ptCut, - float dPhi = 0, - float dz = 0) { - // ================================================================= - // Various constants - // ================================================================= - //mean of the horizontal layer position in y; treat this as R below - - // ================================================================= - // Computing some components that make up the cut threshold - // ================================================================= - - unsigned int iL = modules.layers()[moduleIndex] - 1; + ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( + TAcc const& acc, float rt, ModuleMDData const& mod, const float ptCut, float dPhi = 0, float dz = 0) { const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - const float rLayNominal = - ((modules.subdets()[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); - const float miniPVoff = 0.1f / rLayNominal; - const float miniMuls = ((modules.subdets()[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); - const bool isTilted = modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] != Center; - //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly - - float drdz; - if (isTilted) { - if (modules.moduleType()[moduleIndex] == PS and modules.moduleLayerType()[moduleIndex] == Strip) { - drdz = modules.drdzs()[moduleIndex]; - } else { - drdz = modules.drdzs()[modules.partnerModuleIndices()[moduleIndex]]; - } - } else { - drdz = 0; - } - const float miniTilt2 = ((isTilted) ? (0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) / - (1.f + drdz * drdz) / moduleGapSize(modules, moduleIndex) - : 0); - - // Compute luminous region requirement for endcap - const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); // Balaji's new error - - // ================================================================= - // Return the threshold value - // ================================================================= - // Following condition is met if the module is central and flatly lying - if (modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] == Center) { - return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); + + // Barrel flat: no tilt or luminous region correction + if (mod.subdet == Barrel and mod.side == Center) { + return miniSlope + mod.sqrtTermFlat; } - // Following condition is met if the module is central and tilted - else if (modules.subdets()[moduleIndex] == Barrel and - modules.sides()[moduleIndex] != Center) //all types of tilted modules - { - return miniSlope + - alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); + // Barrel tilted + else if (mod.subdet == Barrel) { + return miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + mod.miniTilt2 * miniSlope * miniSlope); } - // If not barrel, it is Endcap + // Endcap: luminous region correction else { - return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); + const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); + return miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + miniLum * miniLum); } } template ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, + ModuleMDData const& mod, float* shiftedCoords, float xLower, float yLower, @@ -219,44 +243,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { // This means that there may be very very subtle edge effects coming from whether the strip hit is center of the module or the at the edge of the module // But this should be relatively minor effect - // dependent variables for this if statement - // lowerModule - // lowerHit - // upperHit - // endcapGeometry - // tiltedGeometry - - // Some variables relevant to the function - float xp; // pixel x (pixel hit x) - float yp; // pixel y (pixel hit y) - float zp; // pixel y (pixel hit y) - float rtp; // pixel y (pixel hit y) - float xa; // "anchor" x (the anchor position on the strip module plane from pixel hit) - float ya; // "anchor" y (the anchor position on the strip module plane from pixel hit) - float xo; // old x (before the strip hit is moved up or down) - float yo; // old y (before the strip hit is moved up or down) - float xn; // new x (after the strip hit is moved up or down) - float yn; // new y (after the strip hit is moved up or down) - float abszn; // new z in absolute value - float zn; // new z with the sign (+/-) accounted - float angleA; // in r-z plane the theta of the pixel hit in polar coordinate is the angleA - float angleB; // this is the angle of tilted module in r-z plane ("drdz"), for endcap this is 90 degrees - bool isEndcap; // If endcap, drdz = infinity - float moduleSeparation; - float drprime; // The radial shift size in x-y plane projection - float drprime_x; // x-component of drprime - float drprime_y; // y-component of drprime - const float& slope = - modules.dxdys()[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane - float absArctanSlope; - float angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees - float absdzprime; // The distance between the two points after shifting - const float& drdz_ = modules.drdzs()[lowerModuleIndex]; - // Assign hit pointers based on their hit type + float xp; // pixel x (pixel hit x) + float yp; // pixel y (pixel hit y) + float zp; // pixel z + float rtp; // pixel rt + float xo; // old x (before the strip hit is moved up or down) + float yo; // old y (before the strip hit is moved up or down) bool pHitInverted = false; - if (modules.moduleType()[lowerModuleIndex] == PS) { - // TODO: This is somewhat of an mystery.... somewhat confused why this is the case - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + if (mod.moduleType == PS) { + if (mod.moduleLayerType == Pixel) { xo = xUpper; yo = yUpper; xp = xLower; @@ -281,82 +276,70 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rtp = rtLower; } - // If it is endcap some of the math gets simplified (and also computers don't like infinities) - isEndcap = modules.subdets()[lowerModuleIndex] == Endcap; + const bool isEndcap = (mod.subdet == Endcap); - // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos - // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time - angleA = alpaka::math::abs(acc, alpaka::math::atan(acc, rtp / zp)); - angleB = - ((isEndcap) - ? kPi / 2.f - : alpaka::math::atan( - acc, - drdz_)); // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa + // Algebraic trig: sin(atan(r/z)) = r/hypot, cos(atan(r/z)) = |z|/hypot + const float hypot_rz = alpaka::math::sqrt(acc, rtp * rtp + zp * zp); + const float sinA = rtp / hypot_rz; + const float cosA = alpaka::math::abs(acc, zp) / hypot_rz; + + // sin(A+B) via angle-addition identity; endcap: B=pi/2 so sin(A+pi/2)=cosA + // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa + float sinApB; + if (isEndcap) { + sinApB = cosA; + } else { + const float inv_hypot_drdz = 1.f / alpaka::math::sqrt(acc, 1.f + mod.drdz * mod.drdz); + sinApB = sinA * inv_hypot_drdz + cosA * mod.drdz * inv_hypot_drdz; + } - moduleSeparation = moduleGapSize(modules, lowerModuleIndex); + float moduleSeparation = mod.moduleSep; // Sign flips if the pixel is later layer - if (modules.isGloballyInner()[lowerModuleIndex] == pHitInverted) { + if (mod.isGloballyInner == pHitInverted) { moduleSeparation *= -1; } - drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); + float drprime = moduleSeparation * sinA / sinApB; - // Compute arctan of the slope and take care of the slope = infinity case - absArctanSlope = - ((slope != kVerticalModuleSlope && edm::isFinite(slope)) ? fabs(alpaka::math::atan(acc, slope)) : kPi / 2.f); - - // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently - if (xp > 0 and yp > 0) { - angleM = absArctanSlope; - } else if (xp > 0 and yp < 0) { - angleM = kPi - absArctanSlope; - } else if (xp < 0 and yp < 0) { - angleM = kPi + absArctanSlope; - } else // if (xp < 0 and yp > 0) - { - angleM = 2.f * kPi - absArctanSlope; + // Algebraic: sin(atan(slope)) = |slope|/sqrt(1+slope^2), cos(atan(slope)) = 1/sqrt(1+slope^2) + float drprime_x, drprime_y; + const float slope = mod.slope; + if (slope != kVerticalModuleSlope && edm::isFinite(slope)) { + const float inv_hypot_slope = 1.f / alpaka::math::sqrt(acc, 1.f + slope * slope); + drprime_x = drprime * ((xp > 0.f) - (xp < 0.f)) * alpaka::math::abs(acc, slope) * inv_hypot_slope; + drprime_y = drprime * ((yp > 0.f) - (yp < 0.f)) * inv_hypot_slope; + } else { + drprime_x = drprime * ((xp > 0.f) - (xp < 0.f)); + drprime_y = 0.f; } - // Then since the angleM sign is taken care of properly - drprime_x = drprime * alpaka::math::sin(acc, angleM); - drprime_y = drprime * alpaka::math::cos(acc, angleM); + float xa = xp + drprime_x; // anchor x (the guessed position on the strip module plane) + float ya = yp + drprime_y; // anchor y - // The new anchor position is - xa = xp + drprime_x; - ya = yp + drprime_y; - - // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) - if (slope == kVerticalModuleSlope || - edm::isNotFinite(slope)) // Designated for tilted module when the slope is infinity (module lying along y-axis) - { - xn = xa; // New x point is simply where the anchor is - yn = yo; // No shift in y + // Compute the new strip hit position (handle slope = infinity and slope = 0 cases) + float xn, yn; + if (slope == kVerticalModuleSlope || edm::isNotFinite(slope)) { + xn = xa; + yn = yo; } else if (slope == 0) { - xn = xo; // New x point is simply where the anchor is - yn = ya; // No shift in y + xn = xo; + yn = ya; } else { - xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); // new xn - yn = (xn - xa) * slope + ya; // new yn + xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); + yn = (xn - xa) * slope + ya; } - // Computing new Z position - absdzprime = alpaka::math::abs( - acc, - moduleSeparation / alpaka::math::sin(acc, angleA + angleB) * - alpaka::math::cos( - acc, - angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later + float absdzprime = alpaka::math::abs(acc, moduleSeparation * cosA / sinApB); - // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + float abszn; + if (mod.moduleLayerType == Pixel) { abszn = alpaka::math::abs(acc, zp) + absdzprime; } else { abszn = alpaka::math::abs(acc, zp) - absdzprime; } - zn = abszn * ((zp > 0) ? 1 : -1); // Apply the sign of the zn + float zn = abszn * ((zp > 0) ? 1 : -1); shiftedCoords[0] = xn; shiftedCoords[1] = yn; @@ -364,127 +347,114 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& dz, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphiChange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper, - const float ptCut) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, + ModuleMDData const& mod, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper, + const float ptCut) { dz = zLower - zUpper; - const float dzCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; + const float dzCut = mod.moduleType == PS ? 2.f : 10.f; const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; - if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) + if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) { return false; + } - float miniCut = 0; - - miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel - ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, ptCut) - : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, ptCut); + float miniCut = mod.moduleLayerType == Pixel ? dPhiThreshold(acc, rtLower, mod, ptCut) + : dPhiThreshold(acc, rtUpper, mod, ptCut); - // Cut #2: dphi difference - // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 - float xn = 0.f, yn = 0.f; + float x1, y1, x2, y2, r1sq, r2sq; float shiftedRt2 = 0.f; - if (modules.sides()[lowerModuleIndex] != Center) // If barrel and not center it is tilted - { - // Shift the hits and calculate new xn, yn position + + if (mod.isTilted) { float shiftedCoords[3]; - shiftStripHits(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - shiftedCoords, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); - xn = shiftedCoords[0]; - yn = shiftedCoords[1]; - - // Lower or the upper hit needs to be modified depending on which one was actually shifted - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + shiftStripHits(acc, mod, shiftedCoords, xLower, yLower, zLower, rtLower, xUpper, yUpper, zUpper, rtUpper); + float xn = shiftedCoords[0]; + float yn = shiftedCoords[1]; + shiftedRt2 = xn * xn + yn * yn; + + if (mod.moduleLayerType == Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - shiftedRt2 = xn * xn + yn * yn; - - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + x1 = xLower; + y1 = yLower; + x2 = xn; + y2 = yn; + r1sq = rtLower * rtLower; + r2sq = shiftedRt2; } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; - shiftedRt2 = xn * xn + yn * yn; - dPhi = cms::alpakatools::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + x1 = xn; + y1 = yn; + x2 = xUpper; + y2 = yUpper; + r1sq = shiftedRt2; + r2sq = rtUpper * rtUpper; } } else { shiftedX = 0.f; shiftedY = 0.f; shiftedZ = 0.f; - shiftedRt2 = 0.f; - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); - noShiftedDphi = dPhi; + x1 = xLower; + y1 = yLower; + x2 = xUpper; + y2 = yUpper; + r1sq = rtLower * rtLower; + r2sq = rtUpper * rtUpper; } + // Cross-product pre-checks: Pade [2,2] approximant overestimates tan^2(miniCut) + const float crossDP = x1 * y2 - x2 * y1; + const float dotDP = x1 * x2 + y1 * y2; + const float miniCutSq = miniCut * miniCut; + const float tanMiniCutSq = miniCutSq / (1.f - (2.f / 3.f) * miniCutSq); + if (dotDP <= 0.f || crossDP * crossDP >= tanMiniCutSq * dotDP * dotDP) + return false; + + const float rInnerSq = alpaka::math::min(acc, r1sq, r2sq); + const float dotDC = dotDP - rInnerSq; + if (dotDC <= 0.f || crossDP * crossDP >= tanMiniCutSq * dotDC * dotDC) + return false; + + // Cut #2: dphi difference + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 + dPhi = alpaka::math::atan2(acc, crossDP, dotDP); + noShiftedDphi = mod.isTilted ? cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper) : dPhi; + if (alpaka::math::abs(acc, dPhi) >= miniCut) return false; // Cut #3: The dphi change going from lower Hit to upper Hit // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 - if (modules.sides()[lowerModuleIndex] != Center) { - // When it is tilted, use the new shifted positions - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { - // dPhi Change should be calculated so that the upper hit has higher rt. - // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. - // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) - // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) - // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); - - dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) - : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); - noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); - } else { - // dPhi Change should be calculated so that the upper hit has higher rt. - // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. - // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) - // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) - - dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) - : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); - noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) - : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); - } + // dPhiChange should be calculated so that the upper hit has higher rt. + // The strip hit shifting should guarantee rt ordering, but we check explicitly for safety. + if (r1sq < r2sq) { + dPhiChange = alpaka::math::atan2(acc, crossDP, dotDP - r1sq); + } else { + dPhiChange = alpaka::math::atan2(acc, -crossDP, dotDP - r2sq); + } + if (mod.isTilted) { + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); } else { - // When it is flat lying module, whichever is the lowerSide will always have rt lower - dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); noShiftedDphiChange = dPhiChange; } @@ -492,154 +462,168 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& drt, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphichange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper, - const float ptCut) { - // There are series of cuts that applies to mini-doublet in a "endcap" region - // Cut #1 : dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, + ModuleMDData const& mod, + float& drt, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphichange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper, + const float ptCut) { + // Cut #1: dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3093 // For PS module in case when it is tilted a different dz (after the strip hit shift) is calculated later. - float dz = zLower - zUpper; // Not const since later it might change depending on the type of module const float dzCut = 1.f; - if (alpaka::math::abs(acc, dz) >= dzCut) + if (alpaka::math::abs(acc, dz) >= dzCut) { return false; - // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + } + // Cut #2: drt cut. The drt difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 - const float drtCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; + const float drtCut = mod.moduleType == PS ? 2.f : 10.f; drt = rtLower - rtUpper; - if (alpaka::math::abs(acc, drt) >= drtCut) + if (alpaka::math::abs(acc, drt) >= drtCut) { return false; - // The new scheme shifts strip hits to be "aligned" along the line of sight from interaction point to the pixel hit (if it is PS modules) + } float xn = 0, yn = 0, zn = 0; float shiftedCoords[3]; - shiftStripHits(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, - shiftedCoords, - xLower, - yLower, - zLower, - rtLower, - xUpper, - yUpper, - zUpper, - rtUpper); + shiftStripHits(acc, mod, shiftedCoords, xLower, yLower, zLower, rtLower, xUpper, yUpper, zUpper, rtUpper); xn = shiftedCoords[0]; yn = shiftedCoords[1]; zn = shiftedCoords[2]; - if (modules.moduleType()[lowerModuleIndex] == PS) { - // Appropriate lower or upper hit is modified after checking which one was actually shifted - if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + float dpX1, dpY1, dpX2, dpY2; + if (mod.moduleType == PS) { + if (mod.moduleLayerType == Pixel) { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dpX1 = xLower; + dpY1 = yLower; + dpX2 = xn; + dpY2 = yn; } else { shiftedX = xn; shiftedY = yn; shiftedZ = zLower; - dPhi = cms::alpakatools::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dpX1 = xn; + dpY1 = yn; + dpX2 = xUpper; + dpY2 = yUpper; } } else { shiftedX = xn; shiftedY = yn; shiftedZ = zUpper; - dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xn, yn); - noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); + dpX1 = xLower; + dpY1 = yLower; + dpX2 = xn; + dpY2 = yn; + } + + const float crossDP = dpX1 * dpY2 - dpX2 * dpY1; + const float dotDP = dpX1 * dpX2 + dpY1 * dpY2; + + // Algebraic dPhiChange pre-check using only arithmetic (looser than actual cut): + // - Underestimates |dPhi| via Taylor lower bound: atan(z) >= z - z^3/3 for z in [0,1] + // - Overestimates asin(x) via cubic upper bound: asin(x) <= x + 0.2*x^3 for x in [0,0.5] + // - Overestimates sqrt(A^2+B^2) via alpha-max-beta-min: <= max(A,B) + 0.415*min(A,B) + // where 0.415 > sqrt(2)-1 = 0.4142. + if (dotDP > 0.f) { + float absCrossDP = alpaka::math::abs(acc, crossDP); + + if (absCrossDP >= dotDP) + return false; + + float z = absCrossDP / dotDP; + float approxDPhiLower = z * (1.f - 0.33333333f * z * z); + + float rt = mod.moduleLayerType == Pixel ? rtLower : rtUpper; + float x = alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax); + float fast_miniSlope = x * (1.f + 0.2f * x * x); + + float miniLum = z / alpaka::math::abs(acc, dz) * kDeltaZLum; + float maxAB = alpaka::math::max(acc, mod.sqrtTermFlat, miniLum); + float minAB = alpaka::math::min(acc, mod.sqrtTermFlat, miniLum); + float fast_miniCutEC = fast_miniSlope + maxAB + 0.415f * minAB; + + if (approxDPhiLower >= fast_miniCutEC) + return false; + } else { + return false; } + // Cut #3: dphi + dPhi = alpaka::math::atan2(acc, crossDP, dotDP); + // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" // if it was an endcap it will have zero effect - if (modules.moduleType()[lowerModuleIndex] == PS) { - dz = modules.moduleLayerType()[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; + if (mod.moduleType == PS) { + dz = mod.moduleLayerType == Pixel ? zLower - zn : zUpper - zn; } - float miniCut = 0; - miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel - ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, ptCut, dPhi, dz) - : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, ptCut, dPhi, dz); + float miniCut = mod.moduleLayerType == Pixel ? dPhiThreshold(acc, rtLower, mod, ptCut, dPhi, dz) + : dPhiThreshold(acc, rtUpper, mod, ptCut, dPhi, dz); - if (alpaka::math::abs(acc, dPhi) >= miniCut) + if (alpaka::math::abs(acc, dPhi) >= miniCut) { return false; + } - // Cut #4: Another cut on the dphi after some modification + // Cut #4: dPhiChange // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3119-L3124 float dzFrac = alpaka::math::abs(acc, dz) / alpaka::math::abs(acc, zLower); dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper); noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); return alpaka::math::abs(acc, dPhiChange) < miniCut; } template - ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, - ModulesConst modules, - uint16_t lowerModuleIndex, - uint16_t upperModuleIndex, - unsigned int lowerHitIndex, - unsigned int upperHitIndex, - float& dz, - float& dPhi, - float& dPhiChange, - float& shiftedX, - float& shiftedY, - float& shiftedZ, - float& noShiftedDphi, - float& noShiftedDphiChange, - float xLower, - float yLower, - float zLower, - float rtLower, - float xUpper, - float yUpper, - float zUpper, - float rtUpper, - const float ptCut, - uint16_t clustSizeLower, - uint16_t clustSizeUpper, - const uint16_t clustSizeCut) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgo(TAcc const& acc, + ModuleMDData const& mod, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper, + const float ptCut, + uint16_t clustSizeLower, + uint16_t clustSizeUpper, + const uint16_t clustSizeCut) { if (clustSizeLower > clustSizeCut or clustSizeUpper > clustSizeCut) { return false; } - if (modules.subdets()[lowerModuleIndex] == Barrel) { + if (mod.subdet == Barrel) { return runMiniDoubletDefaultAlgoBarrel(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, + mod, dz, dPhi, dPhiChange, @@ -659,11 +643,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { ptCut); } else { return runMiniDoubletDefaultAlgoEndcap(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitIndex, - upperHitIndex, + mod, dz, dPhi, dPhiChange, @@ -696,7 +676,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float ptCut, const uint16_t clustSizeCut) const { for (uint16_t lowerModuleIndex : cms::alpakatools::uniform_elements_y(acc, modules.nLowerModules())) { - uint16_t upperModuleIndex = modules.partnerModuleIndices()[lowerModuleIndex]; int nLowerHits = hitsRanges.hitRangesnLower()[lowerModuleIndex]; int nUpperHits = hitsRanges.hitRangesnUpper()[lowerModuleIndex]; if (hitsRanges.hitRangesLower()[lowerModuleIndex] == -1) @@ -705,6 +684,42 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int loHitArrayIndex = hitsRanges.hitRangesLower()[lowerModuleIndex]; int limit = nUpperHits * nLowerHits; + // Hoist module-constant data once per module to avoid redundant SoA loads per hit pair. + ModuleMDData mod; + mod.lowerModuleIndex = lowerModuleIndex; + mod.subdet = modules.subdets()[lowerModuleIndex]; + mod.side = modules.sides()[lowerModuleIndex]; + mod.moduleType = modules.moduleType()[lowerModuleIndex]; + mod.moduleLayerType = modules.moduleLayerType()[lowerModuleIndex]; + mod.iL = modules.layers()[lowerModuleIndex] - 1; + mod.isTilted = (mod.subdet == Barrel && mod.side != Center); + mod.isEndcapTwoS = (mod.subdet == Endcap && mod.moduleType == TwoS); + mod.isGloballyInner = modules.isGloballyInner()[lowerModuleIndex]; + mod.slope = modules.dxdys()[lowerModuleIndex]; + mod.drdz = modules.drdzs()[lowerModuleIndex]; + mod.moduleSep = moduleGapSize(modules, lowerModuleIndex); + + // Pre-compute dPhiThreshold module-constant parts + float rLayNominal = (mod.subdet == Barrel) ? kMiniRminMeanBarrel[mod.iL] : kMiniRminMeanEndcap[mod.iL]; + mod.miniPVoff = 0.1f / rLayNominal; + mod.miniMuls = (mod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[mod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[mod.iL] * 3.f / ptCut; + mod.sqrtTermBase = mod.miniMuls * mod.miniMuls + mod.miniPVoff * mod.miniPVoff; + mod.sqrtTermFlat = alpaka::math::sqrt(acc, mod.sqrtTermBase); + + if (mod.isTilted) { + float drdzThresh; + if (mod.moduleType == PS and mod.moduleLayerType == Strip) { + drdzThresh = modules.drdzs()[lowerModuleIndex]; + } else { + drdzThresh = modules.drdzs()[modules.partnerModuleIndices()[lowerModuleIndex]]; + } + mod.miniTilt2 = 0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (drdzThresh * drdzThresh) / + (1.f + drdzThresh * drdzThresh) / mod.moduleSep; + } else { + mod.miniTilt2 = 0.f; + } + for (int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) { int lowerHitIndex = hitIndex / nUpperHits; int upperHitIndex = hitIndex % nUpperHits; @@ -727,11 +742,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, noShiftedDphi, noShiftedDphiChange; bool success = runMiniDoubletDefaultAlgo(acc, - modules, - lowerModuleIndex, - upperModuleIndex, - lowerHitArrayIndex, - upperHitArrayIndex, + mod, dz, dphi, dphichange, @@ -769,10 +780,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds, hitsBase, hitsExtended, - modules, + mod, lowerHitArrayIndex, upperHitArrayIndex, - lowerModuleIndex, dz, dphi, dphichange, diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h index d58917c81dee3..8e12327f5bcb6 100644 --- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -43,7 +43,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE float sigmoid_activation(TAcc const& acc, const float x) { - return alpaka::math::exp(acc, x) / (alpaka::math::exp(acc, x) + 1.f); + const float e = alpaka::math::exp(acc, x); + return e / (e + 1.f); } template diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h index e81b3bff105a8..d137889a98fbf 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -493,15 +493,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp, centerYTemp, pixelRadiusErrorTemp; + PixelSeedData pixelData = + loadPixelSeedData(pixelSeeds, pixelSegments, mds, segments, pixelSegmentIndex, pixelSegmentArrayIndex); + if (not runPixelTripletDefaultAlgo(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelSegments, + pixelData, triplets, - pixelSegmentIndex, t5InnerT3Index, pixelRadiusTemp, tripletRadius, @@ -520,9 +520,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1]; unsigned int thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0]; unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1]; - - unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; - unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; @@ -538,10 +535,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t lowerModuleIndices[Params_T5::kLayers] = { lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; - float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; - float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; - float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; - float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {pixelData.rt_InLo, pixelData.rt_InUp}; + float xPix[Params_pLS::kLayers] = {pixelData.x_InLo, pixelData.x_InUp}; + float yPix[Params_pLS::kLayers] = {pixelData.y_InLo, pixelData.y_InUp}; + float zPix[Params_pLS::kLayers] = {pixelData.z_InLo, pixelData.z_InUp}; float zs[Params_T5::kLayers] = {mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex], @@ -553,16 +550,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorRt()[fourthMDIndex], mds.anchorRt()[fifthMDIndex]}; - float pixelSegmentPt = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; - float pixelSegmentPx = pixelSeeds.px()[pixelSegmentArrayIndex]; - float pixelSegmentPy = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pixelSegmentPz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - int pixelSegmentCharge = pixelSeeds.charge()[pixelSegmentArrayIndex]; - rzChiSquared = 0; //get the appropriate centers - pixelRadius = pixelSegments.circleRadius()[pixelSegmentArrayIndex]; + pixelRadius = pixelData.circleRadius; rzChiSquared = computePT5RZChiSquared(acc, modules, @@ -573,11 +564,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zPix, rts, zs, - pixelSegmentPt, - pixelSegmentPx, - pixelSegmentPy, - pixelSegmentPz, - pixelSegmentCharge); + pixelData.ptIn, + pixelData.px, + pixelData.py, + pixelData.pz, + pixelData.charge); if (pixelRadius < 5.0f * kR1GeVf) { //only apply r-z chi2 cuts for <5GeV tracks if (not passPT5RZChiSquaredCuts(modules, @@ -603,8 +594,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorY()[fifthMDIndex]}; //get the appropriate centers - centerX = pixelSegments.circleCenterX()[pixelSegmentArrayIndex]; - centerY = pixelSegments.circleCenterY()[pixelSegmentArrayIndex]; + centerX = pixelData.circleCenterX; + centerY = pixelData.circleCenterY; float T5CenterX = quintuplets.regressionCenterX()[quintupletIndex]; float T5CenterY = quintuplets.regressionCenterY()[quintupletIndex]; diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h index 6dfa2358d568f..6f42b0c96f7b1 100644 --- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -15,37 +15,72 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + // Pre-loaded pixel segment data, invariant across triplets for a given pixel segment. + // Hoisted to avoid redundant SoA loads through the pT3 function chain. + struct PixelSeedData { + float ptIn, px, py, pz, ptErr, etaErr; + float eta, phi; + int charge; + float circleCenterX, circleCenterY, circleRadius; + float x_InLo, y_InLo, z_InLo, rt_InLo; // pixel inner MD (pLSMD0) + float x_InUp, y_InUp, z_InUp, rt_InUp; // pixel outer MD (pLSMD1) + float alpha_InLo; // pixel segment dPhiChange + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE PixelSeedData loadPixelSeedData(PixelSeedsConst pixelSeeds, + PixelSegmentsConst pixelSegments, + MiniDoubletsConst mds, + SegmentsConst segments, + unsigned int pixelSegmentIndex, + unsigned int pixelSegmentArrayIndex) { + unsigned int pixelMD0 = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelMD1 = segments.mdIndices()[pixelSegmentIndex][1]; + PixelSeedData pixelData; + pixelData.ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; + pixelData.px = pixelSeeds.px()[pixelSegmentArrayIndex]; + pixelData.py = pixelSeeds.py()[pixelSegmentArrayIndex]; + pixelData.pz = pixelSeeds.pz()[pixelSegmentArrayIndex]; + pixelData.ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; + pixelData.etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex]; + pixelData.eta = pixelSeeds.eta()[pixelSegmentArrayIndex]; + pixelData.phi = pixelSeeds.phi()[pixelSegmentArrayIndex]; + pixelData.charge = pixelSeeds.charge()[pixelSegmentArrayIndex]; + pixelData.circleCenterX = pixelSegments.circleCenterX()[pixelSegmentArrayIndex]; + pixelData.circleCenterY = pixelSegments.circleCenterY()[pixelSegmentArrayIndex]; + pixelData.circleRadius = pixelSegments.circleRadius()[pixelSegmentArrayIndex]; + pixelData.x_InLo = mds.anchorX()[pixelMD0]; + pixelData.y_InLo = mds.anchorY()[pixelMD0]; + pixelData.z_InLo = mds.anchorZ()[pixelMD0]; + pixelData.rt_InLo = mds.anchorRt()[pixelMD0]; + pixelData.x_InUp = mds.anchorX()[pixelMD1]; + pixelData.y_InUp = mds.anchorY()[pixelMD1]; + pixelData.z_InUp = mds.anchorZ()[pixelMD1]; + pixelData.rt_InUp = mds.anchorRt()[pixelMD1]; + pixelData.alpha_InLo = __H2F(segments.dPhiChanges()[pixelSegmentIndex]); + return pixelData; + } + template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pixelData, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut); template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pixelData, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut); @@ -124,59 +159,43 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelLowerModuleIndex, + const PixelSeedData& pixelData, uint16_t outerInnerLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, - unsigned int innerSegmentIndex, unsigned int outerSegmentIndex, const float ptCut) { short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; - unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; - - unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0]; - unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + unsigned int segmentMD0Index = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int segmentMD1Index = segments.mdIndices()[outerSegmentIndex][1]; if (outerInnerLowerModuleSubdet == Barrel and (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { return runTripletDefaultAlgoPPBB(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelLowerModuleIndex, + pixelData, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, - innerSegmentIndex, outerSegmentIndex, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - fourthMDIndex, + segmentMD0Index, + segmentMD1Index, ptCut); } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { return runTripletDefaultAlgoPPEE(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelLowerModuleIndex, + pixelData, outerInnerLowerModuleIndex, outerOuterLowerModuleIndex, - innerSegmentIndex, outerSegmentIndex, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - fourthMDIndex, + segmentMD0Index, + segmentMD1Index, ptCut); } return false; @@ -537,13 +556,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - PixelSegmentsConst pixelSegments, + const PixelSeedData& pixelData, TripletsConst triplets, - unsigned int pixelSegmentIndex, unsigned int tripletIndex, float& pixelRadius, float& tripletRadius, @@ -556,63 +572,47 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float ptCut, bool runDNN = true, bool runChiSquaredCuts = true) { - //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet - uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex]; - uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0]; uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1]; uint16_t upperModuleIndex = triplets.lowerModuleIndices()[tripletIndex][2]; - { - // pixel segment vs inner segment of the triplet - if (not runPixelTrackletDefaultAlgopT3(acc, - modules, - ranges, - mds, - segments, - pixelSeeds, - pixelModuleIndex, - lowerModuleIndex, - middleModuleIndex, - pixelSegmentIndex, - triplets.segmentIndices()[tripletIndex][0], - ptCut)) - return false; - - //pixel segment vs outer segment of triplet - if (not runPixelTrackletDefaultAlgopT3(acc, - modules, - ranges, - mds, - segments, - pixelSeeds, - pixelModuleIndex, - middleModuleIndex, - upperModuleIndex, - pixelSegmentIndex, - triplets.segmentIndices()[tripletIndex][1], - ptCut)) - return false; - } + // Cheap radius check first, before expensive tracklet compatibility checks. + pixelRadius = pixelData.ptIn * kR1GeVf; + pixelRadiusError = pixelData.ptErr * kR1GeVf; + tripletRadius = triplets.radius()[tripletIndex]; - //pt matching between the pixel ptin and the triplet circle pt - unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - float pixelSegmentPt = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; - float pixelSegmentPtError = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; - float pixelSegmentPx = pixelSeeds.px()[pixelSegmentArrayIndex]; - float pixelSegmentPy = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pixelSegmentPz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - int pixelSegmentCharge = pixelSeeds.charge()[pixelSegmentArrayIndex]; + if (not passRadiusCriterion(acc, + modules, + pixelRadius, + pixelRadiusError, + tripletRadius, + lowerModuleIndex, + middleModuleIndex, + upperModuleIndex)) + return false; - float pixelG = pixelSegments.circleCenterX()[pixelSegmentArrayIndex]; - float pixelF = pixelSegments.circleCenterY()[pixelSegmentArrayIndex]; - float pixelRadiusPCA = pixelSegments.circleRadius()[pixelSegmentArrayIndex]; + if (not runPixelTrackletDefaultAlgopT3(acc, + modules, + mds, + segments, + pixelData, + lowerModuleIndex, + middleModuleIndex, + triplets.segmentIndices()[tripletIndex][0], + ptCut)) + return false; - unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; - unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + if (not runPixelTrackletDefaultAlgopT3(acc, + modules, + mds, + segments, + pixelData, + middleModuleIndex, + upperModuleIndex, + triplets.segmentIndices()[tripletIndex][1], + ptCut)) + return false; - pixelRadius = pixelSegmentPt * kR1GeVf; - pixelRadiusError = pixelSegmentPtError * kR1GeVf; unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0]; unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1]; @@ -625,20 +625,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float ys[Params_T3::kLayers] = { mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]}; - float g, f; - tripletRadius = triplets.radius()[tripletIndex]; - g = triplets.centerX()[tripletIndex]; - f = triplets.centerY()[tripletIndex]; - - if (not passRadiusCriterion(acc, - modules, - pixelRadius, - pixelRadiusError, - tripletRadius, - lowerModuleIndex, - middleModuleIndex, - upperModuleIndex)) - return false; + float g = triplets.centerX()[tripletIndex]; + float f = triplets.centerY()[tripletIndex]; uint16_t lowerModuleIndices[Params_T3::kLayers] = {lowerModuleIndex, middleModuleIndex, upperModuleIndex}; @@ -647,10 +635,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { mds.anchorRt()[firstMDIndex], mds.anchorRt()[secondMDIndex], mds.anchorRt()[thirdMDIndex]}; float zs[Params_T3::kLayers] = { mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex]}; - float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; - float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; - float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; - float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {pixelData.rt_InLo, pixelData.rt_InUp}; + float xPix[Params_pLS::kLayers] = {pixelData.x_InLo, pixelData.x_InUp}; + float yPix[Params_pLS::kLayers] = {pixelData.y_InLo, pixelData.y_InUp}; + float zPix[Params_pLS::kLayers] = {pixelData.z_InLo, pixelData.z_InUp}; rzChiSquared = computePT3RZChiSquared(acc, modules, @@ -663,18 +651,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { xs, ys, zs, - pixelSegmentPt, - pixelSegmentPx, - pixelSegmentPy, - pixelSegmentPz, - pixelSegmentCharge); - if (runChiSquaredCuts && pixelSegmentPt < 5.0f) { + pixelData.ptIn, + pixelData.px, + pixelData.py, + pixelData.pz, + pixelData.charge); + if (runChiSquaredCuts && pixelData.ptIn < 5.0f) { if (!passPT3RZChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared)) return false; } - rPhiChiSquared = - computePT3RPhiChiSquared(acc, modules, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys); + rPhiChiSquared = computePT3RPhiChiSquared(acc, + modules, + lowerModuleIndices, + pixelData.circleCenterX, + pixelData.circleCenterY, + pixelData.circleRadius, + xs, + ys); rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix); } @@ -691,8 +685,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelRadius, pixelRadiusError, rzChiSquared, - pixelSeeds.eta()[pixelSegmentArrayIndex], - pixelSegmentPt, + pixelData.eta, + pixelData.ptIn, module_type_3)) { return false; } @@ -719,6 +713,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { for (unsigned int i_pLS : cms::alpakatools::uniform_elements_z(acc, nPixelSegments)) { auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS]; + if (pixelSegments.isDup()[i_pLS]) + continue; + if (pixelSegments.partOfPT5()[i_pLS]) + continue; + + // Hoist pixel segment data: loaded once per pixel segment instead of + // redundantly through the runPixelTripletDefaultAlgo call chain. + uint16_t pixelModuleIndex = modules.nLowerModules(); + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; + + PixelSeedData pixelData = loadPixelSeedData(pixelSeeds, pixelSegments, mds, segments, pixelSegmentIndex, i_pLS); + for (unsigned int iLSModule : cms::alpakatools::uniform_elements_y(acc, connectedPixelIndex[i_pLS], iLSModule_max)) { uint16_t tripletLowerModuleIndex = @@ -736,18 +742,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (modules.moduleType()[tripletLowerModuleIndex] == TwoS) continue; - uint16_t pixelModuleIndex = modules.nLowerModules(); unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex]; if (nOuterTriplets == 0) continue; - unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; - - if (pixelSegments.isDup()[i_pLS]) - continue; - if (pixelSegments.partOfPT5()[i_pLS]) - continue; //don't make pT3s for those pixels that are part of pT5 - short layer2_adjustment; if (modules.layers()[tripletLowerModuleIndex] == 1) { layer2_adjustment = 1; @@ -773,13 +771,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelRadiusError; bool success = runPixelTripletDefaultAlgo(acc, modules, - ranges, mds, segments, - pixelSeeds, - pixelSegments, + pixelData, triplets, - pixelSegmentIndex, outerTripletIndex, pixelRadius, tripletRadius, @@ -798,9 +793,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float eta = mds.anchorEta()[segments .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; - float eta_pix = pixelSeeds.eta()[i_pLS]; - float phi_pix = pixelSeeds.phi()[i_pLS]; - float pt = pixelSeeds.ptIn()[i_pLS]; float score = rPhiChiSquared + rPhiChiSquaredInwards; unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); @@ -825,11 +817,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { rPhiChiSquaredInwards, rzChiSquared, pixelTripletIndex, - pt, + pixelData.ptIn, eta, phi, - eta_pix, - phi_pix, + pixelData.eta, + pixelData.phi, pixelRadiusError, score); triplets.partOfPT3()[outerTripletIndex] = true; @@ -844,17 +836,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pixelData, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut) { @@ -862,34 +849,33 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bool isPS_OutLo = (modules.moduleType()[segmentInnerModuleIndex] == PS); - float rt_InLo = mds.anchorRt()[pLSMD0Index]; - float rt_InUp = mds.anchorRt()[pLSMD1Index]; + const float rt_InLo = pixelData.rt_InLo; + const float rt_InUp = pixelData.rt_InUp; float rt_OutLo = mds.anchorRt()[segmentMD0Index]; float rt_OutUp = mds.anchorRt()[segmentMD1Index]; - float z_InUp = mds.anchorZ()[pLSMD1Index]; + const float z_InUp = pixelData.z_InUp; float z_OutLo = mds.anchorZ()[segmentMD0Index]; - float x_InLo = mds.anchorX()[pLSMD0Index]; - float x_InUp = mds.anchorX()[pLSMD1Index]; + const float x_InLo = pixelData.x_InLo; + const float x_InUp = pixelData.x_InUp; float x_OutLo = mds.anchorX()[segmentMD0Index]; float x_OutUp = mds.anchorX()[segmentMD1Index]; - float y_InLo = mds.anchorY()[pLSMD0Index]; - float y_InUp = mds.anchorY()[pLSMD1Index]; + const float y_InLo = pixelData.y_InLo; + const float y_InUp = pixelData.y_InUp; float y_OutLo = mds.anchorY()[segmentMD0Index]; float y_OutUp = mds.anchorY()[segmentMD1Index]; float rt_InOut = rt_InUp; - unsigned int pixelSegmentArrayIndex = pLSIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - float ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; + float ptIn = pixelData.ptIn; float ptSLo = ptIn; - float px = pixelSeeds.px()[pixelSegmentArrayIndex]; - float py = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - float ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; - float etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex]; + float px = pixelData.px; + float py = pixelData.py; + float pz = pixelData.pz; + float ptErr = pixelData.ptErr; + float etaErr = pixelData.etaErr; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -960,7 +946,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { //lots of array accesses below this... - float alpha_InLo = __H2F(segments.dPhiChanges()[pLSIndex]); + float alpha_InLo = pixelData.alpha_InLo; float alpha_OutLo = __H2F(segments.dPhiChanges()[segmentIndex]); bool isEC_lastLayer = @@ -1101,17 +1087,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, ModulesConst modules, - ObjectRangesConst ranges, MiniDoubletsConst mds, SegmentsConst segments, - PixelSeedsConst pixelSeeds, - uint16_t pixelModuleIndex, + const PixelSeedData& pixelData, uint16_t segmentInnerModuleIndex, uint16_t segmentOuterModuleIndex, - unsigned int pLSIndex, unsigned int segmentIndex, - unsigned int pLSMD0Index, - unsigned int pLSMD1Index, unsigned int segmentMD0Index, unsigned int segmentMD1Index, const float ptCut) { @@ -1119,36 +1100,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { bool isPS_OutLo = (modules.moduleType()[segmentInnerModuleIndex] == PS); - float z_InUp = mds.anchorZ()[pLSMD1Index]; + const float z_InUp = pixelData.z_InUp; float z_OutLo = mds.anchorZ()[segmentMD0Index]; if (z_InUp * z_OutLo <= 0) return false; - float rt_InLo = mds.anchorRt()[pLSMD0Index]; - float rt_InUp = mds.anchorRt()[pLSMD1Index]; + const float rt_InLo = pixelData.rt_InLo; + const float rt_InUp = pixelData.rt_InUp; float rt_OutLo = mds.anchorRt()[segmentMD0Index]; float rt_OutUp = mds.anchorRt()[segmentMD1Index]; - float x_InLo = mds.anchorX()[pLSMD0Index]; - float x_InUp = mds.anchorX()[pLSMD1Index]; + const float x_InLo = pixelData.x_InLo; + const float x_InUp = pixelData.x_InUp; float x_OutLo = mds.anchorX()[segmentMD0Index]; float x_OutUp = mds.anchorX()[segmentMD1Index]; - float y_InLo = mds.anchorY()[pLSMD0Index]; - float y_InUp = mds.anchorY()[pLSMD1Index]; + const float y_InLo = pixelData.y_InLo; + const float y_InUp = pixelData.y_InUp; float y_OutLo = mds.anchorY()[segmentMD0Index]; float y_OutUp = mds.anchorY()[segmentMD1Index]; - unsigned int pixelSegmentArrayIndex = pLSIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; - - float ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex]; + float ptIn = pixelData.ptIn; float ptSLo = ptIn; - float px = pixelSeeds.px()[pixelSegmentArrayIndex]; - float py = pixelSeeds.py()[pixelSegmentArrayIndex]; - float pz = pixelSeeds.pz()[pixelSegmentArrayIndex]; - float ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex]; - float etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex]; + float px = pixelData.px; + float py = pixelData.py; + float pz = pixelData.pz; + float ptErr = pixelData.ptErr; + float etaErr = pixelData.etaErr; ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); @@ -1224,7 +1203,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (alpaka::math::abs(acc, dPhi) > dPhiCut) return false; - float alpha_InLo = __H2F(segments.dPhiChanges()[pLSIndex]); + float alpha_InLo = pixelData.alpha_InLo; float alpha_OutLo = __H2F(segments.dPhiChanges()[segmentIndex]); bool isEC_lastLayer = @@ -1343,10 +1322,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dBetaROut2 = dBetaROut * dBetaROut; - betaOutCut = - alpaka::math::asin( - acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) //FIXME: need faster version - + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); //Cut #6: The real beta cut if (alpaka::math::abs(acc, betaOut) >= betaOutCut) diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h index 6437625761950..b839418cdd5c8 100644 --- a/RecoTracker/LSTCore/src/alpaka/Segment.h +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -85,10 +85,46 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return moduleSeparation; } + // Pre-loaded module data for segment creation, eliminating redundant SoA lookups + // in inner loops. Populated once per module (outer/middle loop level). + struct ModuleSegData { + short subdet; + short side; + short layer; + unsigned int iL; // layer - 1 + short moduleType; + float drdz; + float moduleGapSize; + bool isTilted; + float segMiniTilt2; // 0.25 * kPixelPSZpitch^2 * drdz^2 / (1+drdz^2) / gap^2; 0 if not tilted + float sdMuls; // kMiniMulsPtScale[iL] * 3 / ptCut + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE ModuleSegData loadModuleSegData(ModulesConst modules, + uint16_t moduleIndex, + const float ptCut) { + ModuleSegData mod; + mod.subdet = modules.subdets()[moduleIndex]; + mod.side = modules.sides()[moduleIndex]; + mod.layer = modules.layers()[moduleIndex]; + mod.iL = mod.layer - 1; + mod.moduleType = modules.moduleType()[moduleIndex]; + mod.drdz = modules.drdzs()[moduleIndex]; + mod.moduleGapSize = moduleGapSize_seg(modules, moduleIndex); + mod.isTilted = (mod.subdet == Barrel and mod.side != Center); + mod.segMiniTilt2 = mod.isTilted ? (0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (mod.drdz * mod.drdz) / + (1.f + mod.drdz * mod.drdz) / (mod.moduleGapSize * mod.moduleGapSize)) + : 0.f; + mod.sdMuls = (mod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[mod.iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[mod.iL] * 3.f / ptCut; + return mod; + } + template ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, float* dAlphaThresholdValues, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, float xIn, float yIn, @@ -98,14 +134,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float yOut, float zOut, float rtOut, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, const float ptCut) { - float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) - ? kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut - : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; + const float sdMuls = innerMod.sdMuls; //more accurate then outer rt - inner rt float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn)); @@ -113,62 +145,40 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const float dAlpha_Bfield = alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - bool isInnerTilted = - modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] != Center; - bool isOuterTilted = - modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] != Center; - - float drdzInner = modules.drdzs()[innerLowerModuleIndex]; - float drdzOuter = modules.drdzs()[outerLowerModuleIndex]; - float innerModuleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); - float outerModuleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); - const float innerminiTilt2 = isInnerTilted - ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / - (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) - : 0; - - const float outerminiTilt2 = isOuterTilted - ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzOuter * drdzOuter) / - (1.f + drdzOuter * drdzOuter) / (outerModuleGapSize * outerModuleGapSize)) - : 0; - - float miniDelta = innerModuleGapSize; - float sdLumForInnerMini2; float sdLumForOuterMini2; - if (modules.subdets()[innerLowerModuleIndex] == Barrel) { - sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + if (innerMod.subdet == Barrel) { + sdLumForInnerMini2 = innerMod.segMiniTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForInnerMini2 = (mds.dphis()[innerMDIndex] * mds.dphis()[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / (mds.dzs()[innerMDIndex] * mds.dzs()[innerMDIndex]); } - if (modules.subdets()[outerLowerModuleIndex] == Barrel) { - sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + if (outerMod.subdet == Barrel) { + sdLumForOuterMini2 = outerMod.segMiniTilt2 * (dAlpha_Bfield * dAlpha_Bfield); } else { sdLumForOuterMini2 = (mds.dphis()[outerMDIndex] * mds.dphis()[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / (mds.dzs()[outerMDIndex] * mds.dzs()[outerMDIndex]); } // Unique stuff for the segment dudes alone + const float miniDelta = innerMod.moduleGapSize; float dAlpha_res_inner = - 0.02f / miniDelta * - (modules.subdets()[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + 0.02f / miniDelta * (innerMod.subdet == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); float dAlpha_res_outer = - 0.02f / miniDelta * - (modules.subdets()[outerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); + 0.02f / miniDelta * (outerMod.subdet == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut); float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; - if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] == Center) { + if (innerMod.subdet == Barrel and innerMod.side == Center) { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); } - if (modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] == Center) { + if (outerMod.subdet == Barrel and outerMod.side == Center) { dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); } else { dAlphaThresholdValues[1] = @@ -340,10 +350,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passDeltaPhiCutsEndcap(TAcc const& acc, - ModulesConst modules, MiniDoubletsConst mds, - uint16_t innerLm, - uint16_t outerLm, unsigned int innerMD, unsigned int outerMD, const float dPhi, @@ -365,10 +372,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, float& dPhi, @@ -401,10 +407,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zOut = mds.anchorZ()[outerMDIndex]; rtOut = mds.anchorRt()[outerMDIndex]; - float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope; //FIXME: need appropriate value + const float sdSinBarrel = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax); + float sdSlope = alpaka::math::asin(acc, sdSinBarrel); + // Exact: tan(asin(s))/asin(s) = s/(asin(s)*sqrt(1-s^2)), eliminates tan call + float dzDrtScale = sdSinBarrel / (sdSlope * alpaka::math::sqrt(acc, 1.f - sdSinBarrel * sdSinBarrel)); - const float zGeom = modules.layers()[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + const float zGeom = innerMod.layer <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; //slope-correction only on outer end zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - zGeom; @@ -413,28 +421,44 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((zOut < zLo) || (zOut > zHi)) return false; + // Trig-free loose pre-check: sin(asin(s)+M) < s+M, so (s+M)^2 overestimates sin^2(sdCut). + // Rejects obvious failures before expensive anchorPhi reads + reducePhiRange. + { + const float cross = xIn * yOut - xOut * yIn; + const float dot = xIn * xOut + yIn * yOut; + if (dot <= 0.f) + return false; + const float sqrtTerm = alpaka::math::sqrt(acc, innerMod.sdMuls * innerMod.sdMuls + 0.01f / (rtOut * rtOut)); + const float looseCut = sdSinBarrel + sqrtTerm; + const float looseCutSq = looseCut * looseCut; + const float crossSq = cross * cross; + if (crossSq >= looseCutSq * (crossSq + dot * dot)) + return false; + const float dotChange = dot - (rtIn * rtIn); + if (dotChange <= 0.f || crossSq >= looseCutSq * (crossSq + dotChange * dotChange)) + return false; + } + dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); - if (!passDeltaPhiCutsBarrel(acc, - modules, - mds, - innerLowerModuleIndex, - outerLowerModuleIndex, - innerMDIndex, - outerMDIndex, - dPhi, - rtOut, - sdSlope, - ptCut)) + const float sdMuls = innerMod.sdMuls; + const float sdPVoff = 0.1f / rtOut; + const float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); + + if (alpaka::math::abs(acc, dPhi) > sdCut) return false; dPhiChange = cms::alpakatools::reducePhiRange( acc, cms::alpakatools::phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMDIndex]); + if (alpaka::math::abs(acc, dPhiChange) >= sdCut) + return false; + float dAlphaThresholdValues[3]; dAlphaThreshold(acc, dAlphaThresholdValues, - modules, + innerMod, + outerMod, mds, xIn, yIn, @@ -444,8 +468,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { yOut, zOut, rtOut, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut); @@ -464,15 +486,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; - return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + if (alpaka::math::abs(acc, dAlphaInnerMDOuterMD) >= dAlphaInnerMDOuterMDThreshold) + return false; + + return true; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, float& dPhi, @@ -505,10 +529,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zOut = mds.anchorZ()[outerMDIndex]; rtOut = mds.anchorRt()[outerMDIndex]; - bool outerLayerEndcapTwoS = - (modules.subdets()[outerLowerModuleIndex] == Endcap) && (modules.moduleType()[outerLowerModuleIndex] == TwoS); + bool outerLayerEndcapTwoS = (outerMod.subdet == Endcap) && (outerMod.moduleType == TwoS); - float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float sdSinEC = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax); float disks2SMinRadius = 60.f; float rtGeom = ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius) @@ -522,7 +545,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dz = zOut - zIn; float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); - float drtDzScale = sdSlope / alpaka::math::tan(acc, sdSlope); + float sdSlope = alpaka::math::asin(acc, sdSinEC); + // Exact: asin(s)/tan(asin(s)) = asin(s)*sqrt(1-s^2)/s, eliminates tan call + float drtDzScale = sdSlope * alpaka::math::sqrt(acc, 1.f - sdSinEC * sdSinEC) / sdSinEC; //rt should increase rtLo = alpaka::math::max(acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom); @@ -533,19 +558,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if ((rtOut < rtLo) || (rtOut > rtHi)) return false; + // Phi pre-check: |tan(dPhi)| > tan(sdSlope) is exact for |dPhi| > sdSlope. + // Since final cut uses sdCut = sdSlope + eps > sdSlope, this is looser. + const float crossPhi = xIn * yOut - xOut * yIn; + const float dotPhi = xIn * xOut + yIn * yOut; + const float tanSlopeSq = sdSinEC * sdSinEC / (1.f - sdSinEC * sdSinEC); + if (dotPhi <= 0.f || crossPhi * crossPhi > tanSlopeSq * dotPhi * dotPhi) + return false; + dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); - if (!passDeltaPhiCutsEndcap(acc, - modules, - mds, - innerLowerModuleIndex, - outerLowerModuleIndex, - innerMDIndex, - outerMDIndex, - dPhi, - rtOut, - sdSlope, - ptCut)) + if (!passDeltaPhiCutsEndcap(acc, mds, innerMDIndex, outerMDIndex, dPhi, rtOut, sdSlope, ptCut)) return false; if (outerLayerEndcapTwoS) { @@ -569,7 +592,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float dAlphaThresholdValues[3]; dAlphaThreshold(acc, dAlphaThresholdValues, - modules, + innerMod, + outerMod, mds, xIn, yIn, @@ -579,8 +603,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { yOut, zOut, rtOut, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut); @@ -599,15 +621,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { return false; if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) return false; - return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + if (alpaka::math::abs(acc, dAlphaInnerMDOuterMD) >= dAlphaInnerMDOuterMDThreshold) + return false; + + return true; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLowerModuleIndex, - uint16_t outerLowerModuleIndex, unsigned int innerMDIndex, unsigned int outerMDIndex, float& dPhi, @@ -626,16 +650,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float& rtHi, #endif const float ptCut) { - if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.subdets()[outerLowerModuleIndex] == Barrel) { + if (innerMod.subdet == Barrel and outerMod.subdet == Barrel) { #ifdef CUT_VALUE_DEBUG rtLo = -999.f; rtHi = -999.f; #endif return runSegmentDefaultAlgoBarrel(acc, - modules, + innerMod, + outerMod, mds, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, dPhi, @@ -658,10 +681,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { zHi = -999.f; #endif return runSegmentDefaultAlgoEndcap(acc, - modules, + innerMod, + outerMod, mds, - innerLowerModuleIndex, - outerLowerModuleIndex, innerMDIndex, outerMDIndex, dPhi, @@ -697,6 +719,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nInnerMDs == 0) continue; + ModuleSegData innerMod = loadModuleSegData(modules, innerLowerModuleIndex, ptCut); + unsigned int nConnectedModules = modules.nConnectedModules()[innerLowerModuleIndex]; for (uint16_t outerLowerModuleArrayIdx : cms::alpakatools::uniform_elements_y(acc, nConnectedModules)) { @@ -708,6 +732,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (limit == 0) continue; + + ModuleSegData outerMod = loadModuleSegData(modules, outerLowerModuleIndex, ptCut); + for (unsigned int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) { unsigned int innerMDArrayIdx = hitIndex / nOuterMDs; unsigned int outerMDArrayIdx = hitIndex % nOuterMDs; @@ -727,29 +754,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { dPhiMax = 0; dPhiChangeMin = 0; dPhiChangeMax = 0; - if (runSegmentDefaultAlgo(acc, - modules, - mds, - innerLowerModuleIndex, - outerLowerModuleIndex, - innerMDIndex, - outerMDIndex, - dPhi, - dPhiMin, - dPhiMax, - dPhiChange, - dPhiChangeMin, - dPhiChangeMax, + bool pass = runSegmentDefaultAlgo(acc, + innerMod, + outerMod, + mds, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax, #ifdef CUT_VALUE_DEBUG - dAlphaInnerMDSegment, - dAlphaOuterMDSegment, - dAlphaInnerMDOuterMD, - zLo, - zHi, - rtLo, - rtHi, + dAlphaInnerMDSegment, + dAlphaOuterMDSegment, + dAlphaInnerMDOuterMD, + zLo, + zHi, + rtLo, + rtHi, #endif - ptCut)) { + ptCut); + + if (pass) { unsigned int totOccupancySegments = alpaka::atomicAdd(acc, &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex], @@ -799,21 +827,96 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passDeltaPhiCutsSelector(TAcc const& acc, - ModulesConst modules, + ModuleSegData const& innerMod, + ModuleSegData const& outerMod, MiniDoubletsConst mds, - uint16_t innerLm, - uint16_t outerLm, unsigned int innerMD, unsigned int outerMD, const float ptCut) { - const float dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMD] - mds.anchorPhi()[innerMD]); const float rtOut = mds.anchorRt()[outerMD]; - const float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); - - if (modules.subdets()[innerLm] == Barrel && modules.subdets()[outerLm] == Barrel) - return passDeltaPhiCutsBarrel(acc, modules, mds, innerLm, outerLm, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut); - else - return passDeltaPhiCutsEndcap(acc, modules, mds, innerLm, outerLm, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut); + const float sdSin = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax); + + const float cosSlope = alpaka::math::sqrt(acc, 1.f - sdSin * sdSin); + + if (innerMod.subdet == Barrel && outerMod.subdet == Barrel) { + // Barrel counting: trig-free loose checks only. + // All bounds are strictly looser than creation kernel cuts. + const float rtIn = mds.anchorRt()[innerMD]; + const float zIn = mds.anchorZ()[innerMD]; + const float zOut = mds.anchorZ()[outerMD]; + + // z-window: 1/cosSlope >= tan(sdSlope)/sdSlope, so looser than creation. + const float dzDrtScale = 1.f / cosSlope; + const float zGeom = innerMod.layer <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + const float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - zGeom; + const float zHi = zIn + (zIn + kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + zGeom; + if (zOut < zLo || zOut > zHi) + return false; + + const float xIn = mds.anchorX()[innerMD]; + const float yIn = mds.anchorY()[innerMD]; + const float xOut = mds.anchorX()[outerMD]; + const float yOut = mds.anchorY()[outerMD]; + + const float cross = xIn * yOut - xOut * yIn; + const float dot = xIn * xOut + yIn * yOut; + if (dot <= 0.f) + return false; + + // sin(asin(s)+M) < s+M, so (s+M)^2 overestimates sin^2(sdCut), looser than creation. + const float sqrtTerm = alpaka::math::sqrt(acc, innerMod.sdMuls * innerMod.sdMuls + 0.01f / (rtOut * rtOut)); + const float looseCut = sdSin + sqrtTerm; + const float looseCutSq = looseCut * looseCut; + const float crossSq = cross * cross; + + // dPhi check + if (crossSq >= looseCutSq * (crossSq + dot * dot)) + return false; + + // dPhiChange check + const float dotChange = dot - (rtIn * rtIn); + if (dotChange <= 0.f || crossSq >= looseCutSq * (crossSq + dotChange * dotChange)) + return false; + + return true; + } else { + // Endcap counting: z-sign + rt-range + phi pre-checks, then exact cuts on survivors. + const float zIn = mds.anchorZ()[innerMD]; + const float zOut = mds.anchorZ()[outerMD]; + if (zIn * zOut < 0.f) + return false; + + // rt-range pre-filter: cosSlope <= asin(s)/tan(asin(s)), so looser than creation. + const float rtIn = mds.anchorRt()[innerMD]; + const float dz = zOut - zIn; + const float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + const float drtDzScale = cosSlope; + const float rtGeom = + ((rtIn < 60.f && rtOut < 60.f) + ? (2.f * kPixelPSZpitch) + : ((rtIn < 60.f || rtOut < 60.f) ? (kPixelPSZpitch + kStrip2SZpitch) : (2.f * kStrip2SZpitch))); + const float rtLo = + alpaka::math::max(acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom); + const float rtHi = rtIn * (zOut - dLum) / (zIn - dLum) + rtGeom; + if (rtOut < rtLo || rtOut > rtHi) + return false; + + // Phi pre-check: |tan(dPhi)| > tan(sdSlope) is exact for |dPhi| > sdSlope. + // Since the final dPhiChange cut implies |dPhi| < sdSlope, this is looser than creation. + const float xIn = mds.anchorX()[innerMD]; + const float yIn = mds.anchorY()[innerMD]; + const float xOut = mds.anchorX()[outerMD]; + const float yOut = mds.anchorY()[outerMD]; + const float crossPhi = xIn * yOut - xOut * yIn; + const float dotPhi = xIn * xOut + yIn * yOut; + const float tanSlopeSq = sdSin * sdSin / (1.f - sdSin * sdSin); + if (dotPhi <= 0.f || crossPhi * crossPhi > tanSlopeSq * dotPhi * dotPhi) + return false; + + const float sdSlope = alpaka::math::asin(acc, sdSin); + const float dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMD] - mds.anchorPhi()[innerMD]); + return passDeltaPhiCutsEndcap(acc, mds, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut); + } } struct CountMiniDoubletConnections { @@ -837,12 +940,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nConnectedModules == 0) continue; + ModuleSegData innerMod = loadModuleSegData(modules, innerLowerModuleIndex, ptCut); + for (uint16_t outerLowerModuleArrayIdx : cms::alpakatools::uniform_elements_y(acc, nConnectedModules)) { const uint16_t outerLowerModuleIndex = modules.moduleMap()[innerLowerModuleIndex][outerLowerModuleArrayIdx]; const unsigned int nOuterMDs = mdsOccupancy.nMDs()[outerLowerModuleIndex]; if (nOuterMDs == 0) continue; + ModuleSegData outerMod = loadModuleSegData(modules, outerLowerModuleIndex, ptCut); + const unsigned int limit = nInnerMDs * nOuterMDs; for (unsigned int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) { @@ -853,8 +960,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const unsigned int outerMDIndex = mdRanges[outerLowerModuleIndex][0] + outerMDArrayIdx; // Increment the connected max if the LS passes the delta phi cuts. - if (passDeltaPhiCutsSelector( - acc, modules, mds, innerLowerModuleIndex, outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut)) { + if (passDeltaPhiCutsSelector(acc, innerMod, outerMod, mds, innerMDIndex, outerMDIndex, ptCut)) { alpaka::atomicAdd(acc, &mds.connectedMax()[innerMDIndex], 1u, alpaka::hierarchy::Threads{}); } } diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h index 3768f021a0cfb..8aafd359bffdc 100644 --- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -329,9 +329,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { LSTObjType type = candsBase.trackCandidateType()[trackCandidateIndex]; unsigned int innerTrackletIdx = candsExtended.objectIndices()[trackCandidateIndex][0]; if (type == LSTObjType::T5) { - unsigned int quintupletIndex = innerTrackletIdx; // T5 index - float eta2 = __H2F(quintuplets.eta()[quintupletIndex]); - float phi2 = __H2F(quintuplets.phi()[quintupletIndex]); + float eta2 = __H2F(quintuplets.eta()[innerTrackletIdx]); + float phi2 = __H2F(quintuplets.phi()[innerTrackletIdx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; @@ -339,7 +338,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (dR2 < 0.02f) { float d2 = 0.f; CMS_UNROLL_LOOP for (unsigned k = 0; k < Params_pLS::kEmbed; ++k) { - const float diff = plsEmbed[k] - quintuplets.t5Embed()[quintupletIndex][k]; + const float diff = plsEmbed[k] - quintuplets.t5Embed()[innerTrackletIdx][k]; d2 += diff * diff; } // Compare squared embedding distance to the cut value for the eta bin. @@ -347,24 +346,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { pixelSegments.isDup()[pixelArrayIndex] = true; } } - } - if (type == LSTObjType::pT3) { + } else if (type == LSTObjType::pT3) { int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx]; int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsBase); if (npMatched > 0) pixelSegments.isDup()[pixelArrayIndex] = true; - int pT3Index = innerTrackletIdx; - float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]); - float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]); + float eta2 = __H2F(pixelTriplets.eta_pix()[innerTrackletIdx]); + float phi2 = __H2F(pixelTriplets.phi_pix()[innerTrackletIdx]); float dEta = alpaka::math::abs(acc, eta1 - eta2); float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2); float dR2 = dEta * dEta + dPhi * dPhi; if (dR2 < 0.000001f) pixelSegments.isDup()[pixelArrayIndex] = true; - } - if (type == LSTObjType::pT5) { + } else if (type == LSTObjType::pT5) { unsigned int pLSIndex = innerTrackletIdx; int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsBase); if (npMatched > 0) { diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h index 81c166cd1f4f5..393a0bf7d520f 100644 --- a/RecoTracker/LSTCore/src/alpaka/Triplet.h +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -14,6 +14,68 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + // Pre-loaded inner-segment-constant data for passPointingConstraint. + // Populated once per inner segment, reused across all outer segments in the inner loop. + struct T3InnerSegData { + float x1, y1; // first MD position (cm) + float x2, y2; // second MD position (shared MD, cm) + float rt1, rt2; // anchorRt for first two MDs (cm) + float drt_InSeg; // rt2 - rt1 + float rt_InSeg; // sqrt((x2-x1)^2 + (y2-y1)^2) + float sdIn_alpha; // dPhiChange of inner segment + float sdIn_alphaRHmin; // dPhiChangeMin (for endcap path) + float sdIn_alphaRHmax; // dPhiChangeMax (for endcap path) + // Precomputed sin/cos for algebraic betaIn check (avoids per-candidate atan2). + float sin_alpha, cos_alpha; + float sin_alphaRHmin, cos_alphaRHmin; // for endcap EEE path + float sin_alphaRHmax, cos_alphaRHmax; // for endcap EEE path + short innerSubdet; // subdet of inner-inner module + short middleSubdet; // subdet of middle module + }; + + // Pre-loaded hit coordinates for passRZConstraint. + // All values in cm, passRZConstraint converts to meters internally. + struct T3HitCoords { + float x1, y1, z1, rt1; + float x2, y2, z2, rt2; + float x3, y3, z3, rt3; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE T3InnerSegData loadT3InnerSegData(TAcc const& acc, + MiniDoubletsConst mds, + SegmentsConst segments, + ModulesConst modules, + unsigned int innerSegmentIndex, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex) { + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + T3InnerSegData d; + d.x1 = mds.anchorX()[firstMDIndex]; + d.y1 = mds.anchorY()[firstMDIndex]; + d.x2 = mds.anchorX()[secondMDIndex]; + d.y2 = mds.anchorY()[secondMDIndex]; + d.rt1 = mds.anchorRt()[firstMDIndex]; + d.rt2 = mds.anchorRt()[secondMDIndex]; + d.drt_InSeg = d.rt2 - d.rt1; + d.rt_InSeg = alpaka::math::sqrt(acc, (d.x2 - d.x1) * (d.x2 - d.x1) + (d.y2 - d.y1) * (d.y2 - d.y1)); + d.sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + d.sin_alpha = alpaka::math::sin(acc, d.sdIn_alpha); + d.cos_alpha = alpaka::math::cos(acc, d.sdIn_alpha); + d.innerSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + d.middleSubdet = modules.subdets()[middleLowerModuleIndex]; + d.sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + d.sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + if (d.innerSubdet == Endcap and d.middleSubdet == Endcap) { + d.sin_alphaRHmin = alpaka::math::sin(acc, d.sdIn_alphaRHmin); + d.cos_alphaRHmin = alpaka::math::cos(acc, d.sdIn_alphaRHmin); + d.sin_alphaRHmax = alpaka::math::sin(acc, d.sdIn_alphaRHmax); + d.cos_alphaRHmax = alpaka::math::cos(acc, d.sdIn_alphaRHmax); + } + return d; + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(ModulesConst modules, MiniDoubletsConst mds, SegmentsConst segments, @@ -72,13 +134,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { template ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, ModulesConst modules, - MiniDoubletsConst mds, uint16_t innerInnerLowerModuleIndex, uint16_t middleLowerModuleIndex, uint16_t outerOuterLowerModuleIndex, - unsigned int firstMDIndex, - unsigned int secondMDIndex, - unsigned int thirdMDIndex, + T3HitCoords const& hitCoords, float circleRadius, float circleCenterX, float circleCenterY, @@ -89,26 +148,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const short layer3 = modules.lstLayers()[outerOuterLowerModuleIndex]; //all the values are stored in the unit of cm, in the calculation below we need to be cautious if we want to use the meter unit - //get r and z - const float r1 = mds.anchorRt()[firstMDIndex] / 100; - const float r2 = mds.anchorRt()[secondMDIndex] / 100; - const float r3 = mds.anchorRt()[thirdMDIndex] / 100; + //get r and z (convert from cm to m) + const float r1 = hitCoords.rt1 / 100; + const float r2 = hitCoords.rt2 / 100; + const float r3 = hitCoords.rt3 / 100; - const float z1 = mds.anchorZ()[firstMDIndex] / 100; - const float z2 = mds.anchorZ()[secondMDIndex] / 100; - const float z3 = mds.anchorZ()[thirdMDIndex] / 100; + const float z1 = hitCoords.z1 / 100; + const float z2 = hitCoords.z2 / 100; + const float z3 = hitCoords.z3 / 100; //use linear approximation for regions 9 and 20-24 because it works better (see https://github.com/SegmentLinking/cmssw/pull/92) float residual = alpaka::math::abs(acc, z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1)); - //get the x,y position of each MD - const float x1 = mds.anchorX()[firstMDIndex] / 100; - const float x2 = mds.anchorX()[secondMDIndex] / 100; - const float x3 = mds.anchorX()[thirdMDIndex] / 100; + //get the x,y position of each MD (convert from cm to m) + const float x1 = hitCoords.x1 / 100; + const float x2 = hitCoords.x2 / 100; + const float x3 = hitCoords.x3 / 100; - const float y1 = mds.anchorY()[firstMDIndex] / 100; - const float y2 = mds.anchorY()[secondMDIndex] / 100; - const float y3 = mds.anchorY()[thirdMDIndex] / 100; + const float y1 = hitCoords.y1 / 100; + const float y2 = hitCoords.y2 / 100; + const float y3 = hitCoords.y3 / 100; float cross = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1); charge = -1 * ((int)copysignf(1.0f, cross)); @@ -186,13 +245,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { else if (y3 > y2 && y2 > y1) py = alpaka::math::abs(acc, py); - float AO = alpaka::math::sqrt( - acc, (x_other - x_center) * (x_other - x_center) + (y_other - y_center) * (y_other - y_center)); - float BO = - alpaka::math::sqrt(acc, (x_init - x_center) * (x_init - x_center) + (y_init - y_center) * (y_init - y_center)); + // All 3 hits lie on the fitted circle, so AO = BO = R; eliminates 2 sqrt calls. + float R = circleRadius / 100; float AB2 = (x_other - x_init) * (x_other - x_init) + (y_other - y_init) * (y_other - y_init); - float dPhi = alpaka::math::acos(acc, (AO * AO + BO * BO - AB2) / (2 * AO * BO)); //Law of Cosines - float ds = circleRadius / 100 * dPhi; + float dPhi = alpaka::math::acos(acc, 1 - AB2 / (2 * R * R)); + float ds = R * dPhi; float pz = dz / ds * pt; float p = alpaka::math::sqrt(acc, px * px + py * py + pz * pz); @@ -216,8 +273,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { float A = paraB * paraB + paraC * paraC; float B = 2 * paraA * paraB; float C = paraA * paraA - paraC * paraC; - float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); - float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + // Shared discriminant: compute sqrt once instead of twice. + float disc = alpaka::math::sqrt(acc, B * B - 4 * A * C); + float sol1 = (-B + disc) / (2 * A); + float sol2 = (-B - disc) / (2 * A); float solz1 = alpaka::math::asin(acc, sol1) / rou * pz / p + z_init; float solz2 = alpaka::math::asin(acc, sol2) / rou * pz / p + z_init; float diffz1 = (solz1 - z_target) * 100; @@ -228,8 +287,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { : ((alpaka::math::abs(acc, diffz1) < alpaka::math::abs(acc, diffz2)) ? diffz1 : diffz2); } else { // for endcap float s = (z_target - z_init) * p / pz; - float x = x_init + px / a * alpaka::math::sin(acc, rou * s) - py / a * (1 - alpaka::math::cos(acc, rou * s)); - float y = y_init + py / a * alpaka::math::sin(acc, rou * s) + px / a * (1 - alpaka::math::cos(acc, rou * s)); + // Shared sin/cos: compute once instead of twice each. + float sinRS = alpaka::math::sin(acc, rou * s); + float cosRS = alpaka::math::cos(acc, rou * s); + float x = x_init + px / a * sinRS - py / a * (1 - cosRS); + float y = y_init + py / a * sinRS + px / a * (1 - cosRS); residual = (r_target - alpaka::math::sqrt(acc, x * x + y * y)) * 100; } @@ -326,66 +388,45 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, - ModulesConst modules, - MiniDoubletsConst mds, - SegmentsConst segments, - unsigned int firstMDIndex, - unsigned int secondMDIndex, - unsigned int thirdMDIndex, - uint16_t innerInnerLowerModuleIndex, - uint16_t middleLowerModuleIndex, - uint16_t outerOuterLowerModuleIndex, - unsigned int innerSegmentIndex, - const float ptCut) { - const float x1 = mds.anchorX()[firstMDIndex]; - const float x2 = mds.anchorX()[secondMDIndex]; - const float x3 = mds.anchorX()[thirdMDIndex]; - const float y1 = mds.anchorY()[firstMDIndex]; - const float y2 = mds.anchorY()[secondMDIndex]; - const float y3 = mds.anchorY()[thirdMDIndex]; - - const short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; - const short middleLowerModuleSubdet = modules.subdets()[middleLowerModuleIndex]; - const short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - - const float rt_InLo = mds.anchorRt()[firstMDIndex]; - const float rt_InOut = mds.anchorRt()[secondMDIndex]; - const float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint( + TAcc const& acc, T3InnerSegData const& innerSegData, float x3, float y3, short outerSubdet, const float ptCut) { + const float dx = x3 - innerSegData.x1; + const float dy = y3 - innerSegData.y1; + const float drt_tl_axis = alpaka::math::sqrt(acc, dx * dx + dy * dy); - const float drt_InSeg = rt_InOut - rt_InLo; - const float drt_tl_axis = alpaka::math::sqrt(acc, (x3 - x1) * (x3 - x1) + (y3 - y1) * (y3 - y1)); - - //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = alpaka::math::sqrt(acc, (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); - - const float betaIn = - sdIn_alpha - cms::alpakatools::reducePhiRange( - acc, cms::alpakatools::phi(acc, x3 - x1, y3 - y1) - mds.anchorPhi()[firstMDIndex]); const float betaInCut = - alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + - (0.02f / drt_InSeg); - - float betaInRHmin = betaIn; - - if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and - outerOuterLowerModuleSubdet == Endcap) { - float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); - float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); - - betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; - float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; - float swapTemp; - - if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { - swapTemp = betaInRHmin; - betaInRHmin = betaInRHmax; - betaInRHmax = swapTemp; + alpaka::math::asin( + acc, alpaka::math::min(acc, (-innerSegData.rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / innerSegData.drt_InSeg); + + // Algebraic betaIn check: |sin(betaIn)| < sin(betaInCut) is exact equivalent of + // |betaIn| < betaInCut (for |betaIn| < pi/2, betaInCut < pi/2). Avoids per-candidate atan2. + // cross = x1*y3 - y1*x3, dot = x3*x1 + y3*y1 - rt1^2 (angle from hit1 to dir_13) + const float crossPC = innerSegData.x1 * y3 - innerSegData.y1 * x3; + const float dotPC = x3 * innerSegData.x1 + y3 * innerSegData.y1 - innerSegData.rt1 * innerSegData.rt1; + const float r2 = crossPC * crossPC + dotPC * dotPC; + const float sinBetaInCut = alpaka::math::sin(acc, betaInCut); + const float sinBetaInCutSq = sinBetaInCut * sinBetaInCut; + + if (innerSegData.innerSubdet == Endcap and innerSegData.middleSubdet == Endcap and outerSubdet == Endcap) { + // EEE: check both alpha variants, pass if the one with smaller |betaIn| is within cut + const float sinValMin = innerSegData.sin_alphaRHmin * dotPC - innerSegData.cos_alphaRHmin * crossPC; + const float sinValMax = innerSegData.sin_alphaRHmax * dotPC - innerSegData.cos_alphaRHmax * crossPC; + const float sqMin = sinValMin * sinValMin; + const float sqMax = sinValMax * sinValMax; + + if (sqMin <= sqMax) { + return sqMin < sinBetaInCutSq * r2 and + (innerSegData.cos_alphaRHmin * dotPC + innerSegData.sin_alphaRHmin * crossPC > 0.f); + } else { + return sqMax < sinBetaInCutSq * r2 and + (innerSegData.cos_alphaRHmax * dotPC + innerSegData.sin_alphaRHmax * crossPC > 0.f); } } - //Beta cut - return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + const float sinVal = innerSegData.sin_alpha * dotPC - innerSegData.cos_alpha * crossPC; + return sinVal * sinVal < sinBetaInCutSq * r2 and + (innerSegData.cos_alpha * dotPC + innerSegData.sin_alpha * crossPC > 0.f); } template @@ -410,43 +451,51 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { const unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; const unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; - const float x1 = mds.anchorX()[firstMDIndex]; - const float x2 = mds.anchorX()[secondMDIndex]; - const float x3 = mds.anchorX()[thirdMDIndex]; - const float y1 = mds.anchorY()[firstMDIndex]; - const float y2 = mds.anchorY()[secondMDIndex]; - const float y3 = mds.anchorY()[thirdMDIndex]; - - std::tie(circleRadius, circleCenterX, circleCenterY) = - computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3); + T3HitCoords hitCoords; + hitCoords.x1 = mds.anchorX()[firstMDIndex]; + hitCoords.y1 = mds.anchorY()[firstMDIndex]; + hitCoords.z1 = mds.anchorZ()[firstMDIndex]; + hitCoords.rt1 = mds.anchorRt()[firstMDIndex]; + hitCoords.x2 = mds.anchorX()[secondMDIndex]; + hitCoords.y2 = mds.anchorY()[secondMDIndex]; + hitCoords.z2 = mds.anchorZ()[secondMDIndex]; + hitCoords.rt2 = mds.anchorRt()[secondMDIndex]; + hitCoords.x3 = mds.anchorX()[thirdMDIndex]; + hitCoords.y3 = mds.anchorY()[thirdMDIndex]; + hitCoords.z3 = mds.anchorZ()[thirdMDIndex]; + hitCoords.rt3 = mds.anchorRt()[thirdMDIndex]; + + std::tie(circleRadius, circleCenterX, circleCenterY) = computeRadiusFromThreeAnchorHits( + acc, hitCoords.x1, hitCoords.y1, hitCoords.x2, hitCoords.y2, hitCoords.x3, hitCoords.y3); if (not passRZConstraint(acc, modules, - mds, innerInnerLowerModuleIndex, middleLowerModuleIndex, outerOuterLowerModuleIndex, - firstMDIndex, - secondMDIndex, - thirdMDIndex, + hitCoords, circleRadius, circleCenterX, circleCenterY, charge)) return false; - const float rt_InLo = mds.anchorRt()[firstMDIndex]; - const float rt_InOut = mds.anchorRt()[secondMDIndex]; const float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); - const float drt_InSeg = rt_InOut - rt_InLo; - const float drt_tl_axis = alpaka::math::sqrt(acc, (x3 - x1) * (x3 - x1) + (y3 - y1) * (y3 - y1)); + const float drt_InSeg = hitCoords.rt2 - hitCoords.rt1; + const float drt_tl_axis = alpaka::math::sqrt(acc, + (hitCoords.x3 - hitCoords.x1) * (hitCoords.x3 - hitCoords.x1) + + (hitCoords.y3 - hitCoords.y1) * (hitCoords.y3 - hitCoords.y1)); //innerOuterAnchor - innerInnerAnchor - const float rt_InSeg = alpaka::math::sqrt(acc, (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); + const float rt_InSeg = alpaka::math::sqrt(acc, + (hitCoords.x2 - hitCoords.x1) * (hitCoords.x2 - hitCoords.x1) + + (hitCoords.y2 - hitCoords.y1) * (hitCoords.y2 - hitCoords.y1)); betaIn = sdIn_alpha - cms::alpakatools::reducePhiRange( - acc, cms::alpakatools::phi(acc, x3 - x1, y3 - y1) - mds.anchorPhi()[firstMDIndex]); + acc, + cms::alpakatools::phi(acc, hitCoords.x3 - hitCoords.x1, hitCoords.y3 - hitCoords.y1) - + mds.anchorPhi()[firstMDIndex]); betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + (0.02f / drt_InSeg); @@ -515,6 +564,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex]; int middleMDIndiceInner = segments.mdIndices()[innerSegmentIndex][1]; + T3InnerSegData innerSegData = loadT3InnerSegData( + acc, mds, segments, modules, innerSegmentIndex, innerInnerLowerModuleIndex, middleLowerModuleIndex); + unsigned int nOuterSegments = segmentsOccupancy.nSegments()[middleLowerModuleIndex]; for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) { unsigned int outerSegmentIndex = ranges.segmentRanges()[middleLowerModuleIndex][0] + outerSegmentArrayIndex; @@ -524,22 +576,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { continue; uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex]; - unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; - unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + short outerSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - if (not passPointingConstraint(acc, - modules, - mds, - segments, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - innerInnerLowerModuleIndex, - middleLowerModuleIndex, - outerOuterLowerModuleIndex, - innerSegmentIndex, - ptCut)) + if (not passPointingConstraint(acc, innerSegData, x3, y3, outerSubdet, ptCut)) continue; // Match inner Sg and Outer Sg @@ -672,29 +714,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { if (nOuterSegments == 0) continue; + T3InnerSegData innerSegData = loadT3InnerSegData( + acc, mds, segments, modules, innerSegmentIndex, innerLowerModuleArrayIdx, middleLowerModuleIndex); + for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) { const unsigned int outerSegmentIndex = segmentRanges[middleLowerModuleIndex][0] + outerSegmentArrayIndex; if (mdIndices[outerSegmentIndex][0] != mdShared) continue; - unsigned int firstMDIndex = mdIndices[innerSegmentIndex][0]; - unsigned int secondMDIndex = mdShared; unsigned int thirdMDIndex = mdIndices[outerSegmentIndex][1]; uint16_t outerOuterLowerModuleIndex = outerLowerModuleIndices[outerSegmentIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + short outerSubdet = modules.subdets()[outerOuterLowerModuleIndex]; - if (not passPointingConstraint(acc, - modules, - mds, - segments, - firstMDIndex, - secondMDIndex, - thirdMDIndex, - innerLowerModuleArrayIdx, - middleLowerModuleIndex, - outerOuterLowerModuleIndex, - innerSegmentIndex, - ptCut)) + if (not passPointingConstraint(acc, innerSegData, x3, y3, outerSubdet, ptCut)) continue; alpaka::atomicAdd(acc, &segments.connectedMax()[innerSegmentIndex], 1u, alpaka::hierarchy::Threads{});