diff --git a/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h b/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h
index 2430375ebd64f..ec72d051030f4 100644
--- a/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h
+++ b/HeterogeneousCore/AlpakaMath/interface/deltaPhi.h
@@ -22,7 +22,7 @@ namespace cms::alpakatools {
 
   template <alpaka::concepts::Acc TAcc, typename T>
   ALPAKA_FN_HOST_ACC inline T deltaPhi(TAcc const& acc, T x1, T y1, T x2, T y2) {
-    return reducePhiRange(acc, alpaka::math::atan2(acc, -y2, -x2) - alpaka::math::atan2(acc, -y1, -x1));
+    return alpaka::math::atan2(acc, x1 * y2 - x2 * y1, x1 * x2 + y1 * y2);
   }
 
   template <alpaka::concepts::Acc TAcc, typename T>
diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h
index 49c700f48ecc6..33759e0b49004 100644
--- a/RecoTracker/LSTCore/src/alpaka/Kernels.h
+++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h
@@ -247,7 +247,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
             if (quintuplets.isDup()[ix] & 1)
               continue;
 
-            bool isPT5_ix = quintuplets.partOfPT5()[ix];
+            const bool isPT5_ix = quintuplets.partOfPT5()[ix];
+            const float eta1 = __H2F(quintuplets.eta()[ix]);
+            const float phi1 = __H2F(quintuplets.phi()[ix]);
+            const float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
 
             for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) {
               unsigned int jx = quintupletModuleIndices_lowmod2 + jx1;
@@ -257,31 +260,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
               if (quintuplets.isDup()[jx] & 1)
                 continue;
 
-              bool isPT5_jx = quintuplets.partOfPT5()[jx];
+              const bool isPT5_jx = quintuplets.partOfPT5()[jx];
 
               if (isPT5_ix && isPT5_jx)
                 continue;
 
-              float eta1 = __H2F(quintuplets.eta()[ix]);
-              float phi1 = __H2F(quintuplets.phi()[ix]);
-              float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);
-
-              float eta2 = __H2F(quintuplets.eta()[jx]);
-              float phi2 = __H2F(quintuplets.phi()[jx]);
-              float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]);
-
-              float dEta = alpaka::math::abs(acc, eta1 - eta2);
-              float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2);
-
+              const float eta2 = __H2F(quintuplets.eta()[jx]);
+              const float dEta = alpaka::math::abs(acc, eta1 - eta2);
               if (dEta > 0.1f)
                 continue;
 
+              const float phi2 = __H2F(quintuplets.phi()[jx]);
+              const float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2);
               if (alpaka::math::abs(acc, dPhi) > 0.1f)
                 continue;
 
-              float dR2 = dEta * dEta + dPhi * dPhi;
-              int nMatched = checkHitsT5(ix, jx, quintuplets);
-              const int minNHitsForDup_T5 = 5;
+              const float dR2 = dEta * dEta + dPhi * dPhi;
+              const int nMatched = checkHitsT5(ix, jx, quintuplets);
+              constexpr int minNHitsForDup_T5 = 5;
 
               float d2 = 0.f;
               CMS_UNROLL_LOOP
@@ -291,6 +287,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
               }
 
               if (((dR2 < 0.001f || nMatched >= minNHitsForDup_T5) && d2 < 1.0f) || (dR2 < 0.02f && d2 < 0.1f)) {
+                const float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]);
                 if (isPT5_jx || score_rphisum1 > score_rphisum2) {
                   rmQuintupletFromMemory(quintuplets, ix, true);
                 } else if (isPT5_ix || score_rphisum1 < score_rphisum2) {
@@ -458,11 +455,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const {
       unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
       for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
+        float eta1 = __H2F(pixelQuintuplets.eta()[ix]);
+        float phi1 = __H2F(pixelQuintuplets.phi()[ix]);
         float score1 = __H2F(pixelQuintuplets.score()[ix]);
         for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
           if (ix == jx)
             continue;
 
+          float eta2 = __H2F(pixelQuintuplets.eta()[jx]);
+          if (alpaka::math::abs(acc, eta1 - eta2) > 0.2f)
+            continue;
+
+          float phi2 = __H2F(pixelQuintuplets.phi()[jx]);
+          if (alpaka::math::abs(acc, cms::alpakatools::deltaPhi(acc, phi1, phi2)) > 0.2f)
+            continue;
+
           int nMatched = checkHitspT5(ix, jx, pixelQuintuplets);
           float score2 = __H2F(pixelQuintuplets.score()[jx]);
           const int minNHitsForDup_pT5 = 7;
diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h
index 806869522e062..f89ec20988e76 100644
--- a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h
+++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h
@@ -13,15 +13,41 @@
 #include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
+
+  // Pre-computed module-constant data for MiniDoublet kernels.
+  // Populated once per module to avoid redundant SoA loads in the inner hit-pair loop.
+  struct ModuleMDData {
+    uint16_t lowerModuleIndex;
+    short subdet;
+    short side;
+    short moduleType;
+    short moduleLayerType;
+    unsigned int iL;  // layer - 1
+    bool isTilted;
+    bool isEndcapTwoS;
+    bool isGloballyInner;
+
+    // shiftStripHits constants
+    float slope;      // dxdys
+    float drdz;       // drdzs[lowerModuleIndex]
+    float moduleSep;  // moduleGapSize result
+
+    // dPhiThreshold pre-computed constants
+    float miniPVoff;
+    float miniMuls;
+    float miniTilt2;     // 0 for non-tilted and endcap
+    float sqrtTermBase;  // miniMuls^2 + miniPVoff^2
+    float sqrtTermFlat;  // sqrt(sqrtTermBase), valid for barrel flat
+  };
+
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc,
                                                     MiniDoublets mds,
                                                     HitsBaseConst hitsBase,
                                                     HitsExtendedConst hitsExtended,
-                                                    ModulesConst modules,
+                                                    ModuleMDData const& mod,
                                                     unsigned int lowerHitIdx,
                                                     unsigned int upperHitIdx,
-                                                    uint16_t lowerModuleIdx,
                                                     float dz,
                                                     float dPhi,
                                                     float dPhiChange,
@@ -34,9 +60,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     //the index into which this MD needs to be written will be computed in the kernel
     //nMDs variable will be incremented in the kernel, no need to worry about that here
 
-    mds.moduleIndices()[idx] = lowerModuleIdx;
+    mds.moduleIndices()[idx] = mod.lowerModuleIndex;
     unsigned int anchorHitIndex, outerHitIndex;
-    if (modules.moduleType()[lowerModuleIdx] == PS and modules.moduleLayerType()[lowerModuleIdx] == Strip) {
+    if (mod.moduleType == PS and mod.moduleLayerType == Strip) {
       mds.anchorHitIndices()[idx] = upperHitIdx;
       mds.outerHitIndices()[idx] = lowerHitIdx;
 
@@ -72,8 +98,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     mds.anchorHighEdgeY()[idx] = hitsExtended.highEdgeYs()[anchorHitIndex];
     mds.anchorLowEdgeX()[idx] = hitsExtended.lowEdgeXs()[anchorHitIndex];
     mds.anchorLowEdgeY()[idx] = hitsExtended.lowEdgeYs()[anchorHitIndex];
-    mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]);
-    mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]);
+    // Edge phi only read downstream when outerLayerEndcapTwoS is true; skip atan2 for other modules.
+    if (mod.isEndcapTwoS) {
+      mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]);
+      mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]);
+    }
 
     mds.outerX()[idx] = hitsBase.xs()[outerHitIndex];
     mds.outerY()[idx] = hitsBase.ys()[outerHitIndex];
@@ -89,6 +118,49 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 #endif
   }
 
+  // Overload for callers (Segment.h) that still pass ModulesConst + moduleIndex.
+  template <alpaka::concepts::Acc TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc,
+                                                    MiniDoublets mds,
+                                                    HitsBaseConst hitsBase,
+                                                    HitsExtendedConst hitsExtended,
+                                                    ModulesConst modules,
+                                                    unsigned int lowerHitIdx,
+                                                    unsigned int upperHitIdx,
+                                                    uint16_t lowerModuleIdx,
+                                                    float dz,
+                                                    float dPhi,
+                                                    float dPhiChange,
+                                                    float shiftedX,
+                                                    float shiftedY,
+                                                    float shiftedZ,
+                                                    float noShiftedDphi,
+                                                    float noShiftedDPhiChange,
+                                                    unsigned int idx) {
+    ModuleMDData mod;
+    mod.lowerModuleIndex = lowerModuleIdx;
+    mod.moduleType = modules.moduleType()[lowerModuleIdx];
+    mod.moduleLayerType = modules.moduleLayerType()[lowerModuleIdx];
+    mod.subdet = modules.subdets()[lowerModuleIdx];
+    mod.isEndcapTwoS = (mod.subdet == Endcap && mod.moduleType == TwoS);
+    addMDToMemory(acc,
+                  mds,
+                  hitsBase,
+                  hitsExtended,
+                  mod,
+                  lowerHitIdx,
+                  upperHitIdx,
+                  dz,
+                  dPhi,
+                  dPhiChange,
+                  shiftedX,
+                  shiftedY,
+                  shiftedZ,
+                  noShiftedDphi,
+                  noShiftedDPhiChange,
+                  idx);
+  }
+
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules(ModulesConst modules, uint16_t moduleIndex) {
     // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing
     // This is the same as what was previously considered as"isNormalTiltedModules"
@@ -132,76 +204,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   }
 
   template <alpaka::concepts::Acc TAcc>
-  ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold(TAcc const& acc,
-                                                     float rt,
-                                                     ModulesConst modules,
-                                                     uint16_t moduleIndex,
-                                                     const float ptCut,
-                                                     float dPhi = 0,
-                                                     float dz = 0) {
-    // =================================================================
-    // Various constants
-    // =================================================================
-    //mean of the horizontal layer position in y; treat this as R below
-
-    // =================================================================
-    // Computing some components that make up the cut threshold
-    // =================================================================
-
-    unsigned int iL = modules.layers()[moduleIndex] - 1;
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold(
+      TAcc const& acc, float rt, ModuleMDData const& mod, const float ptCut, float dPhi = 0, float dz = 0) {
     const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax));
-    const float rLayNominal =
-        ((modules.subdets()[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]);
-    const float miniPVoff = 0.1f / rLayNominal;
-    const float miniMuls = ((modules.subdets()[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut
-                                                                       : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut);
-    const bool isTilted = modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] != Center;
-    //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly
-
-    float drdz;
-    if (isTilted) {
-      if (modules.moduleType()[moduleIndex] == PS and modules.moduleLayerType()[moduleIndex] == Strip) {
-        drdz = modules.drdzs()[moduleIndex];
-      } else {
-        drdz = modules.drdzs()[modules.partnerModuleIndices()[moduleIndex]];
-      }
-    } else {
-      drdz = 0;
-    }
-    const float miniTilt2 = ((isTilted) ? (0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) /
-                                              (1.f + drdz * drdz) / moduleGapSize(modules, moduleIndex)
-                                        : 0);
-
-    // Compute luminous region requirement for endcap
-    const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz);  // Balaji's new error
-
-    // =================================================================
-    // Return the threshold value
-    // =================================================================
-    // Following condition is met if the module is central and flatly lying
-    if (modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] == Center) {
-      return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff);
+
+    // Barrel flat: no tilt or luminous region correction
+    if (mod.subdet == Barrel and mod.side == Center) {
+      return miniSlope + mod.sqrtTermFlat;
     }
-    // Following condition is met if the module is central and tilted
-    else if (modules.subdets()[moduleIndex] == Barrel and
-             modules.sides()[moduleIndex] != Center)  //all types of tilted modules
-    {
-      return miniSlope +
-             alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope);
+    // Barrel tilted
+    else if (mod.subdet == Barrel) {
+      return miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + mod.miniTilt2 * miniSlope * miniSlope);
     }
-    // If not barrel, it is Endcap
+    // Endcap: luminous region correction
     else {
-      return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum);
+      const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz);
+      return miniSlope + alpaka::math::sqrt(acc, mod.sqrtTermBase + miniLum * miniLum);
     }
   }
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc,
-                                                     ModulesConst modules,
-                                                     uint16_t lowerModuleIndex,
-                                                     uint16_t upperModuleIndex,
-                                                     unsigned int lowerHitIndex,
-                                                     unsigned int upperHitIndex,
+                                                     ModuleMDData const& mod,
                                                      float* shiftedCoords,
                                                      float xLower,
                                                      float yLower,
@@ -219,44 +243,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     // This means that there may be very very subtle edge effects coming from whether the strip hit is center of the module or the at the edge of the module
     // But this should be relatively minor effect
 
-    // dependent variables for this if statement
-    // lowerModule
-    // lowerHit
-    // upperHit
-    // endcapGeometry
-    // tiltedGeometry
-
-    // Some variables relevant to the function
-    float xp;       // pixel x (pixel hit x)
-    float yp;       // pixel y (pixel hit y)
-    float zp;       // pixel y (pixel hit y)
-    float rtp;      // pixel y (pixel hit y)
-    float xa;       // "anchor" x (the anchor position on the strip module plane from pixel hit)
-    float ya;       // "anchor" y (the anchor position on the strip module plane from pixel hit)
-    float xo;       // old x (before the strip hit is moved up or down)
-    float yo;       // old y (before the strip hit is moved up or down)
-    float xn;       // new x (after the strip hit is moved up or down)
-    float yn;       // new y (after the strip hit is moved up or down)
-    float abszn;    // new z in absolute value
-    float zn;       // new z with the sign (+/-) accounted
-    float angleA;   // in r-z plane the theta of the pixel hit in polar coordinate is the angleA
-    float angleB;   // this is the angle of tilted module in r-z plane ("drdz"), for endcap this is 90 degrees
-    bool isEndcap;  // If endcap, drdz = infinity
-    float moduleSeparation;
-    float drprime;    // The radial shift size in x-y plane projection
-    float drprime_x;  // x-component of drprime
-    float drprime_y;  // y-component of drprime
-    const float& slope =
-        modules.dxdys()[lowerModuleIndex];  // The slope of the possible strip hits for a given module in x-y plane
-    float absArctanSlope;
-    float angleM;  // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees
-    float absdzprime;  // The distance between the two points after shifting
-    const float& drdz_ = modules.drdzs()[lowerModuleIndex];
-    // Assign hit pointers based on their hit type
+    float xp;   // pixel x (pixel hit x)
+    float yp;   // pixel y (pixel hit y)
+    float zp;   // pixel z
+    float rtp;  // pixel rt
+    float xo;   // old x (before the strip hit is moved up or down)
+    float yo;   // old y (before the strip hit is moved up or down)
     bool pHitInverted = false;
-    if (modules.moduleType()[lowerModuleIndex] == PS) {
-      // TODO: This is somewhat of an mystery.... somewhat confused why this is the case
-      if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) {
+    if (mod.moduleType == PS) {
+      if (mod.moduleLayerType == Pixel) {
         xo = xUpper;
         yo = yUpper;
         xp = xLower;
@@ -281,82 +276,70 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
       rtp = rtLower;
     }
 
-    // If it is endcap some of the math gets simplified (and also computers don't like infinities)
-    isEndcap = modules.subdets()[lowerModuleIndex] == Endcap;
+    const bool isEndcap = (mod.subdet == Endcap);
 
-    // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos
-    // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time
-    angleA = alpaka::math::abs(acc, alpaka::math::atan(acc, rtp / zp));
-    angleB =
-        ((isEndcap)
-             ? kPi / 2.f
-             : alpaka::math::atan(
-                   acc,
-                   drdz_));  // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa
+    // Algebraic trig: sin(atan(r/z)) = r/hypot, cos(atan(r/z)) = |z|/hypot
+    const float hypot_rz = alpaka::math::sqrt(acc, rtp * rtp + zp * zp);
+    const float sinA = rtp / hypot_rz;
+    const float cosA = alpaka::math::abs(acc, zp) / hypot_rz;
+
+    // sin(A+B) via angle-addition identity; endcap: B=pi/2 so sin(A+pi/2)=cosA
+    // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa
+    float sinApB;
+    if (isEndcap) {
+      sinApB = cosA;
+    } else {
+      const float inv_hypot_drdz = 1.f / alpaka::math::sqrt(acc, 1.f + mod.drdz * mod.drdz);
+      sinApB = sinA * inv_hypot_drdz + cosA * mod.drdz * inv_hypot_drdz;
+    }
 
-    moduleSeparation = moduleGapSize(modules, lowerModuleIndex);
+    float moduleSeparation = mod.moduleSep;
 
     // Sign flips if the pixel is later layer
-    if (modules.isGloballyInner()[lowerModuleIndex] == pHitInverted) {
+    if (mod.isGloballyInner == pHitInverted) {
       moduleSeparation *= -1;
     }
 
-    drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA);
+    float drprime = moduleSeparation * sinA / sinApB;
 
-    // Compute arctan of the slope and take care of the slope = infinity case
-    absArctanSlope =
-        ((slope != kVerticalModuleSlope && edm::isFinite(slope)) ? fabs(alpaka::math::atan(acc, slope)) : kPi / 2.f);
-
-    // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently
-    if (xp > 0 and yp > 0) {
-      angleM = absArctanSlope;
-    } else if (xp > 0 and yp < 0) {
-      angleM = kPi - absArctanSlope;
-    } else if (xp < 0 and yp < 0) {
-      angleM = kPi + absArctanSlope;
-    } else  // if (xp < 0 and yp > 0)
-    {
-      angleM = 2.f * kPi - absArctanSlope;
+    // Algebraic: sin(atan(slope)) = |slope|/sqrt(1+slope^2), cos(atan(slope)) = 1/sqrt(1+slope^2)
+    float drprime_x, drprime_y;
+    const float slope = mod.slope;
+    if (slope != kVerticalModuleSlope && edm::isFinite(slope)) {
+      const float inv_hypot_slope = 1.f / alpaka::math::sqrt(acc, 1.f + slope * slope);
+      drprime_x = drprime * ((xp > 0.f) - (xp < 0.f)) * alpaka::math::abs(acc, slope) * inv_hypot_slope;
+      drprime_y = drprime * ((yp > 0.f) - (yp < 0.f)) * inv_hypot_slope;
+    } else {
+      drprime_x = drprime * ((xp > 0.f) - (xp < 0.f));
+      drprime_y = 0.f;
     }
 
-    // Then since the angleM sign is taken care of properly
-    drprime_x = drprime * alpaka::math::sin(acc, angleM);
-    drprime_y = drprime * alpaka::math::cos(acc, angleM);
+    float xa = xp + drprime_x;  // anchor x (the guessed position on the strip module plane)
+    float ya = yp + drprime_y;  // anchor y
 
-    // The new anchor position is
-    xa = xp + drprime_x;
-    ya = yp + drprime_y;
-
-    // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions)
-    if (slope == kVerticalModuleSlope ||
-        edm::isNotFinite(slope))  // Designated for tilted module when the slope is infinity (module lying along y-axis)
-    {
-      xn = xa;  // New x point is simply where the anchor is
-      yn = yo;  // No shift in y
+    // Compute the new strip hit position (handle slope = infinity and slope = 0 cases)
+    float xn, yn;
+    if (slope == kVerticalModuleSlope || edm::isNotFinite(slope)) {
+      xn = xa;
+      yn = yo;
     } else if (slope == 0) {
-      xn = xo;  // New x point is simply where the anchor is
-      yn = ya;  // No shift in y
+      xn = xo;
+      yn = ya;
     } else {
-      xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope));  // new xn
-      yn = (xn - xa) * slope + ya;                                                 // new yn
+      xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope));
+      yn = (xn - xa) * slope + ya;
     }
 
-    // Computing new Z position
-    absdzprime = alpaka::math::abs(
-        acc,
-        moduleSeparation / alpaka::math::sin(acc, angleA + angleB) *
-            alpaka::math::cos(
-                acc,
-                angleA));  // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later
+    float absdzprime = alpaka::math::abs(acc, moduleSeparation * cosA / sinApB);
 
-    // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly
-    if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) {
+    float abszn;
+    if (mod.moduleLayerType == Pixel) {
       abszn = alpaka::math::abs(acc, zp) + absdzprime;
     } else {
       abszn = alpaka::math::abs(acc, zp) - absdzprime;
     }
 
-    zn = abszn * ((zp > 0) ? 1 : -1);  // Apply the sign of the zn
+    float zn = abszn * ((zp > 0) ? 1 : -1);
 
     shiftedCoords[0] = xn;
     shiftedCoords[1] = yn;
@@ -364,127 +347,114 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   }
 
   template <alpaka::concepts::Acc TAcc>
-  ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc,
-                                                     ModulesConst modules,
-                                                     uint16_t lowerModuleIndex,
-                                                     uint16_t upperModuleIndex,
-                                                     unsigned int lowerHitIndex,
-                                                     unsigned int upperHitIndex,
-                                                     float& dz,
-                                                     float& dPhi,
-                                                     float& dPhiChange,
-                                                     float& shiftedX,
-                                                     float& shiftedY,
-                                                     float& shiftedZ,
-                                                     float& noShiftedDphi,
-                                                     float& noShiftedDphiChange,
-                                                     float xLower,
-                                                     float yLower,
-                                                     float zLower,
-                                                     float rtLower,
-                                                     float xUpper,
-                                                     float yUpper,
-                                                     float zUpper,
-                                                     float rtUpper,
-                                                     const float ptCut) {
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc,
+                                                                      ModuleMDData const& mod,
+                                                                      float& dz,
+                                                                      float& dPhi,
+                                                                      float& dPhiChange,
+                                                                      float& shiftedX,
+                                                                      float& shiftedY,
+                                                                      float& shiftedZ,
+                                                                      float& noShiftedDphi,
+                                                                      float& noShiftedDphiChange,
+                                                                      float xLower,
+                                                                      float yLower,
+                                                                      float zLower,
+                                                                      float rtLower,
+                                                                      float xUpper,
+                                                                      float yUpper,
+                                                                      float zUpper,
+                                                                      float rtUpper,
+                                                                      const float ptCut) {
     dz = zLower - zUpper;
-    const float dzCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f;
+    const float dzCut = mod.moduleType == PS ? 2.f : 10.f;
     const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0));
     const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign;
 
-    if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0))
+    if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) {
       return false;
+    }
 
-    float miniCut = 0;
-
-    miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel
-                  ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, ptCut)
-                  : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, ptCut);
+    float miniCut = mod.moduleLayerType == Pixel ? dPhiThreshold(acc, rtLower, mod, ptCut)
+                                                 : dPhiThreshold(acc, rtUpper, mod, ptCut);
 
-    // Cut #2: dphi difference
-    // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085
-    float xn = 0.f, yn = 0.f;
+    float x1, y1, x2, y2, r1sq, r2sq;
     float shiftedRt2 = 0.f;
-    if (modules.sides()[lowerModuleIndex] != Center)  // If barrel and not center it is tilted
-    {
-      // Shift the hits and calculate new xn, yn position
+
+    if (mod.isTilted) {
       float shiftedCoords[3];
-      shiftStripHits(acc,
-                     modules,
-                     lowerModuleIndex,
-                     upperModuleIndex,
-                     lowerHitIndex,
-                     upperHitIndex,
-                     shiftedCoords,
-                     xLower,
-                     yLower,
-                     zLower,
-                     rtLower,
-                     xUpper,
-                     yUpper,
-                     zUpper,
-                     rtUpper);
-      xn = shiftedCoords[0];
-      yn = shiftedCoords[1];
-
-      // Lower or the upper hit needs to be modified depending on which one was actually shifted
-      if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) {
+      shiftStripHits(acc, mod, shiftedCoords, xLower, yLower, zLower, rtLower, xUpper, yUpper, zUpper, rtUpper);
+      float xn = shiftedCoords[0];
+      float yn = shiftedCoords[1];
+      shiftedRt2 = xn * xn + yn * yn;
+
+      if (mod.moduleLayerType == Pixel) {
         shiftedX = xn;
         shiftedY = yn;
         shiftedZ = zUpper;
-        shiftedRt2 = xn * xn + yn * yn;
-
-        dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY);  //function from Hit.cc
-        noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
+        x1 = xLower;
+        y1 = yLower;
+        x2 = xn;
+        y2 = yn;
+        r1sq = rtLower * rtLower;
+        r2sq = shiftedRt2;
       } else {
         shiftedX = xn;
         shiftedY = yn;
         shiftedZ = zLower;
-        shiftedRt2 = xn * xn + yn * yn;
-        dPhi = cms::alpakatools::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper);
-        noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
+        x1 = xn;
+        y1 = yn;
+        x2 = xUpper;
+        y2 = yUpper;
+        r1sq = shiftedRt2;
+        r2sq = rtUpper * rtUpper;
       }
     } else {
       shiftedX = 0.f;
       shiftedY = 0.f;
       shiftedZ = 0.f;
-      shiftedRt2 = 0.f;
-      dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
-      noShiftedDphi = dPhi;
+      x1 = xLower;
+      y1 = yLower;
+      x2 = xUpper;
+      y2 = yUpper;
+      r1sq = rtLower * rtLower;
+      r2sq = rtUpper * rtUpper;
     }
 
+    // Cross-product pre-checks: Pade [2,2] approximant overestimates tan^2(miniCut)
+    const float crossDP = x1 * y2 - x2 * y1;
+    const float dotDP = x1 * x2 + y1 * y2;
+    const float miniCutSq = miniCut * miniCut;
+    const float tanMiniCutSq = miniCutSq / (1.f - (2.f / 3.f) * miniCutSq);
+    if (dotDP <= 0.f || crossDP * crossDP >= tanMiniCutSq * dotDP * dotDP)
+      return false;
+
+    const float rInnerSq = alpaka::math::min(acc, r1sq, r2sq);
+    const float dotDC = dotDP - rInnerSq;
+    if (dotDC <= 0.f || crossDP * crossDP >= tanMiniCutSq * dotDC * dotDC)
+      return false;
+
+    // Cut #2: dphi difference
+    // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085
+    dPhi = alpaka::math::atan2(acc, crossDP, dotDP);
+    noShiftedDphi = mod.isTilted ? cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper) : dPhi;
+
     if (alpaka::math::abs(acc, dPhi) >= miniCut)
       return false;
 
     // Cut #3: The dphi change going from lower Hit to upper Hit
     // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076
-    if (modules.sides()[lowerModuleIndex] != Center) {
-      // When it is tilted, use the new shifted positions
-      if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) {
-        // dPhi Change should be calculated so that the upper hit has higher rt.
-        // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this.
-        // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering)
-        // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?)
-        // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit));
-
-        dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY)
-                                                      : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower);
-        noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper)
-                                                : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower);
-      } else {
-        // dPhi Change should be calculated so that the upper hit has higher rt.
-        // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this.
-        // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering)
-        // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?)
-
-        dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper)
-                                                      : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY);
-        noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper)
-                                                : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower);
-      }
+    // dPhiChange should be calculated so that the upper hit has higher rt.
+    // The strip hit shifting should guarantee rt ordering, but we check explicitly for safety.
+    if (r1sq < r2sq) {
+      dPhiChange = alpaka::math::atan2(acc, crossDP, dotDP - r1sq);
+    } else {
+      dPhiChange = alpaka::math::atan2(acc, -crossDP, dotDP - r2sq);
+    }
+    if (mod.isTilted) {
+      noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper)
+                                              : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower);
     } else {
-      // When it is flat lying module, whichever is the lowerSide will always have rt lower
-      dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper);
       noShiftedDphiChange = dPhiChange;
     }
 
@@ -492,154 +462,168 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   }
 
   template <alpaka::concepts::Acc TAcc>
-  ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc,
-                                                     ModulesConst modules,
-                                                     uint16_t lowerModuleIndex,
-                                                     uint16_t upperModuleIndex,
-                                                     unsigned int lowerHitIndex,
-                                                     unsigned int upperHitIndex,
-                                                     float& drt,
-                                                     float& dPhi,
-                                                     float& dPhiChange,
-                                                     float& shiftedX,
-                                                     float& shiftedY,
-                                                     float& shiftedZ,
-                                                     float& noShiftedDphi,
-                                                     float& noShiftedDphichange,
-                                                     float xLower,
-                                                     float yLower,
-                                                     float zLower,
-                                                     float rtLower,
-                                                     float xUpper,
-                                                     float yUpper,
-                                                     float zUpper,
-                                                     float rtUpper,
-                                                     const float ptCut) {
-    // There are series of cuts that applies to mini-doublet in a "endcap" region
-    // Cut #1 : dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap)
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc,
+                                                                      ModuleMDData const& mod,
+                                                                      float& drt,
+                                                                      float& dPhi,
+                                                                      float& dPhiChange,
+                                                                      float& shiftedX,
+                                                                      float& shiftedY,
+                                                                      float& shiftedZ,
+                                                                      float& noShiftedDphi,
+                                                                      float& noShiftedDphichange,
+                                                                      float xLower,
+                                                                      float yLower,
+                                                                      float zLower,
+                                                                      float rtLower,
+                                                                      float xUpper,
+                                                                      float yUpper,
+                                                                      float zUpper,
+                                                                      float rtUpper,
+                                                                      const float ptCut) {
+    // Cut #1: dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap)
     // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3093
     // For PS module in case when it is tilted a different dz (after the strip hit shift) is calculated later.
-
     float dz = zLower - zUpper;  // Not const since later it might change depending on the type of module
 
     const float dzCut = 1.f;
 
-    if (alpaka::math::abs(acc, dz) >= dzCut)
+    if (alpaka::math::abs(acc, dz) >= dzCut) {
       return false;
-    // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap)
+    }
+    // Cut #2: drt cut. The drt difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap)
     // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100
-    const float drtCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f;
+    const float drtCut = mod.moduleType == PS ? 2.f : 10.f;
     drt = rtLower - rtUpper;
-    if (alpaka::math::abs(acc, drt) >= drtCut)
+    if (alpaka::math::abs(acc, drt) >= drtCut) {
       return false;
-    // The new scheme shifts strip hits to be "aligned" along the line of sight from interaction point to the pixel hit (if it is PS modules)
+    }
     float xn = 0, yn = 0, zn = 0;
 
     float shiftedCoords[3];
-    shiftStripHits(acc,
-                   modules,
-                   lowerModuleIndex,
-                   upperModuleIndex,
-                   lowerHitIndex,
-                   upperHitIndex,
-                   shiftedCoords,
-                   xLower,
-                   yLower,
-                   zLower,
-                   rtLower,
-                   xUpper,
-                   yUpper,
-                   zUpper,
-                   rtUpper);
+    shiftStripHits(acc, mod, shiftedCoords, xLower, yLower, zLower, rtLower, xUpper, yUpper, zUpper, rtUpper);
 
     xn = shiftedCoords[0];
     yn = shiftedCoords[1];
     zn = shiftedCoords[2];
 
-    if (modules.moduleType()[lowerModuleIndex] == PS) {
-      // Appropriate lower or upper hit is modified after checking which one was actually shifted
-      if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) {
+    float dpX1, dpY1, dpX2, dpY2;
+    if (mod.moduleType == PS) {
+      if (mod.moduleLayerType == Pixel) {
         shiftedX = xn;
         shiftedY = yn;
         shiftedZ = zUpper;
-        dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, shiftedX, shiftedY);
-        noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
+        dpX1 = xLower;
+        dpY1 = yLower;
+        dpX2 = xn;
+        dpY2 = yn;
       } else {
         shiftedX = xn;
         shiftedY = yn;
         shiftedZ = zLower;
-        dPhi = cms::alpakatools::deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper);
-        noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
+        dpX1 = xn;
+        dpY1 = yn;
+        dpX2 = xUpper;
+        dpY2 = yUpper;
       }
     } else {
       shiftedX = xn;
       shiftedY = yn;
       shiftedZ = zUpper;
-      dPhi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xn, yn);
-      noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
+      dpX1 = xLower;
+      dpY1 = yLower;
+      dpX2 = xn;
+      dpY2 = yn;
+    }
+
+    const float crossDP = dpX1 * dpY2 - dpX2 * dpY1;
+    const float dotDP = dpX1 * dpX2 + dpY1 * dpY2;
+
+    // Algebraic dPhiChange pre-check using only arithmetic (looser than actual cut):
+    // - Underestimates |dPhi| via Taylor lower bound: atan(z) >= z - z^3/3 for z in [0,1]
+    // - Overestimates asin(x) via cubic upper bound: asin(x) <= x + 0.2*x^3 for x in [0,0.5]
+    // - Overestimates sqrt(A^2+B^2) via alpha-max-beta-min: <= max(A,B) + 0.415*min(A,B)
+    //   where 0.415 > sqrt(2)-1 = 0.4142.
+    if (dotDP > 0.f) {
+      float absCrossDP = alpaka::math::abs(acc, crossDP);
+
+      if (absCrossDP >= dotDP)
+        return false;
+
+      float z = absCrossDP / dotDP;
+      float approxDPhiLower = z * (1.f - 0.33333333f * z * z);
+
+      float rt = mod.moduleLayerType == Pixel ? rtLower : rtUpper;
+      float x = alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax);
+      float fast_miniSlope = x * (1.f + 0.2f * x * x);
+
+      float miniLum = z / alpaka::math::abs(acc, dz) * kDeltaZLum;
+      float maxAB = alpaka::math::max(acc, mod.sqrtTermFlat, miniLum);
+      float minAB = alpaka::math::min(acc, mod.sqrtTermFlat, miniLum);
+      float fast_miniCutEC = fast_miniSlope + maxAB + 0.415f * minAB;
+
+      if (approxDPhiLower >= fast_miniCutEC)
+        return false;
+    } else {
+      return false;
     }
 
+    // Cut #3: dphi
+    dPhi = alpaka::math::atan2(acc, crossDP, dotDP);
+
     // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic"
     // if it was an endcap it will have zero effect
-    if (modules.moduleType()[lowerModuleIndex] == PS) {
-      dz = modules.moduleLayerType()[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn;
+    if (mod.moduleType == PS) {
+      dz = mod.moduleLayerType == Pixel ? zLower - zn : zUpper - zn;
     }
 
-    float miniCut = 0;
-    miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel
-                  ? dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, ptCut, dPhi, dz)
-                  : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, ptCut, dPhi, dz);
+    float miniCut = mod.moduleLayerType == Pixel ? dPhiThreshold(acc, rtLower, mod, ptCut, dPhi, dz)
+                                                 : dPhiThreshold(acc, rtUpper, mod, ptCut, dPhi, dz);
 
-    if (alpaka::math::abs(acc, dPhi) >= miniCut)
+    if (alpaka::math::abs(acc, dPhi) >= miniCut) {
       return false;
+    }
 
-    // Cut #4: Another cut on the dphi after some modification
+    // Cut #4: dPhiChange
     // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3119-L3124
 
     float dzFrac = alpaka::math::abs(acc, dz) / alpaka::math::abs(acc, zLower);
     dPhiChange = dPhi / dzFrac * (1.f + dzFrac);
+    noShiftedDphi = cms::alpakatools::deltaPhi(acc, xLower, yLower, xUpper, yUpper);
     noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac);
 
     return alpaka::math::abs(acc, dPhiChange) < miniCut;
   }
 
   template <alpaka::concepts::Acc TAcc>
-  ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc,
-                                               ModulesConst modules,
-                                               uint16_t lowerModuleIndex,
-                                               uint16_t upperModuleIndex,
-                                               unsigned int lowerHitIndex,
-                                               unsigned int upperHitIndex,
-                                               float& dz,
-                                               float& dPhi,
-                                               float& dPhiChange,
-                                               float& shiftedX,
-                                               float& shiftedY,
-                                               float& shiftedZ,
-                                               float& noShiftedDphi,
-                                               float& noShiftedDphiChange,
-                                               float xLower,
-                                               float yLower,
-                                               float zLower,
-                                               float rtLower,
-                                               float xUpper,
-                                               float yUpper,
-                                               float zUpper,
-                                               float rtUpper,
-                                               const float ptCut,
-                                               uint16_t clustSizeLower,
-                                               uint16_t clustSizeUpper,
-                                               const uint16_t clustSizeCut) {
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runMiniDoubletDefaultAlgo(TAcc const& acc,
+                                                                ModuleMDData const& mod,
+                                                                float& dz,
+                                                                float& dPhi,
+                                                                float& dPhiChange,
+                                                                float& shiftedX,
+                                                                float& shiftedY,
+                                                                float& shiftedZ,
+                                                                float& noShiftedDphi,
+                                                                float& noShiftedDphiChange,
+                                                                float xLower,
+                                                                float yLower,
+                                                                float zLower,
+                                                                float rtLower,
+                                                                float xUpper,
+                                                                float yUpper,
+                                                                float zUpper,
+                                                                float rtUpper,
+                                                                const float ptCut,
+                                                                uint16_t clustSizeLower,
+                                                                uint16_t clustSizeUpper,
+                                                                const uint16_t clustSizeCut) {
     if (clustSizeLower > clustSizeCut or clustSizeUpper > clustSizeCut) {
       return false;
     }
-    if (modules.subdets()[lowerModuleIndex] == Barrel) {
+    if (mod.subdet == Barrel) {
       return runMiniDoubletDefaultAlgoBarrel(acc,
-                                             modules,
-                                             lowerModuleIndex,
-                                             upperModuleIndex,
-                                             lowerHitIndex,
-                                             upperHitIndex,
+                                             mod,
                                              dz,
                                              dPhi,
                                              dPhiChange,
@@ -659,11 +643,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                              ptCut);
     } else {
       return runMiniDoubletDefaultAlgoEndcap(acc,
-                                             modules,
-                                             lowerModuleIndex,
-                                             upperModuleIndex,
-                                             lowerHitIndex,
-                                             upperHitIndex,
+                                             mod,
                                              dz,
                                              dPhi,
                                              dPhiChange,
@@ -696,7 +676,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                   const float ptCut,
                                   const uint16_t clustSizeCut) const {
       for (uint16_t lowerModuleIndex : cms::alpakatools::uniform_elements_y(acc, modules.nLowerModules())) {
-        uint16_t upperModuleIndex = modules.partnerModuleIndices()[lowerModuleIndex];
         int nLowerHits = hitsRanges.hitRangesnLower()[lowerModuleIndex];
         int nUpperHits = hitsRanges.hitRangesnUpper()[lowerModuleIndex];
         if (hitsRanges.hitRangesLower()[lowerModuleIndex] == -1)
@@ -705,6 +684,42 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
         unsigned int loHitArrayIndex = hitsRanges.hitRangesLower()[lowerModuleIndex];
         int limit = nUpperHits * nLowerHits;
 
+        // Hoist module-constant data once per module to avoid redundant SoA loads per hit pair.
+        ModuleMDData mod;
+        mod.lowerModuleIndex = lowerModuleIndex;
+        mod.subdet = modules.subdets()[lowerModuleIndex];
+        mod.side = modules.sides()[lowerModuleIndex];
+        mod.moduleType = modules.moduleType()[lowerModuleIndex];
+        mod.moduleLayerType = modules.moduleLayerType()[lowerModuleIndex];
+        mod.iL = modules.layers()[lowerModuleIndex] - 1;
+        mod.isTilted = (mod.subdet == Barrel && mod.side != Center);
+        mod.isEndcapTwoS = (mod.subdet == Endcap && mod.moduleType == TwoS);
+        mod.isGloballyInner = modules.isGloballyInner()[lowerModuleIndex];
+        mod.slope = modules.dxdys()[lowerModuleIndex];
+        mod.drdz = modules.drdzs()[lowerModuleIndex];
+        mod.moduleSep = moduleGapSize(modules, lowerModuleIndex);
+
+        // Pre-compute dPhiThreshold module-constant parts
+        float rLayNominal = (mod.subdet == Barrel) ? kMiniRminMeanBarrel[mod.iL] : kMiniRminMeanEndcap[mod.iL];
+        mod.miniPVoff = 0.1f / rLayNominal;
+        mod.miniMuls = (mod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[mod.iL] * 3.f / ptCut
+                                              : kMiniMulsPtScaleEndcap[mod.iL] * 3.f / ptCut;
+        mod.sqrtTermBase = mod.miniMuls * mod.miniMuls + mod.miniPVoff * mod.miniPVoff;
+        mod.sqrtTermFlat = alpaka::math::sqrt(acc, mod.sqrtTermBase);
+
+        if (mod.isTilted) {
+          float drdzThresh;
+          if (mod.moduleType == PS and mod.moduleLayerType == Strip) {
+            drdzThresh = modules.drdzs()[lowerModuleIndex];
+          } else {
+            drdzThresh = modules.drdzs()[modules.partnerModuleIndices()[lowerModuleIndex]];
+          }
+          mod.miniTilt2 = 0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (drdzThresh * drdzThresh) /
+                          (1.f + drdzThresh * drdzThresh) / mod.moduleSep;
+        } else {
+          mod.miniTilt2 = 0.f;
+        }
+
         for (int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) {
           int lowerHitIndex = hitIndex / nUpperHits;
           int upperHitIndex = hitIndex % nUpperHits;
@@ -727,11 +742,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
           float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, noShiftedDphi, noShiftedDphiChange;
           bool success = runMiniDoubletDefaultAlgo(acc,
-                                                   modules,
-                                                   lowerModuleIndex,
-                                                   upperModuleIndex,
-                                                   lowerHitArrayIndex,
-                                                   upperHitArrayIndex,
+                                                   mod,
                                                    dz,
                                                    dphi,
                                                    dphichange,
@@ -769,10 +780,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                             mds,
                             hitsBase,
                             hitsExtended,
-                            modules,
+                            mod,
                             lowerHitArrayIndex,
                             upperHitArrayIndex,
-                            lowerModuleIndex,
                             dz,
                             dphi,
                             dphichange,
diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h
index d58917c81dee3..8e12327f5bcb6 100644
--- a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h
+++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h
@@ -43,7 +43,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE float sigmoid_activation(TAcc const& acc, const float x) {
-    return alpaka::math::exp(acc, x) / (alpaka::math::exp(acc, x) + 1.f);
+    const float e = alpaka::math::exp(acc, x);
+    return e / (e + 1.f);
   }
 
   template <int IN_FEATURES, int OUT_FEATURES>
diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h
index e81b3bff105a8..d137889a98fbf 100644
--- a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h
+++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h
@@ -493,15 +493,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp,
         centerYTemp, pixelRadiusErrorTemp;
 
+    PixelSeedData pixelData =
+        loadPixelSeedData(pixelSeeds, pixelSegments, mds, segments, pixelSegmentIndex, pixelSegmentArrayIndex);
+
     if (not runPixelTripletDefaultAlgo<dnn::pt3dnn::pT5WP>(acc,
                                                            modules,
-                                                           ranges,
                                                            mds,
                                                            segments,
-                                                           pixelSeeds,
-                                                           pixelSegments,
+                                                           pixelData,
                                                            triplets,
-                                                           pixelSegmentIndex,
                                                            t5InnerT3Index,
                                                            pixelRadiusTemp,
                                                            tripletRadius,
@@ -520,9 +520,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1];
     unsigned int thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0];
     unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1];
-
-    unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0];
-    unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1];
     unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0];
     unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0];
     unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1];
@@ -538,10 +535,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     uint16_t lowerModuleIndices[Params_T5::kLayers] = {
         lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5};
 
-    float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]};
-    float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]};
-    float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]};
-    float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]};
+    float rtPix[Params_pLS::kLayers] = {pixelData.rt_InLo, pixelData.rt_InUp};
+    float xPix[Params_pLS::kLayers] = {pixelData.x_InLo, pixelData.x_InUp};
+    float yPix[Params_pLS::kLayers] = {pixelData.y_InLo, pixelData.y_InUp};
+    float zPix[Params_pLS::kLayers] = {pixelData.z_InLo, pixelData.z_InUp};
     float zs[Params_T5::kLayers] = {mds.anchorZ()[firstMDIndex],
                                     mds.anchorZ()[secondMDIndex],
                                     mds.anchorZ()[thirdMDIndex],
@@ -553,16 +550,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                      mds.anchorRt()[fourthMDIndex],
                                      mds.anchorRt()[fifthMDIndex]};
 
-    float pixelSegmentPt = pixelSeeds.ptIn()[pixelSegmentArrayIndex];
-    float pixelSegmentPx = pixelSeeds.px()[pixelSegmentArrayIndex];
-    float pixelSegmentPy = pixelSeeds.py()[pixelSegmentArrayIndex];
-    float pixelSegmentPz = pixelSeeds.pz()[pixelSegmentArrayIndex];
-    int pixelSegmentCharge = pixelSeeds.charge()[pixelSegmentArrayIndex];
-
     rzChiSquared = 0;
 
     //get the appropriate centers
-    pixelRadius = pixelSegments.circleRadius()[pixelSegmentArrayIndex];
+    pixelRadius = pixelData.circleRadius;
 
     rzChiSquared = computePT5RZChiSquared(acc,
                                           modules,
@@ -573,11 +564,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                           zPix,
                                           rts,
                                           zs,
-                                          pixelSegmentPt,
-                                          pixelSegmentPx,
-                                          pixelSegmentPy,
-                                          pixelSegmentPz,
-                                          pixelSegmentCharge);
+                                          pixelData.ptIn,
+                                          pixelData.px,
+                                          pixelData.py,
+                                          pixelData.pz,
+                                          pixelData.charge);
 
     if (pixelRadius < 5.0f * kR1GeVf) {  //only apply r-z chi2 cuts for <5GeV tracks
       if (not passPT5RZChiSquaredCuts(modules,
@@ -603,8 +594,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                     mds.anchorY()[fifthMDIndex]};
 
     //get the appropriate centers
-    centerX = pixelSegments.circleCenterX()[pixelSegmentArrayIndex];
-    centerY = pixelSegments.circleCenterY()[pixelSegmentArrayIndex];
+    centerX = pixelData.circleCenterX;
+    centerY = pixelData.circleCenterY;
 
     float T5CenterX = quintuplets.regressionCenterX()[quintupletIndex];
     float T5CenterY = quintuplets.regressionCenterY()[quintupletIndex];
diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h
index 6dfa2358d568f..6f42b0c96f7b1 100644
--- a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h
+++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h
@@ -15,37 +15,72 @@
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
+  // Pre-loaded pixel segment data, invariant across triplets for a given pixel segment.
+  // Hoisted to avoid redundant SoA loads through the pT3 function chain.
+  struct PixelSeedData {
+    float ptIn, px, py, pz, ptErr, etaErr;
+    float eta, phi;
+    int charge;
+    float circleCenterX, circleCenterY, circleRadius;
+    float x_InLo, y_InLo, z_InLo, rt_InLo;  // pixel inner MD (pLSMD0)
+    float x_InUp, y_InUp, z_InUp, rt_InUp;  // pixel outer MD (pLSMD1)
+    float alpha_InLo;                       // pixel segment dPhiChange
+  };
+
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE PixelSeedData loadPixelSeedData(PixelSeedsConst pixelSeeds,
+                                                                 PixelSegmentsConst pixelSegments,
+                                                                 MiniDoubletsConst mds,
+                                                                 SegmentsConst segments,
+                                                                 unsigned int pixelSegmentIndex,
+                                                                 unsigned int pixelSegmentArrayIndex) {
+    unsigned int pixelMD0 = segments.mdIndices()[pixelSegmentIndex][0];
+    unsigned int pixelMD1 = segments.mdIndices()[pixelSegmentIndex][1];
+    PixelSeedData pixelData;
+    pixelData.ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex];
+    pixelData.px = pixelSeeds.px()[pixelSegmentArrayIndex];
+    pixelData.py = pixelSeeds.py()[pixelSegmentArrayIndex];
+    pixelData.pz = pixelSeeds.pz()[pixelSegmentArrayIndex];
+    pixelData.ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex];
+    pixelData.etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex];
+    pixelData.eta = pixelSeeds.eta()[pixelSegmentArrayIndex];
+    pixelData.phi = pixelSeeds.phi()[pixelSegmentArrayIndex];
+    pixelData.charge = pixelSeeds.charge()[pixelSegmentArrayIndex];
+    pixelData.circleCenterX = pixelSegments.circleCenterX()[pixelSegmentArrayIndex];
+    pixelData.circleCenterY = pixelSegments.circleCenterY()[pixelSegmentArrayIndex];
+    pixelData.circleRadius = pixelSegments.circleRadius()[pixelSegmentArrayIndex];
+    pixelData.x_InLo = mds.anchorX()[pixelMD0];
+    pixelData.y_InLo = mds.anchorY()[pixelMD0];
+    pixelData.z_InLo = mds.anchorZ()[pixelMD0];
+    pixelData.rt_InLo = mds.anchorRt()[pixelMD0];
+    pixelData.x_InUp = mds.anchorX()[pixelMD1];
+    pixelData.y_InUp = mds.anchorY()[pixelMD1];
+    pixelData.z_InUp = mds.anchorZ()[pixelMD1];
+    pixelData.rt_InUp = mds.anchorRt()[pixelMD1];
+    pixelData.alpha_InLo = __H2F(segments.dPhiChanges()[pixelSegmentIndex]);
+    return pixelData;
+  }
+
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc,
                                                                 ModulesConst modules,
-                                                                ObjectRangesConst ranges,
                                                                 MiniDoubletsConst mds,
                                                                 SegmentsConst segments,
-                                                                PixelSeedsConst pixelSeeds,
-                                                                uint16_t pixelModuleIndex,
+                                                                const PixelSeedData& pixelData,
                                                                 uint16_t segmentInnerModuleIndex,
                                                                 uint16_t segmentOuterModuleIndex,
-                                                                unsigned int pLSIndex,
                                                                 unsigned int segmentIndex,
-                                                                unsigned int pLSMD0Index,
-                                                                unsigned int pLSMD1Index,
                                                                 unsigned int segmentMD0Index,
                                                                 unsigned int segmentMD1Index,
                                                                 const float ptCut);
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc,
                                                                 ModulesConst modules,
-                                                                ObjectRangesConst ranges,
                                                                 MiniDoubletsConst mds,
                                                                 SegmentsConst segments,
-                                                                PixelSeedsConst pixelSeeds,
-                                                                uint16_t pixelModuleIndex,
+                                                                const PixelSeedData& pixelData,
                                                                 uint16_t segmentInnerModuleIndex,
                                                                 uint16_t segmentOuterModuleIndex,
-                                                                unsigned int pLSIndex,
                                                                 unsigned int segmentIndex,
-                                                                unsigned int pLSMD0Index,
-                                                                unsigned int pLSMD1Index,
                                                                 unsigned int segmentMD0Index,
                                                                 unsigned int segmentMD1Index,
                                                                 const float ptCut);
@@ -124,59 +159,43 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc,
                                                                      ModulesConst modules,
-                                                                     ObjectRangesConst ranges,
                                                                      MiniDoubletsConst mds,
                                                                      SegmentsConst segments,
-                                                                     PixelSeedsConst pixelSeeds,
-                                                                     uint16_t pixelLowerModuleIndex,
+                                                                     const PixelSeedData& pixelData,
                                                                      uint16_t outerInnerLowerModuleIndex,
                                                                      uint16_t outerOuterLowerModuleIndex,
-                                                                     unsigned int innerSegmentIndex,
                                                                      unsigned int outerSegmentIndex,
                                                                      const float ptCut) {
     short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex];
     short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex];
 
-    unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0];
-    unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1];
-
-    unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0];
-    unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1];
+    unsigned int segmentMD0Index = segments.mdIndices()[outerSegmentIndex][0];
+    unsigned int segmentMD1Index = segments.mdIndices()[outerSegmentIndex][1];
 
     if (outerInnerLowerModuleSubdet == Barrel and
         (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) {
       return runTripletDefaultAlgoPPBB(acc,
                                        modules,
-                                       ranges,
                                        mds,
                                        segments,
-                                       pixelSeeds,
-                                       pixelLowerModuleIndex,
+                                       pixelData,
                                        outerInnerLowerModuleIndex,
                                        outerOuterLowerModuleIndex,
-                                       innerSegmentIndex,
                                        outerSegmentIndex,
-                                       firstMDIndex,
-                                       secondMDIndex,
-                                       thirdMDIndex,
-                                       fourthMDIndex,
+                                       segmentMD0Index,
+                                       segmentMD1Index,
                                        ptCut);
     } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) {
       return runTripletDefaultAlgoPPEE(acc,
                                        modules,
-                                       ranges,
                                        mds,
                                        segments,
-                                       pixelSeeds,
-                                       pixelLowerModuleIndex,
+                                       pixelData,
                                        outerInnerLowerModuleIndex,
                                        outerOuterLowerModuleIndex,
-                                       innerSegmentIndex,
                                        outerSegmentIndex,
-                                       firstMDIndex,
-                                       secondMDIndex,
-                                       thirdMDIndex,
-                                       fourthMDIndex,
+                                       segmentMD0Index,
+                                       segmentMD1Index,
                                        ptCut);
     }
     return false;
@@ -537,13 +556,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   template <typename WP = dnn::pt3dnn::pT3WP, alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc,
                                                                  ModulesConst modules,
-                                                                 ObjectRangesConst ranges,
                                                                  MiniDoubletsConst mds,
                                                                  SegmentsConst segments,
-                                                                 PixelSeedsConst pixelSeeds,
-                                                                 PixelSegmentsConst pixelSegments,
+                                                                 const PixelSeedData& pixelData,
                                                                  TripletsConst triplets,
-                                                                 unsigned int pixelSegmentIndex,
                                                                  unsigned int tripletIndex,
                                                                  float& pixelRadius,
                                                                  float& tripletRadius,
@@ -556,63 +572,47 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                                                  const float ptCut,
                                                                  bool runDNN = true,
                                                                  bool runChiSquaredCuts = true) {
-    //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet
-    uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex];
-
     uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0];
     uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1];
     uint16_t upperModuleIndex = triplets.lowerModuleIndices()[tripletIndex][2];
 
-    {
-      // pixel segment vs inner segment of the triplet
-      if (not runPixelTrackletDefaultAlgopT3(acc,
-                                             modules,
-                                             ranges,
-                                             mds,
-                                             segments,
-                                             pixelSeeds,
-                                             pixelModuleIndex,
-                                             lowerModuleIndex,
-                                             middleModuleIndex,
-                                             pixelSegmentIndex,
-                                             triplets.segmentIndices()[tripletIndex][0],
-                                             ptCut))
-        return false;
-
-      //pixel segment vs outer segment of triplet
-      if (not runPixelTrackletDefaultAlgopT3(acc,
-                                             modules,
-                                             ranges,
-                                             mds,
-                                             segments,
-                                             pixelSeeds,
-                                             pixelModuleIndex,
-                                             middleModuleIndex,
-                                             upperModuleIndex,
-                                             pixelSegmentIndex,
-                                             triplets.segmentIndices()[tripletIndex][1],
-                                             ptCut))
-        return false;
-    }
+    // Cheap radius check first, before expensive tracklet compatibility checks.
+    pixelRadius = pixelData.ptIn * kR1GeVf;
+    pixelRadiusError = pixelData.ptErr * kR1GeVf;
+    tripletRadius = triplets.radius()[tripletIndex];
 
-    //pt matching between the pixel ptin and the triplet circle pt
-    unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex];
-    float pixelSegmentPt = pixelSeeds.ptIn()[pixelSegmentArrayIndex];
-    float pixelSegmentPtError = pixelSeeds.ptErr()[pixelSegmentArrayIndex];
-    float pixelSegmentPx = pixelSeeds.px()[pixelSegmentArrayIndex];
-    float pixelSegmentPy = pixelSeeds.py()[pixelSegmentArrayIndex];
-    float pixelSegmentPz = pixelSeeds.pz()[pixelSegmentArrayIndex];
-    int pixelSegmentCharge = pixelSeeds.charge()[pixelSegmentArrayIndex];
+    if (not passRadiusCriterion(acc,
+                                modules,
+                                pixelRadius,
+                                pixelRadiusError,
+                                tripletRadius,
+                                lowerModuleIndex,
+                                middleModuleIndex,
+                                upperModuleIndex))
+      return false;
 
-    float pixelG = pixelSegments.circleCenterX()[pixelSegmentArrayIndex];
-    float pixelF = pixelSegments.circleCenterY()[pixelSegmentArrayIndex];
-    float pixelRadiusPCA = pixelSegments.circleRadius()[pixelSegmentArrayIndex];
+    if (not runPixelTrackletDefaultAlgopT3(acc,
+                                           modules,
+                                           mds,
+                                           segments,
+                                           pixelData,
+                                           lowerModuleIndex,
+                                           middleModuleIndex,
+                                           triplets.segmentIndices()[tripletIndex][0],
+                                           ptCut))
+      return false;
 
-    unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0];
-    unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1];
+    if (not runPixelTrackletDefaultAlgopT3(acc,
+                                           modules,
+                                           mds,
+                                           segments,
+                                           pixelData,
+                                           middleModuleIndex,
+                                           upperModuleIndex,
+                                           triplets.segmentIndices()[tripletIndex][1],
+                                           ptCut))
+      return false;
 
-    pixelRadius = pixelSegmentPt * kR1GeVf;
-    pixelRadiusError = pixelSegmentPtError * kR1GeVf;
     unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0];
     unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1];
 
@@ -625,20 +625,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     float ys[Params_T3::kLayers] = {
         mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]};
 
-    float g, f;
-    tripletRadius = triplets.radius()[tripletIndex];
-    g = triplets.centerX()[tripletIndex];
-    f = triplets.centerY()[tripletIndex];
-
-    if (not passRadiusCriterion(acc,
-                                modules,
-                                pixelRadius,
-                                pixelRadiusError,
-                                tripletRadius,
-                                lowerModuleIndex,
-                                middleModuleIndex,
-                                upperModuleIndex))
-      return false;
+    float g = triplets.centerX()[tripletIndex];
+    float f = triplets.centerY()[tripletIndex];
 
     uint16_t lowerModuleIndices[Params_T3::kLayers] = {lowerModuleIndex, middleModuleIndex, upperModuleIndex};
 
@@ -647,10 +635,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
           mds.anchorRt()[firstMDIndex], mds.anchorRt()[secondMDIndex], mds.anchorRt()[thirdMDIndex]};
       float zs[Params_T3::kLayers] = {
           mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex]};
-      float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]};
-      float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]};
-      float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]};
-      float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]};
+      float rtPix[Params_pLS::kLayers] = {pixelData.rt_InLo, pixelData.rt_InUp};
+      float xPix[Params_pLS::kLayers] = {pixelData.x_InLo, pixelData.x_InUp};
+      float yPix[Params_pLS::kLayers] = {pixelData.y_InLo, pixelData.y_InUp};
+      float zPix[Params_pLS::kLayers] = {pixelData.z_InLo, pixelData.z_InUp};
 
       rzChiSquared = computePT3RZChiSquared(acc,
                                             modules,
@@ -663,18 +651,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                             xs,
                                             ys,
                                             zs,
-                                            pixelSegmentPt,
-                                            pixelSegmentPx,
-                                            pixelSegmentPy,
-                                            pixelSegmentPz,
-                                            pixelSegmentCharge);
-      if (runChiSquaredCuts && pixelSegmentPt < 5.0f) {
+                                            pixelData.ptIn,
+                                            pixelData.px,
+                                            pixelData.py,
+                                            pixelData.pz,
+                                            pixelData.charge);
+      if (runChiSquaredCuts && pixelData.ptIn < 5.0f) {
         if (!passPT3RZChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared))
           return false;
       }
 
-      rPhiChiSquared =
-          computePT3RPhiChiSquared(acc, modules, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys);
+      rPhiChiSquared = computePT3RPhiChiSquared(acc,
+                                                modules,
+                                                lowerModuleIndices,
+                                                pixelData.circleCenterX,
+                                                pixelData.circleCenterY,
+                                                pixelData.circleRadius,
+                                                xs,
+                                                ys);
 
       rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix);
     }
@@ -691,8 +685,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                                   pixelRadius,
                                                   pixelRadiusError,
                                                   rzChiSquared,
-                                                  pixelSeeds.eta()[pixelSegmentArrayIndex],
-                                                  pixelSegmentPt,
+                                                  pixelData.eta,
+                                                  pixelData.ptIn,
                                                   module_type_3)) {
       return false;
     }
@@ -719,6 +713,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
       for (unsigned int i_pLS : cms::alpakatools::uniform_elements_z(acc, nPixelSegments)) {
         auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS];
 
+        if (pixelSegments.isDup()[i_pLS])
+          continue;
+        if (pixelSegments.partOfPT5()[i_pLS])
+          continue;
+
+        // Hoist pixel segment data: loaded once per pixel segment instead of
+        // redundantly through the runPixelTripletDefaultAlgo call chain.
+        uint16_t pixelModuleIndex = modules.nLowerModules();
+        unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS;
+
+        PixelSeedData pixelData = loadPixelSeedData(pixelSeeds, pixelSegments, mds, segments, pixelSegmentIndex, i_pLS);
+
         for (unsigned int iLSModule :
              cms::alpakatools::uniform_elements_y(acc, connectedPixelIndex[i_pLS], iLSModule_max)) {
           uint16_t tripletLowerModuleIndex =
@@ -736,18 +742,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
           if (modules.moduleType()[tripletLowerModuleIndex] == TwoS)
             continue;
 
-          uint16_t pixelModuleIndex = modules.nLowerModules();
           unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex];
           if (nOuterTriplets == 0)
             continue;
 
-          unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS;
-
-          if (pixelSegments.isDup()[i_pLS])
-            continue;
-          if (pixelSegments.partOfPT5()[i_pLS])
-            continue;  //don't make pT3s for those pixels that are part of pT5
-
           short layer2_adjustment;
           if (modules.layers()[tripletLowerModuleIndex] == 1) {
             layer2_adjustment = 1;
@@ -773,13 +771,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                 pixelRadiusError;
             bool success = runPixelTripletDefaultAlgo(acc,
                                                       modules,
-                                                      ranges,
                                                       mds,
                                                       segments,
-                                                      pixelSeeds,
-                                                      pixelSegments,
+                                                      pixelData,
                                                       triplets,
-                                                      pixelSegmentIndex,
                                                       outerTripletIndex,
                                                       pixelRadius,
                                                       tripletRadius,
@@ -798,9 +793,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
               float eta =
                   mds.anchorEta()[segments
                                       .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]];
-              float eta_pix = pixelSeeds.eta()[i_pLS];
-              float phi_pix = pixelSeeds.phi()[i_pLS];
-              float pt = pixelSeeds.ptIn()[i_pLS];
               float score = rPhiChiSquared + rPhiChiSquaredInwards;
               unsigned int totOccupancyPixelTriplets =
                   alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{});
@@ -825,11 +817,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                         rPhiChiSquaredInwards,
                                         rzChiSquared,
                                         pixelTripletIndex,
-                                        pt,
+                                        pixelData.ptIn,
                                         eta,
                                         phi,
-                                        eta_pix,
-                                        phi_pix,
+                                        pixelData.eta,
+                                        pixelData.phi,
                                         pixelRadiusError,
                                         score);
                 triplets.partOfPT3()[outerTripletIndex] = true;
@@ -844,17 +836,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc,
                                                                 ModulesConst modules,
-                                                                ObjectRangesConst ranges,
                                                                 MiniDoubletsConst mds,
                                                                 SegmentsConst segments,
-                                                                PixelSeedsConst pixelSeeds,
-                                                                uint16_t pixelModuleIndex,
+                                                                const PixelSeedData& pixelData,
                                                                 uint16_t segmentInnerModuleIndex,
                                                                 uint16_t segmentOuterModuleIndex,
-                                                                unsigned int pLSIndex,
                                                                 unsigned int segmentIndex,
-                                                                unsigned int pLSMD0Index,
-                                                                unsigned int pLSMD1Index,
                                                                 unsigned int segmentMD0Index,
                                                                 unsigned int segmentMD1Index,
                                                                 const float ptCut) {
@@ -862,34 +849,33 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
     bool isPS_OutLo = (modules.moduleType()[segmentInnerModuleIndex] == PS);
 
-    float rt_InLo = mds.anchorRt()[pLSMD0Index];
-    float rt_InUp = mds.anchorRt()[pLSMD1Index];
+    const float rt_InLo = pixelData.rt_InLo;
+    const float rt_InUp = pixelData.rt_InUp;
     float rt_OutLo = mds.anchorRt()[segmentMD0Index];
     float rt_OutUp = mds.anchorRt()[segmentMD1Index];
 
-    float z_InUp = mds.anchorZ()[pLSMD1Index];
+    const float z_InUp = pixelData.z_InUp;
     float z_OutLo = mds.anchorZ()[segmentMD0Index];
 
-    float x_InLo = mds.anchorX()[pLSMD0Index];
-    float x_InUp = mds.anchorX()[pLSMD1Index];
+    const float x_InLo = pixelData.x_InLo;
+    const float x_InUp = pixelData.x_InUp;
     float x_OutLo = mds.anchorX()[segmentMD0Index];
     float x_OutUp = mds.anchorX()[segmentMD1Index];
 
-    float y_InLo = mds.anchorY()[pLSMD0Index];
-    float y_InUp = mds.anchorY()[pLSMD1Index];
+    const float y_InLo = pixelData.y_InLo;
+    const float y_InUp = pixelData.y_InUp;
     float y_OutLo = mds.anchorY()[segmentMD0Index];
     float y_OutUp = mds.anchorY()[segmentMD1Index];
 
     float rt_InOut = rt_InUp;
 
-    unsigned int pixelSegmentArrayIndex = pLSIndex - ranges.segmentModuleIndices()[pixelModuleIndex];
-    float ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex];
+    float ptIn = pixelData.ptIn;
     float ptSLo = ptIn;
-    float px = pixelSeeds.px()[pixelSegmentArrayIndex];
-    float py = pixelSeeds.py()[pixelSegmentArrayIndex];
-    float pz = pixelSeeds.pz()[pixelSegmentArrayIndex];
-    float ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex];
-    float etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex];
+    float px = pixelData.px;
+    float py = pixelData.py;
+    float pz = pixelData.pz;
+    float ptErr = pixelData.ptErr;
+    float etaErr = pixelData.etaErr;
     ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo));
     ptSLo = alpaka::math::min(acc, 10.0f, ptSLo);
 
@@ -960,7 +946,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
     //lots of array accesses below this...
 
-    float alpha_InLo = __H2F(segments.dPhiChanges()[pLSIndex]);
+    float alpha_InLo = pixelData.alpha_InLo;
     float alpha_OutLo = __H2F(segments.dPhiChanges()[segmentIndex]);
 
     bool isEC_lastLayer =
@@ -1101,17 +1087,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc,
                                                                 ModulesConst modules,
-                                                                ObjectRangesConst ranges,
                                                                 MiniDoubletsConst mds,
                                                                 SegmentsConst segments,
-                                                                PixelSeedsConst pixelSeeds,
-                                                                uint16_t pixelModuleIndex,
+                                                                const PixelSeedData& pixelData,
                                                                 uint16_t segmentInnerModuleIndex,
                                                                 uint16_t segmentOuterModuleIndex,
-                                                                unsigned int pLSIndex,
                                                                 unsigned int segmentIndex,
-                                                                unsigned int pLSMD0Index,
-                                                                unsigned int pLSMD1Index,
                                                                 unsigned int segmentMD0Index,
                                                                 unsigned int segmentMD1Index,
                                                                 const float ptCut) {
@@ -1119,36 +1100,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
     bool isPS_OutLo = (modules.moduleType()[segmentInnerModuleIndex] == PS);
 
-    float z_InUp = mds.anchorZ()[pLSMD1Index];
+    const float z_InUp = pixelData.z_InUp;
     float z_OutLo = mds.anchorZ()[segmentMD0Index];
 
     if (z_InUp * z_OutLo <= 0)
       return false;
 
-    float rt_InLo = mds.anchorRt()[pLSMD0Index];
-    float rt_InUp = mds.anchorRt()[pLSMD1Index];
+    const float rt_InLo = pixelData.rt_InLo;
+    const float rt_InUp = pixelData.rt_InUp;
     float rt_OutLo = mds.anchorRt()[segmentMD0Index];
     float rt_OutUp = mds.anchorRt()[segmentMD1Index];
 
-    float x_InLo = mds.anchorX()[pLSMD0Index];
-    float x_InUp = mds.anchorX()[pLSMD1Index];
+    const float x_InLo = pixelData.x_InLo;
+    const float x_InUp = pixelData.x_InUp;
     float x_OutLo = mds.anchorX()[segmentMD0Index];
     float x_OutUp = mds.anchorX()[segmentMD1Index];
 
-    float y_InLo = mds.anchorY()[pLSMD0Index];
-    float y_InUp = mds.anchorY()[pLSMD1Index];
+    const float y_InLo = pixelData.y_InLo;
+    const float y_InUp = pixelData.y_InUp;
     float y_OutLo = mds.anchorY()[segmentMD0Index];
     float y_OutUp = mds.anchorY()[segmentMD1Index];
 
-    unsigned int pixelSegmentArrayIndex = pLSIndex - ranges.segmentModuleIndices()[pixelModuleIndex];
-
-    float ptIn = pixelSeeds.ptIn()[pixelSegmentArrayIndex];
+    float ptIn = pixelData.ptIn;
     float ptSLo = ptIn;
-    float px = pixelSeeds.px()[pixelSegmentArrayIndex];
-    float py = pixelSeeds.py()[pixelSegmentArrayIndex];
-    float pz = pixelSeeds.pz()[pixelSegmentArrayIndex];
-    float ptErr = pixelSeeds.ptErr()[pixelSegmentArrayIndex];
-    float etaErr = pixelSeeds.etaErr()[pixelSegmentArrayIndex];
+    float px = pixelData.px;
+    float py = pixelData.py;
+    float pz = pixelData.pz;
+    float ptErr = pixelData.ptErr;
+    float etaErr = pixelData.etaErr;
 
     ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo));
     ptSLo = alpaka::math::min(acc, 10.0f, ptSLo);
@@ -1224,7 +1203,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     if (alpaka::math::abs(acc, dPhi) > dPhiCut)
       return false;
 
-    float alpha_InLo = __H2F(segments.dPhiChanges()[pLSIndex]);
+    float alpha_InLo = pixelData.alpha_InLo;
     float alpha_OutLo = __H2F(segments.dPhiChanges()[segmentIndex]);
 
     bool isEC_lastLayer =
@@ -1343,10 +1322,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
     const float dBetaROut2 = dBetaROut * dBetaROut;
 
-    betaOutCut =
-        alpaka::math::asin(
-            acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax))  //FIXME: need faster version
-        + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2);
+    //FIXME: need faster version
+    betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
+                 (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2);
 
     //Cut #6: The real beta cut
     if (alpaka::math::abs(acc, betaOut) >= betaOutCut)
diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h
index 6437625761950..b839418cdd5c8 100644
--- a/RecoTracker/LSTCore/src/alpaka/Segment.h
+++ b/RecoTracker/LSTCore/src/alpaka/Segment.h
@@ -85,10 +85,46 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     return moduleSeparation;
   }
 
+  // Pre-loaded module data for segment creation, eliminating redundant SoA lookups
+  // in inner loops. Populated once per module (outer/middle loop level).
+  struct ModuleSegData {
+    short subdet;
+    short side;
+    short layer;
+    unsigned int iL;  // layer - 1
+    short moduleType;
+    float drdz;
+    float moduleGapSize;
+    bool isTilted;
+    float segMiniTilt2;  // 0.25 * kPixelPSZpitch^2 * drdz^2 / (1+drdz^2) / gap^2; 0 if not tilted
+    float sdMuls;        // kMiniMulsPtScale[iL] * 3 / ptCut
+  };
+
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE ModuleSegData loadModuleSegData(ModulesConst modules,
+                                                                 uint16_t moduleIndex,
+                                                                 const float ptCut) {
+    ModuleSegData mod;
+    mod.subdet = modules.subdets()[moduleIndex];
+    mod.side = modules.sides()[moduleIndex];
+    mod.layer = modules.layers()[moduleIndex];
+    mod.iL = mod.layer - 1;
+    mod.moduleType = modules.moduleType()[moduleIndex];
+    mod.drdz = modules.drdzs()[moduleIndex];
+    mod.moduleGapSize = moduleGapSize_seg(modules, moduleIndex);
+    mod.isTilted = (mod.subdet == Barrel and mod.side != Center);
+    mod.segMiniTilt2 = mod.isTilted ? (0.25f * (kPixelPSZpitch * kPixelPSZpitch) * (mod.drdz * mod.drdz) /
+                                       (1.f + mod.drdz * mod.drdz) / (mod.moduleGapSize * mod.moduleGapSize))
+                                    : 0.f;
+    mod.sdMuls = (mod.subdet == Barrel) ? kMiniMulsPtScaleBarrel[mod.iL] * 3.f / ptCut
+                                        : kMiniMulsPtScaleEndcap[mod.iL] * 3.f / ptCut;
+    return mod;
+  }
+
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc,
                                                       float* dAlphaThresholdValues,
-                                                      ModulesConst modules,
+                                                      ModuleSegData const& innerMod,
+                                                      ModuleSegData const& outerMod,
                                                       MiniDoubletsConst mds,
                                                       float xIn,
                                                       float yIn,
@@ -98,14 +134,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                                       float yOut,
                                                       float zOut,
                                                       float rtOut,
-                                                      uint16_t innerLowerModuleIndex,
-                                                      uint16_t outerLowerModuleIndex,
                                                       unsigned int innerMDIndex,
                                                       unsigned int outerMDIndex,
                                                       const float ptCut) {
-    float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel)
-                       ? kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut
-                       : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut;
+    const float sdMuls = innerMod.sdMuls;
 
     //more accurate then outer rt - inner rt
     float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn));
@@ -113,62 +145,40 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     const float dAlpha_Bfield =
         alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax));
 
-    bool isInnerTilted =
-        modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] != Center;
-    bool isOuterTilted =
-        modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] != Center;
-
-    float drdzInner = modules.drdzs()[innerLowerModuleIndex];
-    float drdzOuter = modules.drdzs()[outerLowerModuleIndex];
-    float innerModuleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex);
-    float outerModuleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex);
-    const float innerminiTilt2 = isInnerTilted
-                                     ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) /
-                                        (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize))
-                                     : 0;
-
-    const float outerminiTilt2 = isOuterTilted
-                                     ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzOuter * drdzOuter) /
-                                        (1.f + drdzOuter * drdzOuter) / (outerModuleGapSize * outerModuleGapSize))
-                                     : 0;
-
-    float miniDelta = innerModuleGapSize;
-
     float sdLumForInnerMini2;
     float sdLumForOuterMini2;
 
-    if (modules.subdets()[innerLowerModuleIndex] == Barrel) {
-      sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield);
+    if (innerMod.subdet == Barrel) {
+      sdLumForInnerMini2 = innerMod.segMiniTilt2 * (dAlpha_Bfield * dAlpha_Bfield);
     } else {
       sdLumForInnerMini2 = (mds.dphis()[innerMDIndex] * mds.dphis()[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) /
                            (mds.dzs()[innerMDIndex] * mds.dzs()[innerMDIndex]);
     }
 
-    if (modules.subdets()[outerLowerModuleIndex] == Barrel) {
-      sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield);
+    if (outerMod.subdet == Barrel) {
+      sdLumForOuterMini2 = outerMod.segMiniTilt2 * (dAlpha_Bfield * dAlpha_Bfield);
     } else {
       sdLumForOuterMini2 = (mds.dphis()[outerMDIndex] * mds.dphis()[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) /
                            (mds.dzs()[outerMDIndex] * mds.dzs()[outerMDIndex]);
     }
 
     // Unique stuff for the segment dudes alone
+    const float miniDelta = innerMod.moduleGapSize;
     float dAlpha_res_inner =
-        0.02f / miniDelta *
-        (modules.subdets()[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn);
+        0.02f / miniDelta * (innerMod.subdet == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn);
     float dAlpha_res_outer =
-        0.02f / miniDelta *
-        (modules.subdets()[outerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut);
+        0.02f / miniDelta * (outerMod.subdet == Barrel ? 1.0f : alpaka::math::abs(acc, zOut) / rtOut);
 
     float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer;
 
-    if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] == Center) {
+    if (innerMod.subdet == Barrel and innerMod.side == Center) {
       dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls);
     } else {
       dAlphaThresholdValues[0] =
           dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2);
     }
 
-    if (modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] == Center) {
+    if (outerMod.subdet == Barrel and outerMod.side == Center) {
       dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls);
     } else {
       dAlphaThresholdValues[1] =
@@ -340,10 +350,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passDeltaPhiCutsEndcap(TAcc const& acc,
-                                                             ModulesConst modules,
                                                              MiniDoubletsConst mds,
-                                                             uint16_t innerLm,
-                                                             uint16_t outerLm,
                                                              unsigned int innerMD,
                                                              unsigned int outerMD,
                                                              const float dPhi,
@@ -365,10 +372,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc,
-                                                                  ModulesConst modules,
+                                                                  ModuleSegData const& innerMod,
+                                                                  ModuleSegData const& outerMod,
                                                                   MiniDoubletsConst mds,
-                                                                  uint16_t innerLowerModuleIndex,
-                                                                  uint16_t outerLowerModuleIndex,
                                                                   unsigned int innerMDIndex,
                                                                   unsigned int outerMDIndex,
                                                                   float& dPhi,
@@ -401,10 +407,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     zOut = mds.anchorZ()[outerMDIndex];
     rtOut = mds.anchorRt()[outerMDIndex];
 
-    float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax));
-    float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope;  //FIXME: need appropriate value
+    const float sdSinBarrel = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax);
+    float sdSlope = alpaka::math::asin(acc, sdSinBarrel);
+    // Exact: tan(asin(s))/asin(s) = s/(asin(s)*sqrt(1-s^2)), eliminates tan call
+    float dzDrtScale = sdSinBarrel / (sdSlope * alpaka::math::sqrt(acc, 1.f - sdSinBarrel * sdSinBarrel));
 
-    const float zGeom = modules.layers()[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch;
+    const float zGeom = innerMod.layer <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch;
 
     //slope-correction only on outer end
     zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - zGeom;
@@ -413,28 +421,44 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     if ((zOut < zLo) || (zOut > zHi))
       return false;
 
+    // Trig-free loose pre-check: sin(asin(s)+M) < s+M, so (s+M)^2 overestimates sin^2(sdCut).
+    // Rejects obvious failures before expensive anchorPhi reads + reducePhiRange.
+    {
+      const float cross = xIn * yOut - xOut * yIn;
+      const float dot = xIn * xOut + yIn * yOut;
+      if (dot <= 0.f)
+        return false;
+      const float sqrtTerm = alpaka::math::sqrt(acc, innerMod.sdMuls * innerMod.sdMuls + 0.01f / (rtOut * rtOut));
+      const float looseCut = sdSinBarrel + sqrtTerm;
+      const float looseCutSq = looseCut * looseCut;
+      const float crossSq = cross * cross;
+      if (crossSq >= looseCutSq * (crossSq + dot * dot))
+        return false;
+      const float dotChange = dot - (rtIn * rtIn);
+      if (dotChange <= 0.f || crossSq >= looseCutSq * (crossSq + dotChange * dotChange))
+        return false;
+    }
+
     dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]);
 
-    if (!passDeltaPhiCutsBarrel(acc,
-                                modules,
-                                mds,
-                                innerLowerModuleIndex,
-                                outerLowerModuleIndex,
-                                innerMDIndex,
-                                outerMDIndex,
-                                dPhi,
-                                rtOut,
-                                sdSlope,
-                                ptCut))
+    const float sdMuls = innerMod.sdMuls;
+    const float sdPVoff = 0.1f / rtOut;
+    const float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff);
+
+    if (alpaka::math::abs(acc, dPhi) > sdCut)
       return false;
 
     dPhiChange = cms::alpakatools::reducePhiRange(
         acc, cms::alpakatools::phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMDIndex]);
 
+    if (alpaka::math::abs(acc, dPhiChange) >= sdCut)
+      return false;
+
     float dAlphaThresholdValues[3];
     dAlphaThreshold(acc,
                     dAlphaThresholdValues,
-                    modules,
+                    innerMod,
+                    outerMod,
                     mds,
                     xIn,
                     yIn,
@@ -444,8 +468,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                     yOut,
                     zOut,
                     rtOut,
-                    innerLowerModuleIndex,
-                    outerLowerModuleIndex,
                     innerMDIndex,
                     outerMDIndex,
                     ptCut);
@@ -464,15 +486,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
       return false;
     if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold)
       return false;
-    return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold;
+    if (alpaka::math::abs(acc, dAlphaInnerMDOuterMD) >= dAlphaInnerMDOuterMDThreshold)
+      return false;
+
+    return true;
   }
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc,
-                                                                  ModulesConst modules,
+                                                                  ModuleSegData const& innerMod,
+                                                                  ModuleSegData const& outerMod,
                                                                   MiniDoubletsConst mds,
-                                                                  uint16_t innerLowerModuleIndex,
-                                                                  uint16_t outerLowerModuleIndex,
                                                                   unsigned int innerMDIndex,
                                                                   unsigned int outerMDIndex,
                                                                   float& dPhi,
@@ -505,10 +529,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     zOut = mds.anchorZ()[outerMDIndex];
     rtOut = mds.anchorRt()[outerMDIndex];
 
-    bool outerLayerEndcapTwoS =
-        (modules.subdets()[outerLowerModuleIndex] == Endcap) && (modules.moduleType()[outerLowerModuleIndex] == TwoS);
+    bool outerLayerEndcapTwoS = (outerMod.subdet == Endcap) && (outerMod.moduleType == TwoS);
 
-    float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax));
+    const float sdSinEC = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax);
     float disks2SMinRadius = 60.f;
 
     float rtGeom = ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius)
@@ -522,7 +545,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
     float dz = zOut - zIn;
     float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn);
-    float drtDzScale = sdSlope / alpaka::math::tan(acc, sdSlope);
+    float sdSlope = alpaka::math::asin(acc, sdSinEC);
+    // Exact: asin(s)/tan(asin(s)) = asin(s)*sqrt(1-s^2)/s, eliminates tan call
+    float drtDzScale = sdSlope * alpaka::math::sqrt(acc, 1.f - sdSinEC * sdSinEC) / sdSinEC;
 
     //rt should increase
     rtLo = alpaka::math::max(acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom);
@@ -533,19 +558,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     if ((rtOut < rtLo) || (rtOut > rtHi))
       return false;
 
+    // Phi pre-check: |tan(dPhi)| > tan(sdSlope) is exact for |dPhi| > sdSlope.
+    // Since final cut uses sdCut = sdSlope + eps > sdSlope, this is looser.
+    const float crossPhi = xIn * yOut - xOut * yIn;
+    const float dotPhi = xIn * xOut + yIn * yOut;
+    const float tanSlopeSq = sdSinEC * sdSinEC / (1.f - sdSinEC * sdSinEC);
+    if (dotPhi <= 0.f || crossPhi * crossPhi > tanSlopeSq * dotPhi * dotPhi)
+      return false;
+
     dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]);
 
-    if (!passDeltaPhiCutsEndcap(acc,
-                                modules,
-                                mds,
-                                innerLowerModuleIndex,
-                                outerLowerModuleIndex,
-                                innerMDIndex,
-                                outerMDIndex,
-                                dPhi,
-                                rtOut,
-                                sdSlope,
-                                ptCut))
+    if (!passDeltaPhiCutsEndcap(acc, mds, innerMDIndex, outerMDIndex, dPhi, rtOut, sdSlope, ptCut))
       return false;
 
     if (outerLayerEndcapTwoS) {
@@ -569,7 +592,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     float dAlphaThresholdValues[3];
     dAlphaThreshold(acc,
                     dAlphaThresholdValues,
-                    modules,
+                    innerMod,
+                    outerMod,
                     mds,
                     xIn,
                     yIn,
@@ -579,8 +603,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                     yOut,
                     zOut,
                     rtOut,
-                    innerLowerModuleIndex,
-                    outerLowerModuleIndex,
                     innerMDIndex,
                     outerMDIndex,
                     ptCut);
@@ -599,15 +621,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
       return false;
     if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold)
       return false;
-    return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold;
+    if (alpaka::math::abs(acc, dAlphaInnerMDOuterMD) >= dAlphaInnerMDOuterMDThreshold)
+      return false;
+
+    return true;
   }
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc,
-                                                            ModulesConst modules,
+                                                            ModuleSegData const& innerMod,
+                                                            ModuleSegData const& outerMod,
                                                             MiniDoubletsConst mds,
-                                                            uint16_t innerLowerModuleIndex,
-                                                            uint16_t outerLowerModuleIndex,
                                                             unsigned int innerMDIndex,
                                                             unsigned int outerMDIndex,
                                                             float& dPhi,
@@ -626,16 +650,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                                                             float& rtHi,
 #endif
                                                             const float ptCut) {
-    if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.subdets()[outerLowerModuleIndex] == Barrel) {
+    if (innerMod.subdet == Barrel and outerMod.subdet == Barrel) {
 #ifdef CUT_VALUE_DEBUG
       rtLo = -999.f;
       rtHi = -999.f;
 #endif
       return runSegmentDefaultAlgoBarrel(acc,
-                                         modules,
+                                         innerMod,
+                                         outerMod,
                                          mds,
-                                         innerLowerModuleIndex,
-                                         outerLowerModuleIndex,
                                          innerMDIndex,
                                          outerMDIndex,
                                          dPhi,
@@ -658,10 +681,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
       zHi = -999.f;
 #endif
       return runSegmentDefaultAlgoEndcap(acc,
-                                         modules,
+                                         innerMod,
+                                         outerMod,
                                          mds,
-                                         innerLowerModuleIndex,
-                                         outerLowerModuleIndex,
                                          innerMDIndex,
                                          outerMDIndex,
                                          dPhi,
@@ -697,6 +719,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
         if (nInnerMDs == 0)
           continue;
 
+        ModuleSegData innerMod = loadModuleSegData(modules, innerLowerModuleIndex, ptCut);
+
         unsigned int nConnectedModules = modules.nConnectedModules()[innerLowerModuleIndex];
 
         for (uint16_t outerLowerModuleArrayIdx : cms::alpakatools::uniform_elements_y(acc, nConnectedModules)) {
@@ -708,6 +732,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
           if (limit == 0)
             continue;
+
+          ModuleSegData outerMod = loadModuleSegData(modules, outerLowerModuleIndex, ptCut);
+
           for (unsigned int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) {
             unsigned int innerMDArrayIdx = hitIndex / nOuterMDs;
             unsigned int outerMDArrayIdx = hitIndex % nOuterMDs;
@@ -727,29 +754,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
             dPhiMax = 0;
             dPhiChangeMin = 0;
             dPhiChangeMax = 0;
-            if (runSegmentDefaultAlgo(acc,
-                                      modules,
-                                      mds,
-                                      innerLowerModuleIndex,
-                                      outerLowerModuleIndex,
-                                      innerMDIndex,
-                                      outerMDIndex,
-                                      dPhi,
-                                      dPhiMin,
-                                      dPhiMax,
-                                      dPhiChange,
-                                      dPhiChangeMin,
-                                      dPhiChangeMax,
+            bool pass = runSegmentDefaultAlgo(acc,
+                                              innerMod,
+                                              outerMod,
+                                              mds,
+                                              innerMDIndex,
+                                              outerMDIndex,
+                                              dPhi,
+                                              dPhiMin,
+                                              dPhiMax,
+                                              dPhiChange,
+                                              dPhiChangeMin,
+                                              dPhiChangeMax,
 #ifdef CUT_VALUE_DEBUG
-                                      dAlphaInnerMDSegment,
-                                      dAlphaOuterMDSegment,
-                                      dAlphaInnerMDOuterMD,
-                                      zLo,
-                                      zHi,
-                                      rtLo,
-                                      rtHi,
+                                              dAlphaInnerMDSegment,
+                                              dAlphaOuterMDSegment,
+                                              dAlphaInnerMDOuterMD,
+                                              zLo,
+                                              zHi,
+                                              rtLo,
+                                              rtHi,
 #endif
-                                      ptCut)) {
+                                              ptCut);
+
+            if (pass) {
               unsigned int totOccupancySegments =
                   alpaka::atomicAdd(acc,
                                     &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex],
@@ -799,21 +827,96 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passDeltaPhiCutsSelector(TAcc const& acc,
-                                                               ModulesConst modules,
+                                                               ModuleSegData const& innerMod,
+                                                               ModuleSegData const& outerMod,
                                                                MiniDoubletsConst mds,
-                                                               uint16_t innerLm,
-                                                               uint16_t outerLm,
                                                                unsigned int innerMD,
                                                                unsigned int outerMD,
                                                                const float ptCut) {
-    const float dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMD] - mds.anchorPhi()[innerMD]);
     const float rtOut = mds.anchorRt()[outerMD];
-    const float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax));
-
-    if (modules.subdets()[innerLm] == Barrel && modules.subdets()[outerLm] == Barrel)
-      return passDeltaPhiCutsBarrel(acc, modules, mds, innerLm, outerLm, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut);
-    else
-      return passDeltaPhiCutsEndcap(acc, modules, mds, innerLm, outerLm, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut);
+    const float sdSin = alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax);
+
+    const float cosSlope = alpaka::math::sqrt(acc, 1.f - sdSin * sdSin);
+
+    if (innerMod.subdet == Barrel && outerMod.subdet == Barrel) {
+      // Barrel counting: trig-free loose checks only.
+      // All bounds are strictly looser than creation kernel cuts.
+      const float rtIn = mds.anchorRt()[innerMD];
+      const float zIn = mds.anchorZ()[innerMD];
+      const float zOut = mds.anchorZ()[outerMD];
+
+      // z-window: 1/cosSlope >= tan(sdSlope)/sdSlope, so looser than creation.
+      const float dzDrtScale = 1.f / cosSlope;
+      const float zGeom = innerMod.layer <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch;
+      const float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - zGeom;
+      const float zHi = zIn + (zIn + kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + zGeom;
+      if (zOut < zLo || zOut > zHi)
+        return false;
+
+      const float xIn = mds.anchorX()[innerMD];
+      const float yIn = mds.anchorY()[innerMD];
+      const float xOut = mds.anchorX()[outerMD];
+      const float yOut = mds.anchorY()[outerMD];
+
+      const float cross = xIn * yOut - xOut * yIn;
+      const float dot = xIn * xOut + yIn * yOut;
+      if (dot <= 0.f)
+        return false;
+
+      // sin(asin(s)+M) < s+M, so (s+M)^2 overestimates sin^2(sdCut), looser than creation.
+      const float sqrtTerm = alpaka::math::sqrt(acc, innerMod.sdMuls * innerMod.sdMuls + 0.01f / (rtOut * rtOut));
+      const float looseCut = sdSin + sqrtTerm;
+      const float looseCutSq = looseCut * looseCut;
+      const float crossSq = cross * cross;
+
+      // dPhi check
+      if (crossSq >= looseCutSq * (crossSq + dot * dot))
+        return false;
+
+      // dPhiChange check
+      const float dotChange = dot - (rtIn * rtIn);
+      if (dotChange <= 0.f || crossSq >= looseCutSq * (crossSq + dotChange * dotChange))
+        return false;
+
+      return true;
+    } else {
+      // Endcap counting: z-sign + rt-range + phi pre-checks, then exact cuts on survivors.
+      const float zIn = mds.anchorZ()[innerMD];
+      const float zOut = mds.anchorZ()[outerMD];
+      if (zIn * zOut < 0.f)
+        return false;
+
+      // rt-range pre-filter: cosSlope <= asin(s)/tan(asin(s)), so looser than creation.
+      const float rtIn = mds.anchorRt()[innerMD];
+      const float dz = zOut - zIn;
+      const float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn);
+      const float drtDzScale = cosSlope;
+      const float rtGeom =
+          ((rtIn < 60.f && rtOut < 60.f)
+               ? (2.f * kPixelPSZpitch)
+               : ((rtIn < 60.f || rtOut < 60.f) ? (kPixelPSZpitch + kStrip2SZpitch) : (2.f * kStrip2SZpitch)));
+      const float rtLo =
+          alpaka::math::max(acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom);
+      const float rtHi = rtIn * (zOut - dLum) / (zIn - dLum) + rtGeom;
+      if (rtOut < rtLo || rtOut > rtHi)
+        return false;
+
+      // Phi pre-check: |tan(dPhi)| > tan(sdSlope) is exact for |dPhi| > sdSlope.
+      // Since the final dPhiChange cut implies |dPhi| < sdSlope, this is looser than creation.
+      const float xIn = mds.anchorX()[innerMD];
+      const float yIn = mds.anchorY()[innerMD];
+      const float xOut = mds.anchorX()[outerMD];
+      const float yOut = mds.anchorY()[outerMD];
+      const float crossPhi = xIn * yOut - xOut * yIn;
+      const float dotPhi = xIn * xOut + yIn * yOut;
+      const float tanSlopeSq = sdSin * sdSin / (1.f - sdSin * sdSin);
+      if (dotPhi <= 0.f || crossPhi * crossPhi > tanSlopeSq * dotPhi * dotPhi)
+        return false;
+
+      const float sdSlope = alpaka::math::asin(acc, sdSin);
+      const float dPhi = cms::alpakatools::reducePhiRange(acc, mds.anchorPhi()[outerMD] - mds.anchorPhi()[innerMD]);
+      return passDeltaPhiCutsEndcap(acc, mds, innerMD, outerMD, dPhi, rtOut, sdSlope, ptCut);
+    }
   }
 
   struct CountMiniDoubletConnections {
@@ -837,12 +940,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
         if (nConnectedModules == 0)
           continue;
 
+        ModuleSegData innerMod = loadModuleSegData(modules, innerLowerModuleIndex, ptCut);
+
         for (uint16_t outerLowerModuleArrayIdx : cms::alpakatools::uniform_elements_y(acc, nConnectedModules)) {
           const uint16_t outerLowerModuleIndex = modules.moduleMap()[innerLowerModuleIndex][outerLowerModuleArrayIdx];
           const unsigned int nOuterMDs = mdsOccupancy.nMDs()[outerLowerModuleIndex];
           if (nOuterMDs == 0)
             continue;
 
+          ModuleSegData outerMod = loadModuleSegData(modules, outerLowerModuleIndex, ptCut);
+
           const unsigned int limit = nInnerMDs * nOuterMDs;
 
           for (unsigned int hitIndex : cms::alpakatools::uniform_elements_x(acc, limit)) {
@@ -853,8 +960,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
             const unsigned int outerMDIndex = mdRanges[outerLowerModuleIndex][0] + outerMDArrayIdx;
 
             // Increment the connected max if the LS passes the delta phi cuts.
-            if (passDeltaPhiCutsSelector(
-                    acc, modules, mds, innerLowerModuleIndex, outerLowerModuleIndex, innerMDIndex, outerMDIndex, ptCut)) {
+            if (passDeltaPhiCutsSelector(acc, innerMod, outerMod, mds, innerMDIndex, outerMDIndex, ptCut)) {
               alpaka::atomicAdd(acc, &mds.connectedMax()[innerMDIndex], 1u, alpaka::hierarchy::Threads{});
             }
           }
diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h
index 3768f021a0cfb..8aafd359bffdc 100644
--- a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h
+++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h
@@ -329,9 +329,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
           LSTObjType type = candsBase.trackCandidateType()[trackCandidateIndex];
           unsigned int innerTrackletIdx = candsExtended.objectIndices()[trackCandidateIndex][0];
           if (type == LSTObjType::T5) {
-            unsigned int quintupletIndex = innerTrackletIdx;  // T5 index
-            float eta2 = __H2F(quintuplets.eta()[quintupletIndex]);
-            float phi2 = __H2F(quintuplets.phi()[quintupletIndex]);
+            float eta2 = __H2F(quintuplets.eta()[innerTrackletIdx]);
+            float phi2 = __H2F(quintuplets.phi()[innerTrackletIdx]);
             float dEta = alpaka::math::abs(acc, eta1 - eta2);
             float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2);
             float dR2 = dEta * dEta + dPhi * dPhi;
@@ -339,7 +338,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
             if (dR2 < 0.02f) {
               float d2 = 0.f;
               CMS_UNROLL_LOOP for (unsigned k = 0; k < Params_pLS::kEmbed; ++k) {
-                const float diff = plsEmbed[k] - quintuplets.t5Embed()[quintupletIndex][k];
+                const float diff = plsEmbed[k] - quintuplets.t5Embed()[innerTrackletIdx][k];
                 d2 += diff * diff;
               }
               // Compare squared embedding distance to the cut value for the eta bin.
@@ -347,24 +346,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                 pixelSegments.isDup()[pixelArrayIndex] = true;
               }
             }
-          }
-          if (type == LSTObjType::pT3) {
+          } else if (type == LSTObjType::pT3) {
             int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx];
             int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsBase);
             if (npMatched > 0)
               pixelSegments.isDup()[pixelArrayIndex] = true;
 
-            int pT3Index = innerTrackletIdx;
-            float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]);
-            float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]);
+            float eta2 = __H2F(pixelTriplets.eta_pix()[innerTrackletIdx]);
+            float phi2 = __H2F(pixelTriplets.phi_pix()[innerTrackletIdx]);
             float dEta = alpaka::math::abs(acc, eta1 - eta2);
             float dPhi = cms::alpakatools::deltaPhi(acc, phi1, phi2);
 
             float dR2 = dEta * dEta + dPhi * dPhi;
             if (dR2 < 0.000001f)
               pixelSegments.isDup()[pixelArrayIndex] = true;
-          }
-          if (type == LSTObjType::pT5) {
+          } else if (type == LSTObjType::pT5) {
             unsigned int pLSIndex = innerTrackletIdx;
             int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hitsBase);
             if (npMatched > 0) {
diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h
index 81c166cd1f4f5..393a0bf7d520f 100644
--- a/RecoTracker/LSTCore/src/alpaka/Triplet.h
+++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h
@@ -14,6 +14,68 @@
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
 
+  // Pre-loaded inner-segment-constant data for passPointingConstraint.
+  // Populated once per inner segment, reused across all outer segments in the inner loop.
+  struct T3InnerSegData {
+    float x1, y1;           // first MD position (cm)
+    float x2, y2;           // second MD position (shared MD, cm)
+    float rt1, rt2;         // anchorRt for first two MDs (cm)
+    float drt_InSeg;        // rt2 - rt1
+    float rt_InSeg;         // sqrt((x2-x1)^2 + (y2-y1)^2)
+    float sdIn_alpha;       // dPhiChange of inner segment
+    float sdIn_alphaRHmin;  // dPhiChangeMin (for endcap path)
+    float sdIn_alphaRHmax;  // dPhiChangeMax (for endcap path)
+    // Precomputed sin/cos for algebraic betaIn check (avoids per-candidate atan2).
+    float sin_alpha, cos_alpha;
+    float sin_alphaRHmin, cos_alphaRHmin;  // for endcap EEE path
+    float sin_alphaRHmax, cos_alphaRHmax;  // for endcap EEE path
+    short innerSubdet;                     // subdet of inner-inner module
+    short middleSubdet;                    // subdet of middle module
+  };
+
+  // Pre-loaded hit coordinates for passRZConstraint.
+  // All values in cm, passRZConstraint converts to meters internally.
+  struct T3HitCoords {
+    float x1, y1, z1, rt1;
+    float x2, y2, z2, rt2;
+    float x3, y3, z3, rt3;
+  };
+
+  template <alpaka::concepts::Acc TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE T3InnerSegData loadT3InnerSegData(TAcc const& acc,
+                                                                   MiniDoubletsConst mds,
+                                                                   SegmentsConst segments,
+                                                                   ModulesConst modules,
+                                                                   unsigned int innerSegmentIndex,
+                                                                   uint16_t innerInnerLowerModuleIndex,
+                                                                   uint16_t middleLowerModuleIndex) {
+    unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0];
+    unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1];
+    T3InnerSegData d;
+    d.x1 = mds.anchorX()[firstMDIndex];
+    d.y1 = mds.anchorY()[firstMDIndex];
+    d.x2 = mds.anchorX()[secondMDIndex];
+    d.y2 = mds.anchorY()[secondMDIndex];
+    d.rt1 = mds.anchorRt()[firstMDIndex];
+    d.rt2 = mds.anchorRt()[secondMDIndex];
+    d.drt_InSeg = d.rt2 - d.rt1;
+    d.rt_InSeg = alpaka::math::sqrt(acc, (d.x2 - d.x1) * (d.x2 - d.x1) + (d.y2 - d.y1) * (d.y2 - d.y1));
+    d.sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]);
+    d.sin_alpha = alpaka::math::sin(acc, d.sdIn_alpha);
+    d.cos_alpha = alpaka::math::cos(acc, d.sdIn_alpha);
+    d.innerSubdet = modules.subdets()[innerInnerLowerModuleIndex];
+    d.middleSubdet = modules.subdets()[middleLowerModuleIndex];
+    d.sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]);
+    d.sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]);
+    if (d.innerSubdet == Endcap and d.middleSubdet == Endcap) {
+      d.sin_alphaRHmin = alpaka::math::sin(acc, d.sdIn_alphaRHmin);
+      d.cos_alphaRHmin = alpaka::math::cos(acc, d.sdIn_alphaRHmin);
+      d.sin_alphaRHmax = alpaka::math::sin(acc, d.sdIn_alphaRHmax);
+      d.cos_alphaRHmax = alpaka::math::cos(acc, d.sdIn_alphaRHmax);
+    }
+    return d;
+  }
+
   ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(ModulesConst modules,
                                                          MiniDoubletsConst mds,
                                                          SegmentsConst segments,
@@ -72,13 +134,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   template <alpaka::concepts::Acc TAcc>
   ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc,
                                                        ModulesConst modules,
-                                                       MiniDoubletsConst mds,
                                                        uint16_t innerInnerLowerModuleIndex,
                                                        uint16_t middleLowerModuleIndex,
                                                        uint16_t outerOuterLowerModuleIndex,
-                                                       unsigned int firstMDIndex,
-                                                       unsigned int secondMDIndex,
-                                                       unsigned int thirdMDIndex,
+                                                       T3HitCoords const& hitCoords,
                                                        float circleRadius,
                                                        float circleCenterX,
                                                        float circleCenterY,
@@ -89,26 +148,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     const short layer3 = modules.lstLayers()[outerOuterLowerModuleIndex];
 
     //all the values are stored in the unit of cm, in the calculation below we need to be cautious if we want to use the meter unit
-    //get r and z
-    const float r1 = mds.anchorRt()[firstMDIndex] / 100;
-    const float r2 = mds.anchorRt()[secondMDIndex] / 100;
-    const float r3 = mds.anchorRt()[thirdMDIndex] / 100;
+    //get r and z (convert from cm to m)
+    const float r1 = hitCoords.rt1 / 100;
+    const float r2 = hitCoords.rt2 / 100;
+    const float r3 = hitCoords.rt3 / 100;
 
-    const float z1 = mds.anchorZ()[firstMDIndex] / 100;
-    const float z2 = mds.anchorZ()[secondMDIndex] / 100;
-    const float z3 = mds.anchorZ()[thirdMDIndex] / 100;
+    const float z1 = hitCoords.z1 / 100;
+    const float z2 = hitCoords.z2 / 100;
+    const float z3 = hitCoords.z3 / 100;
 
     //use linear approximation for regions 9 and 20-24 because it works better (see https://github.com/SegmentLinking/cmssw/pull/92)
     float residual = alpaka::math::abs(acc, z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1));
 
-    //get the x,y position of each MD
-    const float x1 = mds.anchorX()[firstMDIndex] / 100;
-    const float x2 = mds.anchorX()[secondMDIndex] / 100;
-    const float x3 = mds.anchorX()[thirdMDIndex] / 100;
+    //get the x,y position of each MD (convert from cm to m)
+    const float x1 = hitCoords.x1 / 100;
+    const float x2 = hitCoords.x2 / 100;
+    const float x3 = hitCoords.x3 / 100;
 
-    const float y1 = mds.anchorY()[firstMDIndex] / 100;
-    const float y2 = mds.anchorY()[secondMDIndex] / 100;
-    const float y3 = mds.anchorY()[thirdMDIndex] / 100;
+    const float y1 = hitCoords.y1 / 100;
+    const float y2 = hitCoords.y2 / 100;
+    const float y3 = hitCoords.y3 / 100;
 
     float cross = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
     charge = -1 * ((int)copysignf(1.0f, cross));
@@ -186,13 +245,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     else if (y3 > y2 && y2 > y1)
       py = alpaka::math::abs(acc, py);
 
-    float AO = alpaka::math::sqrt(
-        acc, (x_other - x_center) * (x_other - x_center) + (y_other - y_center) * (y_other - y_center));
-    float BO =
-        alpaka::math::sqrt(acc, (x_init - x_center) * (x_init - x_center) + (y_init - y_center) * (y_init - y_center));
+    // All 3 hits lie on the fitted circle, so AO = BO = R; eliminates 2 sqrt calls.
+    float R = circleRadius / 100;
     float AB2 = (x_other - x_init) * (x_other - x_init) + (y_other - y_init) * (y_other - y_init);
-    float dPhi = alpaka::math::acos(acc, (AO * AO + BO * BO - AB2) / (2 * AO * BO));  //Law of Cosines
-    float ds = circleRadius / 100 * dPhi;
+    float dPhi = alpaka::math::acos(acc, 1 - AB2 / (2 * R * R));
+    float ds = R * dPhi;
     float pz = dz / ds * pt;
 
     float p = alpaka::math::sqrt(acc, px * px + py * py + pz * pz);
@@ -216,8 +273,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
       float A = paraB * paraB + paraC * paraC;
       float B = 2 * paraA * paraB;
       float C = paraA * paraA - paraC * paraC;
-      float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A);
-      float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A);
+      // Shared discriminant: compute sqrt once instead of twice.
+      float disc = alpaka::math::sqrt(acc, B * B - 4 * A * C);
+      float sol1 = (-B + disc) / (2 * A);
+      float sol2 = (-B - disc) / (2 * A);
       float solz1 = alpaka::math::asin(acc, sol1) / rou * pz / p + z_init;
       float solz2 = alpaka::math::asin(acc, sol2) / rou * pz / p + z_init;
       float diffz1 = (solz1 - z_target) * 100;
@@ -228,8 +287,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
                      : ((alpaka::math::abs(acc, diffz1) < alpaka::math::abs(acc, diffz2)) ? diffz1 : diffz2);
     } else {  // for endcap
       float s = (z_target - z_init) * p / pz;
-      float x = x_init + px / a * alpaka::math::sin(acc, rou * s) - py / a * (1 - alpaka::math::cos(acc, rou * s));
-      float y = y_init + py / a * alpaka::math::sin(acc, rou * s) + px / a * (1 - alpaka::math::cos(acc, rou * s));
+      // Shared sin/cos: compute once instead of twice each.
+      float sinRS = alpaka::math::sin(acc, rou * s);
+      float cosRS = alpaka::math::cos(acc, rou * s);
+      float x = x_init + px / a * sinRS - py / a * (1 - cosRS);
+      float y = y_init + py / a * sinRS + px / a * (1 - cosRS);
       residual = (r_target - alpaka::math::sqrt(acc, x * x + y * y)) * 100;
     }
 
@@ -326,66 +388,45 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
   }
 
   template <alpaka::concepts::Acc TAcc>
-  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc,
-                                                             ModulesConst modules,
-                                                             MiniDoubletsConst mds,
-                                                             SegmentsConst segments,
-                                                             unsigned int firstMDIndex,
-                                                             unsigned int secondMDIndex,
-                                                             unsigned int thirdMDIndex,
-                                                             uint16_t innerInnerLowerModuleIndex,
-                                                             uint16_t middleLowerModuleIndex,
-                                                             uint16_t outerOuterLowerModuleIndex,
-                                                             unsigned int innerSegmentIndex,
-                                                             const float ptCut) {
-    const float x1 = mds.anchorX()[firstMDIndex];
-    const float x2 = mds.anchorX()[secondMDIndex];
-    const float x3 = mds.anchorX()[thirdMDIndex];
-    const float y1 = mds.anchorY()[firstMDIndex];
-    const float y2 = mds.anchorY()[secondMDIndex];
-    const float y3 = mds.anchorY()[thirdMDIndex];
-
-    const short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex];
-    const short middleLowerModuleSubdet = modules.subdets()[middleLowerModuleIndex];
-    const short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex];
-
-    const float rt_InLo = mds.anchorRt()[firstMDIndex];
-    const float rt_InOut = mds.anchorRt()[secondMDIndex];
-    const float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]);
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(
+      TAcc const& acc, T3InnerSegData const& innerSegData, float x3, float y3, short outerSubdet, const float ptCut) {
+    const float dx = x3 - innerSegData.x1;
+    const float dy = y3 - innerSegData.y1;
+    const float drt_tl_axis = alpaka::math::sqrt(acc, dx * dx + dy * dy);
 
-    const float drt_InSeg = rt_InOut - rt_InLo;
-    const float drt_tl_axis = alpaka::math::sqrt(acc, (x3 - x1) * (x3 - x1) + (y3 - y1) * (y3 - y1));
-
-    //innerOuterAnchor - innerInnerAnchor
-    const float rt_InSeg = alpaka::math::sqrt(acc, (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
-
-    const float betaIn =
-        sdIn_alpha - cms::alpakatools::reducePhiRange(
-                         acc, cms::alpakatools::phi(acc, x3 - x1, y3 - y1) - mds.anchorPhi()[firstMDIndex]);
     const float betaInCut =
-        alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
-        (0.02f / drt_InSeg);
-
-    float betaInRHmin = betaIn;
-
-    if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and
-        outerOuterLowerModuleSubdet == Endcap) {
-      float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]);
-      float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]);
-
-      betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha;
-      float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha;
-      float swapTemp;
-
-      if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) {
-        swapTemp = betaInRHmin;
-        betaInRHmin = betaInRHmax;
-        betaInRHmax = swapTemp;
+        alpaka::math::asin(
+            acc, alpaka::math::min(acc, (-innerSegData.rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
+        (0.02f / innerSegData.drt_InSeg);
+
+    // Algebraic betaIn check: |sin(betaIn)| < sin(betaInCut) is exact equivalent of
+    // |betaIn| < betaInCut (for |betaIn| < pi/2, betaInCut < pi/2). Avoids per-candidate atan2.
+    // cross = x1*y3 - y1*x3, dot = x3*x1 + y3*y1 - rt1^2 (angle from hit1 to dir_13)
+    const float crossPC = innerSegData.x1 * y3 - innerSegData.y1 * x3;
+    const float dotPC = x3 * innerSegData.x1 + y3 * innerSegData.y1 - innerSegData.rt1 * innerSegData.rt1;
+    const float r2 = crossPC * crossPC + dotPC * dotPC;
+    const float sinBetaInCut = alpaka::math::sin(acc, betaInCut);
+    const float sinBetaInCutSq = sinBetaInCut * sinBetaInCut;
+
+    if (innerSegData.innerSubdet == Endcap and innerSegData.middleSubdet == Endcap and outerSubdet == Endcap) {
+      // EEE: check both alpha variants, pass if the one with smaller |betaIn| is within cut
+      const float sinValMin = innerSegData.sin_alphaRHmin * dotPC - innerSegData.cos_alphaRHmin * crossPC;
+      const float sinValMax = innerSegData.sin_alphaRHmax * dotPC - innerSegData.cos_alphaRHmax * crossPC;
+      const float sqMin = sinValMin * sinValMin;
+      const float sqMax = sinValMax * sinValMax;
+
+      if (sqMin <= sqMax) {
+        return sqMin < sinBetaInCutSq * r2 and
+               (innerSegData.cos_alphaRHmin * dotPC + innerSegData.sin_alphaRHmin * crossPC > 0.f);
+      } else {
+        return sqMax < sinBetaInCutSq * r2 and
+               (innerSegData.cos_alphaRHmax * dotPC + innerSegData.sin_alphaRHmax * crossPC > 0.f);
       }
     }
 
-    //Beta cut
-    return alpaka::math::abs(acc, betaInRHmin) < betaInCut;
+    const float sinVal = innerSegData.sin_alpha * dotPC - innerSegData.cos_alpha * crossPC;
+    return sinVal * sinVal < sinBetaInCutSq * r2 and
+           (innerSegData.cos_alpha * dotPC + innerSegData.sin_alpha * crossPC > 0.f);
   }
 
   template <alpaka::concepts::Acc TAcc>
@@ -410,43 +451,51 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
     const unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0];
     const unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1];
 
-    const float x1 = mds.anchorX()[firstMDIndex];
-    const float x2 = mds.anchorX()[secondMDIndex];
-    const float x3 = mds.anchorX()[thirdMDIndex];
-    const float y1 = mds.anchorY()[firstMDIndex];
-    const float y2 = mds.anchorY()[secondMDIndex];
-    const float y3 = mds.anchorY()[thirdMDIndex];
-
-    std::tie(circleRadius, circleCenterX, circleCenterY) =
-        computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3);
+    T3HitCoords hitCoords;
+    hitCoords.x1 = mds.anchorX()[firstMDIndex];
+    hitCoords.y1 = mds.anchorY()[firstMDIndex];
+    hitCoords.z1 = mds.anchorZ()[firstMDIndex];
+    hitCoords.rt1 = mds.anchorRt()[firstMDIndex];
+    hitCoords.x2 = mds.anchorX()[secondMDIndex];
+    hitCoords.y2 = mds.anchorY()[secondMDIndex];
+    hitCoords.z2 = mds.anchorZ()[secondMDIndex];
+    hitCoords.rt2 = mds.anchorRt()[secondMDIndex];
+    hitCoords.x3 = mds.anchorX()[thirdMDIndex];
+    hitCoords.y3 = mds.anchorY()[thirdMDIndex];
+    hitCoords.z3 = mds.anchorZ()[thirdMDIndex];
+    hitCoords.rt3 = mds.anchorRt()[thirdMDIndex];
+
+    std::tie(circleRadius, circleCenterX, circleCenterY) = computeRadiusFromThreeAnchorHits(
+        acc, hitCoords.x1, hitCoords.y1, hitCoords.x2, hitCoords.y2, hitCoords.x3, hitCoords.y3);
 
     if (not passRZConstraint(acc,
                              modules,
-                             mds,
                              innerInnerLowerModuleIndex,
                              middleLowerModuleIndex,
                              outerOuterLowerModuleIndex,
-                             firstMDIndex,
-                             secondMDIndex,
-                             thirdMDIndex,
+                             hitCoords,
                              circleRadius,
                              circleCenterX,
                              circleCenterY,
                              charge))
       return false;
 
-    const float rt_InLo = mds.anchorRt()[firstMDIndex];
-    const float rt_InOut = mds.anchorRt()[secondMDIndex];
     const float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]);
 
-    const float drt_InSeg = rt_InOut - rt_InLo;
-    const float drt_tl_axis = alpaka::math::sqrt(acc, (x3 - x1) * (x3 - x1) + (y3 - y1) * (y3 - y1));
+    const float drt_InSeg = hitCoords.rt2 - hitCoords.rt1;
+    const float drt_tl_axis = alpaka::math::sqrt(acc,
+                                                 (hitCoords.x3 - hitCoords.x1) * (hitCoords.x3 - hitCoords.x1) +
+                                                     (hitCoords.y3 - hitCoords.y1) * (hitCoords.y3 - hitCoords.y1));
 
     //innerOuterAnchor - innerInnerAnchor
-    const float rt_InSeg = alpaka::math::sqrt(acc, (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
+    const float rt_InSeg = alpaka::math::sqrt(acc,
+                                              (hitCoords.x2 - hitCoords.x1) * (hitCoords.x2 - hitCoords.x1) +
+                                                  (hitCoords.y2 - hitCoords.y1) * (hitCoords.y2 - hitCoords.y1));
 
     betaIn = sdIn_alpha - cms::alpakatools::reducePhiRange(
-                              acc, cms::alpakatools::phi(acc, x3 - x1, y3 - y1) - mds.anchorPhi()[firstMDIndex]);
+                              acc,
+                              cms::alpakatools::phi(acc, hitCoords.x3 - hitCoords.x1, hitCoords.y3 - hitCoords.y1) -
+                                  mds.anchorPhi()[firstMDIndex]);
     betaInCut =
         alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
         (0.02f / drt_InSeg);
@@ -515,6 +564,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
           uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex];
           int middleMDIndiceInner = segments.mdIndices()[innerSegmentIndex][1];
 
+          T3InnerSegData innerSegData = loadT3InnerSegData(
+              acc, mds, segments, modules, innerSegmentIndex, innerInnerLowerModuleIndex, middleLowerModuleIndex);
+
           unsigned int nOuterSegments = segmentsOccupancy.nSegments()[middleLowerModuleIndex];
           for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) {
             unsigned int outerSegmentIndex = ranges.segmentRanges()[middleLowerModuleIndex][0] + outerSegmentArrayIndex;
@@ -524,22 +576,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
               continue;
 
             uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex];
-            unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0];
-            unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0];
             unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1];
+            float x3 = mds.anchorX()[thirdMDIndex];
+            float y3 = mds.anchorY()[thirdMDIndex];
+            short outerSubdet = modules.subdets()[outerOuterLowerModuleIndex];
 
-            if (not passPointingConstraint(acc,
-                                           modules,
-                                           mds,
-                                           segments,
-                                           firstMDIndex,
-                                           secondMDIndex,
-                                           thirdMDIndex,
-                                           innerInnerLowerModuleIndex,
-                                           middleLowerModuleIndex,
-                                           outerOuterLowerModuleIndex,
-                                           innerSegmentIndex,
-                                           ptCut))
+            if (not passPointingConstraint(acc, innerSegData, x3, y3, outerSubdet, ptCut))
               continue;
 
             // Match inner Sg and Outer Sg
@@ -672,29 +714,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
           if (nOuterSegments == 0)
             continue;
 
+          T3InnerSegData innerSegData = loadT3InnerSegData(
+              acc, mds, segments, modules, innerSegmentIndex, innerLowerModuleArrayIdx, middleLowerModuleIndex);
+
           for (unsigned int outerSegmentArrayIndex : cms::alpakatools::uniform_elements_x(acc, nOuterSegments)) {
             const unsigned int outerSegmentIndex = segmentRanges[middleLowerModuleIndex][0] + outerSegmentArrayIndex;
 
             if (mdIndices[outerSegmentIndex][0] != mdShared)
               continue;
 
-            unsigned int firstMDIndex = mdIndices[innerSegmentIndex][0];
-            unsigned int secondMDIndex = mdShared;
             unsigned int thirdMDIndex = mdIndices[outerSegmentIndex][1];
             uint16_t outerOuterLowerModuleIndex = outerLowerModuleIndices[outerSegmentIndex];
+            float x3 = mds.anchorX()[thirdMDIndex];
+            float y3 = mds.anchorY()[thirdMDIndex];
+            short outerSubdet = modules.subdets()[outerOuterLowerModuleIndex];
 
-            if (not passPointingConstraint(acc,
-                                           modules,
-                                           mds,
-                                           segments,
-                                           firstMDIndex,
-                                           secondMDIndex,
-                                           thirdMDIndex,
-                                           innerLowerModuleArrayIdx,
-                                           middleLowerModuleIndex,
-                                           outerOuterLowerModuleIndex,
-                                           innerSegmentIndex,
-                                           ptCut))
+            if (not passPointingConstraint(acc, innerSegData, x3, y3, outerSubdet, ptCut))
               continue;
 
             alpaka::atomicAdd(acc, &segments.connectedMax()[innerSegmentIndex], 1u, alpaka::hierarchy::Threads{});