Skip to content

Conversation

@jinhuang1102
Copy link
Contributor

This patch fixes a profile metadata missing in the ExpandMemCmp pass when it expanding memcmp calls. This would cause branches between different blocks to lose their profile data, potentially leading to suboptimal code generation.

The patch updates the ExpandMemCmp pass to set branch weights to a default unknown(50/50 weights) value when a profile is available. This prevents the expansion from making a previously profiled branch unprofiled.

The patch also includes updates to the tests to reflect the new branch weights.

@llvmbot
Copy link
Member

llvmbot commented Nov 29, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Jin Huang (jinhuang1102)

Changes

This patch fixes a profile metadata missing in the ExpandMemCmp pass when it expanding memcmp calls. This would cause branches between different blocks to lose their profile data, potentially leading to suboptimal code generation.

The patch updates the ExpandMemCmp pass to set branch weights to a default unknown(50/50 weights) value when a profile is available. This prevents the expansion from making a previously profiled branch unprofiled.

The patch also includes updates to the tests to reflect the new branch weights.


Full diff: https://github.com/llvm/llvm-project/pull/169979.diff

4 Files Affected:

  • (modified) llvm/lib/CodeGen/ExpandMemCmp.cpp (+21-15)
  • (modified) llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll (+14-6)
  • (modified) llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll (+11-4)
  • (modified) llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll (+19-7)
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 74f93e1979532..61ab0cb869904 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -62,7 +63,6 @@ static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
 
 namespace {
 
-
 // This class provides helper functions to expand a memcmp library call into an
 // inline expansion.
 class MemCmpExpansion {
@@ -92,8 +92,7 @@ class MemCmpExpansion {
   // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
   struct LoadEntry {
     LoadEntry(unsigned LoadSize, uint64_t Offset)
-        : LoadSize(LoadSize), Offset(Offset) {
-    }
+        : LoadSize(LoadSize), Offset(Offset) {}
 
     // The size of the load for this block, in bytes.
     unsigned LoadSize;
@@ -488,6 +487,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
   // continue to next LoadCmpBlock or EndBlock.
   BasicBlock *BB = Builder.GetInsertBlock();
   BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
+                                              CI->getFunction());
   Builder.Insert(CmpBr);
   if (DTU)
     DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
@@ -552,6 +553,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
   // to next LoadCmpBlock or EndBlock.
   BasicBlock *BB = Builder.GetInsertBlock();
   BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
+                                              CI->getFunction());
   Builder.Insert(CmpBr);
   if (DTU)
     DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
@@ -592,6 +595,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
   Value *Res =
       Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
                            ConstantInt::get(Builder.getInt32Ty(), 1));
+  setExplicitlyUnknownBranchWeightsIfProfiled(*cast<Instruction>(Res),
+                                              DEBUG_TYPE, CI->getFunction());
 
   PhiRes->addIncoming(Res, ResBlock.BB);
   BranchInst *NewBr = BranchInst::Create(EndBlock);
@@ -724,7 +729,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
     // calculate which source was larger. The calculation requires the
     // two loaded source values of each load compare block.
     // These will be saved in the phi nodes created by setupResultBlockPHINodes.
-    if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
+    if (!IsUsedForZeroCmp)
+      setupResultBlockPHINodes();
 
     // Create the number of required load compare basic blocks.
     createLoadCmpBlocks();
@@ -853,15 +859,14 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   const bool IsUsedForZeroCmp =
       IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
   bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
-  auto Options = TTI->enableMemCmpExpansion(OptForSize,
-                                            IsUsedForZeroCmp);
-  if (!Options) return false;
+  auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp);
+  if (!Options)
+    return false;
 
   if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
     Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
 
-  if (OptForSize &&
-      MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+  if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences())
     Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
 
   if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
@@ -907,13 +912,14 @@ class ExpandMemCmpLegacyPass : public FunctionPass {
   }
 
   bool runOnFunction(Function &F) override {
-    if (skipFunction(F)) return false;
+    if (skipFunction(F))
+      return false;
 
     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
     if (!TPC) {
       return false;
     }
-    const TargetLowering* TL =
+    const TargetLowering *TL =
         TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
 
     const TargetLibraryInfo *TLI =
@@ -921,9 +927,9 @@ class ExpandMemCmpLegacyPass : public FunctionPass {
     const TargetTransformInfo *TTI =
         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-    auto *BFI = (PSI && PSI->hasProfileSummary()) ?
-           &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
-           nullptr;
+    auto *BFI = (PSI && PSI->hasProfileSummary())
+                    ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
+                    : nullptr;
     DominatorTree *DT = nullptr;
     if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
       DT = &DTWP->getDomTree();
@@ -969,7 +975,7 @@ PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
   if (DT)
     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
 
-  const DataLayout& DL = F.getDataLayout();
+  const DataLayout &DL = F.getDataLayout();
   bool MadeChanges = false;
   for (auto BBIt = F.begin(); BBIt != F.end();) {
     if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
index 7df48d878bd0b..0ed3af535ac9c 100644
--- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
 ; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
 ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
 
@@ -98,15 +98,15 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %call
 }
 
-define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; CHECK-LABEL: define i32 @cmp7(
-; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) {
+; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; CHECK-NEXT:    br label [[LOADBB:%.*]]
 ; CHECK:       res_block:
 ; CHECK-NEXT:    [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
 ; CHECK-NEXT:    [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    br label [[ENDBLOCK:%.*]]
 ; CHECK:       loadbb:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[X]], align 1
@@ -114,7 +114,7 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; CHECK-NEXT:    [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
 ; CHECK-NEXT:    [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
 ; CHECK:       loadbb1:
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
@@ -123,7 +123,7 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; CHECK-NEXT:    [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
 ; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]], !prof [[PROF1]]
 ; CHECK:       endblock:
 ; CHECK-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[PHI_RES]]
@@ -860,3 +860,11 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
index 0507ec9de542e..30da8b391f55d 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -expand-memcmp -mtriple=i686-unknown-unknown   -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
 ; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown   -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
 
@@ -34,12 +34,12 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl
   ret i32 %call
 }
 
-define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; X32-LABEL: @cmp3(
 ; X32-NEXT:    br label [[LOADBB:%.*]]
 ; X32:       res_block:
 ; X32-NEXT:    [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
-; X32-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X32-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
 ; X32-NEXT:    br label [[ENDBLOCK:%.*]]
 ; X32:       loadbb:
 ; X32-NEXT:    [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
@@ -47,7 +47,7 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; X32-NEXT:    [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
 ; X32-NEXT:    [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
 ; X32-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X32-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
 ; X32:       loadbb1:
 ; X32-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
 ; X32-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
@@ -564,3 +564,10 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %conv
 }
 
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; X32: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; X32: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; X32: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
index 86dc3e5245f24..b5075a2fcff0b 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
 ; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD
 ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
@@ -36,12 +36,12 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl
   ret i32 %call
 }
 
-define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; X64-LABEL: @cmp3(
 ; X64-NEXT:    br label [[LOADBB:%.*]]
 ; X64:       res_block:
 ; X64-NEXT:    [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
-; X64-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
 ; X64-NEXT:    br label [[ENDBLOCK:%.*]]
 ; X64:       loadbb:
 ; X64-NEXT:    [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
@@ -49,7 +49,7 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; X64-NEXT:    [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
 ; X64-NEXT:    [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
 ; X64:       loadbb1:
 ; X64-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
 ; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
@@ -474,7 +474,7 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %conv
 }
 
-define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; X64_1LD-LABEL: @cmp_eq3(
 ; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
 ; X64_1LD:       res_block:
@@ -483,14 +483,14 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]], !prof [[PROF1]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]], !prof [[PROF1]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
 ; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -1076,3 +1076,15 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %conv
 }
 
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; X64_1LD: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; X64_2LD: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; X64_1LD: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; X64_1LD: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
+; X64_2LD: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; X64_2LD: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants