diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
index 03dcfa5f81109..e6f44dd51b459 100644
--- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
@@ -9,6 +9,7 @@
 #include "IdentifierNamingCheck.h"
 
 #include "../GlobList.h"
+#include "../utils/ASTUtils.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
@@ -286,7 +287,9 @@ IdentifierNamingCheck::FileStyle IdentifierNamingCheck::getFileStyleFromOptions(
                         HPTOpt.value_or(IdentifierNamingCheck::HPT_Off));
   }
   bool IgnoreMainLike = Options.get("IgnoreMainLikeFunctions", false);
-  return {std::move(Styles), std::move(HNOption), IgnoreMainLike};
+  bool CheckAnonFieldInParent = Options.get("CheckAnonFieldInParent", false);
+  return {std::move(Styles), std::move(HNOption), IgnoreMainLike,
+          CheckAnonFieldInParent};
 }
 
 std::string IdentifierNamingCheck::HungarianNotation::getDeclTypeName(
@@ -859,6 +862,8 @@ void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "IgnoreFailedSplit", IgnoreFailedSplit);
   Options.store(Opts, "IgnoreMainLikeFunctions",
                 MainFileStyle->isIgnoringMainLikeFunction());
+  Options.store(Opts, "CheckAnonFieldInParent",
+                MainFileStyle->isCheckingAnonFieldInParentScope());
 }
 
 bool IdentifierNamingCheck::matchesStyle(
@@ -1111,7 +1116,7 @@ std::string IdentifierNamingCheck::fixupWithStyle(
 StyleKind IdentifierNamingCheck::findStyleKind(
     const NamedDecl *D,
     ArrayRef<std::optional<IdentifierNamingCheck::NamingStyle>> NamingStyles,
-    bool IgnoreMainLikeFunctions) const {
+    bool IgnoreMainLikeFunctions, bool CheckAnonFieldInParentScope) const {
   assert(D && D->getIdentifier() && !D->getName().empty() && !D->isImplicit() &&
          "Decl must be an explicit identifier with a name.");
 
@@ -1185,29 +1190,14 @@ StyleKind IdentifierNamingCheck::findStyleKind(
   }
 
   if (const auto *Decl = dyn_cast<FieldDecl>(D)) {
-    QualType Type = Decl->getType();
-
-    if (!Type.isNull() && Type.isConstQualified()) {
-      if (NamingStyles[SK_ConstantMember])
-        return SK_ConstantMember;
-
-      if (NamingStyles[SK_Constant])
-        return SK_Constant;
+    if (CheckAnonFieldInParentScope) {
+      const RecordDecl *Record = Decl->getParent();
+      if (Record->isAnonymousStructOrUnion()) {
+        return findStyleKindForAnonField(Decl, NamingStyles);
+      }
     }
 
-    if (Decl->getAccess() == AS_private && NamingStyles[SK_PrivateMember])
-      return SK_PrivateMember;
-
-    if (Decl->getAccess() == AS_protected && NamingStyles[SK_ProtectedMember])
-      return SK_ProtectedMember;
-
-    if (Decl->getAccess() == AS_public && NamingStyles[SK_PublicMember])
-      return SK_PublicMember;
-
-    if (NamingStyles[SK_Member])
-      return SK_Member;
-
-    return SK_Invalid;
+    return findStyleKindForField(Decl, Decl->getType(), NamingStyles);
   }
 
   if (const auto *Decl = dyn_cast<ParmVarDecl>(D)) {
@@ -1244,66 +1234,7 @@ StyleKind IdentifierNamingCheck::findStyleKind(
   }
 
   if (const auto *Decl = dyn_cast<VarDecl>(D)) {
-    QualType Type = Decl->getType();
-
-    if (Decl->isConstexpr() && NamingStyles[SK_ConstexprVariable])
-      return SK_ConstexprVariable;
-
-    if (!Type.isNull() && Type.isConstQualified()) {
-      if (Decl->isStaticDataMember() && NamingStyles[SK_ClassConstant])
-        return SK_ClassConstant;
-
-      if (Decl->isFileVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
-          NamingStyles[SK_GlobalConstantPointer])
-        return SK_GlobalConstantPointer;
-
-      if (Decl->isFileVarDecl() && NamingStyles[SK_GlobalConstant])
-        return SK_GlobalConstant;
-
-      if (Decl->isStaticLocal() && NamingStyles[SK_StaticConstant])
-        return SK_StaticConstant;
-
-      if (Decl->isLocalVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
-          NamingStyles[SK_LocalConstantPointer])
-        return SK_LocalConstantPointer;
-
-      if (Decl->isLocalVarDecl() && NamingStyles[SK_LocalConstant])
-        return SK_LocalConstant;
-
-      if (Decl->isFunctionOrMethodVarDecl() && NamingStyles[SK_LocalConstant])
-        return SK_LocalConstant;
-
-      if (NamingStyles[SK_Constant])
-        return SK_Constant;
-    }
-
-    if (Decl->isStaticDataMember() && NamingStyles[SK_ClassMember])
-      return SK_ClassMember;
-
-    if (Decl->isFileVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
-        NamingStyles[SK_GlobalPointer])
-      return SK_GlobalPointer;
-
-    if (Decl->isFileVarDecl() && NamingStyles[SK_GlobalVariable])
-      return SK_GlobalVariable;
-
-    if (Decl->isStaticLocal() && NamingStyles[SK_StaticVariable])
-      return SK_StaticVariable;
-
-    if (Decl->isLocalVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
-        NamingStyles[SK_LocalPointer])
-      return SK_LocalPointer;
-
-    if (Decl->isLocalVarDecl() && NamingStyles[SK_LocalVariable])
-      return SK_LocalVariable;
-
-    if (Decl->isFunctionOrMethodVarDecl() && NamingStyles[SK_LocalVariable])
-      return SK_LocalVariable;
-
-    if (NamingStyles[SK_Variable])
-      return SK_Variable;
-
-    return SK_Invalid;
+    return findStyleKindForVar(Decl, Decl->getType(), NamingStyles);
   }
 
   if (const auto *Decl = dyn_cast<CXXMethodDecl>(D)) {
@@ -1442,12 +1373,13 @@ IdentifierNamingCheck::getDeclFailureInfo(const NamedDecl *Decl,
   if (!FileStyle.isActive())
     return std::nullopt;
 
-  return getFailureInfo(HungarianNotation.getDeclTypeName(Decl),
-                        Decl->getName(), Decl, Loc, FileStyle.getStyles(),
-                        FileStyle.getHNOption(),
-                        findStyleKind(Decl, FileStyle.getStyles(),
-                                      FileStyle.isIgnoringMainLikeFunction()),
-                        SM, IgnoreFailedSplit);
+  return getFailureInfo(
+      HungarianNotation.getDeclTypeName(Decl), Decl->getName(), Decl, Loc,
+      FileStyle.getStyles(), FileStyle.getHNOption(),
+      findStyleKind(Decl, FileStyle.getStyles(),
+                    FileStyle.isIgnoringMainLikeFunction(),
+                    FileStyle.isCheckingAnonFieldInParentScope()),
+      SM, IgnoreFailedSplit);
 }
 
 std::optional<RenamerClangTidyCheck::FailureInfo>
@@ -1496,5 +1428,114 @@ IdentifierNamingCheck::getStyleForFile(StringRef FileName) const {
   return It.first->getValue();
 }
 
+StyleKind IdentifierNamingCheck::findStyleKindForAnonField(
+    const FieldDecl *AnonField,
+    ArrayRef<std::optional<NamingStyle>> NamingStyles) const {
+  const IndirectFieldDecl *IFD =
+      utils::findOutermostIndirectFieldDeclForField(AnonField);
+  assert(IFD && "Found an anonymous record field without an IndirectFieldDecl");
+
+  QualType Type = AnonField->getType();
+
+  if (const auto *F = dyn_cast<FieldDecl>(IFD->chain().front())) {
+    return findStyleKindForField(F, Type, NamingStyles);
+  }
+
+  if (const auto *V = IFD->getVarDecl()) {
+    return findStyleKindForVar(V, Type, NamingStyles);
+  }
+
+  return SK_Invalid;
+}
+
+StyleKind IdentifierNamingCheck::findStyleKindForField(
+    const FieldDecl *Field, QualType Type,
+    ArrayRef<std::optional<NamingStyle>> NamingStyles) const {
+  if (!Type.isNull() && Type.isConstQualified()) {
+    if (NamingStyles[SK_ConstantMember])
+      return SK_ConstantMember;
+
+    if (NamingStyles[SK_Constant])
+      return SK_Constant;
+  }
+
+  if (Field->getAccess() == AS_private && NamingStyles[SK_PrivateMember])
+    return SK_PrivateMember;
+
+  if (Field->getAccess() == AS_protected && NamingStyles[SK_ProtectedMember])
+    return SK_ProtectedMember;
+
+  if (Field->getAccess() == AS_public && NamingStyles[SK_PublicMember])
+    return SK_PublicMember;
+
+  if (NamingStyles[SK_Member])
+    return SK_Member;
+
+  return SK_Invalid;
+}
+
+StyleKind IdentifierNamingCheck::findStyleKindForVar(
+    const VarDecl *Var, QualType Type,
+    ArrayRef<std::optional<NamingStyle>> NamingStyles) const {
+  if (Var->isConstexpr() && NamingStyles[SK_ConstexprVariable])
+    return SK_ConstexprVariable;
+
+  if (!Type.isNull() && Type.isConstQualified()) {
+    if (Var->isStaticDataMember() && NamingStyles[SK_ClassConstant])
+      return SK_ClassConstant;
+
+    if (Var->isFileVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
+        NamingStyles[SK_GlobalConstantPointer])
+      return SK_GlobalConstantPointer;
+
+    if (Var->isFileVarDecl() && NamingStyles[SK_GlobalConstant])
+      return SK_GlobalConstant;
+
+    if (Var->isStaticLocal() && NamingStyles[SK_StaticConstant])
+      return SK_StaticConstant;
+
+    if (Var->isLocalVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
+        NamingStyles[SK_LocalConstantPointer])
+      return SK_LocalConstantPointer;
+
+    if (Var->isLocalVarDecl() && NamingStyles[SK_LocalConstant])
+      return SK_LocalConstant;
+
+    if (Var->isFunctionOrMethodVarDecl() && NamingStyles[SK_LocalConstant])
+      return SK_LocalConstant;
+
+    if (NamingStyles[SK_Constant])
+      return SK_Constant;
+  }
+
+  if (Var->isStaticDataMember() && NamingStyles[SK_ClassMember])
+    return SK_ClassMember;
+
+  if (Var->isFileVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
+      NamingStyles[SK_GlobalPointer])
+    return SK_GlobalPointer;
+
+  if (Var->isFileVarDecl() && NamingStyles[SK_GlobalVariable])
+    return SK_GlobalVariable;
+
+  if (Var->isStaticLocal() && NamingStyles[SK_StaticVariable])
+    return SK_StaticVariable;
+
+  if (Var->isLocalVarDecl() && Type.getTypePtr()->isAnyPointerType() &&
+      NamingStyles[SK_LocalPointer])
+    return SK_LocalPointer;
+
+  if (Var->isLocalVarDecl() && NamingStyles[SK_LocalVariable])
+    return SK_LocalVariable;
+
+  if (Var->isFunctionOrMethodVarDecl() && NamingStyles[SK_LocalVariable])
+    return SK_LocalVariable;
+
+  if (NamingStyles[SK_Variable])
+    return SK_Variable;
+
+  return SK_Invalid;
+}
+
 } // namespace readability
 } // namespace clang::tidy
diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h
index 14626981cc42d..27c8e4bc768c4 100644
--- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h
@@ -127,9 +127,11 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck {
   struct FileStyle {
     FileStyle() : IsActive(false), IgnoreMainLikeFunctions(false) {}
     FileStyle(SmallVectorImpl<std::optional<NamingStyle>> &&Styles,
-              HungarianNotationOption HNOption, bool IgnoreMainLike)
+              HungarianNotationOption HNOption, bool IgnoreMainLike,
+              bool CheckAnonFieldInParent)
         : Styles(std::move(Styles)), HNOption(std::move(HNOption)),
-          IsActive(true), IgnoreMainLikeFunctions(IgnoreMainLike) {}
+          IsActive(true), IgnoreMainLikeFunctions(IgnoreMainLike),
+          CheckAnonFieldInParentScope(CheckAnonFieldInParent) {}
 
     ArrayRef<std::optional<NamingStyle>> getStyles() const {
       assert(IsActive);
@@ -144,11 +146,16 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck {
     bool isActive() const { return IsActive; }
     bool isIgnoringMainLikeFunction() const { return IgnoreMainLikeFunctions; }
 
+    bool isCheckingAnonFieldInParentScope() const {
+      return CheckAnonFieldInParentScope;
+    }
+
   private:
     SmallVector<std::optional<NamingStyle>, 0> Styles;
     HungarianNotationOption HNOption;
     bool IsActive;
     bool IgnoreMainLikeFunctions;
+    bool CheckAnonFieldInParentScope;
   };
 
   IdentifierNamingCheck::FileStyle
@@ -175,7 +182,7 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck {
   StyleKind findStyleKind(
       const NamedDecl *D,
       ArrayRef<std::optional<IdentifierNamingCheck::NamingStyle>> NamingStyles,
-      bool IgnoreMainLikeFunctions) const;
+      bool IgnoreMainLikeFunctions, bool CheckAnonFieldInParentScope) const;
 
   std::optional<RenamerClangTidyCheck::FailureInfo> getFailureInfo(
       StringRef Type, StringRef Name, const NamedDecl *ND,
@@ -199,6 +206,19 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck {
 
   const FileStyle &getStyleForFile(StringRef FileName) const;
 
+  /// Find the style kind of a field in an anonymous record.
+  StyleKind findStyleKindForAnonField(
+      const FieldDecl *AnonField,
+      ArrayRef<std::optional<NamingStyle>> NamingStyles) const;
+
+  StyleKind findStyleKindForField(
+      const FieldDecl *Field, QualType Type,
+      ArrayRef<std::optional<NamingStyle>> NamingStyles) const;
+
+  StyleKind
+  findStyleKindForVar(const VarDecl *Var, QualType Type,
+                      ArrayRef<std::optional<NamingStyle>> NamingStyles) const;
+
   /// Stores the style options as a vector, indexed by the specified \ref
   /// StyleKind, for a given directory.
   mutable llvm::StringMap<FileStyle> NamingStylesCache;
diff --git a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp
index 2c011f5c0e690..e32f79589a059 100644
--- a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MisleadingIndentationCheck.h"
+#include "../utils/LexerUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 
@@ -51,8 +52,20 @@ void MisleadingIndentationCheck::danglingElseCheck(const SourceManager &SM,
     diag(ElseLoc, "different indentation for 'if' and corresponding 'else'");
 }
 
-void MisleadingIndentationCheck::missingBracesCheck(const SourceManager &SM,
-                                                    const CompoundStmt *CStmt) {
+static bool isAtStartOfLineIncludingEmptyMacro(SourceLocation NextLoc,
+                                               const SourceManager &SM,
+                                               const LangOptions &LangOpts) {
+  const SourceLocation BeforeLoc =
+      utils::lexer::getPreviousTokenAndStart(NextLoc, SM, LangOpts).second;
+  if (BeforeLoc.isInvalid())
+    return false;
+  return SM.getExpansionLineNumber(BeforeLoc) !=
+         SM.getExpansionLineNumber(NextLoc);
+}
+
+void MisleadingIndentationCheck::missingBracesCheck(
+    const SourceManager &SM, const CompoundStmt *CStmt,
+    const LangOptions &LangOpts) {
   const static StringRef StmtNames[] = {"if", "for", "while"};
   for (unsigned int I = 0; I < CStmt->size() - 1; I++) {
     const Stmt *CurrentStmt = CStmt->body_begin()[I];
@@ -92,6 +105,8 @@ void MisleadingIndentationCheck::missingBracesCheck(const SourceManager &SM,
 
     if (NextLoc.isInvalid() || NextLoc.isMacroID())
       continue;
+    if (!isAtStartOfLineIncludingEmptyMacro(NextLoc, SM, LangOpts))
+      continue;
 
     if (SM.getExpansionColumnNumber(InnerLoc) ==
         SM.getExpansionColumnNumber(NextLoc)) {
@@ -117,7 +132,8 @@ void MisleadingIndentationCheck::check(const MatchFinder::MatchResult &Result) {
     danglingElseCheck(*Result.SourceManager, Result.Context, If);
 
   if (const auto *CStmt = Result.Nodes.getNodeAs<CompoundStmt>("compound"))
-    missingBracesCheck(*Result.SourceManager, CStmt);
+    missingBracesCheck(*Result.SourceManager, CStmt,
+                       Result.Context->getLangOpts());
 }
 
 } // namespace clang::tidy::readability
diff --git a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h
index c336abbc7c4a9..9c92fc1e18b6f 100644
--- a/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h
@@ -32,7 +32,8 @@ class MisleadingIndentationCheck : public ClangTidyCheck {
 private:
   void danglingElseCheck(const SourceManager &SM, ASTContext *Context,
                          const IfStmt *If);
-  void missingBracesCheck(const SourceManager &SM, const CompoundStmt *CStmt);
+  void missingBracesCheck(const SourceManager &SM, const CompoundStmt *CStmt,
+                          const LangOptions &LangOpts);
 };
 
 } // namespace clang::tidy::readability
diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp b/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp
index 64333f2c18745..fd5dadc9b01db 100644
--- a/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp
@@ -113,4 +113,28 @@ bool areStatementsIdentical(const Stmt *FirstStmt, const Stmt *SecondStmt,
   return DataFirst == DataSecond;
 }
 
+const IndirectFieldDecl *
+findOutermostIndirectFieldDeclForField(const FieldDecl *FD) {
+  const RecordDecl *Record = FD->getParent();
+  assert(Record->isAnonymousStructOrUnion() &&
+         "FD must be a field in an anonymous record");
+
+  const DeclContext *Context = Record;
+  while (isa<RecordDecl>(Context) &&
+         cast<RecordDecl>(Context)->isAnonymousStructOrUnion()) {
+    Context = Context->getParent();
+  }
+
+  // Search for the target IndirectFieldDecl within the located context.
+  for (const auto *D : Context->decls()) {
+    const auto *IFD = dyn_cast<IndirectFieldDecl>(D);
+    if (!IFD)
+      continue;
+    if (IFD->getAnonField() == FD)
+      return IFD;
+  }
+
+  return nullptr;
+}
+
 } // namespace clang::tidy::utils
diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.h b/clang-tools-extra/clang-tidy/utils/ASTUtils.h
index 1bba5daf2fc76..6c3e54facd020 100644
--- a/clang-tools-extra/clang-tidy/utils/ASTUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.h
@@ -40,6 +40,11 @@ bool rangeCanBeFixed(SourceRange Range, const SourceManager *SM);
 bool areStatementsIdentical(const Stmt *FirstStmt, const Stmt *SecondStmt,
                             const ASTContext &Context, bool Canonical = false);
 
+// Given a field of an anonymous record, find its corresponding
+// IndirectFieldDecl in the outermost possible scope.
+const IndirectFieldDecl *
+findOutermostIndirectFieldDeclForField(const FieldDecl *FD);
+
 } // namespace clang::tidy::utils
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ASTUTILS_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index ce82063dbfe23..571808a51596a 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -454,7 +454,10 @@ Changes in existing checks
   has been enhanced, particularly within complex types like function pointers
   and cases where style checks were omitted when functions started with macros.
   Added support for C++20 ``concept`` declarations. ``Camel_Snake_Case`` and
-  ``camel_Snake_Case`` now detect more invalid identifier names.
+  ``camel_Snake_Case`` now detect more invalid identifier names. Fields in
+  anonymous records (i.e. anonymous structs and unions) now can be checked with
+  the naming rules associated with their enclosing scopes rather than the naming
+  rules of public struct/union members.
 
 - Improved :doc:`readability-implicit-bool-conversion
   <clang-tidy/checks/readability/implicit-bool-conversion>` check to take
@@ -462,6 +465,10 @@ Changes in existing checks
   `AllowPointerConditions` options. It also now provides more consistent
   suggestions when parentheses are added to the return value.
 
+- Improved :doc:`readability-misleading-indentation
+  <clang-tidy/checks/readability/misleading-indentation>` check to ignore
+  false-positives for line started with empty macro.
+
 - Improved :doc:`readability-non-const-parameter
   <clang-tidy/checks/readability/non-const-parameter>` check to ignore
   false-positives in initializer list of record.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-naming.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-naming.rst
index e36bbee394f17..2affb55cfa9ad 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-naming.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-naming.rst
@@ -42,6 +42,7 @@ The following options are described below:
 
  - :option:`AbstractClassCase`, :option:`AbstractClassPrefix`, :option:`AbstractClassSuffix`, :option:`AbstractClassIgnoredRegexp`, :option:`AbstractClassHungarianPrefix`
  - :option:`AggressiveDependentMemberLookup`
+ - :option:`CheckAnonFieldInParent`
  - :option:`ClassCase`, :option:`ClassPrefix`, :option:`ClassSuffix`, :option:`ClassIgnoredRegexp`, :option:`ClassHungarianPrefix`
  - :option:`ClassConstantCase`, :option:`ClassConstantPrefix`, :option:`ClassConstantSuffix`, :option:`ClassConstantIgnoredRegexp`, :option:`ClassConstantHungarianPrefix`
  - :option:`ClassMemberCase`, :option:`ClassMemberPrefix`, :option:`ClassMemberSuffix`, :option:`ClassMemberIgnoredRegexp`, :option:`ClassMemberHungarianPrefix`
@@ -207,6 +208,32 @@ After if AggressiveDependentMemberLookup is `true`:
       }
     };
 
+.. option:: CheckAnonFieldInParent
+
+    When set to `true`, fields in anonymous records (i.e. anonymous
+    unions and structs) will be treated as names in the enclosing scope
+    rather than public members of the anonymous record for the purpose
+    of name checking.
+
+For example:
+
+.. code-block:: c++
+
+    class Foo {
+    private:
+      union {
+        int iv_;
+        float fv_;
+      };
+    };
+
+If :option:`CheckAnonFieldInParent` is `false`, you may get warnings
+that ``iv_`` and ``fv_`` are not coherent to public member names, because
+``iv_`` and ``fv_`` are public members of the anonymous union. When
+:option:`CheckAnonFieldInParent` is `true`, ``iv_`` and ``fv_`` will be
+treated as private data members of ``Foo`` for the purpose of name checking
+and thus no warnings will be emitted.
+
 .. option:: ClassCase
 
     When defined, the check will ensure class names conform to the
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-anon-record-fields.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-anon-record-fields.cpp
new file mode 100644
index 0000000000000..1b4d4e924a721
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-anon-record-fields.cpp
@@ -0,0 +1,185 @@
+// RUN: %check_clang_tidy -std=c++20 %s readability-identifier-naming %t -- \
+// RUN:   -config='{CheckOptions: { \
+// RUN:     readability-identifier-naming.CheckAnonFieldInParent: true, \
+// RUN:     readability-identifier-naming.ClassConstantCase: CamelCase, \
+// RUN:     readability-identifier-naming.ClassConstantPrefix: 'k', \
+// RUN:     readability-identifier-naming.ClassMemberCase: CamelCase, \
+// RUN:     readability-identifier-naming.ConstantCase: UPPER_CASE, \
+// RUN:     readability-identifier-naming.ConstantSuffix: '_CST', \
+// RUN:     readability-identifier-naming.ConstexprVariableCase: lower_case, \
+// RUN:     readability-identifier-naming.GlobalConstantCase: UPPER_CASE, \
+// RUN:     readability-identifier-naming.GlobalVariableCase: lower_case, \
+// RUN:     readability-identifier-naming.GlobalVariablePrefix: 'g_', \
+// RUN:     readability-identifier-naming.LocalConstantCase: CamelCase, \
+// RUN:     readability-identifier-naming.LocalConstantPrefix: 'k', \
+// RUN:     readability-identifier-naming.LocalVariableCase: lower_case, \
+// RUN:     readability-identifier-naming.MemberCase: CamelCase, \
+// RUN:     readability-identifier-naming.MemberPrefix: 'm_', \
+// RUN:     readability-identifier-naming.ConstantMemberCase: lower_case, \
+// RUN:     readability-identifier-naming.PrivateMemberPrefix: '__', \
+// RUN:     readability-identifier-naming.ProtectedMemberPrefix: '_', \
+// RUN:     readability-identifier-naming.PublicMemberCase: lower_case, \
+// RUN:     readability-identifier-naming.StaticConstantCase: UPPER_CASE, \
+// RUN:     readability-identifier-naming.StaticVariableCase: camelBack, \
+// RUN:     readability-identifier-naming.StaticVariablePrefix: 's_', \
+// RUN:     readability-identifier-naming.VariableCase: lower_case, \
+// RUN:     readability-identifier-naming.GlobalPointerCase: CamelCase, \
+// RUN:     readability-identifier-naming.GlobalPointerSuffix: '_Ptr', \
+// RUN:     readability-identifier-naming.GlobalConstantPointerCase: UPPER_CASE, \
+// RUN:     readability-identifier-naming.GlobalConstantPointerSuffix: '_Ptr', \
+// RUN:     readability-identifier-naming.LocalPointerCase: CamelCase, \
+// RUN:     readability-identifier-naming.LocalPointerPrefix: 'l_', \
+// RUN:     readability-identifier-naming.LocalConstantPointerCase: CamelCase, \
+// RUN:     readability-identifier-naming.LocalConstantPointerPrefix: 'lc_', \
+// RUN:   }}'
+
+static union {
+  int global;
+// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: invalid case style for global variable 'global'
+// CHECK-FIXES: {{^}}  int g_global;{{$}}
+
+  const int global_const;
+// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: invalid case style for global constant 'global_const'
+// CHECK-FIXES: {{^}}  const int GLOBAL_CONST;{{$}}
+
+  int *global_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: invalid case style for global pointer 'global_ptr'
+// CHECK-FIXES: {{^}}  int *GlobalPtr_Ptr;{{$}}
+
+  int *const global_const_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: invalid case style for global constant pointer 'global_const_ptr'
+// CHECK-FIXES: {{^}}  int *const GLOBAL_CONST_PTR_Ptr;{{$}}
+};
+
+namespace ns {
+
+static union {
+  int ns_global;
+// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: invalid case style for global variable 'ns_global'
+// CHECK-FIXES: {{^}}  int g_ns_global;{{$}}
+
+  const int ns_global_const;
+// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: invalid case style for global constant 'ns_global_const'
+// CHECK-FIXES: {{^}}  const int NS_GLOBAL_CONST;{{$}}
+
+  int *ns_global_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: invalid case style for global pointer 'ns_global_ptr'
+// CHECK-FIXES: {{^}}  int *NsGlobalPtr_Ptr;{{$}}
+
+  int *const ns_global_const_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: invalid case style for global constant pointer 'ns_global_const_ptr'
+// CHECK-FIXES: {{^}}  int *const NS_GLOBAL_CONST_PTR_Ptr;{{$}}
+};
+
+namespace {
+
+union {
+  int anon_ns_global;
+// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: invalid case style for global variable 'anon_ns_global'
+// CHECK-FIXES: {{^}}  int g_anon_ns_global;{{$}}
+
+  const int anon_ns_global_const;
+// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: invalid case style for global constant 'anon_ns_global_const'
+// CHECK-FIXES: {{^}}  const int ANON_NS_GLOBAL_CONST;{{$}}
+
+  int *anon_ns_global_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: invalid case style for global pointer 'anon_ns_global_ptr'
+// CHECK-FIXES: {{^}}  int *AnonNsGlobalPtr_Ptr;{{$}}
+
+  int *const anon_ns_global_const_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: invalid case style for global constant pointer 'anon_ns_global_const_ptr'
+// CHECK-FIXES: {{^}}  int *const ANON_NS_GLOBAL_CONST_PTR_Ptr;{{$}}
+};
+
+}
+
+}
+
+
+class Foo {
+public:
+  union {
+    int PubMember;
+// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: invalid case style for public member 'PubMember'
+// CHECK-FIXES: {{^}}    int pub_member;{{$}}
+
+    const int PubConstMember;
+// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: invalid case style for constant member 'PubConstMember'
+// CHECK-FIXES: {{^}}    const int pub_const_member;{{$}}
+
+    int *PubPtrMember;
+// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: invalid case style for public member 'PubPtrMember'
+// CHECK-FIXES: {{^}}    int *pub_ptr_member;{{$}}
+
+    int *const PubConstPtrMember;
+// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: invalid case style for constant member 'PubConstPtrMember'
+// CHECK-FIXES: {{^}}    int *const pub_const_ptr_member;{{$}}
+  };
+
+protected:
+  union {
+    int prot_member;
+// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: invalid case style for protected member 'prot_member'
+// CHECK-FIXES: {{^}}    int _prot_member;{{$}}
+
+    const int prot_const_member;
+
+    int *prot_ptr_member;
+// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: invalid case style for protected member 'prot_ptr_member'
+// CHECK-FIXES: {{^}}    int *_prot_ptr_member;{{$}}
+
+    int *const prot_const_ptr_member;
+  };
+
+
+private:
+  union {
+    int pri_member;
+// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: invalid case style for private member 'pri_member'
+// CHECK-FIXES: {{^}}    int __pri_member;{{$}}
+
+    const int pri_const_member;
+
+    int *pri_ptr_member;
+// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: invalid case style for private member 'pri_ptr_member'
+// CHECK-FIXES: {{^}}    int *__pri_ptr_member;{{$}}
+
+    int *const pri_const_ptr_member;
+  };
+};
+
+void test() {
+  union {
+    int local;
+
+    const int local_const;
+// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: invalid case style for local constant 'local_const'
+// CHECK-FIXES: {{^}}    const int kLocalConst;{{$}}
+
+    int *local_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: invalid case style for local pointer 'local_ptr'
+// CHECK-FIXES: {{^}}    int *l_LocalPtr;{{$}}
+
+    int *const local_const_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: invalid case style for local constant pointer 'local_const_ptr'
+// CHECK-FIXES: {{^}}    int *const lc_LocalConstPtr;{{$}}
+  };
+
+  static union {
+    int local_static;
+// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: invalid case style for static variable 'local_static'
+// CHECK-FIXES: {{^}}    int s_localStatic;{{$}}
+
+    const int local_static_const;
+// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: invalid case style for static constant 'local_static_const'
+// CHECK-FIXES: {{^}}    const int LOCAL_STATIC_CONST;{{$}}
+
+    int *local_static_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: invalid case style for static variable 'local_static_ptr'
+// CHECK-FIXES: {{^}}    int *s_localStaticPtr;{{$}}
+
+    int *const local_static_const_ptr;
+// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: invalid case style for static constant 'local_static_const_ptr'
+// CHECK-FIXES: {{^}}    int *const LOCAL_STATIC_CONST_PTR;{{$}}
+  };
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/misleading-indentation.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/misleading-indentation.cpp
index aea0618d120db..5d4d60f5f1a35 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/misleading-indentation.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/misleading-indentation.cpp
@@ -4,6 +4,8 @@ void foo1();
 void foo2();
 void foo3();
 
+#define E
+
 #define BLOCK \
   if (cond1)  \
     foo1();   \
@@ -109,6 +111,13 @@ void f()
   }
 
   BLOCK
+
+  if (cond1)
+    foo1();
+  else
+    foo2();
+  E foo3();
+  // CHECK-MESSAGES-NOT: :[[@LINE-1]]readability-misleading-indentation
 }
 
 void g(bool x) {
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index f2dde7f540fb7..e7d78b03511fe 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -2441,11 +2441,9 @@ let HasMasked = false, HasVL = false, IRName = "" in {
         return Builder.CreateInsertVector(ResultType, Ops[0], Ops[2], Ops[1]);
       }
       }] in {
-    let Log2LMUL = [0, 1, 2] in {
-      foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in {
-        def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">;
-        def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">;
-      }
+    foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in {
+      def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">;
+      def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">;
     }
     foreach nf = NFList in {
       defvar T = "(Tuple:" # nf # ")";
diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp
index 27096a83b8dd6..6a16c2184fcfb 100644
--- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp
@@ -299,10 +299,8 @@ class Parser::CodeTokenizer {
 
   /// Consume all leading whitespace from \c Code.
   void consumeWhitespace() {
-    Code = Code.drop_while([](char c) {
-      // Don't trim newlines.
-      return StringRef(" \t\v\f\r").contains(c);
-    });
+    // Don't trim newlines.
+    Code = Code.ltrim(" \t\v\f\r");
   }
 
   SourceLocation currentLocation() {
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 288bc46ac5a2b..88eed570e5ad0 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -628,8 +628,7 @@ ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
     return OMF_performSelector;
 
   // The other method families may begin with a prefix of underscores.
-  while (!name.empty() && name.front() == '_')
-    name = name.substr(1);
+  name = name.ltrim('_');
 
   if (name.empty()) return OMF_None;
   switch (name.front()) {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 3ee39133fcee7..2f8395cb8932f 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1365,8 +1365,7 @@ bool AArch64TargetInfo::validateConstraintModifier(
     StringRef Constraint, char Modifier, unsigned Size,
     std::string &SuggestedModifier) const {
   // Strip off constraint modifiers.
-  while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
-    Constraint = Constraint.substr(1);
+  Constraint = Constraint.ltrim("=+&");
 
   switch (Constraint[0]) {
   default:
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 6e1842fc64e50..01f9e844da12a 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1230,8 +1230,7 @@ bool ARMTargetInfo::validateConstraintModifier(
   bool isInOut = (Constraint[0] == '+');
 
   // Strip off constraint modifiers.
-  while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
-    Constraint = Constraint.substr(1);
+  Constraint = Constraint.ltrim("=+&");
 
   switch (Constraint[0]) {
   default:
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index b97f88647fa49..3deaa19f8d4fc 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1613,8 +1613,7 @@ bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap,
                                        StringRef Constraint,
                                        unsigned Size) const {
   // Strip off constraint modifiers.
-  while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
-    Constraint = Constraint.substr(1);
+  Constraint = Constraint.ltrim("=+&");
 
   return validateOperandSize(FeatureMap, Constraint, Size);
 }
diff --git a/clang/lib/Basic/Warnings.cpp b/clang/lib/Basic/Warnings.cpp
index bab1af4f03b67..92954cab6fb04 100644
--- a/clang/lib/Basic/Warnings.cpp
+++ b/clang/lib/Basic/Warnings.cpp
@@ -96,11 +96,7 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
 
       // Check to see if this warning starts with "no-", if so, this is a
       // negative form of the option.
-      bool isPositive = true;
-      if (Opt.starts_with("no-")) {
-        isPositive = false;
-        Opt = Opt.substr(3);
-      }
+      bool isPositive = !Opt.consume_front("no-");
 
       // Figure out how this option affects the warning.  If -Wfoo, map the
       // diagnostic to a warning, if -Wno-foo, map it to ignore.
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index 25b43cefce6b5..8ae47d1680bd2 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -167,13 +167,6 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple,
     Features.push_back("-relax");
   }
 
-  // GCC Compatibility: -mno-save-restore is default, unless -msave-restore is
-  // specified.
-  if (Args.hasFlag(options::OPT_msave_restore, options::OPT_mno_save_restore, false))
-    Features.push_back("+save-restore");
-  else
-    Features.push_back("-save-restore");
-
   // -mno-unaligned-access is default, unless -munaligned-access is specified.
   AddTargetFeature(Args, Features, options::OPT_munaligned_access,
                    options::OPT_mno_unaligned_access, "fast-unaligned-access");
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index fef0522aaf45b..53e26a9f8e229 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -237,9 +237,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
     assert(Name.starts_with("m") && "Invalid feature name.");
     Name = Name.substr(1);
 
-    bool IsNegative = Name.starts_with("no-");
-    if (IsNegative)
-      Name = Name.substr(3);
+    bool IsNegative = Name.consume_front("no-");
 
 #ifndef NDEBUG
     assert(Name.starts_with("avx10.") && "Invalid AVX10 feature name.");
diff --git a/clang/lib/Headers/usermsrintrin.h b/clang/lib/Headers/usermsrintrin.h
index 6d1424ad3b2ed..61388376706dc 100644
--- a/clang/lib/Headers/usermsrintrin.h
+++ b/clang/lib/Headers/usermsrintrin.h
@@ -14,12 +14,33 @@
 #define __USERMSRINTRIN_H
 #ifdef __x86_64__
 
+/// Reads the contents of a 64-bit MSR specified in \a __A into \a dst.
+///
+/// This intrinsic corresponds to the <c> URDMSR </c> instruction.
+/// \param __A
+///    An unsigned long long.
+///
+/// \code{.operation}
+///    DEST := MSR[__A]
+/// \endcode
 static __inline__ unsigned long long
     __attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
     _urdmsr(unsigned long long __A) {
   return __builtin_ia32_urdmsr(__A);
 }
 
+/// Writes the contents of \a __B into the 64-bit MSR specified in \a __A.
+///
+/// This intrinsic corresponds to the <c> UWRMSR </c> instruction.
+///
+/// \param __A
+///    An unsigned long long.
+/// \param __B
+///    An unsigned long long.
+///
+/// \code{.operation}
+///    MSR[__A] := __B
+/// \endcode
 static __inline__ void
     __attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
     _uwrmsr(unsigned long long __A, unsigned long long __B) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c
index 9d95acc33dddc..582d5fd812bc3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vlenb.c
@@ -21,19 +21,19 @@ unsigned long test_vlenb(void) {
   return __riscv_vlenb();
 }
 //.
-// RV32: attributes #0 = { mustprogress nofree noinline nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+32bit,+d,+f,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" }
-// RV32: attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
+// RV32: attributes #[[ATTR0:[0-9]+]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+32bit,+d,+f,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" }
+// RV32: attributes #[[ATTR1:[0-9]+]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
 //.
-// RV64: attributes #0 = { mustprogress nofree noinline nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+d,+f,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" }
-// RV64: attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
+// RV64: attributes #[[ATTR0:[0-9]+]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+d,+f,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" }
+// RV64: attributes #[[ATTR1:[0-9]+]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
 //.
-// RV32: !0 = !{i32 1, !"wchar_size", i32 4}
-// RV32: !1 = !{i32 1, !"target-abi", !"ilp32d"}
-// RV32: !2 = !{i32 8, !"SmallDataLimit", i32 0}
-// RV32: !3 = !{!"vlenb"}
+// RV32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// RV32: [[META1:![0-9]+]] = !{i32 1, !"target-abi", !"ilp32d"}
+// RV32: [[META2:![0-9]+]] = !{i32 8, !"SmallDataLimit", i32 0}
+// RV32: [[META3]] = !{!"vlenb"}
 //.
-// RV64: !0 = !{i32 1, !"wchar_size", i32 4}
-// RV64: !1 = !{i32 1, !"target-abi", !"lp64d"}
-// RV64: !2 = !{i32 8, !"SmallDataLimit", i32 0}
-// RV64: !3 = !{!"vlenb"}
+// RV64: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// RV64: [[META1:![0-9]+]] = !{i32 1, !"target-abi", !"lp64d"}
+// RV64: [[META2:![0-9]+]] = !{i32 8, !"SmallDataLimit", i32 0}
+// RV64: [[META3]] = !{!"vlenb"}
 //.
diff --git a/clang/test/Driver/riscv-default-features.c b/clang/test/Driver/riscv-default-features.c
index 6e48f7cc37dcb..4c3883c1cc118 100644
--- a/clang/test/Driver/riscv-default-features.c
+++ b/clang/test/Driver/riscv-default-features.c
@@ -2,9 +2,7 @@
 // RUN: %clang --target=riscv64-unknown-elf -S -emit-llvm %s -o - | FileCheck %s -check-prefix=RV64
 
 // RV32: "target-features"="+32bit,+a,+c,+m,+relax,
-// RV32-SAME: -save-restore
 // RV64: "target-features"="+64bit,+a,+c,+m,+relax,
-// RV64-SAME: -save-restore
 
 // Dummy function
 int foo(void){
diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c
index 716f3f6da57b8..d3700f71aa7e1 100644
--- a/clang/test/Driver/riscv-features.c
+++ b/clang/test/Driver/riscv-features.c
@@ -24,7 +24,7 @@
 
 // SAVE-RESTORE: "-target-feature" "+save-restore"
 // NO-SAVE-RESTORE: "-target-feature" "-save-restore"
-// DEFAULT: "-target-feature" "-save-restore"
+// DEFAULT-NOT: "-target-feature" "-save-restore"
 // DEFAULT-NOT: "-target-feature" "+save-restore"
 
 // RUN: %clang --target=riscv32-unknown-elf -### %s -munaligned-access 2>&1 | FileCheck %s -check-prefix=FAST-UNALIGNED-ACCESS
diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp
index 5b3a99adfea7c..1a018a891b56e 100644
--- a/compiler-rt/lib/hwasan/hwasan_report.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_report.cpp
@@ -260,16 +260,15 @@ static void PrintStackAllocations(const StackAllocationsRingBuffer *sa,
         Printf("Cause: %s\n", cause);
         Printf("%s", d.Default());
         Printf("%s", d.Location());
-        Printf("%p is located %zd bytes %s a %zd-byte region [%p,%p)\n",
-               untagged_addr, offset, whence, local_end - local_beg, local_beg,
-               local_end);
-        Printf("%s", d.Allocation());
         StackTracePrinter::GetOrInit()->RenderSourceLocation(
             &location, local.decl_file, local.decl_line, /* column= */ 0,
             common_flags()->symbolize_vs_style,
             common_flags()->strip_path_prefix);
-        Printf("  %s in %s %s\n", local.name, local.function_name,
-               location.data());
+        Printf(
+            "%p is located %zd bytes %s a %zd-byte local variable %s [%p,%p) "
+            "in %s %s\n",
+            untagged_addr, offset, whence, local_end - local_beg, local.name,
+            local_beg, local_end, local.function_name, location.data());
         location.clear();
         Printf("%s\n", d.Default());
       }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h
index 8bb8304910c73..d246781fe1df5 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h
@@ -109,6 +109,10 @@ class TwoLevelMap {
     return *AddressSpaceView::LoadWritable(&map2[idx % kSize2]);
   }
 
+  void Lock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { mu_.Lock(); }
+
+  void Unlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { mu_.Unlock(); }
+
  private:
   constexpr uptr MmapSize() const {
     return RoundUpTo(kSize2 * sizeof(T), GetPageSizeCached());
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
index 21d57d9ab2a91..279bc5de3bb93 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
@@ -161,18 +161,32 @@ StackDepotBase<Node, kReservedBits, kTabSizeLog>::Get(u32 id) {
 
 template <class Node, int kReservedBits, int kTabSizeLog>
 void StackDepotBase<Node, kReservedBits, kTabSizeLog>::LockBeforeFork() {
-  for (int i = 0; i < kTabSize; ++i) {
-    lock(&tab[i]);
-  }
+  // Do not lock hash table. It's very expensive, but it's not rely needed. The
+  // parent process will neither lock nor unlock. Child process risks to be
+  // deadlocked on already locked buckets. To avoid deadlock we will unlock
+  // every locked buckets in `UnlockAfterFork`. This may affect consistency of
+  // the hash table, but the only issue is a few items inserted by parent
+  // process will be not found by child, and the child may insert them again,
+  // wasting some space in `stackStore`.
+
+  // We still need to lock nodes.
+  nodes.Lock();
 }
 
 template <class Node, int kReservedBits, int kTabSizeLog>
 void StackDepotBase<Node, kReservedBits, kTabSizeLog>::UnlockAfterFork(
     bool fork_child) {
+  nodes.Unlock();
+
+  // Only unlock in child process to avoid deadlock. See `LockBeforeFork`.
+  if (!fork_child)
+    return;
+
   for (int i = 0; i < kTabSize; ++i) {
     atomic_uint32_t *p = &tab[i];
     uptr s = atomic_load(p, memory_order_relaxed);
-    unlock(p, s & kUnlockMask);
+    if (s & kLockMask)
+      unlock(p, s & kUnlockMask);
   }
 }
 
diff --git a/compiler-rt/test/hwasan/TestCases/stack-overflow.c b/compiler-rt/test/hwasan/TestCases/stack-overflow.c
index 10e8d9c59e4bb..4af506e3ecf45 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-overflow.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-overflow.c
@@ -17,8 +17,7 @@ int main() {
   // CHECK: is located in stack of thread
   // CHECK: Potentially referenced stack objects:
   // CHECK: Cause: stack-buffer-overflow
-  // CHECK-NEXT: 0x{{.*}} is located 1 bytes after a 64-byte region
-  // CHECK-NEXT: c in buggy {{.*}}stack-overflow.c:
+  // CHECK-NEXT: 0x{{.*}} is located 1 bytes after a 64-byte local variable c [0x{{.*}},0x{{.*}}) in buggy {{.*}}stack-overflow.c:
   // CHECK: Memory tags around the buggy address
 
   // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in buggy
diff --git a/compiler-rt/test/hwasan/TestCases/stack-uar-dynamic.c b/compiler-rt/test/hwasan/TestCases/stack-uar-dynamic.c
index 7a2a11593e7af..14b9cba8aa5e4 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-uar-dynamic.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-uar-dynamic.c
@@ -22,7 +22,6 @@ int main() {
   char *p = buggy(1);
   // CHECK: Potentially referenced stack objects:
   // CHECK-NEXT: use-after-scope
-  // CHECK-NEXT: 0x{{.*}} is located 0 bytes inside a 64-byte region
-  // CHECK-NEXT: c in buggy
+  // CHECK-NEXT: 0x{{.*}} is located 0 bytes inside a 64-byte local variable c [0x{{.*}},0x{{.*}}) in buggy
   p[0] = 0;
 }
diff --git a/compiler-rt/test/hwasan/TestCases/stack-uar.c b/compiler-rt/test/hwasan/TestCases/stack-uar.c
index 8810701f0c9ca..9fd4381a8049e 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-uar.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-uar.c
@@ -51,8 +51,7 @@ int main() {
   // CHECK: is located in stack of thread
   // CHECK: Potentially referenced stack objects:
   // CHECK: Cause: use-after-scope
-  // CHECK-NEXT: 0x{{.*}} is located 0 bytes inside a 2048-byte region
-  // CHECK-NEXT: {{zzz|yyy}} in buggy {{.*}}stack-uar.c:
+  // CHECK-NEXT: 0x{{.*}} is located 0 bytes inside a 2048-byte local variable {{zzz|yyy}} [0x{{.*}},0x{{.*}}) in buggy {{.*}}stack-uar.c:
   // CHECK: Memory tags around the buggy address
 
   // NOSYM: Previously allocated frames:
diff --git a/compiler-rt/test/hwasan/TestCases/stack-uas.c b/compiler-rt/test/hwasan/TestCases/stack-uas.c
index 53a7054c1c435..a0e4eb02dd226 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-uas.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-uas.c
@@ -70,8 +70,7 @@ int main() {
   // CHECK: is located in stack of thread
   // CHECK: Potentially referenced stack objects:
   // CHECK: Cause: use-after-scope
-  // CHECK-NEXT: 0x{{.*}} is located 0 bytes inside a 2048-byte region
-  // CHECK-NEXT: {{zzz|yyy}} in buggy {{.*}}stack-uas.c:
+  // CHECK-NEXT: 0x{{.*}} is located 0 bytes inside a 2048-byte local variable {{zzz|yyy}} [0x{{.*}}) in buggy {{.*}}stack-uas.c:
   // CHECK: Memory tags around the buggy address
 
   // NOSYM: Previously allocated frames:
diff --git a/compiler-rt/test/hwasan/TestCases/stack-underflow.c b/compiler-rt/test/hwasan/TestCases/stack-underflow.c
index 8e5174519272f..e13955ed37b41 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-underflow.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-underflow.c
@@ -17,8 +17,7 @@ int main() {
   // CHECK: is located in stack of thread
   // CHECK: Potentially referenced stack objects:
   // CHECK: Cause: stack-buffer-overflow
-  // CHECK-NEXT: 0x{{.*}} is located 2 bytes before a 64-byte region
-  // CHECK-NEXT: c in buggy {{.*}}stack-underflow.c:
+  // CHECK-NEXT: 0x{{.*}} is located 2 bytes before a 64-byte local variable c [0x{{.*}},0x{{.*}}) in buggy {{.*}}stack-underflow.c:
   // CHECK: Memory tags around the buggy address
 
   // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in buggy
diff --git a/compiler-rt/test/hwasan/TestCases/strip_path_prefix.c b/compiler-rt/test/hwasan/TestCases/strip_path_prefix.c
index 80ef32699f8f4..22705ed35ce7e 100644
--- a/compiler-rt/test/hwasan/TestCases/strip_path_prefix.c
+++ b/compiler-rt/test/hwasan/TestCases/strip_path_prefix.c
@@ -23,5 +23,5 @@ int main() {
   // CHECK: READ of size 1 at
   // CHECK: #0 {{.*}} in main strip_path_prefix.c:[[@LINE-2]]
   // CHECK: Potentially referenced stack objects:
-  // CHECK: zzz in buggy strip_path_prefix.c:[[@LINE-12]]
+  // CHECK: in buggy strip_path_prefix.c:[[@LINE-12]]
 }
diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md
index 03d4310466485..6c6588025a392 100644
--- a/flang/docs/Extensions.md
+++ b/flang/docs/Extensions.md
@@ -641,6 +641,10 @@ module m
 end
 ```
 
+* When an intrinsic procedure appears in the specification part of a module
+  only in function references, but not an explicit `INTRINSIC` statement,
+  its name is not brought into other scopes by a `USE` statement.
+
 ## De Facto Standard Features
 
 * `EXTENDS_TYPE_OF()` returns `.TRUE.` if both of its arguments have the
diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h
index b9346a8585e2d..d1992819f85aa 100644
--- a/flang/include/flang/Decimal/binary-floating-point.h
+++ b/flang/include/flang/Decimal/binary-floating-point.h
@@ -143,7 +143,7 @@ class BinaryFloatingPointNumber : public common::RealDetails<BINARY_PRECISION> {
     if (IsNaN() || IsInfinite() || keepBits >= binaryPrecision) {
       return true;
     }
-    int lostBits{binaryPrecision - keepBits};
+    int lostBits{keepBits < binaryPrecision ? binaryPrecision - keepBits : 0};
     RawType lostMask{static_cast<RawType>((RawType{1} << lostBits) - 1)};
     if (RawType lost{static_cast<RawType>(raw_ & lostMask)}; lost != 0) {
       bool increase{false};
diff --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h
index a4e0ee7c84746..f0997fb63df01 100644
--- a/flang/include/flang/Decimal/decimal.h
+++ b/flang/include/flang/Decimal/decimal.h
@@ -34,6 +34,7 @@ enum ConversionResultFlags {
   Overflow = 1,
   Inexact = 2,
   Invalid = 4,
+  Underflow = 8,
 };
 
 struct ConversionToDecimalResult {
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 7d5d31b7788d7..2143d1d9b3f77 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -369,6 +369,12 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
     return result;
   }
+  constexpr Raw HUGE() const {
+    Raw result{static_cast<Raw>(Real::maxExponent)};
+    result <<= Real::significandBits;
+    result |= SignBit();
+    return result - 1; // decrement exponent, set all significand bits
+  }
 
   Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
   int digits_{0}; // # of elements in digit_[] array; zero when zero
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index d5b66b9fb9338..d38af0f9b8005 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -237,6 +237,15 @@ template <int PREC> class IntermediateFloat {
   int exponent_{0};
 };
 
+// The standard says that these overflow cases round to "representable"
+// numbers, and some popular compilers interpret that to mean +/-HUGE()
+// rather than +/-Inf.
+static inline constexpr bool RoundOverflowToHuge(
+    enum FortranRounding rounding, bool isNegative) {
+  return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
+      (isNegative && rounding == RoundUp);
+}
+
 template <int PREC>
 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
     bool isNegative, FortranRounding rounding) const {
@@ -256,12 +265,18 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
   if (guard != 0) {
     flags |= Inexact;
   }
-  if (fraction == 0 && guard <= oneHalf) {
-    if ((!isNegative && rounding == RoundUp) ||
-        (isNegative && rounding == RoundDown)) {
-      // round to minimum nonzero value
-    } else {
-      return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
+  if (fraction == 0) {
+    if (guard <= oneHalf) {
+      if ((!isNegative && rounding == RoundUp) ||
+          (isNegative && rounding == RoundDown)) {
+        // round to least nonzero value
+        expo = 0;
+      } else { // round to zero
+        if (guard != 0) {
+          flags |= Underflow;
+        }
+        return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
+      }
     }
   } else {
     // The value is nonzero; normalize it.
@@ -301,14 +316,21 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
   }
   if (expo == 1 && fraction < topBit) {
     expo = 0; // subnormal
-  }
-  if (expo >= Binary::maxExponent) {
-    expo = Binary::maxExponent; // Inf
-    flags |= Overflow;
-    if constexpr (Binary::bits == 80) { // x87
-      fraction = IntType{1} << 63;
-    } else {
-      fraction = 0;
+    flags |= Underflow;
+  } else if (expo == 0) {
+    flags |= Underflow;
+  } else if (expo >= Binary::maxExponent) {
+    if (RoundOverflowToHuge(rounding, isNegative)) {
+      expo = Binary::maxExponent - 1;
+      fraction = mask;
+    } else { // Inf
+      expo = Binary::maxExponent;
+      flags |= Overflow;
+      if constexpr (Binary::bits == 80) { // x87
+        fraction = IntType{1} << 63;
+      } else {
+        fraction = 0;
+      }
     }
   }
   using Raw = typename Binary::RawType;
@@ -338,14 +360,22 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
   // Sanity checks for ridiculous exponents
   static constexpr int crazy{2 * Real::decimalRange + log10Radix};
   if (exponent_ < -crazy) {
+    enum ConversionResultFlags flags {
+      static_cast<enum ConversionResultFlags>(Inexact | Underflow)
+    };
     if ((!isNegative_ && rounding_ == RoundUp) ||
         (isNegative_ && rounding_ == RoundDown)) {
-      return {Real{Raw{1} | SignBit()}}; // return least nonzero value
+      // return least nonzero value
+      return {Real{Raw{1} | SignBit()}, flags};
     } else { // underflow to +/-0.
-      return {Real{SignBit()}, Inexact};
+      return {Real{SignBit()}, flags};
+    }
+  } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
+    if (RoundOverflowToHuge(rounding_, isNegative_)) {
+      return {Real{HUGE()}};
+    } else {
+      return {Real{Infinity()}, Overflow};
     }
-  } else if (exponent_ > crazy) { // overflow to +/-Inf.
-    return {Real{Infinity()}, Overflow};
   }
   // Apply any negative decimal exponent by multiplication
   // by a power of two, adjusting the binary exponent to compensate.
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index e1cd34ddf65b6..f5f7b99aba255 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -2904,7 +2904,7 @@ void ModuleVisitor::Post(const parser::UseStmt &x) {
     }
     for (const auto &[name, symbol] : *useModuleScope_) {
       if (symbol->attrs().test(Attr::PUBLIC) && !IsUseRenamed(symbol->name()) &&
-          (!symbol->attrs().test(Attr::INTRINSIC) ||
+          (!symbol->implicitAttrs().test(Attr::INTRINSIC) ||
               symbol->has<UseDetails>()) &&
           !symbol->has<MiscDetails>() && useNames.count(name) == 0) {
         SourceName location{x.moduleName.source};
@@ -2998,7 +2998,7 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
     details->add_occurrence(location, *useModuleScope_);
     return;
   }
-
+  const Symbol &useUltimate{useSymbol.GetUltimate()};
   if (localSymbol.has<UnknownDetails>()) {
     localSymbol.set_details(UseDetails{localName, useSymbol});
     localSymbol.attrs() =
@@ -3010,7 +3010,6 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
   }
 
   Symbol &localUltimate{localSymbol.GetUltimate()};
-  const Symbol &useUltimate{useSymbol.GetUltimate()};
   if (&localUltimate == &useUltimate) {
     // use-associating the same symbol again -- ok
     return;
@@ -3044,13 +3043,19 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
           checkAmbiguousDerivedType(&useUltimate, localGeneric->derivedType());
     } else if (&useUltimate == &BypassGeneric(localUltimate).GetUltimate()) {
       return; // nothing to do; used subprogram is local's specific
+    } else if (useUltimate.attrs().test(Attr::INTRINSIC) &&
+        useUltimate.name() == localSymbol.name()) {
+      return; // local generic can extend intrinsic
     }
   } else if (useGeneric) {
     if (localUltimate.has<DerivedTypeDetails>()) {
       combine =
           checkAmbiguousDerivedType(&localUltimate, useGeneric->derivedType());
-    } else if (&localUltimate == &BypassGeneric(useUltimate).GetUltimate()) {
-      // Local is the specific of the used generic; replace it.
+    } else if (&localUltimate == &BypassGeneric(useUltimate).GetUltimate() ||
+        (localSymbol.attrs().test(Attr::INTRINSIC) &&
+            localUltimate.name() == useUltimate.name())) {
+      // Local is the specific of the used generic or an intrinsic with the
+      // same name; replace it.
       EraseSymbol(localSymbol);
       Symbol &newSymbol{MakeSymbol(localName,
           useUltimate.attrs() & ~Attrs{Attr::PUBLIC, Attr::PRIVATE},
@@ -3058,23 +3063,22 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
       newSymbol.flags() = useSymbol.flags();
       return;
     }
+  } else if (localUltimate.name() != useUltimate.name()) {
+    // not the same procedure
+  } else if (localUltimate.attrs().test(Attr::INTRINSIC) &&
+      useUltimate.attrs().test(Attr::INTRINSIC)) {
+    return;
   } else {
     auto localClass{ClassifyProcedure(localUltimate)};
     auto useClass{ClassifyProcedure(useUltimate)};
-    if (localClass == useClass &&
-        (localClass == ProcedureDefinitionClass::Intrinsic ||
-            localClass == ProcedureDefinitionClass::External) &&
-        localUltimate.name() == useUltimate.name()) {
+    if (localClass == ProcedureDefinitionClass::External &&
+        useClass == ProcedureDefinitionClass::External) {
       auto localChars{evaluate::characteristics::Procedure::Characterize(
           localUltimate, GetFoldingContext())};
       auto useChars{evaluate::characteristics::Procedure::Characterize(
           useUltimate, GetFoldingContext())};
-      if (localChars && useChars) {
-        if (*localChars == *useChars) {
-          // Same intrinsic or external procedure defined identically in two
-          // modules
-          return;
-        }
+      if (localChars && useChars && *localChars == *useChars) {
+        return; // same procedure defined identically in two modules
       }
     }
   }
@@ -4794,9 +4798,15 @@ Symbol &DeclarationVisitor::HandleAttributeStmt(
       }
     }
   } else if (symbol && symbol->has<UseDetails>()) {
-    Say(currStmtSource().value(),
-        "Cannot change %s attribute on use-associated '%s'"_err_en_US,
-        EnumToString(attr), name.source);
+    if (symbol->GetUltimate().attrs().test(attr)) {
+      Say(currStmtSource().value(),
+          "Use-associated '%s' already has '%s' attribute"_warn_en_US,
+          name.source, EnumToString(attr));
+    } else {
+      Say(currStmtSource().value(),
+          "Cannot change %s attribute on use-associated '%s'"_err_en_US,
+          EnumToString(attr), name.source);
+    }
     return *symbol;
   }
   if (!symbol) {
@@ -6244,8 +6254,8 @@ bool DeclarationVisitor::HandleUnrestrictedSpecificIntrinsicFunction(
     // recreated for it later on demand, but capturing its result type here
     // will make GetType() return a correct result without having to
     // probe the intrinsics table again.
-    Symbol &symbol{
-        MakeSymbol(InclusiveScope(), name.source, Attrs{Attr::INTRINSIC})};
+    Symbol &symbol{MakeSymbol(InclusiveScope(), name.source, Attrs{})};
+    SetImplicitAttr(symbol, Attr::INTRINSIC);
     CHECK(interface->functionResult.has_value());
     evaluate::DynamicType dyType{
         DEREF(interface->functionResult->GetTypeAndShape()).type()};
@@ -7708,8 +7718,8 @@ void ResolveNamesVisitor::HandleProcedureName(
   auto *symbol{FindSymbol(NonDerivedTypeScope(), name)};
   if (!symbol) {
     if (IsIntrinsic(name.source, flag)) {
-      symbol =
-          &MakeSymbol(InclusiveScope(), name.source, Attrs{Attr::INTRINSIC});
+      symbol = &MakeSymbol(InclusiveScope(), name.source, Attrs{});
+      SetImplicitAttr(*symbol, Attr::INTRINSIC);
     } else if (const auto ppcBuiltinScope =
                    currScope().context().GetPPCBuiltinsScope()) {
       // Check if it is a builtin from the predefined module
@@ -8047,6 +8057,11 @@ void ResolveNamesVisitor::CreateGeneric(const parser::GenericSpec &x) {
     } else if (ultimate.has<SubprogramDetails>() ||
         ultimate.has<SubprogramNameDetails>()) {
       genericDetails.set_specific(*existing);
+    } else if (ultimate.has<ProcEntityDetails>()) {
+      if (existing->name() != symbolName ||
+          !ultimate.attrs().test(Attr::INTRINSIC)) {
+        genericDetails.set_specific(*existing);
+      }
     } else if (ultimate.has<DerivedTypeDetails>()) {
       genericDetails.set_derivedType(*existing);
     } else if (&existing->owner() == &currScope()) {
diff --git a/flang/module/iso_fortran_env.f90 b/flang/module/iso_fortran_env.f90
index f1d540bc8e451..61d8a07e61133 100644
--- a/flang/module/iso_fortran_env.f90
+++ b/flang/module/iso_fortran_env.f90
@@ -23,6 +23,7 @@ module iso_fortran_env
     compiler_version => __builtin_compiler_version
 
   implicit none
+  private count
 
   ! TODO: Use PACK([x],test) in place of the array constructor idiom
   ! [(x, integer::j=1,COUNT([test]))] below once PACK() can be folded.
diff --git a/flang/runtime/connection.cpp b/flang/runtime/connection.cpp
index 0abacd7995b47..91ac9a0e14e47 100644
--- a/flang/runtime/connection.cpp
+++ b/flang/runtime/connection.cpp
@@ -46,13 +46,15 @@ SavedPosition::SavedPosition(IoStatementState &io) : io_{io} {
 }
 
 SavedPosition::~SavedPosition() {
-  ConnectionState &conn{io_.GetConnectionState()};
-  while (conn.currentRecordNumber > saved_.currentRecordNumber) {
-    io_.BackspaceRecord();
+  if (!cancelled_) {
+    ConnectionState &conn{io_.GetConnectionState()};
+    while (conn.currentRecordNumber > saved_.currentRecordNumber) {
+      io_.BackspaceRecord();
+    }
+    conn.leftTabLimit = saved_.leftTabLimit;
+    conn.furthestPositionInRecord = saved_.furthestPositionInRecord;
+    conn.positionInRecord = saved_.positionInRecord;
+    conn.pinnedFrame = saved_.pinnedFrame;
   }
-  conn.leftTabLimit = saved_.leftTabLimit;
-  conn.furthestPositionInRecord = saved_.furthestPositionInRecord;
-  conn.positionInRecord = saved_.positionInRecord;
-  conn.pinnedFrame = saved_.pinnedFrame;
 }
 } // namespace Fortran::runtime::io
diff --git a/flang/runtime/connection.h b/flang/runtime/connection.h
index 70c20e17fd01a..c9a7566f20988 100644
--- a/flang/runtime/connection.h
+++ b/flang/runtime/connection.h
@@ -111,10 +111,12 @@ class SavedPosition {
 public:
   explicit SavedPosition(IoStatementState &);
   ~SavedPosition();
+  void Cancel() { cancelled_ = true; }
 
 private:
   IoStatementState &io_;
   ConnectionState saved_;
+  bool cancelled_{false};
 };
 
 } // namespace Fortran::runtime::io
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 822099b5141b1..2b80974906777 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -478,6 +478,9 @@ static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
   if (flags & decimal::ConversionResultFlags::Overflow) {
     RAISE(FE_OVERFLOW);
   }
+  if (flags & decimal::ConversionResultFlags::Underflow) {
+    RAISE(FE_UNDERFLOW);
+  }
   if (flags & decimal::ConversionResultFlags::Inexact) {
     RAISE(FE_INEXACT);
   }
@@ -640,20 +643,30 @@ decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
   }
   // Package & return result
   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
+  int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
   if (!fraction) {
     expo = 0;
   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
     expo = 0; // subnormal
+    flags |= decimal::Underflow;
   } else if (expo >= RealType::maxExponent) {
-    expo = RealType::maxExponent; // +/-Inf
-    fraction = 0;
+    if (rounding == decimal::RoundToZero ||
+        (rounding == decimal::RoundDown && !isNegative) ||
+        (rounding == decimal::RoundUp && isNegative)) {
+      expo = RealType::maxExponent - 1; // +/-HUGE()
+      fraction = significandMask;
+    } else {
+      expo = RealType::maxExponent; // +/-Inf
+      fraction = 0;
+      flags |= decimal::Overflow;
+    }
   } else {
     fraction &= significandMask; // remove explicit normalization unless x87
   }
   return decimal::ConversionToBinaryResult<binaryPrecision>{
       RealType{static_cast<RawType>(signBit |
           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
-      (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
+      static_cast<decimal::ConversionResultFlags>(flags)};
 }
 
 template <int KIND>
diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index a4ce0b12f9111..26e066c85fed3 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -341,11 +341,12 @@ bool RealOutputEditing<KIND>::EditEorDOutput(const DataEdit &edit) {
         ConvertToDecimal(significantDigits, edit.modes.round, flags)};
     if (IsInfOrNaN(converted.str, static_cast<int>(converted.length))) {
       return editWidth > 0 &&
-              converted.length > static_cast<std::size_t>(editWidth)
+              converted.length + trailingBlanks_ >
+                  static_cast<std::size_t>(editWidth)
           ? EmitRepeated(io_, '*', editWidth)
           : EmitPrefix(edit, converted.length, editWidth) &&
               EmitAscii(io_, converted.str, converted.length) &&
-              EmitSuffix(edit);
+              EmitRepeated(io_, ' ', trailingBlanks_) && EmitSuffix(edit);
     }
     if (!IsZero()) {
       converted.decimalExponent -= scale;
@@ -522,8 +523,9 @@ bool RealOutputEditing<KIND>::EditFOutput(const DataEdit &edit) {
       zeroesBeforePoint = 1; // "." -> "0."
     }
     int totalLength{signLength + digitsBeforePoint + zeroesBeforePoint +
-        1 /*'.'*/ + zeroesAfterPoint + digitsAfterPoint + trailingZeroes};
-    int width{editWidth > 0 ? editWidth : totalLength};
+        1 /*'.'*/ + zeroesAfterPoint + digitsAfterPoint + trailingZeroes +
+        trailingBlanks_ /* G editing converted to F */};
+    int width{editWidth > 0 || trailingBlanks_ ? editWidth : totalLength};
     if (totalLength > width) {
       return EmitRepeated(io_, '*', width);
     }
@@ -574,8 +576,11 @@ DataEdit RealOutputEditing<KIND>::EditForGOutput(DataEdit edit) {
   trailingBlanks_ = 0;
   if (editWidth > 0) {
     int expoDigits{edit.expoDigits.value_or(0)};
+    // F'2023 13.7.5.2.3 p5: "If 0 <= s <= d, the scale factor has no effect
+    // and F(w − n).(d − s),n(’b’) editing is used where b is a blank and
+    // n is 4 for Gw.d editing, e + 2 for Gw.dEe editing if e > 0, and
+    // 4 for Gw.dE0 editing."
     trailingBlanks_ = expoDigits > 0 ? expoDigits + 2 : 4; // 'n'
-    *edit.width = std::max(0, editWidth - trailingBlanks_);
   }
   if (edit.digits.has_value()) {
     *edit.digits = std::max(0, *edit.digits - expo);
@@ -649,7 +654,7 @@ auto RealOutputEditing<KIND>::ConvertToHexadecimal(
     // x_.binaryPrecision is constant, so / can be used for readability.
     int shift{x_.binaryPrecision - 4};
     typename BinaryFloatingPoint::RawType one{1};
-    auto remaining{(one << shift) - one};
+    auto remaining{(one << x_.binaryPrecision) - one};
     for (int digits{0}; digits < significantDigits; ++digits) {
       if ((flags & decimal::Minimize) && !(fraction & remaining)) {
         break;
@@ -682,7 +687,8 @@ bool RealOutputEditing<KIND>::EditEXOutput(const DataEdit &edit) {
     flags |= decimal::AlwaysSign;
   }
   int editWidth{edit.width.value_or(0)}; // 'w' field
-  if (editWidth == 0 && !edit.digits) { // EX0 (no .d)
+  if ((editWidth == 0 && !edit.digits) || editDigits == 0) {
+    // EX0 or EXw.0
     flags |= decimal::Minimize;
     significantDigits = 28; // enough for 128-bit F.P.
   }
diff --git a/flang/runtime/findloc.cpp b/flang/runtime/findloc.cpp
index 339e0c75f05fe..6b60e523d2a47 100644
--- a/flang/runtime/findloc.cpp
+++ b/flang/runtime/findloc.cpp
@@ -84,27 +84,27 @@ template <typename EQUALITY> class LocationAccumulator {
 public:
   LocationAccumulator(
       const Descriptor &array, const Descriptor &target, bool back)
-      : array_{array}, target_{target}, back_{back} {
-    Reinitialize();
-  }
-  void Reinitialize() {
-    // per standard: result indices are all zero if no data
-    for (int j{0}; j < rank_; ++j) {
-      location_[j] = 0;
-    }
-  }
+      : array_{array}, target_{target}, back_{back} {}
+  void Reinitialize() { gotAnything_ = false; }
   template <typename A> void GetResult(A *p, int zeroBasedDim = -1) {
     if (zeroBasedDim >= 0) {
-      *p = location_[zeroBasedDim] -
-          array_.GetDimension(zeroBasedDim).LowerBound() + 1;
-    } else {
+      *p = gotAnything_ ? location_[zeroBasedDim] -
+              array_.GetDimension(zeroBasedDim).LowerBound() + 1
+                        : 0;
+    } else if (gotAnything_) {
       for (int j{0}; j < rank_; ++j) {
         p[j] = location_[j] - array_.GetDimension(j).LowerBound() + 1;
       }
+    } else {
+      // no unmasked hits? result is all zeroes
+      for (int j{0}; j < rank_; ++j) {
+        p[j] = 0;
+      }
     }
   }
   template <typename IGNORED> bool AccumulateAt(const SubscriptValue at[]) {
     if (equality_(array_, at, target_)) {
+      gotAnything_ = true;
       for (int j{0}; j < rank_; ++j) {
         location_[j] = at[j];
       }
@@ -119,6 +119,7 @@ template <typename EQUALITY> class LocationAccumulator {
   const Descriptor &target_;
   const bool back_{false};
   const int rank_{array_.rank()};
+  bool gotAnything_{false};
   SubscriptValue location_[maxRank];
   const EQUALITY equality_{};
 };
diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp
index dedf1f8364ad3..921c6e625edb5 100644
--- a/flang/runtime/io-stmt.cpp
+++ b/flang/runtime/io-stmt.cpp
@@ -189,6 +189,17 @@ InternalListIoStatementState<DIR>::InternalListIoStatementState(
     : InternalIoStatementState<DIR>{d, sourceFile, sourceLine},
       ioStatementState_{*this} {}
 
+template <Direction DIR>
+int InternalListIoStatementState<DIR>::EndIoStatement() {
+  if constexpr (DIR == Direction::Input) {
+    if (int status{ListDirectedStatementState<DIR>::EndIoStatement()};
+        status != IostatOk) {
+      return status;
+    }
+  }
+  return InternalIoStatementState<DIR>::EndIoStatement();
+}
+
 ExternalIoStatementBase::ExternalIoStatementBase(
     ExternalFileUnit &unit, const char *sourceFile, int sourceLine)
     : IoStatementBase{sourceFile, sourceLine}, unit_{unit} {}
@@ -707,6 +718,13 @@ ListDirectedStatementState<Direction::Output>::GetNextDataEdit(
   return edit;
 }
 
+int ListDirectedStatementState<Direction::Input>::EndIoStatement() {
+  if (repeatPosition_) {
+    repeatPosition_->Cancel();
+  }
+  return IostatOk;
+}
+
 std::optional<DataEdit>
 ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
     IoStatementState &io, int maxRepeat) {
@@ -818,6 +836,17 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
   return edit;
 }
 
+template <Direction DIR>
+int ExternalListIoStatementState<DIR>::EndIoStatement() {
+  if constexpr (DIR == Direction::Input) {
+    if (auto status{ListDirectedStatementState<DIR>::EndIoStatement()};
+        status != IostatOk) {
+      return status;
+    }
+  }
+  return ExternalIoStatementState<DIR>::EndIoStatement();
+}
+
 template <Direction DIR>
 bool ExternalUnformattedIoStatementState<DIR>::Receive(
     char *data, std::size_t bytes, std::size_t elementBytes) {
@@ -910,6 +939,16 @@ bool ChildUnformattedIoStatementState<DIR>::Receive(
   return this->child().parent().Receive(data, bytes, elementBytes);
 }
 
+template <Direction DIR> int ChildListIoStatementState<DIR>::EndIoStatement() {
+  if constexpr (DIR == Direction::Input) {
+    if (int status{ListDirectedStatementState<DIR>::EndIoStatement()};
+        status != IostatOk) {
+      return status;
+    }
+  }
+  return ChildIoStatementState<DIR>::EndIoStatement();
+}
+
 template class InternalIoStatementState<Direction::Output>;
 template class InternalIoStatementState<Direction::Input>;
 template class InternalFormattedIoStatementState<Direction::Output>;
@@ -1219,6 +1258,7 @@ bool InquireUnitState::Inquire(
   case HashInquiryKeyword("SIZE"):
     result = -1;
     if (unit().IsConnected()) {
+      unit().FlushOutput(*this);
       if (auto size{unit().knownSize()}) {
         result = *size;
       }
@@ -1346,7 +1386,7 @@ bool InquireUnconnectedFileState::Inquire(
   case HashInquiryKeyword("SEQUENTIAL"):
   case HashInquiryKeyword("STREAM"):
   case HashInquiryKeyword("UNFORMATTED"):
-    str = "UNKNONN";
+    str = "UNKNOWN";
     break;
   case HashInquiryKeyword("READ"):
     str =
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index 91169f6c6e323..0b6bcbd9af025 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -304,6 +304,7 @@ class ListDirectedStatementState<Direction::Input>
     : public FormattedIoStatementState<Direction::Input> {
 public:
   bool inNamelistSequence() const { return inNamelistSequence_; }
+  int EndIoStatement();
 
   // Skips value separators, handles repetition and null values.
   // Vacant when '/' appears; present with descriptor == ListDirectedNullValue
@@ -317,6 +318,9 @@ class ListDirectedStatementState<Direction::Input>
   // NAMELIST input item.
   void ResetForNextNamelistItem(bool inNamelistSequence) {
     remaining_ = 0;
+    if (repeatPosition_) {
+      repeatPosition_->Cancel();
+    }
     eatComma_ = false;
     realPart_ = imaginaryPart_ = false;
     inNamelistSequence_ = inNamelistSequence;
@@ -399,6 +403,7 @@ class InternalListIoStatementState : public InternalIoStatementState<DIR>,
       const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
   IoStatementState &ioStatementState() { return ioStatementState_; }
   using ListDirectedStatementState<DIR>::GetNextDataEdit;
+  int EndIoStatement();
 
 private:
   IoStatementState ioStatementState_; // points to *this
@@ -474,6 +479,7 @@ class ExternalListIoStatementState : public ExternalIoStatementState<DIR>,
 public:
   using ExternalIoStatementState<DIR>::ExternalIoStatementState;
   using ListDirectedStatementState<DIR>::GetNextDataEdit;
+  int EndIoStatement();
 };
 
 template <Direction DIR>
@@ -532,6 +538,7 @@ class ChildListIoStatementState : public ChildIoStatementState<DIR>,
 public:
   using ChildIoStatementState<DIR>::ChildIoStatementState;
   using ListDirectedStatementState<DIR>::GetNextDataEdit;
+  int EndIoStatement();
 };
 
 template <Direction DIR>
diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp
index 25e58e79dbba0..38835c2b753ce 100644
--- a/flang/runtime/numeric.cpp
+++ b/flang/runtime/numeric.cpp
@@ -261,12 +261,10 @@ template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
 // NEAREST (16.9.139)
 template <int PREC, typename T>
 inline RT_API_ATTRS T Nearest(T x, bool positive) {
-  auto spacing{Spacing<PREC>(x)};
-  if (x == 0) {
-    auto least{std::numeric_limits<T>::denorm_min()};
-    return positive ? least : -least;
+  if (positive) {
+    return std::nextafter(x, std::numeric_limits<T>::infinity());
   } else {
-    return positive ? x + spacing : x - spacing;
+    return std::nextafter(x, -std::numeric_limits<T>::infinity());
   }
 }
 
diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h
index 9811bce25acd3..ff05e76c8bb7b 100644
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -94,6 +94,31 @@ static inline RT_API_ATTRS std::int64_t GetInt64(
   }
 }
 
+static inline RT_API_ATTRS std::optional<std::int64_t> GetInt64Safe(
+    const char *p, std::size_t bytes, Terminator &terminator) {
+  switch (bytes) {
+  case 1:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 1> *>(p);
+  case 2:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 2> *>(p);
+  case 4:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 4> *>(p);
+  case 8:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 8> *>(p);
+  case 16: {
+    using Int128 = CppTypeFor<TypeCategory::Integer, 16>;
+    auto n{*reinterpret_cast<const Int128 *>(p)};
+    std::int64_t result = n;
+    if (result == n) {
+      return result;
+    }
+    return std::nullopt;
+  }
+  default:
+    terminator.Crash("GetInt64Safe: no case for %zd bytes", bytes);
+  }
+}
+
 template <typename INT>
 inline RT_API_ATTRS bool SetInteger(INT &x, int kind, std::int64_t value) {
   switch (kind) {
diff --git a/flang/runtime/transformational.cpp b/flang/runtime/transformational.cpp
index da8ec05c884fa..cf1e61c0844d8 100644
--- a/flang/runtime/transformational.cpp
+++ b/flang/runtime/transformational.cpp
@@ -52,9 +52,11 @@ class ShiftControl {
           }
         }
       }
+    } else if (auto count{GetInt64Safe(
+                   shift_.OffsetElement<char>(), shiftElemLen_, terminator_)}) {
+      shiftCount_ = *count;
     } else {
-      shiftCount_ =
-          GetInt64(shift_.OffsetElement<char>(), shiftElemLen_, terminator_);
+      terminator_.Crash("%s: SHIFT= value exceeds 64 bits", which);
     }
   }
   RT_API_ATTRS SubscriptValue GetShift(const SubscriptValue resultAt[]) const {
@@ -67,8 +69,10 @@ class ShiftControl {
           ++k;
         }
       }
-      return GetInt64(
-          shift_.Element<char>(shiftAt), shiftElemLen_, terminator_);
+      auto count{GetInt64Safe(
+          shift_.Element<char>(shiftAt), shiftElemLen_, terminator_)};
+      RUNTIME_CHECK(terminator_, count.has_value());
+      return *count;
     } else {
       return shiftCount_; // invariant count extracted in Init()
     }
@@ -719,12 +723,15 @@ void RTDEF(Reshape)(Descriptor &result, const Descriptor &source,
   std::size_t resultElements{1};
   SubscriptValue shapeSubscript{shape.GetDimension(0).LowerBound()};
   for (int j{0}; j < resultRank; ++j, ++shapeSubscript) {
-    resultExtent[j] = GetInt64(
-        shape.Element<char>(&shapeSubscript), shapeElementBytes, terminator);
-    if (resultExtent[j] < 0) {
+    auto extent{GetInt64Safe(
+        shape.Element<char>(&shapeSubscript), shapeElementBytes, terminator)};
+    if (!extent) {
+      terminator.Crash("RESHAPE: value of SHAPE(%d) exceeds 64 bits", j + 1);
+    } else if (*extent < 0) {
       terminator.Crash("RESHAPE: bad value for SHAPE(%d)=%jd", j + 1,
-          static_cast<std::intmax_t>(resultExtent[j]));
+          static_cast<std::intmax_t>(*extent));
     }
+    resultExtent[j] = *extent;
     resultElements *= resultExtent[j];
   }
 
@@ -762,14 +769,16 @@ void RTDEF(Reshape)(Descriptor &result, const Descriptor &source,
     SubscriptValue orderSubscript{order->GetDimension(0).LowerBound()};
     std::size_t orderElementBytes{order->ElementBytes()};
     for (SubscriptValue j{0}; j < resultRank; ++j, ++orderSubscript) {
-      auto k{GetInt64(order->Element<char>(&orderSubscript), orderElementBytes,
-          terminator)};
-      if (k < 1 || k > resultRank || ((values >> k) & 1)) {
+      auto k{GetInt64Safe(order->Element<char>(&orderSubscript),
+          orderElementBytes, terminator)};
+      if (!k) {
+        terminator.Crash("RESHAPE: ORDER element value exceeds 64 bits");
+      } else if (*k < 1 || *k > resultRank || ((values >> *k) & 1)) {
         terminator.Crash("RESHAPE: bad value for ORDER element (%jd)",
-            static_cast<std::intmax_t>(k));
+            static_cast<std::intmax_t>(*k));
       }
-      values |= std::uint64_t{1} << k;
-      dimOrder[j] = k - 1;
+      values |= std::uint64_t{1} << *k;
+      dimOrder[j] = *k - 1;
     }
   } else {
     for (int j{0}; j < resultRank; ++j) {
diff --git a/flang/test/Semantics/contiguous01.f90 b/flang/test/Semantics/contiguous01.f90
index 1d3600aef6c55..0f086624a20ae 100644
--- a/flang/test/Semantics/contiguous01.f90
+++ b/flang/test/Semantics/contiguous01.f90
@@ -5,7 +5,7 @@ module m0
 end
 module m
   use m0
-  !ERROR: Cannot change CONTIGUOUS attribute on use-associated 'p1'
+  !WARNING: Use-associated 'p1' already has 'CONTIGUOUS' attribute
   contiguous p1
   !ERROR: Cannot change CONTIGUOUS attribute on use-associated 'p2'
   contiguous p2
diff --git a/flang/test/Semantics/intrinsics02.f90 b/flang/test/Semantics/intrinsics02.f90
new file mode 100644
index 0000000000000..0b1f7c13a1564
--- /dev/null
+++ b/flang/test/Semantics/intrinsics02.f90
@@ -0,0 +1,38 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+module explicit
+  intrinsic cos
+end
+subroutine testExplicit
+  use explicit
+  !ERROR: 'cos' is use-associated from module 'explicit' and cannot be re-declared
+  real :: cos = 2.
+end
+subroutine extendsUsedIntrinsic
+  use explicit
+  interface cos
+    pure real function mycos(x)
+      real, intent(in) :: x
+    end
+  end interface
+end
+subroutine sameIntrinsic1
+  use explicit
+  !WARNING: Use-associated 'cos' already has 'INTRINSIC' attribute
+  intrinsic cos
+  real :: one = cos(0.)
+end
+module renamer
+  use explicit, renamedCos => cos
+end
+subroutine sameIntrinsic2
+  use explicit
+  use renamer, cos => renamedCos
+  real :: one = cos(0.)
+end
+module implicit
+  real :: one = cos(0.)
+end
+subroutine testImplicit
+  use implicit
+  real :: cos = 2.
+end
diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp
index 5afed750c0b18..43263d1ac4231 100644
--- a/flang/unittests/Runtime/Numeric.cpp
+++ b/flang/unittests/Runtime/Numeric.cpp
@@ -86,7 +86,7 @@ TEST(Numeric, Nearest) {
   EXPECT_EQ(RTNAME(Nearest8)(Real<8>{1.0}, true),
       Real<8>{1.0} + std::ldexp(Real<8>{1.0}, -52));
   EXPECT_EQ(RTNAME(Nearest8)(Real<8>{1.0}, false),
-      Real<8>{1.0} - std::ldexp(Real<8>{1.0}, -52));
+      Real<8>{1.0} - 0.5 * std::ldexp(Real<8>{1.0}, -52));
 }
 
 TEST(Numeric, Nint) {
diff --git a/flang/unittests/Runtime/NumericalFormatTest.cpp b/flang/unittests/Runtime/NumericalFormatTest.cpp
index b5b8eb0594373..9dd2771fe4a75 100644
--- a/flang/unittests/Runtime/NumericalFormatTest.cpp
+++ b/flang/unittests/Runtime/NumericalFormatTest.cpp
@@ -654,28 +654,44 @@ TEST(IOApiTests, FormatDoubleValues) {
               {"(EX24.13,';')", " 0XF.FFFFFFFFFFFF8P+1020;"},
           }},
       {// EX rounding
-          0x3ff1000000000000uLL, // 1.0625
+          0x3ff0100000000000uLL,
           {
-              {"(F7.4,';')", " 1.0625;"},
-              {"(EX9.1,';')", " 0X8.8P-3;"},
-              {"(EX9.0,';')", "  0X8.P-3;"},
-              {"(RN,EX9.0,';')", "  0X8.P-3;"},
-              {"(RU,EX9.0,';')", "  0X9.P-3;"},
-              {"(RD,EX9.0,';')", "  0X8.P-3;"},
-              {"(RZ,EX9.0,';')", "  0X8.P-3;"},
-              {"(RC,EX9.0,';')", "  0X9.P-3;"},
+              {"(F11.8,';')", " 1.00390625;"},
+              {"(EX10.2,';')", " 0X8.08P-3;"},
+              {"(EX10.1,';')", "  0X8.0P-3;"},
+              {"(EX10.0,';')", " 0X8.08P-3;"},
+              {"(EX0.0,';')", "0X8.08P-3;"},
+              {"(EX0,';')", "0X8.08P-3;"},
+              {"(RN,EX10.1,';')", "  0X8.0P-3;"},
+              {"(RU,EX10.1,';')", "  0X8.1P-3;"},
+              {"(RD,EX10.1,';')", "  0X8.0P-3;"},
+              {"(RZ,EX10.1,';')", "  0X8.0P-3;"},
+              {"(RC,EX10.1,';')", "  0X8.1P-3;"},
+              {"(RN,EX10.0,';')", " 0X8.08P-3;"},
+              {"(RU,EX10.0,';')", " 0X8.08P-3;"},
+              {"(RD,EX10.0,';')", " 0X8.08P-3;"},
+              {"(RZ,EX10.0,';')", " 0X8.08P-3;"},
+              {"(RC,EX10.0,';')", " 0X8.08P-3;"},
           }},
       {// EX rounding
-          0xbff1000000000000uLL, // -1.0625
+          0xbff0100000000000uLL,
           {
-              {"(F7.4,';')", "-1.0625;"},
-              {"(EX9.1,';')", "-0X8.8P-3;"},
-              {"(EX9.0,';')", " -0X8.P-3;"},
-              {"(RN,EX9.0,';')", " -0X8.P-3;"},
-              {"(RU,EX9.0,';')", " -0X8.P-3;"},
-              {"(RD,EX9.0,';')", " -0X9.P-3;"},
-              {"(RZ,EX9.0,';')", " -0X8.P-3;"},
-              {"(RC,EX9.0,';')", " -0X9.P-3;"},
+              {"(F11.8,';')", "-1.00390625;"},
+              {"(EX10.2,';')", "-0X8.08P-3;"},
+              {"(EX10.1,';')", " -0X8.0P-3;"},
+              {"(EX10.0,';')", "-0X8.08P-3;"},
+              {"(EX0.0,';')", "-0X8.08P-3;"},
+              {"(EX0,';')", "-0X8.08P-3;"},
+              {"(RN,EX10.1,';')", " -0X8.0P-3;"},
+              {"(RU,EX10.1,';')", " -0X8.0P-3;"},
+              {"(RD,EX10.1,';')", " -0X8.1P-3;"},
+              {"(RZ,EX10.1,';')", " -0X8.0P-3;"},
+              {"(RC,EX10.1,';')", " -0X8.1P-3;"},
+              {"(RN,EX10.0,';')", "-0X8.08P-3;"},
+              {"(RU,EX10.0,';')", "-0X8.08P-3;"},
+              {"(RD,EX10.0,';')", "-0X8.08P-3;"},
+              {"(RZ,EX10.0,';')", "-0X8.08P-3;"},
+              {"(RC,EX10.0,';')", "-0X8.08P-3;"},
           }},
   };
 
@@ -840,49 +856,70 @@ TEST(IOApiTests, FormatIntegerValues) {
 
 // Ensure double input values correctly map to raw uint64 values
 TEST(IOApiTests, EditDoubleInputValues) {
-  using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t>;
+  using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t, int>;
+  int ovf{IostatRealInputOverflow};
   static const std::vector<TestCaseTy> testCases{
-      {"(F18.0)", "                 0", 0x0},
-      {"(F18.0)", "                  ", 0x0},
-      {"(F18.0)", "                -0", 0x8000000000000000},
-      {"(F18.0)", "                01", 0x3ff0000000000000},
-      {"(F18.0)", "                 1", 0x3ff0000000000000},
-      {"(F18.0)", "              125.", 0x405f400000000000},
-      {"(F18.0)", "              12.5", 0x4029000000000000},
-      {"(F18.0)", "              1.25", 0x3ff4000000000000},
-      {"(F18.0)", "             01.25", 0x3ff4000000000000},
-      {"(F18.0)", "              .125", 0x3fc0000000000000},
-      {"(F18.0)", "             0.125", 0x3fc0000000000000},
-      {"(F18.0)", "             .0625", 0x3fb0000000000000},
-      {"(F18.0)", "            0.0625", 0x3fb0000000000000},
-      {"(F18.0)", "               125", 0x405f400000000000},
-      {"(F18.1)", "               125", 0x4029000000000000},
-      {"(F18.2)", "               125", 0x3ff4000000000000},
-      {"(F18.3)", "               125", 0x3fc0000000000000},
-      {"(-1P,F18.0)", "               125", 0x4093880000000000}, // 1250
-      {"(1P,F18.0)", "               125", 0x4029000000000000}, // 12.5
-      {"(BZ,F18.0)", "              125 ", 0x4093880000000000}, // 1250
-      {"(BZ,F18.0)", "       125 . e +1 ", 0x42a6bcc41e900000}, // 1.25e13
-      {"(BZ,F18.0)", "           .      ", 0x0},
-      {"(BZ,F18.0)", "           . e +1 ", 0x0},
-      {"(DC,F18.0)", "              12,5", 0x4029000000000000},
-      {"(EX22.0)", "0X0P0                 ", 0x0}, // +0.
-      {"(EX22.0)", "-0X0P0                ", 0x8000000000000000}, // -0.
-      {"(EX22.0)", "0X.8P1                ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X8.P-3               ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X.1P4                ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X10.P-4              ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X8.00P-3             ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X80.0P-6             ", 0x4000000000000000}, // 2.0
-      {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7   ", 0x3fb999999999999a}, // 0.1
-      {"(EX22.0)", "0X.8P-1021            ", 0x0010000000000000}, // min normal
-      {"(EX22.0)", "0X.8P-1022            ", 0x0008000000000000}, // subnormal
-      {"(EX22.0)", "0X.8P-1073            ", 0x0000000000000001}, // min subn.
-      {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff}, // max finite
-      {"(EX22.0)", "0X.8P1025             ", 0x7ff0000000000000}, // +Inf
-      {"(EX22.0)", "-0X.8P1025            ", 0xfff0000000000000}, // -Inf
+      {"(F18.0)", "                 0", 0x0, 0},
+      {"(F18.0)", "                  ", 0x0, 0},
+      {"(F18.0)", "                -0", 0x8000000000000000, 0},
+      {"(F18.0)", "                01", 0x3ff0000000000000, 0},
+      {"(F18.0)", "                 1", 0x3ff0000000000000, 0},
+      {"(F18.0)", "              125.", 0x405f400000000000, 0},
+      {"(F18.0)", "              12.5", 0x4029000000000000, 0},
+      {"(F18.0)", "              1.25", 0x3ff4000000000000, 0},
+      {"(F18.0)", "             01.25", 0x3ff4000000000000, 0},
+      {"(F18.0)", "              .125", 0x3fc0000000000000, 0},
+      {"(F18.0)", "             0.125", 0x3fc0000000000000, 0},
+      {"(F18.0)", "             .0625", 0x3fb0000000000000, 0},
+      {"(F18.0)", "            0.0625", 0x3fb0000000000000, 0},
+      {"(F18.0)", "               125", 0x405f400000000000, 0},
+      {"(F18.1)", "               125", 0x4029000000000000, 0},
+      {"(F18.2)", "               125", 0x3ff4000000000000, 0},
+      {"(F18.3)", "               125", 0x3fc0000000000000, 0},
+      {"(-1P,F18.0)", "               125", 0x4093880000000000, 0}, // 1250
+      {"(1P,F18.0)", "               125", 0x4029000000000000, 0}, // 12.5
+      {"(BZ,F18.0)", "              125 ", 0x4093880000000000, 0}, // 1250
+      {"(BZ,F18.0)", "       125 . e +1 ", 0x42a6bcc41e900000, 0}, // 1.25e13
+      {"(BZ,F18.0)", "           .      ", 0x0, 0},
+      {"(BZ,F18.0)", "           . e +1 ", 0x0, 0},
+      {"(DC,F18.0)", "              12,5", 0x4029000000000000, 0},
+      {"(EX22.0)", "0X0P0                 ", 0x0, 0}, // +0.
+      {"(EX22.0)", "-0X0P0                ", 0x8000000000000000, 0}, // -0.
+      {"(EX22.0)", "0X.8P1                ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X8.P-3               ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X.1P4                ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X10.P-4              ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X8.00P-3             ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X80.0P-6             ", 0x4000000000000000, 0}, // 2.0
+      {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7   ", 0x3fb999999999999a, 0}, // 0.1
+      {"(EX22.0)", "0X.8P-1021            ", 0x0010000000000000,
+          0}, // min normal
+      {"(EX22.0)", "0X.8P-1022            ", 0x0008000000000000,
+          0}, // subnormal
+      {"(EX22.0)", "0X.8P-1073            ", 0x0000000000000001,
+          0}, // min subn.
+      {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff,
+          0}, // max finite
+      {"(EX22.0)", "0X.8P1025             ", 0x7ff0000000000000, ovf}, // +Inf
+      {"(EX22.0)", "-0X.8P1025            ", 0xfff0000000000000, ovf}, // -Inf
+      {"(RZ,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RD,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RU,F7.0)", " 2.e308", 0x7ff0000000000000, ovf}, // +Inf
+      {"(RZ,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(RD,F7.0)", "-2.e308", 0xfff0000000000000, ovf}, // -Inf
+      {"(RU,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(RZ,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RD,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RU,F7.0)", " 1.e999", 0x7ff0000000000000, ovf}, // +Inf
+      {"(RZ,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(RD,F7.0)", "-1.e999", 0xfff0000000000000, ovf}, // -Inf
+      {"(RU,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(E9.1)", " 1.0E-325", 0x0, 0},
+      {"(RU,E9.1)", " 1.0E-325", 0x1, 0},
+      {"(E9.1)", "-1.0E-325", 0x0, 0},
+      {"(RD,E9.1)", "-1.0E-325", 0x8000000000000001, 0},
   };
-  for (auto const &[format, data, want] : testCases) {
+  for (auto const &[format, data, want, iostat] : testCases) {
     auto cookie{IONAME(BeginInternalFormattedInput)(
         data, std::strlen(data), format, std::strlen(format))};
     union {
@@ -899,12 +936,14 @@ TEST(IOApiTests, EditDoubleInputValues) {
     char iomsg[bufferSize];
     std::memset(iomsg, '\0', bufferSize - 1);
 
-    // Ensure no errors were encountered reading input buffer into union value
+    // Ensure no unexpected errors were encountered reading input buffer into
+    // union value
     IONAME(GetIoMsg)(cookie, iomsg, bufferSize - 1);
     auto status{IONAME(EndIoStatement)(cookie)};
-    ASSERT_EQ(status, 0) << '\'' << format << "' failed reading '" << data
-                         << "', status " << static_cast<int>(status)
-                         << " iomsg '" << iomsg << "'";
+    ASSERT_EQ(status, iostat)
+        << '\'' << format << "' failed reading '" << data << "', status "
+        << static_cast<int>(status) << " != expected " << iostat << " iomsg '"
+        << iomsg << "'";
 
     // Ensure raw uint64 value matches expected conversion from double
     ASSERT_EQ(u.raw, want) << '\'' << format << "' failed reading '" << data
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index fe8b3c4c84c38..a92e6da56096a 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -42,6 +42,8 @@ add_libc_test(
   DEPENDS
     libc.src.__support.high_precision_decimal
     libc.src.__support.uint128
+  # FIXME Test segfaults on gfx90a GPU
+  UNIT_TEST_ONLY
 )
 
 add_libc_test(
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 2982165530c08..2e34a6c5cfa2c 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -560,7 +560,7 @@ void Writer::createECCodeMap() {
   codeMap.clear();
 
   std::optional<chpe_range_type> lastType;
-  Chunk *first = nullptr, *last = nullptr;
+  Chunk *first, *last;
 
   auto closeRange = [&]() {
     if (lastType) {
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 96ac1e1610dd3..5709a5ced584c 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -680,16 +680,7 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
   llvm_unreachable("unknown symbol kind");
 }
 
-
-StringRef strip(StringRef s) {
-  while (s.starts_with(" ")) {
-    s = s.drop_front();
-  }
-  while (s.ends_with(" ")) {
-    s = s.drop_back();
-  }
-  return s;
-}
+StringRef strip(StringRef s) { return s.trim(' '); }
 
 void StubFile::parse() {
   bool first = true;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 735be3680aea0..048912beaba5a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1243,6 +1243,18 @@ class TargetTransformInfo {
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr) const;
 
+  /// Returns the cost estimation for alternating opcode pattern that can be
+  /// lowered to a single instruction on the target. In X86 this is for the
+  /// addsub instruction which corrsponds to a Shuffle + Fadd + FSub pattern in
+  /// IR. This function expects two opcodes: \p Opcode1 and \p Opcode2 being
+  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
+  /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
+  /// \p VecTy is the vector type of the instruction to be generated.
+  InstructionCost getAltInstrCost(
+      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+      const SmallBitVector &OpcodeMask,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
   /// The exact mask may be passed as Mask, or else the array will be empty.
   /// The index and subtype parameters are used by the subvector insertion and
@@ -1944,6 +1956,10 @@ class TargetTransformInfo::Concept {
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
       OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
       ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
+  virtual InstructionCost getAltInstrCost(
+      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+      const SmallBitVector &OpcodeMask,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
 
   virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                          ArrayRef<int> Mask,
@@ -2555,6 +2571,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
     return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                        Args, CxtI);
   }
+  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+                                  unsigned Opcode1,
+                                  const SmallBitVector &OpcodeMask,
+                                  TTI::TargetCostKind CostKind) const override {
+    return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+  }
 
   InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 1d8f523e9792b..7ad3ce512a355 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -554,6 +554,13 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
+  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+                                  unsigned Opcode1,
+                                  const SmallBitVector &OpcodeMask,
+                                  TTI::TargetCostKind CostKind) const {
+    return InstructionCost::getInvalid();
+  }
+
   InstructionCost
   getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
                  TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3f76dfdaac317..67246afa23147 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -862,6 +862,15 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
   return Cost;
 }
 
+InstructionCost TargetTransformInfo::getAltInstrCost(
+    VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+    const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const {
+  InstructionCost Cost =
+      TTIImpl->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 InstructionCost TargetTransformInfo::getShuffleCost(
     ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
     TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 8907f6fa4ff3f..a027d0c21ba0b 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -4218,6 +4218,9 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
 
   // Check whether we have enough values to read a partition name.
   if (OpNum + 1 < Record.size()) {
+    // Check Strtab has enough values for the partition.
+    if (Record[OpNum] + Record[OpNum + 1] > Strtab.size())
+      return error("Malformed partition, too large.");
     NewGA->setPartition(
         StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1]));
     OpNum += 2;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0d46c7868d87e..eafa95ce7fcf7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -546,6 +546,7 @@ namespace {
     SDValue visitFP_TO_FP16(SDNode *N);
     SDValue visitFP16_TO_FP(SDNode *N);
     SDValue visitFP_TO_BF16(SDNode *N);
+    SDValue visitBF16_TO_FP(SDNode *N);
     SDValue visitVECREDUCE(SDNode *N);
     SDValue visitVPOp(SDNode *N);
     SDValue visitGET_FPENV_MEM(SDNode *N);
@@ -2047,6 +2048,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
   case ISD::FP_TO_BF16:         return visitFP_TO_BF16(N);
+  case ISD::BF16_TO_FP:         return visitBF16_TO_FP(N);
   case ISD::FREEZE:             return visitFREEZE(N);
   case ISD::GET_FPENV_MEM:      return visitGET_FPENV_MEM(N);
   case ISD::SET_FPENV_MEM:      return visitSET_FPENV_MEM(N);
@@ -26256,14 +26258,17 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+  auto Op = N->getOpcode();
+  assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
+         "opcode should be FP16_TO_FP or BF16_TO_FP.");
   SDValue N0 = N->getOperand(0);
 
-  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
+  // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
-      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
-                         N0.getOperand(0));
+      return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
     }
   }
 
@@ -26280,6 +26285,11 @@ SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
+  // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
+  return visitFP16_TO_FP(N);
+}
+
 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N0.getValueType();
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index dfe86a45df322..40665d686cf93 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -1484,6 +1484,11 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
     }
 
     uint32_t BodySize = FunctionEnd - Ctx.Ptr;
+    // Ensure that Function is within Ctx's buffer.
+    if (Ctx.Ptr + BodySize > Ctx.End) {
+      return make_error<GenericBinaryError>("Function extends beyond buffer",
+                                            object_error::parse_failed);
+    }
     Function.Body = ArrayRef<uint8_t>(Ctx.Ptr, BodySize);
     // This will be set later when reading in the linking metadata section.
     Function.Comdat = UINT32_MAX;
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index 168a63bb2d969..2bf68b7972e74 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -154,7 +154,10 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
     return "";
 
   llvm::sys::path::make_preferred(PathNameUTF8);
-  return std::string(PathNameUTF8.data());
+
+  SmallString<256> RealPath;
+  sys::fs::real_path(PathNameUTF8, RealPath);
+  return std::string(RealPath);
 }
 
 UniqueID file_status::getUniqueID() const {
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 276374afee380..66a37fce5dda1 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -85,7 +85,7 @@ class LoongArchAsmParser : public MCTargetAsmParser {
   // "emitLoadAddress*" functions.
   void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
                      const MCExpr *Symbol, SmallVectorImpl<Inst> &Insts,
-                     SMLoc IDLoc, MCStreamer &Out);
+                     SMLoc IDLoc, MCStreamer &Out, bool RelaxHint = false);
 
   // Helper to emit pseudo instruction "la.abs $rd, sym".
   void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
@@ -748,12 +748,14 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info,
 void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
                                        const MCExpr *Symbol,
                                        SmallVectorImpl<Inst> &Insts,
-                                       SMLoc IDLoc, MCStreamer &Out) {
+                                       SMLoc IDLoc, MCStreamer &Out,
+                                       bool RelaxHint) {
   MCContext &Ctx = getContext();
   for (LoongArchAsmParser::Inst &Inst : Insts) {
     unsigned Opc = Inst.Opc;
     LoongArchMCExpr::VariantKind VK = Inst.VK;
-    const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx);
+    const LoongArchMCExpr *LE =
+        LoongArchMCExpr::create(Symbol, VK, Ctx, RelaxHint);
     switch (Opc) {
     default:
       llvm_unreachable("unexpected opcode");
@@ -854,7 +856,7 @@ void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc,
   Insts.push_back(
       LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12));
 
-  emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+  emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true);
 }
 
 void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc,
@@ -900,7 +902,7 @@ void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc,
   Insts.push_back(
       LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12));
 
-  emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+  emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true);
 }
 
 void LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc,
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 45169becca37b..d2ea062dc09a7 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/EndianStream.h"
 
@@ -120,12 +121,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
   assert(MO.isExpr() && "getExprOpValue expects only expressions");
+  bool RelaxCandidate = false;
+  bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax);
   const MCExpr *Expr = MO.getExpr();
   MCExpr::ExprKind Kind = Expr->getKind();
   LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid;
   if (Kind == MCExpr::Target) {
     const LoongArchMCExpr *LAExpr = cast<LoongArchMCExpr>(Expr);
 
+    RelaxCandidate = LAExpr->getRelaxHint();
     switch (LAExpr->getKind()) {
     case LoongArchMCExpr::VK_LoongArch_None:
     case LoongArchMCExpr::VK_LoongArch_Invalid:
@@ -270,6 +274,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
 
   Fixups.push_back(
       MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc()));
+
+  // Emit an R_LARCH_RELAX if linker relaxation is enabled and LAExpr has relax
+  // hint.
+  if (EnableRelax && RelaxCandidate) {
+    const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+    Fixups.push_back(MCFixup::create(
+        0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc()));
+  }
+
   return 0;
 }
 
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
index 993111552a314..82c992b1cc8c4 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
@@ -25,9 +25,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "loongarch-mcexpr"
 
-const LoongArchMCExpr *
-LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) {
-  return new (Ctx) LoongArchMCExpr(Expr, Kind);
+const LoongArchMCExpr *LoongArchMCExpr::create(const MCExpr *Expr,
+                                               VariantKind Kind, MCContext &Ctx,
+                                               bool Hint) {
+  return new (Ctx) LoongArchMCExpr(Expr, Kind, Hint);
 }
 
 void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
index 0945cf82db865..93251f8241033 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
@@ -67,16 +67,18 @@ class LoongArchMCExpr : public MCTargetExpr {
 private:
   const MCExpr *Expr;
   const VariantKind Kind;
+  const bool RelaxHint;
 
-  explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind)
-      : Expr(Expr), Kind(Kind) {}
+  explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind, bool Hint)
+      : Expr(Expr), Kind(Kind), RelaxHint(Hint) {}
 
 public:
   static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind,
-                                       MCContext &Ctx);
+                                       MCContext &Ctx, bool Hint = false);
 
   VariantKind getKind() const { return Kind; }
   const MCExpr *getSubExpr() const { return Expr; }
+  bool getRelaxHint() const { return RelaxHint; }
 
   void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
   bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 0830cc7feb220..f42882dafa095 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -94,11 +94,10 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
     setOperationAction(OP, MVT::i16, Expand);
   }
 
-  // FIXME It would be better to use a custom lowering
   for (auto OP : {ISD::SMULO, ISD::UMULO}) {
-    setOperationAction(OP, MVT::i8, Expand);
-    setOperationAction(OP, MVT::i16, Expand);
-    setOperationAction(OP, MVT::i32, Expand);
+    setOperationAction(OP, MVT::i8,  Custom);
+    setOperationAction(OP, MVT::i16, Custom);
+    setOperationAction(OP, MVT::i32, Custom);
   }
 
   for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS})
@@ -1533,46 +1532,119 @@ bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
   return VT.bitsLE(MVT::i32) || Subtarget.atLeastM68020();
 }
 
-SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
-  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
-  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
-  // looks for this combo and may remove the "setcc" instruction if the "setcc"
-  // has only one use.
+static bool isOverflowArithmetic(unsigned Opcode) {
+  switch (Opcode) {
+  case ISD::UADDO:
+  case ISD::SADDO:
+  case ISD::USUBO:
+  case ISD::SSUBO:
+  case ISD::UMULO:
+  case ISD::SMULO:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static void lowerOverflowArithmetic(SDValue Op, SelectionDAG &DAG,
+                                    SDValue &Result, SDValue &CCR,
+                                    unsigned &CC) {
   SDNode *N = Op.getNode();
+  EVT VT = N->getValueType(0);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
-  unsigned BaseOp = 0;
-  unsigned Cond = 0;
   SDLoc DL(Op);
+
+  unsigned TruncOp = 0;
+  auto PromoteMULO = [&](unsigned ExtOp) {
+    // We don't have 8-bit multiplications, so promote i8 version of U/SMULO
+    // to i16.
+    // Ideally this should be done by legalizer but sadly there is no promotion
+    // rule for U/SMULO at this moment.
+    if (VT == MVT::i8) {
+      LHS = DAG.getNode(ExtOp, DL, MVT::i16, LHS);
+      RHS = DAG.getNode(ExtOp, DL, MVT::i16, RHS);
+      VT = MVT::i16;
+      TruncOp = ISD::TRUNCATE;
+    }
+  };
+
+  bool NoOverflow = false;
+  unsigned BaseOp = 0;
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Unknown ovf instruction!");
   case ISD::SADDO:
     BaseOp = M68kISD::ADD;
-    Cond = M68k::COND_VS;
+    CC = M68k::COND_VS;
     break;
   case ISD::UADDO:
     BaseOp = M68kISD::ADD;
-    Cond = M68k::COND_CS;
+    CC = M68k::COND_CS;
     break;
   case ISD::SSUBO:
     BaseOp = M68kISD::SUB;
-    Cond = M68k::COND_VS;
+    CC = M68k::COND_VS;
     break;
   case ISD::USUBO:
     BaseOp = M68kISD::SUB;
-    Cond = M68k::COND_CS;
+    CC = M68k::COND_CS;
+    break;
+  case ISD::UMULO:
+    PromoteMULO(ISD::ZERO_EXTEND);
+    NoOverflow = VT != MVT::i32;
+    BaseOp = NoOverflow ? ISD::MUL : M68kISD::UMUL;
+    CC = M68k::COND_VS;
+    break;
+  case ISD::SMULO:
+    PromoteMULO(ISD::SIGN_EXTEND);
+    NoOverflow = VT != MVT::i32;
+    BaseOp = NoOverflow ? ISD::MUL : M68kISD::SMUL;
+    CC = M68k::COND_VS;
     break;
   }
 
-  // Also sets CCR.
-  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i8);
+  SDVTList VTs;
+  if (NoOverflow)
+    VTs = DAG.getVTList(VT);
+  else
+    // Also sets CCR.
+    VTs = DAG.getVTList(VT, MVT::i8);
+
   SDValue Arith = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
-  SDValue SetCC = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
-                              DAG.getConstant(Cond, DL, MVT::i8),
-                              SDValue(Arith.getNode(), 1));
+  Result = Arith.getValue(0);
+  if (TruncOp)
+    // Right now the only place to truncate is from i16 to i8.
+    Result = DAG.getNode(TruncOp, DL, MVT::i8, Arith);
 
-  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, SetCC);
+  if (NoOverflow)
+    CCR = DAG.getConstant(0, DL, N->getValueType(1));
+  else
+    CCR = Arith.getValue(1);
+}
+
+SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *N = Op.getNode();
+  SDLoc DL(Op);
+
+  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+  // a "setcc" instruction that checks the overflow flag.
+  SDValue Result, CCR;
+  unsigned CC;
+  lowerOverflowArithmetic(Op, DAG, Result, CCR, CC);
+
+  SDValue Overflow;
+  if (isa<ConstantSDNode>(CCR)) {
+    // It's likely a result of operations that will not overflow
+    // hence no setcc is needed.
+    Overflow = CCR;
+  } else {
+    // Generate a M68kISD::SETCC.
+    Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
+                           DAG.getConstant(CC, DL, MVT::i8), CCR);
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Overflow);
 }
 
 /// Create a BTST (Bit Test) node - Test bit \p BitNo in \p Src and set
@@ -2269,55 +2341,12 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       Cond = Cmp;
       addTest = false;
     }
-  } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
-             CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
-             CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
-    SDValue LHS = Cond.getOperand(0);
-    SDValue RHS = Cond.getOperand(1);
-    unsigned MxOpcode;
-    unsigned MxCond;
-    SDVTList VTs;
-    switch (CondOpcode) {
-    case ISD::UADDO:
-      MxOpcode = M68kISD::ADD;
-      MxCond = M68k::COND_CS;
-      break;
-    case ISD::SADDO:
-      MxOpcode = M68kISD::ADD;
-      MxCond = M68k::COND_VS;
-      break;
-    case ISD::USUBO:
-      MxOpcode = M68kISD::SUB;
-      MxCond = M68k::COND_CS;
-      break;
-    case ISD::SSUBO:
-      MxOpcode = M68kISD::SUB;
-      MxCond = M68k::COND_VS;
-      break;
-    case ISD::UMULO:
-      MxOpcode = M68kISD::UMUL;
-      MxCond = M68k::COND_VS;
-      break;
-    case ISD::SMULO:
-      MxOpcode = M68kISD::SMUL;
-      MxCond = M68k::COND_VS;
-      break;
-    default:
-      llvm_unreachable("unexpected overflowing operator");
-    }
-    if (CondOpcode == ISD::UMULO)
-      VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32);
-    else
-      VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
-    SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS);
-
-    if (CondOpcode == ISD::UMULO)
-      Cond = MxOp.getValue(2);
-    else
-      Cond = MxOp.getValue(1);
-
-    CC = DAG.getConstant(MxCond, DL, MVT::i8);
+  } else if (isOverflowArithmetic(CondOpcode)) {
+    // Result is unused here.
+    SDValue Result;
+    unsigned CCode;
+    lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode);
+    CC = DAG.getConstant(CCode, DL, MVT::i8);
     addTest = false;
   }
 
@@ -2377,6 +2406,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  // Simple optimization when Cond is a constant to avoid generating
+  // M68kISD::CMOV if possible.
+  // TODO: Generalize this to use SelectionDAG::computeKnownBits.
+  if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) {
+    const APInt &C = Const->getAPIntValue();
+    if (C.countr_zero() >= 5)
+      return Op2;
+    else if (C.countr_one() >= 5)
+      return Op1;
+  }
+
   // M68kISD::CMOV means set the result (which is operand 1) to the RHS if
   // condition is true.
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
@@ -2466,61 +2506,15 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
     }
   }
   CondOpcode = Cond.getOpcode();
-  if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
-      CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO) {
-    SDValue LHS = Cond.getOperand(0);
-    SDValue RHS = Cond.getOperand(1);
-    unsigned MxOpcode;
-    unsigned MxCond;
-    SDVTList VTs;
-    // Keep this in sync with LowerXALUO, otherwise we might create redundant
-    // instructions that can't be removed afterwards (i.e. M68kISD::ADD and
-    // M68kISD::INC).
-    switch (CondOpcode) {
-    case ISD::UADDO:
-      MxOpcode = M68kISD::ADD;
-      MxCond = M68k::COND_CS;
-      break;
-    case ISD::SADDO:
-      MxOpcode = M68kISD::ADD;
-      MxCond = M68k::COND_VS;
-      break;
-    case ISD::USUBO:
-      MxOpcode = M68kISD::SUB;
-      MxCond = M68k::COND_CS;
-      break;
-    case ISD::SSUBO:
-      MxOpcode = M68kISD::SUB;
-      MxCond = M68k::COND_VS;
-      break;
-    case ISD::UMULO:
-      MxOpcode = M68kISD::UMUL;
-      MxCond = M68k::COND_VS;
-      break;
-    case ISD::SMULO:
-      MxOpcode = M68kISD::SMUL;
-      MxCond = M68k::COND_VS;
-      break;
-    default:
-      llvm_unreachable("unexpected overflowing operator");
-    }
+  if (isOverflowArithmetic(CondOpcode)) {
+    SDValue Result;
+    unsigned CCode;
+    lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode);
 
     if (Inverted)
-      MxCond = M68k::GetOppositeBranchCondition((M68k::CondCode)MxCond);
+      CCode = M68k::GetOppositeBranchCondition((M68k::CondCode)CCode);
+    CC = DAG.getConstant(CCode, DL, MVT::i8);
 
-    if (CondOpcode == ISD::UMULO)
-      VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i8);
-    else
-      VTs = DAG.getVTList(LHS.getValueType(), MVT::i8);
-
-    SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS);
-
-    if (CondOpcode == ISD::UMULO)
-      Cond = MxOp.getValue(2);
-    else
-      Cond = MxOp.getValue(1);
-
-    CC = DAG.getConstant(MxCond, DL, MVT::i8);
     AddTest = false;
   } else {
     unsigned CondOpc;
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index 15d2049f62cb7..1f5f1e815e2bf 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -590,8 +590,9 @@ class MxDiMuOp_DD<string MN, bits<4> CMD, bit SIGNED = false,
 }
 
 // $dreg <- $dreg op $dreg
-class MxDiMuOp_DD_Long<string MN, bits<10> CMD, bit SIGNED = false>
-    : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", []> {
+class MxDiMuOp_DD_Long<string MN, SDNode NODE, bits<10> CMD, bit SIGNED = false>
+    : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst",
+             [(set i32:$dst, CCR, (NODE i32:$src, i32:$opd))]> {
   let Inst = (ascend
     (descend CMD,
       /*MODE*/0b000, /*REGISTER*/(operand "$opd", 3)),
@@ -622,11 +623,9 @@ class MxDiMuOp_DI<string MN, bits<4> CMD, bit SIGNED = false,
 } // let Constraints
 } // Defs = [CCR]
 
-multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> {
-  let isCommutable = isComm in {
-    def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>;
-    def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>;
-  }
+multiclass MxDiMuOp<string MN, bits<4> CMD> {
+  def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>;
+  def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>;
 
   def "S"#NAME#"d32i16" : MxDiMuOp_DI<MN#"s", CMD, /*SIGNED*/true, MxDRD32, Mxi16imm>;
   def "U"#NAME#"d32i16" : MxDiMuOp_DI<MN#"u", CMD, /*SIGNED*/false, MxDRD32, Mxi16imm>;
@@ -634,8 +633,8 @@ multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> {
 
 defm DIV : MxDiMuOp<"div", 0x8>;
 
-def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", 0x131, /*SIGNED*/true>;
-def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", 0x131, /*SIGNED*/false>;
+def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", sdiv, 0x131, /*SIGNED*/true>;
+def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", udiv, 0x131, /*SIGNED*/false>;
 
 // This is used to cast immediates to 16-bits for operations which don't
 // support smaller immediate sizes.
@@ -685,13 +684,6 @@ def : Pat<(urem i16:$dst, i16:$opd),
             (LSR32di (LSR32di (UDIVd32d16 (MOVZXd32d16 $dst), $opd), 8), 8),
              MxSubRegIndex16Lo)>;
 
-
-// RR i32
-def : Pat<(sdiv i32:$dst, i32:$opd), (SDIVd32d32 $dst, $opd)>;
-
-def : Pat<(udiv i32:$dst, i32:$opd), (UDIVd32d32 $dst, $opd)>;
-
-
 // RI i8
 def : Pat<(sdiv i8:$dst, MximmSExt8:$opd),
           (EXTRACT_SUBREG
@@ -735,10 +727,10 @@ def : Pat<(urem i16:$dst, MximmSExt16:$opd),
              MxSubRegIndex16Lo)>;
 
 
-defm MUL : MxDiMuOp<"mul", 0xC, 1>;
+defm MUL : MxDiMuOp<"mul", 0xC>;
 
-def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", 0x130, /*SIGNED*/true>;
-def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", 0x130, /*SIGNED*/false>;
+def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", MxSMul, 0x130, /*SIGNED*/true>;
+def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", MxUMul, 0x130, /*SIGNED*/false>;
 
 // RR
 def : Pat<(mul i16:$dst, i16:$opd),
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td
index dc66e103361a4..1e40c3c48990d 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -55,15 +55,6 @@ def MxSDT_BiArithCCRInOut : SDTypeProfile<2, 3, [
   /*   CCR */ SDTCisSameAs<1, 4>
 ]>;
 
-// RES1, RES2, CCR <- op LHS, RHS
-def MxSDT_2BiArithCCROut : SDTypeProfile<3, 2, [
-  /* RES 1 */ SDTCisInt<0>,
-  /* RES 2 */ SDTCisSameAs<0, 1>,
-  /*   CCR */ SDTCisVT<1, i8>,
-  /*   LHS */ SDTCisSameAs<0, 2>,
-  /*   RHS */ SDTCisSameAs<0, 3>
-]>;
-
 def MxSDT_CmpTest : SDTypeProfile<1, 2, [
    /* CCR */ SDTCisVT<0, i8>,
    /* Ops */ SDTCisSameAs<1, 2>
@@ -134,7 +125,7 @@ def MxAddX : SDNode<"M68kISD::ADDX", MxSDT_BiArithCCRInOut>;
 def MxSubX : SDNode<"M68kISD::SUBX", MxSDT_BiArithCCRInOut>;
 
 def MxSMul : SDNode<"M68kISD::SMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>;
-def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_2BiArithCCROut, [SDNPCommutative]>;
+def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>;
 
 def MxCmp     : SDNode<"M68kISD::CMP", MxSDT_CmpTest>;
 def MxBtst    : SDNode<"M68kISD::BTST", MxSDT_CmpTest>;
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 53e2b6b4d94ea..184000b48987e 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -462,10 +462,8 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address,
   return MCDisassembler::Success;
 }
 
-// spimm is based on rlist now.
 static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
                                     uint64_t Address, const void *Decoder) {
-  // TODO: check if spimm matches rlist
   Inst.addOperand(MCOperand::createImm(Imm));
   return MCDisassembler::Success;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 288c33cfe11c8..f56f49ae24571 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -410,27 +410,6 @@ class RVInstIUnary<bits<12> imm12, bits<3> funct3, RISCVOpcode opcode,
   let Inst{31-20} = imm12;
 }
 
-class RVInstIMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, RISCVOpcode opcode,
-                   dag outs, dag ins, string opcodestr, string argstr>
-    : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
-  let Inst{31} = imm7{6};
-  let Inst{30} = imm5{4};
-  let Inst{29-28} = imm7{5-4};
-  let Inst{27-26} = imm5{3-2};
-  let Inst{25-22} = imm7{3-0};
-  let Inst{21-20} = imm5{1-0};
-}
-
-class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode,
-                   dag outs, dag ins, string opcodestr, string argstr>
-    : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
-  let Inst{31} = imm4{3};
-  let Inst{30} = imm3{2};
-  let Inst{29-28} = imm4{2-1};
-  let Inst{27-26} = imm3{1-0};
-  let Inst{25} = imm4{0};
-}
-
 class RVInstS<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
               string opcodestr, string argstr>
     : RVInst<outs, ins, opcodestr, argstr, [], InstFormatS> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 099cc0abd1424..3ee2a08089a96 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -597,18 +597,6 @@ class Priv_rr<string opcodestr, bits<7> funct7>
   let rd = 0;
 }
 
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
-             RISCVOpcode opcode, string opcodestr>
-    : RVInstIMopr<imm7, imm5, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
-                   opcodestr, "$rd, $rs1">;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3,
-             RISCVOpcode opcode, string opcodestr>
-    : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
-                   opcodestr, "$rd, $rs1, $rs2">;
-
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -798,22 +786,6 @@ def SRAW  : ALUW_rr<0b0100000, 0b101, "sraw">,
 } // IsSignExtendingOpW = 1
 } // Predicates = [IsRV64]
 
-// Zimop instructions
-
-foreach i = 0...31 in {
-    let Predicates = [HasStdExtZimop] in {
-    def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
-                 Sched<[]>;
-    } // Predicates = [HasStdExtZimop]
-}
-
-foreach i = 0...7 in {
-    let Predicates = [HasStdExtZimop] in {
-    def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
-                  Sched<[]>;
-    } // Predicates = [HasStdExtZimop]
-}
-
 //===----------------------------------------------------------------------===//
 // Privileged instructions
 //===----------------------------------------------------------------------===//
@@ -2140,6 +2112,7 @@ include "RISCVInstrInfoV.td"
 include "RISCVInstrInfoZvk.td"
 
 // Integer
+include "RISCVInstrInfoZimop.td"
 include "RISCVInstrInfoZicbo.td"
 include "RISCVInstrInfoZicond.td"
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 488ffa73f4e48..be4bc3b58766e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -257,13 +257,13 @@ class SegRegClass<LMULInfo m, int nf> {
 // Vector register and vector group type information.
 //===----------------------------------------------------------------------===//
 
-class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
+class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, LMULInfo M,
                 ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> {
   ValueType Vector = Vec;
   ValueType Mask = Mas;
   int SEW = Sew;
   int Log2SEW = !logtwo(Sew);
-  VReg RegClass = Reg;
+  VReg RegClass = M.vrclass;
   LMULInfo LMul = M;
   ValueType Scalar = Scal;
   RegisterClass ScalarRegClass = ScalarReg;
@@ -279,9 +279,9 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
 }
 
 class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew,
-                     VReg Reg, LMULInfo M, ValueType Scal = XLenVT,
+                     LMULInfo M, ValueType Scal = XLenVT,
                      RegisterClass ScalarReg = GPR>
-    : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg> {
+    : VTypeInfo<Vec, Mas, Sew, M, Scal, ScalarReg> {
   ValueType VectorM1 = VecM1;
 }
 
@@ -289,70 +289,70 @@ defset list<VTypeInfo> AllVectors = {
   defset list<VTypeInfo> AllIntegerVectors = {
     defset list<VTypeInfo> NoGroupIntegerVectors = {
       defset list<VTypeInfo> FractionalGroupIntegerVectors = {
-        def VI8MF8: VTypeInfo<vint8mf8_t,  vbool64_t,  8, VR, V_MF8>;
-        def VI8MF4: VTypeInfo<vint8mf4_t,  vbool32_t,  8, VR, V_MF4>;
-        def VI8MF2: VTypeInfo<vint8mf2_t,  vbool16_t,  8, VR, V_MF2>;
-        def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, VR, V_MF4>;
-        def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, VR, V_MF2>;
-        def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, VR, V_MF2>;
+        def VI8MF8:  VTypeInfo<vint8mf8_t,  vbool64_t, 8,  V_MF8>;
+        def VI8MF4:  VTypeInfo<vint8mf4_t,  vbool32_t, 8,  V_MF4>;
+        def VI8MF2:  VTypeInfo<vint8mf2_t,  vbool16_t, 8,  V_MF2>;
+        def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, V_MF4>;
+        def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, V_MF2>;
+        def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, V_MF2>;
       }
-      def VI8M1: VTypeInfo<vint8m1_t,   vbool8_t,   8, VR, V_M1>;
-      def VI16M1: VTypeInfo<vint16m1_t,  vbool16_t, 16, VR, V_M1>;
-      def VI32M1: VTypeInfo<vint32m1_t,  vbool32_t, 32, VR, V_M1>;
-      def VI64M1: VTypeInfo<vint64m1_t,  vbool64_t, 64, VR, V_M1>;
+      def VI8M1:  VTypeInfo<vint8m1_t,  vbool8_t,   8, V_M1>;
+      def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, V_M1>;
+      def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, V_M1>;
+      def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, V_M1>;
     }
     defset list<GroupVTypeInfo> GroupIntegerVectors = {
-      def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, VRM2, V_M2>;
-      def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, VRM4, V_M4>;
-      def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, VRM8, V_M8>;
+      def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, V_M2>;
+      def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, V_M4>;
+      def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, V_M8>;
 
-      def VI16M2: GroupVTypeInfo<vint16m2_t,vint16m1_t,vbool8_t, 16,VRM2, V_M2>;
-      def VI16M4: GroupVTypeInfo<vint16m4_t,vint16m1_t,vbool4_t, 16,VRM4, V_M4>;
-      def VI16M8: GroupVTypeInfo<vint16m8_t,vint16m1_t,vbool2_t, 16,VRM8, V_M8>;
+      def VI16M2: GroupVTypeInfo<vint16m2_t, vint16m1_t, vbool8_t, 16, V_M2>;
+      def VI16M4: GroupVTypeInfo<vint16m4_t, vint16m1_t, vbool4_t, 16, V_M4>;
+      def VI16M8: GroupVTypeInfo<vint16m8_t, vint16m1_t, vbool2_t, 16, V_M8>;
 
-      def VI32M2: GroupVTypeInfo<vint32m2_t,vint32m1_t,vbool16_t,32,VRM2, V_M2>;
-      def VI32M4: GroupVTypeInfo<vint32m4_t,vint32m1_t,vbool8_t, 32,VRM4, V_M4>;
-      def VI32M8: GroupVTypeInfo<vint32m8_t,vint32m1_t,vbool4_t, 32,VRM8, V_M8>;
+      def VI32M2: GroupVTypeInfo<vint32m2_t, vint32m1_t, vbool16_t, 32, V_M2>;
+      def VI32M4: GroupVTypeInfo<vint32m4_t, vint32m1_t, vbool8_t,  32, V_M4>;
+      def VI32M8: GroupVTypeInfo<vint32m8_t, vint32m1_t, vbool4_t,  32, V_M8>;
 
-      def VI64M2: GroupVTypeInfo<vint64m2_t,vint64m1_t,vbool32_t,64,VRM2, V_M2>;
-      def VI64M4: GroupVTypeInfo<vint64m4_t,vint64m1_t,vbool16_t,64,VRM4, V_M4>;
-      def VI64M8: GroupVTypeInfo<vint64m8_t,vint64m1_t,vbool8_t, 64,VRM8, V_M8>;
+      def VI64M2: GroupVTypeInfo<vint64m2_t, vint64m1_t, vbool32_t, 64, V_M2>;
+      def VI64M4: GroupVTypeInfo<vint64m4_t, vint64m1_t, vbool16_t, 64, V_M4>;
+      def VI64M8: GroupVTypeInfo<vint64m8_t, vint64m1_t, vbool8_t,  64, V_M8>;
     }
   }
 
   defset list<VTypeInfo> AllFloatVectors = {
     defset list<VTypeInfo> NoGroupFloatVectors = {
       defset list<VTypeInfo> FractionalGroupFloatVectors = {
-        def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, VR, V_MF4, f16, FPR16>;
-        def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, VR, V_MF2, f16, FPR16>;
-        def VF32MF2: VTypeInfo<vfloat32mf2_t,vbool64_t, 32, VR, V_MF2, f32, FPR32>;
+        def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, V_MF4, f16, FPR16>;
+        def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, V_MF2, f16, FPR16>;
+        def VF32MF2: VTypeInfo<vfloat32mf2_t, vbool64_t, 32, V_MF2, f32, FPR32>;
       }
-      def VF16M1:  VTypeInfo<vfloat16m1_t,  vbool16_t, 16, VR, V_M1,  f16, FPR16>;
-      def VF32M1:  VTypeInfo<vfloat32m1_t, vbool32_t, 32, VR, V_M1,  f32, FPR32>;
-      def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, VR, V_M1, f64, FPR64>;
+      def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, V_M1, f16, FPR16>;
+      def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, V_M1, f32, FPR32>;
+      def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, V_M1, f64, FPR64>;
     }
 
     defset list<GroupVTypeInfo> GroupFloatVectors = {
       def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16,
-                                 VRM2, V_M2, f16, FPR16>;
+                                 V_M2, f16, FPR16>;
       def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16,
-                                 VRM4, V_M4, f16, FPR16>;
+                                 V_M4, f16, FPR16>;
       def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16,
-                                 VRM8, V_M8, f16, FPR16>;
+                                 V_M8, f16, FPR16>;
 
       def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32,
-                                 VRM2, V_M2, f32, FPR32>;
+                                 V_M2, f32, FPR32>;
       def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t,  32,
-                                 VRM4, V_M4, f32, FPR32>;
+                                 V_M4, f32, FPR32>;
       def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t,  32,
-                                 VRM8, V_M8, f32, FPR32>;
+                                 V_M8, f32, FPR32>;
 
       def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64,
-                                 VRM2, V_M2, f64, FPR64>;
+                                 V_M2, f64, FPR64>;
       def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, vbool16_t, 64,
-                                 VRM4, V_M4, f64, FPR64>;
+                                 V_M4, f64, FPR64>;
       def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t,  64,
-                                 VRM8, V_M8, f64, FPR64>;
+                                 V_M8, f64, FPR64>;
     }
   }
 }
@@ -360,19 +360,19 @@ defset list<VTypeInfo> AllVectors = {
 defset list<VTypeInfo> AllBFloatVectors = {
   defset list<VTypeInfo> NoGroupBFloatVectors = {
     defset list<VTypeInfo> FractionalGroupBFloatVectors = {
-      def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>;
-      def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>;
+      def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, V_MF4, bf16, FPR16>;
+      def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, V_MF2, bf16, FPR16>;
     }
-    def VBF16M1:  VTypeInfo<vbfloat16m1_t,  vbool16_t, 16, VR, V_M1,  bf16, FPR16>;
+    def VBF16M1:  VTypeInfo<vbfloat16m1_t, vbool16_t, 16, V_M1, bf16, FPR16>;
   }
 
   defset list<GroupVTypeInfo> GroupBFloatVectors = {
     def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16,
-                                VRM2, V_M2, bf16, FPR16>;
+                                V_M2, bf16, FPR16>;
     def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16,
-                                VRM4, V_M4, bf16, FPR16>;
+                                V_M4, bf16, FPR16>;
     def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16,
-                                VRM8, V_M8, bf16, FPR16>;
+                                V_M8, bf16, FPR16>;
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 1b63ee7ac4bbd..561ab8d7403d6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -356,10 +356,9 @@ multiclass VPseudoSiFiveVQMACCDOD<string Constraint = ""> {
 }
 
 multiclass VPseudoSiFiveVQMACCQOQ<string Constraint = ""> {
-  foreach i = 0-3 in
-    let VLMul = MxListVF4[i].value in
-    defm NAME : VPseudoSiFiveVMACC<MxListVF4[i].MX, MxListVF8[i].vrclass,
-                                   MxListVF4[i].vrclass, Constraint>;
+  foreach m = [V_MF2, V_M1, V_M2, V_M4] in
+    let VLMul = m.value in
+    defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
 }
 
 multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
@@ -369,7 +368,7 @@ multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
 }
 
 multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> {
-  foreach i = [0, 1, 2, 3, 4] in
+  foreach i = 0-4 in
     let hasSideEffects = 0 in
       defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass,
                                                        MxListVF4[i].vrclass,
@@ -581,10 +580,10 @@ defset list<VTypeInfoToWide> VQMACCDODInfoPairs = {
 }
 
 defset list<VTypeInfoToWide> VQMACCQOQInfoPairs = {
-   def : VTypeInfoToWide<VI8MF2, VI32M1>;
-   def : VTypeInfoToWide<VI8M1, VI32M2>;
-   def : VTypeInfoToWide<VI8M2, VI32M4>;
-   def : VTypeInfoToWide<VI8M4, VI32M8>;
+  def : VTypeInfoToWide<VI8MF2, VI32M1>;
+  def : VTypeInfoToWide<VI8M1, VI32M2>;
+  def : VTypeInfoToWide<VI8M2, VI32M4>;
+  def : VTypeInfoToWide<VI8M4, VI32M8>;
 }
 
 multiclass VPatVQMACCDOD<string intrinsic, string instruction, string kind>
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
index 9a7249fe3e3d6..3506204d6c255 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -69,7 +69,7 @@ def spimm : Operand<OtherVT> {
     int64_t Imm;
     if (!MCOp.evaluateAsConstantImm(Imm))
       return false;
-    return isShiftedUInt<5, 4>(Imm);
+    return isShiftedUInt<2, 4>(Imm);
   }];
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
new file mode 100644
index 0000000000000..1e8c70046c634
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
@@ -0,0 +1,59 @@
+//===-- RISCVInstrInfoZimop.td -----------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard
+// May-Be-Operations Extension (Zimop).
+// This version is still experimental as the 'Zimop' extension hasn't been
+// ratified yet. It is based on v0.1 of the specification.
+//
+//===----------------------------------------------------------------------===//
+
+class RVInstIMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, RISCVOpcode opcode,
+                   dag outs, dag ins, string opcodestr, string argstr>
+    : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+  let Inst{31} = imm7{6};
+  let Inst{30} = imm5{4};
+  let Inst{29-28} = imm7{5-4};
+  let Inst{27-26} = imm5{3-2};
+  let Inst{25-22} = imm7{3-0};
+  let Inst{21-20} = imm5{1-0};
+}
+
+class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode,
+                   dag outs, dag ins, string opcodestr, string argstr>
+    : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> {
+  let Inst{31} = imm4{3};
+  let Inst{30} = imm3{2};
+  let Inst{29-28} = imm4{2-1};
+  let Inst{27-26} = imm3{1-0};
+  let Inst{25} = imm4{0};
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
+             RISCVOpcode opcode, string opcodestr>
+    : RVInstIMopr<imm7, imm5, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
+                   opcodestr, "$rd, $rs1">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3,
+             RISCVOpcode opcode, string opcodestr>
+    : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
+                   opcodestr, "$rd, $rs1, $rs2">;
+
+foreach i = 0...31 in {
+  let Predicates = [HasStdExtZimop] in
+  def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
+               Sched<[]>;
+}
+
+foreach i = 0...7 in {
+  let Predicates = [HasStdExtZimop] in
+  def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
+                Sched<[]>;
+}
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 0582270285180..220ca31a825f9 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -277,31 +277,63 @@ class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg,
   let SchedRW = [WriteADC];
 }
 
-// UnaryOpR - Instructions that read "reg" and write "reg".
-class UnaryOpR<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p>
-  : ITy<o, f, t, (outs t.RegClass:$dst),
-        (ins t.RegClass:$src1), m, "$dst", p>, Sched<[WriteALU]>;
-
-// UnaryOpM - Instructions that read "[mem]" and writes "[mem]".
-class UnaryOpM<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p>
-  : ITy<o, f, t, (outs), (ins t.MemOperand:$dst), m, "$dst", p>,
-    Sched<[WriteALURMW]> {
+// UnaryOpR - Instructions that read "reg".
+class UnaryOpR<bits<8> o, Format f, string m, string args, X86TypeInfo t,
+               dag out, list<dag> p>
+  : ITy<o, f, t, out, (ins t.RegClass:$src), m, args, p>, Sched<[WriteALU]>;
+// UnaryOpR_R - Instructions that read "reg" and write "reg".
+class UnaryOpR_R<bits<8> o, Format f, string m, X86TypeInfo t,
+                  SDPatternOperator node>
+  : UnaryOpR<o, f, m, unaryop_args, t, (outs t.RegClass:$dst),
+             [(set t.RegClass:$dst, (node t.RegClass:$src))]>;
+// UnaryOpR_RF - Instructions that read "reg" and write "reg"/EFLAGS.
+class UnaryOpR_RF<bits<8> o, Format f, string m, X86TypeInfo t,
+                  SDPatternOperator node>
+  : UnaryOpR<o, f, m, unaryop_args, t, (outs t.RegClass:$dst),
+             [(set t.RegClass:$dst, (node t.RegClass:$src)),
+              (implicit EFLAGS)]>, DefEFLAGS;
+
+// UnaryOpM - Instructions that read "[mem]".
+class UnaryOpM<bits<8> o, Format f, string m, string args, X86TypeInfo t,
+               dag out, list<dag> p>
+  : ITy<o, f, t, out, (ins t.MemOperand:$src), m, args, p> {
   let mayLoad = 1;
+}
+// UnaryOpM_M - Instructions that read "[mem]" and writes "[mem]".
+class UnaryOpM_M<bits<8> o, Format f, string m, X86TypeInfo t,
+                  SDPatternOperator node>
+  : UnaryOpM<o, f, m, unaryop_args, t, (outs),
+             [(store (node (t.LoadNode addr:$src)), addr:$src)]>,
+    Sched<[WriteALURMW]>{
+  let mayStore = 1;
+}
+// UnaryOpM_MF - Instructions that read "[mem]" and writes "[mem]"/EFLAGS.
+class UnaryOpM_MF<bits<8> o, Format f, string m, X86TypeInfo t,
+                  SDPatternOperator node>
+  : UnaryOpM<o, f, m, unaryop_args, t, (outs),
+             [(store (node (t.LoadNode addr:$src)), addr:$src),
+              (implicit EFLAGS)]>, Sched<[WriteALURMW]>, DefEFLAGS {
   let mayStore = 1;
 }
 
 //===----------------------------------------------------------------------===//
 // MUL/IMUL and DIV/IDIV Instructions
 //
-class MulOpR<bits<8> o, Format f, string m, X86TypeInfo t,
+class MulDivOpR<bits<8> o, Format f, string m, X86TypeInfo t,
              X86FoldableSchedWrite sched, list<dag> p>
-  : ITy<o, f, t, (outs), (ins t.RegClass:$src), m, "$src", p>, Sched<[sched]>;
+  : UnaryOpR<o, f, m, "$src", t, (outs), p> {
+  let SchedRW = [sched];
+}
 
-class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t,
+class MulDivOpM<bits<8> o, Format f, string m, X86TypeInfo t,
              X86FoldableSchedWrite sched, list<dag> p>
-  : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m,
-        "$src", p>, SchedLoadReg<sched> {
-  let mayLoad = 1;
+  : UnaryOpM<o, f, m, "$src", t, (outs), p> {
+  let SchedRW =
+    [sched.Folded,
+     // Memory operand.
+     ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+     // Register reads (implicit or explicit).
+     sched.ReadAfterFold, sched.ReadAfterFold];
 }
 
 multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOperator node> {
@@ -312,23 +344,23 @@ multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOpera
   // This probably ought to be moved to a def : Pat<> if the
   // syntax can be accepted.
   let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-  def 8r : MulOpR<o, RegMRM, m, Xi8, WriteIMul8,
+  def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8,
                   [(set AL, (node AL, GR8:$src)), (implicit EFLAGS)]>;
   let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-  def 16r : MulOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16;
+  def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16;
   let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-  def 32r : MulOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32;
+  def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32;
   let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-  def 64r : MulOpR<o, RegMRM, m, Xi64, WriteIMul64, []>;
+  def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>;
   let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-  def 8m : MulOpM<o, MemMRM, m, Xi8, WriteIMul8,
+  def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8,
                   [(set AL, (node AL, (loadi8 addr:$src))), (implicit EFLAGS)]>;
   let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-  def 16m : MulOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16;
+  def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16;
   let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-  def 32m : MulOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32;
+  def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32;
   let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-  def 64m : MulOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>;
+  def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>;
 }
 
 defm MUL : Mul<0xF7, "mul", MRM4r, MRM4m, mul>;
@@ -340,21 +372,21 @@ multiclass Div<bits<8> o, string m, Format RegMRM, Format MemMRM> {
   defvar sched32 = !if(!eq(m, "div"), WriteDiv32, WriteIDiv32);
   defvar sched64 = !if(!eq(m, "div"), WriteDiv64, WriteIDiv64);
   let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-  def 8r  : MulOpR<o, RegMRM, m, Xi8, sched8, []>;
+  def 8r  : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>;
   let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-  def 16r : MulOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16;
+  def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16;
   let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-  def 32r : MulOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32;
+  def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32;
   let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-  def 64r : MulOpR<o, RegMRM, m, Xi64, sched64, []>;
+  def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>;
   let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-  def 8m  : MulOpM<o, MemMRM, m, Xi8, sched8, []>;
+  def 8m  : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>;
   let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-  def 16m : MulOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16;
+  def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16;
   let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-  def 32m : MulOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32;
+  def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32;
   let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-  def 64m : MulOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>;
+  def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>;
 }
 let hasSideEffects = 1 in { // so that we don't speculatively execute
 defm DIV: Div<0xF7, "div", MRM6r, MRM6m>;
@@ -426,92 +458,84 @@ def IMUL64rmi32 : IMulOpMI_R<Xi64, WriteIMul64Imm>;
 //===----------------------------------------------------------------------===//
 // INC and DEC Instructions
 //
-class INCDECR<Format f, string m, X86TypeInfo t, SDPatternOperator node>
-  : UnaryOpR<0xFF, f, m, t,
-             [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, 1))]>,
-    DefEFLAGS {
-  let isConvertibleToThreeAddress = 1; // Can xform into LEA.
+class IncOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag> {
+  let Pattern = [(set t.RegClass:$dst, EFLAGS,
+                 (X86add_flag_nocf t.RegClass:$src, 1))];
 }
-class INCDECM<Format f, string m, X86TypeInfo t, int num>
-  : UnaryOpM<0xFF, f, m, t,
-             [(store (add (t.LoadNode addr:$dst), num), addr:$dst),
-              (implicit EFLAGS)]>, DefEFLAGS;
-// INCDECR_ALT - Instructions like "inc reg" short forms.
-class INCDECR_ALT<bits<8> o, string m, X86TypeInfo t>
-  : UnaryOpR<o, AddRegFrm, m, t, []>, DefEFLAGS {
-  // Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-  let Predicates = [Not64BitMode];
+class DecOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag> {
+  let Pattern = [(set t.RegClass:$dst, EFLAGS,
+                 (X86sub_flag_nocf t.RegClass:$src, 1))];
 }
-let Constraints = "$src1 = $dst" in {
-def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>, OpSize16;
-def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>, OpSize32;
-def INC8r  : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
-def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>, OpSize16;
-def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>, OpSize32;
-def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
-def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>, OpSize16;
-def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>, OpSize32;
-def DEC8r  : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>;
-def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>, OpSize16;
-def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>, OpSize32;
-def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>;
+class IncOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> {
+  let Pattern = [(store (add (t.LoadNode addr:$src), 1), addr:$src),
+                 (implicit EFLAGS)];
+}
+class DecOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> {
+  let Pattern = [(store (add (t.LoadNode addr:$src), -1), addr:$src),
+                 (implicit EFLAGS)];
+}
+// IncDec_Alt - Instructions like "inc reg" short forms.
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+class IncDec_Alt<bits<8> o, string m, X86TypeInfo t>
+  : UnaryOpR_RF<o, AddRegFrm, m, t, null_frag>, Requires<[Not64BitMode]>;
+
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
+def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16;
+def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32;
+def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16;
+def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32;
+def INC8r  : IncOpR_RF<Xi8>;
+def INC16r : IncOpR_RF<Xi16>, OpSize16;
+def INC32r : IncOpR_RF<Xi32>, OpSize32;
+def INC64r : IncOpR_RF<Xi64>;
+def DEC8r  : DecOpR_RF<Xi8>;
+def DEC16r : DecOpR_RF<Xi16>, OpSize16;
+def DEC32r : DecOpR_RF<Xi32>, OpSize32;
+def DEC64r : DecOpR_RF<Xi64>;
 }
 let Predicates = [UseIncDec] in {
-def INC8m  : INCDECM<MRM0m, "inc", Xi8, 1>;
-def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16;
-def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>, OpSize32;
-def DEC8m  : INCDECM<MRM1m, "dec", Xi8, -1>;
-def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>, OpSize16;
-def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>, OpSize32;
+def INC8m  : IncOpM_M<Xi8>;
+def INC16m : IncOpM_M<Xi16>, OpSize16;
+def INC32m : IncOpM_M<Xi32>, OpSize32;
+def DEC8m  : DecOpM_M<Xi8>;
+def DEC16m : DecOpM_M<Xi16>, OpSize16;
+def DEC32m : DecOpM_M<Xi32>, OpSize32;
 }
 let Predicates = [UseIncDec, In64BitMode] in {
-def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>;
-def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>;
+def INC64m : IncOpM_M<Xi64>;
+def DEC64m : DecOpM_M<Xi64>;
 }
 
 //===----------------------------------------------------------------------===//
 // NEG and NOT Instructions
 //
-class NegOpR<bits<8> o, string m, X86TypeInfo t>
-  : UnaryOpR<o, MRM3r, m, t,
-             [(set t.RegClass:$dst, (ineg t.RegClass:$src1)),
-              (implicit EFLAGS)]>, DefEFLAGS;
-class NegOpM<bits<8> o, string m, X86TypeInfo t>
-  : UnaryOpM<o, MRM3m, m, t,
-             [(store (ineg (t.LoadNode addr:$dst)), addr:$dst),
-              (implicit EFLAGS)]>, DefEFLAGS;
-
-// NOTE: NOT does not set EFLAGS!
-class NotOpR<bits<8> o, string m, X86TypeInfo t>
-  : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>;
-
-class NotOpM<bits<8> o, string m, X86TypeInfo t>
-  : UnaryOpM<o, MRM2m, m,  t,
-             [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-def NEG8r  : NegOpR<0xF6, "neg", Xi8>;
-def NEG16r : NegOpR<0xF7, "neg", Xi16>, OpSize16;
-def NEG32r : NegOpR<0xF7, "neg", Xi32>, OpSize32;
-def NEG64r : NegOpR<0xF7, "neg", Xi64>;
+class NegOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xF7, MRM3r, "neg", t, ineg>;
+class NegOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xF7, MRM3m, "neg", t, ineg>;
+
+class NotOpR_R<X86TypeInfo t> : UnaryOpR_R<0xF7, MRM2r, "not", t, not>;
+class NotOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM2m, "not", t, not>;
+
+let Constraints = "$src = $dst" in {
+def NEG8r  : NegOpR_RF<Xi8>;
+def NEG16r : NegOpR_RF<Xi16>, OpSize16;
+def NEG32r : NegOpR_RF<Xi32>, OpSize32;
+def NEG64r : NegOpR_RF<Xi64>;
+
+def NOT8r  : NotOpR_R<Xi8>;
+def NOT16r : NotOpR_R<Xi16>, OpSize16;
+def NOT32r : NotOpR_R<Xi32>, OpSize32;
+def NOT64r : NotOpR_R<Xi64>;
 }
 
-def NEG8m  : NegOpM<0xF6, "neg", Xi8>;
-def NEG16m : NegOpM<0xF7, "neg", Xi16>, OpSize16;
-def NEG32m : NegOpM<0xF7, "neg", Xi32>, OpSize32;
-def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>;
-
-let Constraints = "$src1 = $dst" in {
-def NOT8r  : NotOpR<0xF6, "not", Xi8>;
-def NOT16r : NotOpR<0xF7, "not", Xi16>, OpSize16;
-def NOT32r : NotOpR<0xF7, "not", Xi32>, OpSize32;
-def NOT64r : NotOpR<0xF7, "not", Xi64>;
-}
+def NEG8m  : NegOpM_MF<Xi8>;
+def NEG16m : NegOpM_MF<Xi16>, OpSize16;
+def NEG32m : NegOpM_MF<Xi32>, OpSize32;
+def NEG64m : NegOpM_MF<Xi64>, Requires<[In64BitMode]>;
 
-def NOT8m  : NotOpM<0xF6, "not", Xi8>;
-def NOT16m : NotOpM<0xF7, "not", Xi16>, OpSize16;
-def NOT32m : NotOpM<0xF7, "not", Xi32>, OpSize32;
-def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>;
+def NOT8m  : NotOpM_M<Xi8>;
+def NOT16m : NotOpM_M<Xi16>, OpSize16;
+def NOT32m : NotOpM_M<Xi32>, OpSize32;
+def NOT64m : NotOpM_M<Xi64>, Requires<[In64BitMode]>;
 
 /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
 /// defined with "(set GPR:$dst, EFLAGS, (...".
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index efb58c6102dd1..699e5847e63fb 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -446,11 +446,11 @@ let Predicates = [HasUSERMSR], mayLoad = 1 in {
 }
 let Predicates = [HasUSERMSR], mayStore = 1 in {
   def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
-                "uwrmsr\t{$src1, $src2|$src2, $src1}",
+                "uwrmsr\t{$src2, $src1|$src1, $src2}",
                 [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8, XS;
   def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm),
                 "uwrmsr\t{$src, $imm|$imm, $src}",
-                [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7, XS, VEX;
+                [(int_x86_uwrmsr i64immSExt32_su:$imm, GR64:$src)]>, T_MAP7, XS, VEX;
 }
 let Defs = [RAX, RDX], Uses = [ECX] in
 def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td
index 89f5653c04f2d..b7d2d8096ff54 100644
--- a/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/llvm/lib/Target/X86/X86InstrUtils.td
@@ -99,17 +99,6 @@ class DisassembleOnly {
   bit ForceDisassemble = 1;
 }
 
-
-// SchedModel info for instruction that loads one value and gets the second
-// (and possibly third) value from a register.
-// This is used for instructions that put the memory operands before other
-// uses.
-class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
-  // Memory operand.
-  ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
-  // Register reads (implicit or explicit).
-  Sched.ReadAfterFold, Sched.ReadAfterFold]>;
-
 //===----------------------------------------------------------------------===//
 // X86 Type infomation definitions
 //===----------------------------------------------------------------------===//
@@ -957,16 +946,15 @@ class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 /// 2. Infers whether the instruction should have a 0x40 REX_W prefix.
 /// 3. Infers whether the low bit of the opcode should be 0 (for i8 operations)
 ///    or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
-          string mnemonic, string args, list<dag> pattern>
-  : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
-       opcode{3}, opcode{2}, opcode{1},
-       !if(!eq(typeinfo.HasEvenOpcode, 1), 0, opcode{0})}, f, outs, ins,
-      !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
-
+class ITy<bits<8> o, Format f, X86TypeInfo t, dag outs, dag ins, string m,
+          string args, list<dag> p>
+  : I<{o{7}, o{6}, o{5}, o{4}, o{3}, o{2}, o{1},
+       !if(!eq(t.HasEvenOpcode, 1), 0, o{0})}, f, outs, ins,
+      !strconcat(m, "{", t.InstrSuffix, "}\t", args), p> {
   let hasSideEffects = 0;
-  let hasREX_W  = typeinfo.HasREX_W;
+  let hasREX_W  = t.HasREX_W;
 }
 
+defvar unaryop_args = "$src";
 defvar binop_args = "{$src2, $src1|$src1, $src2}";
 defvar binop_ndd_args = "{$src2, $src1, $dst|$dst, $src1, $src2}";
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 8a04987e768a1..e09dc7ff02a07 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1459,6 +1459,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
                                        Args, CxtI);
 }
 
+InstructionCost
+X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+                            unsigned Opcode1, const SmallBitVector &OpcodeMask,
+                            TTI::TargetCostKind CostKind) const {
+  if (isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask))
+    return TTI::TCC_Basic;
+  return InstructionCost::getInvalid();
+}
+
 InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                            VectorType *BaseTp,
                                            ArrayRef<int> Mask,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0fa0d240a548b..07a3fff4f84b3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -140,6 +140,11 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
+  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+                                  unsigned Opcode1,
+                                  const SmallBitVector &OpcodeMask,
+                                  TTI::TargetCostKind CostKind) const;
+
   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7c277518b21db..9ce9f8451a95f 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1629,7 +1629,10 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
     for (auto &I : BB.instructionsWithoutDebug())
       if (auto *CB = dyn_cast<CallBase>(&I)) {
         Function *Callee = CB->getCalledFunction();
-        if (!Callee || Callee == F || !Callee->doesNotRecurse())
+        if (!Callee || Callee == F ||
+            (!Callee->doesNotRecurse() &&
+             !(Callee->isDeclaration() &&
+               Callee->hasFnAttribute(Attribute::NoCallback))))
           // Function calls a potentially recursive function.
           return;
       }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 719a2678fc189..556fde37efeb2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
       assert(NotLHS != nullptr && NotRHS != nullptr &&
              "isFreeToInvert desynced with getFreelyInverted");
       Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
-      return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2),
-                                       LHSPlusRHS);
+      return BinaryOperator::CreateSub(
+          ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS);
     }
   }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52f..0222c93faf24e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4966,7 +4966,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
 
     case Instruction::SDiv:
-      if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
+      if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) ||
+          !BO0->isExact() || !BO1->isExact())
         break;
       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32913b3f55697..c0ace2996c32c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8428,6 +8428,25 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           Mask);
       VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
                                      FinalVecTy, Mask);
+      // Patterns like [fadd,fsub] can be combined into a single instruction
+      // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
+      // need to take into account their order when looking for the most used
+      // order.
+      unsigned Opcode0 = E->getOpcode();
+      unsigned Opcode1 = E->getAltOpcode();
+      // The opcode mask selects between the two opcodes.
+      SmallBitVector OpcodeMask(E->Scalars.size(), false);
+      for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
+        if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
+          OpcodeMask.set(Lane);
+      // If this pattern is supported by the target then we consider the
+      // order.
+      if (TTI->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
+        InstructionCost AltVecCost =
+            TTI->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
+        return AltVecCost < VecCost ? AltVecCost : VecCost;
+      }
+      // TODO: Check the reverse order too.
       return VecCost;
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 86e7f8c113d1d..bea56a72bdeae 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -43,13 +43,13 @@ define void @test1_no(ptr %p) nounwind {
 ; This is unusual, since the function is memcpy, but as above, this
 ; isn't necessarily invalid.
 
-; CHECK: define void @test2_yes(ptr nocapture %p, ptr nocapture %q, i64 %n) #4 {
+; CHECK: define void @test2_yes(ptr nocapture %p, ptr nocapture %q, i64 %n) #0 {
 define void @test2_yes(ptr %p, ptr %q, i64 %n) nounwind {
   call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 %n, i1 false), !tbaa !1
   ret void
 }
 
-; CHECK: define void @test2_no(ptr nocapture writeonly %p, ptr nocapture readonly %q, i64 %n) #5 {
+; CHECK: define void @test2_no(ptr nocapture writeonly %p, ptr nocapture readonly %q, i64 %n) #4 {
 define void @test2_no(ptr %p, ptr %q, i64 %n) nounwind {
   call void @llvm.memcpy.p0.p0.i64(ptr %p, ptr %q, i64 %n, i1 false), !tbaa !2
   ret void
@@ -63,7 +63,7 @@ define i32 @test3_yes(ptr %p) nounwind {
   ret i32 %t
 }
 
-; CHECK: define i32 @test3_no(ptr nocapture %p) #6 {
+; CHECK: define i32 @test3_no(ptr nocapture %p) #4 {
 define i32 @test3_no(ptr %p) nounwind {
   %t = va_arg ptr %p, i32, !tbaa !2
   ret i32 %t
@@ -76,10 +76,8 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) nounwind
 ; CHECK: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
 ; CHECK: attributes #2 = { nofree nosync nounwind memory(none) }
 ; CHECK: attributes #3 = { nounwind }
-; CHECK: attributes #4 = { mustprogress nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #5 = { mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; CHECK: attributes #6 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
-; CHECK: attributes #7 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #4 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 
 ; Root note.
 !0 = !{ }
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index e43fcef30b00e..b2fc477d8655a 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1789,6 +1789,26 @@ define <8 x i1> @not_cmle8xi8(<8 x i8> %0) {
   ret <8 x i1> %cmp.i
 }
 
+define <4 x i1> @not_cmle16xi8(<4 x i32> %0) {
+; CHECK-SD-LABEL: not_cmle16xi8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmle v0.4s, v0.4s, #0
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: not_cmle16xi8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI134_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI134_0]
+; CHECK-GI-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
+entry:
+  %bc = bitcast <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0> to <4 x i32>
+  %cmp.i = icmp slt <4 x i32> %0, %bc
+  ret <4 x i1> %cmp.i
+}
+
 define <8 x i8> @cmltz8xi8_alt(<8 x i8> %A) {
 ; CHECK-SD-LABEL: cmltz8xi8_alt:
 ; CHECK-SD:       // %bb.0:
@@ -2082,8 +2102,8 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
 ;
 ; CHECK-GI-LABEL: cmhsz2xi64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI154_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI154_0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI155_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI155_0]
 ; CHECK-GI-NEXT:    cmhs v0.2d, v0.2d, v1.2d
 ; CHECK-GI-NEXT:    ret
   %tmp3 = icmp uge <2 x i64> %A, <i64 2, i64 2>
@@ -2168,8 +2188,8 @@ define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
 ;
 ; CHECK-GI-LABEL: cmhiz2xi64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI161_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI161_0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI162_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI162_0]
 ; CHECK-GI-NEXT:    cmhi v0.2d, v0.2d, v1.2d
 ; CHECK-GI-NEXT:    ret
   %tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1>
@@ -2344,8 +2364,8 @@ define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
 ;
 ; CHECK-GI-LABEL: cmloz2xi64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI175_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI175_0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI176_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI176_0]
 ; CHECK-GI-NEXT:    cmhi v0.2d, v1.2d, v0.2d
 ; CHECK-GI-NEXT:    ret
   %tmp3 = icmp ult <2 x i64> %A, <i64 2, i64 2>
diff --git a/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll b/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
index 1d3371cce833a..b649b2ba16147 100644
--- a/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
@@ -1,9 +1,61 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=m68k-linux -verify-machineinstrs | FileCheck %s
 
+define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: smul_i8:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.b (11,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.b (7,%sp), %d1
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    muls %d0, %d1
+; CHECK-NEXT:    move.l %d1, %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    rts
+entry:
+  %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %smul, 1
+  %smul.result = extractvalue { i8, i1 } %smul, 0
+  %X = select i1 %cmp, i8 42, i8 %smul.result
+  ret i8 %X
+}
+
+define zeroext i8 @smul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: smul_i8_no_ovf:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.l #42, %d0
+; CHECK-NEXT:    rts
+entry:
+  %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %smul, 1
+  %smul.result = extractvalue { i8, i1 } %smul, 0
+  %X = select i1 %cmp, i8 %smul.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.smul.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+; CHECK-LABEL: smul_i16:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.w (6,%sp), %d0
+; CHECK-NEXT:    move.w (10,%sp), %d1
+; CHECK-NEXT:    muls %d1, %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    rts
+entry:
+  %smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %smul, 1
+  %smul.result = extractvalue { i16, i1 } %smul, 0
+  %X = select i1 %cmp, i16 42, i16 %smul.result
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
+
 declare i32 @printf(i8*, ...) nounwind
 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
-declare { i63, i1 } @llvm.smul.with.overflow.i63(i63, i63)
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
@@ -11,37 +63,23 @@ declare { i63, i1 } @llvm.smul.with.overflow.i63(i63, i63)
 define fastcc i1 @test1(i32 %v1, i32 %v2) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    suba.l #28, %sp
-; CHECK-NEXT:    movem.l %d2-%d3, (20,%sp) ; 12-byte Folded Spill
-; CHECK-NEXT:    move.l %d1, (12,%sp)
-; CHECK-NEXT:    move.l #31, %d2
-; CHECK-NEXT:    asr.l %d2, %d1
-; CHECK-NEXT:    move.l %d1, (8,%sp)
-; CHECK-NEXT:    move.l %d0, (4,%sp)
-; CHECK-NEXT:    asr.l %d2, %d0
-; CHECK-NEXT:    move.l %d0, (%sp)
-; CHECK-NEXT:    jsr __muldi3@PLT
-; CHECK-NEXT:    move.l %d1, %d3
-; CHECK-NEXT:    asr.l %d2, %d3
-; CHECK-NEXT:    sub.l %d3, %d0
-; CHECK-NEXT:    sne %d0
-; CHECK-NEXT:    cmpi.b #0, %d0
-; CHECK-NEXT:    beq .LBB0_1
+; CHECK-NEXT:    suba.l #12, %sp
+; CHECK-NEXT:    muls.l %d1, %d0
+; CHECK-NEXT:    bvc .LBB3_1
 ; CHECK-NEXT:  ; %bb.2: ; %overflow
 ; CHECK-NEXT:    lea (no,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
 ; CHECK-NEXT:    jsr printf@PLT
 ; CHECK-NEXT:    move.b #0, %d0
-; CHECK-NEXT:    bra .LBB0_3
-; CHECK-NEXT:  .LBB0_1: ; %normal
-; CHECK-NEXT:    move.l %d1, (4,%sp)
+; CHECK-NEXT:    adda.l #12, %sp
+; CHECK-NEXT:    rts
+; CHECK-NEXT:  .LBB3_1: ; %normal
+; CHECK-NEXT:    move.l %d0, (4,%sp)
 ; CHECK-NEXT:    lea (ok,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
 ; CHECK-NEXT:    jsr printf@PLT
 ; CHECK-NEXT:    move.b #1, %d0
-; CHECK-NEXT:  .LBB0_3: ; %overflow
-; CHECK-NEXT:    movem.l (20,%sp), %d2-%d3 ; 12-byte Folded Reload
-; CHECK-NEXT:    adda.l #28, %sp
+; CHECK-NEXT:    adda.l #12, %sp
 ; CHECK-NEXT:    rts
 entry:
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
@@ -61,37 +99,25 @@ overflow:
 define fastcc i1 @test2(i32 %v1, i32 %v2) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    suba.l #28, %sp
-; CHECK-NEXT:    movem.l %d2-%d3, (20,%sp) ; 12-byte Folded Spill
-; CHECK-NEXT:    move.l %d1, (12,%sp)
-; CHECK-NEXT:    move.l #31, %d2
-; CHECK-NEXT:    asr.l %d2, %d1
-; CHECK-NEXT:    move.l %d1, (8,%sp)
-; CHECK-NEXT:    move.l %d0, (4,%sp)
-; CHECK-NEXT:    asr.l %d2, %d0
-; CHECK-NEXT:    move.l %d0, (%sp)
-; CHECK-NEXT:    jsr __muldi3@PLT
-; CHECK-NEXT:    move.l %d1, %d3
-; CHECK-NEXT:    asr.l %d2, %d3
-; CHECK-NEXT:    sub.l %d3, %d0
-; CHECK-NEXT:    sne %d0
-; CHECK-NEXT:    sub.b #1, %d0
-; CHECK-NEXT:    bne .LBB1_3
+; CHECK-NEXT:    suba.l #12, %sp
+; CHECK-NEXT:    muls.l %d1, %d0
+; CHECK-NEXT:    svs %d1
+; CHECK-NEXT:    sub.b #1, %d1
+; CHECK-NEXT:    bne .LBB4_2
 ; CHECK-NEXT:  ; %bb.1: ; %overflow
 ; CHECK-NEXT:    lea (no,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
 ; CHECK-NEXT:    jsr printf@PLT
 ; CHECK-NEXT:    move.b #0, %d0
-; CHECK-NEXT:    bra .LBB1_2
-; CHECK-NEXT:  .LBB1_3: ; %normal
-; CHECK-NEXT:    move.l %d1, (4,%sp)
+; CHECK-NEXT:    adda.l #12, %sp
+; CHECK-NEXT:    rts
+; CHECK-NEXT:  .LBB4_2: ; %normal
+; CHECK-NEXT:    move.l %d0, (4,%sp)
 ; CHECK-NEXT:    lea (ok,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
 ; CHECK-NEXT:    jsr printf@PLT
 ; CHECK-NEXT:    move.b #1, %d0
-; CHECK-NEXT:  .LBB1_2: ; %overflow
-; CHECK-NEXT:    movem.l (20,%sp), %d2-%d3 ; 12-byte Folded Reload
-; CHECK-NEXT:    adda.l #28, %sp
+; CHECK-NEXT:    adda.l #12, %sp
 ; CHECK-NEXT:    rts
 entry:
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
@@ -129,7 +155,8 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    move.l (8,%sp), %d0
 ; CHECK-NEXT:    add.l (4,%sp), %d0
-; CHECK-NEXT:    lsl.l #2, %d0
+; CHECK-NEXT:    move.l #4, %d1
+; CHECK-NEXT:    muls.l %d1, %d0
 ; CHECK-NEXT:    rts
 entry:
 	%tmp0 = add i32 %b, %a
diff --git a/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll b/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
index 16dc1036fd284..fd128a3e52bd3 100644
--- a/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
+++ b/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
@@ -1,20 +1,68 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=m68k -verify-machineinstrs | FileCheck %s
 
+define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: umul_i8:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.b (11,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.b (7,%sp), %d1
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    muls %d0, %d1
+; CHECK-NEXT:    move.l %d1, %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    rts
+entry:
+  %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %umul, 1
+  %umul.result = extractvalue { i8, i1 } %umul, 0
+  %X = select i1 %cmp, i8 42, i8 %umul.result
+  ret i8 %X
+}
+
+define zeroext i8 @umul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: umul_i8_no_ovf:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.l #42, %d0
+; CHECK-NEXT:    rts
+entry:
+  %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %umul, 1
+  %umul.result = extractvalue { i8, i1 } %umul, 0
+  %X = select i1 %cmp, i8 %umul.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+; CHECK-LABEL: umul_i16:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.w (6,%sp), %d0
+; CHECK-NEXT:    move.w (10,%sp), %d1
+; CHECK-NEXT:    muls %d1, %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    rts
+entry:
+  %umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %umul, 1
+  %umul.result = extractvalue { i16, i1 } %umul, 0
+  %X = select i1 %cmp, i16 42, i16 %umul.result
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
+
 declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 
 define i1 @a(i32 %x)  nounwind {
 ; CHECK-LABEL: a:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    suba.l #20, %sp
-; CHECK-NEXT:    move.l #3, (12,%sp)
-; CHECK-NEXT:    move.l #0, (8,%sp)
-; CHECK-NEXT:    move.l (24,%sp), (4,%sp)
-; CHECK-NEXT:    move.l #0, (%sp)
-; CHECK-NEXT:    jsr __muldi3@PLT
-; CHECK-NEXT:    cmpi.l #0, %d0
-; CHECK-NEXT:    sne %d0
-; CHECK-NEXT:    adda.l #20, %sp
+; CHECK-NEXT:    move.l #3, %d0
+; CHECK-NEXT:    move.l (4,%sp), %d1
+; CHECK-NEXT:    mulu.l %d0, %d1
+; CHECK-NEXT:    svs %d0
 ; CHECK-NEXT:    rts
   %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
   %obil = extractvalue {i32, i1} %res, 1
@@ -42,7 +90,8 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    move.l (8,%sp), %d0
 ; CHECK-NEXT:    add.l (4,%sp), %d0
-; CHECK-NEXT:    lsl.l #2, %d0
+; CHECK-NEXT:    move.l #4, %d1
+; CHECK-NEXT:    mulu.l %d1, %d0
 ; CHECK-NEXT:    rts
 entry:
 	%tmp0 = add i32 %b, %a
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index 8a0c4240d161b..bfa2c3bb4a8ba 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -39,8 +39,6 @@ define i16 @fcvt_si_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_si_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
@@ -100,8 +98,6 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_si_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    feq.s a0, fa5, fa5
@@ -145,8 +141,6 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_ui_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
@@ -196,8 +190,6 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-NEXT:    lui a0, %hi(.LCPI3_0)
 ; RV64ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa4, a0
 ; RV64ID-NEXT:    fmv.w.x fa3, zero
@@ -235,8 +227,6 @@ define i32 @fcvt_w_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_w_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
@@ -281,8 +271,6 @@ define i32 @fcvt_w_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_w_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.w.s a0, fa5, rtz
@@ -321,8 +309,6 @@ define i32 @fcvt_wu_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_wu_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
@@ -361,8 +347,6 @@ define i32 @fcvt_wu_bf16_multiple_use(bfloat %x, ptr %y) nounwind {
 ; RV64ID-LABEL: fcvt_wu_bf16_multiple_use:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
@@ -413,8 +397,6 @@ define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_wu_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.wu.s a0, fa5, rtz
@@ -463,8 +445,6 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_l_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
@@ -606,8 +586,6 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_l_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
@@ -654,8 +632,6 @@ define i64 @fcvt_lu_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_lu_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
@@ -730,8 +706,6 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_lu_bf16_sat:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
@@ -1200,8 +1174,6 @@ define float @fcvt_s_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_s_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa0, a0
 ; RV64ID-NEXT:    ret
@@ -1313,8 +1285,6 @@ define double @fcvt_d_bf16(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_d_bf16:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.d.s fa0, fa5
@@ -1521,8 +1491,6 @@ define signext i8 @fcvt_w_s_i8(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_w_s_i8:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
@@ -1582,8 +1550,6 @@ define signext i8 @fcvt_w_s_sat_i8(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_w_s_sat_i8:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    feq.s a0, fa5, fa5
@@ -1627,8 +1593,6 @@ define zeroext i8 @fcvt_wu_s_i8(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_wu_s_i8:
 ; RV64ID:       # %bb.0:
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
@@ -1676,8 +1640,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_wu_s_sat_i8:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fmv.w.x fa4, zero
@@ -1731,8 +1693,6 @@ define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_wu_bf16_sat_zext:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.wu.s a0, fa5, rtz
@@ -1784,8 +1744,6 @@ define signext i32 @fcvt_w_bf16_sat_sext(bfloat %a) nounwind {
 ; RV64ID-LABEL: fcvt_w_bf16_sat_sext:
 ; RV64ID:       # %bb.0: # %start
 ; RV64ID-NEXT:    fmv.x.w a0, fa0
-; RV64ID-NEXT:    slli a0, a0, 48
-; RV64ID-NEXT:    srli a0, a0, 48
 ; RV64ID-NEXT:    slli a0, a0, 16
 ; RV64ID-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-NEXT:    fcvt.w.s a0, fa5, rtz
diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll
index 5013f76f9b0b3..d62f35388123f 100644
--- a/llvm/test/CodeGen/RISCV/bfloat.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat.ll
@@ -164,8 +164,6 @@ define float @bfloat_to_float(bfloat %a) nounwind {
 ;
 ; RV64ID-LP64-LABEL: bfloat_to_float:
 ; RV64ID-LP64:       # %bb.0:
-; RV64ID-LP64-NEXT:    slli a0, a0, 48
-; RV64ID-LP64-NEXT:    srli a0, a0, 48
 ; RV64ID-LP64-NEXT:    slli a0, a0, 16
 ; RV64ID-LP64-NEXT:    ret
 ;
@@ -179,8 +177,6 @@ define float @bfloat_to_float(bfloat %a) nounwind {
 ; RV64ID-LP64D-LABEL: bfloat_to_float:
 ; RV64ID-LP64D:       # %bb.0:
 ; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
-; RV64ID-LP64D-NEXT:    slli a0, a0, 48
-; RV64ID-LP64D-NEXT:    srli a0, a0, 48
 ; RV64ID-LP64D-NEXT:    slli a0, a0, 16
 ; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
 ; RV64ID-LP64D-NEXT:    ret
@@ -223,8 +219,6 @@ define double @bfloat_to_double(bfloat %a) nounwind {
 ;
 ; RV64ID-LP64-LABEL: bfloat_to_double:
 ; RV64ID-LP64:       # %bb.0:
-; RV64ID-LP64-NEXT:    slli a0, a0, 48
-; RV64ID-LP64-NEXT:    srli a0, a0, 48
 ; RV64ID-LP64-NEXT:    slli a0, a0, 16
 ; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-LP64-NEXT:    fcvt.d.s fa5, fa5
@@ -242,8 +236,6 @@ define double @bfloat_to_double(bfloat %a) nounwind {
 ; RV64ID-LP64D-LABEL: bfloat_to_double:
 ; RV64ID-LP64D:       # %bb.0:
 ; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
-; RV64ID-LP64D-NEXT:    slli a0, a0, 48
-; RV64ID-LP64D-NEXT:    srli a0, a0, 48
 ; RV64ID-LP64D-NEXT:    slli a0, a0, 16
 ; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a0
 ; RV64ID-LP64D-NEXT:    fcvt.d.s fa0, fa5
@@ -366,10 +358,6 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
 ; RV64ID-LP64:       # %bb.0:
 ; RV64ID-LP64-NEXT:    addi sp, sp, -16
 ; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64ID-LP64-NEXT:    lui a2, 16
-; RV64ID-LP64-NEXT:    addi a2, a2, -1
-; RV64ID-LP64-NEXT:    and a0, a0, a2
-; RV64ID-LP64-NEXT:    and a1, a1, a2
 ; RV64ID-LP64-NEXT:    slli a1, a1, 16
 ; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
 ; RV64ID-LP64-NEXT:    slli a0, a0, 16
@@ -408,11 +396,7 @@ define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
 ; RV64ID-LP64D-NEXT:    addi sp, sp, -16
 ; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
-; RV64ID-LP64D-NEXT:    lui a1, 16
-; RV64ID-LP64D-NEXT:    addi a1, a1, -1
-; RV64ID-LP64D-NEXT:    and a0, a0, a1
-; RV64ID-LP64D-NEXT:    fmv.x.w a2, fa1
-; RV64ID-LP64D-NEXT:    and a1, a2, a1
+; RV64ID-LP64D-NEXT:    fmv.x.w a1, fa1
 ; RV64ID-LP64D-NEXT:    slli a1, a1, 16
 ; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
 ; RV64ID-LP64D-NEXT:    slli a0, a0, 16
@@ -604,12 +588,8 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
 ; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64ID-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64ID-LP64-NEXT:    mv s0, a0
-; RV64ID-LP64-NEXT:    lui a0, 16
-; RV64ID-LP64-NEXT:    addi a0, a0, -1
-; RV64ID-LP64-NEXT:    and a1, a1, a0
-; RV64ID-LP64-NEXT:    and a0, a2, a0
-; RV64ID-LP64-NEXT:    slli a0, a0, 16
-; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
+; RV64ID-LP64-NEXT:    slli a2, a2, 16
+; RV64ID-LP64-NEXT:    fmv.w.x fa5, a2
 ; RV64ID-LP64-NEXT:    slli a1, a1, 16
 ; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
 ; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
@@ -651,11 +631,7 @@ define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
 ; RV64ID-LP64D-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64ID-LP64D-NEXT:    mv s0, a0
 ; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
-; RV64ID-LP64D-NEXT:    lui a1, 16
-; RV64ID-LP64D-NEXT:    addi a1, a1, -1
-; RV64ID-LP64D-NEXT:    and a0, a0, a1
-; RV64ID-LP64D-NEXT:    fmv.x.w a2, fa1
-; RV64ID-LP64D-NEXT:    and a1, a2, a1
+; RV64ID-LP64D-NEXT:    fmv.x.w a1, fa1
 ; RV64ID-LP64D-NEXT:    slli a1, a1, 16
 ; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
 ; RV64ID-LP64D-NEXT:    slli a0, a0, 16
diff --git a/llvm/test/CodeGen/X86/usermsr-intrinsics.ll b/llvm/test/CodeGen/X86/usermsr-intrinsics.ll
index 29801a494f498..fa569affdd9ff 100644
--- a/llvm/test/CodeGen/X86/usermsr-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/usermsr-intrinsics.ll
@@ -35,7 +35,7 @@ declare i64 @llvm.x86.urdmsr(i64 %A)
 define void @test_int_x86_uwrmsr(i64 %A, i64 %B) nounwind {
 ; X64-LABEL: test_int_x86_uwrmsr:
 ; X64:       # %bb.0:
-; X64-NEXT:    uwrmsr %rdi, %rsi # encoding: [0xf3,0x0f,0x38,0xf8,0xfe]
+; X64-NEXT:    uwrmsr %rsi, %rdi # encoding: [0xf3,0x0f,0x38,0xf8,0xfe]
 ; X64-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.uwrmsr(i64 %A, i64 %B)
   ret void
@@ -46,7 +46,7 @@ define void @test_int_x86_uwrmsr_const(i64 %A) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    uwrmsr %rdi, $123 # encoding: [0xc4,0xe7,0x7a,0xf8,0xc7,0x7b,0x00,0x00,0x00]
 ; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.uwrmsr(i64 %A, i64 123)
+  call void @llvm.x86.uwrmsr(i64 123, i64 %A)
   ret void
 }
 
@@ -55,9 +55,9 @@ define void @test_int_x86_uwrmsr_const_i64(i64 %A) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movabsq $8589934591, %rax # encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0x01,0x00,0x00,0x00]
 ; X64-NEXT:    # imm = 0x1FFFFFFFF
-; X64-NEXT:    uwrmsr %rdi, %rax # encoding: [0xf3,0x0f,0x38,0xf8,0xf8]
+; X64-NEXT:    uwrmsr %rdi, %rax # encoding: [0xf3,0x0f,0x38,0xf8,0xc7]
 ; X64-NEXT:    retq # encoding: [0xc3]
-  call void @llvm.x86.uwrmsr(i64 %A, i64 8589934591)
+  call void @llvm.x86.uwrmsr(i64 8589934591, i64 %A)
   ret void
 }
 
diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_subtractor_single_block.yaml b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_subtractor_single_block.yaml
index bf72750eece05..e45f2961d0174 100644
--- a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_subtractor_single_block.yaml
+++ b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_subtractor_single_block.yaml
@@ -6,6 +6,7 @@
 # section).
 
 --- !mach-o
+IsLittleEndian: true
 FileHeader:
   magic:           0xFEEDFACF
   cputype:         0x100000C
diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_subtractor_single_block.yaml b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_subtractor_single_block.yaml
index 12542cf7c3142..05c16d1ad1ca3 100644
--- a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_subtractor_single_block.yaml
+++ b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_subtractor_single_block.yaml
@@ -6,6 +6,7 @@
 # section).
 
 --- !mach-o
+IsLittleEndian: true
 FileHeader:
   magic:           0xFEEDFACF
   cputype:         0x1000007
diff --git a/llvm/test/MC/LoongArch/Macros/macros-la.s b/llvm/test/MC/LoongArch/Macros/macros-la.s
index 924e4326b8e5d..1a1d12d7d7dfd 100644
--- a/llvm/test/MC/LoongArch/Macros/macros-la.s
+++ b/llvm/test/MC/LoongArch/Macros/macros-la.s
@@ -1,66 +1,128 @@
 # RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t
+# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.relax
+# RUN: llvm-readobj -r %t.relax | FileCheck %s --check-prefixes=RELOC,RELAX
+
+# RELOC:      Relocations [
+# RELOC-NEXT:   Section ({{.*}}) .rela.text {
 
 la.abs $a0, sym_abs
 # CHECK:      lu12i.w $a0, %abs_hi20(sym_abs)
 # CHECK-NEXT: ori $a0, $a0, %abs_lo12(sym_abs)
 # CHECK-NEXT: lu32i.d $a0, %abs64_lo20(sym_abs)
 # CHECK-NEXT: lu52i.d $a0, $a0, %abs64_hi12(sym_abs)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_ABS_HI20 sym_abs 0x0
+# RELOC-NEXT: R_LARCH_ABS_LO12 sym_abs 0x0
+# RELOC-NEXT: R_LARCH_ABS64_LO20 sym_abs 0x0
+# RELOC-NEXT: R_LARCH_ABS64_HI12 sym_abs 0x0
 
 la.pcrel $a0, sym_pcrel
-# CHECK:      pcalau12i $a0, %pc_hi20(sym_pcrel)
+# CHECK-NEXT: pcalau12i $a0, %pc_hi20(sym_pcrel)
 # CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(sym_pcrel)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_PCALA_HI20 sym_pcrel 0x0
+# RELAX-NEXT: R_LARCH_RELAX - 0x0
+# RELOC-NEXT: R_LARCH_PCALA_LO12 sym_pcrel 0x0
+# RELAX-NEXT: R_LARCH_RELAX - 0x0
 
 la.pcrel $a0, $a1, sym_pcrel_large
-# CHECK:      pcalau12i $a0, %pc_hi20(sym_pcrel_large)
+# CHECK-NEXT: pcalau12i $a0, %pc_hi20(sym_pcrel_large)
 # CHECK-NEXT: addi.d $a1, $zero, %pc_lo12(sym_pcrel_large)
 # CHECK-NEXT: lu32i.d $a1, %pc64_lo20(sym_pcrel_large)
 # CHECK-NEXT: lu52i.d $a1, $a1, %pc64_hi12(sym_pcrel_large)
 # CHECK-NEXT: add.d $a0, $a0, $a1
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_PCALA_HI20 sym_pcrel_large 0x0
+# RELOC-NEXT: R_LARCH_PCALA_LO12 sym_pcrel_large 0x0
+# RELOC-NEXT: R_LARCH_PCALA64_LO20 sym_pcrel_large 0x0
+# RELOC-NEXT: R_LARCH_PCALA64_HI12 sym_pcrel_large 0x0
 
 la.got $a0, sym_got
-# CHECK:      pcalau12i $a0, %got_pc_hi20(sym_got)
+# CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(sym_got)
 # CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(sym_got)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_GOT_PC_HI20 sym_got 0x0
+# RELAX-NEXT: R_LARCH_RELAX - 0x0
+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_got 0x0
+# RELAX-NEXT: R_LARCH_RELAX - 0x0
 
 la.got $a0, $a1, sym_got_large
-# CHECK:      pcalau12i $a0, %got_pc_hi20(sym_got_large)
+# CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(sym_got_large)
 # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_got_large)
 # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_got_large)
 # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_got_large)
 # CHECK-NEXT: ldx.d $a0, $a0, $a1
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_GOT_PC_HI20 sym_got_large 0x0
+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_got_large 0x0
+# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_got_large 0x0
+# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_got_large 0x0
 
 la.tls.le $a0, sym_le
-# CHECK:      lu12i.w $a0, %le_hi20(sym_le)
+# CHECK-NEXT: lu12i.w $a0, %le_hi20(sym_le)
 # CHECK-NEXT: ori $a0, $a0, %le_lo12(sym_le)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_LE_HI20 sym_le 0x0
+# RELOC-NEXT: R_LARCH_TLS_LE_LO12 sym_le 0x0
 
 la.tls.ie $a0, sym_ie
-# CHECK:      pcalau12i $a0, %ie_pc_hi20(sym_ie)
+# CHECK-NEXT: pcalau12i $a0, %ie_pc_hi20(sym_ie)
 # CHECK-NEXT: ld.d $a0, $a0, %ie_pc_lo12(sym_ie)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 sym_ie 0x0
+# RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 sym_ie 0x0
 
 la.tls.ie $a0, $a1, sym_ie_large
-# CHECK:      pcalau12i $a0, %ie_pc_hi20(sym_ie_large)
+# CHECK-NEXT: pcalau12i $a0, %ie_pc_hi20(sym_ie_large)
 # CHECK-NEXT: addi.d $a1, $zero, %ie_pc_lo12(sym_ie_large)
 # CHECK-NEXT: lu32i.d $a1, %ie64_pc_lo20(sym_ie_large)
 # CHECK-NEXT: lu52i.d $a1, $a1, %ie64_pc_hi12(sym_ie_large)
 # CHECK-NEXT: ldx.d $a0, $a0, $a1
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 sym_ie_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 sym_ie_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_IE64_PC_LO20 sym_ie_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_IE64_PC_HI12 sym_ie_large 0x0
 
 la.tls.ld $a0, sym_ld
-# CHECK:      pcalau12i $a0, %ld_pc_hi20(sym_ld)
+# CHECK-NEXT: pcalau12i $a0, %ld_pc_hi20(sym_ld)
 # CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_ld)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 sym_ld 0x0
+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_ld 0x0
 
 la.tls.ld $a0, $a1, sym_ld_large
-# CHECK:      pcalau12i $a0, %ld_pc_hi20(sym_ld_large)
+# CHECK-NEXT: pcalau12i $a0, %ld_pc_hi20(sym_ld_large)
 # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_ld_large)
 # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_ld_large)
 # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_ld_large)
 # CHECK-NEXT: add.d $a0, $a0, $a1
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 sym_ld_large 0x0
+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_ld_large 0x0
+# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_ld_large 0x0
+# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_ld_large 0x0
 
 la.tls.gd $a0, sym_gd
-# CHECK:      pcalau12i $a0, %gd_pc_hi20(sym_gd)
+# CHECK-NEXT: pcalau12i $a0, %gd_pc_hi20(sym_gd)
 # CHECK-NEXT: addi.d $a0, $a0, %got_pc_lo12(sym_gd)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_GD_PC_HI20 sym_gd 0x0
+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_gd 0x0
 
 la.tls.gd $a0, $a1, sym_gd_large
-# CHECK:      pcalau12i $a0, %gd_pc_hi20(sym_gd_large)
+# CHECK-NEXT: pcalau12i $a0, %gd_pc_hi20(sym_gd_large)
 # CHECK-NEXT: addi.d $a1, $zero, %got_pc_lo12(sym_gd_large)
 # CHECK-NEXT: lu32i.d $a1, %got64_pc_lo20(sym_gd_large)
 # CHECK-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(sym_gd_large)
 # CHECK-NEXT: add.d $a0, $a0, $a1
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_GD_PC_HI20 sym_gd_large 0x0
+# RELOC-NEXT: R_LARCH_GOT_PC_LO12 sym_gd_large 0x0
+# RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_gd_large 0x0
+# RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_gd_large 0x0
+
+# RELOC-NEXT:   }
+# RELOC-NEXT: ]
diff --git a/llvm/test/MC/LoongArch/Misc/subsection.s b/llvm/test/MC/LoongArch/Misc/subsection.s
index 0bd22b474536c..566a2408d6913 100644
--- a/llvm/test/MC/LoongArch/Misc/subsection.s
+++ b/llvm/test/MC/LoongArch/Misc/subsection.s
@@ -1,5 +1,5 @@
 # RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,NORELAX --implicit-check-not=error:
-## TODO: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error:
+# RUN: not llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=ERR,RELAX --implicit-check-not=error:
 
 a:
   nop
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
index 532eb4e0561ac..c4454f5bb98d1 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s
@@ -18,7 +18,9 @@
 # RELAX:       Relocations [
 # RELAX-NEXT:    Section ({{.*}}) .rela.text {
 # RELAX-NEXT:      0x10 R_LARCH_PCALA_HI20 .L1 0x0
+# RELAX-NEXT:      0x10 R_LARCH_RELAX - 0x0
 # RELAX-NEXT:      0x14 R_LARCH_PCALA_LO12 .L1 0x0
+# RELAX-NEXT:      0x14 R_LARCH_RELAX - 0x0
 # RELAX-NEXT:    }
 # RELAX-NEXT:    Section ({{.*}}) .rela.data {
 # RELAX-NEXT:      0xF R_LARCH_ADD8 .L3 0x0
@@ -29,13 +31,21 @@
 # RELAX-NEXT:      0x12 R_LARCH_SUB32 .L2 0x0
 # RELAX-NEXT:      0x16 R_LARCH_ADD64 .L3 0x0
 # RELAX-NEXT:      0x16 R_LARCH_SUB64 .L2 0x0
+# RELAX-NEXT:      0x1E R_LARCH_ADD8 .L4 0x0
+# RELAX-NEXT:      0x1E R_LARCH_SUB8 .L3 0x0
+# RELAX-NEXT:      0x1F R_LARCH_ADD16 .L4 0x0
+# RELAX-NEXT:      0x1F R_LARCH_SUB16 .L3 0x0
+# RELAX-NEXT:      0x21 R_LARCH_ADD32 .L4 0x0
+# RELAX-NEXT:      0x21 R_LARCH_SUB32 .L3 0x0
+# RELAX-NEXT:      0x25 R_LARCH_ADD64 .L4 0x0
+# RELAX-NEXT:      0x25 R_LARCH_SUB64 .L3 0x0
 # RELAX-NEXT:    }
 # RELAX-NEXT:  ]
 
 # RELAX:      Hex dump of section '.data':
 # RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000000
-# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000808
-# RELAX-NEXT: 0x00000020 00080000 00080000 00000000 00
+# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000
+# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00
 
 .text
 .L1:
@@ -60,8 +70,6 @@
 .short .L3 - .L2
 .word  .L3 - .L2
 .dword .L3 - .L2
-## TODO
-## With relaxation, emit relocs because la.pcrel is a linker-relaxable inst.
 .byte  .L4 - .L3
 .short .L4 - .L3
 .word  .L4 - .L3
diff --git a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll
index 7a968e4119b83..ea6392714bf6f 100644
--- a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll
+++ b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll
@@ -219,7 +219,7 @@ entry:
 }
 
 define void @test_memcpy_argonly(ptr %dst, ptr %src) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
 ; FNATTRS-LABEL: define void @test_memcpy_argonly
 ; FNATTRS-SAME: (ptr nocapture writeonly [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) #[[ATTR9:[0-9]+]] {
 ; FNATTRS-NEXT:  entry:
@@ -243,7 +243,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 @arr = global [32 x i8] zeroinitializer
 
 define void @test_memcpy_src_global(ptr %dst) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(readwrite, inaccessiblemem: none)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none)
 ; FNATTRS-LABEL: define void @test_memcpy_src_global
 ; FNATTRS-SAME: (ptr nocapture writeonly [[DST:%.*]]) #[[ATTR11:[0-9]+]] {
 ; FNATTRS-NEXT:  entry:
@@ -263,7 +263,7 @@ entry:
 }
 
 define void @test_memcpy_dst_global(ptr %src) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(readwrite, inaccessiblemem: none)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none)
 ; FNATTRS-LABEL: define void @test_memcpy_dst_global
 ; FNATTRS-SAME: (ptr nocapture readonly [[SRC:%.*]]) #[[ATTR11]] {
 ; FNATTRS-NEXT:  entry:
diff --git a/llvm/test/Transforms/FunctionAttrs/convergent.ll b/llvm/test/Transforms/FunctionAttrs/convergent.ll
index 0263e0ec22551..a0f4c07e43371 100644
--- a/llvm/test/Transforms/FunctionAttrs/convergent.ll
+++ b/llvm/test/Transforms/FunctionAttrs/convergent.ll
@@ -74,7 +74,7 @@ declare void @llvm.nvvm.barrier0() convergent
 
 define i32 @intrinsic() convergent {
   ; Implicitly convergent, because the intrinsic is convergent.
-; CHECK: Function Attrs: convergent nounwind
+; CHECK: Function Attrs: convergent norecurse nounwind
 ; CHECK-LABEL: define {{[^@]+}}@intrinsic
 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.nvvm.barrier0()
diff --git a/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll b/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll
index 9ba82e2dc1cce..0f087e1a05f79 100644
--- a/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll
+++ b/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll
@@ -7,7 +7,7 @@ declare void @llvm.sideeffect()
 ; is present.
 
 define void @test() {
-; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite)
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    call void @llvm.sideeffect()
 ; CHECK-NEXT:    ret void
@@ -17,7 +17,7 @@ define void @test() {
 }
 
 define void @loop() {
-; CHECK: Function Attrs: nofree noreturn nosync nounwind memory(inaccessiblemem: readwrite)
+; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(inaccessiblemem: readwrite)
 ; CHECK-LABEL: @loop(
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
diff --git a/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll b/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll
index 17072bc433fbb..bb9ef9156794e 100644
--- a/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll
+++ b/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll
@@ -6,7 +6,7 @@
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8"
 
 define amdgpu_kernel void @test_make_buffer_rsrc(ptr %p, ptr %q) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
 ; FNATTRS-LABEL: define {{[^@]+}}@test_make_buffer_rsrc
 ; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]], ptr nocapture writeonly [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
 ; FNATTRS-NEXT:    [[P_RSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr [[P]], i16 0, i32 4, i32 822243328)
diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll
index a70d71e62c305..eb999d69d95f1 100644
--- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -650,7 +650,7 @@ entry:
 }
 
 define void @nocaptureLaunder(ptr %p) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write, inaccessiblemem: readwrite)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write, inaccessiblemem: readwrite)
 ; FNATTRS-LABEL: define void @nocaptureLaunder
 ; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR13:[0-9]+]] {
 ; FNATTRS-NEXT:  entry:
@@ -674,7 +674,7 @@ entry:
 
 @g2 = global ptr null
 define void @captureLaunder(ptr %p) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: readwrite)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: readwrite)
 ; FNATTRS-LABEL: define void @captureLaunder
 ; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR14:[0-9]+]] {
 ; FNATTRS-NEXT:    [[B:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[P]])
@@ -694,7 +694,7 @@ define void @captureLaunder(ptr %p) {
 }
 
 define void @nocaptureStrip(ptr %p) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; FNATTRS-LABEL: define void @nocaptureStrip
 ; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR15:[0-9]+]] {
 ; FNATTRS-NEXT:  entry:
@@ -718,9 +718,9 @@ entry:
 
 @g3 = global ptr null
 define void @captureStrip(ptr %p) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none)
 ; FNATTRS-LABEL: define void @captureStrip
-; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR16:[0-9]+]] {
+; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
 ; FNATTRS-NEXT:    [[B:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[P]])
 ; FNATTRS-NEXT:    store ptr [[B]], ptr @g3, align 8
 ; FNATTRS-NEXT:    ret void
@@ -831,7 +831,7 @@ define i1 @nocaptureDereferenceableOrNullICmp(ptr dereferenceable_or_null(4) %x)
 define i1 @captureDereferenceableOrNullICmp(ptr dereferenceable_or_null(4) %x) null_pointer_is_valid {
 ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none)
 ; FNATTRS-LABEL: define i1 @captureDereferenceableOrNullICmp
-; FNATTRS-SAME: (ptr readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR17:[0-9]+]] {
+; FNATTRS-SAME: (ptr readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR16:[0-9]+]] {
 ; FNATTRS-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[X]], null
 ; FNATTRS-NEXT:    ret i1 [[TMP1]]
 ;
@@ -886,8 +886,8 @@ define void @recurse_fptr(ptr %f, ptr %p) {
 define void @readnone_indirec(ptr %f, ptr %p) {
 ; FNATTRS: Function Attrs: nofree nosync memory(none)
 ; FNATTRS-LABEL: define void @readnone_indirec
-; FNATTRS-SAME: (ptr nocapture readonly [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR18:[0-9]+]] {
-; FNATTRS-NEXT:    call void [[F]](ptr [[P]]) #[[ATTR21:[0-9]+]]
+; FNATTRS-SAME: (ptr nocapture readonly [[F:%.*]], ptr readnone [[P:%.*]]) #[[ATTR17:[0-9]+]] {
+; FNATTRS-NEXT:    call void [[F]](ptr [[P]]) #[[ATTR20:[0-9]+]]
 ; FNATTRS-NEXT:    ret void
 ;
 ; ATTRIBUTOR: Function Attrs: nosync memory(none)
diff --git a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll
index 0fe0eadf5f669..ed5534a24cbe8 100644
--- a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll
@@ -225,9 +225,9 @@ define void @call_both() #0 {
 declare float @llvm.floor.f32(float)
 
 define void @call_floor(float %a) #0 {
-; FNATTR: Function Attrs: mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable
+; FNATTR: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable
 ; FNATTR-LABEL: define {{[^@]+}}@call_floor
-; FNATTR-SAME: (float [[A:%.*]]) #[[ATTR7:[0-9]+]] {
+; FNATTR-SAME: (float [[A:%.*]]) #[[ATTR3]] {
 ; FNATTR-NEXT:    [[TMP1:%.*]] = tail call float @llvm.floor.f32(float [[A]])
 ; FNATTR-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse.ll b/llvm/test/Transforms/FunctionAttrs/norecurse.ll
index e1c624dc2ce50..7924428fb4989 100644
--- a/llvm/test/Transforms/FunctionAttrs/norecurse.ll
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse.ll
@@ -73,7 +73,7 @@ define i32 @extern() {
 ; ATTRIBUTOR: Function Attrs: nosync memory(none)
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@extern
 ; ATTRIBUTOR-SAME: () #[[ATTR2:[0-9]+]] {
-; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k()
+; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k() #[[ATTR7:[0-9]+]]
 ; ATTRIBUTOR-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @k()
@@ -83,7 +83,7 @@ define i32 @extern() {
 declare i32 @k() readnone
 
 define void @intrinsic(ptr %dest, ptr %src, i32 %len) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
 ; FNATTRS-LABEL: define {{[^@]+}}@intrinsic
 ; FNATTRS-SAME: (ptr nocapture writeonly [[DEST:%.*]], ptr nocapture readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] {
 ; FNATTRS-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[DEST]], ptr [[SRC]], i32 [[LEN]], i1 false)
@@ -92,7 +92,7 @@ define void @intrinsic(ptr %dest, ptr %src, i32 %len) {
 ; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@intrinsic
 ; ATTRIBUTOR-SAME: (ptr nocapture nofree writeonly [[DEST:%.*]], ptr nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] {
-; ATTRIBUTOR-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly [[DEST]], ptr nocapture readonly [[SRC]], i32 [[LEN]], i1 false) #[[ATTR7:[0-9]+]]
+; ATTRIBUTOR-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly [[DEST]], ptr nocapture readonly [[SRC]], i32 [[LEN]], i1 false) #[[ATTR8:[0-9]+]]
 ; ATTRIBUTOR-NEXT:    ret void
 ;
   call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 false)
@@ -111,7 +111,7 @@ define internal i32 @called_by_norecurse() {
 ; ATTRIBUTOR: Function Attrs: nosync memory(none)
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@called_by_norecurse
 ; ATTRIBUTOR-SAME: () #[[ATTR2]] {
-; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k()
+; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k() #[[ATTR7]]
 ; ATTRIBUTOR-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @k()
@@ -145,7 +145,7 @@ define internal i32 @called_by_norecurse_indirectly() {
 ; ATTRIBUTOR: Function Attrs: nosync memory(none)
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly
 ; ATTRIBUTOR-SAME: () #[[ATTR2]] {
-; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k()
+; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k() #[[ATTR7]]
 ; ATTRIBUTOR-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @k()
@@ -196,7 +196,7 @@ define internal i32 @escapes_as_parameter(ptr %p) {
 ; ATTRIBUTOR: Function Attrs: nosync memory(none)
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@escapes_as_parameter
 ; ATTRIBUTOR-SAME: (ptr nocapture nofree readnone [[P:%.*]]) #[[ATTR2]] {
-; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k()
+; ATTRIBUTOR-NEXT:    [[A:%.*]] = call i32 @k() #[[ATTR7]]
 ; ATTRIBUTOR-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @k()
@@ -241,7 +241,7 @@ define void @r() norecurse {
 ; FNATTRS: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) }
 ; FNATTRS: attributes #[[ATTR2]] = { nofree nosync memory(none) }
 ; FNATTRS: attributes #[[ATTR3:[0-9]+]] = { memory(none) }
-; FNATTRS: attributes #[[ATTR4]] = { mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) }
+; FNATTRS: attributes #[[ATTR4]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
 ; FNATTRS: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ; FNATTRS: attributes #[[ATTR6]] = { nofree norecurse nosync memory(none) }
 ;.
@@ -252,5 +252,6 @@ define void @r() norecurse {
 ; ATTRIBUTOR: attributes #[[ATTR4]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
 ; ATTRIBUTOR: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ; ATTRIBUTOR: attributes #[[ATTR6]] = { norecurse nosync memory(none) }
-; ATTRIBUTOR: attributes #[[ATTR7]] = { nofree willreturn }
+; ATTRIBUTOR: attributes #[[ATTR7]] = { nosync }
+; ATTRIBUTOR: attributes #[[ATTR8]] = { nofree willreturn }
 ;.
diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll
index 5950f9e626c41..de5398f17ce51 100644
--- a/llvm/test/Transforms/FunctionAttrs/nosync.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll
@@ -236,7 +236,7 @@ declare void @llvm.memset(ptr %dest, i8 %val, i32 %len, i1 %isvolatile)
 
 ; negative, checking volatile intrinsics.
 define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) {
-; CHECK: Function Attrs: mustprogress nofree nounwind willreturn memory(argmem: readwrite)
+; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
 ; CHECK-LABEL: @memcpy_volatile(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], i32 8, i1 true)
 ; CHECK-NEXT:    ret i32 4
@@ -247,7 +247,7 @@ define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) {
 
 ; positive, non-volatile intrinsic.
 define i32 @memset_non_volatile(ptr %ptr1, i8 %val) {
-; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; CHECK-LABEL: @memset_non_volatile(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false)
 ; CHECK-NEXT:    ret i32 4
@@ -298,7 +298,7 @@ define void @i_totally_sync() {
 declare float @llvm.cos(float %val) readnone
 
 define float @cos_test(float %x) {
-; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: @cos_test(
 ; CHECK-NEXT:    [[C:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[C]]
diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
index 0986f74c181d9..39513976f90d7 100644
--- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -251,7 +251,7 @@ entry:
 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>%val, <4 x ptr>, i32, <4 x i1>)
 
 define void @test9(<4 x ptr> %ptrs, <4 x i32>%val) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
 ; FNATTRS-LABEL: define {{[^@]+}}@test9
 ; FNATTRS-SAME: (<4 x ptr> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR7:[0-9]+]] {
 ; FNATTRS-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL]], <4 x ptr> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
@@ -275,7 +275,7 @@ define void @test9(<4 x ptr> %ptrs, <4 x i32>%val) {
 
 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
 define <4 x i32> @test10(<4 x ptr> %ptrs) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(read)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
 ; FNATTRS-LABEL: define {{[^@]+}}@test10
 ; FNATTRS-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR9:[0-9]+]] {
 ; FNATTRS-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef)
diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll
index 5b20300610d81..de2d5e2238947 100644
--- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll
+++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll
@@ -179,9 +179,9 @@ define void @test_atomicrmw(ptr %p) {
 }
 
 define void @test_ptrmask(ptr %p) {
-; FNATTRS: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; FNATTRS-LABEL: define {{[^@]+}}@test_ptrmask
-; FNATTRS-SAME: (ptr writeonly [[P:%.*]]) #[[ATTR8:[0-9]+]] {
+; FNATTRS-SAME: (ptr writeonly [[P:%.*]]) #[[ATTR3]] {
 ; FNATTRS-NEXT:    [[MASK:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -5)
 ; FNATTRS-NEXT:    store i8 0, ptr [[MASK]], align 1
 ; FNATTRS-NEXT:    ret void
@@ -218,7 +218,7 @@ declare void @direct2_callee(ptr %p) writeonly
 define void @direct2(ptr %p) {
 ; FNATTRS: Function Attrs: memory(write)
 ; FNATTRS-LABEL: define {{[^@]+}}@direct2
-; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR10:[0-9]+]] {
+; FNATTRS-SAME: (ptr [[P:%.*]]) #[[ATTR9:[0-9]+]] {
 ; FNATTRS-NEXT:    call void @direct2_callee(ptr [[P]])
 ; FNATTRS-NEXT:    ret void
 ;
@@ -236,7 +236,7 @@ define void @direct2(ptr %p) {
 define void @direct2b(ptr %p) {
 ; FNATTRS: Function Attrs: memory(write)
 ; FNATTRS-LABEL: define {{[^@]+}}@direct2b
-; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR10]] {
+; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR9]] {
 ; FNATTRS-NEXT:    call void @direct2_callee(ptr nocapture [[P]])
 ; FNATTRS-NEXT:    ret void
 ;
@@ -325,8 +325,8 @@ define void @fptr_test2(ptr %p, ptr %f) {
 define void @fptr_test3(ptr %p, ptr %f) {
 ; FNATTRS: Function Attrs: memory(write)
 ; FNATTRS-LABEL: define {{[^@]+}}@fptr_test3
-; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR10]] {
-; FNATTRS-NEXT:    call void [[F]](ptr nocapture [[P]]) #[[ATTR10]]
+; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR9]] {
+; FNATTRS-NEXT:    call void [[F]](ptr nocapture [[P]]) #[[ATTR9]]
 ; FNATTRS-NEXT:    ret void
 ;
 ; ATTRIBUTOR: Function Attrs: memory(write)
@@ -342,7 +342,7 @@ define void @fptr_test3(ptr %p, ptr %f) {
 define void @test_argmem_none_callee(ptr %p) {
 ; FNATTRS-LABEL: define {{[^@]+}}@test_argmem_none_callee
 ; FNATTRS-SAME: (ptr nocapture readnone [[P:%.*]]) {
-; FNATTRS-NEXT:    call void @direct1_callee(ptr nocapture [[P]]) #[[ATTR11:[0-9]+]]
+; FNATTRS-NEXT:    call void @direct1_callee(ptr nocapture [[P]]) #[[ATTR10:[0-9]+]]
 ; FNATTRS-NEXT:    ret void
 ;
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@test_argmem_none_callee
@@ -357,7 +357,7 @@ define void @test_argmem_none_callee(ptr %p) {
 define void @test_argmem_read_callee(ptr %p) {
 ; FNATTRS-LABEL: define {{[^@]+}}@test_argmem_read_callee
 ; FNATTRS-SAME: (ptr nocapture readonly [[P:%.*]]) {
-; FNATTRS-NEXT:    call void @direct1_callee(ptr nocapture [[P]]) #[[ATTR12:[0-9]+]]
+; FNATTRS-NEXT:    call void @direct1_callee(ptr nocapture [[P]]) #[[ATTR11:[0-9]+]]
 ; FNATTRS-NEXT:    ret void
 ;
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@test_argmem_read_callee
@@ -372,7 +372,7 @@ define void @test_argmem_read_callee(ptr %p) {
 define void @test_argmem_write_callee(ptr %p) {
 ; FNATTRS-LABEL: define {{[^@]+}}@test_argmem_write_callee
 ; FNATTRS-SAME: (ptr nocapture writeonly [[P:%.*]]) {
-; FNATTRS-NEXT:    call void @direct1_callee(ptr nocapture [[P]]) #[[ATTR13:[0-9]+]]
+; FNATTRS-NEXT:    call void @direct1_callee(ptr nocapture [[P]]) #[[ATTR12:[0-9]+]]
 ; FNATTRS-NEXT:    ret void
 ;
 ; ATTRIBUTOR-LABEL: define {{[^@]+}}@test_argmem_write_callee
diff --git a/llvm/test/Transforms/InstCombine/free-inversion.ll b/llvm/test/Transforms/InstCombine/free-inversion.ll
index 5e5e65164f707..be9bedbf79859 100644
--- a/llvm/test/Transforms/InstCombine/free-inversion.ll
+++ b/llvm/test/Transforms/InstCombine/free-inversion.ll
@@ -133,6 +133,24 @@ define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
   ret i8 %not_ab
 }
 
+; Same as above but with a type larger than i64 to make sure we create -2
+; correctly.
+define i128 @sub_3(i128 %a, i1 %c, i128 %x, i128 %y) {
+; CHECK-LABEL: @sub_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i128 [[Y:%.*]], -124
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C:%.*]], i128 [[X:%.*]], i128 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i128 [[TMP2]], [[A:%.*]]
+; CHECK-NEXT:    [[NOT_AB:%.*]] = sub i128 -2, [[TMP3]]
+; CHECK-NEXT:    ret i128 [[NOT_AB]]
+;
+  %nx = xor i128 %x, -1
+  %yy = xor i128 %y, 123
+  %b = select i1 %c, i128 %nx, i128 %yy
+  %ab = sub i128 %a, %b
+  %not_ab = xor i128 %ab, -1
+  ret i128 %not_ab
+}
+
 define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_fail(
 ; CHECK-NEXT:    [[NX:%.*]] = xor i8 [[X:%.*]], -1
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1c7bb36f0d34c..9b2e141bdb050 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -854,6 +854,107 @@ define i1 @PR32949(i32 %X, i32 %Y, i32 %Z) {
   ret i1 %C
 }
 
+define i1 @test_sdiv_pos_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_slt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp slt i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_sle(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_sle(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp sle i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_sgt(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_sgt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp sgt i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_sge(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_sge(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp sge i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_ult(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp ult i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_ule(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_ule(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp ule i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_ugt(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp ugt i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_pos_uge(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_pos_uge(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, 40
+  %divy = sdiv exact i32 %y, 40
+  %cmp = icmp uge i32 %divx, %divy
+  ret i1 %cmp
+}
+
+define i1 @test_sdiv_neg_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_neg_slt(
+; CHECK-NEXT:    [[DIVX:%.*]] = sdiv exact i32 [[X:%.*]], -40
+; CHECK-NEXT:    [[DIVY:%.*]] = sdiv exact i32 [[Y:%.*]], -40
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[DIVX]], [[DIVY]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %divx = sdiv exact i32 %x, -40
+  %divy = sdiv exact i32 %y, -40
+  %cmp = icmp slt i32 %divx, %divy
+  ret i1 %cmp
+}
+
 ; PR8469
 define <2 x i1> @test49(<2 x i32> %i3) {
 ; CHECK-LABEL: @test49(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
index d4c71285a93ab..87063fc3f7a82 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
@@ -103,21 +103,23 @@ define void @test_supernode_addsub_alt(ptr %Aarray, ptr %Barray, ptr %Carray, pt
 ; ENABLED-LABEL: @test_supernode_addsub_alt(
 ; ENABLED-NEXT:  entry:
 ; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
-; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
 ; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
-; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, ptr [[SARRAY:%.*]], i64 1
 ; ENABLED-NEXT:    [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
 ; ENABLED-NEXT:    [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
-; ENABLED-NEXT:    [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
-; ENABLED-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
 ; ENABLED-NEXT:    [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
 ; ENABLED-NEXT:    [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
-; ENABLED-NEXT:    [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
-; ENABLED-NEXT:    [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
-; ENABLED-NEXT:    [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
-; ENABLED-NEXT:    [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
-; ENABLED-NEXT:    store double [[SUB0]], ptr [[SARRAY]], align 8
-; ENABLED-NEXT:    store double [[ADD1]], ptr [[IDXS1]], align 8
+; ENABLED-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
+; ENABLED-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
+; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
+; ENABLED-NEXT:    [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
+; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP0]]
+; ENABLED-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
+; ENABLED-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
+; ENABLED-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
+; ENABLED-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
+; ENABLED-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; ENABLED-NEXT:    store <2 x double> [[TMP10]], ptr [[SARRAY:%.*]], align 8
 ; ENABLED-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
index aa3c2be7dc9c2..17f9f371ff6ef 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
@@ -12,22 +12,24 @@ define void @foo() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    br i1 undef, label [[BB3]], label [[BB4:%.*]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double>
 ; CHECK-NEXT:    [[CONV2:%.*]] = uitofp i16 undef to double
-; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]]
-; CHECK-NEXT:    [[SUB1:%.*]] = fsub double undef, undef
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x double> <double poison, double poison, double undef, double undef>, double [[SUB1]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fptrunc <4 x double> [[TMP6]] to <4 x float>
-; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP2]], <4 x float> [[TMP8]]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[CONV2]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
+; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP10]] = phi <4 x float> [ [[TMP9]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
+; CHECK-NEXT:    [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ;
 entry:
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
index fbd31d044382a..8220361df6cfa 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
@@ -20,7 +20,8 @@ RUN: printf '\3\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0\0\0\0\3' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\3' >> %t
 RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\344\023\165\112\031\035\265\067' >> %t
@@ -30,7 +31,8 @@ RUN: printf '\2\xff\xff\xd3' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\2' >> %t
-RUN: printf '\0\0\0\0\0\0\0\1' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\1' >> %t
 RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\0\0\0\0\0\0\0\023' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
index bb899c5fdb555..9352ae132380d 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
@@ -20,7 +20,8 @@ RUN: printf '\0\0\0\3' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0\3\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\3\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t
@@ -30,7 +31,8 @@ RUN: printf '\xd3\xff\xff\2' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\2\0\0\0' >> %t
-RUN: printf '\0\0\0\0\1\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\1\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t
diff --git a/mlir/include/mlir/Analysis/Presburger/QuasiPolynomial.h b/mlir/include/mlir/Analysis/Presburger/QuasiPolynomial.h
new file mode 100644
index 0000000000000..f8ce8524e41b2
--- /dev/null
+++ b/mlir/include/mlir/Analysis/Presburger/QuasiPolynomial.h
@@ -0,0 +1,71 @@
+//===- QuasiPolynomial.h - QuasiPolynomial Class ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of the QuasiPolynomial class for Barvinok's algorithm,
+// which represents a single-valued function on a set of parameters.
+// It is an expression of the form
+// f(x) = \sum_i c_i * \prod_j ⌊g_{ij}(x)⌋
+// where c_i \in Q and
+// g_{ij} : Q^d -> Q are affine functionals over d parameters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_PRESBURGER_QUASIPOLYNOMIAL_H
+#define MLIR_ANALYSIS_PRESBURGER_QUASIPOLYNOMIAL_H
+
+#include "mlir/Analysis/Presburger/Fraction.h"
+#include "mlir/Analysis/Presburger/PresburgerSpace.h"
+
+namespace mlir {
+namespace presburger {
+
+// A class to describe quasi-polynomials.
+// A quasipolynomial consists of a set of terms.
+// The ith term is a constant `coefficients[i]`, multiplied
+// by the product of a set of affine functions on n parameters.
+// Represents functions f : Q^n -> Q of the form
+//
+// f(x) = \sum_i c_i * \prod_j ⌊g_{ij}(x)⌋
+//
+// where c_i \in Q and
+// g_{ij} : Q^n -> Q are affine functionals.
+class QuasiPolynomial : public PresburgerSpace {
+public:
+  QuasiPolynomial(unsigned numVars, SmallVector<Fraction> coeffs = {},
+                  std::vector<std::vector<SmallVector<Fraction>>> aff = {});
+
+  // Find the number of inputs (numDomain) to the polynomial.
+  // numSymbols is set to zero.
+  unsigned getNumInputs() const {
+    return getNumDomainVars() + getNumSymbolVars();
+  }
+
+  const SmallVector<Fraction> &getCoefficients() const { return coefficients; }
+
+  const std::vector<std::vector<SmallVector<Fraction>>> &getAffine() const {
+    return affine;
+  }
+
+  // Arithmetic operations.
+  QuasiPolynomial operator+(const QuasiPolynomial &x) const;
+  QuasiPolynomial operator-(const QuasiPolynomial &x) const;
+  QuasiPolynomial operator*(const QuasiPolynomial &x) const;
+  QuasiPolynomial operator/(const Fraction x) const;
+
+  // Removes terms which evaluate to zero from the expression.
+  QuasiPolynomial simplify();
+
+private:
+  SmallVector<Fraction> coefficients;
+  std::vector<std::vector<SmallVector<Fraction>>> affine;
+};
+
+} // namespace presburger
+} // namespace mlir
+
+#endif // MLIR_ANALYSIS_PRESBURGER_QUASIPOLYNOMIAL_H
\ No newline at end of file
diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt
index 22f1a4cac4405..e77e1623dae17 100644
--- a/mlir/lib/Analysis/Presburger/CMakeLists.txt
+++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt
@@ -6,6 +6,7 @@ add_mlir_library(MLIRPresburger
   PresburgerRelation.cpp
   PresburgerSpace.cpp
   PWMAFunction.cpp
+  QuasiPolynomial.cpp
   Simplex.cpp
   SlowMPInt.cpp
   Utils.cpp
diff --git a/mlir/lib/Analysis/Presburger/GeneratingFunction.h b/mlir/lib/Analysis/Presburger/GeneratingFunction.h
new file mode 100644
index 0000000000000..f7deba921ea51
--- /dev/null
+++ b/mlir/lib/Analysis/Presburger/GeneratingFunction.h
@@ -0,0 +1,134 @@
+//===- GeneratingFunction.h - Generating Functions over Q^d -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of the GeneratingFunction class for Barvinok's algorithm,
+// which represents a function over Q^n, parameterized by d parameters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_PRESBURGER_GENERATINGFUNCTION_H
+#define MLIR_ANALYSIS_PRESBURGER_GENERATINGFUNCTION_H
+
+#include "mlir/Analysis/Presburger/Fraction.h"
+#include "mlir/Analysis/Presburger/Matrix.h"
+
+namespace mlir {
+namespace presburger {
+
+// A parametric point is a vector, each of whose elements
+// is an affine function of n parameters. Each row
+// in the matrix represents the affine function and
+// has n+1 elements.
+using ParamPoint = FracMatrix;
+
+// A point is simply a vector.
+using Point = SmallVector<Fraction>;
+
+// A class to describe the type of generating function
+// used to enumerate the integer points in a polytope.
+// Consists of a set of terms, where the ith term has
+// * a sign, ±1, stored in `signs[i]`
+// * a numerator, of the form x^{n},
+//      where n, stored in `numerators[i]`,
+//      is a parametric point.
+// * a denominator, of the form (1 - x^{d1})...(1 - x^{dn}),
+//      where each dj, stored in `denominators[i][j]`,
+//      is a vector.
+//
+// Represents functions f_p : Q^n -> Q of the form
+//
+// f_p(x) = \sum_i s_i * (x^n_i(p)) / (\prod_j (1 - x^d_{ij})
+//
+// where s_i is ±1,
+// n_i \in Q^d -> Q^n is an n-vector of affine functions on d parameters, and
+// g_{ij} \in Q^n are vectors.
+class GeneratingFunction {
+public:
+  GeneratingFunction(unsigned numParam, SmallVector<int> signs,
+                     std::vector<ParamPoint> nums,
+                     std::vector<std::vector<Point>> dens)
+      : numParam(numParam), signs(signs), numerators(nums), denominators(dens) {
+#ifndef NDEBUG
+    for (const ParamPoint &term : numerators)
+      assert(term.getNumColumns() == numParam + 1 &&
+             "dimensionality of numerator exponents does not match number of "
+             "parameters!");
+#endif // NDEBUG
+  }
+
+  unsigned getNumParams() { return numParam; }
+
+  SmallVector<int> getSigns() { return signs; }
+
+  std::vector<ParamPoint> getNumerators() { return numerators; }
+
+  std::vector<std::vector<Point>> getDenominators() { return denominators; }
+
+  GeneratingFunction operator+(GeneratingFunction &gf) const {
+    assert(numParam == gf.getNumParams() &&
+           "two generating functions with different numbers of parameters "
+           "cannot be added!");
+    SmallVector<int> sumSigns = signs;
+    sumSigns.append(gf.signs);
+
+    std::vector<ParamPoint> sumNumerators = numerators;
+    sumNumerators.insert(sumNumerators.end(), gf.numerators.begin(),
+                         gf.numerators.end());
+
+    std::vector<std::vector<Point>> sumDenominators = denominators;
+    sumDenominators.insert(sumDenominators.end(), gf.denominators.begin(),
+                           gf.denominators.end());
+    return GeneratingFunction(0, sumSigns, sumNumerators, sumDenominators);
+  }
+
+  llvm::raw_ostream &print(llvm::raw_ostream &os) const {
+    for (unsigned i = 0, e = signs.size(); i < e; i++) {
+      if (i == 0) {
+        if (signs[i] == -1)
+          os << "- ";
+      } else {
+        if (signs[i] == 1)
+          os << " + ";
+        else
+          os << " - ";
+      }
+
+      os << "x^[";
+      unsigned r = numerators[i].getNumRows();
+      for (unsigned j = 0; j < r - 1; j++) {
+        os << "[";
+        for (unsigned k = 0, c = numerators[i].getNumColumns(); k < c - 1; k++)
+          os << numerators[i].at(j, k) << ",";
+        os << numerators[i].getRow(j).back() << "],";
+      }
+      os << "[";
+      for (unsigned k = 0, c = numerators[i].getNumColumns(); k < c - 1; k++)
+        os << numerators[i].at(r - 1, k) << ",";
+      os << numerators[i].getRow(r - 1).back() << "]]/";
+
+      for (const Point &den : denominators[i]) {
+        os << "(x^[";
+        for (unsigned j = 0, e = den.size(); j < e - 1; j++)
+          os << den[j] << ",";
+        os << den.back() << "])";
+      }
+    }
+    return os;
+  }
+
+private:
+  unsigned numParam;
+  SmallVector<int> signs;
+  std::vector<ParamPoint> numerators;
+  std::vector<std::vector<Point>> denominators;
+};
+
+} // namespace presburger
+} // namespace mlir
+
+#endif // MLIR_ANALYSIS_PRESBURGER_GENERATINGFUNCTION_H
diff --git a/mlir/lib/Analysis/Presburger/QuasiPolynomial.cpp b/mlir/lib/Analysis/Presburger/QuasiPolynomial.cpp
new file mode 100644
index 0000000000000..3ae4fb726215f
--- /dev/null
+++ b/mlir/lib/Analysis/Presburger/QuasiPolynomial.cpp
@@ -0,0 +1,115 @@
+//===- QuasiPolynomial.cpp - Quasipolynomial Class --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/Presburger/QuasiPolynomial.h"
+#include "mlir/Analysis/Presburger/Fraction.h"
+#include "mlir/Analysis/Presburger/PresburgerSpace.h"
+#include "mlir/Analysis/Presburger/Utils.h"
+
+using namespace mlir;
+using namespace presburger;
+
+QuasiPolynomial::QuasiPolynomial(
+    unsigned numVars, SmallVector<Fraction> coeffs,
+    std::vector<std::vector<SmallVector<Fraction>>> aff)
+    : PresburgerSpace(/*numDomain=*/numVars, /*numRange=*/1, /*numSymbols=*/0,
+                      /*numLocals=*/0),
+      coefficients(coeffs), affine(aff) {
+#ifndef NDEBUG
+  // For each term which involves at least one affine function,
+  for (const std::vector<SmallVector<Fraction>> &term : affine) {
+    if (term.size() == 0)
+      continue;
+    // the number of elements in each affine function is
+    // one more than the number of symbols.
+    for (const SmallVector<Fraction> &aff : term) {
+      assert(aff.size() == getNumInputs() + 1 &&
+             "dimensionality of affine functions does not match number of "
+             "symbols!");
+    }
+  }
+#endif // NDEBUG
+}
+
+QuasiPolynomial QuasiPolynomial::operator+(const QuasiPolynomial &x) const {
+  assert(getNumInputs() == x.getNumInputs() &&
+         "two quasi-polynomials with different numbers of symbols cannot "
+         "be added!");
+  SmallVector<Fraction> sumCoeffs = coefficients;
+  sumCoeffs.append(x.coefficients);
+  std::vector<std::vector<SmallVector<Fraction>>> sumAff = affine;
+  sumAff.insert(sumAff.end(), x.affine.begin(), x.affine.end());
+  return QuasiPolynomial(getNumInputs(), sumCoeffs, sumAff);
+}
+
+QuasiPolynomial QuasiPolynomial::operator-(const QuasiPolynomial &x) const {
+  assert(getNumInputs() == x.getNumInputs() &&
+         "two quasi-polynomials with different numbers of symbols cannot "
+         "be subtracted!");
+  QuasiPolynomial qp(getNumInputs(), x.coefficients, x.affine);
+  for (Fraction &coeff : qp.coefficients)
+    coeff = -coeff;
+  return *this + qp;
+}
+
+QuasiPolynomial QuasiPolynomial::operator*(const QuasiPolynomial &x) const {
+  assert(getNumInputs() == x.getNumInputs() &&
+         "two quasi-polynomials with different numbers of "
+         "symbols cannot be multiplied!");
+
+  SmallVector<Fraction> coeffs;
+  coeffs.reserve(coefficients.size() * x.coefficients.size());
+  for (const Fraction &coeff : coefficients)
+    for (const Fraction &xcoeff : x.coefficients)
+      coeffs.push_back(coeff * xcoeff);
+
+  std::vector<SmallVector<Fraction>> product;
+  std::vector<std::vector<SmallVector<Fraction>>> aff;
+  aff.reserve(affine.size() * x.affine.size());
+  for (const std::vector<SmallVector<Fraction>> &term : affine) {
+    for (const std::vector<SmallVector<Fraction>> &xterm : x.affine) {
+      product.clear();
+      product.insert(product.end(), term.begin(), term.end());
+      product.insert(product.end(), xterm.begin(), xterm.end());
+      aff.push_back(product);
+    }
+  }
+
+  return QuasiPolynomial(getNumInputs(), coeffs, aff);
+}
+
+QuasiPolynomial QuasiPolynomial::operator/(const Fraction x) const {
+  assert(x != 0 && "division by zero!");
+  QuasiPolynomial qp(*this);
+  for (Fraction &coeff : qp.coefficients)
+    coeff /= x;
+  return qp;
+}
+
+// Removes terms which evaluate to zero from the expression.
+QuasiPolynomial QuasiPolynomial::simplify() {
+  SmallVector<Fraction> newCoeffs({});
+  std::vector<std::vector<SmallVector<Fraction>>> newAffine({});
+  for (unsigned i = 0, e = coefficients.size(); i < e; i++) {
+    // A term is zero if its coefficient is zero, or
+    if (coefficients[i] == Fraction(0, 1))
+      continue;
+    bool product_is_zero =
+        // if any of the affine functions in the product
+        llvm::any_of(affine[i], [](const SmallVector<Fraction> &affine_ij) {
+          // has all its coefficients as zero.
+          return llvm::all_of(affine_ij,
+                              [](const Fraction &f) { return f == 0; });
+        });
+    if (product_is_zero)
+      continue;
+    newCoeffs.push_back(coefficients[i]);
+    newAffine.push_back(affine[i]);
+  }
+  return QuasiPolynomial(getNumInputs(), newCoeffs, newAffine);
+}
\ No newline at end of file
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
index ae89774239b58..8279b3408a6e6 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Conversion/FuncToSPIRV/FuncToSPIRV.h"
 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h"
 #include "mlir/Conversion/MemRefToSPIRV/MemRefToSPIRV.h"
+#include "mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
@@ -126,6 +127,8 @@ void GPUToSPIRVPass::runOnOperation() {
 
     // TODO: Change SPIR-V conversion to be progressive and remove the following
     // patterns.
+    ScfToSPIRVContext scfContext;
+    populateSCFToSPIRVPatterns(typeConverter, scfContext, patterns);
     mlir::arith::populateArithToSPIRVPatterns(typeConverter, patterns);
     populateMemRefToSPIRVPatterns(typeConverter, patterns);
     populateFuncToSPIRVPatterns(typeConverter, patterns);
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 458bf83eac17f..64388a9a01812 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -908,6 +908,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, TypeRange results,
 
 void CallOp::build(OpBuilder &builder, OperationState &state, TypeRange results,
                    FlatSymbolRefAttr callee, ValueRange args) {
+  assert(callee && "expected non-null callee in direct call builder");
   build(builder, state, results,
         TypeAttr::get(getLLVMFuncType(builder.getContext(), results, args)),
         callee, args, /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr,
diff --git a/mlir/test/mlir-vulkan-runner/addf_if.mlir b/mlir/test/mlir-vulkan-runner/addf_if.mlir
new file mode 100644
index 0000000000000..fbd1fae6d0b59
--- /dev/null
+++ b/mlir/test/mlir-vulkan-runner/addf_if.mlir
@@ -0,0 +1,54 @@
+// RUN: mlir-vulkan-runner %s --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils --entry-point-result=void | FileCheck %s
+
+// CHECK: [3.3,  3.3,  3.3,  3.3,  0,  0,  0,  0]
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<
+    #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>
+} {
+  gpu.module @kernels {
+    gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
+      kernel attributes { spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [1, 1, 1]>} {
+      %0 = gpu.block_id x
+      %limit = arith.constant 4 : index
+      %cond = arith.cmpi slt, %0, %limit : index
+      scf.if %cond {
+        %1 = memref.load %arg0[%0] : memref<8xf32>
+        %2 = memref.load %arg1[%0] : memref<8xf32>
+        %3 = arith.addf %1, %2 : f32
+        memref.store %3, %arg2[%0] : memref<8xf32>
+      }
+      gpu.return
+    }
+  }
+
+  func.func @main() {
+    %arg0 = memref.alloc() : memref<8xf32>
+    %arg1 = memref.alloc() : memref<8xf32>
+    %arg2 = memref.alloc() : memref<8xf32>
+    %0 = arith.constant 0 : i32
+    %1 = arith.constant 1 : i32
+    %2 = arith.constant 2 : i32
+    %value0 = arith.constant 0.0 : f32
+    %value1 = arith.constant 1.1 : f32
+    %value2 = arith.constant 2.2 : f32
+    %arg3 = memref.cast %arg0 : memref<8xf32> to memref<?xf32>
+    %arg4 = memref.cast %arg1 : memref<8xf32> to memref<?xf32>
+    %arg5 = memref.cast %arg2 : memref<8xf32> to memref<?xf32>
+    call @fillResource1DFloat(%arg3, %value1) : (memref<?xf32>, f32) -> ()
+    call @fillResource1DFloat(%arg4, %value2) : (memref<?xf32>, f32) -> ()
+    call @fillResource1DFloat(%arg5, %value0) : (memref<?xf32>, f32) -> ()
+
+    %cst1 = arith.constant 1 : index
+    %cst8 = arith.constant 8 : index
+    gpu.launch_func @kernels::@kernel_add
+        blocks in (%cst8, %cst1, %cst1) threads in (%cst1, %cst1, %cst1)
+        args(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
+    %arg6 = memref.cast %arg5 : memref<?xf32> to memref<*xf32>
+    call @printMemrefF32(%arg6) : (memref<*xf32>) -> ()
+    return
+  }
+  func.func private @fillResource1DFloat(%0 : memref<?xf32>, %1 : f32)
+  func.func private @printMemrefF32(%ptr : memref<*xf32>)
+}
+
diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
index 5b8e236b4618f..032f5760361f4 100644
--- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
+++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
@@ -27,6 +27,7 @@
 #include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "mlir/Dialect/SPIRV/Transforms/Passes.h"
@@ -105,8 +106,8 @@ int main(int argc, char **argv) {
   mlir::DialectRegistry registry;
   registry.insert<mlir::arith::ArithDialect, mlir::LLVM::LLVMDialect,
                   mlir::gpu::GPUDialect, mlir::spirv::SPIRVDialect,
-                  mlir::func::FuncDialect, mlir::memref::MemRefDialect,
-                  mlir::vector::VectorDialect>();
+                  mlir::scf::SCFDialect, mlir::func::FuncDialect,
+                  mlir::memref::MemRefDialect, mlir::vector::VectorDialect>();
   mlir::registerBuiltinDialectTranslation(registry);
   mlir::registerLLVMDialectTranslation(registry);
 
diff --git a/mlir/unittests/Analysis/Presburger/CMakeLists.txt b/mlir/unittests/Analysis/Presburger/CMakeLists.txt
index b6ce273e35a0e..e37133354e53c 100644
--- a/mlir/unittests/Analysis/Presburger/CMakeLists.txt
+++ b/mlir/unittests/Analysis/Presburger/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_unittest(MLIRPresburgerTests
   PresburgerRelationTest.cpp
   PresburgerSpaceTest.cpp
   PWMAFunctionTest.cpp
+  QuasiPolynomialTest.cpp
   SimplexTest.cpp
   UtilsTest.cpp
 )
diff --git a/mlir/unittests/Analysis/Presburger/QuasiPolynomialTest.cpp b/mlir/unittests/Analysis/Presburger/QuasiPolynomialTest.cpp
new file mode 100644
index 0000000000000..a84f0234067ab
--- /dev/null
+++ b/mlir/unittests/Analysis/Presburger/QuasiPolynomialTest.cpp
@@ -0,0 +1,140 @@
+//===- MatrixTest.cpp - Tests for QuasiPolynomial -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/Presburger/QuasiPolynomial.h"
+#include "./Utils.h"
+#include "mlir/Analysis/Presburger/Fraction.h"
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+using namespace mlir;
+using namespace presburger;
+
+// Test the arithmetic operations on QuasiPolynomials;
+// addition, subtraction, multiplication, and division
+// by a constant.
+// Two QPs of 3 parameters each were generated randomly
+// and their sum, difference, and product computed by hand.
+TEST(QuasiPolynomialTest, arithmetic) {
+  QuasiPolynomial qp1(
+      3, {Fraction(1, 3), Fraction(1, 1), Fraction(1, 2)},
+      {{{Fraction(1, 1), Fraction(-1, 2), Fraction(4, 5), Fraction(0, 1)},
+        {Fraction(2, 3), Fraction(3, 4), Fraction(-1, 1), Fraction(5, 7)}},
+       {{Fraction(1, 2), Fraction(1, 1), Fraction(4, 5), Fraction(1, 1)}},
+       {{Fraction(-3, 2), Fraction(1, 1), Fraction(5, 6), Fraction(7, 5)},
+        {Fraction(1, 4), Fraction(2, 1), Fraction(6, 5), Fraction(-9, 8)},
+        {Fraction(3, 2), Fraction(2, 5), Fraction(-7, 4), Fraction(0, 1)}}});
+  QuasiPolynomial qp2(
+      3, {Fraction(1, 1), Fraction(2, 1)},
+      {{{Fraction(1, 2), Fraction(0, 1), Fraction(-1, 3), Fraction(5, 3)},
+        {Fraction(2, 1), Fraction(5, 4), Fraction(9, 7), Fraction(-1, 5)}},
+       {{Fraction(1, 3), Fraction(-2, 3), Fraction(1, 1), Fraction(0, 1)}}});
+
+  QuasiPolynomial sum = qp1 + qp2;
+  EXPECT_EQ_REPR_QUASIPOLYNOMIAL(
+      sum,
+      QuasiPolynomial(
+          3,
+          {Fraction(1, 3), Fraction(1, 1), Fraction(1, 2), Fraction(1, 1),
+           Fraction(2, 1)},
+          {{{Fraction(1, 1), Fraction(-1, 2), Fraction(4, 5), Fraction(0, 1)},
+            {Fraction(2, 3), Fraction(3, 4), Fraction(-1, 1), Fraction(5, 7)}},
+           {{Fraction(1, 2), Fraction(1, 1), Fraction(4, 5), Fraction(1, 1)}},
+           {{Fraction(-3, 2), Fraction(1, 1), Fraction(5, 6), Fraction(7, 5)},
+            {Fraction(1, 4), Fraction(2, 1), Fraction(6, 5), Fraction(-9, 8)},
+            {Fraction(3, 2), Fraction(2, 5), Fraction(-7, 4), Fraction(0, 1)}},
+           {{Fraction(1, 2), Fraction(0, 1), Fraction(-1, 3), Fraction(5, 3)},
+            {Fraction(2, 1), Fraction(5, 4), Fraction(9, 7), Fraction(-1, 5)}},
+           {{Fraction(1, 3), Fraction(-2, 3), Fraction(1, 1),
+             Fraction(0, 1)}}}));
+
+  QuasiPolynomial diff = qp1 - qp2;
+  EXPECT_EQ_REPR_QUASIPOLYNOMIAL(
+      diff,
+      QuasiPolynomial(
+          3,
+          {Fraction(1, 3), Fraction(1, 1), Fraction(1, 2), Fraction(-1, 1),
+           Fraction(-2, 1)},
+          {{{Fraction(1, 1), Fraction(-1, 2), Fraction(4, 5), Fraction(0, 1)},
+            {Fraction(2, 3), Fraction(3, 4), Fraction(-1, 1), Fraction(5, 7)}},
+           {{Fraction(1, 2), Fraction(1, 1), Fraction(4, 5), Fraction(1, 1)}},
+           {{Fraction(-3, 2), Fraction(1, 1), Fraction(5, 6), Fraction(7, 5)},
+            {Fraction(1, 4), Fraction(2, 1), Fraction(6, 5), Fraction(-9, 8)},
+            {Fraction(3, 2), Fraction(2, 5), Fraction(-7, 4), Fraction(0, 1)}},
+           {{Fraction(1, 2), Fraction(0, 1), Fraction(-1, 3), Fraction(5, 3)},
+            {Fraction(2, 1), Fraction(5, 4), Fraction(9, 7), Fraction(-1, 5)}},
+           {{Fraction(1, 3), Fraction(-2, 3), Fraction(1, 1),
+             Fraction(0, 1)}}}));
+
+  QuasiPolynomial prod = qp1 * qp2;
+  EXPECT_EQ_REPR_QUASIPOLYNOMIAL(
+      prod,
+      QuasiPolynomial(
+          3,
+          {Fraction(1, 3), Fraction(2, 3), Fraction(1, 1), Fraction(2, 1),
+           Fraction(1, 2), Fraction(1, 1)},
+          {{{Fraction(1, 1), Fraction(-1, 2), Fraction(4, 5), Fraction(0, 1)},
+            {Fraction(2, 3), Fraction(3, 4), Fraction(-1, 1), Fraction(5, 7)},
+            {Fraction(1, 2), Fraction(0, 1), Fraction(-1, 3), Fraction(5, 3)},
+            {Fraction(2, 1), Fraction(5, 4), Fraction(9, 7), Fraction(-1, 5)}},
+           {{Fraction(1, 1), Fraction(-1, 2), Fraction(4, 5), Fraction(0, 1)},
+            {Fraction(2, 3), Fraction(3, 4), Fraction(-1, 1), Fraction(5, 7)},
+            {Fraction(1, 3), Fraction(-2, 3), Fraction(1, 1), Fraction(0, 1)}},
+           {{Fraction(1, 2), Fraction(1, 1), Fraction(4, 5), Fraction(1, 1)},
+            {Fraction(1, 2), Fraction(0, 1), Fraction(-1, 3), Fraction(5, 3)},
+            {Fraction(2, 1), Fraction(5, 4), Fraction(9, 7), Fraction(-1, 5)}},
+           {{Fraction(1, 2), Fraction(1, 1), Fraction(4, 5), Fraction(1, 1)},
+            {Fraction(1, 3), Fraction(-2, 3), Fraction(1, 1), Fraction(0, 1)}},
+           {{Fraction(-3, 2), Fraction(1, 1), Fraction(5, 6), Fraction(7, 5)},
+            {Fraction(1, 4), Fraction(2, 1), Fraction(6, 5), Fraction(-9, 8)},
+            {Fraction(3, 2), Fraction(2, 5), Fraction(-7, 4), Fraction(0, 1)},
+            {Fraction(1, 2), Fraction(0, 1), Fraction(-1, 3), Fraction(5, 3)},
+            {Fraction(2, 1), Fraction(5, 4), Fraction(9, 7), Fraction(-1, 5)}},
+           {{Fraction(-3, 2), Fraction(1, 1), Fraction(5, 6), Fraction(7, 5)},
+            {Fraction(1, 4), Fraction(2, 1), Fraction(6, 5), Fraction(-9, 8)},
+            {Fraction(3, 2), Fraction(2, 5), Fraction(-7, 4), Fraction(0, 1)},
+            {Fraction(1, 3), Fraction(-2, 3), Fraction(1, 1),
+             Fraction(0, 1)}}}));
+
+  QuasiPolynomial quot = qp1 / 2;
+  EXPECT_EQ_REPR_QUASIPOLYNOMIAL(
+      quot,
+      QuasiPolynomial(
+          3, {Fraction(1, 6), Fraction(1, 2), Fraction(1, 4)},
+          {{{Fraction(1, 1), Fraction(-1, 2), Fraction(4, 5), Fraction(0, 1)},
+            {Fraction(2, 3), Fraction(3, 4), Fraction(-1, 1), Fraction(5, 7)}},
+           {{Fraction(1, 2), Fraction(1, 1), Fraction(4, 5), Fraction(1, 1)}},
+           {{Fraction(-3, 2), Fraction(1, 1), Fraction(5, 6), Fraction(7, 5)},
+            {Fraction(1, 4), Fraction(2, 1), Fraction(6, 5), Fraction(-9, 8)},
+            {Fraction(3, 2), Fraction(2, 5), Fraction(-7, 4),
+             Fraction(0, 1)}}}));
+}
+
+// Test the simplify() operation on QPs, which removes terms that
+// are identically zero. A random QP was generated and terms were
+// changed to account for each condition in simplify() – 
+// the term coefficient being zero, or all the coefficients in some
+// affine term in the product being zero.
+TEST(QuasiPolynomialTest, simplify) {
+  QuasiPolynomial qp(2,
+                     {Fraction(2, 3), Fraction(0, 1), Fraction(1, 1),
+                      Fraction(1, 2), Fraction(0, 1)},
+                     {{{Fraction(1, 1), Fraction(3, 4), Fraction(5, 3)},
+                       {Fraction(2, 1), Fraction(0, 1), Fraction(0, 1)}},
+                      {{Fraction(1, 3), Fraction(8, 5), Fraction(2, 5)}},
+                      {{Fraction(2, 7), Fraction(9, 5), Fraction(0, 1)},
+                       {Fraction(0, 1), Fraction(0, 1), Fraction(0, 1)}},
+                      {{Fraction(1, 1), Fraction(4, 5), Fraction(6, 5)}},
+                      {{Fraction(1, 3), Fraction(4, 3), Fraction(7, 8)}}});
+  EXPECT_EQ_REPR_QUASIPOLYNOMIAL(
+      qp.simplify(),
+      QuasiPolynomial(2, {Fraction(2, 3), Fraction(1, 2)},
+                      {{{Fraction(1, 1), Fraction(3, 4), Fraction(5, 3)},
+                        {Fraction(2, 1), Fraction(0, 1), Fraction(0, 1)}},
+                       {{Fraction(1, 1), Fraction(4, 5), Fraction(6, 5)}}}));
+}
\ No newline at end of file
diff --git a/mlir/unittests/Analysis/Presburger/Utils.h b/mlir/unittests/Analysis/Presburger/Utils.h
index 544577375dd1d..2a9966c7ce2ea 100644
--- a/mlir/unittests/Analysis/Presburger/Utils.h
+++ b/mlir/unittests/Analysis/Presburger/Utils.h
@@ -17,6 +17,7 @@
 #include "mlir/Analysis/Presburger/Matrix.h"
 #include "mlir/Analysis/Presburger/PWMAFunction.h"
 #include "mlir/Analysis/Presburger/PresburgerRelation.h"
+#include "mlir/Analysis/Presburger/QuasiPolynomial.h"
 #include "mlir/Analysis/Presburger/Simplex.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Support/LLVM.h"
@@ -71,6 +72,28 @@ inline void EXPECT_EQ_FRAC_MATRIX(FracMatrix a, FracMatrix b) {
       EXPECT_EQ(a(row, col), b(row, col));
 }
 
+// Check the coefficients (in order) of two quasipolynomials.
+// Note that this is not a true equality check.
+inline void EXPECT_EQ_REPR_QUASIPOLYNOMIAL(QuasiPolynomial a, QuasiPolynomial b) {
+  EXPECT_EQ(a.getNumInputs(), b.getNumInputs());
+
+  SmallVector<Fraction> aCoeffs = a.getCoefficients(),
+                        bCoeffs = b.getCoefficients();
+  EXPECT_EQ(aCoeffs.size(), bCoeffs.size());
+  for (unsigned i = 0, e = aCoeffs.size(); i < e; i++)
+    EXPECT_EQ(aCoeffs[i], bCoeffs[i]);
+
+  std::vector<std::vector<SmallVector<Fraction>>> aAff = a.getAffine(),
+                                                  bAff = b.getAffine();
+  EXPECT_EQ(aAff.size(), bAff.size());
+  for (unsigned i = 0, e = aAff.size(); i < e; i++) {
+    EXPECT_EQ(aAff[i].size(), bAff[i].size());
+    for (unsigned j = 0, f = aAff[i].size(); j < f; j++)
+      for (unsigned k = 0, g = a.getNumInputs(); k <= g; k++)
+        EXPECT_EQ(aAff[i][j][k], bAff[i][j][k]);
+  }
+}
+
 /// lhs and rhs represent non-negative integers or positive infinity. The
 /// infinity case corresponds to when the Optional is empty.
 inline bool infinityOrUInt64LE(std::optional<MPInt> lhs,
diff --git a/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp b/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp
index befed120ca138..f0fde50889dac 100644
--- a/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp
+++ b/openmp/libomptarget/test/offloading/struct_mapping_with_pointers.cpp
@@ -29,6 +29,7 @@ int main() {
 
   dat.datum[7] = 7;
   dat.more_datum[17] = 17;
+  dat.datum[dat.arr[0][0]] = 0;
 
   /// The struct is mapped with type 0x0 when the pointer fields are mapped.
   /// The struct is also map explicitely by the user. The second mapping by
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c80d714f6a991..9e730c33db2de 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -9360,6 +9360,7 @@ cc_binary(
         ":MlirJitRunner",
         ":Pass",
         ":ReconcileUnrealizedCasts",
+	":SCFDialect",
         ":SPIRVDialect",
         ":SPIRVTransforms",
         ":ToLLVMIRTranslation",