diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h index 8dec32de9008e5..3cc9d823c815b2 100644 --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -12,6 +12,7 @@ #include "bolt/Core/DIEBuilder.h" #include "bolt/Core/DebugData.h" #include "bolt/Core/DebugNames.h" +#include "bolt/Core/GDBIndex.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" #include "llvm/DWP/DWP.h" @@ -131,7 +132,8 @@ class DWARFRewriter { makeFinalLocListsSection(DWARFVersion Version); /// Finalize type sections in the main binary. - CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer); + CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer, + GDBIndex &GDBIndexSection); /// Process and write out CUs that are passed in. void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer, diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 8814ebbd10aa50..e1b3762a316606 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -184,7 +184,7 @@ namespace bolt { /// Emits debug information into .debug_info or .debug_types section. class DIEStreamer : public DwarfStreamer { DIEBuilder *DIEBldr; - DWARFRewriter &Rewriter; + GDBIndex &GDBIndexSection; private: /// Emit the compilation unit header for \p Unit in the debug_info @@ -247,7 +247,7 @@ class DIEStreamer : public DwarfStreamer { const uint64_t TypeSignature = cast(Unit).getTypeHash(); DIE *TypeDIE = DIEBldr->getTypeDIE(Unit); const DIEBuilder::DWARFUnitInfo &UI = DIEBldr->getUnitInfoByDwarfUnit(Unit); - Rewriter.addGDBTypeUnitEntry( + GDBIndexSection.addGDBTypeUnitEntry( {UI.UnitOffset, TypeSignature, TypeDIE->getOffset()}); if (Unit.getVersion() < 5) { // Switch the section to .debug_types section.
@@ -278,12 +278,12 @@ class DIEStreamer : public DwarfStreamer { } public: - DIEStreamer(DIEBuilder *DIEBldr, DWARFRewriter &Rewriter, + DIEStreamer(DIEBuilder *DIEBldr, GDBIndex &GDBIndexSection, DWARFLinkerBase::OutputFileType OutFileType, raw_pwrite_stream &OutFile, DWARFLinkerBase::MessageHandlerTy Warning) : DwarfStreamer(OutFileType, OutFile, Warning), DIEBldr(DIEBldr), - Rewriter(Rewriter){}; + GDBIndexSection(GDBIndexSection) {}; using DwarfStreamer::emitCompileUnitHeader; @@ -326,12 +326,11 @@ static cl::opt KeepARanges( "keep or generate .debug_aranges section if .gdb_index is written"), cl::Hidden, cl::cat(BoltCategory)); -static cl::opt -DeterministicDebugInfo("deterministic-debuginfo", - cl::desc("disables parallel execution of tasks that may produce " - "nondeterministic debug info"), - cl::init(true), - cl::cat(BoltCategory)); +static cl::opt DeterministicDebugInfo( + "deterministic-debuginfo", + cl::desc("disables parallel execution of tasks that may produce " + "nondeterministic debug info"), + cl::init(true), cl::cat(BoltCategory)); static cl::opt DwarfOutputPath( "dwarf-output-path", @@ -460,10 +459,11 @@ static std::optional getAsAddress(const DWARFUnit &DU, static std::unique_ptr createDIEStreamer(const Triple &TheTriple, raw_pwrite_stream &OutFile, StringRef Swift5ReflectionSegmentName, DIEBuilder &DIEBldr, - DWARFRewriter &Rewriter) { + GDBIndex &GDBIndexSection) { std::unique_ptr Streamer = std::make_unique( - &DIEBldr, Rewriter, DWARFLinkerBase::OutputFileType::Object, OutFile, + &DIEBldr, GDBIndexSection, DWARFLinkerBase::OutputFileType::Object, + OutFile, [&](const Twine &Warning, StringRef Context, const DWARFDie *) {}); Error Err = Streamer->init(TheTriple, Swift5ReflectionSegmentName); if (Err) @@ -484,13 +484,12 @@ emitUnit(DIEBuilder &DIEBldr, DIEStreamer &Streamer, DWARFUnit &Unit) { return {U.UnitOffset, U.UnitLength, TypeHash}; } -static void emitDWOBuilder(const std::string &DWOName, - DIEBuilder &DWODIEBuilder, DWARFRewriter &Rewriter, - DWARFUnit &SplitCU, DWARFUnit &CU, - DWARFRewriter::DWPState &State, - DebugLocWriter &LocWriter, - DebugStrOffsetsWriter &StrOffstsWriter, - DebugStrWriter &StrWriter) { +static void +emitDWOBuilder(const std::string &DWOName, DIEBuilder &DWODIEBuilder, + DWARFRewriter &Rewriter, DWARFUnit &SplitCU, DWARFUnit &CU, + DWARFRewriter::DWPState &State, DebugLocWriter &LocWriter, + DebugStrOffsetsWriter &StrOffstsWriter, + DebugStrWriter &StrWriter, GDBIndex &GDBIndexSection) { // Populate debug_info and debug_abbrev for current dwo into StringRef. 
DWODIEBuilder.generateAbbrevs(); DWODIEBuilder.finish(); @@ -500,8 +499,9 @@ static void emitDWOBuilder(const std::string &DWOName, std::make_shared(OutBuffer); const object::ObjectFile *File = SplitCU.getContext().getDWARFObj().getFile(); auto TheTriple = std::make_unique(File->makeTriple()); - std::unique_ptr Streamer = createDIEStreamer( - *TheTriple, *ObjOS, "DwoStreamerInitAug2", DWODIEBuilder, Rewriter); + std::unique_ptr Streamer = + createDIEStreamer(*TheTriple, *ObjOS, "DwoStreamerInitAug2", + DWODIEBuilder, GDBIndexSection); DWARFRewriter::UnitMetaVectorType TUMetaVector; DWARFRewriter::UnitMeta CUMI = {0, 0, 0}; if (SplitCU.getContext().getMaxDWOVersion() >= 5) { @@ -652,6 +652,7 @@ void DWARFRewriter::updateDebugInfo() { DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC, *StrWriter); + GDBIndex GDBIndexSection(BC); DWPState State; if (opts::WriteDWP) initDWPState(State); @@ -704,7 +705,8 @@ void DWARFRewriter::updateDebugInfo() { TempRangesSectionWriter->finalizeSection(); emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State, - DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter); + DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter, + GDBIndexSection); } if (Unit->getVersion() >= 5) { @@ -729,9 +731,10 @@ void DWARFRewriter::updateDebugInfo() { std::make_unique(OutBuffer); const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile(); auto TheTriple = std::make_unique(File->makeTriple()); - std::unique_ptr Streamer = - createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this); - CUOffsetMap OffsetMap = finalizeTypeSections(DIEBlder, *Streamer); + std::unique_ptr Streamer = createDIEStreamer( + *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, GDBIndexSection); + CUOffsetMap OffsetMap = + finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection); const bool SingleThreadedMode = opts::NoThreads || opts::DeterministicDebugInfo; @@ -761,7 +764,8 @@ void DWARFRewriter::updateDebugInfo() { finalizeDebugSections(DIEBlder, DebugNamesTable, *Streamer, *ObjOS, OffsetMap); - updateGdbIndexSection(OffsetMap, CUIndex); + GDBIndexSection.updateGdbIndexSection(OffsetMap, CUIndex, + *ARangesSectionWriter); } void DWARFRewriter::updateUnitDebugInfo( @@ -1429,7 +1433,8 @@ void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) { } CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder, - DIEStreamer &Streamer) { + DIEStreamer &Streamer, + GDBIndex &GDBIndexSection) { // update TypeUnit DW_AT_stmt_list with new .debug_line information. 
auto updateLineTable = [&](const DWARFUnit &Unit) -> void { DIE *UnitDIE = DIEBlder.getUnitDIEbyUnit(Unit); @@ -1449,8 +1454,8 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder, std::make_shared(OutBuffer); const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile(); auto TheTriple = std::make_unique(File->makeTriple()); - std::unique_ptr TypeStreamer = - createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this); + std::unique_ptr TypeStreamer = createDIEStreamer( + *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, GDBIndexSection); // generate debug_info and CUMap CUOffsetMap CUMap; diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp index 5e64d23874ec17..c25ee42d0899ae 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp @@ -67,7 +67,8 @@ SizeofExpressionCheck::SizeofExpressionCheck(StringRef Name, WarnOnSizeOfCompareToConstant( Options.get("WarnOnSizeOfCompareToConstant", true)), WarnOnSizeOfPointerToAggregate( - Options.get("WarnOnSizeOfPointerToAggregate", true)) {} + Options.get("WarnOnSizeOfPointerToAggregate", true)), + WarnOnSizeOfPointer(Options.get("WarnOnSizeOfPointer", false)) {} void SizeofExpressionCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "WarnOnSizeOfConstant", WarnOnSizeOfConstant); @@ -78,6 +79,7 @@ void SizeofExpressionCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { WarnOnSizeOfCompareToConstant); Options.store(Opts, "WarnOnSizeOfPointerToAggregate", WarnOnSizeOfPointerToAggregate); + Options.store(Opts, "WarnOnSizeOfPointer", WarnOnSizeOfPointer); } void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) { @@ -127,17 +129,30 @@ void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) { const auto ConstStrLiteralDecl = varDecl(isDefinition(), hasType(hasCanonicalType(CharPtrType)), hasInitializer(ignoringParenImpCasts(stringLiteral()))); + const auto VarWithConstStrLiteralDecl = expr( + hasType(hasCanonicalType(CharPtrType)), + ignoringParenImpCasts(declRefExpr(hasDeclaration(ConstStrLiteralDecl)))); Finder->addMatcher( - sizeOfExpr(has(ignoringParenImpCasts( - expr(hasType(hasCanonicalType(CharPtrType)), - ignoringParenImpCasts(declRefExpr( - hasDeclaration(ConstStrLiteralDecl))))))) + sizeOfExpr(has(ignoringParenImpCasts(VarWithConstStrLiteralDecl))) .bind("sizeof-charp"), this); - // Detect sizeof(ptr) where ptr points to an aggregate (i.e. sizeof(&S)). - // Do not find it if RHS of a 'sizeof(arr) / sizeof(arr[0])' expression. - if (WarnOnSizeOfPointerToAggregate) { + // Detect sizeof(ptr) where ptr is a pointer (CWE-467). + // + // In WarnOnSizeOfPointerToAggregate mode only report cases when ptr points + // to an aggregate type or ptr is an expression that (implicitly or + // explicitly) casts an array to a pointer type. (These are more suspicious + // than other sizeof(ptr) expressions because they can appear as distorted + // forms of the common sizeof(aggregate) expressions.) + // + // To avoid false positives, the check doesn't report expressions like + // 'sizeof(pp[0])' and 'sizeof(*pp)' where `pp` is a pointer-to-pointer or + // array of pointers. (This filters out both `sizeof(arr) / sizeof(arr[0])` + // expressions and other cases like `p = realloc(p, newsize * sizeof(*p));`.) 
+ // + // Moreover this generic message is suppressed in cases that are also matched + // by the more concrete matchers 'sizeof-this' and 'sizeof-charp'. + if (WarnOnSizeOfPointerToAggregate || WarnOnSizeOfPointer) { const auto ArrayExpr = ignoringParenImpCasts(hasType(hasCanonicalType(arrayType()))); const auto ArrayCastExpr = expr(anyOf( @@ -149,32 +164,31 @@ void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) { const auto PointerToStructType = hasUnqualifiedDesugaredType(pointerType(pointee(recordType()))); - const auto PointerToStructExpr = expr( - hasType(hasCanonicalType(PointerToStructType)), unless(cxxThisExpr())); - - const auto ArrayOfPointersExpr = ignoringParenImpCasts( - hasType(hasCanonicalType(arrayType(hasElementType(pointerType())) - .bind("type-of-array-of-pointers")))); - const auto ArrayOfSamePointersExpr = - ignoringParenImpCasts(hasType(hasCanonicalType( - arrayType(equalsBoundNode("type-of-array-of-pointers"))))); + const auto PointerToStructTypeWithBinding = + type(PointerToStructType).bind("struct-type"); + const auto PointerToStructExpr = + expr(hasType(hasCanonicalType(PointerToStructType))); + + const auto PointerToDetectedExpr = + WarnOnSizeOfPointer + ? expr(hasType(hasUnqualifiedDesugaredType(pointerType()))) + : expr(anyOf(ArrayCastExpr, PointerToArrayExpr, + PointerToStructExpr)); + const auto ZeroLiteral = ignoringParenImpCasts(integerLiteral(equals(0))); - const auto ArrayOfSamePointersZeroSubscriptExpr = - ignoringParenImpCasts(arraySubscriptExpr( - hasBase(ArrayOfSamePointersExpr), hasIndex(ZeroLiteral))); - const auto ArrayLengthExprDenom = - expr(hasParent(binaryOperator(hasOperatorName("/"), - hasLHS(ignoringParenImpCasts(sizeOfExpr( - has(ArrayOfPointersExpr)))))), - sizeOfExpr(has(ArrayOfSamePointersZeroSubscriptExpr))); + const auto SubscriptExprWithZeroIndex = + arraySubscriptExpr(hasIndex(ZeroLiteral)); + const auto DerefExpr = + ignoringParenImpCasts(unaryOperator(hasOperatorName("*"))); Finder->addMatcher( - expr(sizeOfExpr(anyOf( - has(ignoringParenImpCasts(anyOf( - ArrayCastExpr, PointerToArrayExpr, PointerToStructExpr))), - has(PointerToStructType))), - unless(ArrayLengthExprDenom)) - .bind("sizeof-pointer-to-aggregate"), + expr(sizeOfExpr(anyOf(has(ignoringParenImpCasts( + expr(PointerToDetectedExpr, unless(DerefExpr), + unless(SubscriptExprWithZeroIndex), + unless(VarWithConstStrLiteralDecl), + unless(cxxThisExpr())))), + has(PointerToStructTypeWithBinding)))) + .bind("sizeof-pointer"), this); } @@ -292,11 +306,17 @@ void SizeofExpressionCheck::check(const MatchFinder::MatchResult &Result) { diag(E->getBeginLoc(), "suspicious usage of 'sizeof(char*)'; do you mean 'strlen'?") << E->getSourceRange(); - } else if (const auto *E = - Result.Nodes.getNodeAs("sizeof-pointer-to-aggregate")) { - diag(E->getBeginLoc(), - "suspicious usage of 'sizeof(A*)'; pointer to aggregate") - << E->getSourceRange(); + } else if (const auto *E = Result.Nodes.getNodeAs("sizeof-pointer")) { + if (Result.Nodes.getNodeAs("struct-type")) { + diag(E->getBeginLoc(), + "suspicious usage of 'sizeof(A*)' on pointer-to-aggregate type; did " + "you mean 'sizeof(A)'?") + << E->getSourceRange(); + } else { + diag(E->getBeginLoc(), "suspicious usage of 'sizeof()' on an expression " + "that results in a pointer") + << E->getSourceRange(); + } } else if (const auto *E = Result.Nodes.getNodeAs( "sizeof-compare-constant")) { diag(E->getOperatorLoc(), @@ -332,18 +352,23 @@ void SizeofExpressionCheck::check(const MatchFinder::MatchResult &Result) { " numerator is not a 
 multiple of denominator") << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); } else if (NumTy && DenomTy && NumTy == DenomTy) { + // FIXME: This message is wrong, it should not refer to sizeof "pointer" + // usage (and by the way, it would be good to clarify all the messages). diag(E->getOperatorLoc(), "suspicious usage of sizeof pointer 'sizeof(T)/sizeof(T)'") << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); - } else if (PointedTy && DenomTy && PointedTy == DenomTy) { - diag(E->getOperatorLoc(), - "suspicious usage of sizeof pointer 'sizeof(T*)/sizeof(T)'") - << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); - } else if (NumTy && DenomTy && NumTy->isPointerType() && - DenomTy->isPointerType()) { - diag(E->getOperatorLoc(), - "suspicious usage of sizeof pointer 'sizeof(P*)/sizeof(Q*)'") - << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); + } else if (!WarnOnSizeOfPointer) { + // When 'WarnOnSizeOfPointer' is enabled, these messages become redundant: + if (PointedTy && DenomTy && PointedTy == DenomTy) { + diag(E->getOperatorLoc(), + "suspicious usage of sizeof pointer 'sizeof(T*)/sizeof(T)'") + << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); + } else if (NumTy && DenomTy && NumTy->isPointerType() && + DenomTy->isPointerType()) { + diag(E->getOperatorLoc(), + "suspicious usage of sizeof pointer 'sizeof(P*)/sizeof(Q*)'") + << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); + } } } else if (const auto *E = Result.Nodes.getNodeAs("sizeof-sizeof-expr")) { diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h index 55becdd4ecdba1..9ca17bc9e6f124 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h @@ -30,6 +30,7 @@ class SizeofExpressionCheck : public ClangTidyCheck { const bool WarnOnSizeOfThis; const bool WarnOnSizeOfCompareToConstant; const bool WarnOnSizeOfPointerToAggregate; + const bool WarnOnSizeOfPointer; }; } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py index d96b3450fdbe81..b048460abf2fca 100755 --- a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py +++ b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py @@ -242,7 +242,7 @@ def main(): filename = None lines_by_file = {} for line in sys.stdin: - match = re.search('^\+\+\+\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line) + match = re.search('^\\+\\+\\+\\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line) if match: filename = match.group(2) if filename is None: @@ -255,7 +255,7 @@ def main(): if not re.match("^%s$" % args.iregex, filename, re.IGNORECASE): continue - match = re.search("^@@.*\+(\d+)(,(\d+))?", line) + match = re.search(r"^@@.*\+(\d+)(,(\d+))?", line) if match: start_line = int(match.group(1)) line_count = 1 diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt index ed6269d2ccaa98..106bbeff84ccf3 100644 --- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -26,7 +26,6 @@ if (CLANGD_ENABLE_REMOTE) clangdRemoteIndexProto clangdRemoteIndexServiceProto clangdRemoteMarshalling - clangBasic clangDaemon clangdSupport @@ -35,6 +34,11 @@ if (CLANGD_ENABLE_REMOTE) clangdRemoteIndexServiceProto ) + 
clang_target_link_libraries(clangdRemoteIndex + PRIVATE + clangBasic + ) + add_subdirectory(marshalling) add_subdirectory(server) add_subdirectory(monitor) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 0c0c10605a8307..2dc39d0ad74af8 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -237,6 +237,12 @@ Changes in existing checks ` check by eliminating false positives resulting from use of optionals in unevaluated context. +- Improved :doc:`bugprone-sizeof-expression + ` check by eliminating some + false positives and adding a new (off-by-default) option + `WarnOnSizeOfPointer` that reports all ``sizeof(pointer)`` expressions + (except for a few that are idiomatic). + - Improved :doc:`bugprone-suspicious-include ` check by replacing the local options `HeaderFileExtensions` and `ImplementationFileExtensions` by the diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst index c37df1706eb4e1..ed5bb4fbb89baf 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst @@ -190,6 +190,15 @@ Options .. option:: WarnOnSizeOfPointerToAggregate - When `true`, the check will warn on an expression like - ``sizeof(expr)`` where the expression is a pointer - to aggregate. Default is `true`. + When `true`, the check will warn when the argument of ``sizeof`` is either a + pointer-to-aggregate type, an expression returning a pointer-to-aggregate + value or an expression that returns a pointer from an array-to-pointer + conversion (that may be implicit or explicit, for example ``array + 2`` or + ``(int *)array``). Default is `true`. + +.. option:: WarnOnSizeOfPointer + + When `true`, the check will report all expressions where the argument of + ``sizeof`` is an expression that produces a pointer (except for a few + idiomatic expressions that are probably intentional and correct). + This detects occurrences of CWE 467. Default is `false`. diff --git a/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/cplusplus.ArrayDelete.rst b/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/cplusplus.ArrayDelete.rst new file mode 100644 index 00000000000000..98147aaaa6883e --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/cplusplus.ArrayDelete.rst @@ -0,0 +1,14 @@ +.. title:: clang-tidy - clang-analyzer-cplusplus.ArrayDelete +.. meta:: + :http-equiv=refresh: 5;URL=https://clang.llvm.org/docs/analyzer/checkers.html#cplusplus-arraydelete + +clang-analyzer-cplusplus.ArrayDelete +==================================== + +Reports destructions of arrays of polymorphic objects that are destructed as +their base class. + +The `clang-analyzer-cplusplus.ArrayDelete` check is an alias, please see +`Clang Static Analyzer Available Checkers +`_ +for more information. diff --git a/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/security.SetgidSetuidOrder.rst b/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/security.SetgidSetuidOrder.rst new file mode 100644 index 00000000000000..82f22b11f77fb4 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/security.SetgidSetuidOrder.rst @@ -0,0 +1,10 @@ +.. 
title:: clang-tidy - clang-analyzer-security.SetgidSetuidOrder + +clang-analyzer-security.SetgidSetuidOrder +========================================= + +Warn on possible reversed order of 'setgid(getgid())' and 'setuid(getuid())' +(CERT: POS36-C). + +The clang-analyzer-security.SetgidSetuidOrder check is an alias of +Clang Static Analyzer security.SetgidSetuidOrder. diff --git a/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/unix.Stream.rst b/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/unix.Stream.rst new file mode 100644 index 00000000000000..82a8bdcaefce79 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/clang-analyzer/unix.Stream.rst @@ -0,0 +1,13 @@ +.. title:: clang-tidy - clang-analyzer-unix.Stream +.. meta:: + :http-equiv=refresh: 5;URL=https://clang.llvm.org/docs/analyzer/checkers.html#unix-stream + +clang-analyzer-unix.Stream +========================== + +Check stream handling functions. + +The `clang-analyzer-unix.Stream` check is an alias, please see +`Clang Static Analyzer Available Checkers +`_ +for more information. diff --git a/clang-tools-extra/docs/clang-tidy/checks/gen-static-analyzer-docs.py b/clang-tools-extra/docs/clang-tidy/checks/gen-static-analyzer-docs.py index 6545a3906fa50e..fba1592c7c1c75 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/gen-static-analyzer-docs.py +++ b/clang-tools-extra/docs/clang-tidy/checks/gen-static-analyzer-docs.py @@ -47,7 +47,7 @@ def get_checkers(checkers_td, checkers_rst): parent_package_ = package["ParentPackage"] hidden = (checker["Hidden"] != 0) or (package["Hidden"] != 0) - while parent_package_ != None: + while parent_package_ is not None: parent_package = table_entries[parent_package_["def"]] checker_package_prefix = ( parent_package["PackageName"] + "." + checker_package_prefix ) @@ -59,7 +59,7 @@ def get_checkers(checkers_td, checkers_rst): "clang-analyzer-" + checker_package_prefix + "." + checker_name ) anchor_url = re.sub( - "\.", "-", checker_package_prefix + "." + checker_name + r"\.", "-", checker_package_prefix + "." + checker_name ).lower() if not hidden and "alpha" not in full_package_name.lower(): @@ -130,7 +130,7 @@ def generate_documentation(checker, has_documentation): def update_documentation_list(checkers): with open(os.path.join(__location__, "list.rst"), "r+") as f: f_text = f.read() - check_text = f_text.split(".. 
csv-table:: Aliases..\n")[1] + check_text = f_text.split(':header: "Name", "Redirect", "Offers fixes"\n')[1] checks = [x for x in check_text.split("\n") if ":header:" not in x and x] old_check_text = "\n".join(checks) checks = [x for x in checks if "clang-analyzer-" not in x] diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 87d3db20f76847..a698cecc0825c6 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -443,6 +443,7 @@ Check aliases :doc:`clang-analyzer-core.uninitialized.CapturedBlockVariable `, `Clang Static Analyzer core.uninitialized.CapturedBlockVariable `_, :doc:`clang-analyzer-core.uninitialized.NewArraySize `, `Clang Static Analyzer core.uninitialized.NewArraySize `_, :doc:`clang-analyzer-core.uninitialized.UndefReturn `, `Clang Static Analyzer core.uninitialized.UndefReturn `_, + :doc:`clang-analyzer-cplusplus.ArrayDelete `, `Clang Static Analyzer cplusplus.ArrayDelete `_, :doc:`clang-analyzer-cplusplus.InnerPointer `, `Clang Static Analyzer cplusplus.InnerPointer `_, :doc:`clang-analyzer-cplusplus.Move `, Clang Static Analyzer cplusplus.Move, :doc:`clang-analyzer-cplusplus.NewDelete `, `Clang Static Analyzer cplusplus.NewDelete `_, @@ -497,6 +498,7 @@ Check aliases :doc:`clang-analyzer-osx.coreFoundation.containers.OutOfBounds `, `Clang Static Analyzer osx.coreFoundation.containers.OutOfBounds `_, :doc:`clang-analyzer-osx.coreFoundation.containers.PointerSizedValues `, `Clang Static Analyzer osx.coreFoundation.containers.PointerSizedValues `_, :doc:`clang-analyzer-security.FloatLoopCounter `, `Clang Static Analyzer security.FloatLoopCounter `_, + :doc:`clang-analyzer-security.SetgidSetuidOrder `, Clang Static Analyzer security.SetgidSetuidOrder, :doc:`clang-analyzer-security.cert.env.InvalidPtr `, `Clang Static Analyzer security.cert.env.InvalidPtr `_, :doc:`clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling `, `Clang Static Analyzer security.insecureAPI.DeprecatedOrUnsafeBufferHandling `_, :doc:`clang-analyzer-security.insecureAPI.UncheckedReturn `, `Clang Static Analyzer security.insecureAPI.UncheckedReturn `_, @@ -517,6 +519,7 @@ Check aliases :doc:`clang-analyzer-unix.MallocSizeof `, `Clang Static Analyzer unix.MallocSizeof `_, :doc:`clang-analyzer-unix.MismatchedDeallocator `, `Clang Static Analyzer unix.MismatchedDeallocator `_, :doc:`clang-analyzer-unix.StdCLibraryFunctions `, `Clang Static Analyzer unix.StdCLibraryFunctions `_, + :doc:`clang-analyzer-unix.Stream `, `Clang Static Analyzer unix.Stream `_, :doc:`clang-analyzer-unix.Vfork `, `Clang Static Analyzer unix.Vfork `_, :doc:`clang-analyzer-unix.cstring.BadSizeArg `, `Clang Static Analyzer unix.cstring.BadSizeArg `_, :doc:`clang-analyzer-unix.cstring.NullArg `, `Clang Static Analyzer unix.cstring.NullArg `_, diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt index f92f79be121508..a13b5d20cf7c3b 100644 --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -14,8 +14,6 @@ add_clang_library(clangPseudo Token.cpp LINK_LIBS - clangBasic - clangLex clangPseudoGrammar DEPENDS @@ -25,3 +23,9 @@ add_clang_library(clangPseudo target_include_directories(clangPseudo INTERFACE $ ) + +clang_target_link_libraries(clangPseudo + PRIVATE + clangBasic + clangLex + ) diff --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt 
index d56d16c893c3d4..2fecdce6a10f9c 100644 --- a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt @@ -9,7 +9,11 @@ add_clang_library(clangPseudoCXX cxx_gen LINK_LIBS - clangBasic clangPseudo clangPseudoGrammar ) + +clang_target_link_libraries(clangPseudoCXX + PRIVATE + clangBasic + ) diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-2.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-2.c index 8c4feb8f86169b..aef930f2c8fda7 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-2.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-2.c @@ -34,24 +34,24 @@ int Test5() { int sum = 0; sum += sizeof(&S); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(__typeof(&S)); sum += sizeof(&TS); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(__typeof(&TS)); sum += sizeof(STRKWD MyStruct*); sum += sizeof(__typeof(STRKWD MyStruct*)); sum += sizeof(TypedefStruct*); sum += sizeof(__typeof(TypedefStruct*)); sum += sizeof(PTTS); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(PMyStruct); sum += sizeof(PS); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(PS2); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(&A10); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer #ifdef __cplusplus MyStruct &rS = S; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-any-pointer.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-any-pointer.cpp new file mode 100644 index 00000000000000..bfb2ec3a9eb02c --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression-any-pointer.cpp @@ -0,0 +1,241 @@ +// RUN: %check_clang_tidy %s bugprone-sizeof-expression %t -- -config="{CheckOptions: {bugprone-sizeof-expression.WarnOnSizeOfIntegerExpression: true, bugprone-sizeof-expression.WarnOnSizeOfPointer: true}}" -- + +class C { + int size() { return sizeof(this); } + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: suspicious usage of 'sizeof(this)' +}; + +#define LEN 8 + +int X; +extern int A[10]; +extern short B[10]; + +#pragma pack(1) +struct S { char a, b, c; }; + +enum E { E_VALUE = 0 }; +enum class EC { VALUE = 0 }; + +bool AsBool() { return false; } +int AsInt() { return 0; } +E AsEnum() { return E_VALUE; } +EC AsEnumClass() { return EC::VALUE; } +S AsStruct() { return {}; } + +struct M { + int 
AsInt() { return 0; } + E AsEnum() { return E_VALUE; } + S AsStruct() { return {}; } +}; + +int Test1(const char* ptr) { + int sum = 0; + sum += sizeof(LEN); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(K)' + sum += sizeof(LEN + 1); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(K)' + sum += sizeof(sum, LEN); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: suspicious usage of 'sizeof(..., ...)' + sum += sizeof(AsBool()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in an integer + sum += sizeof(AsInt()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in an integer + sum += sizeof(AsEnum()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in an integer + sum += sizeof(AsEnumClass()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in an integer + sum += sizeof(M{}.AsInt()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in an integer + sum += sizeof(M{}.AsEnum()); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in an integer + sum += sizeof(sizeof(X)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(sizeof(...))' + sum += sizeof(LEN + sizeof(X)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(sizeof(...))' + sum += sizeof(LEN + LEN + sizeof(X)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(sizeof(...))' + sum += sizeof(LEN + (LEN + sizeof(X))); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(sizeof(...))' + sum += sizeof(LEN + -sizeof(X)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(sizeof(...))' + sum += sizeof(LEN + - + -sizeof(X)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(sizeof(...))' + sum += sizeof(char) / sizeof(char); + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: suspicious usage of sizeof pointer 'sizeof(T)/sizeof(T)' + sum += sizeof(A) / sizeof(S); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator + sum += sizeof(char) / sizeof(int); + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator + sum += sizeof(char) / sizeof(A); + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator + sum += sizeof(B[0]) / sizeof(A); + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator + sum += sizeof(ptr) / sizeof(char); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(ptr) / sizeof(ptr[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(ptr) / sizeof(char*); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(ptr) / sizeof(void*); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression 
that results in a pointer + sum += sizeof(ptr) / sizeof(const void volatile*); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(ptr) / sizeof(char); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(int) * sizeof(char); + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious 'sizeof' by 'sizeof' multiplication + sum += sizeof(ptr) * sizeof(ptr[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + // CHECK-MESSAGES: :[[@LINE-2]]:22: warning: suspicious 'sizeof' by 'sizeof' multiplication + sum += sizeof(int) * (2 * sizeof(char)); + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious 'sizeof' by 'sizeof' multiplication + sum += (2 * sizeof(char)) * sizeof(int); + // CHECK-MESSAGES: :[[@LINE-1]]:29: warning: suspicious 'sizeof' by 'sizeof' multiplication + if (sizeof(A) < 0x100000) sum += 42; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: suspicious comparison of 'sizeof(expr)' to a constant + if (sizeof(A) <= 0xFFFFFFFEU) sum += 42; + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: suspicious comparison of 'sizeof(expr)' to a constant + return sum; +} + +int Test5() { + typedef int Array10[10]; + typedef C ArrayC[10]; + + struct MyStruct { + Array10 arr; + Array10* ptr; + }; + typedef const MyStruct TMyStruct; + typedef const MyStruct *PMyStruct; + typedef TMyStruct *PMyStruct2; + + static TMyStruct kGlocalMyStruct = {}; + static TMyStruct volatile * kGlocalMyStructPtr = &kGlocalMyStruct; + + MyStruct S; + PMyStruct PS; + PMyStruct2 PS2; + Array10 A10; + C *PtrArray[10]; + C *PC; + + char *PChar; + int *PInt, **PPInt; + MyStruct **PPMyStruct; + + int sum = 0; + sum += sizeof(&S.arr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(&kGlocalMyStruct.arr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(&kGlocalMyStructPtr->arr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(S.arr + 0); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(+ S.arr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof((int*)S.arr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + + sum += sizeof(S.ptr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(kGlocalMyStruct.ptr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(kGlocalMyStructPtr->ptr); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + + sum += sizeof(&kGlocalMyStruct); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(&S); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(MyStruct*); + 
sum += sizeof(PMyStruct); + sum += sizeof(PS); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(PS2); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(&A10); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(PtrArray) / sizeof(PtrArray[1]); + // CHECK-MESSAGES: :[[@LINE-1]]:29: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(A10) / sizeof(PtrArray[0]); + sum += sizeof(PC) / sizeof(PtrArray[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + // CHECK-MESSAGES: :[[@LINE-2]]:21: warning: suspicious usage of sizeof pointer 'sizeof(T)/sizeof(T)' + sum += sizeof(ArrayC) / sizeof(PtrArray[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator + + sum += sizeof(PChar); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(PInt); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(PPInt); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(PPMyStruct); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + + return sum; +} + +void some_generic_function(const void *arg, int argsize); +int *IntP, **IntPP; +C *ClassP, **ClassPP; + +void GenericFunctionTest() { + // The `sizeof(pointer)` checks ignore situations where the pointer is + // produced by dereferencing a pointer-to-pointer, because this is unlikely + // to be an accident and can appear in legitimate code that tries to call + // a generic function which emulates dynamic typing within C. + some_generic_function(IntPP, sizeof(*IntPP)); + some_generic_function(ClassPP, sizeof(*ClassPP)); + // Using `...[0]` instead of the dereference operator is another common + // variant, which is also widespread in the idiomatic array-size calculation: + // `sizeof(array) / sizeof(array[0])`. + some_generic_function(IntPP, sizeof(IntPP[0])); + some_generic_function(ClassPP, sizeof(ClassPP[0])); + // FIXME: There is a third common pattern where the generic function is + // called with `&Variable` and `sizeof(Variable)`. Right now these are + // reported by the `sizeof(pointer)` checks, but this causes some false + // positives, so it would be good to create an exception for them. 
+ some_generic_function(&IntPP, sizeof(IntP)); + // CHECK-MESSAGES: :[[@LINE-1]]:33: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + some_generic_function(&ClassPP, sizeof(ClassP)); + // CHECK-MESSAGES: :[[@LINE-1]]:35: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer +} + +int ValidExpressions() { + int A[] = {1, 2, 3, 4}; + static const char str[] = "hello"; + static const char* ptr[] { "aaa", "bbb", "ccc" }; + typedef C *CA10[10]; + C *PtrArray[10]; + CA10 PtrArray1; + + int sum = 0; + if (sizeof(A) < 10) + sum += sizeof(A); + sum += sizeof(int); + sum += sizeof(AsStruct()); + sum += sizeof(M{}.AsStruct()); + sum += sizeof(A[sizeof(A) / sizeof(int)]); + // Here the outer sizeof is reported, but the inner ones are accepted: + sum += sizeof(&A[sizeof(A) / sizeof(int)]); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer + sum += sizeof(sizeof(0)); // Special case: sizeof size_t. + sum += sizeof(void*); + sum += sizeof(void const *); + sum += sizeof(void const *) / 4; + sum += sizeof(str); + sum += sizeof(str) / sizeof(char); + sum += sizeof(str) / sizeof(str[0]); + sum += sizeof(ptr) / sizeof(ptr[0]); + sum += sizeof(ptr) / sizeof(*(ptr)); + sum += sizeof(PtrArray) / sizeof(PtrArray[0]); + // Canonical type of PtrArray1 is same as PtrArray. + sum = sizeof(PtrArray) / sizeof(PtrArray1[0]); + // There is no warning for 'sizeof(T*)/sizeof(Q)' case. + sum += sizeof(PtrArray) / sizeof(A[0]); + return sum; +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp index 003a02209c3d2d..064f31cb08c6b3 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp @@ -124,8 +124,6 @@ int Test1(const char* ptr) { // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious usage of sizeof pointer 'sizeof(P*)/sizeof(Q*)' sum += sizeof(ptr) / sizeof(char); // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious usage of sizeof pointer 'sizeof(T*)/sizeof(T)' - sum += sizeof(ptr) / sizeof(ptr[0]); - // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious usage of sizeof pointer 'sizeof(T*)/sizeof(T)' sum += sizeof(int) * sizeof(char); // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious 'sizeof' by 'sizeof' multiplication sum += sizeof(ptr) * sizeof(ptr[0]); @@ -207,50 +205,57 @@ int Test5() { C *PtrArray[10]; C *PC; + char *PChar; + int *PInt, **PPInt; + MyStruct **PPMyStruct; + int sum = 0; sum += sizeof(&S.arr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(&kGlocalMyStruct.arr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(&kGlocalMyStructPtr->arr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(S.arr + 0); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; 
pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(+ S.arr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof((int*)S.arr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(S.ptr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(kGlocalMyStruct.ptr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(kGlocalMyStructPtr->ptr); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(&kGlocalMyStruct); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(&S); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(MyStruct*); sum += sizeof(PMyStruct); sum += sizeof(PS); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(PS2); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(&A10); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(PtrArray) / sizeof(PtrArray[1]); - // CHECK-MESSAGES: :[[@LINE-1]]:29: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:29: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer sum += sizeof(A10) / sizeof(PtrArray[0]); - // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate sum += sizeof(PC) / sizeof(PtrArray[0]); - // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer // CHECK-MESSAGES: :[[@LINE-2]]:21: warning: suspicious usage of sizeof pointer 'sizeof(T)/sizeof(T)' - // CHECK-MESSAGES: :[[@LINE-3]]:23: warning: suspicious usage of 'sizeof(A*)'; pointer 
to aggregate sum += sizeof(ArrayC) / sizeof(PtrArray[0]); // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator - // CHECK-MESSAGES: :[[@LINE-2]]:27: warning: suspicious usage of 'sizeof(A*)'; pointer to aggregate + + // These pointers do not point to aggregate types, so they are not reported in this mode: + sum += sizeof(PChar); + sum += sizeof(PInt); + sum += sizeof(PPInt); + sum += sizeof(PPMyStruct); return sum; } @@ -293,6 +298,32 @@ bool Baz() { return sizeof(A) < N; } // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: suspicious comparison of 'sizeof(expr)' to a constant bool Test7() { return Baz<-1>(); } +void some_generic_function(const void *arg, int argsize); +int *IntP, **IntPP; +C *ClassP, **ClassPP; + +void GenericFunctionTest() { + // The `sizeof(pointer)` checks ignore situations where the pointer is + // produced by dereferencing a pointer-to-pointer, because this is unlikely + // to be an accident and can appear in legitimate code that tries to call + // a generic function which emulates dynamic typing within C. + some_generic_function(IntPP, sizeof(*IntPP)); + some_generic_function(ClassPP, sizeof(*ClassPP)); + // Using `...[0]` instead of the dereference operator is another common + // variant, which is also widespread in the idiomatic array-size calculation: + // `sizeof(array) / sizeof(array[0])`. + some_generic_function(IntPP, sizeof(IntPP[0])); + some_generic_function(ClassPP, sizeof(ClassPP[0])); + // FIXME: There is a third common pattern where the generic function is + // called with `&Variable` and `sizeof(Variable)`. Right now these are + // reported by the `sizeof(pointer)` checks, but this causes some false + // positives, so it would be good to create an exception for them. + // NOTE: `sizeof(IntP)` is only reported with `WarnOnSizeOfPointer=true`. + some_generic_function(&IntPP, sizeof(IntP)); + some_generic_function(&ClassPP, sizeof(ClassP)); + // CHECK-MESSAGES: :[[@LINE-1]]:35: warning: suspicious usage of 'sizeof()' on an expression that results in a pointer +} + int ValidExpressions() { int A[] = {1, 2, 3, 4}; static const char str[] = "hello"; diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 2ac0bccb42f50d..c6496167d3828b 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -350,7 +350,9 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long") endif () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-nested-anon-types" ) + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-nested-anon-types" ) + endif () endif () # Determine HOST_LINK_VERSION on Darwin. @@ -848,23 +850,17 @@ if (CLANG_ENABLE_BOOTSTRAP) set(CLANG_BOOTSTRAP_TARGETS check-llvm check-clang check-all) endif() foreach(target ${CLANG_BOOTSTRAP_TARGETS}) - # Install targets have side effects, so we always want to execute them. - # "install" is reserved by CMake and can't be used as a step name for - # ExternalProject_Add_Step, so we can match against "^install-" instead of - # "^install" to get a tighter match. CMake's installation scripts already - # skip up-to-date files, so there's no behavior change if you install to the - # same destination multiple times. 
- if(target MATCHES "^install-") - set(step_always ON) - else() - set(step_always OFF) - endif() ExternalProject_Add_Step(${NEXT_CLANG_STAGE} ${target} COMMAND ${CMAKE_COMMAND} --build --target ${target} COMMENT "Performing ${target} for '${NEXT_CLANG_STAGE}'" DEPENDEES configure - ALWAYS ${step_always} + # We need to set ALWAYS to ON here, otherwise these targets won't be + # built on a second invocation of ninja. The targets have their own + # logic to determine if they should build or not, so setting ALWAYS ON + # here does not mean the targets will always rebuild; it just means that + # they will check their dependencies and see if they need to be built. + ALWAYS ON EXCLUDE_FROM_MAIN ON USES_TERMINAL 1 ) diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake index 1dfb1bc535bf14..9e6feb479d45fc 100644 --- a/clang/cmake/caches/Release.cmake +++ b/clang/cmake/caches/Release.cmake @@ -30,7 +30,7 @@ endfunction() # # cmake -D LLVM_RELEASE_ENABLE_PGO=ON -C Release.cmake set(LLVM_RELEASE_ENABLE_LTO THIN CACHE STRING "") -set(LLVM_RELEASE_ENABLE_PGO OFF CACHE BOOL "") +set(LLVM_RELEASE_ENABLE_PGO ON CACHE BOOL "") set(LLVM_RELEASE_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") set(LLVM_RELEASE_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "") # Note we don't need to add install here, since it is one of the pre-defined diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index dee51e402b687f..4866bd4aee634f 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -622,6 +622,7 @@ clang/tools/libclang/CXCursor.h clang/tools/scan-build-py/tests/functional/src/include/clean-one.h clang/unittests/Analysis/CFGBuildResult.h clang/unittests/Analysis/MacroExpansionContextTest.cpp +clang/unittests/Analysis/FlowSensitive/ASTOpsTest.cpp clang/unittests/Analysis/FlowSensitive/CNFFormula.cpp clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 8bce4812f0d482..a1d1d1c51cd417 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3203,6 +3203,9 @@ class ASTContext : public RefCountedBase { /// valid feature names. ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD) const; + std::vector + filterFunctionTargetVersionAttrs(const TargetVersionAttr *TV) const; + void getFunctionFeatureMap(llvm::StringMap &FeatureMap, const FunctionDecl *) const; void getFunctionFeatureMap(llvm::StringMap &FeatureMap, diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td index e839031752cdd8..06b2fa9b5531c6 100644 --- a/clang/include/clang/AST/CommentCommands.td +++ b/clang/include/clang/AST/CommentCommands.td @@ -132,9 +132,9 @@ def Tparam : BlockCommand<"tparam"> { let IsTParamCommand = 1; } // HeaderDoc command for template parameter documentation.
def Templatefield : BlockCommand<"templatefield"> { let IsTParamCommand = 1; } -def Throws : BlockCommand<"throws"> { let IsThrowsCommand = 1; } -def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; } -def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; } +def Throws : BlockCommand<"throws"> { let IsThrowsCommand = 1; let NumArgs = 1; } +def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; let NumArgs = 1; } +def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; let NumArgs = 1; } def Deprecated : BlockCommand<"deprecated"> { let IsEmptyParagraphAllowed = 1; diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h index e11e818b1af0a1..a2d0e30835e2c4 100644 --- a/clang/include/clang/AST/CommentParser.h +++ b/clang/include/clang/AST/CommentParser.h @@ -100,6 +100,11 @@ class Parser { ArrayRef parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); + /// Parse arguments for the \throws command; supported args are in the form + /// of a class or a template. + ArrayRef + parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs); + BlockCommandComment *parseBlockCommand(); InlineCommandComment *parseInlineCommand(); diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index 7bdb9052e57e74..e99c5b2466334a 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -330,9 +330,9 @@ class CapabilityExpr { bool shouldIgnore() const { return sexpr() == nullptr; } - bool isInvalid() const { return sexpr() && isa(sexpr()); } + bool isInvalid() const { return isa_and_nonnull(sexpr()); } - bool isUniversal() const { return sexpr() && isa(sexpr()); } + bool isUniversal() const { return isa_and_nonnull(sexpr()); } }; // Translate clang::Expr to til::SExpr.
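The ThreadSafetyCommon.h hunk above, like the Preprocessor.h, SemaObjC.h, MemRegion.h, and TransUnbridgedCasts.cpp hunks further down, replaces the manual "null check plus isa<>" pattern with llvm::isa_and_nonnull<>, which folds the null test into the type test. A minimal standalone sketch of the idiom, using a hypothetical Shape/Circle hierarchy rather than any type touched by this patch:

#include "llvm/Support/Casting.h"

struct Shape {
  enum Kind { SK_Circle, SK_Square };
  Shape(Kind K) : K(K) {}
  Kind getKind() const { return K; }
  Kind K;
};

struct Circle : Shape {
  Circle() : Shape(SK_Circle) {}
  // LLVM-style RTTI hook consulted by isa<>, cast<>, and dyn_cast<>.
  static bool classof(const Shape *S) { return S->getKind() == SK_Circle; }
};

bool isCircle(const Shape *S) {
  // Before: return S && llvm::isa<Circle>(S);
  // After: one call, same result, and simply false when S is null.
  return llvm::isa_and_nonnull<Circle>(S);
}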
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 11982af3fa609b..7bef5fd7ad40f2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -482,11 +482,11 @@ def SqrtF16F128 : Builtin, F16F128MathTemplate { let Prototype = "T(T)"; } -def TanF128 : Builtin { - let Spellings = ["__builtin_tanf128"]; +def TanF16F128 : Builtin, F16F128MathTemplate { + let Spellings = ["__builtin_tan"]; let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions]; - let Prototype = "__float128(__float128)"; + let Prototype = "T(T)"; } def TanhF128 : Builtin { diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index de3aa4b4028eb1..193eae3bc41d61 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9015,6 +9015,11 @@ def err_cuda_ovl_target : Error< "cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">; def note_cuda_ovl_candidate_target_mismatch : Note< "candidate template ignored: target attributes do not match">; +def warn_offload_incompatible_redeclare : Warning< + "target-attribute based function overloads are not supported by NVCC and will be treated as a function redeclaration: " + "new declaration is %select{__device__|__global__|__host__|__host__ __device__}0 function, " + "old declaration is %select{__device__|__global__|__host__|__host__ __device__}1 function">, + InGroup>, DefaultIgnore; def err_cuda_device_builtin_surftex_cls_template : Error< "illegal device builtin %select{surface|texture}0 reference " diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index b762e44e755ec4..53f410d3cb4bde 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -96,6 +96,7 @@ FEATURE(nullability, true) FEATURE(nullability_on_arrays, true) FEATURE(nullability_on_classes, true) FEATURE(nullability_nullable_result, true) +FEATURE(numerical_stability_sanitizer, LangOpts.Sanitize.has(SanitizerKind::NumericalStability)) FEATURE(memory_sanitizer, LangOpts.Sanitize.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index b228ffd07ee745..bee35e9dca7c39 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -76,6 +76,9 @@ SANITIZER("fuzzer-no-link", FuzzerNoLink) // ThreadSanitizer SANITIZER("thread", Thread) +// Numerical stability sanitizer.
+SANITIZER("numerical", NumericalStability) + // LeakSanitizer SANITIZER("leak", Leak) diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index 07070ec4fc0653..47ef175302679f 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -103,6 +103,9 @@ class SanitizerArgs { bool needsCfiDiagRt() const; bool needsStatsRt() const { return Stats; } bool needsScudoRt() const { return Sanitizers.has(SanitizerKind::Scudo); } + bool needsNsanRt() const { + return Sanitizers.has(SanitizerKind::NumericalStability); + } bool hasMemTag() const { return hasMemtagHeap() || hasMemtagStack() || hasMemtagGlobals(); diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index c0850a8fa9f7f8..9b1628d2d86f9e 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1360,7 +1360,7 @@ class Preprocessor { MacroState &S = CurSubmoduleState->Macros[II]; auto *MD = S.getLatest(); - while (MD && isa(MD)) + while (isa_and_nonnull(MD)) MD = MD->getPrevious(); return MacroDefinition(dyn_cast_or_null(MD), S.getActiveModuleMacros(*this, II), diff --git a/clang/include/clang/Sema/SemaObjC.h b/clang/include/clang/Sema/SemaObjC.h index 91430797e5ed82..bb8887691ce5d3 100644 --- a/clang/include/clang/Sema/SemaObjC.h +++ b/clang/include/clang/Sema/SemaObjC.h @@ -383,7 +383,7 @@ class SemaObjC : public SemaBase { void AddAnyMethodToGlobalPool(Decl *D); void ActOnStartOfObjCMethodDef(Scope *S, Decl *D); - bool isObjCMethodDecl(Decl *D) { return D && isa(D); } + bool isObjCMethodDecl(Decl *D) { return isa_and_nonnull(D); } /// CheckImplementationIvars - This routine checks if the instance variables /// listed in the implelementation match those listed in the interface. diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index 151d3e57c1cb81..59805d01be5db7 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -781,7 +781,7 @@ class SymbolicRegion : public SubRegion { : SubRegion(sreg, SymbolicRegionKind), sym(s) { // Because pointer arithmetic is represented by ElementRegion layers, // the base symbol here should not contain any arithmetic. 
- assert(s && isa(s)); + assert(isa_and_nonnull(s)); assert(s->getType()->isAnyPointerType() || s->getType()->isReferenceType() || s->getType()->isBlockPointerType()); diff --git a/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp b/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp index 1e6354f71e294a..7390ea17c8a4b6 100644 --- a/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp +++ b/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp @@ -371,7 +371,7 @@ class UnbridgedCastRewriter : public RecursiveASTVisitor{ Stmt *parent = E; do { parent = StmtMap->getParentIgnoreParenImpCasts(parent); - } while (parent && isa(parent)); + } while (isa_and_nonnull(parent)); if (ReturnStmt *retS = dyn_cast_or_null(parent)) { std::string note = "remove the cast and change return type of function " diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index cd76b8aa271dab..bf74e56a14799c 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -87,7 +87,6 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -13664,20 +13663,17 @@ QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const { } } -// Given a list of FMV features, return a concatenated list of the -// corresponding backend features (which may contain duplicates). -static std::vector getFMVBackendFeaturesFor( - const llvm::SmallVectorImpl &FMVFeatStrings) { - std::vector BackendFeats; - for (StringRef F : FMVFeatStrings) { - if (auto FMVExt = llvm::AArch64::parseArchExtension(F)) { - SmallVector Feats; - FMVExt->DependentFeatures.split(Feats, ',', -1, false); - for (StringRef F : Feats) - BackendFeats.push_back(F.str()); - } - } - return BackendFeats; +std::vector ASTContext::filterFunctionTargetVersionAttrs( + const TargetVersionAttr *TV) const { + assert(TV != nullptr); + llvm::SmallVector Feats; + std::vector ResFeats; + TV->getFeatures(Feats); + for (auto &Feature : Feats) + if (Target->validateCpuSupports(Feature.str())) + // Use '?' to mark features that came from TargetVersion. + ResFeats.push_back("?" + Feature.str()); + return ResFeats; } ParsedTargetAttr @@ -13712,12 +13708,10 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. - // AArch64 handles command line option features in parseTargetAttr(). 
- if (!Target->getTriple().isAArch64()) - ParsedAttr.Features.insert( - ParsedAttr.Features.begin(), - Target->getTargetOpts().FeaturesAsWritten.begin(), - Target->getTargetOpts().FeaturesAsWritten.end()); + ParsedAttr.Features.insert( + ParsedAttr.Features.begin(), + Target->getTargetOpts().FeaturesAsWritten.begin(), + Target->getTargetOpts().FeaturesAsWritten.end()); if (ParsedAttr.CPU != "" && Target->isValidCPUName(ParsedAttr.CPU)) TargetCPU = ParsedAttr.CPU; @@ -13738,31 +13732,32 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else if (const auto *TC = FD->getAttr()) { + std::vector Features; if (Target->getTriple().isAArch64()) { + // TargetClones for AArch64 llvm::SmallVector Feats; TC->getFeatures(Feats, GD.getMultiVersionIndex()); - std::vector Features = getFMVBackendFeaturesFor(Feats); + for (StringRef Feat : Feats) + if (Target->validateCpuSupports(Feat.str())) + // Use '?' to mark features that came from AArch64 TargetClones. + Features.push_back("?" + Feat.str()); Features.insert(Features.begin(), Target->getTargetOpts().FeaturesAsWritten.begin(), Target->getTargetOpts().FeaturesAsWritten.end()); - Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else { - std::vector Features; StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); if (VersionStr.starts_with("arch=")) TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); else if (VersionStr != "default") Features.push_back((StringRef{"+"} + VersionStr).str()); - Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } - } else if (const auto *TV = FD->getAttr()) { - llvm::SmallVector Feats; - TV->getFeatures(Feats); - std::vector Features = getFMVBackendFeaturesFor(Feats); - Features.insert(Features.begin(), - Target->getTargetOpts().FeaturesAsWritten.begin(), - Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); + } else if (const auto *TV = FD->getAttr()) { + std::vector Feats = filterFunctionTargetVersionAttrs(TV); + Feats.insert(Feats.begin(), + Target->getTargetOpts().FeaturesAsWritten.begin(), + Target->getTargetOpts().FeaturesAsWritten.end()); + Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Feats); } else { FeatureMap = Target->getTargetOpts().FeatureMap; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 3b9080e09b3313..02cd4ed9a6cace 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1505,7 +1505,7 @@ ExpectedType ASTNodeImporter::VisitInjectedClassNameType( // The InjectedClassNameType is created in VisitRecordDecl when the // T->getDecl() is imported. Here we can return the existing type. 
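The feature-map changes above reintroduce per-attribute filtering: features coming from target_version and AArch64 target_clones are validated with validateCpuSupports() and marked with a leading '?' so later passes can tell them apart from command-line features. For reference, a small AArch64 function-multi-versioning example of the attributes this path consumes (illustrative only):

    // Each version's feature map receives the features named in its
    // attribute; the '?' marker is internal and never user-visible.
    __attribute__((target_version("sve2"))) int resolve(void) { return 2; }
    __attribute__((target_version("default"))) int resolve(void) { return 0; }

    // target_clones emits one clone per entry, typically dispatched
    // through an ifunc resolver.
    __attribute__((target_clones("lse", "default"))) int fetch(void) { return 1; }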
   const Type *Ty = (*ToDeclOrErr)->getTypeForDecl();
-  assert(Ty && isa<InjectedClassNameType>(Ty));
+  assert(isa_and_nonnull<InjectedClassNameType>(Ty));
   return QualType(Ty, 0);
 }
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 8adfd85d0160c3..5baf81a509fb60 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -89,6 +89,31 @@ class TextTokenRetokenizer {
     }
   }

+  /// Extract the angle-bracketed portion of a template type, tracking
+  /// bracket nesting; returns true once the brackets balance.
+  bool lexTemplate(SmallString<32> &WordText) {
+    unsigned BracketCount = 0;
+    while (!isEnd()) {
+      const char C = peek();
+      WordText.push_back(C);
+      consumeChar();
+      switch (C) {
+      case '<': {
+        BracketCount++;
+        break;
+      }
+      case '>': {
+        BracketCount--;
+        if (!BracketCount)
+          return true;
+        break;
+      }
+      default:
+        break;
+      }
+    }
+    return false;
+  }
+
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -149,6 +174,54 @@ class TextTokenRetokenizer {
     addToken();
   }

+  /// Extract a type argument (a word that may include a template-id).
+  bool lexType(Token &Tok) {
+    if (isEnd())
+      return false;
+
+    // Save the current position so we can roll back if the type turns out
+    // to be empty.
+    Position SavedPos = Pos;
+
+    // Consume any leading whitespace.
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+
+    while (!isEnd()) {
+      const char C = peek();
+      // For a non-whitespace character, either lex a template argument list
+      // (on '<') or keep appending the character to the current word.
+      if (!isWhitespace(C)) {
+        if (C == '<') {
+          if (!lexTemplate(WordText))
+            return false;
+        } else {
+          WordText.push_back(C);
+          consumeChar();
+        }
+      } else {
+        consumeChar();
+        break;
+      }
+    }
+
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
   /// Extract a word -- sequence of non-whitespace characters.
bool lexWord(Token &Tok) { if (isEnd()) @@ -304,6 +377,23 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { return llvm::ArrayRef(Args, ParsedArgs); } +ArrayRef +Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, + unsigned NumArgs) { + auto *Args = new (Allocator.Allocate(NumArgs)) + Comment::Argument[NumArgs]; + unsigned ParsedArgs = 0; + Token Arg; + + while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) { + Args[ParsedArgs] = Comment::Argument{ + SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; + ParsedArgs++; + } + + return llvm::ArrayRef(Args, ParsedArgs); +} + BlockCommandComment *Parser::parseBlockCommand() { assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); @@ -356,6 +446,9 @@ BlockCommandComment *Parser::parseBlockCommand() { parseParamCommandArgs(PC, Retokenizer); else if (TPC) parseTParamCommandArgs(TPC, Retokenizer); + else if (Info->IsThrowsCommand) + S.actOnBlockCommandArgs( + BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs)); else S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 7f1ed9c691e988..e64a8326e8d5dd 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1116,7 +1116,7 @@ bool Decl::isInExportDeclContext() const { while (DC && !isa(DC)) DC = DC->getLexicalParent(); - return DC && isa(DC); + return isa_and_nonnull(DC); } bool Decl::isInAnotherModuleUnit() const { diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index f9d634550dc061..7e555689b64c48 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -837,7 +837,7 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, typedef SmallVector SpecsTy; SpecsTy Specs; const DeclContext *Ctx = FD->getDeclContext(); - while (Ctx && isa(Ctx)) { + while (isa_and_nonnull(Ctx)) { const ClassTemplateSpecializationDecl *Spec = dyn_cast(Ctx); if (Spec && !Spec->isExplicitSpecialization()) @@ -3067,7 +3067,7 @@ Expr *Expr::IgnoreParenCasts() { Expr *Expr::IgnoreConversionOperatorSingleStep() { if (auto *MCE = dyn_cast(this)) { - if (MCE->getMethodDecl() && isa(MCE->getMethodDecl())) + if (isa_and_nonnull(MCE->getMethodDecl())) return MCE->getImplicitObjectArgument(); } return this; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 86fb396fabe2d9..d5057452cec9c5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -2130,7 +2130,7 @@ static bool IsWeakLValue(const LValue &Value) { static bool isZeroSized(const LValue &Value) { const ValueDecl *Decl = GetLValueBaseDecl(Value); - if (Decl && isa(Decl)) { + if (isa_and_nonnull(Decl)) { QualType Ty = Decl->getType(); if (Ty->isArrayType()) return Ty->isIncompleteType() || diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 6654a27c921689..0899a98b3b95a6 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -318,7 +318,8 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { if (DiscardResult) return this->discard(SubExpr); - std::optional FromT = classify(SubExpr->getType()); + QualType SubExprTy = SubExpr->getType(); + std::optional FromT = classify(SubExprTy); std::optional ToT = classify(CE->getType()); if (!FromT || !ToT) return false; @@ -326,9 +327,14 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { assert(isPtrType(*FromT)); assert(isPtrType(*ToT)); if 
(FromT == ToT) { - if (SubExpr->getType()->isVoidPointerType()) - return this->visit(SubExpr) && this->emitVoidPtrCast(CE); - return this->delegate(SubExpr); + if (CE->getType()->isVoidPointerType()) + return this->delegate(SubExpr); + + if (!this->visit(SubExpr)) + return false; + if (FromT == PT_Ptr) + return this->emitPtrPtrCast(SubExprTy->isVoidPointerType(), CE); + return true; } if (!this->visit(SubExpr)) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 0ad710c5ec1afc..784e138e1467d4 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1980,10 +1980,25 @@ static inline bool CastPointerIntegralAPS(InterpState &S, CodePtr OpPC, return true; } -static inline bool VoidPtrCast(InterpState &S, CodePtr OpPC) { - const SourceInfo &E = S.Current->getSource(OpPC); - S.CCEDiag(E, diag::note_constexpr_invalid_cast) - << 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC); +static inline bool PtrPtrCast(InterpState &S, CodePtr OpPC, bool SrcIsVoidPtr) { + const auto &Ptr = S.Stk.peek(); + + if (SrcIsVoidPtr && S.getLangOpts().CPlusPlus) { + bool HasValidResult = !Ptr.isZero(); + + if (HasValidResult) { + // FIXME: note_constexpr_invalid_void_star_cast + } else if (!S.getLangOpts().CPlusPlus26) { + const SourceInfo &E = S.Current->getSource(OpPC); + S.CCEDiag(E, diag::note_constexpr_invalid_cast) + << 3 << "'void *'" << S.Current->getRange(OpPC); + } + } else { + const SourceInfo &E = S.Current->getSource(OpPC); + S.CCEDiag(E, diag::note_constexpr_invalid_cast) + << 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC); + } + return true; } diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index ac5426c87c2123..df362efd8b58b2 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -139,7 +139,6 @@ class AluOpcode : Opcode { } class FloatOpcode : Opcode { - let Types = []; let Args = [ArgRoundingMode]; } @@ -195,17 +194,14 @@ def NoRet : Opcode {} def Call : Opcode { let Args = [ArgFunction, ArgUint32]; - let Types = []; } def CallVirt : Opcode { let Args = [ArgFunction, ArgUint32]; - let Types = []; } def CallBI : Opcode { let Args = [ArgFunction, ArgCallExpr]; - let Types = []; } def CallPtr : Opcode { @@ -214,7 +210,6 @@ def CallPtr : Opcode { def CallVar : Opcode { let Args = [ArgFunction, ArgUint32]; - let Types = []; } def OffsetOf : Opcode { @@ -399,8 +394,6 @@ def InitGlobalTemp : AccessOpcode { // [Pointer] -> [Pointer] def InitGlobalTempComp : Opcode { let Args = [ArgLETD]; - let Types = []; - let HasGroup = 0; } // [Value] -> [] def SetGlobal : AccessOpcode; @@ -505,13 +498,9 @@ def SubPtr : Opcode { } // [Pointer] -> [Pointer] -def IncPtr : Opcode { - let HasGroup = 0; -} +def IncPtr : Opcode; // [Pointer] -> [Pointer] -def DecPtr : Opcode { - let HasGroup = 0; -} +def DecPtr : Opcode; //===----------------------------------------------------------------------===// // Function pointers. 
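The PtrPtrCast opcode that replaces VoidPtrCast here (its TableGen definition follows just below) carries a bool recording whether the source had void pointer type, so the interpreter can allow such casts in C++26 while still emitting note_constexpr_invalid_cast in earlier modes. A sketch of the language-level behavior being modeled, assuming it mirrors the reference constant evaluator (P2738 made constant-expression casts from void* valid in C++26):

    constexpr int value = 42;
    constexpr const void *vp = &value;
    // C++26: OK, vp really does point to an int (P2738).
    // Pre-C++26: invalid cast in a constant expression, diagnosed.
    constexpr const int *ip = static_cast<const int *>(vp);
    static_assert(*ip == 42);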
@@ -607,7 +596,6 @@ def Cast: Opcode { } def CastFP : Opcode { - let Types = []; let Args = [ArgFltSemantics, ArgRoundingMode]; } @@ -642,12 +630,10 @@ def CastFloatingIntegral : Opcode { } def CastFloatingIntegralAP : Opcode { - let Types = []; let Args = [ArgUint32]; } def CastFloatingIntegralAPS : Opcode { - let Types = []; let Args = [ArgUint32]; } @@ -656,16 +642,15 @@ def CastPointerIntegral : Opcode { let HasGroup = 1; } def CastPointerIntegralAP : Opcode { - let Types = []; - let HasGroup = 0; let Args = [ArgUint32]; } def CastPointerIntegralAPS : Opcode { - let Types = []; - let HasGroup = 0; let Args = [ArgUint32]; } -def VoidPtrCast : Opcode; +def PtrPtrCast : Opcode { + let Args = [ArgBool]; + +} def DecayPtr : Opcode { let Types = [PtrTypeClass, PtrTypeClass]; diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 30cff1ba2e6f37..4fbf0e3b42dbc8 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -301,9 +301,8 @@ void MangleContext::mangleBlock(const DeclContext *DC, const BlockDecl *BD, } else { assert((isa(DC) || isa(DC)) && "expected a NamedDecl or BlockDecl"); - if (isa(DC)) - for (; DC && isa(DC); DC = DC->getParent()) - (void) getBlockId(cast(DC), true); + for (; isa_and_nonnull(DC); DC = DC->getParent()) + (void)getBlockId(cast(DC), true); assert((isa(DC) || isa(DC)) && "expected a TranslationUnitDecl or a NamedDecl"); if (const auto *CD = dyn_cast(DC)) diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 36d611750ca48c..ffc5d2d4cd8fc3 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -899,6 +899,8 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) { case APFloat::S_Float8E4M3FNUZ: case APFloat::S_Float8E4M3B11FNUZ: case APFloat::S_FloatTF32: + case APFloat::S_Float6E3M2FN: + case APFloat::S_Float6E2M3FN: llvm_unreachable("Tried to mangle unexpected APFloat semantics"); } @@ -2748,7 +2750,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, return; } Out << '@'; - } else if (IsInLambda && D && isa(D)) { + } else if (IsInLambda && isa_and_nonnull(D)) { // The only lambda conversion operators are to function pointers, which // can differ by their calling convention and are typically deduced. So // we make sure that this type gets mangled properly. diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp index 3d6a1cc84c7b10..e97cb5e226f5c2 100644 --- a/clang/lib/AST/ParentMap.cpp +++ b/clang/lib/AST/ParentMap.cpp @@ -139,7 +139,9 @@ Stmt* ParentMap::getParent(Stmt* S) const { } Stmt *ParentMap::getParentIgnoreParens(Stmt *S) const { - do { S = getParent(S); } while (S && isa(S)); + do { + S = getParent(S); + } while (isa_and_nonnull(S)); return S; } @@ -155,7 +157,8 @@ Stmt *ParentMap::getParentIgnoreParenCasts(Stmt *S) const { Stmt *ParentMap::getParentIgnoreParenImpCasts(Stmt *S) const { do { S = getParent(S); - } while (S && isa(S) && cast(S)->IgnoreParenImpCasts() != S); + } while (isa_and_nonnull(S) && + cast(S)->IgnoreParenImpCasts() != S); return S; } diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 7e030e05512690..8f51d16b5db037 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -84,7 +84,7 @@ namespace { void PrintStmt(Stmt *S, int SubIndent) { IndentLevel += SubIndent; - if (S && isa(S)) { + if (isa_and_nonnull(S)) { // If this is an expr used in a stmt context, indent and newline it. 
Indent(); Visit(S); @@ -1939,7 +1939,7 @@ void StmtPrinter::VisitCXXOperatorCallExpr(CXXOperatorCallExpr *Node) { void StmtPrinter::VisitCXXMemberCallExpr(CXXMemberCallExpr *Node) { // If we have a conversion operator call only print the argument. CXXMethodDecl *MD = Node->getMethodDecl(); - if (MD && isa(MD)) { + if (isa_and_nonnull(MD)) { PrintExpr(Node->getImplicitObjectArgument()); return; } diff --git a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp index 38b5f51b7b2f02..27d42a7b508562 100644 --- a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp +++ b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp @@ -100,7 +100,8 @@ getFieldsForInitListExpr(const InitListT *InitList) { std::vector Fields; if (InitList->getType()->isUnionType()) { - Fields.push_back(InitList->getInitializedFieldInUnion()); + if (const FieldDecl *Field = InitList->getInitializedFieldInUnion()) + Fields.push_back(Field); return Fields; } @@ -137,9 +138,11 @@ RecordInitListHelper::RecordInitListHelper( // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. SmallVector InitsForUnion; if (Ty->isUnionType() && Inits.empty()) { - assert(Fields.size() == 1); - ImplicitValueInitForUnion.emplace(Fields.front()->getType()); - InitsForUnion.push_back(&*ImplicitValueInitForUnion); + assert(Fields.size() <= 1); + if (!Fields.empty()) { + ImplicitValueInitForUnion.emplace(Fields.front()->getType()); + InitsForUnion.push_back(&*ImplicitValueInitForUnion); + } Inits = InitsForUnion; } diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 0d7967c8b93449..7c88917faf9c65 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -415,7 +415,7 @@ class ResultObjectVisitor : public AnalysisASTVisitor { // below them can initialize the same object (or part of it). if (isa(E) || isa(E) || isa(E) || isa(E) || isa(E) || - isa(E) || + isa(E) || isa(E) || // We treat `BuiltinBitCastExpr` as an "original initializer" too as // it may not even be casting from a record type -- and even if it is, // the two objects are in general of unrelated type. diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 14f4abec0b456d..08d13c41a48572 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -286,7 +286,6 @@ void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts, void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__ARM_FEATURE_FRINT", "1"); - Builder.defineMacro("__ARM_FEATURE_BTI", "1"); // Also include the Armv8.4 defines getTargetDefinesARMV84A(Opts, Builder); } @@ -499,6 +498,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasPAuthLR) Builder.defineMacro("__ARM_FEATURE_PAUTH_LR", "1"); + if (HasBTI) + Builder.defineMacro("__ARM_FEATURE_BTI", "1"); + if (HasUnalignedAccess) Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1"); @@ -1050,18 +1052,57 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, return true; } +bool AArch64TargetInfo::initFeatureMap( + llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, + const std::vector &FeaturesVec) const { + std::vector UpdatedFeaturesVec; + // Parse the CPU and add any implied features. 
+  std::optional<llvm::AArch64::CpuInfo> CpuInfo = llvm::AArch64::parseCpu(CPU);
+  if (CpuInfo) {
+    auto Exts = CpuInfo->getImpliedExtensions();
+    std::vector<StringRef> CPUFeats;
+    llvm::AArch64::getExtensionFeatures(Exts, CPUFeats);
+    for (auto F : CPUFeats) {
+      assert((F[0] == '+' || F[0] == '-') && "Expected +/- in target feature!");
+      UpdatedFeaturesVec.push_back(F.str());
+    }
+  }
+
+  // Process target and dependent features. This is done in two loops, both
+  // collecting into UpdatedFeaturesVec: the first adds dependent '+' features,
+  // the second adds the target's '+'/'-' features, which can disable some of
+  // the features added in the first loop. Function Multi Versioning features
+  // begin with '?'.
+  for (const auto &Feature : FeaturesVec)
+    if (((Feature[0] == '?' || Feature[0] == '+')) &&
+        AArch64TargetInfo::doesFeatureAffectCodeGen(Feature.substr(1))) {
+      StringRef DepFeatures =
+          AArch64TargetInfo::getFeatureDependencies(Feature.substr(1));
+      SmallVector<StringRef, 8> AttrFeatures;
+      DepFeatures.split(AttrFeatures, ",");
+      for (auto F : AttrFeatures)
+        UpdatedFeaturesVec.push_back(F.str());
+    }
+  for (const auto &Feature : FeaturesVec)
+    if (Feature[0] != '?') {
+      std::string UpdatedFeature = Feature;
+      if (Feature[0] == '+') {
+        std::optional<llvm::AArch64::ExtensionInfo> Extension =
+            llvm::AArch64::parseArchExtension(Feature.substr(1));
+        if (Extension)
+          UpdatedFeature = Extension->Feature.str();
+      }
+      UpdatedFeaturesVec.push_back(UpdatedFeature);
+    }
+
+  return TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec);
+}
+
 // Parse AArch64 Target attributes, which are a comma separated list of:
 //  "arch=<arch>" - parsed to features as per -march=..
 //  "cpu=<cpu>" - parsed to features as per -mcpu=.., with CPU set to <cpu>
 //  "tune=<cpu>" - TuneCPU set to <cpu>
 //  "feature", "no-feature" - Add (or remove) feature.
 //  "+feature", "+nofeature" - Add (or remove) feature.
-//
-// A feature may correspond to an Extension (anything with a corresponding
-// AEK_), in which case an ExtensionSet is used to parse it and expand its
-// dependencies. Otherwise the feature is passed through (e.g. +v8.1a,
-// +outline-atomics, -fmv, etc). Features coming from the command line are
-// already parsed, therefore their dependencies do not need expansion.
 ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const {
   ParsedTargetAttr Ret;
   if (Features == "default")
@@ -1071,26 +1112,23 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const {
   bool FoundArch = false;

   auto SplitAndAddFeatures = [](StringRef FeatString,
-                                std::vector<std::string> &Features,
-                                llvm::AArch64::ExtensionSet &FeatureBits) {
+                                std::vector<std::string> &Features) {
     SmallVector<StringRef, 8> SplitFeatures;
     FeatString.split(SplitFeatures, StringRef("+"), -1, false);
     for (StringRef Feature : SplitFeatures) {
-      if (FeatureBits.parseModifier(Feature, /* AllowNoDashForm = */ true))
-        continue;
-      // Pass through features that are not extensions, e.g. +v8.1a,
-      // +outline-atomics, -fmv, etc.
-      if (Feature.starts_with("no"))
-        Features.push_back("-" + Feature.drop_front(2).str());
+      StringRef FeatureName = llvm::AArch64::getArchExtFeature(Feature);
+      if (!FeatureName.empty())
+        Features.push_back(FeatureName.str());
       else
-        Features.push_back("+" + Feature.str());
+        // Push the original feature string so a Sema error can be emitted
+        // later, when the feature is checked.
+        if (Feature.starts_with("no"))
+          Features.push_back("-" + Feature.drop_front(2).str());
+        else
+          Features.push_back("+" + Feature.str());
     }
   };
-  llvm::AArch64::ExtensionSet FeatureBits;
-  // Reconstruct the bitset from the command line option features.
- FeatureBits.reconstructFromParsedFeatures(getTargetOpts().FeaturesAsWritten); - for (auto &Feature : AttrFeatures) { Feature = Feature.trim(); if (Feature.starts_with("fpmath=")) @@ -1113,9 +1151,9 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { // Ret.Features. if (!AI) continue; - FeatureBits.addArchDefaults(*AI); + Ret.Features.push_back(AI->ArchFeature.str()); // Add any extra features, after the + - SplitAndAddFeatures(Split.second, Ret.Features, FeatureBits); + SplitAndAddFeatures(Split.second, Ret.Features); } else if (Feature.starts_with("cpu=")) { if (!Ret.CPU.empty()) Ret.Duplicate = "cpu="; @@ -1125,10 +1163,7 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { std::pair Split = Feature.split("=").second.trim().split("+"); Ret.CPU = Split.first; - if (auto CpuInfo = llvm::AArch64::parseCpu(Ret.CPU)) { - FeatureBits.addCPUDefaults(*CpuInfo); - SplitAndAddFeatures(Split.second, Ret.Features, FeatureBits); - } + SplitAndAddFeatures(Split.second, Ret.Features); } } else if (Feature.starts_with("tune=")) { if (!Ret.Tune.empty()) @@ -1136,19 +1171,25 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { else Ret.Tune = Feature.split("=").second.trim(); } else if (Feature.starts_with("+")) { - SplitAndAddFeatures(Feature, Ret.Features, FeatureBits); + SplitAndAddFeatures(Feature, Ret.Features); + } else if (Feature.starts_with("no-")) { + StringRef FeatureName = + llvm::AArch64::getArchExtFeature(Feature.split("-").second); + if (!FeatureName.empty()) + Ret.Features.push_back("-" + FeatureName.drop_front(1).str()); + else + Ret.Features.push_back("-" + Feature.split("-").second.str()); } else { - if (FeatureBits.parseModifier(Feature, /* AllowNoDashForm = */ true)) - continue; - // Pass through features that are not extensions, e.g. +v8.1a, - // +outline-atomics, -fmv, etc. - if (Feature.starts_with("no-")) - Ret.Features.push_back("-" + Feature.drop_front(3).str()); + // Try parsing the string to the internal target feature name. If it is + // invalid, add the original string (which could already be an internal + // name). These should be checked later by isValidFeatureName. + StringRef FeatureName = llvm::AArch64::getArchExtFeature(Feature); + if (!FeatureName.empty()) + Ret.Features.push_back(FeatureName.str()); else Ret.Features.push_back("+" + Feature.str()); } } - FeatureBits.toLLVMFeatureList(Ret.Features); return Ret; } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 696553ef8038a8..12fb50286f7511 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -107,6 +107,10 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { unsigned multiVersionSortPriority(StringRef Name) const override; unsigned multiVersionFeatureCost() const override; + bool + initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector &FeaturesVec) const override; bool useFP16ConversionIntrinsics() const override { return false; } diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index bf50f2025de573..5dac1cd425bf61 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -577,7 +577,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // First, 'this'. 
if (block->capturesCXXThis()) { - assert(CGF && CGF->CurFuncDecl && isa(CGF->CurFuncDecl) && + assert(CGF && isa_and_nonnull(CGF->CurFuncDecl) && "Can't capture 'this' outside a method"); QualType thisType = cast(CGF->CurFuncDecl)->getThisType(); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c16b69ba875679..06e201fa71e6ff 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2923,6 +2923,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, SetSqrtFPAccuracy(Call); return RValue::get(Call); } + + case Builtin::BItan: + case Builtin::BItanf: + case Builtin::BItanl: + case Builtin::BI__builtin_tan: + case Builtin::BI__builtin_tanf: + case Builtin::BI__builtin_tanf16: + case Builtin::BI__builtin_tanl: + case Builtin::BI__builtin_tanf128: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); + case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index b8cb78266130c8..5a032bdbf93791 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -859,7 +859,7 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { // Enter the function-try-block before the constructor prologue if // applicable. - bool IsTryBody = (Body && isa(Body)); + bool IsTryBody = isa_and_nonnull(Body); if (IsTryBody) EnterCXXTryStmt(*cast(Body), true); @@ -1475,7 +1475,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // If the body is a function-try-block, enter the try before // anything else. - bool isTryBody = (Body && isa(Body)); + bool isTryBody = isa_and_nonnull(Body); if (isTryBody) EnterCXXTryStmt(*cast(Body), true); EmitAsanPrologueOrEpilogue(false); diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index b047279912f6b7..a88bb2af59fee0 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -476,6 +476,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction( !isInNoSanitizeList(SanitizerKind::Thread, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (getLangOpts().Sanitize.has(SanitizerKind::NumericalStability) && + !isInNoSanitizeList(SanitizerKind::NumericalStability, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeNumericalStability); + if (getLangOpts().Sanitize.has(SanitizerKind::Memory) && !isInNoSanitizeList(SanitizerKind::Memory, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d6478cc6835d82..48d8ca34788621 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3571,9 +3571,8 @@ void CodeGenFunction::EmitCheck( llvm::BasicBlock *Handlers = createBasicBlock("handler." 
+ CheckName); llvm::Instruction *Branch = Builder.CreateCondBr(JointCond, Cont, Handlers); // Give hint that we very much don't expect to execute the handler - // Value chosen to match UR_NONTAKEN_WEIGHT, see BranchProbabilityInfo.cpp llvm::MDBuilder MDHelper(getLLVMContext()); - llvm::MDNode *Node = MDHelper.createBranchWeights((1U << 20) - 1, 1); + llvm::MDNode *Node = MDHelper.createLikelyBranchWeights(); Branch->setMetadata(llvm::LLVMContext::MD_prof, Node); EmitBlock(Handlers); @@ -3641,7 +3640,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck( llvm::BranchInst *BI = Builder.CreateCondBr(Cond, Cont, CheckBB); llvm::MDBuilder MDHelper(getLLVMContext()); - llvm::MDNode *Node = MDHelper.createBranchWeights((1U << 20) - 1, 1); + llvm::MDNode *Node = MDHelper.createLikelyBranchWeights(); BI->setMetadata(llvm::LLVMContext::MD_prof, Node); EmitBlock(CheckBB); diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 9ef73e36f66f35..f19334489a0ba5 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -856,8 +856,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { llvm::BasicBlock *OrigBB = Branch->getParent(); // Give hint that we very much don't expect to see NaNs. - // Value chosen to match UR_NONTAKEN_WEIGHT, see BranchProbabilityInfo.cpp - llvm::MDNode *BrWeight = MDHelper.createBranchWeights(1, (1U << 20) - 1); + llvm::MDNode *BrWeight = MDHelper.createUnlikelyBranchWeights(); Branch->setMetadata(llvm::LLVMContext::MD_prof, BrWeight); // Now test the imaginary part and create its branch. diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 4eb65b34a89f56..0712f40fd8215a 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -715,7 +715,7 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { const Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) Init = ILE->getInit(ElementNo++); - if (Init && isa(Init)) + if (isa_and_nonnull(Init)) continue; // Zero-sized fields are not emitted, but their initializers may still diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 6540ac69f2d9b0..948b10954ebbed 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -2069,7 +2069,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Builder.CreateCondBr(Builder.CreateICmpEQ(selfValue, Zero), SelfIsNilBlock, ContBlock, - MDHelper.createBranchWeights(1, 1 << 20)); + MDHelper.createUnlikelyBranchWeights()); CGF.EmitBlock(SelfIsNilBlock); @@ -2104,7 +2104,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { CGF.createBasicBlock("objc_direct_method.class_initialized"); Builder.CreateCondBr(Builder.CreateICmpEQ(isInitialized, Zeros[0]), notInitializedBlock, initializedBlock, - MDHelper.createBranchWeights(1, 1 << 20)); + MDHelper.createUnlikelyBranchWeights()); CGF.EmitBlock(notInitializedBlock); Builder.SetInsertPoint(notInitializedBlock); CGF.EmitRuntimeCall(SentInitializeFn, selfValue); diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 042cd5d46da4b2..30f3911a8b03c2 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -4072,7 +4072,7 @@ void CGObjCCommonMac::GenerateDirectMethodPrologue( llvm::MDBuilder MDHelper(CGM.getLLVMContext()); Builder.CreateCondBr(Builder.CreateICmpEQ(selfValue, Zero), SelfIsNilBlock, - ContBlock, MDHelper.createBranchWeights(1, 1 << 20)); + ContBlock, 
MDHelper.createUnlikelyBranchWeights()); CGF.EmitBlock(SelfIsNilBlock); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 6410f9e102c907..f73d32de7c4848 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -72,7 +72,7 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { return CGF.LambdaCaptureFields.lookup(VD) || (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || - (CGF.CurCodeDecl && isa(CGF.CurCodeDecl) && + (isa_and_nonnull(CGF.CurCodeDecl) && cast(CGF.CurCodeDecl)->capturesVariable(VD)); } @@ -227,7 +227,7 @@ class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { return CGF.LambdaCaptureFields.lookup(VD) || (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || - (CGF.CurCodeDecl && isa(CGF.CurCodeDecl) && + (isa_and_nonnull(CGF.CurCodeDecl) && cast(CGF.CurCodeDecl)->capturesVariable(VD)); } @@ -315,7 +315,7 @@ LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { bool IsCaptured = LambdaCaptureFields.lookup(OrigVD) || (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || - (CurCodeDecl && isa(CurCodeDecl)); + (isa_and_nonnull(CurCodeDecl)); DeclRefExpr DRE(getContext(), const_cast(OrigVD), IsCaptured, OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); return EmitLValue(&DRE); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index f0345f3b191b88..cea0d84c64bc47 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -818,6 +818,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Fn->addFnAttr(llvm::Attribute::SanitizeMemTag); if (SanOpts.has(SanitizerKind::Thread)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (SanOpts.has(SanitizerKind::NumericalStability)) + Fn->addFnAttr(llvm::Attribute::SanitizeNumericalStability); if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); } @@ -2951,7 +2953,7 @@ void CodeGenFunction::emitAlignmentAssumptionCheck( SourceLocation SecondaryLoc, llvm::Value *Alignment, llvm::Value *OffsetValue, llvm::Value *TheCheck, llvm::Instruction *Assumption) { - assert(Assumption && isa(Assumption) && + assert(isa_and_nonnull(Assumption) && cast(Assumption)->getCalledOperand() == llvm::Intrinsic::getDeclaration( Builder.GetInsertBlock()->getParent()->getParent(), diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 75b14490903890..dd4a665ebc78b7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4512,6 +4512,19 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { return Resolver; } +bool CodeGenModule::shouldDropDLLAttribute(const Decl *D, + const llvm::GlobalValue *GV) const { + auto SC = GV->getDLLStorageClass(); + if (SC == llvm::GlobalValue::DefaultStorageClass) + return false; + const Decl *MRD = D->getMostRecentDecl(); + return (((SC == llvm::GlobalValue::DLLImportStorageClass && + !MRD->hasAttr()) || + (SC == llvm::GlobalValue::DLLExportStorageClass && + !MRD->hasAttr())) && + !shouldMapVisibilityToDLLExport(cast(MRD))); +} + /// GetOrCreateLLVMFunction - If the specified mangled name is not in the /// module, create and return an llvm Function with the specified type. 
If there /// is something in the module with the specified name, return it potentially @@ -4564,8 +4577,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( } // Handle dropped DLL attributes. - if (D && !D->hasAttr() && !D->hasAttr() && - !shouldMapVisibilityToDLLExport(cast_or_null(D))) { + if (D && shouldDropDLLAttribute(D, Entry)) { Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); setDSOLocal(Entry); } @@ -4859,8 +4871,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, } // Handle dropped DLL attributes. - if (D && !D->hasAttr() && !D->hasAttr() && - !shouldMapVisibilityToDLLExport(D)) + if (D && shouldDropDLLAttribute(D, Entry)) Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D) diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index dc24971a3c1862..9b63f47ef42cba 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1594,6 +1594,8 @@ class CodeGenModule : public CodeGenTypeCache { } private: + bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; + llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, bool DontDefer = false, bool IsThunk = false, diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 273f215ca94a88..86825a6ccf7a1d 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -41,7 +41,8 @@ static const SanitizerMask NotAllowedWithExecuteOnly = SanitizerKind::Function | SanitizerKind::KCFI; static const SanitizerMask NeedsUnwindTables = SanitizerKind::Address | SanitizerKind::HWAddress | SanitizerKind::Thread | - SanitizerKind::Memory | SanitizerKind::DataFlow; + SanitizerKind::Memory | SanitizerKind::DataFlow | + SanitizerKind::NumericalStability; static const SanitizerMask SupportsCoverage = SanitizerKind::Address | SanitizerKind::HWAddress | SanitizerKind::KernelAddress | SanitizerKind::KernelHWAddress | @@ -53,7 +54,8 @@ static const SanitizerMask SupportsCoverage = SanitizerKind::DataFlow | SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero | SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack | - SanitizerKind::Thread | SanitizerKind::ObjCCast | SanitizerKind::KCFI; + SanitizerKind::Thread | SanitizerKind::ObjCCast | SanitizerKind::KCFI | + SanitizerKind::NumericalStability; static const SanitizerMask RecoverableByDefault = SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | @@ -175,6 +177,7 @@ static void addDefaultIgnorelists(const Driver &D, SanitizerMask Kinds, {"hwasan_ignorelist.txt", SanitizerKind::HWAddress}, {"memtag_ignorelist.txt", SanitizerKind::MemTag}, {"msan_ignorelist.txt", SanitizerKind::Memory}, + {"nsan_ignorelist.txt", SanitizerKind::NumericalStability}, {"tsan_ignorelist.txt", SanitizerKind::Thread}, {"dfsan_abilist.txt", SanitizerKind::DataFlow}, {"cfi_ignorelist.txt", SanitizerKind::CFI}, diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 593b403a1e3f05..ed5737915aa96b 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -3448,6 +3448,7 @@ SanitizerMask Darwin::getSupportedSanitizers() const { Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; Res |= SanitizerKind::Leak; + Res |= 
SanitizerKind::NumericalStability; Res |= SanitizerKind::Fuzzer; Res |= SanitizerKind::FuzzerNoLink; Res |= SanitizerKind::ObjCCast; diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index db2c20d7b461d0..2c583ac724a2a2 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -826,6 +826,9 @@ SanitizerMask Linux::getSupportedSanitizers() const { if (IsX86_64 || IsAArch64) { Res |= SanitizerKind::KernelHWAddress; } + if (IsX86_64 || IsAArch64) + Res |= SanitizerKind::NumericalStability; + // Work around "Cannot represent a difference across sections". if (getTriple().getArch() == llvm::Triple::ppc64) Res &= ~SanitizerKind::Function; diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index be684ac71cd614..b07360425ca6e1 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1257,6 +1257,11 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { } return CurrentState.Indent; } + if (Current.is(TT_TrailingReturnArrow) && + Previous.isOneOf(tok::kw_noexcept, tok::kw_mutable, tok::kw_constexpr, + tok::kw_consteval, tok::kw_static, TT_AttributeSquare)) { + return ContinuationIndent; + } if ((Current.isOneOf(tok::r_brace, tok::r_square) || (Current.is(tok::greater) && (Style.isProto() || Style.isTableGen()))) && State.Stack.size() > 1) { diff --git a/clang/lib/Index/CommentToXML.cpp b/clang/lib/Index/CommentToXML.cpp index 3372fbba438317..cd7226e71171c2 100644 --- a/clang/lib/Index/CommentToXML.cpp +++ b/clang/lib/Index/CommentToXML.cpp @@ -546,7 +546,8 @@ class CommentASTToXMLConverter : void visitParagraphComment(const ParagraphComment *C); void appendParagraphCommentWithKind(const ParagraphComment *C, - StringRef Kind); + StringRef ParagraphKind, + StringRef PrependBodyText); void visitBlockCommandComment(const BlockCommandComment *C); void visitParamCommandComment(const ParamCommandComment *C); @@ -680,15 +681,15 @@ CommentASTToXMLConverter::visitHTMLEndTagComment(const HTMLEndTagComment *C) { Result << "></" << C->getTagName() << ">"; } -void -CommentASTToXMLConverter::visitParagraphComment(const ParagraphComment *C) { - appendParagraphCommentWithKind(C, StringRef()); +void CommentASTToXMLConverter::visitParagraphComment( + const ParagraphComment *C) { + appendParagraphCommentWithKind(C, StringRef(), StringRef()); } void CommentASTToXMLConverter::appendParagraphCommentWithKind( - const ParagraphComment *C, - StringRef ParagraphKind) { - if (C->isWhitespace()) + const ParagraphComment *C, StringRef ParagraphKind, + StringRef PrependBodyText) { + if (C->isWhitespace() && PrependBodyText.empty()) return; if (ParagraphKind.empty()) @@ -696,8 +697,11 @@ void CommentASTToXMLConverter::appendParagraphCommentWithKind( else Result << ""; - for (Comment::child_iterator I = C->child_begin(), E = C->child_end(); - I != E; ++I) { + if (!PrependBodyText.empty()) + Result << PrependBodyText << " "; + + for (Comment::child_iterator I = C->child_begin(), E = C->child_end(); I != E; + ++I) { visit(*I); } Result << ""; @@ -706,8 +710,15 @@ void CommentASTToXMLConverter::appendParagraphCommentWithKind( void CommentASTToXMLConverter::visitBlockCommandComment( const BlockCommandComment *C) { StringRef ParagraphKind; + StringRef ExceptionType; - switch (C->getCommandID()) { + const unsigned CommandID = C->getCommandID(); + const CommandInfo *Info = Traits.getCommandInfo(CommandID); + if (Info->IsThrowsCommand && 
C->getNumArgs() > 0) { + ExceptionType = C->getArgText(0); + } + + switch (CommandID) { case CommandTraits::KCI_attention: case CommandTraits::KCI_author: case CommandTraits::KCI_authors: @@ -732,7 +743,8 @@ void CommentASTToXMLConverter::visitBlockCommandComment( break; } - appendParagraphCommentWithKind(C->getParagraph(), ParagraphKind); + appendParagraphCommentWithKind(C->getParagraph(), ParagraphKind, + ExceptionType); } void CommentASTToXMLConverter::visitParamCommandComment( diff --git a/clang/lib/Index/IndexBody.cpp b/clang/lib/Index/IndexBody.cpp index 08136baa5d408e..c18daf7faa7497 100644 --- a/clang/lib/Index/IndexBody.cpp +++ b/clang/lib/Index/IndexBody.cpp @@ -268,7 +268,7 @@ class BodyIndexer : public RecursiveASTVisitor { } return true; }; - bool IsPropCall = Containing && isa(Containing); + bool IsPropCall = isa_and_nonnull(Containing); // Implicit property message sends are not 'implicit'. if ((E->isImplicit() || IsPropCall) && !(IsPropCall && diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 8af4a97d00cb82..f085b943716442 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -226,7 +226,7 @@ void Preprocessor::updateModuleMacroInfo(const IdentifierInfo *II, bool IsSystemMacro = true; bool IsAmbiguous = false; if (auto *MD = Info.MD) { - while (MD && isa(MD)) + while (isa_and_nonnull(MD)) MD = MD->getPrevious(); if (auto *DMD = dyn_cast_or_null(MD)) { MI = DMD->getInfo(); diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index b9d0b59ef1db73..0f604c61fa3af9 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -442,7 +442,7 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) { if (!live[B->getBlockID()]) { if (B->pred_begin() == B->pred_end()) { const Stmt *Term = B->getTerminatorStmt(); - if (Term && isa(Term)) + if (isa_and_nonnull(Term)) // When not adding EH edges from calls, catch clauses // can otherwise seem dead. Avoid noting them as dead. count += reachable_code::ScanReachableFromBlock(B, live); @@ -1100,7 +1100,7 @@ namespace { // issue a warn_fallthrough_attr_unreachable for them. for (const auto *B : *Cfg) { const Stmt *L = B->getLabel(); - if (L && isa(L) && ReachableBlocks.insert(B).second) + if (isa_and_nonnull(L) && ReachableBlocks.insert(B).second) BlockQueue.push_back(B); } @@ -1128,7 +1128,7 @@ namespace { if (!P) continue; const Stmt *Term = P->getTerminatorStmt(); - if (Term && isa(Term)) + if (isa_and_nonnull(Term)) continue; // Switch statement, good. const SwitchCase *SW = dyn_cast_or_null(P->getLabel()); @@ -1327,7 +1327,7 @@ static void DiagnoseSwitchLabelsFallthrough(Sema &S, AnalysisDeclContext &AC, B = *B->succ_begin(); Term = B->getTerminatorStmt(); } - if (!(B->empty() && Term && isa(Term))) { + if (!(B->empty() && isa_and_nonnull(Term))) { Preprocessor &PP = S.getPreprocessor(); StringRef AnnotationSpelling = getFallthroughAttrSpelling(PP, L); SmallString<64> TextToInsert(AnnotationSpelling); diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 80ea43dc5316eb..580b9872c6a1de 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -1018,24 +1018,33 @@ void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD, // HD/global functions "exist" in some sense on both the host and device, so // should have the same implementation on both sides. 
if (NewTarget != OldTarget && - ((NewTarget == CUDAFunctionTarget::HostDevice && - !(getLangOpts().OffloadImplicitHostDeviceTemplates && - isImplicitHostDeviceFunction(NewFD) && - OldTarget == CUDAFunctionTarget::Device)) || - (OldTarget == CUDAFunctionTarget::HostDevice && - !(getLangOpts().OffloadImplicitHostDeviceTemplates && - isImplicitHostDeviceFunction(OldFD) && - NewTarget == CUDAFunctionTarget::Device)) || - (NewTarget == CUDAFunctionTarget::Global) || - (OldTarget == CUDAFunctionTarget::Global)) && !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, /* ConsiderCudaAttrs = */ false)) { - Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) - << llvm::to_underlying(NewTarget) << NewFD->getDeclName() - << llvm::to_underlying(OldTarget) << OldFD; - Diag(OldFD->getLocation(), diag::note_previous_declaration); - NewFD->setInvalidDecl(); - break; + if ((NewTarget == CUDAFunctionTarget::HostDevice && + !(getLangOpts().OffloadImplicitHostDeviceTemplates && + isImplicitHostDeviceFunction(NewFD) && + OldTarget == CUDAFunctionTarget::Device)) || + (OldTarget == CUDAFunctionTarget::HostDevice && + !(getLangOpts().OffloadImplicitHostDeviceTemplates && + isImplicitHostDeviceFunction(OldFD) && + NewTarget == CUDAFunctionTarget::Device)) || + (NewTarget == CUDAFunctionTarget::Global) || + (OldTarget == CUDAFunctionTarget::Global)) { + Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) + << llvm::to_underlying(NewTarget) << NewFD->getDeclName() + << llvm::to_underlying(OldTarget) << OldFD; + Diag(OldFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + break; + } + if ((NewTarget == CUDAFunctionTarget::Host && + OldTarget == CUDAFunctionTarget::Device) || + (NewTarget == CUDAFunctionTarget::Device && + OldTarget == CUDAFunctionTarget::Host)) { + Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare) + << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget); + Diag(OldFD->getLocation(), diag::note_previous_declaration); + } } } } diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp index c405fbc0aa421b..da88b6cae6e361 100644 --- a/clang/lib/Sema/SemaCXXScopeSpec.cpp +++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp @@ -974,7 +974,7 @@ bool Sema::ActOnCXXNestedNameSpecifier(Scope *S, R.setBegin(SS.getRange().getBegin()); Diag(CCLoc, diag::err_non_type_template_in_nested_name_specifier) - << (TD && isa(TD)) << Template << R; + << isa_and_nonnull(TD) << Template << R; NoteAllFoundTemplates(Template); return true; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 300af02239779f..07cd0727eb3f4a 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3839,11 +3839,11 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, if (CallType != VariadicDoesNotApply && (!FD || FD->getBuiltinID() != Builtin::BI__noop)) { unsigned NumParams = Proto ? Proto->getNumParams() - : FDecl && isa(FDecl) - ? cast(FDecl)->getNumParams() - : FDecl && isa(FDecl) - ? cast(FDecl)->param_size() - : 0; + : isa_and_nonnull(FDecl) + ? cast(FDecl)->getNumParams() + : isa_and_nonnull(FDecl) + ? cast(FDecl)->param_size() + : 0; for (unsigned ArgIdx = NumParams; ArgIdx < Args.size(); ++ArgIdx) { // Args[ArgIdx] can be null in malformed code. 
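The restructured check above keeps the hard error (err_cuda_ovl_target) for conflicts involving __host__ __device__ or __global__ functions, and adds a second branch: a plain host-vs-device redeclaration with the same signature, which clang quietly accepts as a target-based overload, now raises the new warn_offload_incompatible_redeclare warning, since NVCC would treat the pair as a redeclaration. The warning is DefaultIgnore, so it must be enabled through its warning group. A sketch of code that hits the new branch (hypothetical):

    // redeclare.cu: same signature, different targets.
    __host__ int transform(int x);   // old declaration: __host__
    __device__ int transform(int x); // new declaration: __device__ -> warning

Overloads that differ in signature are unaffected; pairs involving __host__ __device__ or __global__ still take the error path.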
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index cf461a68d55263..f3af8dee6b090c 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -2074,7 +2074,7 @@ ExprResult Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, if (DirectInitRange.isValid()) { assert(Initializer && "Have parens but no initializer."); InitStyle = CXXNewInitializationStyle::Parens; - } else if (Initializer && isa(Initializer)) + } else if (isa_and_nonnull(Initializer)) InitStyle = CXXNewInitializationStyle::Braces; else { assert((!Initializer || isa(Initializer) || @@ -3823,7 +3823,7 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, // Otherwise, the usual operator delete[] should be the // function we just found. - else if (OperatorDelete && isa(OperatorDelete)) + else if (isa_and_nonnull(OperatorDelete)) UsualArrayDeleteWantsSize = UsualDeallocFnInfo(*this, DeclAccessPair::make(OperatorDelete, AS_public)) @@ -8595,7 +8595,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures( assert(S.CurContext->isDependentContext()); #ifndef NDEBUG DeclContext *DC = S.CurContext; - while (DC && isa(DC)) + while (isa_and_nonnull(DC)) DC = DC->getParent(); assert( CurrentLSI->CallOperator == DC && @@ -9172,7 +9172,7 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC, // - Teach the handful of places that iterate over FunctionScopes to // stop at the outermost enclosing lexical scope." DeclContext *DC = CurContext; - while (DC && isa(DC)) + while (isa_and_nonnull(DC)) DC = DC->getParent(); const bool IsInLambdaDeclContext = isLambdaCallOperator(DC); if (IsInLambdaDeclContext && CurrentLSI && diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index ed8b226a6b39f5..7244f3ef4e829e 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -2194,7 +2194,7 @@ void InitListChecker::CheckStructUnionTypes( // Designated inits always initialize fields, so if we see one, all // remaining base classes have no explicit initializer. - if (Init && isa(Init)) + if (isa_and_nonnull(Init)) Init = nullptr; // C++ [over.match.class.deduct]p1.6: @@ -6350,7 +6350,7 @@ void InitializationSequence::InitializeFrom(Sema &S, // class member of array type from a parenthesized initializer list. else if (S.getLangOpts().CPlusPlus && Entity.getKind() == InitializedEntity::EK_Member && - Initializer && isa(Initializer)) { + isa_and_nonnull(Initializer)) { TryListInitialization(S, Entity, Kind, cast(Initializer), *this, TreatUnavailableAsInvalid); AddParenthesizedArrayInitStep(DestType); @@ -8793,7 +8793,7 @@ ExprResult InitializationSequence::Perform(Sema &S, // constant expressions here in order to perform narrowing checks =( EnterExpressionEvaluationContext Evaluated( S, EnterExpressionEvaluationContext::InitList, - CurInit.get() && isa(CurInit.get())); + isa_and_nonnull(CurInit.get())); // C++ [class.abstract]p2: // no objects of an abstract class can be created except as subobjects diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 6e6815328e9139..5c759aedf9798a 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6198,18 +6198,17 @@ class TeamsLoopChecker final : public ConstStmtVisitor { // unless the assume-no-nested-parallelism flag has been specified. // OpenMP API runtime library calls do not inhibit parallel loop // translation, regardless of the assume-no-nested-parallelism. 
- if (C) { - bool IsOpenMPAPI = false; - auto *FD = dyn_cast_or_null<FunctionDecl>(C->getCalleeDecl()); - if (FD) { - std::string Name = FD->getNameInfo().getAsString(); - IsOpenMPAPI = Name.find("omp_") == 0; - } - TeamsLoopCanBeParallelFor = - IsOpenMPAPI || SemaRef.getLangOpts().OpenMPNoNestedParallelism; - if (!TeamsLoopCanBeParallelFor) - return; - } + bool IsOpenMPAPI = false; + auto *FD = dyn_cast_or_null<FunctionDecl>(C->getCalleeDecl()); + if (FD) { + std::string Name = FD->getNameInfo().getAsString(); + IsOpenMPAPI = Name.find("omp_") == 0; + } + TeamsLoopCanBeParallelFor = + IsOpenMPAPI || SemaRef.getLangOpts().OpenMPNoNestedParallelism; + if (!TeamsLoopCanBeParallelFor) + return; + for (const Stmt *Child : C->children()) if (Child) Visit(Child); @@ -24331,7 +24330,7 @@ SemaOpenMP::ActOnOpenMPHasDeviceAddrClause(ArrayRef<Expr *> VarList, OMPClause *SemaOpenMP::ActOnOpenMPAllocateClause( Expr *Allocator, ArrayRef<Expr *> VarList, SourceLocation StartLoc, - SourceLocation ColonLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { + SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc) { if (Allocator) { // OpenMP [2.11.4 allocate Clause, Description] // allocator is an expression of omp_allocator_handle_t type. diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 57465d4a77ac29..411e9af26f2b7b 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3701,7 +3701,7 @@ bool Sema::DeduceFunctionTypeFromReturnExpr(FunctionDecl *FD, if (isLambdaConversionOperator(FD)) return false; - if (RetExpr && isa<InitListExpr>(RetExpr)) { + if (isa_and_nonnull<InitListExpr>(RetExpr)) { // If the deduction is for a return statement and the initializer is // a braced-init-list, the program is ill-formed. Diag(RetExpr->getExprLoc(), diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 40a759ea330de4..a032e3ec6f6353 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1936,7 +1936,7 @@ DeclResult Sema::CheckClassTemplate( // We may have found the injected-class-name of a class template, // class template partial specialization, or class template specialization. // In these cases, grab the template that is being defined or specialized.
- if (!PrevClassTemplate && PrevDecl && isa<CXXRecordDecl>(PrevDecl) && + if (!PrevClassTemplate && isa_and_nonnull<CXXRecordDecl>(PrevDecl) && cast<CXXRecordDecl>(PrevDecl)->isInjectedClassName()) { PrevDecl = cast<CXXRecordDecl>(PrevDecl->getDeclContext()); PrevClassTemplate diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 441fdcca0758f9..9c0d043725dde1 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8086,23 +8086,21 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr, // Target must have NEON (or MVE, whose vectors are similar enough // not to need a separate attribute) - if (!(S.Context.getTargetInfo().hasFeature("neon") || - S.Context.getTargetInfo().hasFeature("mve") || - S.Context.getTargetInfo().hasFeature("sve") || - S.Context.getTargetInfo().hasFeature("sme") || + if (!(S.Context.getTargetInfo().hasFeature("mve") || IsTargetCUDAAndHostARM) && - VecKind == VectorKind::Neon) { + VecKind == VectorKind::Neon && + S.Context.getTargetInfo().getTriple().isArmMClass()) { S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) - << Attr << "'neon', 'mve', 'sve' or 'sme'"; + << Attr << "'mve'"; Attr.setInvalid(); return; } - if (!(S.Context.getTargetInfo().hasFeature("neon") || - S.Context.getTargetInfo().hasFeature("mve") || + if (!(S.Context.getTargetInfo().hasFeature("mve") || IsTargetCUDAAndHostARM) && - VecKind == VectorKind::NeonPoly) { + VecKind == VectorKind::NeonPoly && + S.Context.getTargetInfo().getTriple().isArmMClass()) { S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) - << Attr << "'neon' or 'mve'"; + << Attr << "'mve'"; Attr.setInvalid(); return; } diff --git a/clang/test/AST/Interp/cxx23.cpp b/clang/test/AST/Interp/cxx23.cpp index 1efd784abbbe8f..d0991f3ffdff5e 100644 --- a/clang/test/AST/Interp/cxx23.cpp +++ b/clang/test/AST/Interp/cxx23.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: target={{.*}}-zos{{.*}} -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref20,all,all20 %s -// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref23,all %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref,ref20,all,all20 %s +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref,ref23,all %s // RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter // RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23,all %s -fexperimental-new-constant-interpreter @@ -200,3 +200,15 @@ namespace UndefinedThreeWay { static_assert(!(*test_a_threeway)(A(), A())); // all-error {{static assertion expression is not an integral constant expression}} \ // all-note {{undefined function 'operator<=>' cannot be used in a constant expression}} } + +/// FIXME: The new interpreter is missing the "initializer of q is not a constant expression" diagnostic. +/// That's because the cast from void* to int* is considered fine, but diagnosed. So we don't consider +/// q to be uninitialized.
+namespace VoidCast { + constexpr void* p = nullptr; + constexpr int* q = static_cast<int*>(p); // all-error {{must be initialized by a constant expression}} \ + // all-note {{cast from 'void *' is not allowed in a constant expression}} \ + // ref-note {{declared here}} + static_assert(q == nullptr); // ref-error {{not an integral constant expression}} \ + // ref-note {{initializer of 'q' is not a constant expression}} +} diff --git a/clang/test/AST/Interp/cxx26.cpp b/clang/test/AST/Interp/cxx26.cpp new file mode 100644 index 00000000000000..0b0e2b21e8201e --- /dev/null +++ b/clang/test/AST/Interp/cxx26.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -std=c++26 -fsyntax-only -fcxx-exceptions -verify=ref,both %s +// RUN: %clang_cc1 -std=c++26 -fsyntax-only -fcxx-exceptions -verify=expected,both %s -fexperimental-new-constant-interpreter + +// both-no-diagnostics + +namespace VoidCast { + constexpr void* p = nullptr; + constexpr int* q = static_cast<int*>(p); + static_assert(q == nullptr); +} diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c index 093239b4482609..1e0f129b986102 100644 --- a/clang/test/CodeGen/X86/math-builtins.c +++ b/clang/test/CodeGen/X86/math-builtins.c @@ -674,10 +674,10 @@ __builtin_sqrt(f); __builtin_sqrtf(f); __builtin_sqrtl(f); __builtin_ __builtin_tan(f); __builtin_tanf(f); __builtin_tanl(f); __builtin_tanf128(f); -// NO__ERRNO: declare double @tan(double noundef) [[READNONE]] -// NO__ERRNO: declare float @tanf(float noundef) [[READNONE]] -// NO__ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[READNONE]] -// NO__ERRNO: declare fp128 @tanf128(fp128 noundef) [[READNONE]] +// NO__ERRNO: declare double @llvm.tan.f64(double) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare float @llvm.tan.f32(float) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare x86_fp80 @llvm.tan.f80(x86_fp80) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare fp128 @llvm.tan.f128(fp128) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @tan(double noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @tanf(float noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[NOT_READNONE]] diff --git a/clang/test/CodeGen/aarch64-cpu-supports-target.c b/clang/test/CodeGen/aarch64-cpu-supports-target.c index 28187bcf745331..e023944b24e53a 100644 --- a/clang/test/CodeGen/aarch64-cpu-supports-target.c +++ b/clang/test/CodeGen/aarch64-cpu-supports-target.c @@ -48,5 +48,5 @@ int test_versions() { return code(); } // CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #2 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon" } +// CHECK: attributes #2 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp index 9885ac45e6a0e0..af8933d93d6cbb 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp @@ -1,4
+1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -disable-O0-optnone -Werror -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg \ // RUN: | opt -S -passes=inline \ diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c index 644e6a692c3be9..3e7a2092456071 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -58,50 +58,58 @@ void v1msve() {} // CHECK-LABEL: @plussve() #12 __attribute__((target("+sve"))) void plussve() {} -// CHECK-LABEL: @plussveplussve2() #12 +// CHECK-LABEL: @plussveplussve2() #13 __attribute__((target("+sve+nosve2"))) void plussveplussve2() {} -// CHECK-LABEL: @plussveminusnosve2() #12 +// CHECK-LABEL: @plussveminusnosve2() #13 __attribute__((target("sve,no-sve2"))) void plussveminusnosve2() {} -// CHECK-LABEL: @plusfp16() #13 +// CHECK-LABEL: @plusfp16() #14 __attribute__((target("+fp16"))) void plusfp16() {} -// CHECK-LABEL: @all() #14 +// CHECK-LABEL: @all() #15 __attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2"))) void all() {} -// CHECK-LABEL: @allplusbranchprotection() #15 +// CHECK-LABEL: @allplusbranchprotection() #16 __attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2,branch-protection=standard"))) void allplusbranchprotection() {} -// CHECK-LABEL: @plusnosimd() #16 +// These tests check that the user-facing and internal LLVM names are both accepted. +// CHECK-LABEL: @plusnoneon() #17 +__attribute__((target("+noneon"))) +void plusnoneon() {} +// CHECK-LABEL: @plusnosimd() #17 __attribute__((target("+nosimd"))) void plusnosimd() {} -// CHECK-LABEL: @nosimd() #16 +// CHECK-LABEL: @noneon() #17 +__attribute__((target("no-neon"))) +void noneon() {} +// CHECK-LABEL: @nosimd() #17 __attribute__((target("no-simd"))) void nosimd() {} // This isn't part of the standard interface, but tests that -arch features should not apply anything else.
-// CHECK-LABEL: @minusarch() #17 +// CHECK-LABEL: @minusarch() #18 __attribute__((target("no-v9.3a"))) void minusarch() {} // CHECK: attributes #0 = { {{.*}} "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #1 = { {{.*}} "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #2 = { {{.*}} "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #3 = { {{.*}} "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #4 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #3 = { {{.*}} "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } +// CHECK: attributes #4 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm" } // CHECK: attributes #5 = { {{.*}} "tune-cpu"="cortex-a710" } // CHECK: attributes #6 = { {{.*}} "target-cpu"="generic" } // CHECK: attributes #7 = { {{.*}} "tune-cpu"="generic" } -// CHECK: attributes #8 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #9 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" } -// CHECK: attributes #10 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" } -// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" } -// CHECK: attributes #12 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+sve" } -// CHECK: attributes #13 = { {{.*}} "target-features"="+fp-armv8,+fullfp16" } -// CHECK: attributes #14 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #15 = { {{.*}} "branch-target-enforcement"="true" "guarded-control-stack"="true" {{.*}} "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK-NOT: attributes #16 = {{.*}} "target-features" -// CHECK: attributes #17 = { {{.*}} "target-features"="-v9.3a" } +// CHECK: attributes #8 = { {{.*}} "target-cpu"="neoverse-n1" 
"target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs" "tune-cpu"="cortex-a710" } +// CHECK: attributes #9 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" "tune-cpu"="cortex-a710" } +// CHECK: attributes #10 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2" } +// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,-sve" } +// CHECK: attributes #12 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } +// CHECK: attributes #13 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,-sve2" } +// CHECK: attributes #14 = { {{.*}} "target-features"="+fullfp16" } +// CHECK: attributes #15 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #16 = { {{.*}} "branch-target-enforcement"="true" "guarded-control-stack"="true" {{.*}} "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #17 = { {{.*}} "target-features"="-neon" } +// CHECK: attributes #18 = { {{.*}} "target-features"="-v9.3a" } diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 75f8734e5aaf37..3597711333d341 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -1129,42 +1129,42 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NOFMV-NEXT: ret i32 0 // //. 
-// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp-armv8,+fp16fml,+fullfp16,+neon,+rand,-v9.5a" } +// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp16fml,+fullfp16,+neon,+rand,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+altnzcv,+bf16,+flagm,+sme,+sme-i16i64,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon,+sha2,-v9.5a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+ls64,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fp16fml,+fullfp16,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,-v9.5a" } +// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,+sha2,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+ls64,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp16fml,+fullfp16,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme2,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR9:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,-v9.5a" } +// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR13]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sb,-fp-armv8,-v9.5a" } // CHECK: 
attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } +// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm,-v9.5a" } -// CHECK: attributes #[[ATTR19:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR20:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm,-v9.5a" } -// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } -// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+fp-armv8,+fullfp16,+neon,+rdm,+sme,-v9.5a" } -// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+f32mm,+fp-armv8,+fullfp16,+i8mm,+neon,+sha2,+sha3,+sve,-v9.5a" } -// CHECK: attributes #[[ATTR25]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } +// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+rdm,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR19:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+jsconv,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR20:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+rdm,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+jsconv,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+complxnum,+fullfp16,+neon,+rdm,+sme,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+f32mm,+fullfp16,+i8mm,+neon,+sha2,+sha3,+sve,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR25]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR26]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+rcpc,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR27]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+fp-armv8,+jsconv,+neon,-v9.5a" } +// CHECK: attributes #[[ATTR27]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+jsconv,+neon,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR28]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint,+rcpc,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR29]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } -// CHECK: attributes #[[ATTR30]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3,-v9.5a" } -// CHECK: attributes #[[ATTR31]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm,-v9.5a" } -// CHECK: attributes #[[ATTR32]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+mte,+neon,+sve,+sve2,+sve2-sm4,-v9.5a" } +// CHECK: attributes #[[ATTR29]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR30]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR31]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR32]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+mte,+neon,+sve,+sve2,+sve2-sm4,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR33]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops,+mte,+rcpc,+rcpc3,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR34]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4,-v9.5a" } -// CHECK: attributes #[[ATTR35]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon,+rdm,-v9.5a" } +// CHECK: attributes #[[ATTR34]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+neon,+sm4,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR35]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,+rdm,-fp-armv8,-v9.5a" } //. // CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } // CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } diff --git a/clang/test/CodeGen/constrained-math-builtins.c b/clang/test/CodeGen/constrained-math-builtins.c index 2de832dd2b6cae..6cc3a10a1e7946 100644 --- a/clang/test/CodeGen/constrained-math-builtins.c +++ b/clang/test/CodeGen/constrained-math-builtins.c @@ -183,6 +183,14 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c, _ // CHECK: call x86_fp80 @llvm.experimental.constrained.sqrt.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK: call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + __builtin_tan(f); __builtin_tanf(f); __builtin_tanl(f); __builtin_tanf128(f); + +// CHECK: call double @llvm.experimental.constrained.tan.f64(double %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call float @llvm.experimental.constrained.tan.f32(float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") +// CHECK: call fp128 @llvm.experimental.constrained.tan.f128(fp128 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") + + __builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin_truncf128(f); // CHECK: call double @llvm.experimental.constrained.trunc.f64(double %{{.*}}, metadata !"fpexcept.strict") @@ -315,6 +323,11 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c, _ // CHECK: declare x86_fp80 @llvm.experimental.constrained.sqrt.f80(x86_fp80, metadata, metadata) // CHECK: declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) +// CHECK: declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +// CHECK: declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) +// CHECK: declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata) +// CHECK: declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) + // CHECK: declare double @llvm.experimental.constrained.trunc.f64(double, metadata) // CHECK: declare float @llvm.experimental.constrained.trunc.f32(float, metadata) // CHECK: declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata) diff --git a/clang/test/CodeGen/instrument-objc-method.m b/clang/test/CodeGen/instrument-objc-method.m index 2c9d1fc88554bd..cfc0a0a98bec6b 100644 --- a/clang/test/CodeGen/instrument-objc-method.m +++ b/clang/test/CodeGen/instrument-objc-method.m @@ -11,16 +11,16 @@ @implementation ObjCClass + (void)initialize { } -// BARE: @"\01+[ObjCClass load]"{{\(.*\)}} #1 +// PREINLINE: declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 +// BARE: @"\01+[ObjCClass load]"{{\(.*\)}} #2 + (void)load __attribute__((no_instrument_function)) { } -// PREINLINE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #1 -// BARE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} 
#1 +// PREINLINE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #2 +// BARE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #2 - (void)dealloc __attribute__((no_instrument_function)) { } -// PREINLINE: declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 // PREINLINE: attributes #0 = { {{.*}}"instrument-function-entry"="__cyg_profile_func_enter" // PREINLINE-NOT: attributes #0 = { {{.*}}"instrument-function-entry"="__cyg_profile_func_enter_bare" // PREINLINE-NOT: attributes #2 = { {{.*}}"__cyg_profile_func_enter" diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c index 29c312ba0ecac2..a249182692762d 100644 --- a/clang/test/CodeGen/math-libcalls.c +++ b/clang/test/CodeGen/math-libcalls.c @@ -662,15 +662,15 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { tan(f); tanf(f); tanl(f); -// NO__ERRNO: declare double @tan(double noundef) [[READNONE]] -// NO__ERRNO: declare float @tanf(float noundef) [[READNONE]] -// NO__ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[READNONE]] +// NO__ERRNO: declare double @llvm.tan.f64(double) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare float @llvm.tan.f32(float) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare x86_fp80 @llvm.tan.f80(x86_fp80) [[READNONE_INTRINSIC]] // HAS_ERRNO: declare double @tan(double noundef) [[NOT_READNONE]] // HAS_ERRNO: declare float @tanf(float noundef) [[NOT_READNONE]] // HAS_ERRNO: declare x86_fp80 @tanl(x86_fp80 noundef) [[NOT_READNONE]] -// HAS_MAYTRAP: declare double @tan(double noundef) [[NOT_READNONE]] -// HAS_MAYTRAP: declare float @tanf(float noundef) [[NOT_READNONE]] -// HAS_MAYTRAP: declare x86_fp80 @tanl(x86_fp80 noundef) [[NOT_READNONE]] +// HAS_MAYTRAP: declare double @llvm.experimental.constrained.tan.f64( +// HAS_MAYTRAP: declare float @llvm.experimental.constrained.tan.f32( +// HAS_MAYTRAP: declare x86_fp80 @llvm.experimental.constrained.tan.f80( tanh(f); tanhf(f); tanhl(f); diff --git a/clang/test/CodeGen/sanitize-numerical-stability-attr.cpp b/clang/test/CodeGen/sanitize-numerical-stability-attr.cpp new file mode 100644 index 00000000000000..f51fb79bda6afd --- /dev/null +++ b/clang/test/CodeGen/sanitize-numerical-stability-attr.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck -check-prefix=WITHOUT %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s -fsanitize=numerical | FileCheck -check-prefix=NSAN %s +// RUN: echo "src:%s" | sed -e 's/\\/\\\\/g' > %t +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s -fsanitize=numerical -fsanitize-ignorelist=%t | FileCheck -check-prefix=BL %s + +// WITHOUT: NoNSAN3{{.*}}) [[NOATTR:#[0-9]+]] +// BL: NoNSAN3{{.*}}) [[NOATTR:#[0-9]+]] +// NSAN: NoNSAN3{{.*}}) [[NOATTR:#[0-9]+]] +__attribute__((no_sanitize("numerical"))) +int NoNSAN3(int *a) { return *a; } + +// WITHOUT: NSANOk{{.*}}) [[NOATTR]] +// BL: NSANOk{{.*}}) [[NOATTR]] +// NSAN: NSANOk{{.*}}) [[WITH:#[0-9]+]] +int NSANOk(int *a) { return *a; } + +// WITHOUT: TemplateNSANOk{{.*}}) [[NOATTR]] +// BL: TemplateNSANOk{{.*}}) [[NOATTR]] +// NSAN: TemplateNSANOk{{.*}}) [[WITH]] +template <int i> +int TemplateNSANOk() { return i; } + +// WITHOUT: TemplateNoNSAN{{.*}}) [[NOATTR]] +// BL: TemplateNoNSAN{{.*}}) [[NOATTR]] +// NSAN: TemplateNoNSAN{{.*}}) [[NOATTR]] +template <int i> +__attribute__((no_sanitize("numerical"))) +int TemplateNoNSAN() { return i; } + +int force_instance = TemplateNSANOk<42>() + TemplateNoNSAN<42>(); + +// WITHOUT: attributes [[NOATTR]] = { mustprogress noinline nounwind{{.*}} } // 
BL: attributes [[NOATTR]] = { mustprogress noinline nounwind{{.*}} } +// NSAN: attributes [[WITH]] = { mustprogress noinline nounwind optnone sanitize_numerical_stability{{.*}} } diff --git a/clang/test/CodeGenCXX/windows-instantiate-dllexport-template-specialization.cpp b/clang/test/CodeGenCXX/windows-instantiate-dllexport-template-specialization.cpp new file mode 100644 index 00000000000000..97f341ba1f909e --- /dev/null +++ b/clang/test/CodeGenCXX/windows-instantiate-dllexport-template-specialization.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple i686-windows -fdeclspec -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MS +// RUN: %clang_cc1 -triple i686-windows-itanium -fdeclspec -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fdeclspec -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fdeclspec -emit-llvm %s -o - | FileCheck %s + +struct s { + template <bool b> static bool f(); +}; + +template <typename T> bool template_using_f(T) { return s::f<true>(); } + +bool use_template_using_f() { return template_using_f(0); } + +template<> +bool __declspec(dllexport) s::f<true>() { return true; } + +// CHECK-MS: dllexport {{.*}} @"??$f@$00@s@@SA_NXZ" +// CHECK: dllexport {{.*}} @_ZN1s1fILb1EEEbv diff --git a/clang/test/CodeGenOpenCL/builtins-f16.cl b/clang/test/CodeGenOpenCL/builtins-f16.cl index adf7cdde154f51..d7bffdad5c548f 100644 --- a/clang/test/CodeGenOpenCL/builtins-f16.cl +++ b/clang/test/CodeGenOpenCL/builtins-f16.cl @@ -66,6 +66,9 @@ void test_half_builtins(half h0, half h1, half h2, int i0) { // CHECK: call half @llvm.sqrt.f16(half %h0) res = __builtin_sqrtf16(h0); + // CHECK: call half @llvm.tan.f16(half %h0) + res = __builtin_tanf16(h0); + // CHECK: call half @llvm.trunc.f16(half %h0) res = __builtin_truncf16(h0); diff --git a/clang/test/Driver/aarch64-mac-cpus.c b/clang/test/Driver/aarch64-mac-cpus.c index 51797312689506..488298cfd2d245 100644 --- a/clang/test/Driver/aarch64-mac-cpus.c +++ b/clang/test/Driver/aarch64-mac-cpus.c @@ -16,7 +16,7 @@ // RUN: %clang --target=arm64-apple-macos -mcpu=apple-m1 -### -c %s 2>&1 | FileCheck --check-prefix=EXPLICIT-M1 %s // CHECK: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "apple-m1" -// CHECK-SAME: "-target-feature" "+v8.5a" +// CHECK-SAME: "-target-feature" "+v8.4a" // EXPLICIT-A11: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "apple-a11" // EXPLICIT-A7: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "apple-a7" diff --git a/clang/test/Driver/apple-os-triples.c b/clang/test/Driver/apple-os-triples.c new file mode 100644 index 00000000000000..7664d3bc19fca2 --- /dev/null +++ b/clang/test/Driver/apple-os-triples.c @@ -0,0 +1,31 @@ +// Test triple manipulations.
+ +// RUN: %clang -### -c %s \ +// RUN: --target=i386-apple-darwin10 -mappletvsimulator-version-min=9.0 -arch x86_64 2>&1 | \ +// RUN: FileCheck %s -DARCH=x86_64 -DOS=tvos9.0.0-simulator +// RUN: %clang -### -c %s \ +// RUN: --target=armv7s-apple-darwin10 -mappletvos-version-min=9.0 -arch arm64 2>&1 | \ +// RUN: FileCheck %s -DARCH=arm64 -DOS=tvos9.0.0 +// RUN: env TVOS_DEPLOYMENT_TARGET=9.0 %clang -### -c %s \ +// RUN: -isysroot SDKs/MacOSX10.9.sdk -target i386-apple-darwin10 -arch x86_64 2>&1 | \ +// RUN: FileCheck %s -DARCH=x86_64 -DOS=tvos9.0.0 + +// RUN: %clang -### -c %s \ +// RUN: --target=x86_64-apple-driverkit19.0 2>&1 | \ +// RUN: FileCheck %s -DARCH=x86_64 -DOS=driverkit19.0.0 + +// RUN: %clang -### -c %s \ +// RUN: --target=i386-apple-darwin10 -miphonesimulator-version-min=7.0 -arch i386 2>&1 | \ +// RUN: FileCheck %s -DARCH=i386 -DOS=ios7.0.0-simulator +// RUN: %clang -### -c %s \ +// RUN: --target=armv7s-apple-darwin10 -miphoneos-version-min=7.0 -arch armv7s 2>&1 | \ +// RUN: FileCheck %s -DARCH=thumbv7s -DOS=ios7.0.0 + +// RUN: %clang -### -c %s \ +// RUN: --target=i386-apple-darwin10 -mwatchsimulator-version-min=2.0 -arch i386 2>&1 | \ +// RUN: FileCheck %s -DARCH=i386 -DOS=watchos2.0.0-simulator +// RUN: %clang -### -c %s \ +// RUN: --target=armv7s-apple-darwin10 -mwatchos-version-min=2.0 -arch armv7k 2>&1 | \ +// RUN: FileCheck %s -DARCH=thumbv7k -DOS=watchos2.0.0 + +// CHECK: "-cc1" "-triple" "[[ARCH]]-apple-[[OS]]" diff --git a/clang/test/Driver/appletvos-version-min.c b/clang/test/Driver/appletvos-version-min.c deleted file mode 100644 index 7cbb2001a3ec21..00000000000000 --- a/clang/test/Driver/appletvos-version-min.c +++ /dev/null @@ -1,8 +0,0 @@ -// REQUIRES: x86-registered-target -// REQUIRES: aarch64-registered-target -// RUN: %clang -target i386-apple-darwin10 -mappletvsimulator-version-min=9.0 -arch x86_64 -S -o - %s | FileCheck %s -// RUN: %clang -target armv7s-apple-darwin10 -mappletvos-version-min=9.0 -arch arm64 -S -o - %s | FileCheck %s -// RUN: env TVOS_DEPLOYMENT_TARGET=9.0 %clang -isysroot SDKs/MacOSX10.9.sdk -target i386-apple-darwin10 -arch x86_64 -S -o - %s | FileCheck %s - -int main() { return 0; } -// CHECK: .tvos_version_min 9, 0 diff --git a/clang/test/Driver/driverkit-version-min.c b/clang/test/Driver/driverkit-version-min.c deleted file mode 100644 index 9966152f11ce82..00000000000000 --- a/clang/test/Driver/driverkit-version-min.c +++ /dev/null @@ -1,5 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: %clang -target x86_64-apple-driverkit19.0 -S -o - %s | FileCheck %s - -int main() { return 0; } -// CHECK: .build_version driverkit, 19, 0 diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 571f79a6e7f70d..ba64b3dcb11aa5 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -459,6 +459,21 @@ // CHECK-TSAN-MSAN-MSAN-DARWIN: unsupported option '-fsanitize=memory' for target 'x86_64-apple-darwin10' // CHECK-TSAN-MSAN-MSAN-DARWIN-NOT: unsupported option +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=numerical %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NSAN-X86-64-LINUX +// CHECK-NSAN-X86-64-LINUX: "-fsanitize=numerical" + +// RUN: %clang --target=aarch64-unknown-linux-gnu -fsanitize=numerical %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NSAN-AARCH64-LINUX +// CHECK-NSAN-AARCH64-LINUX: "-fsanitize=numerical" + +// RUN: not %clang --target=mips-unknown-linux -fsanitize=numerical %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NSAN-MIPS-LINUX +// CHECK-NSAN-MIPS-LINUX: error: 
unsupported option '-fsanitize=numerical' for target 'mips-unknown-linux' + +// RUN: %clang --target=x86_64-apple-macos -fsanitize=numerical %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NSAN-X86-64-MACOS +// CHECK-NSAN-X86-64-MACOS: "-fsanitize=numerical" + +// RUN: %clang --target=arm64-apple-macos -fsanitize=numerical %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-NSAN-ARM64-MACOS +// CHECK-NSAN-ARM64-MACOS: "-fsanitize=numerical" + // RUN: %clang --target=x86_64-apple-darwin -fsanitize=thread %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-TSAN-X86-64-DARWIN // CHECK-TSAN-X86-64-DARWIN-NOT: unsupported option // RUN: %clang --target=x86_64-apple-macos -fsanitize=thread %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-TSAN-X86-64-MACOS diff --git a/clang/test/Driver/ios-version-min.c b/clang/test/Driver/ios-version-min.c deleted file mode 100644 index aa536cf7827b36..00000000000000 --- a/clang/test/Driver/ios-version-min.c +++ /dev/null @@ -1,7 +0,0 @@ -// REQUIRES: x86-registered-target -// REQUIRES: arm-registered-target -// RUN: %clang -target i386-apple-darwin10 -miphonesimulator-version-min=7.0 -arch i386 -S -o - %s | FileCheck %s -// RUN: %clang -target armv7s-apple-darwin10 -miphoneos-version-min=7.0 -arch armv7s -S -o - %s | FileCheck %s - -int main() { return 0; } -// CHECK: .ios_version_min 7, 0 diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index ddf617bbb62372..ffd92e1f398c45 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -231,11 +231,6 @@ // RV32-STD: error: invalid arch name 'rv32imqc', // RV32-STD: unsupported standard user-level extension 'q' -// RUN: not %clang --target=riscv32-unknown-elf -march=rv32ib -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-B %s -// RV32-B: error: invalid arch name 'rv32ib', -// RV32-B: unsupported standard user-level extension 'b' - // RUN: not %clang --target=riscv32-unknown-elf -march=rv32xabc -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32X %s // RV32X: error: invalid arch name 'rv32xabc', diff --git a/clang/test/Driver/watchos-version-min.c b/clang/test/Driver/watchos-version-min.c deleted file mode 100644 index 8f12285d4e4737..00000000000000 --- a/clang/test/Driver/watchos-version-min.c +++ /dev/null @@ -1,7 +0,0 @@ -// REQUIRES: x86-registered-target -// REQUIRES: arm-registered-target -// RUN: %clang -target i386-apple-darwin10 -mwatchsimulator-version-min=2.0 -arch i386 -S -o - %s | FileCheck %s -// RUN: %clang -target armv7s-apple-darwin10 -mwatchos-version-min=2.0 -arch armv7k -S -o - %s | FileCheck %s - -int main() { return 0; } -// CHECK: .watchos_version_min 2, 0 diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp index d9eefb909653c7..e0a7cff5a9a3db 100644 --- a/clang/test/Index/comment-to-html-xml-conversion.cpp +++ b/clang/test/Index/comment-to-html-xml-conversion.cpp @@ -1046,82 +1046,101 @@ void comment_to_xml_conversion_todo_4(); /// Aaa. /// \throws Bbb. void comment_to_xml_conversion_exceptions_1(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_1c:@F@comment_to_xml_conversion_exceptions_1#void comment_to_xml_conversion_exceptions_1() Aaa. Bbb.] 
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_1c:@F@comment_to_xml_conversion_exceptions_1#void comment_to_xml_conversion_exceptions_1() Aaa. Bbb. ] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] -// CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Bbb.]))))] +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)))] /// Aaa. /// \throw Bbb. void comment_to_xml_conversion_exceptions_2(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_2c:@F@comment_to_xml_conversion_exceptions_2#void comment_to_xml_conversion_exceptions_2() Aaa. Bbb.] +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_2c:@F@comment_to_xml_conversion_exceptions_2#void comment_to_xml_conversion_exceptions_2() Aaa. Bbb. ] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throw] -// CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Bbb.]))))] +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throw] Arg[0]=Bbb. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)))] /// Aaa. /// \exception Bbb. void comment_to_xml_conversion_exceptions_3(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_3c:@F@comment_to_xml_conversion_exceptions_3#void comment_to_xml_conversion_exceptions_3() Aaa. Bbb.] +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_3c:@F@comment_to_xml_conversion_exceptions_3#void comment_to_xml_conversion_exceptions_3() Aaa. Bbb. ] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[exception] -// CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Bbb.]))))] +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[exception] Arg[0]=Bbb. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)))] /// Aaa. /// \throws Bbb. /// \throws Ccc. /// \throws Ddd. void comment_to_xml_conversion_exceptions_4(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_4c:@F@comment_to_xml_conversion_exceptions_4#void comment_to_xml_conversion_exceptions_4() Aaa. Bbb. Ccc. Ddd.] 
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_4c:@F@comment_to_xml_conversion_exceptions_4#void comment_to_xml_conversion_exceptions_4() Aaa. Bbb. Ccc. Ddd. ] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] -// CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Bbb.] HasTrailingNewline) -// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace))) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] -// CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Ccc.] HasTrailingNewline) -// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace))) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] -// CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Ddd.]))))] +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)) +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Ccc. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)) +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Ddd. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)))] /// Aaa. /// \throws Bbb. /// \throw Ccc. void comment_to_xml_conversion_exceptions_5(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_5c:@F@comment_to_xml_conversion_exceptions_5#void comment_to_xml_conversion_exceptions_5() Aaa. Bbb. Ccc.] +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_5c:@F@comment_to_xml_conversion_exceptions_5#void comment_to_xml_conversion_exceptions_5() Aaa. Bbb. Ccc. ] +// CHECK-NEXT: CommentAST=[ +// CHECK-NEXT: (CXComment_FullComment +// CHECK-NEXT: (CXComment_Paragraph +// CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) +// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)) +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc. +// CHECK-NEXT: (CXComment_Paragraph IsWhitespace)))] + +/// Aaa. +/// \throws Bbb subsequent arg text +void comment_to_xml_conversion_exceptions_6(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_6:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_6c:@F@comment_to_xml_conversion_exceptions_6#void comment_to_xml_conversion_exceptions_6() Aaa. Bbb subsequent arg text] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph // CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Bbb.] HasTrailingNewline) +// CHECK-NEXT: (CXComment_Text Text=[subsequent arg text]))))] + +/// Aaa. 
+/// \throws Bbb subsequent arg text +/// \throw Ccc subsequent arg text +void comment_to_xml_conversion_exceptions_7(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_7:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_exceptions_7c:@F@comment_to_xml_conversion_exceptions_7#void comment_to_xml_conversion_exceptions_7() Aaa. Bbb subsequent arg text Ccc subsequent arg text] +// CHECK-NEXT: CommentAST=[ +// CHECK-NEXT: (CXComment_FullComment +// CHECK-NEXT: (CXComment_Paragraph +// CHECK-NEXT: (CXComment_Text Text=[ Aaa.] HasTrailingNewline) +// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)) +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb +// CHECK-NEXT: (CXComment_Paragraph +// CHECK-NEXT: (CXComment_Text Text=[subsequent arg text] HasTrailingNewline) // CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace))) -// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throw] +// CHECK-NEXT: (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc // CHECK-NEXT: (CXComment_Paragraph -// CHECK-NEXT: (CXComment_Text Text=[ Ccc.]))))] +// CHECK-NEXT: (CXComment_Text Text=[subsequent arg text]))))] #endif - diff --git a/clang/test/Lexer/has_feature_numerical_stability_sanitizer.cpp b/clang/test/Lexer/has_feature_numerical_stability_sanitizer.cpp new file mode 100644 index 00000000000000..78884977322b8e --- /dev/null +++ b/clang/test/Lexer/has_feature_numerical_stability_sanitizer.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -E -fsanitize=numerical %s -o - | FileCheck --check-prefix=CHECK-NSAN %s +// RUN: %clang_cc1 -E %s -o - | FileCheck --check-prefix=CHECK-NO-NSAN %s + +#if __has_feature(numerical_stability_sanitizer) +int NumericalStabilitySanitizerEnabled(); +#else +int NumericalStabilitySanitizerDisabled(); +#endif + +// CHECK-NSAN: NumericalStabilitySanitizerEnabled +// CHECK-NO-NSAN: NumericalStabilitySanitizerDisabled diff --git a/clang/test/OpenMP/error_unsupport_feature.c b/clang/test/OpenMP/error_unsupport_feature.c index 611a8b4639c44e..eb381b3bea1e1a 100644 --- a/clang/test/OpenMP/error_unsupport_feature.c +++ b/clang/test/OpenMP/error_unsupport_feature.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -emit-llvm -verify -fopenmp %s +// RUN: %clang_cc1 -emit-llvm-only -verify -fopenmp %s int main () { int r = 0; diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 82304a15a04a3f..c707972fb41d2e 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -335,7 +335,7 @@ // CHECK-MCPU-CARMEL: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8.2a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+fp-armv8" "-target-feature" "+fullfp16" "-target-feature" "+lse" "-target-feature" "+ras" "-target-feature" "+rdm" "-target-feature" "+sha2" "-target-feature" "+neon" // RUN: %clang -target x86_64-apple-macosx -arch arm64 -### -c %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH-ARM64 %s -// CHECK-ARCH-ARM64: "-target-cpu" "apple-m1" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+v8.5a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+dotprod" "-target-feature" "+complxnum" "-target-feature" "+fp-armv8" "-target-feature" "+fullfp16" "-target-feature" "+fp16fml" "-target-feature" "+jsconv" "-target-feature" "+lse" "-target-feature" "+pauth" "-target-feature" "+ras" "-target-feature" "+rcpc" "-target-feature" "+rdm" 
"-target-feature" "+sha2" "-target-feature" "+sha3" "-target-feature" "+neon" +// CHECK-ARCH-ARM64: "-target-cpu" "apple-m1" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+v8.4a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+dotprod" "-target-feature" "+complxnum" "-target-feature" "+fp-armv8" "-target-feature" "+fullfp16" "-target-feature" "+fp16fml" "-target-feature" "+jsconv" "-target-feature" "+lse" "-target-feature" "+pauth" "-target-feature" "+ras" "-target-feature" "+rcpc" "-target-feature" "+rdm" "-target-feature" "+sha2" "-target-feature" "+sha3" "-target-feature" "+neon" // RUN: %clang -target x86_64-apple-macosx -arch arm64_32 -### -c %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH-ARM64_32 %s // CHECK-ARCH-ARM64_32: "-target-cpu" "apple-s4" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+v8.3a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+complxnum" "-target-feature" "+fp-armv8" "-target-feature" "+fullfp16" "-target-feature" "+jsconv" "-target-feature" "+lse" "-target-feature" "+pauth" "-target-feature" "+ras" "-target-feature" "+rcpc" "-target-feature" "+rdm" "-target-feature" "+sha2" "-target-feature" "+neon" @@ -671,10 +671,15 @@ // CHECK-V83-OR-LATER: __ARM_FEATURE_JCVT 1 // CHECK-V83-OR-LATER: __ARM_FEATURE_PAUTH 1 // CHECK-V81-OR-LATER: __ARM_FEATURE_QRDMX 1 +// CHECK-BEFORE-V85-NOT: __ARM_FEATURE_BTI 1 // CHECK-BEFORE-V83-NOT: __ARM_FEATURE_COMPLEX 1 // CHECK-BEFORE-V83-NOT: __ARM_FEATURE_JCVT 1 // CHECK-BEFORE-V85-NOT: __ARM_FEATURE_FRINT 1 +// RUN: %clang -target aarch64 -mcpu=apple-a14 -x c -E -dM %s -o - | FileCheck --check-prefix=APPLE-A14-M1 %s +// RUN: %clang -target aarch64 -mcpu=apple-m1 -x c -E -dM %s -o - | FileCheck --check-prefix=APPLE-A14-M1 %s +// APPLE-A14-M1-NOT: __ARM_FEATURE_BTI 1 + // RUN: %clang --target=aarch64 -march=armv8.2-a+rcpc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RCPC %s // CHECK-RCPC: __ARM_FEATURE_RCPC 1 diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 09b9ad0a160bb1..91307141e0406b 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -7,6 +7,7 @@ // CHECK-NOT: __riscv_64e {{.*$}} // CHECK-NOT: __riscv_a {{.*$}} // CHECK-NOT: __riscv_atomic +// CHECK-NOT: __riscv_b {{.*$}} // CHECK-NOT: __riscv_c {{.*$}} // CHECK-NOT: __riscv_compressed {{.*$}} // CHECK-NOT: __riscv_d {{.*$}} @@ -194,6 +195,17 @@ // CHECK-A-EXT: __riscv_a 2001000{{$}} // CHECK-A-EXT: __riscv_atomic 1 +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32ib -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-B-EXT %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64ib -x c -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-B-EXT %s +// CHECK-B-EXT: __riscv_b 1000000{{$}} +// CHECK-B-EXT: __riscv_zba 1000000{{$}} +// CHECK-B-EXT: __riscv_zbb 1000000{{$}} +// CHECK-B-EXT: __riscv_zbs 1000000{{$}} + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32ic -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-C-EXT %s diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c index 642afddd88c154..fa45fff1d183d6 100644 --- a/clang/test/Sema/aarch64-neon-target.c +++ b/clang/test/Sema/aarch64-neon-target.c @@ -69,8 +69,8 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t vrnd_f16(v4f16); // 
expected-error {{always_inline function 'vrnd_f16' requires target feature 'fullfp16'}} vmaxnm_f16(v4f16, v4f16); // expected-error {{always_inline function 'vmaxnm_f16' requires target feature 'fullfp16'}} vrndi_f16(v4f16); // expected-error {{always_inline function 'vrndi_f16' requires target feature 'fullfp16'}} - // fp16fml depends on fp-armv8 - vfmlal_low_f16(v2f32, v4f16, v4f16); // expected-error {{always_inline function 'vfmlal_low_f16' requires target feature 'fp-armv8'}} + // fp16fml + vfmlal_low_f16(v2f32, v4f16, v4f16); // expected-error {{always_inline function 'vfmlal_low_f16' requires target feature 'fp16fml'}} // i8mm vmmlaq_s32(v4i32, v8i16, v8i16); // expected-error {{always_inline function 'vmmlaq_s32' requires target feature 'i8mm'}} vusdot_laneq_s32(v2i32, v8i8, v8i16, 0); // expected-error {{always_inline function 'vusdot_s32' requires target feature 'i8mm'}} diff --git a/clang/test/Sema/arm-vector-types-support.c b/clang/test/Sema/arm-vector-types-support.c index ed5f5ba175a94a..e648d791a26878 100644 --- a/clang/test/Sema/arm-vector-types-support.c +++ b/clang/test/Sema/arm-vector-types-support.c @@ -1,7 +1,8 @@ -// RUN: %clang_cc1 %s -triple armv7 -fsyntax-only -verify -// RUN: %clang_cc1 %s -triple aarch64 -fsyntax-only -verify -// RUN: %clang_cc1 %s -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -fsyntax-only -verify +// RUN: %clang_cc1 %s -triple armv8.1m.main -fsyntax-only -verify +// RUN: %clang_cc1 %s -triple aarch64 -fsyntax-only -verify=sve-type +// RUN: %clang_cc1 %s -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -fsyntax-only -verify=sve-type -typedef __attribute__((neon_vector_type(2))) int int32x2_t; // expected-error{{'neon_vector_type' attribute is not supported on targets missing 'neon', 'mve', 'sve' or 'sme'; specify an appropriate -march= or -mcpu=}} -typedef __attribute__((neon_polyvector_type(16))) short poly8x16_t; // expected-error{{'neon_polyvector_type' attribute is not supported on targets missing 'neon' or 'mve'; specify an appropriate -march= or -mcpu=}} +typedef __attribute__((neon_vector_type(2))) int int32x2_t; // expected-error{{'neon_vector_type' attribute is not supported on targets missing 'mve'; specify an appropriate -march= or -mcpu=}} +typedef __attribute__((neon_polyvector_type(16))) unsigned char poly8x16_t; // expected-error{{'neon_polyvector_type' attribute is not supported on targets missing 'mve'; specify an appropriate -march= or -mcpu=}} typedef __attribute__((arm_sve_vector_bits(256))) void nosveflag; // expected-error{{'arm_sve_vector_bits' attribute is not supported on targets missing 'sve'; specify an appropriate -march= or -mcpu=}} + // sve-type-error@-1{{'arm_sve_vector_bits' attribute is not supported on targets missing 'sve'; specify an appropriate -march= or -mcpu=}} diff --git a/clang/test/SemaCUDA/function-redclare.cu b/clang/test/SemaCUDA/function-redclare.cu new file mode 100644 index 00000000000000..7cd9bad79ae988 --- /dev/null +++ b/clang/test/SemaCUDA/function-redclare.cu @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -isystem %S/Inputs -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -isystem %S/Inputs -fcuda-is-device -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -isystem %S/Inputs -verify=redecl -Wnvcc-compat %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -isystem %S/Inputs -fcuda-is-device -Wnvcc-compat 
-verify=redecl %s + +// expected-no-diagnostics +#include "cuda.h" + +__device__ void f(); // redecl-note {{previous declaration is here}} + +void f() {} // redecl-warning {{target-attribute based function overloads are not supported by NVCC and will be treated as a function redeclaration:new declaration is __host__ function, old declaration is __device__ function}} + +void g(); // redecl-note {{previous declaration is here}} + +__device__ void g() {} // redecl-warning {{target-attribute based function overloads are not supported by NVCC and will be treated as a function redeclaration:new declaration is __device__ function, old declaration is __host__ function}} diff --git a/clang/test/SemaCUDA/neon-attrs.cu b/clang/test/SemaCUDA/neon-attrs.cu deleted file mode 100644 index 129056741ac9a4..00000000000000 --- a/clang/test/SemaCUDA/neon-attrs.cu +++ /dev/null @@ -1,22 +0,0 @@ -// CPU-side compilation on ARM with neon enabled (no errors expected). -// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -aux-triple nvptx64 -x cuda -fsyntax-only -verify=quiet %s - -// CPU-side compilation on ARM with neon disabled. -// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature -neon -aux-triple nvptx64 -x cuda -fsyntax-only -verify %s - -// GPU-side compilation on ARM (no errors expected). -// RUN: %clang_cc1 -triple nvptx64 -aux-triple arm64-linux-gnu -fcuda-is-device -x cuda -fsyntax-only -verify=quiet %s - -// Regular C++ compilation on ARM with neon enabled (no errors expected). -// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -x c++ -fsyntax-only -verify=quiet %s - -// Regular C++ compilation on ARM with neon disabled. -// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature -neon -x c++ -fsyntax-only -verify %s - -// quiet-no-diagnostics -typedef __attribute__((neon_vector_type(4))) float float32x4_t; -// expected-error@-1 {{'neon_vector_type' attribute is not supported on targets missing 'neon', 'mve', 'sve' or 'sme'}} -// expect -typedef unsigned char poly8_t; -typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; -// expected-error@-1 {{'neon_polyvector_type' attribute is not supported on targets missing 'neon' or 'mve'}} diff --git a/clang/tools/clang-repl/CMakeLists.txt b/clang/tools/clang-repl/CMakeLists.txt index 4017b1445da0f4..42618e4e31cb0a 100644 --- a/clang/tools/clang-repl/CMakeLists.txt +++ b/clang/tools/clang-repl/CMakeLists.txt @@ -61,10 +61,7 @@ clang_target_link_libraries(clang-repl PRIVATE clangInterpreter ) -# Support plugins. -if(CLANG_PLUGIN_SUPPORT) - export_executable_symbols_for_plugins(clang-repl) -endif() +export_executable_symbols_for_plugins(clang-repl) # The clang-repl binary can get huge with static linking in debug mode. 
# Some 32-bit targets use PLT slots with limited branch range by default and we diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp index c3479672ae2a3c..1c57c899f90745 100644 --- a/clang/unittests/AST/CommentParser.cpp +++ b/clang/unittests/AST/CommentParser.cpp @@ -1427,8 +1427,220 @@ TEST_F(CommentParserTest, Deprecated) { } } +TEST_F(CommentParserTest, ThrowsCommandHasArg1) { + const char *Sources[] = { + "/// @throws int This function throws an integer", + ("/// @throws\n" + "/// int This function throws an integer"), + ("/// @throws \n" + "/// int This function throws an integer"), + ("/// @throws\n" + "/// int\n" + "/// This function throws an integer"), + ("/// @throws \n" + "/// int \n" + "/// This function throws an integer"), + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "int"); + } + } +} + +TEST_F(CommentParserTest, ThrowsCommandHasArg2) { + const char *Sources[] = { + "/// @throws int** This function throws a double pointer to an integer", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "int**"); + } + } +} + +TEST_F(CommentParserTest, ThrowsCommandHasArg3) { + const char *Sources[] = { + "/// @throws Error error of type Error", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 3)); // Extra children because is parsed + // as a series of TextComments + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Error"); + } + } +} + +TEST_F(CommentParserTest, ThrowsCommandHasArg4) { + const char *Sources[] = { + "/// @throws Error> nested templates", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Error>"); + } + } +} + +TEST_F(CommentParserTest, ThrowsCommandHasArg5) { + const char *Sources[] = { + "/// @throws Error variadic templates", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + 
ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Error"); + } + } +} + +TEST_F(CommentParserTest, ThrowsCommandHasArg6) { + const char *Sources[] = { + "/// @throws Foo<(1 > 0)> typo1", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 1)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Foo<(1 >"); + } + } +} + +// No matter the number of (unmatched) opening brackets, no type is parsed. +TEST_F(CommentParserTest, ThrowsCommandHasArg7) { + const char *Sources[] = { + "/// @throws Foo<", + "/// @throws Foo<<<", + "/// @throws Foo<<<<<<<", + "/// @throws Foo<\n", + "/// @throws Foo<<<\n", + "/// @throws Foo<<<<<<<\n", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 0); + } + } +} + +// Types with a non-matching closing bracket are parsed as if they were a type +TEST_F(CommentParserTest, ThrowsCommandHasArg8) { + const char *Sources[] = { + "/// @throws Foo>", + "/// @throws Foo>\n", + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 2)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 1); + ASSERT_TRUE(BCC->getArgText(0) == "Foo>"); + } + } +} + +// Everything up until the end of the paragraph comment will be +// eaten up if the template sequence is unterminated (i.e. the number of +// opening and closing brackets is not equal). +TEST_F(CommentParserTest, ThrowsCommandHasArg9) { + const char *Sources[] = { + "/// @throws Foo\n" + "/// Aaa\n" + "///\n" + "/// Bbb\n" + }; + + for (size_t i = 0, e = std::size(Sources); i != e; i++) { + FullComment *FC = parseString(Sources[i]); + ASSERT_TRUE(HasChildCount(FC, 3)); + + ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " ")); + { + BlockCommandComment *BCC; + ParagraphComment *PC; + ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC)); + ASSERT_TRUE(HasChildCount(PC, 0)); + ASSERT_TRUE(BCC->getNumArgs() == 0); + } + } +} + + } // unnamed namespace } // end namespace comments } // end namespace clang - diff --git a/clang/unittests/Analysis/FlowSensitive/ASTOpsTest.cpp b/clang/unittests/Analysis/FlowSensitive/ASTOpsTest.cpp new file mode 100644 index 00000000000000..cd1c076ab09e6b --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/ASTOpsTest.cpp @@ -0,0 +1,88 @@ +//===- unittests/Analysis/FlowSensitive/ASTOpsTest.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "TestingSupport.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +namespace { + +using namespace clang; +using namespace dataflow; + +using ast_matchers::cxxRecordDecl; +using ast_matchers::hasName; +using ast_matchers::hasType; +using ast_matchers::initListExpr; +using ast_matchers::match; +using ast_matchers::selectFirst; +using test::findValueDecl; +using testing::IsEmpty; +using testing::UnorderedElementsAre; + +TEST(ASTOpsTest, RecordInitListHelperOnEmptyUnionInitList) { + // This is a regression test: The `RecordInitListHelper` used to assert-fail + // when called for the `InitListExpr` of an empty union. + std::string Code = R"cc( + struct S { + S() : UField{} {}; + + union U {} UField; + }; + )cc"; + std::unique_ptr Unit = + tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++17"}); + auto &ASTCtx = Unit->getASTContext(); + + ASSERT_EQ(ASTCtx.getDiagnostics().getClient()->getNumErrors(), 0U); + + auto *InitList = selectFirst( + "init", + match(initListExpr(hasType(cxxRecordDecl(hasName("U")))).bind("init"), + ASTCtx)); + ASSERT_NE(InitList, nullptr); + + RecordInitListHelper Helper(InitList); + EXPECT_THAT(Helper.base_inits(), IsEmpty()); + EXPECT_THAT(Helper.field_inits(), IsEmpty()); +} + +TEST(ASTOpsTest, ReferencedDeclsOnUnionInitList) { + // This is a regression test: `getReferencedDecls()` used to return a null + // `FieldDecl` in this case (in addition to the correct non-null `FieldDecl`) + // because `getInitializedFieldInUnion()` returns null for the syntactic form + // of the `InitListExpr`. + std::string Code = R"cc( + struct S { + S() : UField{0} {}; + + union U { + int I; + } UField; + }; + )cc"; + std::unique_ptr Unit = + tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++17"}); + auto &ASTCtx = Unit->getASTContext(); + + ASSERT_EQ(ASTCtx.getDiagnostics().getClient()->getNumErrors(), 0U); + + auto *InitList = selectFirst( + "init", + match(initListExpr(hasType(cxxRecordDecl(hasName("U")))).bind("init"), + ASTCtx)); + ASSERT_NE(InitList, nullptr); + auto *IDecl = cast(findValueDecl(ASTCtx, "I")); + + EXPECT_THAT(getReferencedDecls(*InitList).Fields, + UnorderedElementsAre(IDecl)); +} + +} // namespace diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index cfabb80576bc12..12fee5dc2789ce 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(ClangAnalysisFlowSensitiveTests ArenaTest.cpp + ASTOpsTest.cpp CFGMatchSwitchTest.cpp ChromiumCheckModelTest.cpp DataflowAnalysisContextTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index f7e6b0c22e8db2..2a74d7fa63fd74 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3345,6 +3345,32 @@ TEST(TransferTest, ResultObjectLocationForBuiltinBitCastExpr) { }); } +TEST(TransferTest, ResultObjectLocationForAtomicExpr) { + std::string Code = R"( + struct S {}; + void target(_Atomic(S) *ptr) { + S s = __c11_atomic_load(ptr, __ATOMIC_SEQ_CST); + // [[p]] + } + )"; + using ast_matchers::atomicExpr; + using 
ast_matchers::match; + using ast_matchers::selectFirst; + using ast_matchers::traverse; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto *Atomic = selectFirst( + "atomic", match(atomicExpr().bind("atomic"), ASTCtx)); + + EXPECT_EQ(&Env.getResultObjectLocation(*Atomic), + &getLocForDecl(ASTCtx, Env, "s")); + }); +} + TEST(TransferTest, ResultObjectLocationPropagatesThroughConditionalOperator) { std::string Code = R"( struct A { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index dbc1916825f334..fb573338585297 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -22866,6 +22866,22 @@ TEST_F(FormatTest, FormatsLambdas) { " //\n" " });"); + FormatStyle LLVMStyle = getLLVMStyleWithColumns(60); + verifyFormat("very_long_function_name_yes_it_is_really_long(\n" + " [](auto n) noexcept [[back_attr]]\n" + " -> std::unordered_map {\n" + " really_do_something();\n" + " });", + LLVMStyle); + verifyFormat("very_long_function_name_yes_it_is_really_long(\n" + " [](auto n) constexpr\n" + " -> std::unordered_map {\n" + " really_do_something();\n" + " });", + LLVMStyle); + FormatStyle DoNotMerge = getLLVMStyle(); DoNotMerge.AllowShortLambdasOnASingleLine = FormatStyle::SLS_None; verifyFormat("auto c = []() {\n" diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp index 72b34dae378e50..683295a18d5199 100644 --- a/clang/unittests/Interpreter/InterpreterTest.cpp +++ b/clang/unittests/Interpreter/InterpreterTest.cpp @@ -283,7 +283,7 @@ TEST_F(InterpreterTest, InstantiateTemplate) { } // This test exposes an ARM specific problem in the interpreter, see -// https://github.com/llvm/llvm-project/issues/94741. +// https://github.com/llvm/llvm-project/issues/94994. #ifndef __arm__ TEST_F(InterpreterTest, Value) { std::unique_ptr Interp = createInterpreter(); diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 56f1fdf9ef574f..626031d38cf003 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2370,10 +2370,6 @@ void NeonEmitter::run(raw_ostream &OS) { "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n"; OS << "#else\n\n"; - OS << "#if !defined(__ARM_NEON)\n"; - OS << "#error \"NEON support not enabled\"\n"; - OS << "#else\n\n"; - OS << "#include \n\n"; OS << "#include \n"; @@ -2450,7 +2446,6 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#undef __ai\n\n"; OS << "#endif /* if !defined(__ARM_NEON) */\n"; OS << "#endif /* ifndef __ARM_FP */\n"; - OS << "#endif /* __ARM_NEON_H */\n"; } /// run - Read the records in arm_fp16.td and output arm_fp16.h. 
diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt index 61df987da7139b..49673790ff6e84 100644 --- a/clang/utils/perf-training/CMakeLists.txt +++ b/clang/utils/perf-training/CMakeLists.txt @@ -15,7 +15,7 @@ if(LLVM_BUILD_INSTRUMENTED) add_lit_testsuite(generate-profraw "Generating clang PGO data" ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/ EXCLUDE_FROM_CHECK_ALL - DEPENDS clang clear-profraw ${CLANG_PGO_TRAINING_DEPS} + DEPENDS clear-profraw ) add_custom_target(clear-profraw @@ -29,10 +29,21 @@ if(LLVM_BUILD_INSTRUMENTED) if(NOT LLVM_PROFDATA) message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata") else() - add_custom_target(generate-profdata + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata + # generate-profraw is a custom_target, which is always considered stale. + # If we add it here to 'DEPENDS', then it will always execute and running + # ninja install && ninja check-all will result in the profile data being + # generated twice, and cause the ninja check-all build to fail with errors like: + # `ld.lld: error: Function Import: link error: linking module flags 'ProfileSummary': IDs have conflicting values in` + # Therefore we call the generate-profraw target manually as part of this custom + # command, which will only run if clang or ${CLANG_PGO_TRAINING_DEPS} are updated. + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target generate-profraw COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ COMMENT "Merging profdata" - DEPENDS generate-profraw) + DEPENDS clang ${CLANG_PGO_TRAINING_DEPS} + ) + add_custom_target(generate-profdata DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata) if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR) llvm_ExternalProject_Add(generate-profraw-external ${CLANG_PGO_TRAINING_DATA_SOURCE_DIR} USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw) diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 8ef5d59b92f0c1..52f2034b8707a3 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -328,6 +328,8 @@ struct IntrinsicLibrary { void genRandomNumber(llvm::ArrayRef); void genRandomSeed(llvm::ArrayRef); fir::ExtendedValue genReduce(mlir::Type, llvm::ArrayRef); + fir::ExtendedValue genReduceDim(mlir::Type, + llvm::ArrayRef); fir::ExtendedValue genRepeat(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genReshape(mlir::Type, llvm::ArrayRef); mlir::Value genRRSpacing(mlir::Type resultType, diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h b/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h index 5f14d7781004b3..3707273e0cbd48 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h @@ -20,12 +20,14 @@ class FirOpBuilder; namespace fir::runtime { -/// Generate call to general `LboundDim` runtime routine. Calls to LBOUND -/// without a DIM argument get transformed into descriptor inquiries so they're -/// not handled in the runtime. +/// Generate call to `LboundDim` runtime routine. mlir::Value genLboundDim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value array, mlir::Value dim); +/// Generate call to `Lbound` runtime routine. 
+void genLbound(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value resultAddr, mlir::Value array, mlir::Value kind); + + /// Generate call to general `Ubound` runtime routine. Calls to UBOUND /// with a DIM argument get transformed into an expression equivalent to /// SIZE() + LBOUND() - 1, so they don't have an intrinsic in the runtime. diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h index 575746374fcc4e..99161c57fbe288 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h @@ -22,6 +22,7 @@ #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Runtime/reduce.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" #include "llvm/ADT/SmallVector.h" @@ -52,6 +53,34 @@ namespace fir::runtime { using TypeBuilderFunc = mlir::Type (*)(mlir::MLIRContext *); using FuncTypeBuilderFunc = mlir::FunctionType (*)(mlir::MLIRContext *); +#define REDUCTION_OPERATION_MODEL(T) \ + template <> \ + constexpr TypeBuilderFunc \ + getModel>() { \ + return [](mlir::MLIRContext *context) -> mlir::Type { \ + TypeBuilderFunc f{getModel()}; \ + auto refTy = fir::ReferenceType::get(f(context)); \ + return mlir::FunctionType::get(context, {refTy, refTy}, refTy); \ + }; \ + } + +#define REDUCTION_CHAR_OPERATION_MODEL(T) \ + template <> \ + constexpr TypeBuilderFunc \ + getModel>() { \ + return [](mlir::MLIRContext *context) -> mlir::Type { \ + TypeBuilderFunc f{getModel()}; \ + auto voidTy = fir::LLVMPointerType::get( \ + context, mlir::IntegerType::get(context, 8)); \ + auto size_tTy = \ + mlir::IntegerType::get(context, 8 * sizeof(std::size_t)); \ + auto refTy = fir::ReferenceType::get(f(context)); \ + return mlir::FunctionType::get( \ + context, {refTy, size_tTy, refTy, refTy, size_tTy, size_tTy}, \ + voidTy); \ + }; \ + } + //===----------------------------------------------------------------------===// // Type builder models //===----------------------------------------------------------------------===// @@ -75,7 +104,6 @@ constexpr TypeBuilderFunc getModel() { return mlir::IntegerType::get(context, 8 * sizeof(unsigned int)); }; } - template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { @@ -83,6 +111,17 @@ constexpr TypeBuilderFunc getModel() { }; } template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, 8 * sizeof(int)); @@ -96,6 +135,17 @@ constexpr TypeBuilderFunc getModel() { }; } template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return fir::ReferenceType::get(mlir::IntegerType::get(context, 8)); @@ -130,6 +180,43 @@ constexpr TypeBuilderFunc getModel() { }; } template <> +constexpr TypeBuilderFunc 
getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, 8 * sizeof(char16_t)); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, 8 * sizeof(char32_t)); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, 8 * sizeof(unsigned char)); @@ -175,6 +262,10 @@ constexpr TypeBuilderFunc getModel() { return getModel(); } template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, 8 * sizeof(long long)); @@ -199,6 +290,10 @@ constexpr TypeBuilderFunc getModel() { return getModel(); } template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, 8 * sizeof(unsigned long)); @@ -228,6 +323,27 @@ constexpr TypeBuilderFunc getModel() { return getModel(); } template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::FloatType::getF80(context); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + TypeBuilderFunc f{getModel()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::FloatType::getF32(context); @@ -245,6 +361,10 @@ constexpr TypeBuilderFunc getModel() { return getModel(); } template <> +constexpr TypeBuilderFunc getModel() { + return getModel(); +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, 1); @@ -258,20 +378,48 @@ constexpr TypeBuilderFunc getModel() { }; } template <> +constexpr TypeBuilderFunc getModel>() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::ComplexType::get(mlir::FloatType::getF32(context)); + }; +} +template <> constexpr TypeBuilderFunc getModel &>() { return [](mlir::MLIRContext *context) -> mlir::Type { - auto ty = mlir::ComplexType::get(mlir::FloatType::getF32(context)); - return fir::ReferenceType::get(ty); + TypeBuilderFunc f{getModel>()}; + return fir::ReferenceType::get(f(context)); + }; +} +template <> +constexpr TypeBuilderFunc getModel *>() { + return getModel &>(); +} +template <> +constexpr TypeBuilderFunc getModel *>() { + 
return getModel *>(); +} +template <> +constexpr TypeBuilderFunc getModel>() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::ComplexType::get(mlir::FloatType::getF64(context)); }; } template <> constexpr TypeBuilderFunc getModel &>() { return [](mlir::MLIRContext *context) -> mlir::Type { - auto ty = mlir::ComplexType::get(mlir::FloatType::getF64(context)); - return fir::ReferenceType::get(ty); + TypeBuilderFunc f{getModel>()}; + return fir::ReferenceType::get(f(context)); }; } template <> +constexpr TypeBuilderFunc getModel *>() { + return getModel &>(); +} +template <> +constexpr TypeBuilderFunc getModel *>() { + return getModel *>(); +} +template <> constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return fir::ComplexType::get(context, sizeof(float)); @@ -332,6 +480,33 @@ constexpr TypeBuilderFunc getModel() { }; } +REDUCTION_OPERATION_MODEL(std::int8_t) +REDUCTION_OPERATION_MODEL(std::int16_t) +REDUCTION_OPERATION_MODEL(std::int32_t) +REDUCTION_OPERATION_MODEL(std::int64_t) +REDUCTION_OPERATION_MODEL(Fortran::common::int128_t) + +REDUCTION_OPERATION_MODEL(float) +REDUCTION_OPERATION_MODEL(double) +REDUCTION_OPERATION_MODEL(long double) + +REDUCTION_OPERATION_MODEL(std::complex) +REDUCTION_OPERATION_MODEL(std::complex) + +REDUCTION_CHAR_OPERATION_MODEL(char) +REDUCTION_CHAR_OPERATION_MODEL(char16_t) +REDUCTION_CHAR_OPERATION_MODEL(char32_t) + +template <> +constexpr TypeBuilderFunc +getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + auto voidTy = + fir::LLVMPointerType::get(context, mlir::IntegerType::get(context, 8)); + return mlir::FunctionType::get(context, {voidTy, voidTy, voidTy}, voidTy); + }; +} + template struct RuntimeTableKey; template diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h index 667ea9081a893c..fedf453a6dc8de 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h @@ -224,6 +224,29 @@ void genIParityDim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value resultBox, mlir::Value arrayBox, mlir::Value dim, mlir::Value maskBox); +/// Generate call to `Reduce` intrinsic runtime routine. This is the version +/// that does not take a dim argument and stores the result in the provided +/// result value. This is used for COMPLEX, CHARACTER and DERIVED TYPES. +void genReduce(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value arrayBox, mlir::Value operation, mlir::Value maskBox, + mlir::Value identity, mlir::Value ordered, + mlir::Value resultBox); + +/// Generate call to `Reduce` intrinsic runtime routine. This is the version +/// that does not take a dim argument and returns a scalar result. This is used +/// for REAL, INTEGER and LOGICAL TYPES. +mlir::Value genReduce(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value arrayBox, mlir::Value operation, + mlir::Value maskBox, mlir::Value identity, + mlir::Value ordered); + +/// Generate call to `Reduce` intrinsic runtime routine. This is the version +/// that takes arrays of any rank with a dim argument specified. 
+void genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value arrayBox, mlir::Value operation, mlir::Value dim, + mlir::Value maskBox, mlir::Value identity, + mlir::Value ordered, mlir::Value resultBox); + } // namespace fir::runtime #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_REDUCTION_H diff --git a/flang/lib/Optimizer/CodeGen/DescriptorModel.h b/flang/include/flang/Optimizer/CodeGen/DescriptorModel.h similarity index 88% rename from flang/lib/Optimizer/CodeGen/DescriptorModel.h rename to flang/include/flang/Optimizer/CodeGen/DescriptorModel.h index ed35caef930149..ff0cf29e8073e6 100644 --- a/flang/lib/Optimizer/CodeGen/DescriptorModel.h +++ b/flang/include/flang/Optimizer/CodeGen/DescriptorModel.h @@ -35,73 +35,73 @@ using TypeBuilderFunc = mlir::Type (*)(mlir::MLIRContext *); /// Get the LLVM IR dialect model for building a particular C++ type, `T`. template -TypeBuilderFunc getModel(); +static TypeBuilderFunc getModel(); template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::LLVM::LLVMPointerType::get(context); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(unsigned) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(int) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(unsigned long) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(unsigned long long) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(long long) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(Fortran::ISO::CFI_rank_t) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(Fortran::ISO::CFI_type_t) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { return mlir::IntegerType::get(context, sizeof(long) * 8); }; } template <> -TypeBuilderFunc getModel() { +constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { auto indexTy = getModel()(context); return mlir::LLVM::LLVMArrayType::get(indexTy, 3); }; } template <> -TypeBuilderFunc +constexpr TypeBuilderFunc getModel>() { return getModel(); } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 512c7a349ae21b..202efa57d4a367 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -104,7 +104,7 @@ struct IncrementLoopInfo { bool hasLocalitySpecs() const { return !localSymList.empty() || !localInitSymList.empty() || - !sharedSymList.empty(); + !reduceSymList.empty() || !sharedSymList.empty(); } // Data members common 
to both structured and unstructured loops. @@ -116,6 +116,9 @@ struct IncrementLoopInfo { bool isUnordered; // do concurrent, forall llvm::SmallVector localSymList; llvm::SmallVector localInitSymList; + llvm::SmallVector< + std::pair> + reduceSymList; llvm::SmallVector sharedSymList; mlir::Value loopVariable = nullptr; @@ -1741,6 +1744,35 @@ class FirConverter : public Fortran::lower::AbstractConverter { builder->create(loc); } + fir::ReduceOperationEnum + getReduceOperationEnum(const Fortran::parser::ReductionOperator &rOpr) { + switch (rOpr.v) { + case Fortran::parser::ReductionOperator::Operator::Plus: + return fir::ReduceOperationEnum::Add; + case Fortran::parser::ReductionOperator::Operator::Multiply: + return fir::ReduceOperationEnum::Multiply; + case Fortran::parser::ReductionOperator::Operator::And: + return fir::ReduceOperationEnum::AND; + case Fortran::parser::ReductionOperator::Operator::Or: + return fir::ReduceOperationEnum::OR; + case Fortran::parser::ReductionOperator::Operator::Eqv: + return fir::ReduceOperationEnum::EQV; + case Fortran::parser::ReductionOperator::Operator::Neqv: + return fir::ReduceOperationEnum::NEQV; + case Fortran::parser::ReductionOperator::Operator::Max: + return fir::ReduceOperationEnum::MAX; + case Fortran::parser::ReductionOperator::Operator::Min: + return fir::ReduceOperationEnum::MIN; + case Fortran::parser::ReductionOperator::Operator::Iand: + return fir::ReduceOperationEnum::IAND; + case Fortran::parser::ReductionOperator::Operator::Ior: + return fir::ReduceOperationEnum::IOR; + case Fortran::parser::ReductionOperator::Operator::Ieor: + return fir::ReduceOperationEnum::EIOR; + } + llvm_unreachable("illegal reduction operator"); + } + /// Collect DO CONCURRENT or FORALL loop control information. IncrementLoopNestInfo getConcurrentControl( const Fortran::parser::ConcurrentHeader &header, @@ -1763,6 +1795,16 @@ class FirConverter : public Fortran::lower::AbstractConverter { std::get_if(&x.u)) for (const Fortran::parser::Name &x : localInitList->v) info.localInitSymList.push_back(x.symbol); + if (const auto *reduceList = + std::get_if(&x.u)) { + fir::ReduceOperationEnum reduce_operation = getReduceOperationEnum( + std::get(reduceList->t)); + for (const Fortran::parser::Name &x : + std::get>(reduceList->t)) { + info.reduceSymList.push_back( + std::make_pair(reduce_operation, x.symbol)); + } + } if (const auto *sharedList = std::get_if(&x.u)) for (const Fortran::parser::Name &x : sharedList->v) @@ -1955,9 +1997,23 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::Type loopVarType = info.getLoopVariableType(); mlir::Value loopValue; if (info.isUnordered) { + llvm::SmallVector reduceOperands; + llvm::SmallVector reduceAttrs; + // Create DO CONCURRENT reduce operands and attributes + for (const auto &reduceSym : info.reduceSymList) { + const fir::ReduceOperationEnum reduce_operation = reduceSym.first; + const Fortran::semantics::Symbol *sym = reduceSym.second; + fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr); + reduceOperands.push_back(fir::getBase(exv)); + auto reduce_attr = + fir::ReduceAttr::get(builder->getContext(), reduce_operation); + reduceAttrs.push_back(reduce_attr); + } // The loop variable value is explicitly updated. 
info.doLoop = builder->create( - loc, lowerValue, upperValue, stepValue, /*unordered=*/true); + loc, lowerValue, upperValue, stepValue, /*unordered=*/true, + /*finalCountValue=*/false, /*iterArgs=*/std::nullopt, + llvm::ArrayRef(reduceOperands), reduceAttrs); builder->setInsertionPointToStart(info.doLoop.getBody()); loopValue = builder->createConvert(loc, loopVarType, info.doLoop.getInductionVar()); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index b3e1ee3da3a778..4cdf1f2d98caa4 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -526,8 +526,8 @@ static constexpr IntrinsicHandler handlers[]{ {"operation", asAddr}, {"dim", asValue}, {"mask", asBox, handleDynamicOptional}, - {"identity", asValue}, - {"ordered", asValue}}}, + {"identity", asAddr, handleDynamicOptional}, + {"ordered", asValue, handleDynamicOptional}}}, /*isElemental=*/false}, {"repeat", &I::genRepeat, @@ -5736,7 +5736,71 @@ void IntrinsicLibrary::genRandomSeed(llvm::ArrayRef args) { fir::ExtendedValue IntrinsicLibrary::genReduce(mlir::Type resultType, llvm::ArrayRef args) { - TODO(loc, "intrinsic: reduce"); + assert(args.size() == 6); + + fir::BoxValue arrayTmp = builder.createBox(loc, args[0]); + mlir::Value array = fir::getBase(arrayTmp); + mlir::Value operation = fir::getBase(args[1]); + int rank = arrayTmp.rank(); + assert(rank >= 1); + + mlir::Type ty = array.getType(); + mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); + mlir::Type eleTy = mlir::cast(arrTy).getEleTy(); + + // Handle optional arguments + bool absentDim = isStaticallyAbsent(args[2]); + + auto mask = isStaticallyAbsent(args[3]) + ? builder.create( + loc, fir::BoxType::get(builder.getI1Type())) + : builder.createBox(loc, args[3]); + + mlir::Value identity = + isStaticallyAbsent(args[4]) + ? builder.create(loc, fir::ReferenceType::get(eleTy)) + : fir::getBase(args[4]); + + mlir::Value ordered = isStaticallyAbsent(args[5]) + ? builder.createBool(loc, false) + : fir::getBase(args[5]); + + // We call the type specific versions because the result is scalar + // in the case below. + if (absentDim || rank == 1) { + if (fir::isa_complex(eleTy) || fir::isa_derived(eleTy)) { + mlir::Value result = builder.createTemporary(loc, eleTy); + fir::runtime::genReduce(builder, loc, array, operation, mask, identity, + ordered, result); + if (fir::isa_derived(eleTy)) + return result; + return builder.create(loc, result); + } + if (fir::isa_char(eleTy)) { + // Create mutable fir.box to be passed to the runtime for the result. + fir::MutableBoxValue resultMutableBox = + fir::factory::createTempMutableBox(builder, loc, eleTy); + mlir::Value resultIrBox = + fir::factory::getMutableIRBox(builder, loc, resultMutableBox); + fir::runtime::genReduce(builder, loc, array, operation, mask, identity, + ordered, resultIrBox); + // Handle cleanup of allocatable result descriptor and return + return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE"); + } + return fir::runtime::genReduce(builder, loc, array, operation, mask, + identity, ordered); + } + // Handle cases that have an array result. + // Create mutable fir.box to be passed to the runtime for the result. 
+ mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, rank - 1); + fir::MutableBoxValue resultMutableBox = + fir::factory::createTempMutableBox(builder, loc, resultArrayType); + mlir::Value resultIrBox = + fir::factory::getMutableIRBox(builder, loc, resultMutableBox); + mlir::Value dim = fir::getBase(args[2]); + fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask, + identity, ordered, resultIrBox); + return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE"); } // REPEAT @@ -6298,16 +6362,17 @@ IntrinsicLibrary::genLbound(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 2 || args.size() == 3); const fir::ExtendedValue &array = args[0]; - if (const auto *boxValue = array.getBoxOf()) - if (boxValue->hasAssumedRank()) - TODO(loc, "intrinsic: lbound with assumed rank argument"); + // Semantics builds signatures for LBOUND calls as either + // LBOUND(array, dim, [kind]) or LBOUND(array, [kind]). + const bool dimIsAbsent = args.size() == 2 || isStaticallyAbsent(args, 1); + if (array.hasAssumedRank() && dimIsAbsent) + return genAssumedRankBoundInquiry(builder, loc, resultType, args, + /*kindPos=*/1, fir::runtime::genLbound); mlir::Type indexType = builder.getIndexType(); - // Semantics builds signatures for LBOUND calls as either - // LBOUND(array, dim, [kind]) or LBOUND(array, [kind]). - if (args.size() == 2 || isStaticallyAbsent(args, 1)) { - // DIM is absent. + if (dimIsAbsent) { + // DIM is absent and the rank of array is a compile time constant. mlir::Type lbType = fir::unwrapSequenceType(resultType); unsigned rank = array.rank(); mlir::Type lbArrayType = fir::SequenceType::get( @@ -6332,13 +6397,16 @@ IntrinsicLibrary::genLbound(mlir::Type resultType, // DIM is present. mlir::Value dim = fir::getBase(args[1]); - // If it is a compile time constant, skip the runtime call. - if (std::optional cstDim = fir::getIntIfConstant(dim)) { - mlir::Value one = builder.createIntegerConstant(loc, resultType, 1); - mlir::Value zero = builder.createIntegerConstant(loc, indexType, 0); - mlir::Value lb = computeLBOUND(builder, loc, array, *cstDim - 1, zero, one); - return builder.createConvert(loc, resultType, lb); - } + // If it is a compile time constant and the rank is known, skip the runtime + // call. 
+ if (!array.hasAssumedRank()) + if (std::optional cstDim = fir::getIntIfConstant(dim)) { + mlir::Value one = builder.createIntegerConstant(loc, resultType, 1); + mlir::Value zero = builder.createIntegerConstant(loc, indexType, 0); + mlir::Value lb = + computeLBOUND(builder, loc, array, *cstDim - 1, zero, one); + return builder.createConvert(loc, resultType, lb); + } fir::ExtendedValue box = createBoxForRuntimeBoundInquiry(loc, builder, array); return builder.createConvert( diff --git a/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp b/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp index 34c4020b5907c9..e01a6f05b5fdd8 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp @@ -29,6 +29,20 @@ mlir::Value fir::runtime::genLboundDim(fir::FirOpBuilder &builder, return builder.create(loc, lboundFunc, args).getResult(0); } +void fir::runtime::genLbound(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value resultAddr, mlir::Value array, + mlir::Value kind) { + mlir::func::FuncOp func = + fir::runtime::getRuntimeFunc(loc, builder); + auto fTy = func.getFunctionType(); + auto sourceFile = fir::factory::locationToFilename(builder, loc); + auto sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); + auto args = fir::runtime::createArguments( + builder, loc, fTy, resultAddr, array, kind, sourceFile, sourceLine); + builder.create(loc, func, args).getResult(0); +} + /// Generate call to `Ubound` runtime routine. Calls to UBOUND with a DIM /// argument get transformed into an expression equivalent to /// SIZE() + LBOUND() - 1, so they don't have an intrinsic in the runtime. diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp index d4076067bf103e..4b086a98de47b2 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp @@ -12,6 +12,7 @@ #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Support/Utils.h" +#include "flang/Runtime/reduce.h" #include "flang/Runtime/reduction.h" #include "mlir/Dialect/Func/IR/FuncOps.h" @@ -466,6 +467,216 @@ struct ForcedIParity16 { } }; +/// Placeholder for real*10 version of Reduce Intrinsic +struct ForcedReduceReal10 { + static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ReduceReal10)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF80(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); + }; + } +}; + +/// Placeholder for real*16 version of Reduce Intrinsic +struct ForcedReduceReal16 { + static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ReduceReal16)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF128(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = 
mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); + }; + } +}; + +/// Placeholder for DIM real*10 version of Reduce Intrinsic +struct ForcedReduceReal10Dim { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal10Dim)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF80(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for DIM real*16 version of Reduce Intrinsic +struct ForcedReduceReal16Dim { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal16Dim)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF128(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for integer*16 version of Reduce Intrinsic +struct ForcedReduceInteger16 { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceInteger16)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::IntegerType::get(ctx, 128); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); + }; + } +}; + +/// Placeholder for DIM integer*16 version of Reduce Intrinsic +struct ForcedReduceInteger16Dim { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::IntegerType::get(ctx, 128); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + 
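All of these Forced* placeholders share one shape: a mangled runtime symbol name paired with a hand-built mlir::FunctionType, used where the element type (80-bit and 128-bit reals, 128-bit integers, extended complex kinds) has no portable C++ counterpart that a getModel specialization could describe. Below is a minimal sketch of that shape; ForcedExample and ExampleFn are illustrative names, not part of the patch, and the keyed getRuntimeFunc usage is assumed from the calls made elsewhere in this file.

struct ForcedExample {
  // Mangled name of the runtime entry point this key resolves to.
  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ExampleFn));
  // Hand-built function type (ref, ref) -> ref over an 80-bit real, a type
  // that getModel cannot derive from any portable C++ type.
  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
    return [](mlir::MLIRContext *ctx) {
      auto ty = mlir::FloatType::getF80(ctx);
      auto refTy = fir::ReferenceType::get(ty);
      return mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
    };
  }
};

// Assumed usage, mirroring how the real placeholders are consumed by the
// genReduce lowering further down:
//   mlir::func::FuncOp func =
//       fir::runtime::getRuntimeFunc<ForcedExample>(loc, builder);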
+/// Placeholder for complex(10) version of Reduce Intrinsic +struct ForcedReduceComplex10 { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex10)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for Dim complex(10) version of Reduce Intrinsic +struct ForcedReduceComplex10Dim { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex10Dim)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for complex(16) version of Reduce Intrinsic +struct ForcedReduceComplex16 { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex16)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for Dim complex(16) version of Reduce Intrinsic +struct ForcedReduceComplex16Dim { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex16Dim)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refTy = fir::ReferenceType::get(ty); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + /// Generate call to specialized runtime function that takes a mask and /// dim argument. The All, Any, and Count intrinsics use this pattern. 
template @@ -1237,3 +1448,220 @@ void fir::runtime::genIParityDim(fir::FirOpBuilder &builder, mlir::Location loc, /// Generate call to `IParity` intrinsic runtime routine. This is the version /// that does not take a dim argument. GEN_IALL_IANY_IPARITY(IParity) + +/// Generate call to `Reduce` intrinsic runtime routine. This is the version +/// that does not take a DIM argument and stores the result in the passed +/// result value. +void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value arrayBox, mlir::Value operation, + mlir::Value maskBox, mlir::Value identity, + mlir::Value ordered, mlir::Value resultBox) { + mlir::func::FuncOp func; + auto ty = arrayBox.getType(); + auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); + auto eleTy = mlir::cast(arrTy).getEleTy(); + auto dim = builder.createIntegerConstant(loc, builder.getI32Type(), 1); + + assert(resultBox && "expect non null value for the result"); + assert((fir::isa_char(eleTy) || fir::isa_complex(eleTy) || + fir::isa_derived(eleTy)) && + "expect character, complex or derived-type"); + + mlir::MLIRContext *ctx = builder.getContext(); + fir::factory::CharacterExprHelper charHelper{builder, loc}; + + if (eleTy == fir::ComplexType::get(ctx, 2)) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 3)) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 4)) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 8)) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 10)) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 16)) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 2) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 4) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fir::isa_derived(eleTy)) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else + fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE"); + + auto fTy = func.getFunctionType(); + auto sourceFile = fir::factory::locationToFilename(builder, loc); + auto sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); + auto opAddr = builder.create(loc, fTy.getInput(2), operation); + auto args = fir::runtime::createArguments( + builder, loc, fTy, resultBox, arrayBox, opAddr, sourceFile, sourceLine, + dim, maskBox, identity, ordered); + builder.create(loc, func, args); +} + +/// Generate call to `Reduce` intrinsic runtime routine. This is the version +/// that does not take a DIM argument and returns a scalar result. 
+
+/// Generate call to `Reduce` intrinsic runtime routine. This is the version
+/// that does not take a DIM argument and returns a scalar result.
+mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder,
+                                    mlir::Location loc, mlir::Value arrayBox,
+                                    mlir::Value operation, mlir::Value maskBox,
+                                    mlir::Value identity, mlir::Value ordered) {
+  mlir::func::FuncOp func;
+  auto ty = arrayBox.getType();
+  auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
+  auto eleTy = mlir::cast<fir::SequenceType>(arrTy).getEleTy();
+  auto dim = builder.createIntegerConstant(loc, builder.getI32Type(), 1);
+
+  mlir::MLIRContext *ctx = builder.getContext();
+  fir::factory::CharacterExprHelper charHelper{builder, loc};
+
+  assert((fir::isa_real(eleTy) || fir::isa_integer(eleTy) ||
+          mlir::isa<fir::LogicalType>(eleTy)) &&
+         "expect real, integer or logical");
+
+  if (eleTy.isF16())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2)>(loc, builder);
+  else if (eleTy.isBF16())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3)>(loc, builder);
+  else if (eleTy.isF32())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4)>(loc, builder);
+  else if (eleTy.isF64())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8)>(loc, builder);
+  else if (eleTy.isF80())
+    func = fir::runtime::getRuntimeFunc<ForcedReduceReal10>(loc, builder);
+  else if (eleTy.isF128())
+    func = fir::runtime::getRuntimeFunc<ForcedReduceReal16>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)))
+    func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 1))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 2))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 4))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 8))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8)>(loc, builder);
+  else
+    fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE");
+
+  auto fTy = func.getFunctionType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+  auto opAddr = builder.create<fir::BoxAddrOp>(loc, fTy.getInput(1), operation);
+  auto args = fir::runtime::createArguments(builder, loc, fTy, arrayBox,
+                                            opAddr, sourceFile, sourceLine,
+                                            dim, maskBox, identity, ordered);
+  return builder.create<fir::CallOp>(loc, func, args).getResult(0);
+}
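A hypothetical caller-side sketch (the helper name is mine, not the patch's) of how intrinsic lowering can pick between the two overloads above:

static mlir::Value lowerReduceNoDim(fir::FirOpBuilder &builder,
                                    mlir::Location loc, mlir::Type eleTy,
                                    mlir::Value array, mlir::Value op,
                                    mlir::Value mask, mlir::Value identity,
                                    mlir::Value ordered,
                                    mlir::Value resultBox) {
  if (fir::isa_char(eleTy) || fir::isa_complex(eleTy) ||
      fir::isa_derived(eleTy)) {
    // Results that do not fit in a register are written through resultBox;
    // the caller would then load the reduced value out of the box
    // (simplified here by returning the box itself).
    fir::runtime::genReduce(builder, loc, array, op, mask, identity, ordered,
                            resultBox);
    return resultBox;
  }
  // Numeric and logical reductions come back as a plain SSA value.
  return fir::runtime::genReduce(builder, loc, array, op, mask, identity,
                                 ordered);
}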
+
+void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc,
+                                mlir::Value arrayBox, mlir::Value operation,
+                                mlir::Value dim, mlir::Value maskBox,
+                                mlir::Value identity, mlir::Value ordered,
+                                mlir::Value resultBox) {
+  mlir::func::FuncOp func;
+  auto ty = arrayBox.getType();
+  auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
+  auto eleTy = mlir::cast<fir::SequenceType>(arrTy).getEleTy();
+
+  mlir::MLIRContext *ctx = builder.getContext();
+  fir::factory::CharacterExprHelper charHelper{builder, loc};
+
+  if (eleTy.isF16())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2Dim)>(loc, builder);
+  else if (eleTy.isBF16())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3Dim)>(loc, builder);
+  else if (eleTy.isF32())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4Dim)>(loc, builder);
+  else if (eleTy.isF64())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8Dim)>(loc, builder);
+  else if (eleTy.isF80())
+    func = fir::runtime::getRuntimeFunc<ForcedReduceReal10Dim>(loc, builder);
+  else if (eleTy.isF128())
+    func = fir::runtime::getRuntimeFunc<ForcedReduceReal16Dim>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1Dim)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2Dim)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4Dim)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8Dim)>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)))
+    func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16Dim>(loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 2))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2Dim)>(loc,
+                                                                       builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 3))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3Dim)>(loc,
+                                                                       builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 4))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4Dim)>(loc,
+                                                                       builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 8))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8Dim)>(loc,
+                                                                       builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 10))
+    func = fir::runtime::getRuntimeFunc<ForcedReduceComplex10Dim>(loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 16))
+    func = fir::runtime::getRuntimeFunc<ForcedReduceComplex16Dim>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 1))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1Dim)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 2))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2Dim)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 4))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4Dim)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 8))
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8Dim)>(loc, builder);
+  else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceCharacter1Dim)>(loc,
+                                                                      builder);
+  else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 2)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceCharacter2Dim)>(loc,
+                                                                      builder);
+  else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 4)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceCharacter4Dim)>(loc,
+                                                                      builder);
+  else if (fir::isa_derived(eleTy))
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceDerivedTypeDim)>(loc,
+                                                                       builder);
+  else
+    fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE");
+
+  auto fTy = func.getFunctionType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
+  auto opAddr = builder.create<fir::BoxAddrOp>(loc, fTy.getInput(2), operation);
+  auto args = fir::runtime::createArguments(
+      builder, loc, fTy, resultBox, arrayBox, opAddr, sourceFile, sourceLine,
+      dim, maskBox, identity, ordered);
+  builder.create<fir::CallOp>(loc, func, args);
+}
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 07d3bd713ce45d..501a36f5b68ba6 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -13,9 +13,9 @@
 #define DEBUG_TYPE "flang-type-conversion"
 
 #include "flang/Optimizer/CodeGen/TypeConverter.h"
-#include "DescriptorModel.h"
 #include "flang/Common/Fortran.h"
 #include "flang/Optimizer/Builder/Todo.h" // remove when TODO's are done
+#include "flang/Optimizer/CodeGen/DescriptorModel.h"
 #include "flang/Optimizer/CodeGen/TBAABuilder.h"
 #include "flang/Optimizer/CodeGen/Target.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 75ca738211abef..ea8a9752eeeeea 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -1432,7 +1432,8 @@ bool fir::ConvertOp::canBeConverted(mlir::Type inType, mlir::Type outType) {
 mlir::LogicalResult fir::ConvertOp::verify() {
   if (canBeConverted(getValue().getType(), getType()))
     return mlir::success();
-  return emitOpError("invalid type conversion");
+  return emitOpError("invalid type conversion")
+         << getValue().getType() << " / " << getType();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
index a174f2c2bc4bfd..53745d10fe9e4d 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -13,15 +13,55 @@
 #define DEBUG_TYPE "flang-debug-type-generator"
 
 #include "DebugTypeGenerator.h"
+#include "flang/Optimizer/CodeGen/DescriptorModel.h"
+#include "flang/Optimizer/CodeGen/TypeConverter.h"
+#include "flang/Optimizer/Support/DataLayout.h"
+#include "mlir/Pass/Pass.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/Debug.h"
 
 namespace fir {
 
+/// Calculate offset of any field in the descriptor.
+template <int DescriptorField>
+std::uint64_t getComponentOffset(const mlir::DataLayout &dl,
+                                 mlir::MLIRContext *context,
+                                 mlir::Type llvmFieldType) {
+  static_assert(DescriptorField > 0 && DescriptorField < 10);
+  mlir::Type previousFieldType =
+      getDescFieldTypeModel<DescriptorField - 1>()(context);
+  std::uint64_t previousOffset =
+      getComponentOffset<DescriptorField - 1>(dl, context, previousFieldType);
+  std::uint64_t offset = previousOffset + dl.getTypeSize(previousFieldType);
+  std::uint64_t fieldAlignment = dl.getTypeABIAlignment(llvmFieldType);
+  return llvm::alignTo(offset, fieldAlignment);
+}
+template <>
+std::uint64_t getComponentOffset<0>(const mlir::DataLayout &dl,
+                                    mlir::MLIRContext *context,
+                                    mlir::Type llvmFieldType) {
+  return 0;
+}
+
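To make the recursion concrete, here is a self-contained sketch of the same walk with plain numbers instead of MLIR types, assuming a typical LP64 descriptor layout (base_addr, elem_len, version, rank, type, attribute, extra, then the dims array). The resulting dims offset of 24 and per-dimension stride of 24 line up with the DW_OP_plus_uconst operands 24/32 and 48/56 in the new integration test added later in this patch.

#include <cstdint>
#include <cstdio>

// Same computation as getComponentOffset, with hard-coded sizes/alignments
// (an illustrative assumption, not taken from the patch).
constexpr std::uint64_t alignTo(std::uint64_t off, std::uint64_t align) {
  return (off + align - 1) / align * align;
}

int main() {
  // base_addr, elem_len, version, rank, type, attribute, extra
  const std::uint64_t sizeAndAlign[][2] = {{8, 8}, {8, 8}, {4, 4}, {1, 1},
                                           {1, 1}, {1, 1}, {1, 1}};
  std::uint64_t offset = 0;
  for (auto [size, align] : sizeAndAlign)
    offset = alignTo(offset, align) + size;
  std::uint64_t dimsOffset = alignTo(offset, 8); // 24 on this layout
  std::uint64_t dimsSize = 3 * 8; // lower_bound, extent, stride (i64 each)
  // Dimension i: lower_bound at dimsOffset + i * dimsSize,
  //              extent      at dimsOffset + i * dimsSize + 8.
  std::printf("dim0: lb=%llu ext=%llu  dim1: lb=%llu ext=%llu\n",
              (unsigned long long)dimsOffset,
              (unsigned long long)(dimsOffset + 8),
              (unsigned long long)(dimsOffset + dimsSize),
              (unsigned long long)(dimsOffset + dimsSize + 8));
  return 0;
}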
 DebugTypeGenerator::DebugTypeGenerator(mlir::ModuleOp m)
     : module(m), kindMapping(getKindMapping(m)) {
   LLVM_DEBUG(llvm::dbgs() << "DITypeAttr generator\n");
+
+  std::optional<mlir::DataLayout> dl =
+      fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/true);
+  if (!dl) {
+    mlir::emitError(module.getLoc(), "Missing data layout attribute in module");
+    return;
+  }
+
+  mlir::MLIRContext *context = module.getContext();
+
+  // The debug information requires the offset of certain fields in the
+  // descriptors, e.g. lower_bound and extent for each dimension.
+  mlir::Type llvmDimsType = getDescFieldTypeModel<kDimsPosInBox>()(context);
+  dimsOffset = getComponentOffset<kDimsPosInBox>(*dl, context, llvmDimsType);
+  dimsSize = dl->getTypeSize(llvmDimsType);
 }
 
 static mlir::LLVM::DITypeAttr genBasicType(mlir::MLIRContext *context,
@@ -37,10 +77,82 @@ static mlir::LLVM::DITypeAttr genPlaceholderType(mlir::MLIRContext *context) {
                        llvm::dwarf::DW_ATE_signed);
 }
 
+mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType(
+    fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr,
+    mlir::LLVM::DIScopeAttr scope, mlir::Location loc, bool genAllocated,
+    bool genAssociated) {
+
+  mlir::MLIRContext *context = module.getContext();
+  // FIXME: Assumed-rank arrays are not supported yet.
+  if (seqTy.hasUnknownShape())
+    return genPlaceholderType(context);
+
+  llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops;
+  auto addOp = [&](unsigned opc, llvm::ArrayRef<uint64_t> vals) {
+    ops.push_back(mlir::LLVM::DIExpressionElemAttr::get(context, opc, vals));
+  };
+
+  addOp(llvm::dwarf::DW_OP_push_object_address, {});
+  addOp(llvm::dwarf::DW_OP_deref, {});
+
+  // dataLocation = *base_addr
+  mlir::LLVM::DIExpressionAttr dataLocation =
+      mlir::LLVM::DIExpressionAttr::get(context, ops);
+  addOp(llvm::dwarf::DW_OP_lit0, {});
+  addOp(llvm::dwarf::DW_OP_ne, {});
+
+  // allocated = associated = (*base_addr != 0)
+  mlir::LLVM::DIExpressionAttr valid =
+      mlir::LLVM::DIExpressionAttr::get(context, ops);
+  mlir::LLVM::DIExpressionAttr allocated = genAllocated ? valid : nullptr;
+  mlir::LLVM::DIExpressionAttr associated = genAssociated ? valid : nullptr;
+  ops.clear();
+
+  llvm::SmallVector<mlir::LLVM::DINodeAttr> elements;
+  mlir::LLVM::DITypeAttr elemTy =
+      convertType(seqTy.getEleTy(), fileAttr, scope, loc);
+  unsigned offset = dimsOffset;
+  const unsigned indexSize = dimsSize / 3;
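An illustrative paraphrase only (not patch code) of what a DWARF consumer effectively evaluates from the expressions built in the loop below, with `desc` standing for the descriptor's address (the "object address"); it assumes kDimLowerBoundPos == 0 and kDimExtentPos == 1 within each dimension triple.

#include <cstdint>
#include <cstring>

inline std::int64_t readWord(const char *p) {
  std::int64_t v;
  std::memcpy(&v, p, sizeof(v)); // DW_OP_deref
  return v;
}
inline std::int64_t lowerBound(const char *desc, unsigned dimsOffset,
                               unsigned dimsSize, unsigned indexSize,
                               unsigned i) {
  return readWord(desc + dimsOffset + i * dimsSize + indexSize * 0);
}
inline std::int64_t extent(const char *desc, unsigned dimsOffset,
                           unsigned dimsSize, unsigned indexSize, unsigned i) {
  return readWord(desc + dimsOffset + i * dimsSize + indexSize * 1);
}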
+  for ([[maybe_unused]] auto _ : seqTy.getShape()) {
+    // For each dimension, find the offset of count and lower bound in the
+    // descriptor and generate the dwarf expression to extract it.
+    // FIXME: If `indexSize` happens to be bigger than the address size on the
+    // system then we may have to change 'DW_OP_deref' here.
+    addOp(llvm::dwarf::DW_OP_push_object_address, {});
+    addOp(llvm::dwarf::DW_OP_plus_uconst,
+          {offset + (indexSize * kDimExtentPos)});
+    addOp(llvm::dwarf::DW_OP_deref, {});
+    // count[i] = *(base_addr + offset + (indexSize * kDimExtentPos))
+    // where 'offset' is dimsOffset + (i * dimsSize)
+    mlir::LLVM::DIExpressionAttr countAttr =
+        mlir::LLVM::DIExpressionAttr::get(context, ops);
+    ops.clear();
+
+    addOp(llvm::dwarf::DW_OP_push_object_address, {});
+    addOp(llvm::dwarf::DW_OP_plus_uconst,
+          {offset + (indexSize * kDimLowerBoundPos)});
+    addOp(llvm::dwarf::DW_OP_deref, {});
+    // lower_bound[i] = *(base_addr + offset + (indexSize * kDimLowerBoundPos))
+    mlir::LLVM::DIExpressionAttr lowerAttr =
+        mlir::LLVM::DIExpressionAttr::get(context, ops);
+    ops.clear();
+
+    offset += dimsSize;
+    mlir::LLVM::DISubrangeAttr subrangeTy = mlir::LLVM::DISubrangeAttr::get(
+        context, nullptr, lowerAttr, countAttr, nullptr);
+    elements.push_back(subrangeTy);
+  }
+  return mlir::LLVM::DICompositeTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_array_type, /*recursive id*/ {},
+      /* name */ nullptr, /* file */ nullptr, /* line */ 0,
+      /* scope */ nullptr, elemTy, mlir::LLVM::DIFlags::Zero,
+      /* sizeInBits */ 0, /*alignInBits*/ 0, elements, dataLocation,
+      /* rank */ nullptr, allocated, associated);
+}
+
 mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType(
     fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr,
     mlir::LLVM::DIScopeAttr scope, mlir::Location loc) {
-
   mlir::MLIRContext *context = module.getContext();
 
   // FIXME: Only fixed sizes arrays handled at the moment.
   if (seqTy.hasDynamicExtents())
@@ -112,6 +224,12 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr,
                          bitWidth * 2, llvm::dwarf::DW_ATE_complex_float);
   } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(Ty)) {
     return convertSequenceType(seqTy, fileAttr, scope, loc);
+  } else if (auto boxTy = mlir::dyn_cast_or_null<fir::BoxType>(Ty)) {
+    auto elTy = boxTy.getElementType();
+    if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(elTy))
+      return convertBoxedSequenceType(seqTy, fileAttr, scope, loc, false,
+                                      false);
+    return genPlaceholderType(context);
   } else {
     // FIXME: These types are currently unhandled. We are generating a
     // placeholder type to allow us to test supported bits.
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
index 963c919d66825c..11515d11dfed63 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
@@ -35,8 +35,20 @@ class DebugTypeGenerator {
                                            mlir::LLVM::DIFileAttr fileAttr,
                                            mlir::LLVM::DIScopeAttr scope,
                                            mlir::Location loc);
+
+  /// 'genAllocated' is true when we want to generate the 'allocated' field
+  /// in the DICompositeType; it is needed for allocatable arrays. Similarly,
+  /// 'genAssociated' is used with 'pointer' types to generate the
+  /// 'associated' field.
+  mlir::LLVM::DITypeAttr
+  convertBoxedSequenceType(fir::SequenceType seqTy,
+                           mlir::LLVM::DIFileAttr fileAttr,
+                           mlir::LLVM::DIScopeAttr scope, mlir::Location loc,
+                           bool genAllocated, bool genAssociated);
   mlir::ModuleOp module;
   KindMapping kindMapping;
+  std::uint64_t dimsSize;
+  std::uint64_t dimsOffset;
 };
 
 } // namespace fir
diff --git a/flang/test/Integration/debug-assumed-shape-array.f90 b/flang/test/Integration/debug-assumed-shape-array.f90
new file mode 100644
index 00000000000000..7b0801c12dba11
--- /dev/null
+++ b/flang/test/Integration/debug-assumed-shape-array.f90
@@ -0,0 +1,13 @@
+! 
RUN: %flang_fc1 -emit-llvm -debug-info-kind=standalone %s -o - | FileCheck %s + +subroutine ff(arr) + implicit none + integer :: arr(:, :) + return arr(1,1) +end subroutine ff + +! CHECK-DAG: !DICompositeType(tag: DW_TAG_array_type{{.*}}elements: ![[ELEMS:[0-9]+]], dataLocation: !DIExpression(DW_OP_push_object_address, DW_OP_deref)) +! CHECK-DAG: ![[ELEMS]] = !{![[ELEM1:[0-9]+]], ![[ELEM2:[0-9]+]]} +! CHECK-DAG: ![[ELEM1]] = !DISubrange(lowerBound: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 24, DW_OP_deref), upperBound: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 32, DW_OP_deref)) +! CHECK-DAG: ![[ELEM2]] = !DISubrange(lowerBound: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 48, DW_OP_deref), upperBound: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 56, DW_OP_deref)) + diff --git a/flang/test/Lower/HLFIR/assumed-rank-inquiries-3.f90 b/flang/test/Lower/HLFIR/assumed-rank-inquiries-3.f90 index bbeff5ff051915..e568b94f4f8843 100644 --- a/flang/test/Lower/HLFIR/assumed-rank-inquiries-3.f90 +++ b/flang/test/Lower/HLFIR/assumed-rank-inquiries-3.f90 @@ -54,3 +54,58 @@ subroutine test_shape_2(x) ! CHECK: %[[VAL_13:.*]] = fir.box_rank %[[VAL_4]] : (!fir.box>>) -> index ! CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_14]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) + + +subroutine test_lbound(x) + real :: x(..) + call takes_integer_array(lbound(x)) +end subroutine +! CHECK-LABEL: func.func @_QPtest_lbound( +! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<15xi32> +! CHECK: %[[VAL_4:.*]] = arith.constant 4 : i32 +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.llvm_ptr +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_3:.*]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_10:.*]] = fir.call @_FortranALbound(%[[VAL_7]], %[[VAL_8]], %[[VAL_4]], %{{.*}}, %{{.*}}) +! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_12:.*]] = fir.box_rank %[[VAL_3]] : (!fir.box>) -> index +! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_13]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[VAL_15:.*]] = arith.constant false +! CHECK: %[[VAL_16:.*]] = hlfir.as_expr %[[VAL_14]]#0 move %[[VAL_15]] : (!fir.box>, i1) -> !hlfir.expr +! CHECK: %[[VAL_17:.*]]:3 = hlfir.associate %[[VAL_16]](%[[VAL_13]]) {adapt.valuebyref} : (!hlfir.expr, !fir.shape<1>) -> (!fir.box>, !fir.ref>, i1) +! CHECK: fir.call @_QPtakes_integer_array(%[[VAL_17]]#1) fastmath : (!fir.ref>) -> () +! CHECK: hlfir.end_associate %[[VAL_17]]#1, %[[VAL_17]]#2 : !fir.ref>, i1 +! CHECK: hlfir.destroy %[[VAL_16]] : !hlfir.expr +! CHECK: return +! CHECK: } + +subroutine test_lbound_kind(x) + real :: x(..) + call takes_integer8_array(lbound(x, kind=8)) +end subroutine +! CHECK-LABEL: func.func @_QPtest_lbound_kind( +! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<15xi64> +! CHECK: %[[VAL_4:.*]] = arith.constant 8 : i32 +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.llvm_ptr +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_3:.*]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_10:.*]] = fir.call @_FortranALbound(%[[VAL_7]], %[[VAL_8]], %[[VAL_4]], %{{.*}}, %{{.*}}) +! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.ref> +! 
CHECK: %[[VAL_12:.*]] = fir.box_rank %[[VAL_3]] : (!fir.box>) -> index +! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_13]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) + +subroutine test_lbound_2(x) + real, pointer :: x(..) + call takes_integer_array(lbound(x)) +end subroutine +! CHECK-LABEL: func.func @_QPtest_lbound_2( +! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.array<15xi32> +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3:.*]] : !fir.ref>>> +! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.llvm_ptr +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_4]] : (!fir.box>>) -> !fir.box +! CHECK: %[[VAL_11:.*]] = fir.call @_FortranALbound(%[[VAL_8]], %[[VAL_9]], %[[VAL_5]], %{{.*}}, %{{.*}}) +! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_13:.*]] = fir.box_rank %[[VAL_4]] : (!fir.box>>) -> index +! CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_14]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) diff --git a/flang/test/Lower/Intrinsics/Todo/reduce.f90 b/flang/test/Lower/Intrinsics/Todo/reduce.f90 deleted file mode 100644 index 7aa6f4a9f3ad37..00000000000000 --- a/flang/test/Lower/Intrinsics/Todo/reduce.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir %s -o - 2>&1 | FileCheck %s - -interface - pure function chfunc(a,b) - character(*),intent(in) :: a,b - character(3) :: chfunc - end function - end interface - character(3) x(5) - print*, reduce(x,chfunc) -end program - -! CHECK: not yet implemented: intrinsic: reduce diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90 new file mode 100644 index 00000000000000..842e626d7cc397 --- /dev/null +++ b/flang/test/Lower/Intrinsics/reduce.f90 @@ -0,0 +1,616 @@ +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s + +module reduce_mod + +type :: t1 + integer :: a +end type + +contains + +pure function red_int1(a,b) + integer(1), intent(in) :: a, b + integer(1) :: red_int1 + red_int1 = a + b +end function + +subroutine integer1(a, id) + integer(1), intent(in) :: a(:) + integer(1) :: res, id + + res = reduce(a, red_int1) + + res = reduce(a, red_int1, identity=id) + + res = reduce(a, red_int1, identity=id, ordered = .true.) + + res = reduce(a, red_int1, [.true., .true., .false.]) +end subroutine + +! CHECK-LABEL: func.func @_QMreduce_modPinteger1( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "id"}) +! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QMreduce_modFinteger1Ea"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +! CHECK: %[[ID:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %{{.*}} {uniq_name = "_QMreduce_modFinteger1Eid"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOC_RES:.*]] = fir.alloca i8 {bindc_name = "res", uniq_name = "_QMreduce_modFinteger1Eres"} +! CHECK: %[[RES:.*]]:2 = hlfir.declare %[[ALLOC_RES]] {uniq_name = "_QMreduce_modFinteger1Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ADDR_OP:.*]] = fir.address_of(@_QMreduce_modPred_int1) : (!fir.ref, !fir.ref) -> i8 +! 
CHECK: %[[BOX_PROC:.*]] = fir.emboxproc %[[ADDR_OP]] : ((!fir.ref, !fir.ref) -> i8) -> !fir.boxproc<() -> ()> +! CHECK: %[[MASK:.*]] = fir.absent !fir.box +! CHECK: %[[IDENTITY:.*]] = fir.absent !fir.ref +! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX_PROC]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref, !fir.ref) -> !fir.ref) +! CHECK: %[[A_NONE:.*]] = fir.convert %[[A]]#1 : (!fir.box>) -> !fir.box +! CHECK: %[[MASK_NONE:.*]] = fir.convert %[[MASK]] : (!fir.box) -> !fir.box +! CHECK: %[[REDUCE_RES:.*]] = fir.call @_FortranAReduceInteger1(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[IDENTITY]], %false) fastmath : (!fir.box, (!fir.ref, !fir.ref) -> !fir.ref, !fir.ref, i32, i32, !fir.box, !fir.ref, i1) -> i8 +! CHECK: hlfir.assign %[[REDUCE_RES]] to %[[RES]]#0 : i8, !fir.ref +! CHECK: %[[ADDR_OP:.*]] = fir.address_of(@_QMreduce_modPred_int1) : (!fir.ref, !fir.ref) -> i8 +! CHECK: %[[BOX_PROC:.*]] = fir.emboxproc %[[ADDR_OP]] : ((!fir.ref, !fir.ref) -> i8) -> !fir.boxproc<() -> ()> +! CHECK: %[[MASK:.*]] = fir.absent !fir.box +! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX_PROC]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref, !fir.ref) -> !fir.ref) +! CHECK: %[[A_NONE:.*]] = fir.convert %[[A]]#1 : (!fir.box>) -> !fir.box +! CHECK: %[[MASK_NONE:.*]] = fir.convert %[[MASK]] : (!fir.box) -> !fir.box +! CHECK: %{{.*}} = fir.call @_FortranAReduceInteger1(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[ID]]#1, %false{{.*}}) fastmath : (!fir.box, (!fir.ref, !fir.ref) -> !fir.ref, !fir.ref, i32, i32, !fir.box, !fir.ref, i1) -> i8 +! CHECK: fir.call @_FortranAReduceInteger1(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}#1, %true) +! CHECK: %[[MASK:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.3xl4.0"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[SHAPE_C3:.*]] = fir.shape %c3{{.*}} : (index) -> !fir.shape<1> +! CHECK: %[[BOXED_MASK:.*]] = fir.embox %[[MASK]]#1(%[[SHAPE_C3]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> +! CHECK: %[[CONV_MASK:.*]] = fir.convert %[[BOXED_MASK]] : (!fir.box>>) -> !fir.box +! CHECK: fir.call @_FortranAReduceInteger1(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}}) + +pure function red_int2(a,b) + integer(2), intent(in) :: a, b + integer(2) :: red_int2 + red_int2 = a + b +end function + +subroutine integer2(a) + integer(2), intent(in) :: a(:) + integer(2) :: res + res = reduce(a, red_int2) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger2 + +pure function red_int4(a,b) + integer(4), intent(in) :: a, b + integer(4) :: red_int4 + red_int4 = a + b +end function + +subroutine integer4(a) + integer(4), intent(in) :: a(:) + integer(4) :: res + res = reduce(a, red_int4) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger4 + +pure function red_int8(a,b) + integer(8), intent(in) :: a, b + integer(8) :: red_int8 + red_int8 = a + b +end function + +subroutine integer8(a) + integer(8), intent(in) :: a(:) + integer(8) :: res + res = reduce(a, red_int8) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger8 + +pure function red_int16(a,b) + integer(16), intent(in) :: a, b + integer(16) :: red_int16 + red_int16 = a + b +end function + +subroutine integer16(a) + integer(16), intent(in) :: a(:) + integer(16) :: res + res = reduce(a, red_int16) +end subroutine + +! 
CHECK: fir.call @_FortranAReduceInteger16 + +pure function red_real2(a,b) + real(2), intent(in) :: a, b + real(2) :: red_real2 + red_real2 = a + b +end function + +subroutine real2(a) + real(2), intent(in) :: a(:) + real(2) :: res + res = reduce(a, red_real2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal2 + +pure function red_real3(a,b) + real(3), intent(in) :: a, b + real(3) :: red_real3 + red_real3 = a + b +end function + +subroutine real3(a) + real(3), intent(in) :: a(:) + real(3) :: res + res = reduce(a, red_real3) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal3 + +pure function red_real4(a,b) + real(4), intent(in) :: a, b + real(4) :: red_real4 + red_real4 = a + b +end function + +subroutine real4(a) + real(4), intent(in) :: a(:) + real(4) :: res + res = reduce(a, red_real4) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal4 + +pure function red_real8(a,b) + real(8), intent(in) :: a, b + real(8) :: red_real8 + red_real8 = a + b +end function + +subroutine real8(a) + real(8), intent(in) :: a(:) + real(8) :: res + res = reduce(a, red_real8) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal8 + +pure function red_real10(a,b) + real(10), intent(in) :: a, b + real(10) :: red_real10 + red_real10 = a + b +end function + +subroutine real10(a) + real(10), intent(in) :: a(:) + real(10) :: res + res = reduce(a, red_real10) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal10 + +pure function red_real16(a,b) + real(16), intent(in) :: a, b + real(16) :: red_real16 + red_real16 = a + b +end function + +subroutine real16(a) + real(16), intent(in) :: a(:) + real(16) :: res + res = reduce(a, red_real16) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal16 + +pure function red_complex2(a,b) + complex(2), intent(in) :: a, b + complex(2) :: red_complex2 + red_complex2 = a + b +end function + +subroutine complex2(a) + complex(2), intent(in) :: a(:) + complex(2) :: res + res = reduce(a, red_complex2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex2 + +pure function red_complex3(a,b) + complex(3), intent(in) :: a, b + complex(3) :: red_complex3 + red_complex3 = a + b +end function + +subroutine complex3(a) + complex(3), intent(in) :: a(:) + complex(3) :: res + res = reduce(a, red_complex3) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex3 + +pure function red_complex4(a,b) + complex(4), intent(in) :: a, b + complex(4) :: red_complex4 + red_complex4 = a + b +end function + +subroutine complex4(a) + complex(4), intent(in) :: a(:) + complex(4) :: res + res = reduce(a, red_complex4) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex4 + +pure function red_complex8(a,b) + complex(8), intent(in) :: a, b + complex(8) :: red_complex8 + red_complex8 = a + b +end function + +subroutine complex8(a) + complex(8), intent(in) :: a(:) + complex(8) :: res + res = reduce(a, red_complex8) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex8 + +pure function red_complex10(a,b) + complex(10), intent(in) :: a, b + complex(10) :: red_complex10 + red_complex10 = a + b +end function + +subroutine complex10(a) + complex(10), intent(in) :: a(:) + complex(10) :: res + res = reduce(a, red_complex10) +end subroutine + +! 
CHECK: fir.call @_FortranACppReduceComplex10 + +pure function red_complex16(a,b) + complex(16), intent(in) :: a, b + complex(16) :: red_complex16 + red_complex16 = a + b +end function + +subroutine complex16(a) + complex(16), intent(in) :: a(:) + complex(16) :: res + res = reduce(a, red_complex16) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex16 + +pure function red_log1(a,b) + logical(1), intent(in) :: a, b + logical(1) :: red_log1 + red_log1 = a .and. b +end function + +subroutine log1(a) + logical(1), intent(in) :: a(:) + logical(1) :: res + res = reduce(a, red_log1) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical1 + +pure function red_log2(a,b) + logical(2), intent(in) :: a, b + logical(2) :: red_log2 + red_log2 = a .and. b +end function + +subroutine log2(a) + logical(2), intent(in) :: a(:) + logical(2) :: res + res = reduce(a, red_log2) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical2 + +pure function red_log4(a,b) + logical(4), intent(in) :: a, b + logical(4) :: red_log4 + red_log4 = a .and. b +end function + +subroutine log4(a) + logical(4), intent(in) :: a(:) + logical(4) :: res + res = reduce(a, red_log4) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical4 + +pure function red_log8(a,b) + logical(8), intent(in) :: a, b + logical(8) :: red_log8 + red_log8 = a .and. b +end function + +subroutine log8(a) + logical(8), intent(in) :: a(:) + logical(8) :: res + res = reduce(a, red_log8) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical8 + +pure function red_char1(a,b) + character(1), intent(in) :: a, b + character(1) :: red_char1 + red_char1 = a // b +end function + +subroutine char1(a) + character(1), intent(in) :: a(:) + character(1) :: res + res = reduce(a, red_char1) +end subroutine + +! CHECK: fir.call @_FortranAReduceChar1 + +pure function red_char2(a,b) + character(kind=2), intent(in) :: a, b + character(kind=2) :: red_char2 + red_char2 = a // b +end function + +subroutine char2(a) + character(kind=2), intent(in) :: a(:) + character(kind=2) :: res + res = reduce(a, red_char2) +end subroutine + +! CHECK: fir.call @_FortranAReduceChar2 + +pure function red_char4(a,b) + character(kind=4), intent(in) :: a, b + character(kind=4) :: red_char4 + red_char4 = a // b +end function + +subroutine char4(a) + character(kind=4), intent(in) :: a(:) + character(kind=4) :: res + res = reduce(a, red_char4) +end subroutine + +! CHECK: fir.call @_FortranAReduceChar4 + +pure function red_type(a,b) + type(t1), intent(in) :: a, b + type(t1) :: red_type + red_type%a = a%a + b%a +end function + +subroutine testtype(a) + type(t1), intent(in) :: a(:) + type(t1) :: res + res = reduce(a, red_type) +end subroutine + +! CHECK: fir.call @_FortranAReduceDerivedType + +subroutine integer1dim(a, id) + integer(1), intent(in) :: a(:,:) + integer(1), allocatable :: res(:) + + res = reduce(a, red_int1, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger1Dim + +subroutine integer2dim(a, id) + integer(2), intent(in) :: a(:,:) + integer(2), allocatable :: res(:) + + res = reduce(a, red_int2, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger2Dim + +subroutine integer4dim(a, id) + integer(4), intent(in) :: a(:,:) + integer(4), allocatable :: res(:) + + res = reduce(a, red_int4, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger4Dim + +subroutine integer8dim(a, id) + integer(8), intent(in) :: a(:,:) + integer(8), allocatable :: res(:) + + res = reduce(a, red_int8, 2) +end subroutine + +! 
CHECK: fir.call @_FortranAReduceInteger8Dim + +subroutine integer16dim(a, id) + integer(16), intent(in) :: a(:,:) + integer(16), allocatable :: res(:) + + res = reduce(a, red_int16, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceInteger16Dim + +subroutine real2dim(a, id) + real(2), intent(in) :: a(:,:) + real(2), allocatable :: res(:) + + res = reduce(a, red_real2, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal2Dim + +subroutine real3dim(a, id) + real(3), intent(in) :: a(:,:) + real(3), allocatable :: res(:) + + res = reduce(a, red_real3, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal3Dim + +subroutine real4dim(a, id) + real(4), intent(in) :: a(:,:) + real(4), allocatable :: res(:) + + res = reduce(a, red_real4, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal4Dim + +subroutine real8dim(a, id) + real(8), intent(in) :: a(:,:) + real(8), allocatable :: res(:) + + res = reduce(a, red_real8, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal8Dim + +subroutine real10dim(a, id) + real(10), intent(in) :: a(:,:) + real(10), allocatable :: res(:) + + res = reduce(a, red_real10, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal10Dim + +subroutine real16dim(a, id) + real(16), intent(in) :: a(:,:) + real(16), allocatable :: res(:) + + res = reduce(a, red_real16, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceReal16Dim + +subroutine complex2dim(a, id) + complex(2), intent(in) :: a(:,:) + complex(2), allocatable :: res(:) + + res = reduce(a, red_complex2, 2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex2Dim + +subroutine complex3dim(a, id) + complex(3), intent(in) :: a(:,:) + complex(3), allocatable :: res(:) + + res = reduce(a, red_complex3, 2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex3Dim + +subroutine complex4dim(a, id) + complex(4), intent(in) :: a(:,:) + complex(4), allocatable :: res(:) + + res = reduce(a, red_complex4, 2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex4Dim + +subroutine complex8dim(a, id) + complex(8), intent(in) :: a(:,:) + complex(8), allocatable :: res(:) + + res = reduce(a, red_complex8, 2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex8Dim + +subroutine complex10dim(a, id) + complex(10), intent(in) :: a(:,:) + complex(10), allocatable :: res(:) + + res = reduce(a, red_complex10, 2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex10Dim + +subroutine complex16dim(a, id) + complex(16), intent(in) :: a(:,:) + complex(16), allocatable :: res(:) + + res = reduce(a, red_complex16, 2) +end subroutine + +! CHECK: fir.call @_FortranACppReduceComplex16Dim + +subroutine logical1dim(a, id) + logical(1), intent(in) :: a(:,:) + logical(1), allocatable :: res(:) + + res = reduce(a, red_log1, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical1Dim + +subroutine logical2dim(a, id) + logical(2), intent(in) :: a(:,:) + logical(2), allocatable :: res(:) + + res = reduce(a, red_log2, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical2Dim + +subroutine logical4dim(a, id) + logical(4), intent(in) :: a(:,:) + logical(4), allocatable :: res(:) + + res = reduce(a, red_log4, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceLogical4Dim + +subroutine logical8dim(a, id) + logical(8), intent(in) :: a(:,:) + logical(8), allocatable :: res(:) + + res = reduce(a, red_log8, 2) +end subroutine + +! 
CHECK: fir.call @_FortranAReduceLogical8Dim + +subroutine testtypeDim(a) + type(t1), intent(in) :: a(:,:) + type(t1), allocatable :: res(:) + res = reduce(a, red_type, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceDerivedTypeDim + +subroutine char1dim(a) + character(1), intent(in) :: a(:, :) + character(1), allocatable :: res(:) + res = reduce(a, red_char1, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceCharacter1Dim + +subroutine char2dim(a) + character(kind=2), intent(in) :: a(:, :) + character(kind=2), allocatable :: res(:) + res = reduce(a, red_char2, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceCharacter2Dim + +subroutine char4dim(a) + character(kind=4), intent(in) :: a(:, :) + character(kind=4), allocatable :: res(:) + res = reduce(a, red_char4, 2) +end subroutine + +! CHECK: fir.call @_FortranAReduceCharacter4Dim + +end module diff --git a/flang/test/Lower/loops3.f90 b/flang/test/Lower/loops3.f90 new file mode 100644 index 00000000000000..2e62ee480ec8a6 --- /dev/null +++ b/flang/test/Lower/loops3.f90 @@ -0,0 +1,23 @@ +! Test do concurrent reduction +! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s + +! CHECK-LABEL: loop_test +subroutine loop_test + integer(4) :: i, j, k, tmp, sum = 0 + real :: m + + i = 100 + j = 200 + k = 300 + + ! CHECK: %[[VAL_0:.*]] = fir.alloca f32 {bindc_name = "m", uniq_name = "_QFloop_testEm"} + ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFloop_testEsum) : !fir.ref + ! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { + ! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { + ! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr -> %[[VAL_1:.*]] : !fir.ref, #fir.reduce_attr -> %[[VAL_0:.*]] : !fir.ref) { + do concurrent (i=1:5, j=1:5, k=1:5) local(tmp) reduce(+:sum) reduce(max:m) + tmp = i + j + k + sum = tmp + sum + m = max(m, sum) + enddo +end subroutine loop_test diff --git a/flang/test/Transforms/debug-90683.fir b/flang/test/Transforms/debug-90683.fir index 9da0e5347d3f8f..cc6929c10411f8 100644 --- a/flang/test/Transforms/debug-90683.fir +++ b/flang/test/Transforms/debug-90683.fir @@ -2,7 +2,7 @@ // This test checks that debug information for fir.real type works ok. 
-module attributes {} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @_QPfn1(%arg0: !fir.ref> {fir.bindc_name = "a"} ) { %0 = fir.declare %arg0 {uniq_name = "_QFfn1Ea"} : (!fir.ref>) -> !fir.ref> %1 = fir.alloca f32 {bindc_name = "abserror", uniq_name = "_QFfn1Eabserror"} diff --git a/flang/test/Transforms/debug-assumed-shape-array.fir b/flang/test/Transforms/debug-assumed-shape-array.fir new file mode 100644 index 00000000000000..00dec9b318c811 --- /dev/null +++ b/flang/test/Transforms/debug-assumed-shape-array.fir @@ -0,0 +1,16 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>, #dlti.dl_entry<"dlti.endianness", "little">>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} { + func.func @ff_(%arg0: !fir.box> {fir.bindc_name = "arr"} ) { + %0 = fir.undefined !fir.dscope + %1 = fircg.ext_declare %arg0 dummy_scope %0 {uniq_name = "_QFffEarr"} : (!fir.box>, !fir.dscope) -> !fir.box> loc(#loc1) + return + } loc(#loc2) +} +#loc1 = loc("test1.f90":1:1) +#loc2 = loc("test1.f90":3:16) + +// CHECK: #llvm.di_composite_type, upperBound = #llvm.di_expression<[DW_OP_push_object_address, DW_OP_plus_uconst(32), DW_OP_deref]>> +// CHECK-SAME: #llvm.di_subrange, upperBound = #llvm.di_expression<[DW_OP_push_object_address, DW_OP_plus_uconst(56), DW_OP_deref]>> +// CHECK-SAME: dataLocation = <[DW_OP_push_object_address, DW_OP_deref]>> diff --git a/flang/test/Transforms/debug-complex-1.fir b/flang/test/Transforms/debug-complex-1.fir index a3cbd767d8a58e..cc742d3b183bbf 100644 --- a/flang/test/Transforms/debug-complex-1.fir +++ b/flang/test/Transforms/debug-complex-1.fir @@ -3,7 +3,7 @@ // check conversion of complex type of different size. Both fir and mlir // variants are checked. 
-module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @test1(%x : !fir.complex<4>) -> !fir.complex<8> { %1 = fir.convert %x : (!fir.complex<4>) -> !fir.complex<8> return %1 : !fir.complex<8> diff --git a/flang/test/Transforms/debug-fixed-array-type.fir b/flang/test/Transforms/debug-fixed-array-type.fir index 401c725411831e..d4ed0b97020898 100644 --- a/flang/test/Transforms/debug-fixed-array-type.fir +++ b/flang/test/Transforms/debug-fixed-array-type.fir @@ -1,6 +1,6 @@ // RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s -module attributes {} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @_QQmain() attributes {fir.bindc_name = "mn"} { %c7 = arith.constant 7 : index %c8 = arith.constant 8 : index diff --git a/flang/test/Transforms/debug-line-table-existing.fir b/flang/test/Transforms/debug-line-table-existing.fir index 534278ebc972d3..0e006303c8a81d 100644 --- a/flang/test/Transforms/debug-line-table-existing.fir +++ b/flang/test/Transforms/debug-line-table-existing.fir @@ -3,7 +3,7 @@ // REQUIRES: system-linux // Test that there are no changes to a function with existed fused loc debug -module attributes {} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @_QPs1() { return loc(#loc1) } loc(#loc2) diff --git a/flang/test/Transforms/debug-line-table-inc-file.fir b/flang/test/Transforms/debug-line-table-inc-file.fir index 9370c138fd42ff..065039b59c5ae8 100644 --- a/flang/test/Transforms/debug-line-table-inc-file.fir +++ b/flang/test/Transforms/debug-line-table-inc-file.fir @@ -3,7 +3,7 @@ // REQUIRES: system-linux // Test for included functions that have a different debug location than the current file -module attributes {} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @_QPsinc() { return loc(#loc2) } loc(#loc1) @@ -19,7 +19,7 @@ module attributes {} { #loc4 = loc("/home/user01/llvm-project/build_release/simple.f90":4:3) #loc5 = loc("/home/user01/llvm-project/build_release/simple.f90":5:1) -// CHECK: module { +// CHECK: module // CHECK: func.func @_QPsinc() { // CHECK: } loc(#[[FUSED_LOC_INC_FILE:.*]]) // CHECK: func.func @_QQmain() { diff --git a/flang/test/Transforms/debug-line-table-inc-same-file.fir b/flang/test/Transforms/debug-line-table-inc-same-file.fir index 4836f2e21dd9db..bcaf4497982310 100644 --- a/flang/test/Transforms/debug-line-table-inc-same-file.fir +++ b/flang/test/Transforms/debug-line-table-inc-same-file.fir @@ -4,7 +4,7 @@ // Test that there is only one FileAttribute generated for multiple functions // in the same file. 
-module attributes {} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @_QPs1() { return loc(#loc2) } loc(#loc1) diff --git a/flang/test/Transforms/debug-line-table.fir b/flang/test/Transforms/debug-line-table.fir index 8a72ca2a856a70..d6e54fd1ac467e 100644 --- a/flang/test/Transforms/debug-line-table.fir +++ b/flang/test/Transforms/debug-line-table.fir @@ -3,7 +3,7 @@ // RUN: fir-opt --add-debug-info="debug-level=LineTablesOnly" --mlir-print-debuginfo %s | FileCheck %s --check-prefix=LINETABLE // RUN: fir-opt --add-debug-info="is-optimized=true" --mlir-print-debuginfo %s | FileCheck %s --check-prefix=OPT -module attributes { fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", llvm.target_triple = "aarch64-unknown-linux-gnu"} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { func.func @_QPsb() { return loc(#loc_sb) } loc(#loc_sb) diff --git a/flang/test/Transforms/debug-local-var-2.f90 b/flang/test/Transforms/debug-local-var-2.f90 index ee60a07cc4bee6..0fe1b81c27e61e 100644 --- a/flang/test/Transforms/debug-local-var-2.f90 +++ b/flang/test/Transforms/debug-local-var-2.f90 @@ -20,20 +20,20 @@ ! CHECK-LABEL: define {{.*}}i64 @_QFPfn1 ! CHECK-SAME: (ptr %[[ARG1:.*]], ptr %[[ARG2:.*]], ptr %[[ARG3:.*]]) -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[ARG1]], metadata ![[A1:.*]], metadata !DIExpression()) -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[ARG2]], metadata ![[B1:.*]], metadata !DIExpression()) -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[ARG3]], metadata ![[C1:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[ARG1]], metadata ![[A1:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[ARG2]], metadata ![[B1:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[ARG3]], metadata ![[C1:.*]], metadata !DIExpression()) ! CHECK-DAG: %[[AL2:.*]] = alloca i64 -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL2]], metadata ![[RES1:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[AL2]], metadata ![[RES1:.*]], metadata !DIExpression()) ! CHECK-LABEL: } ! CHECK-LABEL: define {{.*}}i32 @_QFPfn2 ! CHECK-SAME: (ptr %[[FN2ARG1:.*]], ptr %[[FN2ARG2:.*]], ptr %[[FN2ARG3:.*]]) -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[FN2ARG1]], metadata ![[A2:.*]], metadata !DIExpression()) -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[FN2ARG2]], metadata ![[B2:.*]], metadata !DIExpression()) -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[FN2ARG3]], metadata ![[C2:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[FN2ARG1]], metadata ![[A2:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[FN2ARG2]], metadata ![[B2:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[FN2ARG3]], metadata ![[C2:.*]], metadata !DIExpression()) ! CHECK-DAG: %[[AL3:.*]] = alloca i32 -! CHECK-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL3]], metadata ![[RES2:.*]], metadata !DIExpression()) +! CHECK-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[AL3]], metadata ![[RES2:.*]], metadata !DIExpression()) ! 
CHECK-LABEL: } program mn diff --git a/flang/test/Transforms/debug-module-1.fir b/flang/test/Transforms/debug-module-1.fir index 822ae01b99aa78..71457d32b15960 100644 --- a/flang/test/Transforms/debug-module-1.fir +++ b/flang/test/Transforms/debug-module-1.fir @@ -1,7 +1,7 @@ // RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s -module attributes {} { +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { fir.global @_QMhelperEgli : i32 { %0 = fir.zero_bits i32 fir.has_value %0 : i32 diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 8863749e12c6db..381061ce3fcbf0 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -536,6 +536,8 @@ if(LIBC_TYPES_HAS_FLOAT16) # clang-12 and after: https://godbolt.org/z/8ceT9454c # libc.src.math.nexttowardf16 libc.src.math.nextupf16 + libc.src.math.remainderf16 + libc.src.math.remquof16 libc.src.math.rintf16 libc.src.math.roundf16 libc.src.math.roundevenf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 31ad0bc412836c..e99960b12441da 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -566,6 +566,8 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.nextdownf16 libc.src.math.nexttowardf16 libc.src.math.nextupf16 + libc.src.math.remainderf16 + libc.src.math.remquof16 libc.src.math.rintf16 libc.src.math.roundf16 libc.src.math.roundevenf16 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 3e122fb8bc26e4..f83a646c34b57c 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -198,9 +198,9 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | nextup | |check| | |check| | |check| | |check| | |check| | 7.12.11.5 | F.10.8.5 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| remainder | |check| | |check| | |check| | | | 7.12.10.2 | F.10.7.2 | +| remainder | |check| | |check| | |check| | |check| | | 7.12.10.2 | F.10.7.2 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| remquo | |check| | |check| | |check| | | |check| | 7.12.10.3 | F.10.7.3 | +| remquo | |check| | |check| | |check| | |check| | |check| | 7.12.10.3 | F.10.7.3 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | rint | |check| | |check| | |check| | |check| | |check| | 7.12.9.4 | F.10.6.4 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/include/llvm-libc-macros/float16-macros.h b/libc/include/llvm-libc-macros/float16-macros.h index 9f17503d85130c..e7d8d93bca1b60 100644 --- a/libc/include/llvm-libc-macros/float16-macros.h +++ b/libc/include/llvm-libc-macros/float16-macros.h @@ -10,7 +10,8 @@ #define LLVM_LIBC_MACROS_FLOAT16_MACROS_H #if defined(__FLT16_MANT_DIG__) && \ - (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) + 
(!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__))
+    (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) && \
+    !defined(__arm__) && !defined(_M_ARM) && !defined(__riscv)
 #define LIBC_TYPES_HAS_FLOAT16
 #endif
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index b5b6dbc481bd7e..34169948fc6d27 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -581,14 +581,16 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"exp10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"exp10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
-          FunctionSpec<"remainderf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
           FunctionSpec<"remainder", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
+          FunctionSpec<"remainderf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
           FunctionSpec<"remainderl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+          GuardedFunctionSpec<"remainderf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
-          FunctionSpec<"remquof", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<IntPtr>]>,
-          GuardedFunctionSpec<"remquof128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>, ArgSpec<IntPtr>], "LIBC_TYPES_HAS_FLOAT128">,
           FunctionSpec<"remquo", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<IntPtr>]>,
+          FunctionSpec<"remquof", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<IntPtr>]>,
           FunctionSpec<"remquol", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>, ArgSpec<IntPtr>]>,
+          GuardedFunctionSpec<"remquof16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>, ArgSpec<IntPtr>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"remquof128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>, ArgSpec<IntPtr>], "LIBC_TYPES_HAS_FLOAT128">,
 
           FunctionSpec<"round", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"roundf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 1744f8cf626f31..01ca4254c79962 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -217,7 +217,6 @@ add_header_library(
     .nearest_integer_operations
     .normal_float
     libc.hdr.math_macros
-    libc.src.__support.CPP.algorithm
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
index f695b83fb0b0ba..a289c2ef70467b 100644
--- a/libc/src/__support/FPUtil/ManipulationFunctions.h
+++ b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -16,7 +16,6 @@
 #include "rounding_mode.h"
 
 #include "hdr/math_macros.h"
-#include "src/__support/CPP/algorithm.h"
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN
 #include "src/__support/CPP/type_traits.h"
@@ -103,7 +102,7 @@ intlogb(U x) {
     return IntLogbConstants<T>::T_MAX;
   }
 
-  DyadicFloat<cpp::max(FPBits<T>::STORAGE_LEN, 32)> normal(bits.get_val());
+  DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
   int exponent = normal.get_unbiased_exponent();
   // The C standard does not specify the return value when an exponent is
   // out of int range. However, XSI conformance required that INT_MAX or
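A hedged sketch of what the big_int.h change below enables, assuming the aliases keep their current spellings: a 16-bit BigInt is now backed by a single uint16_t word, which is what lets DyadicFloat<FPBits<float16>::STORAGE_LEN> above work without rounding the width up to 32 bits.

// Illustrative in-tree snippet, not part of the patch.
#include "src/__support/CPP/type_traits.h"
#include "src/__support/big_int.h"

namespace LIBC_NAMESPACE {
static_assert(cpp::is_same_v<WordTypeSelectorT<16>, uint16_t>,
              "a 16-bit request now selects a 16-bit word");
static_assert(UInt<16>::WORD_COUNT == 1,
              "one word rather than a widened 32-bit one");
} // namespace LIBC_NAMESPACE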
@@ -139,7 +138,7 @@ LIBC_INLINE constexpr T logb(T x) {
     return FPBits<T>::inf().get_val();
   }
 
-  DyadicFloat<cpp::max(FPBits<T>::STORAGE_LEN, 32)> normal(bits.get_val());
+  DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
   return static_cast<T>(normal.get_unbiased_exponent());
 }
diff --git a/libc/src/__support/FPUtil/NormalFloat.h b/libc/src/__support/FPUtil/NormalFloat.h
index 33529d5e9b80a6..413d20430090bb 100644
--- a/libc/src/__support/FPUtil/NormalFloat.h
+++ b/libc/src/__support/FPUtil/NormalFloat.h
@@ -52,7 +52,7 @@ template <typename T> struct NormalFloat {
       return;
 
     unsigned normalization_shift = evaluate_normalization_shift(mantissa);
-    mantissa = mantissa << normalization_shift;
+    mantissa <<= normalization_shift;
     exponent -= normalization_shift;
   }
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index e2061c43007024..40ad6eeed7ac26 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -299,7 +299,8 @@ LIBC_INLINE constexpr cpp::array<word, N> shift(cpp::array<word, N> array,
     if (bit_offset == 0)
       dst = part1; // no crosstalk between parts.
     else if constexpr (direction == LEFT)
-      dst = (part1 << bit_offset) | (part2 >> (WORD_BITS - bit_offset));
+      dst = static_cast<word>((part1 << bit_offset) |
+                              (part2 >> (WORD_BITS - bit_offset)));
     else
       dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset));
   }
@@ -969,7 +970,8 @@ struct WordTypeSelector : cpp::type_identity<
 #endif // LIBC_TYPES_HAS_INT64
                               > {
 };
-// Except if we request 32 bits explicitly.
+// Except if we request 16 or 32 bits explicitly.
+template <> struct WordTypeSelector<16> : cpp::type_identity<uint16_t> {};
 template <> struct WordTypeSelector<32> : cpp::type_identity<uint32_t> {};
 template <size_t Bits>
 using WordTypeSelectorT = typename WordTypeSelector<Bits>::type;
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index f8582d8d426833..82dfdaf479ff00 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -315,11 +315,13 @@ add_math_entrypoint_object(powf)
 add_math_entrypoint_object(remainder)
 add_math_entrypoint_object(remainderf)
 add_math_entrypoint_object(remainderl)
+add_math_entrypoint_object(remainderf16)
 
 add_math_entrypoint_object(remquo)
 add_math_entrypoint_object(remquof)
 add_math_entrypoint_object(remquof128)
 add_math_entrypoint_object(remquol)
+add_math_entrypoint_object(remquof16)
 
 add_math_entrypoint_object(rint)
 add_math_entrypoint_object(rintf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index caaa0ac23dc7a0..f4f683e61bd658 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2495,7 +2495,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.division_and_remainder_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2519,7 +2519,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.division_and_remainder_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2531,7 +2531,20 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.division_and_remainder_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+)
+
+add_entrypoint_object(
+  remquof16
+  SRCS
+    remquof16.cpp
+  HDRS
+    ../remquof16.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.division_and_remainder_operations
+  COMPILE_OPTIONS
+    -O3
 )
 
 add_entrypoint_object(
@@ -2543,7 +2556,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.division_and_remainder_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -2555,7 +2568,7 
@@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O2 + -O3 ) add_entrypoint_object( @@ -2567,7 +2580,20 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.division_and_remainder_operations COMPILE_OPTIONS - -O2 + -O3 +) + +add_entrypoint_object( + remainderf16 + SRCS + remainderf16.cpp + HDRS + ../remainderf16.h + DEPENDS + libc.src.__support.macros.properties.types + libc.src.__support.FPUtil.division_and_remainder_operations + COMPILE_OPTIONS + -O3 ) add_entrypoint_object( diff --git a/libc/src/math/generic/remainderf16.cpp b/libc/src/math/generic/remainderf16.cpp new file mode 100644 index 00000000000000..35177228acdbf5 --- /dev/null +++ b/libc/src/math/generic/remainderf16.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of remainderf16 function ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/remainderf16.h" +#include "src/__support/FPUtil/DivisionAndRemainderOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float16, remainderf16, (float16 x, float16 y)) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/remquof16.cpp b/libc/src/math/generic/remquof16.cpp new file mode 100644 index 00000000000000..a373bfa58651bb --- /dev/null +++ b/libc/src/math/generic/remquof16.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remquof16 function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/remquof16.h" +#include "src/__support/FPUtil/DivisionAndRemainderOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float16, remquof16, (float16 x, float16 y, int *exp)) { + return fputil::remquo(x, y, *exp); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/remainderf16.h b/libc/src/math/remainderf16.h new file mode 100644 index 00000000000000..e23eead4bae2cc --- /dev/null +++ b/libc/src/math/remainderf16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for remainderf16 ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDERF16_H +#define LLVM_LIBC_SRC_MATH_REMAINDERF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +float16 remainderf16(float16 x, float16 y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_REMAINDERF16_H diff --git a/libc/src/math/remquof16.h b/libc/src/math/remquof16.h new file mode 100644 index 00000000000000..fee848c955a018 --- /dev/null +++ b/libc/src/math/remquof16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for remquof16 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUOF16_H +#define LLVM_LIBC_SRC_MATH_REMQUOF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +float16 remquof16(float16 x, float16 y, int *exp); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_REMQUOF16_H diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index f0091ad367c0a4..d4aa50a43d186d 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -380,6 +380,28 @@ elseif(LIBC_TARGET_OS_IS_GPU) aligned_alloc ) else() + add_header_library( + block + HDRS + block.h + DEPENDS + libc.src.__support.CPP.algorithm + libc.src.__support.CPP.limits + libc.src.__support.CPP.new + libc.src.__support.CPP.optional + libc.src.__support.CPP.span + libc.src.__support.CPP.type_traits + ) + add_header_library( + freelist + HDRS + freelist.h + DEPENDS + libc.src.__support.fixedvector + libc.src.__support.CPP.cstddef + libc.src.__support.CPP.array + libc.src.__support.CPP.span + ) add_entrypoint_external( malloc ) diff --git a/libc/src/stdlib/block.h b/libc/src/stdlib/block.h new file mode 100644 index 00000000000000..afb18c1ef738fc --- /dev/null +++ b/libc/src/stdlib/block.h @@ -0,0 +1,482 @@ +//===-- Implementation header for a block of memory -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_BLOCK_H +#define LLVM_LIBC_SRC_STDLIB_BLOCK_H + +#include "src/__support/CPP/algorithm.h" +#include "src/__support/CPP/cstddef.h" +#include "src/__support/CPP/limits.h" +#include "src/__support/CPP/new.h" +#include "src/__support/CPP/optional.h" +#include "src/__support/CPP/span.h" +#include "src/__support/CPP/type_traits.h" + +#include <stdint.h> + +namespace LIBC_NAMESPACE { + +namespace internal { +// Types of corrupted blocks, and functions to crash with an error message +// corresponding to each type. +enum class BlockStatus { + VALID, + MISALIGNED, + PREV_MISMATCHED, + NEXT_MISMATCHED, +}; +} // namespace internal + +/// Returns the value rounded down to the nearest multiple of alignment. +LIBC_INLINE constexpr size_t align_down(size_t value, size_t alignment) { + // Note this shouldn't overflow since the result will always be <= value.
+ return (value / alignment) * alignment; +} + +/// Returns the value rounded down to the nearest multiple of alignment. +template <typename T> +LIBC_INLINE constexpr T *align_down(T *value, size_t alignment) { + return reinterpret_cast<T *>( + align_down(reinterpret_cast<size_t>(value), alignment)); +} + +/// Returns the value rounded up to the nearest multiple of alignment. +LIBC_INLINE constexpr size_t align_up(size_t value, size_t alignment) { + __builtin_add_overflow(value, alignment - 1, &value); + return align_down(value, alignment); +} + +/// Returns the value rounded up to the nearest multiple of alignment. +template <typename T> +LIBC_INLINE constexpr T *align_up(T *value, size_t alignment) { + return reinterpret_cast<T *>( + align_up(reinterpret_cast<size_t>(value), alignment)); +} + +using ByteSpan = cpp::span<cpp::byte>; +using cpp::optional; + +/// Memory region with links to adjacent blocks. +/// +/// The blocks do not encode their size directly. Instead, they encode offsets +/// to the next and previous blocks using the type given by the `OffsetType` +/// template parameter. The encoded offsets are simply the offsets divided by +/// the minimum block alignment, `ALIGNMENT`. +/// +/// The `ALIGNMENT` constant provided by the derived block is typically the +/// minimum value of `alignof(OffsetType)`. Since the addressable range of a +/// block is given by `std::numeric_limits<OffsetType>::max() * +/// ALIGNMENT`, it may be advantageous to set a higher alignment if it allows +/// using a smaller offset type, even if this wastes some bytes in order to +/// align block headers. +/// +/// Blocks will always be aligned to an `ALIGNMENT` boundary. Block sizes will +/// always be rounded up to a multiple of `ALIGNMENT`. +/// +/// As an example, the diagram below represents two contiguous +/// `Block`s. The indices indicate byte offsets: +/// +/// @code{.unparsed} +/// Block 1: +/// +---------------------+------+--------------+ +/// | Header | Info | Usable space | +/// +----------+----------+------+--------------+ +/// | prev | next | | | +/// | 0......3 | 4......7 | 8..9 | 10.......280 | +/// | 00000000 | 00000046 | 8008 | | +/// +----------+----------+------+--------------+ +/// Block 2: +/// +---------------------+------+--------------+ +/// | Header | Info | Usable space | +/// +----------+----------+------+--------------+ +/// | prev | next | | | +/// | 0......3 | 4......7 | 8..9 | 10......1056 | +/// | 00000046 | 00000106 | 2008 | f7f7....f7f7 | +/// +----------+----------+------+--------------+ +/// @endcode +/// +/// The overall size of the block (e.g. 280 bytes) is given by its next offset +/// multiplied by the alignment (e.g. 0x106 * 4). Also, the next offset of a +/// block matches the previous offset of its next block. The first block in a +/// list is denoted by having a previous offset of `0`. +/// +/// @tparam OffsetType Unsigned integral type used to encode offsets. Larger +/// types can address more memory, but consume greater +/// overhead. +/// @tparam kAlign Sets the overall alignment for blocks. Minimum is +/// `alignof(OffsetType)` (the default). Larger values can +/// address more memory, but consume greater overhead. +template <typename OffsetType = uintptr_t, size_t kAlign = alignof(OffsetType)> +class Block { +public: + using offset_type = OffsetType; + static_assert(cpp::is_unsigned_v<offset_type>, + "offset type must be unsigned"); + + static constexpr size_t ALIGNMENT = cpp::max(kAlign, alignof(offset_type)); + static constexpr size_t BLOCK_OVERHEAD = align_up(sizeof(Block), ALIGNMENT);
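To make the offset encoding concrete, here are the numbers from Block 1 of the diagram above as a standalone computation (the 10-byte header size is read off the diagram; the real class rounds BLOCK_OVERHEAD up to a multiple of ALIGNMENT):

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  constexpr size_t ALIGNMENT = 4;         // alignof(uint32_t) offsets
  constexpr uint32_t next_offset = 0x46;  // the stored next_ field
  size_t outer = next_offset * ALIGNMENT; // whole block: 0x46 * 4 == 280 bytes
  size_t inner = outer - 10;              // usable space past the header
  std::printf("outer=%zu inner=%zu\n", outer, inner); // outer=280 inner=270
  return 0;
}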
+ + // No copy or move. + Block(const Block &other) = delete; + Block &operator=(const Block &other) = delete; + + /// Creates the first block for a given memory region. + static optional<Block *> init(ByteSpan region); + + /// @returns A pointer to a `Block`, given a pointer to the start of the + /// usable space inside the block. + /// + /// This is the inverse of `usable_space()`. + /// + /// @warning This method does not do any checking; passing a random + /// pointer will return a non-null pointer. + static Block *from_usable_space(void *usable_space) { + auto *bytes = reinterpret_cast<cpp::byte *>(usable_space); + return reinterpret_cast<Block *>(bytes - BLOCK_OVERHEAD); + } + static const Block *from_usable_space(const void *usable_space) { + const auto *bytes = reinterpret_cast<const cpp::byte *>(usable_space); + return reinterpret_cast<const Block *>(bytes - BLOCK_OVERHEAD); + } + + /// @returns The total size of the block in bytes, including the header. + size_t outer_size() const { return next_ * ALIGNMENT; } + + /// @returns The number of usable bytes inside the block. + size_t inner_size() const { return outer_size() - BLOCK_OVERHEAD; } + + /// @returns The number of bytes requested using AllocFirst or AllocLast. + size_t requested_size() const { return inner_size() - padding_; } + + /// @returns A pointer to the usable space inside this block. + cpp::byte *usable_space() { + return reinterpret_cast<cpp::byte *>(this) + BLOCK_OVERHEAD; + } + const cpp::byte *usable_space() const { + return reinterpret_cast<const cpp::byte *>(this) + BLOCK_OVERHEAD; + } + + /// Marks the block as free and merges it with any free neighbors. + /// + /// This method is static in order to consume and replace the given block + /// pointer. If neither neighbor is free, the returned pointer will point to + /// the original block. Otherwise, it will point to the new, larger block + /// created by merging adjacent free blocks together. + static void free(Block *&block); + + /// Attempts to split this block. + /// + /// If successful, the block will have an inner size of `new_inner_size`, + /// rounded up to an `ALIGNMENT` boundary. The remaining space will be + /// returned as a new block. + /// + /// This method may fail if the remaining space is too small to hold a new + /// block. If this method fails for any reason, the original block is + /// unmodified. + /// + /// This method is static in order to consume and replace the given block + /// pointer with a pointer to the new, smaller block. + static optional<Block *> split(Block *&block, size_t new_inner_size); + + /// Merges this block with the one that comes after it. + /// + /// This method is static in order to consume and replace the given block + /// pointer with a pointer to the new, larger block. + static bool merge_next(Block *&block); + + /// Fetches the block immediately after this one. + /// + /// For performance, this always returns a block pointer, even if the returned + /// pointer is invalid. The pointer is valid if and only if `last()` is false. + /// + /// Typically, after calling `Init` callers may save a pointer past the end of + /// the list using `next()`. This makes it easy to subsequently iterate over + /// the list: + /// @code{.cpp} + /// auto result = Block<>::init(byte_span); + /// Block<>* begin = *result; + /// Block<>* end = begin->next(); + /// ... + /// for (auto* block = begin; block != end; block = block->next()) { + /// // Do something with each block. + /// } + /// @endcode + Block *next() const; + + /// @copydoc `next`. + static Block *next_block(const Block *block) { + return block == nullptr ?
nullptr : block->next(); + } + + /// @returns The block immediately before this one, or a null pointer if this + /// is the first block. + Block *prev() const; + + /// @copydoc `prev`. + static Block *prev_block(const Block *block) { + return block == nullptr ? nullptr : block->prev(); + } + + /// Returns the current alignment of a block. + size_t alignment() const { return used() ? info_.alignment : 1; } + + /// Indicates whether the block is in use. + /// + /// @returns `true` if the block is in use or `false` if not. + bool used() const { return info_.used; } + + /// Indicates whether this block is the last block or not (i.e. whether + /// `next()` points to a valid block or not). This is needed because + /// `next()` points to the end of this block, whether there is a valid + /// block there or not. + /// + /// @returns `true` if this is the last block or `false` if not. + bool last() const { return info_.last; } + + /// Marks this block as in use. + void mark_used() { info_.used = 1; } + + /// Marks this block as free. + void mark_free() { info_.used = 0; } + + /// Marks this block as the last one in the chain. + void mark_last() { info_.last = 1; } + + /// Clears the last bit from this block. + void clear_last() { info_.last = 0; } + + /// @brief Checks if a block is valid. + /// + /// @returns `true` if and only if the following conditions are met: + /// * The block is aligned. + /// * The prev/next fields match with the previous and next blocks. + bool is_valid() const { + return check_status() == internal::BlockStatus::VALID; + } + +private: + /// Consumes the block and returns it as a span of bytes. + static ByteSpan as_bytes(Block *&&block); + + /// Consumes the span of bytes and uses it to construct and return a block. + static Block *as_block(size_t prev_outer_size, ByteSpan bytes); + + Block(size_t prev_outer_size, size_t outer_size); + + /// Returns a `BlockStatus` that is either VALID or indicates the reason why + /// the block is invalid. + /// + /// If the block is invalid at multiple points, this function will only return + /// one of the reasons. + internal::BlockStatus check_status() const; + + /// Like `split`, but assumes the caller has already checked the parameters to + /// ensure the split will succeed. + static Block *split_impl(Block *&block, size_t new_inner_size); + + /// Offset (in increments of the minimum alignment) from this block to the + /// previous block. 0 if this is the first block. + offset_type prev_ = 0; + + /// Offset (in increments of the minimum alignment) from this block to the + /// next block. Valid even if this is the last block, since it equals the + /// size of the block. + offset_type next_ = 0; + + /// Information about the current state of the block: + /// * If the `used` flag is set, the block's usable memory has been allocated + /// and is being used. + /// * If the `last` flag is set, the block does not have a next block. + /// * If the `used` flag is set, the alignment represents the requested value + /// when the memory was allocated, which may be less strict than the actual + /// alignment. + struct { + uint16_t used : 1; + uint16_t last : 1; + uint16_t alignment : 14; + } info_; + + /// Number of bytes allocated beyond what was requested. This will be at most + /// the minimum alignment, i.e. `alignof(offset_type)`. + uint16_t padding_ = 0; +} __attribute__((packed, aligned(kAlign)));
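A hedged usage sketch of the public interface above, not part of the patch: it assumes compilation inside the llvm-libc tree so that block.h and the cpp:: support headers resolve, and the buffer size is illustrative.

#include <stdint.h>

#include "src/stdlib/block.h"

using Block32 = LIBC_NAMESPACE::Block<uint32_t>;
alignas(Block32::ALIGNMENT) static LIBC_NAMESPACE::cpp::byte buffer[1024];

void demo() {
  auto init_result = Block32::init(buffer); // one free block spanning the region
  if (!init_result)
    return;
  Block32 *block = *init_result;
  // Keep 256 usable bytes; the tail becomes a second, free block.
  auto split_result = Block32::split(block, 256);
  if (split_result) {
    // Both halves are free, so freeing the front merges them back together.
    Block32::free(block);
  }
}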
+ +// Public template method implementations. + +LIBC_INLINE ByteSpan get_aligned_subspan(ByteSpan bytes, size_t alignment) { + if (bytes.data() == nullptr) + return ByteSpan(); + + auto unaligned_start = reinterpret_cast<uintptr_t>(bytes.data()); + auto aligned_start = align_up(unaligned_start, alignment); + auto unaligned_end = unaligned_start + bytes.size(); + auto aligned_end = align_down(unaligned_end, alignment); + + if (aligned_end <= aligned_start) + return ByteSpan(); + + return bytes.subspan(aligned_start - unaligned_start, + aligned_end - aligned_start); +} + +template <typename OffsetType, size_t kAlign> +optional<Block<OffsetType, kAlign> *> +Block<OffsetType, kAlign>::init(ByteSpan region) { + optional<ByteSpan> result = get_aligned_subspan(region, ALIGNMENT); + if (!result) + return {}; + + region = result.value(); + if (region.size() < BLOCK_OVERHEAD) + return {}; + + if (cpp::numeric_limits<OffsetType>::max() < region.size() / ALIGNMENT) + return {}; + + Block *block = as_block(0, region); + block->mark_last(); + return block; +} + +template <typename OffsetType, size_t kAlign> +void Block<OffsetType, kAlign>::free(Block *&block) { + if (block == nullptr) + return; + + block->mark_free(); + Block *prev = block->prev(); + + if (merge_next(prev)) + block = prev; + + merge_next(block); +} + +template <typename OffsetType, size_t kAlign> +optional<Block<OffsetType, kAlign> *> +Block<OffsetType, kAlign>::split(Block *&block, size_t new_inner_size) { + if (block == nullptr) + return {}; + + if (block->used()) + return {}; + + size_t old_inner_size = block->inner_size(); + new_inner_size = align_up(new_inner_size, ALIGNMENT); + if (old_inner_size < new_inner_size) + return {}; + + if (old_inner_size - new_inner_size < BLOCK_OVERHEAD) + return {}; + + return split_impl(block, new_inner_size); +} + +template <typename OffsetType, size_t kAlign> +Block<OffsetType, kAlign> * +Block<OffsetType, kAlign>::split_impl(Block *&block, size_t new_inner_size) { + size_t prev_outer_size = block->prev_ * ALIGNMENT; + size_t outer_size1 = new_inner_size + BLOCK_OVERHEAD; + bool is_last = block->last(); + ByteSpan bytes = as_bytes(cpp::move(block)); + Block *block1 = as_block(prev_outer_size, bytes.subspan(0, outer_size1)); + Block *block2 = as_block(outer_size1, bytes.subspan(outer_size1)); + + if (is_last) + block2->mark_last(); + else + block2->next()->prev_ = block2->next_; + + block = cpp::move(block1); + return block2; +} + +template <typename OffsetType, size_t kAlign> +bool Block<OffsetType, kAlign>::merge_next(Block *&block) { + if (block == nullptr) + return false; + + if (block->last()) + return false; + + Block *next = block->next(); + if (block->used() || next->used()) + return false; + + size_t prev_outer_size = block->prev_ * ALIGNMENT; + bool is_last = next->last(); + ByteSpan prev_bytes = as_bytes(cpp::move(block)); + ByteSpan next_bytes = as_bytes(cpp::move(next)); + size_t outer_size = prev_bytes.size() + next_bytes.size(); + cpp::byte *merged = ::new (prev_bytes.data()) cpp::byte[outer_size]; + block = as_block(prev_outer_size, ByteSpan(merged, outer_size)); + + if (is_last) + block->mark_last(); + else + block->next()->prev_ = block->next_; + + return true; +} + +template <typename OffsetType, size_t kAlign> +Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::next() const { + uintptr_t addr = + last() ? 0 : reinterpret_cast<uintptr_t>(this) + outer_size(); + return reinterpret_cast<Block *>(addr); +} + +template <typename OffsetType, size_t kAlign> +Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::prev() const { + uintptr_t addr = + (prev_ == 0) ? 0 + : reinterpret_cast<uintptr_t>(this) - (prev_ * ALIGNMENT); + return reinterpret_cast<Block *>(addr); +}
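The trimming done by get_aligned_subspan above is plain align_up/align_down arithmetic on the region's end points; a standalone rehearsal of it on an arbitrary unaligned range:

#include <cstddef>
#include <cstdint>
#include <cstdio>

size_t align_down(size_t value, size_t alignment) {
  return (value / alignment) * alignment;
}
size_t align_up(size_t value, size_t alignment) {
  return align_down(value + alignment - 1, alignment);
}

int main() {
  uintptr_t start = 1003, end = start + 100; // unaligned region [1003, 1103)
  uintptr_t lo = align_up(start, 8);         // 1008: first aligned address
  uintptr_t hi = align_down(end, 8);         // 1096: last aligned boundary
  // These are exactly the subspan(offset, size) arguments computed above.
  std::printf("offset=%zu size=%zu\n", size_t(lo - start), size_t(hi - lo));
  return 0;
}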
+ +// Private template method implementations. + +template <typename OffsetType, size_t kAlign> +Block<OffsetType, kAlign>::Block(size_t prev_outer_size, size_t outer_size) { + prev_ = prev_outer_size / ALIGNMENT; + next_ = outer_size / ALIGNMENT; + info_.used = 0; + info_.last = 0; + info_.alignment = ALIGNMENT; +} + +template <typename OffsetType, size_t kAlign> +ByteSpan Block<OffsetType, kAlign>::as_bytes(Block *&&block) { + size_t block_size = block->outer_size(); + cpp::byte *bytes = new (cpp::move(block)) cpp::byte[block_size]; + return {bytes, block_size}; +} + +template <typename OffsetType, size_t kAlign> +Block<OffsetType, kAlign> * +Block<OffsetType, kAlign>::as_block(size_t prev_outer_size, ByteSpan bytes) { + return ::new (bytes.data()) Block(prev_outer_size, bytes.size()); +} + +template <typename OffsetType, size_t kAlign> +internal::BlockStatus Block<OffsetType, kAlign>::check_status() const { + if (reinterpret_cast<uintptr_t>(this) % ALIGNMENT != 0) + return internal::BlockStatus::MISALIGNED; + + if (!last() && (this >= next() || this != next()->prev())) + return internal::BlockStatus::NEXT_MISMATCHED; + + if (prev() && (this <= prev() || this != prev()->next())) + return internal::BlockStatus::PREV_MISMATCHED; + + return internal::BlockStatus::VALID; +} + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDLIB_BLOCK_H diff --git a/libc/src/stdlib/free.h b/libc/src/stdlib/free.h index f802f1d192d810..b3970fd9677401 100644 --- a/libc/src/stdlib/free.h +++ b/libc/src/stdlib/free.h @@ -17,4 +17,4 @@ void free(void *ptr); } // namespace LIBC_NAMESPACE -#endif // LLVM_LIBC_SRC_STDLIB_LDIV_H +#endif // LLVM_LIBC_SRC_STDLIB_FREE_H diff --git a/libc/src/stdlib/freelist.h b/libc/src/stdlib/freelist.h new file mode 100644 index 00000000000000..c01ed6eddb7d46 --- /dev/null +++ b/libc/src/stdlib/freelist.h @@ -0,0 +1,198 @@ +//===-- Interface for freelist_malloc -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_FREELIST_H +#define LLVM_LIBC_SRC_STDLIB_FREELIST_H + +#include "src/__support/CPP/array.h" +#include "src/__support/CPP/cstddef.h" +#include "src/__support/CPP/span.h" +#include "src/__support/fixedvector.h" + +namespace LIBC_NAMESPACE { + +using cpp::span; + +/// Basic [freelist](https://en.wikipedia.org/wiki/Free_list) implementation +/// for an allocator. This implementation buckets by chunk size, with a list +/// of user-provided buckets. Each bucket is a linked list of storage chunks. +/// Because this freelist uses the added chunks themselves as list nodes, there +/// is a lower bound of `sizeof(FreeList.FreeListNode)` bytes for chunks which +/// can be added to this freelist. There is also an implicit bucket for +/// "everything else", for chunks which do not fit into a bucket. +/// +/// Each added chunk will be added to the smallest bucket under which it fits. +/// If it does not fit into any user-provided bucket, it will be added to the +/// default bucket. +/// +/// As an example, assume that the `FreeList` is configured with buckets of +/// sizes {64, 128, 256, and 512} bytes.
The internal state may look like the +/// following: +/// +/// @code{.unparsed} +/// bucket[0] (64B) --> chunk[12B] --> chunk[42B] --> chunk[64B] --> NULL +/// bucket[1] (128B) --> chunk[65B] --> chunk[72B] --> NULL +/// bucket[2] (256B) --> NULL +/// bucket[3] (512B) --> chunk[312B] --> chunk[512B] --> chunk[416B] --> NULL +/// bucket[4] (implicit) --> chunk[1024B] --> chunk[513B] --> NULL +/// @endcode +/// +/// Note that added chunks should be aligned to a 4-byte boundary. +template <size_t NUM_BUCKETS = 6> class FreeList { +public: + // Remove copy/move ctors + FreeList(const FreeList &other) = delete; + FreeList(FreeList &&other) = delete; + FreeList &operator=(const FreeList &other) = delete; + FreeList &operator=(FreeList &&other) = delete; + + /// Adds a chunk to this freelist. + bool add_chunk(cpp::span<cpp::byte> chunk); + + /// Finds an eligible chunk for an allocation of size `size`. + /// + /// @note This returns the first allocation possible within a given bucket; + /// it does not currently optimize for finding the smallest chunk. + /// + /// @returns + /// * On success - A span representing the chunk. + /// * On failure (e.g. there were no chunks available for that allocation) - + /// A span with a size of 0. + cpp::span<cpp::byte> find_chunk(size_t size) const; + + /// Removes a chunk from this freelist. + bool remove_chunk(cpp::span<cpp::byte> chunk); + +private: + // For a given size, find which index into chunks_ the node should be written + // to. + size_t find_chunk_ptr_for_size(size_t size, bool non_null) const; + + struct FreeListNode { + FreeListNode *next; + size_t size; + }; + +public: + explicit FreeList(cpp::array<size_t, NUM_BUCKETS> sizes) + : chunks_(NUM_BUCKETS + 1, 0), sizes_(sizes.begin(), sizes.end()) {} + + FixedVector<FreeListNode *, NUM_BUCKETS + 1> chunks_; + FixedVector<size_t, NUM_BUCKETS> sizes_; +}; + +template <size_t NUM_BUCKETS> +bool FreeList<NUM_BUCKETS>::add_chunk(span<cpp::byte> chunk) { + // Check that the size is enough to actually store what we need + if (chunk.size() < sizeof(FreeListNode)) + return false; + + union { + FreeListNode *node; + cpp::byte *bytes; + } aliased; + + aliased.bytes = chunk.data(); + + size_t chunk_ptr = find_chunk_ptr_for_size(chunk.size(), false); + + // Add it to the correct list. + aliased.node->size = chunk.size(); + aliased.node->next = chunks_[chunk_ptr]; + chunks_[chunk_ptr] = aliased.node; + + return true; +} + +template <size_t NUM_BUCKETS> +span<cpp::byte> FreeList<NUM_BUCKETS>::find_chunk(size_t size) const { + if (size == 0) + return span<cpp::byte>(); + + size_t chunk_ptr = find_chunk_ptr_for_size(size, true); + + // Check that there's data. This catches the case where we run off the + // end of the array + if (chunks_[chunk_ptr] == nullptr) + return span<cpp::byte>(); + + // Now iterate up the buckets, walking each list to find a good candidate + for (size_t i = chunk_ptr; i < chunks_.size(); i++) { + union { + FreeListNode *node; + cpp::byte *data; + } aliased; + aliased.node = chunks_[static_cast<size_t>(i)]; + + while (aliased.node != nullptr) { + if (aliased.node->size >= size) + return span<cpp::byte>(aliased.data, aliased.node->size); + + aliased.node = aliased.node->next; + } + } + + // If we get here, we've checked every block in every bucket. There's + // nothing that can support this allocation. + return span<cpp::byte>(); +} + +template <size_t NUM_BUCKETS> +bool FreeList<NUM_BUCKETS>::remove_chunk(span<cpp::byte> chunk) { + size_t chunk_ptr = find_chunk_ptr_for_size(chunk.size(), true); + + // Walk that list, finding the chunk. + union { + FreeListNode *node; + cpp::byte *data; + } aliased, aliased_next; + + // Check head first.
+ if (chunks_[chunk_ptr] == nullptr) + return false; + + aliased.node = chunks_[chunk_ptr]; + if (aliased.data == chunk.data()) { + chunks_[chunk_ptr] = aliased.node->next; + return true; + } + + // No? Walk the nodes. + aliased.node = chunks_[chunk_ptr]; + + while (aliased.node->next != nullptr) { + aliased_next.node = aliased.node->next; + if (aliased_next.data == chunk.data()) { + // Found it, remove this node out of the chain + aliased.node->next = aliased_next.node->next; + return true; + } + + aliased.node = aliased.node->next; + } + + return false; +} + +template <size_t NUM_BUCKETS> +size_t FreeList<NUM_BUCKETS>::find_chunk_ptr_for_size(size_t size, + bool non_null) const { + size_t chunk_ptr = 0; + for (chunk_ptr = 0u; chunk_ptr < sizes_.size(); chunk_ptr++) { + if (sizes_[chunk_ptr] >= size && + (!non_null || chunks_[chunk_ptr] != nullptr)) { + break; + } + } + + return chunk_ptr; +} + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDLIB_FREELIST_H diff --git a/libc/test/src/__support/FPUtil/CMakeLists.txt b/libc/test/src/__support/FPUtil/CMakeLists.txt index 1cbeec0cc4eb08..22fbd2664b546b 100644 --- a/libc/test/src/__support/FPUtil/CMakeLists.txt +++ b/libc/test/src/__support/FPUtil/CMakeLists.txt @@ -9,6 +9,7 @@ add_fp_unittest( dyadic_float_test.cpp DEPENDS libc.src.__support.FPUtil.dyadic_float + libc.src.__support.macros.properties.types COMPILE_OPTIONS # Prevent constant folding with a default rounding mode. "-frounding-math" diff --git a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp index 809381ed47b59b..3b1f9deb64ac8f 100644 --- a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp +++ b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp @@ -8,6 +8,7 @@ #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/big_int.h" +#include "src/__support/macros/properties/types.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" @@ -89,3 +90,6 @@ TEST(LlvmLibcDyadicFloatTest, QuickMul) { TEST_EDGE_RANGES(Float, float); TEST_EDGE_RANGES(Double, double); TEST_EDGE_RANGES(LongDouble, long double); +#ifdef LIBC_TYPES_HAS_FLOAT16 +TEST_EDGE_RANGES(Float16, float16); +#endif diff --git a/libc/test/src/__support/big_int_test.cpp b/libc/test/src/__support/big_int_test.cpp index 1c4f0ac29171fa..84cd206b3273c7 100644 --- a/libc/test/src/__support/big_int_test.cpp +++ b/libc/test/src/__support/big_int_test.cpp @@ -205,6 +205,7 @@ TYPED_TEST(LlvmLibcUIntClassTest, CountBits, Types) { } } +using LL_UInt16 = UInt<16>; using LL_UInt64 = UInt<64>; // We want to test UInt<128> explicitly.
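find_chunk_ptr_for_size above is the entire bucketing policy: scan sizes_ for the first bucket that can hold the request, falling through to index NUM_BUCKETS, the implicit catch-all, when none can. A standalone model of that scan with the bucket sizes from the doc comment:

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
  std::array<size_t, 4> sizes = {64, 128, 256, 512}; // index 4 is implicit
  for (size_t request : {size_t(48), size_t(100), size_t(513)}) {
    size_t i = 0;
    while (i < sizes.size() && sizes[i] < request)
      ++i; // first bucket with sizes[i] >= request, else sizes.size()
    std::printf("request %4zu -> bucket %zu\n", request, i); // 0, 1, 4
  }
  return 0;
}

The new LL_UInt16 alias only works because of the WordTypeSelector<16> specialization earlier in the patch: UInt<16> is now backed by a single uint16_t word, making it layout-compatible with float16. A standalone analogue of the round-trip the next hunk tests, using plain uint16_t and the compiler's _Float16 (available in recent clang/gcc on x86-64 and AArch64; C++20 for std::bit_cast):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  _Float16 value = (_Float16)1.0f;
  uint16_t bits = std::bit_cast<uint16_t>(value); // 1.0 in binary16 is 0x3C00
  _Float16 back = std::bit_cast<_Float16>(bits);
  std::printf("bits=0x%04x round_trip_ok=%d\n", (unsigned)bits,
              int(value == back));
  return 0;
}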
So, for // convenience, we use a sugar which does not conflict with the UInt128 type @@ -258,6 +259,19 @@ TEST(LlvmLibcUIntClassTest, BitCastToFromNativeFloat128) { } #endif // LIBC_TYPES_HAS_FLOAT128 +#ifdef LIBC_TYPES_HAS_FLOAT16 +TEST(LlvmLibcUIntClassTest, BitCastToFromNativeFloat16) { + static_assert(cpp::is_trivially_copyable::value); + static_assert(sizeof(LL_UInt16) == sizeof(float16)); + const float16 array[] = {0, 0.1, 1}; + for (float16 value : array) { + LL_UInt16 back = cpp::bit_cast(value); + float16 forth = cpp::bit_cast(back); + EXPECT_TRUE(value == forth); + } +} +#endif // LIBC_TYPES_HAS_FLOAT16 + TEST(LlvmLibcUIntClassTest, BasicInit) { LL_UInt128 half_val(12345); LL_UInt128 full_val({12345, 67890}); diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 102188c332e408..79e6e89a5324ed 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -141,6 +141,7 @@ add_fp_unittest( TruncTest.h DEPENDS libc.src.math.trunc + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -155,6 +156,7 @@ add_fp_unittest( TruncTest.h DEPENDS libc.src.math.truncf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -169,6 +171,22 @@ add_fp_unittest( TruncTest.h DEPENDS libc.src.math.truncl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + truncf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + truncf16_test.cpp + HDRS + TruncTest.h + DEPENDS + libc.src.math.truncf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -183,6 +201,7 @@ add_fp_unittest( CeilTest.h DEPENDS libc.src.math.ceil + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -197,6 +216,7 @@ add_fp_unittest( CeilTest.h DEPENDS libc.src.math.ceilf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -211,6 +231,22 @@ add_fp_unittest( CeilTest.h DEPENDS libc.src.math.ceill + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + ceilf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + ceilf16_test.cpp + HDRS + CeilTest.h + DEPENDS + libc.src.math.ceilf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -225,6 +261,7 @@ add_fp_unittest( FloorTest.h DEPENDS libc.src.math.floor + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -239,6 +276,7 @@ add_fp_unittest( FloorTest.h DEPENDS libc.src.math.floorf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -253,6 +291,22 @@ add_fp_unittest( FloorTest.h DEPENDS libc.src.math.floorl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + floorf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + floorf16_test.cpp + HDRS + FloorTest.h + DEPENDS + libc.src.math.floorf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -267,6 +321,7 @@ add_fp_unittest( RoundTest.h DEPENDS libc.src.math.round + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -281,6 +336,7 @@ add_fp_unittest( RoundTest.h DEPENDS libc.src.math.roundf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -295,6 +351,22 @@ add_fp_unittest( RoundTest.h DEPENDS libc.src.math.roundl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + roundf16_test.cpp + HDRS + RoundTest.h + 
DEPENDS + libc.src.math.roundf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -309,6 +381,7 @@ add_fp_unittest( RoundEvenTest.h DEPENDS libc.src.math.roundeven + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -323,6 +396,7 @@ add_fp_unittest( RoundEvenTest.h DEPENDS libc.src.math.roundevenf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -337,6 +411,22 @@ add_fp_unittest( RoundEvenTest.h DEPENDS libc.src.math.roundevenl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundevenf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + roundevenf16_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundevenf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) @@ -350,11 +440,11 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.fenv.feclearexcept - libc.src.fenv.feraiseexcept - libc.src.fenv.fetestexcept libc.src.math.lround + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -368,11 +458,11 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.fenv.feclearexcept - libc.src.fenv.feraiseexcept - libc.src.fenv.fetestexcept libc.src.math.lroundf + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -386,11 +476,29 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.fenv.feclearexcept - libc.src.fenv.feraiseexcept - libc.src.fenv.fetestexcept libc.src.math.lroundl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + lroundf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + lroundf16_test.cpp + HDRS + RoundToIntegerTest.h + DEPENDS + libc.hdr.fenv_macros + libc.src.errno.errno + libc.src.math.lroundf16 + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -404,11 +512,11 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.fenv.feclearexcept - libc.src.fenv.feraiseexcept - libc.src.fenv.fetestexcept libc.src.math.llround + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -422,11 +530,11 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.fenv.feclearexcept - libc.src.fenv.feraiseexcept - libc.src.fenv.fetestexcept libc.src.math.llroundf + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -440,11 +548,29 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.fenv.feclearexcept - libc.src.fenv.feraiseexcept - libc.src.fenv.fetestexcept libc.src.math.llroundl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + llroundf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + llroundf16_test.cpp + HDRS + RoundToIntegerTest.h + DEPENDS + libc.hdr.fenv_macros + libc.src.errno.errno + libc.src.math.llroundf16 + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -458,7 +584,9 @@ add_fp_unittest( 
HDRS RIntTest.h DEPENDS + libc.hdr.fenv_macros libc.src.math.rint + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -473,7 +601,9 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS + libc.hdr.fenv_macros libc.src.math.rintf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -488,7 +618,26 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS + libc.hdr.fenv_macros libc.src.math.rintl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + rintf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + rintf16_test.cpp + HDRS + RIntTest.h + DEPENDS + libc.hdr.fenv_macros + libc.src.math.rintf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -504,6 +653,7 @@ add_fp_unittest( RoundToIntegerTest.h DEPENDS libc.src.math.lrint + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -519,6 +669,7 @@ add_fp_unittest( RoundToIntegerTest.h DEPENDS libc.src.math.lrintf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -534,6 +685,23 @@ add_fp_unittest( RoundToIntegerTest.h DEPENDS libc.src.math.lrintl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + lrintf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + lrintf16_test.cpp + HDRS + RoundToIntegerTest.h + DEPENDS + libc.src.math.lrintf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -549,6 +717,7 @@ add_fp_unittest( RoundToIntegerTest.h DEPENDS libc.src.math.llrint + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -564,6 +733,7 @@ add_fp_unittest( RoundToIntegerTest.h DEPENDS libc.src.math.llrintf + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) @@ -579,6 +749,23 @@ add_fp_unittest( RoundToIntegerTest.h DEPENDS libc.src.math.llrintl + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + llrintf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + llrintf16_test.cpp + HDRS + RoundToIntegerTest.h + DEPENDS + libc.src.math.llrintf16 + libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits ) diff --git a/libc/test/src/math/CeilTest.h b/libc/test/src/math/CeilTest.h index b4c3752cc5c4ba..3af87420a739fc 100644 --- a/libc/test/src/math/CeilTest.h +++ b/libc/test/src/math/CeilTest.h @@ -6,6 +6,10 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_TEST_SRC_MATH_CEILTEST_H +#define LLVM_LIBC_TEST_SRC_MATH_CEILTEST_H + +#include "src/__support/CPP/algorithm.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" @@ -59,18 +63,21 @@ class CeilTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { EXPECT_FP_EQ(T(-10.0), func(T(-10.32))); EXPECT_FP_EQ(T(11.0), func(T(10.65))); EXPECT_FP_EQ(T(-10.0), func(T(-10.65))); - EXPECT_FP_EQ(T(1235.0), func(T(1234.38))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1234.38))); - EXPECT_FP_EQ(T(1235.0), func(T(1234.96))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1234.96))); + 
EXPECT_FP_EQ(T(124.0), func(T(123.38))); + EXPECT_FP_EQ(T(-123.0), func(T(-123.38))); + EXPECT_FP_EQ(T(124.0), func(T(123.96))); + EXPECT_FP_EQ(T(-123.0), func(T(-123.96))); } void testRange(CeilFunc func) { - constexpr StorageType COUNT = 100'000; - constexpr StorageType STEP = STORAGE_MAX / COUNT; - for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { - T x = FPBits(v).get_val(); - if (isnan(x) || isinf(x)) + constexpr int COUNT = 100'000; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast(STORAGE_MAX / COUNT), StorageType(1)); + StorageType v = 0; + for (int i = 0; i <= COUNT; ++i, v += STEP) { + FPBits xbits(v); + T x = xbits.get_val(); + if (xbits.is_inf_or_nan()) continue; ASSERT_MPFR_MATCH(mpfr::Operation::Ceil, x, func(x), 0.0); @@ -84,3 +91,5 @@ class CeilTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { TEST_F(LlvmLibcCeilTest, RoundedNubmers) { testRoundedNumbers(&func); } \ TEST_F(LlvmLibcCeilTest, Fractions) { testFractions(&func); } \ TEST_F(LlvmLibcCeilTest, Range) { testRange(&func); } + +#endif // LLVM_LIBC_TEST_SRC_MATH_CEILTEST_H diff --git a/libc/test/src/math/FloorTest.h b/libc/test/src/math/FloorTest.h index 9103a5b05eb5ad..cce0c731ebbc0d 100644 --- a/libc/test/src/math/FloorTest.h +++ b/libc/test/src/math/FloorTest.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_FLOORTEST_H #define LLVM_LIBC_TEST_SRC_MATH_FLOORTEST_H +#include "src/__support/CPP/algorithm.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" @@ -62,18 +63,21 @@ class FloorTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { EXPECT_FP_EQ(T(-11.0), func(T(-10.32))); EXPECT_FP_EQ(T(10.0), func(T(10.65))); EXPECT_FP_EQ(T(-11.0), func(T(-10.65))); - EXPECT_FP_EQ(T(1234.0), func(T(1234.38))); - EXPECT_FP_EQ(T(-1235.0), func(T(-1234.38))); - EXPECT_FP_EQ(T(1234.0), func(T(1234.96))); - EXPECT_FP_EQ(T(-1235.0), func(T(-1234.96))); + EXPECT_FP_EQ(T(123.0), func(T(123.38))); + EXPECT_FP_EQ(T(-124.0), func(T(-123.38))); + EXPECT_FP_EQ(T(123.0), func(T(123.96))); + EXPECT_FP_EQ(T(-124.0), func(T(-123.96))); } void testRange(FloorFunc func) { - constexpr StorageType COUNT = 100'000; - constexpr StorageType STEP = STORAGE_MAX / COUNT; - for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { - T x = FPBits(v).get_val(); - if (isnan(x) || isinf(x)) + constexpr int COUNT = 100'000; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast(STORAGE_MAX / COUNT), StorageType(1)); + StorageType v = 0; + for (int i = 0; i <= COUNT; ++i, v += STEP) { + FPBits xbits(v); + T x = xbits.get_val(); + if (xbits.is_inf_or_nan()) continue; ASSERT_MPFR_MATCH(mpfr::Operation::Floor, x, func(x), 0.0); diff --git a/libc/test/src/math/RIntTest.h b/libc/test/src/math/RIntTest.h index 007b50427ba34a..d31bf743f1a376 100644 --- a/libc/test/src/math/RIntTest.h +++ b/libc/test/src/math/RIntTest.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_RINTTEST_H #define LLVM_LIBC_TEST_SRC_MATH_RINTTEST_H +#include "src/__support/CPP/algorithm.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FEnvSafeTest.h" @@ -18,7 +19,6 @@ #include "hdr/fenv_macros.h" #include "hdr/math_macros.h" -#include namespace mpfr = LIBC_NAMESPACE::testing::mpfr; @@ -101,8 +101,10 @@ class RIntTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { } void testSubnormalRange(RIntFunc func) { - constexpr StorageType COUNT = 100'001; - constexpr StorageType STEP = (MAX_SUBNORMAL - MIN_SUBNORMAL) / 
COUNT; + constexpr int COUNT = 100'001; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast((MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT), + StorageType(1)); for (StorageType i = MIN_SUBNORMAL; i <= MAX_SUBNORMAL; i += STEP) { T x = FPBits(i).get_val(); for (int mode : ROUNDING_MODES) { @@ -114,15 +116,17 @@ class RIntTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { } void testNormalRange(RIntFunc func) { - constexpr StorageType COUNT = 100'001; - constexpr StorageType STEP = (MAX_NORMAL - MIN_NORMAL) / COUNT; + constexpr int COUNT = 100'001; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast((MAX_NORMAL - MIN_NORMAL) / COUNT), + StorageType(1)); for (StorageType i = MIN_NORMAL; i <= MAX_NORMAL; i += STEP) { - T x = FPBits(i).get_val(); + FPBits xbits(i); + T x = xbits.get_val(); // In normal range on x86 platforms, the long double implicit 1 bit can be // zero making the numbers NaN. We will skip them. - if (isnan(x)) { + if (xbits.is_nan()) continue; - } for (int mode : ROUNDING_MODES) { LIBC_NAMESPACE::fputil::set_round(mode); diff --git a/libc/test/src/math/RoundEvenTest.h b/libc/test/src/math/RoundEvenTest.h index d70555d3476591..5ecda66ccb5881 100644 --- a/libc/test/src/math/RoundEvenTest.h +++ b/libc/test/src/math/RoundEvenTest.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_ROUNDEVENTEST_H #define LLVM_LIBC_TEST_SRC_MATH_ROUNDEVENTEST_H +#include "src/__support/CPP/algorithm.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" @@ -60,22 +61,25 @@ class RoundEvenTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { EXPECT_FP_EQ(T(-2.0), func(T(-1.75))); EXPECT_FP_EQ(T(11.0), func(T(10.65))); EXPECT_FP_EQ(T(-11.0), func(T(-10.65))); - EXPECT_FP_EQ(T(1233.0), func(T(1233.25))); - EXPECT_FP_EQ(T(1234.0), func(T(1233.50))); - EXPECT_FP_EQ(T(1234.0), func(T(1233.75))); - EXPECT_FP_EQ(T(-1233.0), func(T(-1233.25))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1233.50))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1233.75))); - EXPECT_FP_EQ(T(1234.0), func(T(1234.50))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1234.50))); + EXPECT_FP_EQ(T(123.0), func(T(123.25))); + EXPECT_FP_EQ(T(124.0), func(T(123.50))); + EXPECT_FP_EQ(T(124.0), func(T(123.75))); + EXPECT_FP_EQ(T(-123.0), func(T(-123.25))); + EXPECT_FP_EQ(T(-124.0), func(T(-123.50))); + EXPECT_FP_EQ(T(-124.0), func(T(-123.75))); + EXPECT_FP_EQ(T(124.0), func(T(124.50))); + EXPECT_FP_EQ(T(-124.0), func(T(-124.50))); } void testRange(RoundEvenFunc func) { - constexpr StorageType COUNT = 100'000; - constexpr StorageType STEP = STORAGE_MAX / COUNT; - for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { - T x = FPBits(v).get_val(); - if (isnan(x) || isinf(x)) + constexpr int COUNT = 100'000; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast(STORAGE_MAX / COUNT), StorageType(1)); + StorageType v = 0; + for (int i = 0; i <= COUNT; ++i, v += STEP) { + FPBits xbits(v); + T x = xbits.get_val(); + if (xbits.is_inf_or_nan()) continue; ASSERT_MPFR_MATCH(mpfr::Operation::RoundEven, x, func(x), 0.0); diff --git a/libc/test/src/math/RoundTest.h b/libc/test/src/math/RoundTest.h index 2a31df305ac384..d571d5d8feed44 100644 --- a/libc/test/src/math/RoundTest.h +++ b/libc/test/src/math/RoundTest.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_ROUNDTEST_H #define LLVM_LIBC_TEST_SRC_MATH_ROUNDTEST_H +#include "src/__support/CPP/algorithm.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include 
"test/UnitTest/Test.h" @@ -62,18 +63,21 @@ class RoundTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { EXPECT_FP_EQ(T(-10.0), func(T(-10.32))); EXPECT_FP_EQ(T(11.0), func(T(10.65))); EXPECT_FP_EQ(T(-11.0), func(T(-10.65))); - EXPECT_FP_EQ(T(1234.0), func(T(1234.38))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1234.38))); - EXPECT_FP_EQ(T(1235.0), func(T(1234.96))); - EXPECT_FP_EQ(T(-1235.0), func(T(-1234.96))); + EXPECT_FP_EQ(T(123.0), func(T(123.38))); + EXPECT_FP_EQ(T(-123.0), func(T(-123.38))); + EXPECT_FP_EQ(T(124.0), func(T(123.96))); + EXPECT_FP_EQ(T(-124.0), func(T(-123.96))); } void testRange(RoundFunc func) { - constexpr StorageType COUNT = 100'000; - constexpr StorageType STEP = STORAGE_MAX / COUNT; - for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { - T x = FPBits(v).get_val(); - if (isnan(x) || isinf(x)) + constexpr int COUNT = 100'000; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast(STORAGE_MAX / COUNT), StorageType(1)); + StorageType v = 0; + for (int i = 0; i <= COUNT; ++i, v += STEP) { + FPBits xbits(v); + T x = xbits.get_val(); + if (xbits.is_inf_or_nan()) continue; ASSERT_MPFR_MATCH(mpfr::Operation::Round, x, func(x), 0.0); diff --git a/libc/test/src/math/RoundToIntegerTest.h b/libc/test/src/math/RoundToIntegerTest.h index d40e15080087c0..bb7e8643973c36 100644 --- a/libc/test/src/math/RoundToIntegerTest.h +++ b/libc/test/src/math/RoundToIntegerTest.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_ROUNDTOINTEGERTEST_H #define LLVM_LIBC_TEST_SRC_MATH_ROUNDTOINTEGERTEST_H +#include "src/__support/CPP/algorithm.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FEnvSafeTest.h" @@ -136,10 +137,13 @@ class RoundToIntegerTestTemplate return; constexpr int EXPONENT_LIMIT = sizeof(I) * 8 - 1; + constexpr int BIASED_EXPONENT_LIMIT = EXPONENT_LIMIT + FPBits::EXP_BIAS; + if (BIASED_EXPONENT_LIMIT > FPBits::MAX_BIASED_EXPONENT) + return; // We start with 1.0 so that the implicit bit for x86 long doubles // is set. FPBits bits(F(1.0)); - bits.set_biased_exponent(EXPONENT_LIMIT + FPBits::EXP_BIAS); + bits.set_biased_exponent(BIASED_EXPONENT_LIMIT); bits.set_sign(Sign::NEG); bits.set_mantissa(0); @@ -200,10 +204,13 @@ class RoundToIntegerTestTemplate return; constexpr int EXPONENT_LIMIT = sizeof(I) * 8 - 1; + constexpr int BIASED_EXPONENT_LIMIT = EXPONENT_LIMIT + FPBits::EXP_BIAS; + if (BIASED_EXPONENT_LIMIT > FPBits::MAX_BIASED_EXPONENT) + return; // We start with 1.0 so that the implicit bit for x86 long doubles // is set. 
FPBits bits(F(1.0)); - bits.set_biased_exponent(EXPONENT_LIMIT + FPBits::EXP_BIAS); + bits.set_biased_exponent(BIASED_EXPONENT_LIMIT); bits.set_sign(Sign::NEG); bits.set_mantissa(FPBits::FRACTION_MASK); @@ -226,8 +233,10 @@ class RoundToIntegerTestTemplate } void testSubnormalRange(RoundToIntegerFunc func) { - constexpr StorageType COUNT = 1'000'001; - constexpr StorageType STEP = (MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT; + constexpr int COUNT = 1'000'001; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast((MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT), + StorageType(1)); for (StorageType i = MIN_SUBNORMAL; i <= MAX_SUBNORMAL; i += STEP) { F x = FPBits(i).get_val(); if (x == F(0.0)) @@ -268,15 +277,17 @@ class RoundToIntegerTestTemplate if (sizeof(I) > sizeof(long)) return; - constexpr StorageType COUNT = 1'000'001; - constexpr StorageType STEP = (MAX_NORMAL - MIN_NORMAL) / COUNT; + constexpr int COUNT = 1'000'001; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast((MAX_NORMAL - MIN_NORMAL) / COUNT), + StorageType(1)); for (StorageType i = MIN_NORMAL; i <= MAX_NORMAL; i += STEP) { - F x = FPBits(i).get_val(); + FPBits xbits(i); + F x = xbits.get_val(); // In normal range on x86 platforms, the long double implicit 1 bit can be // zero making the numbers NaN. We will skip them. - if (isnan(x)) { + if (xbits.is_nan()) continue; - } if (TestModes) { for (int m : ROUNDING_MODES) { diff --git a/libc/test/src/math/TruncTest.h b/libc/test/src/math/TruncTest.h index bc5b76131291bb..76c9740a917bfc 100644 --- a/libc/test/src/math/TruncTest.h +++ b/libc/test/src/math/TruncTest.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_TRUNCTEST_H #define LLVM_LIBC_TEST_SRC_MATH_TRUNCTEST_H +#include "src/__support/CPP/algorithm.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" @@ -62,18 +63,21 @@ class TruncTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { EXPECT_FP_EQ(T(-10.0), func(T(-10.32))); EXPECT_FP_EQ(T(10.0), func(T(10.65))); EXPECT_FP_EQ(T(-10.0), func(T(-10.65))); - EXPECT_FP_EQ(T(1234.0), func(T(1234.38))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1234.38))); - EXPECT_FP_EQ(T(1234.0), func(T(1234.96))); - EXPECT_FP_EQ(T(-1234.0), func(T(-1234.96))); + EXPECT_FP_EQ(T(123.0), func(T(123.38))); + EXPECT_FP_EQ(T(-123.0), func(T(-123.38))); + EXPECT_FP_EQ(T(123.0), func(T(123.96))); + EXPECT_FP_EQ(T(-123.0), func(T(-123.96))); } void testRange(TruncFunc func) { - constexpr StorageType COUNT = 100'000; - constexpr StorageType STEP = STORAGE_MAX / COUNT; - for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { - T x = FPBits(v).get_val(); - if (isnan(x) || isinf(x)) + constexpr int COUNT = 100'000; + constexpr StorageType STEP = LIBC_NAMESPACE::cpp::max( + static_cast(STORAGE_MAX / COUNT), StorageType(1)); + StorageType v = 0; + for (int i = 0; i <= COUNT; ++i, v += STEP) { + FPBits xbits(v); + T x = xbits.get_val(); + if (xbits.is_inf_or_nan()) continue; ASSERT_MPFR_MATCH(mpfr::Operation::Trunc, x, func(x), 0.0); diff --git a/libc/test/src/math/ceilf16_test.cpp b/libc/test/src/math/ceilf16_test.cpp new file mode 100644 index 00000000000000..a6ec922836a759 --- /dev/null +++ b/libc/test/src/math/ceilf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for ceilf16 ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CeilTest.h" + +#include "src/math/ceilf16.h" + +LIST_CEIL_TESTS(float16, LIBC_NAMESPACE::ceilf16) diff --git a/libc/test/src/math/floorf16_test.cpp b/libc/test/src/math/floorf16_test.cpp new file mode 100644 index 00000000000000..ca5160e927035e --- /dev/null +++ b/libc/test/src/math/floorf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for floorf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FloorTest.h" + +#include "src/math/floorf16.h" + +LIST_FLOOR_TESTS(float16, LIBC_NAMESPACE::floorf16) diff --git a/libc/test/src/math/llrintf16_test.cpp b/libc/test/src/math/llrintf16_test.cpp new file mode 100644 index 00000000000000..d16bd8f38b052b --- /dev/null +++ b/libc/test/src/math/llrintf16_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for llrintf16 -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundToIntegerTest.h" + +#include "src/math/llrintf16.h" + +LIST_ROUND_TO_INTEGER_TESTS_WITH_MODES(float16, long long, + LIBC_NAMESPACE::llrintf16) diff --git a/libc/test/src/math/llroundf16_test.cpp b/libc/test/src/math/llroundf16_test.cpp new file mode 100644 index 00000000000000..9342b24fd5d041 --- /dev/null +++ b/libc/test/src/math/llroundf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for llroundf16 ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundToIntegerTest.h" + +#include "src/math/llroundf16.h" + +LIST_ROUND_TO_INTEGER_TESTS(float16, long long, LIBC_NAMESPACE::llroundf16) diff --git a/libc/test/src/math/lrintf16_test.cpp b/libc/test/src/math/lrintf16_test.cpp new file mode 100644 index 00000000000000..28b1a1cb888d7a --- /dev/null +++ b/libc/test/src/math/lrintf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for lrintf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundToIntegerTest.h" + +#include "src/math/lrintf16.h" + +LIST_ROUND_TO_INTEGER_TESTS_WITH_MODES(float16, long, LIBC_NAMESPACE::lrintf16) diff --git a/libc/test/src/math/lroundf16_test.cpp b/libc/test/src/math/lroundf16_test.cpp new file mode 100644 index 00000000000000..3077134d58f916 --- /dev/null +++ b/libc/test/src/math/lroundf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for lroundf16 -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundToIntegerTest.h" + +#include "src/math/lroundf16.h" + +LIST_ROUND_TO_INTEGER_TESTS(float16, long, LIBC_NAMESPACE::lroundf16) diff --git a/libc/test/src/math/rintf16_test.cpp b/libc/test/src/math/rintf16_test.cpp new file mode 100644 index 00000000000000..2adf2560bae1f7 --- /dev/null +++ b/libc/test/src/math/rintf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for rintf16 ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RIntTest.h" + +#include "src/math/rintf16.h" + +LIST_RINT_TESTS(float16, LIBC_NAMESPACE::rintf16) diff --git a/libc/test/src/math/roundevenf16_test.cpp b/libc/test/src/math/roundevenf16_test.cpp new file mode 100644 index 00000000000000..911a32c9f73f46 --- /dev/null +++ b/libc/test/src/math/roundevenf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for roundevenf16 ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" + +#include "src/math/roundevenf16.h" + +LIST_ROUNDEVEN_TESTS(float16, LIBC_NAMESPACE::roundevenf16) diff --git a/libc/test/src/math/roundf16_test.cpp b/libc/test/src/math/roundf16_test.cpp new file mode 100644 index 00000000000000..54ead855934db4 --- /dev/null +++ b/libc/test/src/math/roundf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for roundf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundTest.h" + +#include "src/math/roundf16.h" + +LIST_ROUND_TESTS(float16, LIBC_NAMESPACE::roundf16) diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 84aa76c0a08810..75e2bdd7be100a 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -2608,7 +2608,6 @@ add_fp_unittest( RemQuoTest.h DEPENDS libc.src.math.remquof - libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits ) @@ -2636,7 +2635,6 @@ add_fp_unittest( RemQuoTest.h DEPENDS libc.src.math.remquo - libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits ) @@ -2650,7 +2648,19 @@ add_fp_unittest( RemQuoTest.h DEPENDS libc.src.math.remquol - libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + remquof16_test + SUITE + libc-math-smoke-tests + SRCS + remquof16_test.cpp + HDRS + RemQuoTest.h + DEPENDS + libc.src.math.remquof16 libc.src.__support.FPUtil.fp_bits ) diff --git a/libc/test/src/math/smoke/RemQuoTest.h b/libc/test/src/math/smoke/RemQuoTest.h index 43eee3d38e4495..e9263263dfb247 100644 --- a/libc/test/src/math/smoke/RemQuoTest.h +++ b/libc/test/src/math/smoke/RemQuoTest.h @@ -9,8 +9,6 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_REMQUOTEST_H #define LLVM_LIBC_TEST_SRC_MATH_REMQUOTEST_H -#include "hdr/math_macros.h" -#include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/smoke/remquof16_test.cpp b/libc/test/src/math/smoke/remquof16_test.cpp new file mode 100644 index 00000000000000..18f2aba71aabe3 --- /dev/null +++ b/libc/test/src/math/smoke/remquof16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for remquof16 -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RemQuoTest.h" + +#include "src/math/remquof16.h" + +LIST_REMQUO_TESTS(float16, LIBC_NAMESPACE::remquof16) diff --git a/libc/test/src/math/truncf16_test.cpp b/libc/test/src/math/truncf16_test.cpp new file mode 100644 index 00000000000000..832d88ec84f8ed --- /dev/null +++ b/libc/test/src/math/truncf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for truncf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TruncTest.h"
+
+#include "src/math/truncf16.h"
+
+LIST_TRUNC_TESTS(float16, LIBC_NAMESPACE::truncf16)
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 38488778c657c3..d3954f077a219f 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -54,6 +54,31 @@ add_libc_test(
     libc.src.stdlib.atoll
 )
 
+add_libc_test(
+  block_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    block_test.cpp
+  DEPENDS
+    libc.src.stdlib.block
+    libc.src.__support.CPP.array
+    libc.src.__support.CPP.span
+    libc.src.string.memcpy
+)
+
+add_libc_test(
+  freelist_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    freelist_test.cpp
+  DEPENDS
+    libc.src.stdlib.freelist
+    libc.src.__support.CPP.array
+    libc.src.__support.CPP.span
+)
+
 add_fp_unittest(
   strtod_test
   SUITE
diff --git a/libc/test/src/stdlib/block_test.cpp b/libc/test/src/stdlib/block_test.cpp
new file mode 100644
index 00000000000000..0544e699cc8b25
--- /dev/null
+++ b/libc/test/src/stdlib/block_test.cpp
@@ -0,0 +1,570 @@
+//===-- Unittests for a block of memory -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <stddef.h>
+
+#include "src/stdlib/block.h"
+
+#include "src/__support/CPP/array.h"
+#include "src/__support/CPP/span.h"
+#include "src/string/memcpy.h"
+#include "test/UnitTest/Test.h"
+
+// Block types.
+using LargeOffsetBlock = LIBC_NAMESPACE::Block<uint64_t>;
+using SmallOffsetBlock = LIBC_NAMESPACE::Block<uint16_t>;
+
+// For each of the block types above, we'd like to run the same tests since
+// they should work independently of the parameter sizes. Rather than
+// re-writing the same test for each case, let's instead create a custom test
+// framework for each test case that invokes the actual testing function for
+// each block type.
+//
+// It's organized this way because the ASSERT/EXPECT macros only work within a
+// `Test` class due to those macros expanding to `test` methods.
+#define TEST_FOR_EACH_BLOCK_TYPE(TestCase)                                    \
+  class LlvmLibcBlockTest##TestCase : public LIBC_NAMESPACE::testing::Test {  \
+  public:                                                                     \
+    template <typename BlockType> void RunTest();                             \
+  };                                                                          \
+  TEST_F(LlvmLibcBlockTest##TestCase, TestCase) {                             \
+    RunTest<LargeOffsetBlock>();                                              \
+    RunTest<SmallOffsetBlock>();                                              \
+  }                                                                           \
+  template <typename BlockType> void LlvmLibcBlockTest##TestCase::RunTest()
+
+using LIBC_NAMESPACE::cpp::array;
+using LIBC_NAMESPACE::cpp::byte;
+using LIBC_NAMESPACE::cpp::span;
+
+TEST_FOR_EACH_BLOCK_TYPE(CanCreateSingleAlignedBlock) {
+  constexpr size_t kN = 1024;
+  alignas(BlockType::ALIGNMENT) array<byte, kN> bytes;
+
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block = *result;
+
+  EXPECT_EQ(block->outer_size(), kN);
+  EXPECT_EQ(block->inner_size(), kN - BlockType::BLOCK_OVERHEAD);
+  EXPECT_EQ(block->prev(), static_cast<BlockType *>(nullptr));
+  EXPECT_EQ(block->next(), static_cast<BlockType *>(nullptr));
+  EXPECT_FALSE(block->used());
+  EXPECT_TRUE(block->last());
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CanCreateUnalignedSingleBlock) {
+  constexpr size_t kN = 1024;
+
+  // Force alignment, so we can un-force it below
+  alignas(BlockType::ALIGNMENT) array<byte, kN> bytes;
+  span<byte> aligned(bytes);
+
+  auto result = BlockType::init(aligned.subspan(1));
+  EXPECT_TRUE(result.has_value());
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CannotCreateTooSmallBlock) {
+  array<byte, 2> bytes;
+  auto result = BlockType::init(bytes);
+  EXPECT_FALSE(result.has_value());
+}
+
+// This test specifically checks that we cannot allocate a block with a size
+// larger than what can be held by the offset type; we don't need to test with
+// multiple block types for this particular check, so we use the normal TEST
+// macro and not the custom framework.
+TEST(LlvmLibcBlockTest, CannotCreateTooLargeBlock) {
+  using BlockType = LIBC_NAMESPACE::Block<uint8_t>;
+  constexpr size_t kN = 1024;
+
+  alignas(BlockType::ALIGNMENT) array<byte, kN> bytes;
+  auto result = BlockType::init(bytes);
+  EXPECT_FALSE(result.has_value());
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlock) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kSplitN = 512;
+
+  alignas(BlockType::ALIGNMENT) array<byte, kN> bytes;
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  auto *block1 = *result;
+
+  result = BlockType::split(block1, kSplitN);
+  ASSERT_TRUE(result.has_value());
+
+  auto *block2 = *result;
+
+  EXPECT_EQ(block1->inner_size(), kSplitN);
+  EXPECT_EQ(block1->outer_size(), kSplitN + BlockType::BLOCK_OVERHEAD);
+  EXPECT_FALSE(block1->last());
+
+  EXPECT_EQ(block2->outer_size(), kN - kSplitN - BlockType::BLOCK_OVERHEAD);
+  EXPECT_FALSE(block2->used());
+  EXPECT_TRUE(block2->last());
+
+  EXPECT_EQ(block1->next(), block2);
+  EXPECT_EQ(block2->prev(), block1);
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlockUnaligned) {
+  constexpr size_t kN = 1024;
+
+  alignas(BlockType::ALIGNMENT) array<byte, kN> bytes;
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block1 = *result;
+
+  // We should split at sizeof(BlockType) + kSplitN bytes. Then
+  // we need to round that up to an alignof(BlockType) boundary.
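+  // (Editorial illustration, not part of the original test: with
+  // alignof(BlockType) == 4 and the block starting at address 0x1000,
+  // 0x1000 + 513 = 0x1201, which rounds up to 0x1204, so the first block
+  // ends up keeping 516 bytes rather than exactly 513.)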
+ constexpr size_t kSplitN = 513; + uintptr_t split_addr = reinterpret_cast(block1) + kSplitN; + split_addr += alignof(BlockType) - (split_addr % alignof(BlockType)); + uintptr_t split_len = split_addr - (uintptr_t)&bytes; + + result = BlockType::split(block1, kSplitN); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + EXPECT_EQ(block1->inner_size(), split_len); + EXPECT_EQ(block1->outer_size(), split_len + BlockType::BLOCK_OVERHEAD); + + EXPECT_EQ(block2->outer_size(), kN - block1->outer_size()); + EXPECT_FALSE(block2->used()); + + EXPECT_EQ(block1->next(), block2); + EXPECT_EQ(block2->prev(), block1); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanSplitMidBlock) { + // split once, then split the original block again to ensure that the + // pointers get rewired properly. + // I.e. + // [[ BLOCK 1 ]] + // block1->split() + // [[ BLOCK1 ]][[ BLOCK2 ]] + // block1->split() + // [[ BLOCK1 ]][[ BLOCK3 ]][[ BLOCK2 ]] + + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, kSplit1); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + result = BlockType::split(block1, kSplit2); + ASSERT_TRUE(result.has_value()); + BlockType *block3 = *result; + + EXPECT_EQ(block1->next(), block3); + EXPECT_EQ(block3->prev(), block1); + EXPECT_EQ(block3->next(), block2); + EXPECT_EQ(block2->prev(), block3); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotSplitTooSmallBlock) { + constexpr size_t kN = 64; + constexpr size_t kSplitN = kN + 1; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + result = BlockType::split(block, kSplitN); + ASSERT_FALSE(result.has_value()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotSplitBlockWithoutHeaderSpace) { + constexpr size_t kN = 1024; + constexpr size_t kSplitN = kN - BlockType::BLOCK_OVERHEAD - 1; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + result = BlockType::split(block, kSplitN); + ASSERT_FALSE(result.has_value()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotSplitNull) { + BlockType *block = nullptr; + auto result = BlockType::split(block, 1); + ASSERT_FALSE(result.has_value()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotMakeBlockLargerInSplit) { + // Ensure that we can't ask for more space than the block actually has... + constexpr size_t kN = 1024; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + result = BlockType::split(block, block->inner_size() + 1); + ASSERT_FALSE(result.has_value()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotMakeSecondBlockLargerInSplit) { + // Ensure that the second block in split is at least of the size of header. + constexpr size_t kN = 1024; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + result = BlockType::split(block, block->inner_size() - + BlockType::BLOCK_OVERHEAD + 1); + ASSERT_FALSE(result.has_value()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanMakeZeroSizeFirstBlock) { + // This block does support splitting with zero payload size. 
+ constexpr size_t kN = 1024; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + result = BlockType::split(block, 0); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(block->inner_size(), static_cast(0)); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanMakeZeroSizeSecondBlock) { + // Likewise, the split block can be zero-width. + constexpr size_t kN = 1024; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, + block1->inner_size() - BlockType::BLOCK_OVERHEAD); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + EXPECT_EQ(block2->inner_size(), static_cast(0)); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanMarkBlockUsed) { + constexpr size_t kN = 1024; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + block->mark_used(); + EXPECT_TRUE(block->used()); + + // Size should be unaffected. + EXPECT_EQ(block->outer_size(), kN); + + block->mark_free(); + EXPECT_FALSE(block->used()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotSplitUsedBlock) { + constexpr size_t kN = 1024; + constexpr size_t kSplitN = 512; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + block->mark_used(); + result = BlockType::split(block, kSplitN); + ASSERT_FALSE(result.has_value()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanMergeWithNextBlock) { + // Do the three way merge from "CanSplitMidBlock", and let's + // merge block 3 and 2 + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, kSplit1); + ASSERT_TRUE(result.has_value()); + + result = BlockType::split(block1, kSplit2); + ASSERT_TRUE(result.has_value()); + BlockType *block3 = *result; + + EXPECT_TRUE(BlockType::merge_next(block3)); + + EXPECT_EQ(block1->next(), block3); + EXPECT_EQ(block3->prev(), block1); + EXPECT_EQ(block1->inner_size(), kSplit2); + EXPECT_EQ(block3->outer_size(), kN - block1->outer_size()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotMergeWithFirstOrLastBlock) { + constexpr size_t kN = 1024; + constexpr size_t kSplitN = 512; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + // Do a split, just to check that the checks on next/prev are different... + result = BlockType::split(block1, kSplitN); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + EXPECT_FALSE(BlockType::merge_next(block2)); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotMergeNull) { + BlockType *block = nullptr; + EXPECT_FALSE(BlockType::merge_next(block)); +} + +TEST_FOR_EACH_BLOCK_TYPE(CannotMergeUsedBlock) { + constexpr size_t kN = 1024; + constexpr size_t kSplitN = 512; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + // Do a split, just to check that the checks on next/prev are different... 
+ result = BlockType::split(block, kSplitN); + ASSERT_TRUE(result.has_value()); + + block->mark_used(); + EXPECT_FALSE(BlockType::merge_next(block)); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanFreeSingleBlock) { + constexpr size_t kN = 1024; + alignas(BlockType::ALIGNMENT) array bytes; + + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block = *result; + + block->mark_used(); + BlockType::free(block); + EXPECT_FALSE(block->used()); + EXPECT_EQ(block->outer_size(), kN); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanFreeBlockWithoutMerging) { + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, kSplit1); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + result = BlockType::split(block2, kSplit2); + ASSERT_TRUE(result.has_value()); + BlockType *block3 = *result; + + block1->mark_used(); + block2->mark_used(); + block3->mark_used(); + + BlockType::free(block2); + EXPECT_FALSE(block2->used()); + EXPECT_NE(block2->prev(), static_cast(nullptr)); + EXPECT_FALSE(block2->last()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanFreeBlockAndMergeWithPrev) { + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, kSplit1); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + result = BlockType::split(block2, kSplit2); + ASSERT_TRUE(result.has_value()); + BlockType *block3 = *result; + + block2->mark_used(); + block3->mark_used(); + + BlockType::free(block2); + EXPECT_FALSE(block2->used()); + EXPECT_EQ(block2->prev(), static_cast(nullptr)); + EXPECT_FALSE(block2->last()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanFreeBlockAndMergeWithNext) { + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, kSplit1); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + result = BlockType::split(block2, kSplit2); + ASSERT_TRUE(result.has_value()); + + block1->mark_used(); + block2->mark_used(); + + BlockType::free(block2); + EXPECT_FALSE(block2->used()); + EXPECT_NE(block2->prev(), static_cast(nullptr)); + EXPECT_TRUE(block2->last()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanFreeUsedBlockAndMergeWithBoth) { + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + alignas(BlockType::ALIGNMENT) array bytes; + auto result = BlockType::init(bytes); + ASSERT_TRUE(result.has_value()); + BlockType *block1 = *result; + + result = BlockType::split(block1, kSplit1); + ASSERT_TRUE(result.has_value()); + BlockType *block2 = *result; + + result = BlockType::split(block2, kSplit2); + ASSERT_TRUE(result.has_value()); + + block2->mark_used(); + + BlockType::free(block2); + EXPECT_FALSE(block2->used()); + EXPECT_EQ(block2->prev(), static_cast(nullptr)); + EXPECT_TRUE(block2->last()); +} + +TEST_FOR_EACH_BLOCK_TYPE(CanCheckValidBlock) { + constexpr size_t kN = 1024; + constexpr size_t kSplit1 = 512; + constexpr size_t kSplit2 = 256; + + 
alignas(BlockType::ALIGNMENT) array<byte, kN> bytes;
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block1 = *result;
+
+  result = BlockType::split(block1, kSplit1);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block2 = *result;
+
+  result = BlockType::split(block2, kSplit2);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block3 = *result;
+
+  EXPECT_TRUE(block1->is_valid());
+  EXPECT_TRUE(block2->is_valid());
+  EXPECT_TRUE(block3->is_valid());
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CanCheckInvalidBlock) {
+  constexpr size_t kN = 1024;
+  constexpr size_t kSplit1 = 128;
+  constexpr size_t kSplit2 = 384;
+  constexpr size_t kSplit3 = 256;
+
+  array<byte, kN> bytes{};
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block1 = *result;
+
+  result = BlockType::split(block1, kSplit1);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block2 = *result;
+
+  result = BlockType::split(block2, kSplit2);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block3 = *result;
+
+  result = BlockType::split(block3, kSplit3);
+  ASSERT_TRUE(result.has_value());
+
+  // Corrupt a Block header.
+  // This must not touch memory outside the original region, or the test may
+  // (correctly) abort when run with address sanitizer.
+  // To remain as agnostic to the internals of `Block` as possible, the test
+  // copies a smaller block's header to a larger block.
+  EXPECT_TRUE(block1->is_valid());
+  EXPECT_TRUE(block2->is_valid());
+  EXPECT_TRUE(block3->is_valid());
+  auto *src = reinterpret_cast<byte *>(block1);
+  auto *dst = reinterpret_cast<byte *>(block2);
+  LIBC_NAMESPACE::memcpy(dst, src, sizeof(BlockType));
+  EXPECT_FALSE(block1->is_valid());
+  EXPECT_FALSE(block2->is_valid());
+  EXPECT_FALSE(block3->is_valid());
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CanGetBlockFromUsableSpace) {
+  constexpr size_t kN = 1024;
+
+  array<byte, kN> bytes{};
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  BlockType *block1 = *result;
+
+  void *ptr = block1->usable_space();
+  BlockType *block2 = BlockType::from_usable_space(ptr);
+  EXPECT_EQ(block1, block2);
+}
+
+TEST_FOR_EACH_BLOCK_TYPE(CanGetConstBlockFromUsableSpace) {
+  constexpr size_t kN = 1024;
+
+  array<byte, kN> bytes{};
+  auto result = BlockType::init(bytes);
+  ASSERT_TRUE(result.has_value());
+  const BlockType *block1 = *result;
+
+  const void *ptr = block1->usable_space();
+  const BlockType *block2 = BlockType::from_usable_space(ptr);
+  EXPECT_EQ(block1, block2);
+}
diff --git a/libc/test/src/stdlib/freelist_test.cpp b/libc/test/src/stdlib/freelist_test.cpp
new file mode 100644
index 00000000000000..e25c74b47b8522
--- /dev/null
+++ b/libc/test/src/stdlib/freelist_test.cpp
@@ -0,0 +1,166 @@
+//===-- Unittests for a freelist --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "src/__support/CPP/array.h" +#include "src/__support/CPP/span.h" +#include "src/stdlib/freelist.h" +#include "test/UnitTest/Test.h" + +using LIBC_NAMESPACE::FreeList; +using LIBC_NAMESPACE::cpp::array; +using LIBC_NAMESPACE::cpp::byte; +using LIBC_NAMESPACE::cpp::span; + +static constexpr size_t SIZE = 8; +static constexpr array example_sizes = {64, 128, 256, 512, + 1024, 2048, 4096, 8192}; + +TEST(LlvmLibcFreeList, EmptyListHasNoMembers) { + FreeList list(example_sizes); + + auto item = list.find_chunk(4); + EXPECT_EQ(item.size(), static_cast(0)); + item = list.find_chunk(128); + EXPECT_EQ(item.size(), static_cast(0)); +} + +TEST(LlvmLibcFreeList, CanRetrieveAddedMember) { + FreeList list(example_sizes); + constexpr size_t N = 512; + + byte data[N] = {byte(0)}; + + bool ok = list.add_chunk(span(data, N)); + EXPECT_TRUE(ok); + + auto item = list.find_chunk(N); + EXPECT_EQ(item.size(), N); + EXPECT_EQ(item.data(), data); +} + +TEST(LlvmLibcFreeList, CanRetrieveAddedMemberForSmallerSize) { + FreeList list(example_sizes); + constexpr size_t N = 512; + + byte data[N] = {byte(0)}; + + ASSERT_TRUE(list.add_chunk(span(data, N))); + auto item = list.find_chunk(N / 2); + EXPECT_EQ(item.size(), N); + EXPECT_EQ(item.data(), data); +} + +TEST(LlvmLibcFreeList, CanRemoveItem) { + FreeList list(example_sizes); + constexpr size_t N = 512; + + byte data[N] = {byte(0)}; + + ASSERT_TRUE(list.add_chunk(span(data, N))); + EXPECT_TRUE(list.remove_chunk(span(data, N))); + + auto item = list.find_chunk(N); + EXPECT_EQ(item.size(), static_cast(0)); +} + +TEST(LlvmLibcFreeList, FindReturnsSmallestChunk) { + FreeList list(example_sizes); + constexpr size_t kN1 = 512; + constexpr size_t kN2 = 1024; + + byte data1[kN1] = {byte(0)}; + byte data2[kN2] = {byte(0)}; + + ASSERT_TRUE(list.add_chunk(span(data1, kN1))); + ASSERT_TRUE(list.add_chunk(span(data2, kN2))); + + auto chunk = list.find_chunk(kN1 / 2); + EXPECT_EQ(chunk.size(), kN1); + EXPECT_EQ(chunk.data(), data1); + + chunk = list.find_chunk(kN1); + EXPECT_EQ(chunk.size(), kN1); + EXPECT_EQ(chunk.data(), data1); + + chunk = list.find_chunk(kN1 + 1); + EXPECT_EQ(chunk.size(), kN2); + EXPECT_EQ(chunk.data(), data2); +} + +TEST(LlvmLibcFreeList, FindReturnsCorrectChunkInSameBucket) { + // If we have two values in the same bucket, ensure that the allocation will + // pick an appropriately sized one. + FreeList list(example_sizes); + constexpr size_t kN1 = 512; + constexpr size_t kN2 = 257; + + byte data1[kN1] = {byte(0)}; + byte data2[kN2] = {byte(0)}; + + // List should now be 257 -> 512 -> NULL + ASSERT_TRUE(list.add_chunk(span(data1, kN1))); + ASSERT_TRUE(list.add_chunk(span(data2, kN2))); + + auto chunk = list.find_chunk(kN2 + 1); + EXPECT_EQ(chunk.size(), kN1); +} + +TEST(LlvmLibcFreeList, FindCanMoveUpThroughBuckets) { + // Ensure that finding a chunk will move up through buckets if no appropriate + // chunks were found in a given bucket + FreeList list(example_sizes); + constexpr size_t kN1 = 257; + constexpr size_t kN2 = 513; + + byte data1[kN1] = {byte(0)}; + byte data2[kN2] = {byte(0)}; + + // List should now be: + // bkt[3] (257 bytes up to 512 bytes) -> 257 -> NULL + // bkt[4] (513 bytes up to 1024 bytes) -> 513 -> NULL + ASSERT_TRUE(list.add_chunk(span(data1, kN1))); + ASSERT_TRUE(list.add_chunk(span(data2, kN2))); + + // Request a 300 byte chunk. 
This should return the 513 byte one + auto chunk = list.find_chunk(kN1 + 1); + EXPECT_EQ(chunk.size(), kN2); +} + +TEST(LlvmLibcFreeList, RemoveUnknownChunkReturnsNotFound) { + FreeList list(example_sizes); + constexpr size_t N = 512; + + byte data[N] = {byte(0)}; + byte data2[N] = {byte(0)}; + + ASSERT_TRUE(list.add_chunk(span(data, N))); + EXPECT_FALSE(list.remove_chunk(span(data2, N))); +} + +TEST(LlvmLibcFreeList, CanStoreMultipleChunksPerBucket) { + FreeList list(example_sizes); + constexpr size_t N = 512; + + byte data1[N] = {byte(0)}; + byte data2[N] = {byte(0)}; + + ASSERT_TRUE(list.add_chunk(span(data1, N))); + ASSERT_TRUE(list.add_chunk(span(data2, N))); + + auto chunk1 = list.find_chunk(N); + ASSERT_TRUE(list.remove_chunk(chunk1)); + auto chunk2 = list.find_chunk(N); + ASSERT_TRUE(list.remove_chunk(chunk2)); + + // Ordering of the chunks doesn't matter + EXPECT_TRUE(chunk1.data() != chunk2.data()); + EXPECT_TRUE(chunk1.data() == data1 || chunk1.data() == data2); + EXPECT_TRUE(chunk2.data() == data1 || chunk2.data() == data2); +} diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 18a8ac044a9bbd..6918139fa83b7c 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -12,6 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/fpbits_str.h" +#include "src/__support/macros/properties/types.h" #include "test/UnitTest/FPMatcher.h" #include "hdr/math_macros.h" @@ -30,6 +31,12 @@ namespace mpfr { // precision compared to the floating point precision. template struct ExtraPrecision; +#ifdef LIBC_TYPES_HAS_FLOAT16 +template <> struct ExtraPrecision { + static constexpr unsigned int VALUE = 128; +}; +#endif + template <> struct ExtraPrecision { static constexpr unsigned int VALUE = 128; }; @@ -85,9 +92,16 @@ class MPFRNumber { // We use explicit EnableIf specializations to disallow implicit // conversions. Implicit conversions can potentially lead to loss of - // precision. + // precision. We exceptionally allow implicit conversions from float16 + // to float, as the MPFR API does not support float16, thus requiring + // conversion to a higher-precision format. template , int> = 0> + cpp::enable_if_t +#ifdef LIBC_TYPES_HAS_FLOAT16 + || cpp::is_same_v +#endif + , + int> = 0> explicit MPFRNumber(XType x, unsigned int precision = ExtraPrecision::VALUE, RoundingMode rounding = RoundingMode::Nearest) @@ -529,8 +543,8 @@ class MPFRNumber { // If the control reaches here, it means that this number and input are // of the same sign but different exponent. In such a case, ULP error is // calculated as sum of two parts. - thisAsT = std::abs(thisAsT); - input = std::abs(input); + thisAsT = FPBits(thisAsT).abs().get_val(); + input = FPBits(input).abs().get_val(); T min = thisAsT > input ? input : thisAsT; T max = thisAsT > input ? thisAsT : input; int minExponent = FPBits(min).get_exponent(); @@ -585,6 +599,14 @@ template <> long double MPFRNumber::as() const { return mpfr_get_ld(value, mpfr_rounding); } +#ifdef LIBC_TYPES_HAS_FLOAT16 +template <> float16 MPFRNumber::as() const { + // TODO: Either prove that this cast won't cause double-rounding errors, or + // find a better way to get a float16. 
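+  // (Editorial note, not in the original patch: for round-to-nearest this
+  // double rounding is in fact innocuous -- double's 53-bit significand
+  // exceeds the 2*11+2 = 24 bits that make an intermediate format safe for
+  // float16's 11-bit significand (Figueroa, "When is double rounding
+  // innocuous?"), and the directed modes round the same way at both steps.)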
+  return static_cast<float16>(mpfr_get_d(value, mpfr_rounding));
+}
+#endif
+
 namespace internal {
 
 template <typename T>
@@ -763,6 +785,10 @@ template void explain_unary_operation_single_output_error(
     Operation op, double, double, double, RoundingMode);
 template void explain_unary_operation_single_output_error(
     Operation op, long double, long double, double, RoundingMode);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template void explain_unary_operation_single_output_error(
+    Operation op, float16, float16, double, RoundingMode);
+#endif
 
 template <typename T>
 void explain_unary_operation_two_outputs_error(
@@ -942,6 +968,11 @@ template bool compare_unary_operation_single_output(Operation, double,
                                                     RoundingMode);
 template bool compare_unary_operation_single_output(
     Operation, long double, long double, double, RoundingMode);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template bool compare_unary_operation_single_output(Operation, float16,
+                                                    float16, double,
+                                                    RoundingMode);
+#endif
 
 template <typename T>
 bool compare_unary_operation_two_outputs(Operation op, T input,
@@ -1054,6 +1085,9 @@ template <typename T> bool round_to_long(T x, long &result) {
 template bool round_to_long(float, long &);
 template bool round_to_long(double, long &);
 template bool round_to_long(long double, long &);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template bool round_to_long(float16, long &);
+#endif
 
 template <typename T> bool round_to_long(T x, RoundingMode mode, long &result) {
   MPFRNumber mpfr(x);
@@ -1063,6 +1097,9 @@ template <typename T> bool round_to_long(T x, RoundingMode mode, long &result) {
 template bool round_to_long(float, RoundingMode, long &);
 template bool round_to_long(double, RoundingMode, long &);
 template bool round_to_long(long double, RoundingMode, long &);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template bool round_to_long(float16, RoundingMode, long &);
+#endif
 
 template <typename T> T round(T x, RoundingMode mode) {
   MPFRNumber mpfr(x);
@@ -1073,6 +1110,9 @@ template <typename T> T round(T x, RoundingMode mode) {
 template float round(float, RoundingMode);
 template double round(double, RoundingMode);
 template long double round(long double, RoundingMode);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template float16 round(float16, RoundingMode);
+#endif
 
 } // namespace mpfr
 } // namespace testing
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index cbbcc74e21b443..d65b7ce782ebd2 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -277,6 +277,7 @@ set(files
   __chrono/year_month.h
   __chrono/year_month_day.h
   __chrono/year_month_weekday.h
+  __chrono/zoned_time.h
   __compare/common_comparison_category.h
   __compare/compare_partial_order_fallback.h
   __compare/compare_strong_order_fallback.h
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index aa4336a2214c87..549197be80183f 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -11,6 +11,7 @@
 #include <__algorithm/min.h>
 #include <__bit/bit_cast.h>
+#include <__bit/countl.h>
 #include <__bit/countr.h>
 #include <__config>
 #include <__type_traits/is_arithmetic.h>
@@ -126,8 +127,13 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
   // This has MSan disabled due to https://github.com/llvm/llvm-project/issues/85876
   auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
+# if defined(_LIBCPP_BIG_ENDIAN)
+    return std::min(
+        _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
+# else
     return std::min(
         _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec,
__mask_vec)))); +# endif }; if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) { diff --git a/libcxx/include/__chrono/zoned_time.h b/libcxx/include/__chrono/zoned_time.h new file mode 100644 index 00000000000000..c6084426ad72b4 --- /dev/null +++ b/libcxx/include/__chrono/zoned_time.h @@ -0,0 +1,55 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html + +#ifndef _LIBCPP___CHRONO_ZONED_TIME_H +#define _LIBCPP___CHRONO_ZONED_TIME_H + +#include +// Enable the contents of the header only when libc++ was built with experimental features enabled. +#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) + +# include <__chrono/time_zone.h> +# include <__chrono/tzdb_list.h> +# include <__config> +# include <__fwd/string_view.h> + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + +namespace chrono { + +template +struct zoned_traits {}; + +template <> +struct zoned_traits { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static const time_zone* default_zone() { return chrono::locate_zone("UTC"); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static const time_zone* locate_zone(string_view __name) { + return chrono::locate_zone(__name); + } +}; + +} // namespace chrono + +# endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) + // && !defined(_LIBCPP_HAS_NO_LOCALIZATION) + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) + +#endif // _LIBCPP___CHRONO_ZONED_TIME_H diff --git a/libcxx/include/__format/escaped_output_table.h b/libcxx/include/__format/escaped_output_table.h index 6aa91c89defa57..f7be2dc61f21a3 100644 --- a/libcxx/include/__format/escaped_output_table.h +++ b/libcxx/include/__format/escaped_output_table.h @@ -833,7 +833,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { /// more details. /// -/// \pre The code point is a valid Unicode code point. +/// \\pre The code point is a valid Unicode code point. [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool __needs_escape(const char32_t __code_point) noexcept { // The entries in the gap at the end. diff --git a/libcxx/include/__format/width_estimation_table.h b/libcxx/include/__format/width_estimation_table.h index c9a9f6719c6102..11f61dea18d696 100644 --- a/libcxx/include/__format/width_estimation_table.h +++ b/libcxx/include/__format/width_estimation_table.h @@ -237,7 +237,7 @@ inline constexpr uint32_t __table_upper_bound = 0x0003fffd; /// Returns the estimated width of a Unicode code point. /// -/// \pre The code point is a valid Unicode code point. +/// \\pre The code point is a valid Unicode code point. [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept { // Since __table_upper_bound contains the unshifted range do the // comparison without shifting. 
diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 4d47dc57ba6547..c1a92595ff1f59 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -786,6 +786,9 @@ class time_zone { bool operator==(const time_zone& x, const time_zone& y) noexcept; // C++20 strong_ordering operator<=>(const time_zone& x, const time_zone& y) noexcept; // C++20 +// [time.zone.zonedtraits], class template zoned_traits +template struct zoned_traits; // C++20 + // [time.zone.leap], leap second support class leap_second { // C++20 public: @@ -959,6 +962,7 @@ constexpr chrono::year operator ""y(unsigned lo # include <__chrono/time_zone_link.h> # include <__chrono/tzdb.h> # include <__chrono/tzdb_list.h> +# include <__chrono/zoned_time.h> # endif #endif diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index e03ea989eb3719..892d2c6b4af3c9 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1159,6 +1159,7 @@ module std_private_chrono_year [system] { header "__chrono/yea module std_private_chrono_year_month [system] { header "__chrono/year_month.h" } module std_private_chrono_year_month_day [system] { header "__chrono/year_month_day.h" } module std_private_chrono_year_month_weekday [system] { header "__chrono/year_month_weekday.h" } +module std_private_chrono_zoned_time [system] { header "__chrono/zoned_time.h" } module std_private_compare_common_comparison_category [system] { header "__compare/common_comparison_category.h" } module std_private_compare_compare_partial_order_fallback [system] { header "__compare/compare_partial_order_fallback.h" } diff --git a/libcxx/modules/std/chrono.inc b/libcxx/modules/std/chrono.inc index f12c2425592e0b..87e32afbe4bdca 100644 --- a/libcxx/modules/std/chrono.inc +++ b/libcxx/modules/std/chrono.inc @@ -227,11 +227,10 @@ export namespace std { using std::chrono::choose; using std::chrono::time_zone; -# if 0 - // [time.zone.zonedtraits], class template zoned_traits using std::chrono::zoned_traits; +# if 0 // [time.zone.zonedtime], class template zoned_time using std::chrono::zoned_time; diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp index cba7916ff2c646..6fed41bdb43edb 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard.verify.cpp @@ -72,4 +72,10 @@ void test() { leap.date(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} leap.value(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} } + + { + using t = std::chrono::zoned_traits; + t::default_zone(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + t::locate_zone(""); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + } } diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp index b278419df6eedb..7a8d096994fff0 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp @@ -19,9 +19,9 @@ #include "test_macros.h" -int main(int, char**) -{ - std::shared_timed_mutex m; +int main(int, char**) { + std::shared_timed_mutex m; + (void)m; return 0; } diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp index d13a0ad435aea8..c7f207372ac421 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp @@ -18,9 +18,9 @@ #include "test_macros.h" -int main(int, char**) -{ - std::timed_mutex m; +int main(int, char**) { + std::timed_mutex m; + (void)m; return 0; } diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp index 73e2e7a3a5ab9e..3096e031855a22 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp @@ -18,9 +18,9 @@ #include "test_macros.h" -int main(int, char**) -{ - std::recursive_timed_mutex m; +int main(int, char**) { + std::recursive_timed_mutex m; + (void)m; return 0; } diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp index 6dc15974c44843..a8c468a6c6fd4d 100644 --- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.local_time.pass.cpp @@ -11,6 +11,8 @@ // XFAIL: libcpp-has-no-experimental-tzdb // XFAIL: availability-tzdb-missing +// Times out under HWASan +// XFAIL: hwasan // diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtraits/const_time_zone_default_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.zonedtraits/const_time_zone_default_zone.pass.cpp new file mode 100644 index 00000000000000..c6527094bb6215 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.zonedtraits/const_time_zone_default_zone.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// XFAIL: libcpp-has-no-experimental-tzdb
+// XFAIL: availability-tzdb-missing
+
+// <chrono>
+
+// template<> struct zoned_traits<const time_zone*>;
+
+// static const time_zone* default_zone();
+
+#include
+#include
+
+int main(int, char**) {
+  std::same_as<const std::chrono::time_zone*> decltype(auto) tz =
+      std::chrono::zoned_traits<const std::chrono::time_zone*>::default_zone();
+  assert(tz);
+
+  // The time zone "UTC" can be a link, which means tz->name() can be
+  // something different, for example "Etc/UTC". Instead validate that the
+  // same time zone is returned by comparing the addresses.
+  const std::chrono::time_zone* expected = std::chrono::locate_zone("UTC");
+  assert(tz == expected);
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtraits/const_time_zone_locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.zonedtraits/const_time_zone_locate_zone.pass.cpp
new file mode 100644
index 00000000000000..1c8184455f23cf
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.zonedtraits/const_time_zone_locate_zone.pass.cpp
@@ -0,0 +1,45 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// XFAIL: libcpp-has-no-experimental-tzdb
+// XFAIL: availability-tzdb-missing
+
+// <chrono>
+
+// template<> struct zoned_traits<const time_zone*>;
+
+// static const time_zone* locate_zone(string_view name);
+
+#include
+#include
+#include
+
+#include "assert_macros.h"
+
+static void test(std::string_view name) {
+  std::same_as<const std::chrono::time_zone*> decltype(auto) tz =
+      std::chrono::zoned_traits<const std::chrono::time_zone*>::locate_zone(name);
+
+  const std::chrono::time_zone* expected = std::chrono::locate_zone(name);
+  assert(tz == expected);
+}
+
+int main(int, char**) {
+  test("UTC");
+  test("Europe/Berlin");
+  test("Asia/Hong_Kong");
+
+  TEST_THROWS_TYPE(std::runtime_error,
+                   TEST_IGNORE_NODISCARD std::chrono::zoned_traits<
+                       const std::chrono::time_zone*>::locate_zone(
+                       "there_is_no_time_zone_with_this_name"));
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtraits/types.compile.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.zonedtraits/types.compile.pass.cpp
new file mode 100644
index 00000000000000..6e3410795c8240
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.zonedtraits/types.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// XFAIL: libcpp-has-no-experimental-tzdb
+// XFAIL: availability-tzdb-missing
+
+// <chrono>
+
+// template<class T> struct zoned_traits {};
+//
+// A specialization for const time_zone* is provided by the implementation:
+// template<> struct zoned_traits<const time_zone*> { ... }
+
+#include <chrono>
+#include <type_traits>
+
+// This test tests whether non-specialized versions exhibit the expected
+// behavior. (Note these specializations are not really useful.)
+static_assert(std::is_trivial_v<std::chrono::zoned_traits<int>>);
+static_assert(std::is_trivial_v<std::chrono::zoned_traits<float>>);
+static_assert(std::is_trivial_v<std::chrono::zoned_traits<void*>>);
+
+struct foo {};
+static_assert(std::is_empty_v<std::chrono::zoned_traits<foo>>);
+static_assert(std::is_trivial_v<std::chrono::zoned_traits<foo>>);
diff --git a/libcxx/utils/generate_escaped_output_table.py b/libcxx/utils/generate_escaped_output_table.py
index c0b21f7731f64e..9dcecaa5575cdd 100755
--- a/libcxx/utils/generate_escaped_output_table.py
+++ b/libcxx/utils/generate_escaped_output_table.py
@@ -84,7 +84,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]:
     return result
 
 
-DATA_ARRAY_TEMPLATE = """
+DATA_ARRAY_TEMPLATE = r"""
 /// The entries of the characters to escape in format's debug string.
 ///
 /// Contains the entries for [format.string.escaped]/2.2.1.2.1
diff --git a/libcxx/utils/generate_width_estimation_table.py b/libcxx/utils/generate_width_estimation_table.py
index 2fe51490449ddb..f4cce1071d1f15 100644
--- a/libcxx/utils/generate_width_estimation_table.py
+++ b/libcxx/utils/generate_width_estimation_table.py
@@ -99,7 +99,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]:
     return result
 
 
-DATA_ARRAY_TEMPLATE = """
+DATA_ARRAY_TEMPLATE = r"""
 /// The entries of the characters with an estimated width of 2.
 ///
 /// Contains the entries for [format.string.std]/12
diff --git a/libunwind/test/floatregister.pass.cpp b/libunwind/test/floatregister.pass.cpp
index 64107e6d490b70..ce4481bdf8287e 100644
--- a/libunwind/test/floatregister.pass.cpp
+++ b/libunwind/test/floatregister.pass.cpp
@@ -11,20 +11,27 @@
 
 // Basic test that float register numbers are accepted.
 
-#include <dlfcn.h>
 #include <libunwind.h>
 #include <stdio.h>
 #include <stdlib.h>
 
+// Using __attribute__((section("main_func"))) is ELF specific, but then
+// this entire test is marked as requiring Linux, so we should be good.
+//
+// We don't use dladdr() because on musl it's a no-op when statically linked.
+extern char __start_main_func;
+extern char __stop_main_func;
+
 _Unwind_Reason_Code frame_handler(struct _Unwind_Context *ctx, void *arg) {
   (void)arg;
-  Dl_info info = {0, 0, 0, 0};
 
-  // Unwind util the main is reached, above frames depend on the platform and
+  // Unwind until the main is reached, above frames depend on the platform and
   // architecture.
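+  // (Editorial note, not in the original patch: __start_main_func and
+  // __stop_main_func are defined by the linker, not in this file -- ELF
+  // linkers synthesize __start_<name>/__stop_<name> symbols for any section
+  // whose name is a valid C identifier, so the pair brackets everything
+  // placed in section "main_func", i.e. main() itself.)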
- if (dladdr(reinterpret_cast(_Unwind_GetIP(ctx)), &info) && - info.dli_sname && !strcmp("main", info.dli_sname)) + uintptr_t ip = _Unwind_GetIP(ctx); + if (ip >= (uintptr_t)&__start_main_func && + ip < (uintptr_t)&__stop_main_func) { _Exit(0); + } return _URC_NO_REASON; } @@ -45,7 +52,7 @@ __attribute__((noinline)) void foo() { _Unwind_Backtrace(frame_handler, NULL); } -int main() { +__attribute__((section("main_func"))) int main() { foo(); return -2; } diff --git a/libunwind/test/forceunwind.pass.cpp b/libunwind/test/forceunwind.pass.cpp index db499d8bc30894..344034e1ea5f5e 100644 --- a/libunwind/test/forceunwind.pass.cpp +++ b/libunwind/test/forceunwind.pass.cpp @@ -17,7 +17,6 @@ #undef NDEBUG #include -#include #include #include #include @@ -27,6 +26,13 @@ #include #include +// Using __attribute__((section("main_func"))) is Linux specific, but then +// this entire test is marked as requiring Linux, so we should be good. +// +// We don't use dladdr() because on musl it's a no-op when statically linked. +extern char __start_main_func; +extern char __stop_main_func; + void foo(); _Unwind_Exception ex; @@ -41,14 +47,14 @@ _Unwind_Reason_Code stop(int version, _Unwind_Action actions, assert(exceptionObject == &ex); assert(stop_parameter == &foo); - Dl_info info = {0, 0, 0, 0}; - - // Unwind util the main is reached, above frames depend on the platform and + // Unwind until the main is reached, above frames depend on the platform and // architecture. - if (dladdr(reinterpret_cast(_Unwind_GetIP(context)), &info) && - info.dli_sname && !strcmp("main", info.dli_sname)) { + uintptr_t ip = _Unwind_GetIP(context); + if (ip >= (uintptr_t)&__start_main_func && + ip < (uintptr_t)&__stop_main_func) { _Exit(0); } + return _URC_NO_REASON; } @@ -66,7 +72,7 @@ __attribute__((noinline)) void foo() { _Unwind_ForcedUnwind(e, stop, (void *)&foo); } -int main() { +__attribute__((section("main_func"))) int main() { foo(); return -2; } diff --git a/libunwind/test/signal_unwind.pass.cpp b/libunwind/test/signal_unwind.pass.cpp index 954a5d4ba3db10..1c1566415a4d4b 100644 --- a/libunwind/test/signal_unwind.pass.cpp +++ b/libunwind/test/signal_unwind.pass.cpp @@ -13,9 +13,15 @@ // TODO: Figure out why this fails with Memory Sanitizer. // XFAIL: msan +// Note: this test fails on musl because: +// +// (a) musl disables emission of unwind information for its build, and +// (b) musl's signal trampolines don't include unwind information +// +// XFAIL: target={{.*}}-musl + #undef NDEBUG #include -#include #include #include #include @@ -24,16 +30,24 @@ #include #include +// Using __attribute__((section("main_func"))) is ELF specific, but then +// this entire test is marked as requiring Linux, so we should be good. +// +// We don't use dladdr() because on musl it's a no-op when statically linked. +extern char __start_main_func; +extern char __stop_main_func; + _Unwind_Reason_Code frame_handler(struct _Unwind_Context* ctx, void* arg) { (void)arg; - Dl_info info = { 0, 0, 0, 0 }; - // Unwind util the main is reached, above frames depend on the platform and + // Unwind until the main is reached, above frames depend on the platform and // architecture. 
- if (dladdr(reinterpret_cast(_Unwind_GetIP(ctx)), &info) && - info.dli_sname && !strcmp("main", info.dli_sname)) { + uintptr_t ip = _Unwind_GetIP(ctx); + if (ip >= (uintptr_t)&__start_main_func && + ip < (uintptr_t)&__stop_main_func) { _Exit(0); } + return _URC_NO_REASON; } @@ -43,7 +57,7 @@ void signal_handler(int signum) { _Exit(-1); } -int main(int, char**) { +__attribute__((section("main_func"))) int main(int, char **) { signal(SIGUSR1, signal_handler); kill(getpid(), SIGUSR1); return -2; diff --git a/libunwind/test/unwind_leaffunction.pass.cpp b/libunwind/test/unwind_leaffunction.pass.cpp index 112a5968247a42..98de7dc43260c2 100644 --- a/libunwind/test/unwind_leaffunction.pass.cpp +++ b/libunwind/test/unwind_leaffunction.pass.cpp @@ -13,9 +13,15 @@ // TODO: Figure out why this fails with Memory Sanitizer. // XFAIL: msan +// Note: this test fails on musl because: +// +// (a) musl disables emission of unwind information for its build, and +// (b) musl's signal trampolines don't include unwind information +// +// XFAIL: target={{.*}}-musl + #undef NDEBUG #include -#include #include #include #include @@ -24,16 +30,24 @@ #include #include +// Using __attribute__((section("main_func"))) is ELF specific, but then +// this entire test is marked as requiring Linux, so we should be good. +// +// We don't use dladdr() because on musl it's a no-op when statically linked. +extern char __start_main_func; +extern char __stop_main_func; + _Unwind_Reason_Code frame_handler(struct _Unwind_Context* ctx, void* arg) { (void)arg; - Dl_info info = { 0, 0, 0, 0 }; // Unwind until the main is reached, above frames depend on the platform and // architecture. - if (dladdr(reinterpret_cast(_Unwind_GetIP(ctx)), &info) && - info.dli_sname && !strcmp("main", info.dli_sname)) { + uintptr_t ip = _Unwind_GetIP(ctx); + if (ip >= (uintptr_t)&__start_main_func && + ip < (uintptr_t)&__stop_main_func) { _Exit(0); } + return _URC_NO_REASON; } @@ -56,7 +70,7 @@ __attribute__((noinline)) void crashing_leaf_func(int do_trap) { __builtin_trap(); } -int main(int, char**) { +__attribute__((section("main_func"))) int main(int, char **) { signal(SIGTRAP, signal_handler); signal(SIGILL, signal_handler); crashing_leaf_func(1); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index d10174681f5eb2..7d91b02b510790 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -57,7 +57,7 @@ def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">; def build_id: J<"build-id=">, HelpText<"Generate build ID note">, MetaVarName<"[fast,md5,sha1,uuid,0x]">; -def : F<"build-id">, Alias, AliasArgs<["fast"]>, HelpText<"Alias for --build-id=fast">; +def : F<"build-id">, Alias, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">; defm check_sections: B<"check-sections", "Check section addresses for overlaps (default)", diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 79ddc159a945f7..12ea6de0fc15c4 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -48,6 +48,9 @@ ELF Improvements and combine relocation sections if their relocated section group members are placed to the same output section. (`#94704 `_) +* ``--build-id`` now defaults to generating a 20-byte digest ("sha1") instead + of 8-byte ("fast"). This improves compatibility with RPM packaging tools. 
+  (`#93943 <https://github.com/llvm/llvm-project/pull/93943>`_)
 
 Breaking changes
 ----------------
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index bdc35c08ccb8e2..f9a00b78750388 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -119,7 +119,7 @@ are calculated from the object contents.
 is not intended to be cryptographically secure.
 .It Fl -build-id
 Synonym for
-.Fl -build-id Ns = Ns Cm fast .
+.Fl -build-id Ns = Ns Cm sha1 .
 .It Fl -call-graph-profile-sort Ns = Ns Ar algorithm
 .Ar algorithm
 may be:
diff --git a/lld/test/ELF/build-id.s b/lld/test/ELF/build-id.s
index 844880937b09f5..581f600881a27c 100644
--- a/lld/test/ELF/build-id.s
+++ b/lld/test/ELF/build-id.s
@@ -6,11 +6,12 @@
 # RUN: llvm-readobj -S %t2 | FileCheck -check-prefix=ALIGN %s
 
 # RUN: ld.lld --build-id %t -o %t2
-# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
-# RUN: ld.lld --build-id=fast %t -o %t2
-# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
 # RUN: ld.lld --build-id %t -o %t2 --threads=1
-# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
+
+# RUN: ld.lld --build-id=fast %t -o %t2
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=FAST %s
 
 # RUN: ld.lld --build-id=md5 %t -o %t2
 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=MD5 %s
@@ -41,7 +42,7 @@
 # RUN: ld.lld --build-id --build-id=none %t -o %t2
 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
 # RUN: ld.lld --build-id=none --build-id %t -o %t2
-# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
+# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
 
 .globl _start
 _start:
@@ -62,10 +63,10 @@ _start:
 # ALIGN-NEXT: Info:
 # ALIGN-NEXT: AddressAlignment: 4
 
-# DEFAULT: Contents of section .note.test:
-# DEFAULT: Contents of section .note.gnu.build-id:
-# DEFAULT-NEXT: 04000000 08000000 03000000 474e5500 ............GNU.
-# DEFAULT-NEXT: 630bc2f5 a2584763
+# FAST: Contents of section .note.test:
+# FAST: Contents of section .note.gnu.build-id:
+# FAST-NEXT: 04000000 08000000 03000000 474e5500 ............GNU.
+# FAST-NEXT: 630bc2f5 a2584763
 
 # MD5: Contents of section .note.gnu.build-id:
 # MD5-NEXT: 04000000 10000000 03000000 474e5500 ............GNU.
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 4dd23bb1e4dbec..2979bf69bf762a 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -5140,12 +5140,20 @@ uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) {
     case LC_LOADFVMLIB:
     case LC_LOAD_UPWARD_DYLIB: {
       uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
+      // For LC_LOAD_DYLIB there is an alternate encoding
+      // which adds a uint32_t `flags` field for `DYLIB_USE_*`
+      // flags. This can be detected by the timestamp field holding
+      // the `DYLIB_USE_MARKER` constant value.
       bool is_delayed_init = false;
       uint32_t use_command_marker = m_data.GetU32(&offset);
       if (use_command_marker == 0x1a741800 /* DYLIB_USE_MARKER */) {
         offset += 4; /* uint32_t current_version */
         offset += 4; /* uint32_t compat_version */
         uint32_t flags = m_data.GetU32(&offset);
+        // If this LC_LOAD_DYLIB is marked delay-init,
+        // don't report it as a dependent library -- it
+        // may be loaded in the process at some point,
+        // but will most likely not be loaded at launch.
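+        // (Editorial sketch, not in the original patch: the alternate
+        // encoding parsed above is laid out like Apple's dylib_use_command --
+        //   uint32_t cmd, cmdsize;
+        //   uint32_t nameoff;         // offset of the dylib name string
+        //   uint32_t marker;          // DYLIB_USE_MARKER, where the classic
+        //                             // encoding keeps the timestamp
+        //   uint32_t current_version, compat_version;
+        //   uint32_t flags;           // DYLIB_USE_* bits; DELAYED_INIT = 0x08
+        // -- which is why the reader skips two uint32_t fields before flags.)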
if (flags & 0x08 /* DYLIB_USE_DELAYED_INIT */) is_delayed_init = true; } diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp index 75504323b4fdf9..25cee369d7ee3d 100644 --- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp +++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp @@ -92,9 +92,7 @@ bool StopInfoMachException::DeterminePtrauthFailure(ExecutionContext &exe_ctx) { Target &target = *exe_ctx.GetTargetPtr(); Process &process = *exe_ctx.GetProcessPtr(); - ABISP abi_sp = process.GetABI(); const ArchSpec &arch = target.GetArchitecture(); - assert(abi_sp && "Missing ABI info"); // Check for a ptrauth-enabled target. const bool ptrauth_enabled_target = @@ -110,6 +108,9 @@ bool StopInfoMachException::DeterminePtrauthFailure(ExecutionContext &exe_ctx) { strm.Printf("Note: Possible pointer authentication failure detected.\n"); }; + ABISP abi_sp = process.GetABI(); + assert(abi_sp && "Missing ABI info"); + // Check if we have a "brk 0xc47x" trap, where the value that failed to // authenticate is in x16. Address current_address = current_frame->GetFrameCodeAddress(); diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/Makefile b/lldb/test/API/functionalities/target-new-solib-notifications/Makefile index 99998b20bcb050..6c61d210eeb2f3 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/Makefile +++ b/lldb/test/API/functionalities/target-new-solib-notifications/Makefile @@ -1,3 +1,23 @@ -CXX_SOURCES := main.cpp - -include Makefile.rules +CXX_SOURCES := main.cpp +LD_EXTRAS := -L. -l_d -l_c -l_a -l_b + +a.out: lib_b lib_a lib_c lib_d + +include Makefile.rules + +lib_a: lib_b + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_ONLY=YES DYLIB_CXX_SOURCES=a.cpp DYLIB_NAME=_a \ + LD_EXTRAS="-L. -l_b" + +lib_b: + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_ONLY=YES DYLIB_CXX_SOURCES=b.cpp DYLIB_NAME=_b + +lib_c: + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_ONLY=YES DYLIB_CXX_SOURCES=c.cpp DYLIB_NAME=_c + +lib_d: + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_ONLY=YES DYLIB_CXX_SOURCES=d.cpp DYLIB_NAME=_d diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py b/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py index abf761fb3420b4..47af6909b728cf 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py +++ b/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py @@ -9,22 +9,47 @@ from lldbsuite.test import lldbutil +@skipUnlessPlatform(["linux"] + lldbplatformutil.getDarwinOSTriples()) class ModuleLoadedNotifysTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True # At least DynamicLoaderDarwin and DynamicLoaderPOSIXDYLD should batch up # notifications about newly added/removed libraries. Other DynamicLoaders may # not be written this way. - @skipUnlessPlatform(["linux"] + lldbplatformutil.getDarwinOSTriples()) def setUp(self): # Call super's setUp(). TestBase.setUp(self) # Find the line number to break inside main(). 
self.line = line_number("main.cpp", "// breakpoint") + def setup_test(self, solibs): + if lldb.remote_platform: + path = lldb.remote_platform.GetWorkingDirectory() + for f in solibs: + lldbutil.install_to_target(self, self.getBuildArtifact(f)) + else: + path = self.getBuildDir() + if self.dylibPath in os.environ: + sep = self.platformContext.shlib_path_separator + path = os.environ[self.dylibPath] + sep + path + self.runCmd( + "settings append target.env-vars '{}={}'".format(self.dylibPath, path) + ) + self.default_path = path + def test_launch_notifications(self): """Test that lldb broadcasts newly loaded libraries in batches.""" + + expected_solibs = [ + "lib_a." + self.platformContext.shlib_extension, + "lib_b." + self.platformContext.shlib_extension, + "lib_c." + self.platformContext.shlib_extension, + "lib_d." + self.platformContext.shlib_extension, + ] + self.build() + self.setup_test(expected_solibs) + exe = self.getBuildArtifact("a.out") self.dbg.SetAsync(False) @@ -70,6 +95,8 @@ def test_launch_notifications(self): total_modules_added_events = 0 total_modules_removed_events = 0 already_loaded_modules = [] + max_solibs_per_event = 0 + max_solib_chunk_per_event = [] while listener.GetNextEvent(event): if lldb.SBTarget.EventIsTargetEvent(event): if event.GetType() == lldb.SBTarget.eBroadcastBitModulesLoaded: @@ -91,12 +118,17 @@ def test_launch_notifications(self): "{} is already loaded".format(module), ) already_loaded_modules.append(module) - if self.TraceOn(): - added_files.append(module.GetFileSpec().GetFilename()) + added_files.append(module.GetFileSpec().GetFilename()) if self.TraceOn(): # print all of the binaries that have been added print("Loaded files: %s" % (", ".join(added_files))) + # Track the largest (most recent) chunk of solibs loaded in one event. + # We expect all of our solibs in the last such chunk of loaded modules. + if solib_count >= max_solibs_per_event: + max_solib_chunk_per_event = added_files.copy() + max_solibs_per_event = solib_count + if event.GetType() == lldb.SBTarget.eBroadcastBitModulesUnloaded: solib_count = lldb.SBTarget.GetNumModulesFromEvent(event) total_modules_removed_events += 1 @@ -115,9 +147,7 @@ def test_launch_notifications(self): # binaries in batches. Check that we got back more than 1 solib per event. # In practice on Darwin today, we get back two events for a do-nothing c # program: a.out and dyld, and then all the rest of the system libraries. - # On Linux we get events for ld.so, [vdso], the binary and then all libraries. - - avg_solibs_added_per_event = round( - float(total_solibs_added) / float(total_modules_added_events) - ) - self.assertGreater(avg_solibs_added_per_event, 1) + # On Linux we get events for ld.so, [vdso], the binary and then all libraries, + # but different configurations may load a different number of .so modules + # per event.
+ self.assertLessEqual(set(expected_solibs), set(max_solib_chunk_per_event)) diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp new file mode 100644 index 00000000000000..778b46ed5cef1a --- /dev/null +++ b/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp @@ -0,0 +1,3 @@ +extern "C" int b_function(); + +extern "C" int a_function() { return b_function(); } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp new file mode 100644 index 00000000000000..4f1a4032ee0eed --- /dev/null +++ b/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp @@ -0,0 +1 @@ +extern "C" int b_function() { return 500; } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp new file mode 100644 index 00000000000000..8abd1b155a7590 --- /dev/null +++ b/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp @@ -0,0 +1 @@ +extern "C" int c_function() { return 600; } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp new file mode 100644 index 00000000000000..58888a29ba323a --- /dev/null +++ b/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp @@ -0,0 +1 @@ +extern "C" int d_function() { return 700; } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp index 00130c93b8863e..77b38c5ccdc698 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp +++ b/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp @@ -1,6 +1,16 @@ -#include -int main () -{ - puts("running"); // breakpoint here - return 0; -} +#include + +extern "C" int a_function(); +extern "C" int c_function(); +extern "C" int b_function(); +extern "C" int d_function(); + +int main() { + a_function(); + b_function(); + c_function(); + d_function(); + + puts("running"); // breakpoint here + return 0; +} diff --git a/lldb/test/API/macosx/delay-init-dependency/Makefile b/lldb/test/API/macosx/delay-init-dependency/Makefile new file mode 100644 index 00000000000000..246ea0f34e1a1c --- /dev/null +++ b/lldb/test/API/macosx/delay-init-dependency/Makefile @@ -0,0 +1,11 @@ +C_SOURCES := main.c +LD_EXTRAS := -L. 
-Wl,-delay_library,libfoo.dylib + +.PHONY: build-libfoo +all: build-libfoo a.out + +include Makefile.rules + +build-libfoo: foo.c + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_C_SOURCES=foo.c DYLIB_NAME=foo DYLIB_ONLY=YES diff --git a/lldb/test/API/macosx/delay-init-dependency/TestDelayInitDependency.py b/lldb/test/API/macosx/delay-init-dependency/TestDelayInitDependency.py new file mode 100644 index 00000000000000..44ed2b1d21f186 --- /dev/null +++ b/lldb/test/API/macosx/delay-init-dependency/TestDelayInitDependency.py @@ -0,0 +1,62 @@ +"""Test binaries with delay-init dependencies.""" + +import subprocess +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestDelayInitDependencies(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessDarwin + def test_delay_init_dependency(self): + TestBase.setUp(self) + out = subprocess.run( + ["xcrun", "ld", "-delay_library"], + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if "delay_library missing" not in out.stderr: + self.skipTest( + "Skipped because the linker doesn't know about -delay_library" + ) + self.build() + main_source = "main.c" + exe = self.getBuildArtifact("a.out") + lib = self.getBuildArtifact("libfoo.dylib") + + target = self.dbg.CreateTarget(exe) + self.assertTrue(target, VALID_TARGET) + + # libfoo.dylib should not be in the target pre-execution + for m in target.modules: + self.assertNotEqual(m.GetFileSpec().GetFilename(), "libfoo.dylib") + + # This run without arguments will not load libfoo.dylib + li = lldb.SBLaunchInfo([]) + li.SetWorkingDirectory(self.getBuildDir()) + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.c"), li + ) + for m in target.modules: + self.assertNotEqual(m.GetFileSpec().GetFilename(), "libfoo.dylib") + + process.Kill() + self.dbg.DeleteTarget(target) + + # This run with one argument will load libfoo.dylib + li = lldb.SBLaunchInfo([]) + li.SetWorkingDirectory(self.getBuildDir()) + li.SetArguments(["one-argument"], True) + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "// break here", lldb.SBFileSpec("main.c"), li + ) + + found_libfoo = False + for m in target.modules: + if m.GetFileSpec().GetFilename() == "libfoo.dylib": + found_libfoo = True + self.assertTrue(found_libfoo) diff --git a/lldb/test/API/macosx/delay-init-dependency/foo.c b/lldb/test/API/macosx/delay-init-dependency/foo.c new file mode 100644 index 00000000000000..de1cbc4c4648a1 --- /dev/null +++ b/lldb/test/API/macosx/delay-init-dependency/foo.c @@ -0,0 +1 @@ +int foo() { return 5; } diff --git a/lldb/test/API/macosx/delay-init-dependency/main.c b/lldb/test/API/macosx/delay-init-dependency/main.c new file mode 100644 index 00000000000000..57d251e6b2abe4 --- /dev/null +++ b/lldb/test/API/macosx/delay-init-dependency/main.c @@ -0,0 +1,9 @@ +int foo(); +int main(int argc, char **argv) { + int retval = 0; + // Only call foo() if one argument is passed + if (argc == 2) + retval = foo(); + + return retval; // break here +} diff --git a/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp b/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp index 277cc14ce50c9f..597e5b2e40d5e0 100644 --- a/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp +++ b/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp @@ -1731,7 +1731,29 @@ TEST_F(Testx86AssemblyInspectionEngine, 
TestAddESP) { EXPECT_EQ(4 - 16, row_sp->GetCFAValue().GetOffset()); } -// FIXME add test for lea_rsp_pattern_p +TEST_F(Testx86AssemblyInspectionEngine, TestLEA_RSP_Pattern) { + UnwindPlan::Row::RegisterLocation regloc; + UnwindPlan::RowSP row_sp; + AddressRange sample_range; + UnwindPlan unwind_plan(eRegisterKindLLDB); + std::unique_ptr engine = Getx86_64Inspector(); + + uint8_t data[] = { + 0x8d, 0x64, 0x24, 0x10, // lea rsp, [rsp + 0x10] + 0x90 // nop + }; + + sample_range = AddressRange(0x1000, sizeof(data)); + + EXPECT_TRUE(engine->GetNonCallSiteUnwindPlanFromAssembly( + data, sizeof(data), sample_range, unwind_plan)); + + row_sp = unwind_plan.GetRowForFunctionOffset(0); + EXPECT_EQ(0ull, row_sp->GetOffset()); + EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rsp); + EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true); + EXPECT_EQ(8, row_sp->GetCFAValue().GetOffset()); +} TEST_F(Testx86AssemblyInspectionEngine, TestPopRBX) { UnwindPlan::Row::RegisterLocation regloc; diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index aa50ce329d1dea..b7ec1b51ee247e 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -5980,6 +5980,33 @@ following sections: * :ref:`amdgpu-amdhsa-memory-model-gfx942` * :ref:`amdgpu-amdhsa-memory-model-gfx10-gfx11` +.. _amdgpu-fence-as: + +Fence and Address Spaces +++++++++++++++++++++++++++++++ + +LLVM fences do not have address space information; thus, fence +codegen usually needs to conservatively synchronize all address spaces. + +In the case of OpenCL, where fences only need to synchronize +user-specified address spaces, this can result in unnecessary extra waits. +For instance, a fence that is only supposed to synchronize local memory will +also have to wait on all global memory operations. + +:doc:`Memory Model Relaxation Annotations ` can +be used as an optimization hint for fences to solve this problem. +The AMDGPU backend recognizes the following tags on fences: + +- ``amdgpu-as:local`` - fence only the local address space +- ``amdgpu-as:global`` - fence only the global address space + +.. note:: + + As they are only an optimization hint, these tags are not guaranteed to + survive until code generation; optimizations are free to drop them, at the + cost of synchronizing additional address spaces. + .. _amdgpu-amdhsa-memory-model-gfx6-gfx9: Memory Model GFX6-GFX9 @@ -6317,21 +6344,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. - If OpenCL and address space is not generic, omit. - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Must happen after any preceding local/generic load @@ -6363,14 +6378,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -6573,21 +6583,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`.
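To make the ``amdgpu-as`` tags from the fence section above concrete, here is a minimal C++ sketch (not code from this patch) that attaches the ``amdgpu-as:local`` tag to a release fence through the IR API; the ``mmra`` metadata kind and the two-string leaf form are assumptions based on the Memory Model Relaxation Annotations documentation:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Emit `fence syncscope("workgroup") release` tagged with
// !mmra !{!"amdgpu-as", !"local"} so only local memory must be synchronized.
static FenceInst *emitLocalOnlyFence(IRBuilder<> &Builder) {
  LLVMContext &Ctx = Builder.getContext();
  FenceInst *FI = Builder.CreateFence(
      AtomicOrdering::Release, Ctx.getOrInsertSyncScopeID("workgroup"));
  MDNode *Leaf = MDNode::get(
      Ctx, {MDString::get(Ctx, "amdgpu-as"), MDString::get(Ctx, "local")});
  FI->setMetadata(Ctx.getMDKindID("mmra"), Leaf);
  return FI;
}
```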
- If OpenCL and address space is not generic, omit. - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Must happen after any preceding local/generic @@ -6623,21 +6621,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -6967,14 +6953,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -7915,21 +7896,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - s_waitcnt vmcnt(0) must happen after any preceding @@ -7988,14 +7957,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -8066,14 +8030,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -8441,21 +8400,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - s_waitcnt vmcnt(0) must happen after any preceding @@ -8501,21 +8448,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. 
address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -8583,21 +8518,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -9218,14 +9141,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -9327,14 +9245,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -10290,21 +10203,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - s_waitcnt vmcnt(0) must happen after any preceding @@ -10363,14 +10264,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -10441,14 +10337,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -10847,21 +10738,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is local, omit vmcnt(0). 
- - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - s_waitcnt vmcnt(0) must happen after any preceding @@ -10920,21 +10799,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -10999,21 +10866,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is local, omit vmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -11662,14 +11517,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -11771,14 +11621,9 @@ are defined in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx940-gfx9 address space is not generic, omit lgkmcnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0) and @@ -12624,21 +12469,9 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. address space is local, omit vmcnt(0) and vscnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0), s_waitcnt @@ -12721,14 +12554,9 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. address space is local, omit vmcnt(0) and vscnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. 
- Could be split into separate s_waitcnt vmcnt(0), s_waitcnt @@ -13092,21 +12920,9 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. address space is local, omit vmcnt(0) and vscnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0), s_waitcnt @@ -13165,21 +12981,9 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. address space is local, omit vmcnt(0) and vscnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate. If - fence had an - address space then - set to address - space of OpenCL - fence flag, or to - generic if both - local and global - flags are - specified. + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0), s_waitcnt @@ -13731,14 +13535,9 @@ table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-gfx11-table`. address space is local, omit vmcnt(0) and vscnt(0). - - However, since LLVM - currently has no - address space on - the fence need to - conservatively - always generate - (see comment for - previous fence). + - See :ref:`amdgpu-fence-as` for + more details on fencing specific + address spaces. - Could be split into separate s_waitcnt vmcnt(0), s_waitcnt diff --git a/llvm/docs/CompileCudaWithLLVM.rst b/llvm/docs/CompileCudaWithLLVM.rst index 631691ef9b472a..0371d7a3bdfcb5 100644 --- a/llvm/docs/CompileCudaWithLLVM.rst +++ b/llvm/docs/CompileCudaWithLLVM.rst @@ -418,6 +418,17 @@ the compiler chooses to inline ``host_only``. Member functions, including constructors, may be overloaded using H and D attributes. However, destructors cannot be overloaded. +Clang Warnings for Host and Device Function Declarations +-------------------------------------------------------- + +Clang can emit warnings when it detects that host (H) and device (D) functions are declared or defined with the same signature. These warnings are not enabled by default. + +To enable these warnings, use the following compiler flag: + +.. code-block:: console + + -Wnvcc-compat + Using a Different Class on Host/Device -------------------------------------- diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 9fb2c048a5c869..c11a6627d81d31 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -26244,6 +26244,42 @@ same values as the libm ``cos`` functions would, and handles error conditions in the same way. +'``llvm.experimental.constrained.tan``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <type> + @llvm.experimental.constrained.tan(<type> <op1>, + metadata <rounding mode>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.tan``' intrinsic returns the tangent of the +first operand. + +Arguments: +"""""""""" + +The first argument and the return type are floating-point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above.
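A frontend would typically reach this intrinsic through ``IRBuilder``. The following is a hedged C++ sketch (not part of this patch), assuming the ``Intrinsic::experimental_constrained_tan`` enum entry generated from the ``Intrinsics.td`` change later in this patch and the existing ``CreateConstrainedFPCall`` helper:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Build `tan(X)` as a constrained call; the rounding-mode and
// exception-behavior metadata operands are filled in from the builder's
// constrained-FP defaults.
static Value *emitConstrainedTan(IRBuilder<> &Builder, Module &M, Value *X) {
  Function *Tan = Intrinsic::getDeclaration(
      &M, Intrinsic::experimental_constrained_tan, {X->getType()});
  return Builder.CreateConstrainedFPCall(Tan, {X});
}
```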
+ +Semantics: +"""""""""" + +This function returns the tangent of the specified operand, returning the +same values as the libm ``tan`` functions would, and handles error +conditions in the same way. + + '``llvm.experimental.constrained.exp``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 35115e67ecf924..ef06f80c747f94 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -85,6 +85,7 @@ on support follow. Extension Status ================ ================================================================= ``A`` Supported + ``B`` Supported ``C`` Supported ``D`` Supported ``F`` Supported diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 8cdb9db087c778..00e2969ee3543b 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -152,6 +152,7 @@ Changes to the RISC-V Backend * Zaamo and Zalrsc are no longer experimental. * Processors that enable post reg-alloc scheduling (PostMachineScheduler) by default should use the `UsePostRAScheduler` subtarget feature. Setting `PostRAScheduler = 1` in the scheduler model will have no effect on the enabling of the PostMachineScheduler. * Zabha is no longer experimental. +* B (the collection of the Zba, Zbb, Zbs extensions) is supported. Changes to the WebAssembly Backend ---------------------------------- @@ -225,13 +226,6 @@ Changes to the Metadata Info Changes to the Debug Info --------------------------------- -* LLVM has switched from using debug intrinsics internally to using debug - records by default. This should happen transparently when using the DIBuilder - to construct debug variable information, but will require changes for any code - that interacts with debug intrinsics directly. Debug intrinsics will only be - supported on a best-effort basis from here onwards; for more information, see - the `migration docs `_. - Changes to the LLVM tools --------------------------------- * llvm-nm and llvm-objdump can now print symbol information from linked diff --git a/llvm/docs/RemoveDIsDebugInfo.md b/llvm/docs/RemoveDIsDebugInfo.md index ef11c8ed8b4335..56634f7ccc6bda 100644 --- a/llvm/docs/RemoveDIsDebugInfo.md +++ b/llvm/docs/RemoveDIsDebugInfo.md @@ -34,6 +34,107 @@ The second matter is that if you transfer sequences of instructions from one pla For a more in-depth overview of how to update existing code to support debug records, see [the guide below](#how-to-update-existing-code). +## Textual IR Changes + +As we change from using debug intrinsics to debug records, any tools that depend on parsing IR produced by LLVM will need to handle the new format. For the most part, the difference between the printed form of a debug intrinsic call and a debug record is trivial: + +1. An extra 2 spaces of indentation are added. +2. The text `(tail|notail|musttail)? call void @llvm.dbg.` is replaced with `#dbg_`. +3. The leading `metadata ` is removed from each argument to the intrinsic. +4. The DILocation changes from being an instruction attachment with the format `!dbg !`, to being an ordinary argument, i.e. `!`, that is passed as the final argument to the debug record. 
+ +Following these rules, we have this example of a debug intrinsic and the equivalent debug record: + +``` +; Debug Intrinsic: + call void @llvm.dbg.value(metadata i32 %add, metadata !10, metadata !DIExpression()), !dbg !20 +; Debug Record: + #dbg_value(i32 %add, !10, !DIExpression(), !20) +``` + +### Test updates + +Any tests downstream of the main LLVM repo that test the IR output of LLVM may break as a result of the change to using records. Updating an individual test to expect records instead of intrinsics should be trivial, given the update rules above. Updating many tests may be burdensome however; to update the lit tests in the main repository, the following steps were used: + +1. Collect the list of failing lit tests into a single file, `failing-tests.txt`, separated by (and ending with) newlines. +2. Use the following line to split the failing tests into tests that use update_test_checks and tests that don't: + ``` + $ while IFS= read -r f; do grep -q "Assertions have been autogenerated by" "$f" && echo "$f" >> update-checks-tests.txt || echo "$f" >> manual-tests.txt; done < failing-tests.txt + ``` +3. For the tests that use update_test_checks, run the appropriate update_test_checks script - for the main LLVM repo, this was achieved with: + ``` + $ xargs ./llvm/utils/update_test_checks.py --opt-binary ./build/bin/opt < update-checks-tests.txt + $ xargs ./llvm/utils/update_cc_test_checks.py --llvm-bin ./build/bin/ < update-checks-tests.txt + ``` +4. The remaining tests can be manually updated, although if there is a large number of tests then the following scripts may be useful; firstly, a script used to extract the check-line prefixes from a file: + ``` + $ cat ./get-checks.sh + #!/bin/bash + + # Always add CHECK, since it's more effort than it's worth to filter files where + # every RUN line uses other check prefixes. + # Then detect every instance of "check-prefix(es)=..." and add the + # comma-separated arguments as extra checks. + for filename in "$@" + do + echo "$filename,CHECK" + allchecks=$(grep -Eo 'check-prefix(es)?[ =][A-Z0-9_,-]+' $filename | sed -E 's/.+[= ]([A-Z0-9_,-]+).*/\1/g; s/,/\n/g') + for check in $allchecks; do + echo "$filename,$check" + done + done + ``` + Then a second script to perform the work of actually updating the check-lines in each of the failing tests, with a series of simple substitution patterns: + ``` + $ cat ./substitute-checks.sh + #!/bin/bash + + file="$1" + check="$2" + + # Any test that explicitly tests debug intrinsic output is not suitable to + # update by this script. + if grep -q "write-experimental-debuginfo=false" "$file"; then + exit 0 + fi + + sed -i -E -e " + /(#|;|\/\/).*$check[A-Z0-9_\-]*:/!b + /DIGlobalVariableExpression/b + /!llvm.dbg./bpostcall + s/((((((no|must)?tail )?call.*)?void )?@)?llvm.)?dbg\.([a-z]+)/#dbg_\7/ + :postcall + /declare #dbg_/d + s/metadata //g + s/metadata\{/{/g + s/DIExpression\(([^)]*)\)\)(,( !dbg)?)?/DIExpression(\1),/ + /#dbg_/!b + s/((\))?(,) )?!dbg (![0-9]+)/\3\4\2/ + s/((\))?(, ))?!dbg/\3/ + " "$file" + ``` + Both of these scripts combined can be used on the list in `manual-tests.txt` as follows: + ``` + $ cat manual-tests.txt | xargs ./get-checks.sh | sort | uniq | awk -F ',' '{ system("./substitute-checks.sh " $1 " " $2) }' + ``` + These scripts dealt successfully with the vast majority of checks in `clang/test` and `llvm/test`. +5. 
Verify the resulting tests pass, and detect any failing tests: + ``` + $ xargs ./build/bin/llvm-lit -q < failing-tests.txt + ******************** + Failed Tests (5): + LLVM :: DebugInfo/Generic/dbg-value-lower-linenos.ll + LLVM :: Transforms/HotColdSplit/transfer-debug-info.ll + LLVM :: Transforms/ObjCARC/basic.ll + LLVM :: Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll + LLVM :: Transforms/SafeStack/X86/debug-loc2.ll + + + Total Discovered Tests: 295 + Failed: 5 (1.69%) + ``` +6. Some tests may have failed - the update scripts are simplistic and preserve no context across lines, and so there are cases that they will not handle; the remaining cases must be manually updated (or handled by further scripts). + # C-API changes Some new functions that have been added are temporary and will be deprecated in the future. The intention is that they'll help downstream projects adapt during the transition period. diff --git a/llvm/docs/TestSuiteGuide.md b/llvm/docs/TestSuiteGuide.md index 85623da080ed84..9d43e1a345ffea 100644 --- a/llvm/docs/TestSuiteGuide.md +++ b/llvm/docs/TestSuiteGuide.md @@ -71,6 +71,9 @@ MicroBenchmarks/XRay microbenchmarks, you need to add `compiler-rt` to your PASS: test-suite :: MultiSource/Applications/ALAC/encode/alacconvert-encode.test (2 of 474) ... ``` +**NOTE!** Even if you only want to collect the compile-time results (code size, LLVM stats, etc.), +you still need to run the tests with the above `llvm-lit` command. In that case, the *results.json* file will +contain the compile-time metrics. 6. Show and compare result files (optional): diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 89499d8b937d5c..c35e58bc53b671 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -741,16 +741,16 @@ RUN lines: Expands to the path separator, i.e. ``:`` (or ``;`` on Windows). -``${fs-src-root}`` +``%{fs-src-root}`` Expands to the root component of file system paths for the source directory, i.e. ``/`` on Unix systems or ``C:\`` (or another drive) on Windows. -``${fs-tmp-root}`` +``%{fs-tmp-root}`` Expands to the root component of file system paths for the test's temporary directory, i.e. ``/`` on Unix systems or ``C:\`` (or another drive) on Windows. -``${fs-sep}`` +``%{fs-sep}`` Expands to the file system separator, i.e. ``/`` or ``\`` on Windows. ``%/s, %/S, %/t, %/T`` diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 78faadb30d9eb5..a9bb6cc9999b1e 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -189,6 +189,14 @@ struct APFloatBase { // improved range compared to half (16-bit) formats, at (potentially) // greater throughput than single precision (32-bit) formats. S_FloatTF32, + // 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754 + // types, there are no infinity or NaN values. The format is detailed in + // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf + S_Float6E3M2FN, + // 6-bit floating point number with bit layout S1E2M3. Unlike IEEE-754 + // types, there are no infinity or NaN values.
The format is detailed in + // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf + S_Float6E2M3FN, S_x87DoubleExtended, S_MaxSemantics = S_x87DoubleExtended, @@ -209,6 +217,8 @@ struct APFloatBase { static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE; static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE; static const fltSemantics &FloatTF32() LLVM_READNONE; + static const fltSemantics &Float6E3M2FN() LLVM_READNONE; + static const fltSemantics &Float6E2M3FN() LLVM_READNONE; static const fltSemantics &x87DoubleExtended() LLVM_READNONE; /// A Pseudo fltsemantic used to construct APFloats that cannot conflict with @@ -627,6 +637,8 @@ class IEEEFloat final : public APFloatBase { APInt convertFloat8E4M3FNUZAPFloatToAPInt() const; APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const; APInt convertFloatTF32APFloatToAPInt() const; + APInt convertFloat6E3M2FNAPFloatToAPInt() const; + APInt convertFloat6E2M3FNAPFloatToAPInt() const; void initFromAPInt(const fltSemantics *Sem, const APInt &api); template void initFromIEEEAPInt(const APInt &api); void initFromHalfAPInt(const APInt &api); @@ -642,6 +654,8 @@ class IEEEFloat final : public APFloatBase { void initFromFloat8E4M3FNUZAPInt(const APInt &api); void initFromFloat8E4M3B11FNUZAPInt(const APInt &api); void initFromFloatTF32APInt(const APInt &api); + void initFromFloat6E3M2FNAPInt(const APInt &api); + void initFromFloat6E2M3FNAPInt(const APInt &api); void assign(const IEEEFloat &); void copySignificand(const IEEEFloat &); @@ -1046,6 +1060,17 @@ class APFloat : public APFloatBase { /// \param Semantics - type float semantics static APFloat getAllOnesValue(const fltSemantics &Semantics); + static bool hasNanOrInf(const fltSemantics &Sem) { + switch (SemanticsToEnum(Sem)) { + default: + return true; + // Below Semantics do not support {NaN or Inf} + case APFloat::S_Float6E3M2FN: + case APFloat::S_Float6E2M3FN: + return false; + } + } + /// Used to insert APFloat objects, or objects that contain APFloat objects, /// into FoldingSets. 
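To make the new 6-bit semantics above concrete, here is a small usage sketch (an illustration, not code from this patch) that converts a double into the S1E3M2 format and inspects the resulting bit pattern:

```cpp
#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // 1.75 is exactly representable with two mantissa bits, so the conversion
  // is lossless; the format has no Inf/NaN, which is what the new
  // APFloat::hasNanOrInf() reports for these semantics.
  APFloat V(1.75);
  bool LosesInfo = false;
  (void)V.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
  outs() << "bits=0x";
  outs().write_hex(V.bitcastToAPInt().getZExtValue());
  outs() << " lossy=" << LosesInfo << "\n";
  return 0;
}
```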
void Profile(FoldingSetNodeID &NID) const; diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index e687254f6c4c70..b2dcdfad0a04b4 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -337,6 +337,7 @@ namespace llvm { // Top-Level Entities bool parseTopLevelEntities(); + bool finalizeDebugInfoFormat(Module *M); void dropUnknownMetadataReferences(); bool validateEndOfModule(bool UpgradeDebugInfo); bool validateEndOfIndex(); diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 9999aee61528e5..39dcd209afdc69 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -754,6 +754,7 @@ enum AttributeKindCodes { ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE = 90, ATTR_KIND_DEAD_ON_UNWIND = 91, ATTR_KIND_RANGE = 92, + ATTR_KIND_SANITIZE_NUMERICAL_STABILITY = 93, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 81c3e4be95e9ff..011f8c6534b6a7 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -38,6 +38,7 @@ class BasicBlock; class BlockAddress; class Constant; class ConstantArray; +class ConstantPtrAuth; class DataLayout; class DIE; class DIEAbbrev; @@ -585,6 +586,10 @@ class AsmPrinter : public MachineFunctionPass { emitGlobalConstant(DL, CV); } + virtual const MCExpr *lowerConstantPtrAuth(const ConstantPtrAuth &CPA) { + report_fatal_error("ptrauth constant lowering not implemented"); + } + /// Return true if the basic block has exactly one predecessor and the control /// transfer mechanism between the predecessor and this block is a /// fall-through. diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 2dcd7805b6c96b..dbb658940eef12 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -61,7 +61,7 @@ class DWARFFormValue { const DWARFUnit *U = nullptr; /// Remember the DWARFUnit at extract time. const DWARFContext *C = nullptr; /// Context for extract time. - DWARFFormValue(dwarf::Form F, ValueType V) : Form(F), Value(V) {} + DWARFFormValue(dwarf::Form F, const ValueType &V) : Form(F), Value(V) {} public: DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index e54ec4f2b1d72d..bff49dab4a313d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -198,7 +198,7 @@ struct TargetRegionEntryInfo { unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count); - bool operator<(const TargetRegionEntryInfo RHS) const { + bool operator<(const TargetRegionEntryInfo &RHS) const { return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, RHS.Count); diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index cef8b17769f0d0..772c7579aec6d8 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -285,6 +285,9 @@ def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress", [FnAttr]>; /// MemTagSanitizer is on. def SanitizeMemTag : EnumAttr<"sanitize_memtag", [FnAttr]>; +/// NumericalStabilitySanitizer is on. 
+def SanitizeNumericalStability : EnumAttr<"sanitize_numerical_stability", [FnAttr]>; + /// Speculative Load Hardening is enabled. /// /// Note that this uses the default compatibility (always compatible during @@ -372,6 +375,7 @@ def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; +def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 41aa44de957f93..a7b37c5cb204da 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -95,6 +95,7 @@ DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND) DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN) DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN) DAG_FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT) +DAG_FUNCTION(tan, 1, 1, experimental_constrained_tan, FTAN) DAG_FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC) // This is definition for fmuladd intrinsic function, that is converted into diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 107442623ab7bd..4c506a6ace23ea 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1218,6 +1218,10 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_tan : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 4c4e7351212f8a..2da154c300344c 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -144,14 +144,14 @@ class RISCVVIntrinsic { let TargetPrefix = "riscv" in { // We use anyint here but we only support XLen. - def int_riscv_vsetvli : Intrinsic<[llvm_anyint_ty], + def int_riscv_vsetvli : DefaultAttrsIntrinsic<[llvm_anyint_ty], /* AVL */ [LLVMMatchType<0>, /* VSEW */ LLVMMatchType<0>, /* VLMUL */ LLVMMatchType<0>], [IntrNoMem, ImmArg>, ImmArg>]>; - def int_riscv_vsetvlimax : Intrinsic<[llvm_anyint_ty], + def int_riscv_vsetvlimax : DefaultAttrsIntrinsic<[llvm_anyint_ty], /* VSEW */ [LLVMMatchType<0>, /* VLMUL */ LLVMMatchType<0>], [IntrNoMem, @@ -669,7 +669,7 @@ let TargetPrefix = "riscv" in { // The destination vector type is the same as first source vector. // Input: (passthru, vector_in, vector_in/scalar_in, vxrm, vl) class RISCVSaturatingBinaryAAXUnMaskedRoundingMode - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty, LLVMMatchType<2>], [ImmArg>, IntrNoMem]>, RISCVVIntrinsic { @@ -692,7 +692,7 @@ let TargetPrefix = "riscv" in { // The destination vector type is the same as first source vector. 
// Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vxrm, vl, policy) class RISCVSaturatingBinaryAAXMaskedRoundingMode - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, LLVMMatchType<2>, LLVMMatchType<2>], diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index b0ac432a065bf0..72eae85467dc9d 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -252,31 +252,6 @@ class MCContext { /// A collection of MCPseudoProbe in the current module MCPseudoProbeTable PseudoProbeTable; - // Sections are differentiated by the quadruple (section_name, group_name, - // unique_id, link_to_symbol_name). Sections sharing the same quadruple are - // combined into one section. - struct ELFSectionKey { - std::string SectionName; - StringRef GroupName; - StringRef LinkedToName; - unsigned UniqueID; - - ELFSectionKey(StringRef SectionName, StringRef GroupName, - StringRef LinkedToName, unsigned UniqueID) - : SectionName(SectionName), GroupName(GroupName), - LinkedToName(LinkedToName), UniqueID(UniqueID) {} - - bool operator<(const ELFSectionKey &Other) const { - if (SectionName != Other.SectionName) - return SectionName < Other.SectionName; - if (GroupName != Other.GroupName) - return GroupName < Other.GroupName; - if (int O = LinkedToName.compare(Other.LinkedToName)) - return O < 0; - return UniqueID < Other.UniqueID; - } - }; - struct COFFSectionKey { std::string SectionName; StringRef GroupName; @@ -350,8 +325,8 @@ class MCContext { }; StringMap MachOUniquingMap; - std::map ELFUniquingMap; std::map COFFUniquingMap; + StringMap ELFUniquingMap; std::map GOFFUniquingMap; std::map WasmUniquingMap; std::map XCOFFUniquingMap; @@ -371,7 +346,7 @@ class MCContext { std::function); MCSymbol *createSymbolImpl(const StringMapEntry *Name, - bool CanBeUnnamed); + bool IsTemporary); MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, bool IsTemporary); diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index 90effde5bb670a..217b9b4b5bc52b 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -63,9 +63,6 @@ class MCSection { using const_iterator = FragmentListType::const_iterator; using iterator = FragmentListType::iterator; - using const_reverse_iterator = FragmentListType::const_reverse_iterator; - using reverse_iterator = FragmentListType::reverse_iterator; - private: MCSymbol *Begin; MCSymbol *End = nullptr; @@ -192,6 +189,7 @@ class MCSection { iterator end() { return Fragments.end(); } const_iterator end() const { return Fragments.end(); } + bool empty() const { return Fragments.empty(); } void addFragment(MCFragment &F) { Fragments.push_back(&F); } diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 9de8ba5b8bf512..84d1b541171bf1 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -171,7 +171,7 @@ class PreservedCFGCheckerInstrumentation { FunctionAnalysisManager::Invalidator &); }; -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS SmallVector PassStack; #endif diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 53ddfd19234108..a9f7d7c3fda885 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ 
b/llvm/include/llvm/ProfileData/MemProf.h @@ -988,7 +988,7 @@ computeFrameHistogram(llvm::MapVector> // // The radix tree allows us to reconstruct call stacks in the leaf-to-root // order as we scan the array from left ro right while following pointers to -// parents along the way +// parents along the way. // // For example, if we are decoding CallStackId 2, we start a forward traversal // at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h index b9d398fee2b74e..29b5f9159c68ec 100644 --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -638,7 +638,7 @@ struct SemiNCAInfo { Bucket; SmallDenseSet Visited; SmallVector Affected; -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS SmallVector VisitedUnaffected; #endif }; @@ -913,7 +913,7 @@ struct SemiNCAInfo { LLVM_DEBUG(dbgs() << "Deleting edge " << BlockNamePrinter(From) << " -> " << BlockNamePrinter(To) << "\n"); -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS // Ensure that the edge was in fact deleted from the CFG before informing // the DomTree about it. // The check is O(N), so run it only in debug configuration. diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index f0e4ee534ece38..adb5ba6e3dcc2f 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -635,6 +635,9 @@ std::enable_if_t, T> SubOverflow(T X, T Y, T &Result) { /// result, returning true if an overflow ocurred. template std::enable_if_t, T> MulOverflow(T X, T Y, T &Result) { +#if __has_builtin(__builtin_mul_overflow) + return __builtin_mul_overflow(X, Y, &Result); +#else // Perform the unsigned multiplication on absolute values. using U = std::make_unsigned_t; const U UX = X < 0 ? (0 - static_cast(X)) : static_cast(X); @@ -656,6 +659,7 @@ std::enable_if_t, T> MulOverflow(T X, T Y, T &Result) { return UX > (static_cast(std::numeric_limits::max()) + U(1)) / UY; else return UX > (static_cast(std::numeric_limits::max())) / UY; +#endif } } // namespace llvm diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index c6010d745b46ed..b7c1216d16e5ea 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -132,6 +132,48 @@ struct ExtensionInfo { #define EMIT_EXTENSIONS #include "llvm/TargetParser/AArch64TargetParserDef.inc" +struct ExtensionSet { + // Set of extensions which are currently enabled. + ExtensionBitset Enabled; + // Set of extensions which have been enabled or disabled at any point. Used + // to avoid cluttering the cc1 command-line with lots of unneeded features. + ExtensionBitset Touched; + // Base architecture version, which we need to know because some feature + // dependencies change depending on this. + const ArchInfo *BaseArch; + + ExtensionSet() : Enabled(), Touched(), BaseArch(nullptr) {} + + // Enable the given architecture extension, and any other extensions it + // depends on. Does not change the base architecture, or follow dependencies + // between features which are only related by required architecture versions. + void enable(ArchExtKind E); + + // Disable the given architecture extension, and any other extensions which + // depend on it.
Does not change the base architecture, or follow + // dependencies between features which are only related by required + // architecture versions. + void disable(ArchExtKind E); + + // Add default extensions for the given CPU. Records the base architecture, + // to later resolve dependencies which depend on it. + void addCPUDefaults(const CpuInfo &CPU); + + // Add default extensions for the given architecture version. Records the + // base architecture, to later resolve dependencies which depend on it. + void addArchDefaults(const ArchInfo &Arch); + + // Add or remove a feature based on a modifier string. The string must be of + // the form "<feature>" to enable a feature or "no<feature>" to disable it. This + // will also enable or disable any features as required by the dependencies + // between them. + bool parseModifier(StringRef Modifier); + + // Convert the set of enabled extensions to an LLVM feature list, appending + // them to Features. + void toLLVMFeatureList(std::vector<StringRef> &Features) const; +}; + // Represents a dependency between two architecture extensions. Later is the // feature which was added to the architecture after Earlier, and expands the // functionality provided by it. If Later is enabled, then Earlier will also be @@ -438,7 +480,7 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, AArch64::AEK_FP16, AArch64::AEK_FP16FML})}, - {"apple-a14", ARMV8_5A, + {"apple-a14", ARMV8_4A, AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, AArch64::AEK_FP16, AArch64::AEK_FP16FML})}, @@ -455,7 +497,7 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_SHA3, AArch64::AEK_FP16, AArch64::AEK_FP16FML})}, - {"apple-m1", ARMV8_5A, + {"apple-m1", ARMV8_4A, AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, AArch64::AEK_FP16, AArch64::AEK_FP16FML})}, @@ -542,65 +584,6 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_PROFILE}))}, }; -struct ExtensionSet { - // Set of extensions which are currently enabled. - ExtensionBitset Enabled; - // Set of extensions which have been enabled or disabled at any point. Used - // to avoid cluttering the cc1 command-line with lots of unneeded features. - ExtensionBitset Touched; - // Base architecture version, which we need to know because some feature - // dependencies change depending on this. - const ArchInfo *BaseArch; - - ExtensionSet() : Enabled(), Touched(), BaseArch(nullptr) {} - - // Enable the given architecture extension, and any other extensions it - // depends on. Does not change the base architecture, or follow dependencies - // between features which are only related by required arcitecture versions. - void enable(ArchExtKind E); - - // Disable the given architecture extension, and any other extensions which - // depend on it. Does not change the base architecture, or follow - // dependencies between features which are only related by required - // arcitecture versions. - void disable(ArchExtKind E); - - // Add default extensions for the given CPU. Records the base architecture, - // to later resolve dependencies which depend on it. - void addCPUDefaults(const CpuInfo &CPU); - - // Add default extensions for the given architecture version. Records the - // base architecture, to later resolve dependencies which depend on it. - void addArchDefaults(const ArchInfo &Arch); - - // Add or remove a feature based on a modifier string. The string must be of - // the form "" to enable a feature or "no" to disable it.
This - // will also enable or disable any features as required by the dependencies - // between them. - bool parseModifier(StringRef Modifier, const bool AllowNoDashForm = false); - - // Constructs a new ExtensionSet by toggling the corresponding bits for every - // feature in the \p Features list without expanding their dependencies. Used - // for reconstructing an ExtensionSet from the output of toLLVMFeatures(). - void reconstructFromParsedFeatures(const std::vector &Features); - - // Convert the set of enabled extension to an LLVM feature list, appending - // them to Features. - template void toLLVMFeatureList(std::vector &Features) const { - if (BaseArch && !BaseArch->ArchFeature.empty()) - Features.emplace_back(T(BaseArch->ArchFeature)); - - for (const auto &E : Extensions) { - if (E.Feature.empty() || !Touched.test(E.ID)) - continue; - if (Enabled.test(E.ID)) - Features.emplace_back(T(E.Feature)); - else - Features.emplace_back(T(E.NegFeature)); - } - } -}; - // Name alias. struct Alias { StringRef AltName; @@ -624,13 +607,7 @@ const ArchInfo *getArchForCpu(StringRef CPU); // Parser const ArchInfo *parseArch(StringRef Arch); - -// Return the extension which has the given -target-feature name. -std::optional targetFeatureToExtension(StringRef TargetFeature); - -// Parse a name as defined by the Extension class in tablegen. std::optional parseArchExtension(StringRef Extension); - // Given the name of a CPU or alias, return the correponding CpuInfo. std::optional parseCpu(StringRef Name); // Used by target parser tests diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 8daa8a689c95f5..f10cede0734e44 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -250,8 +250,8 @@ X86_FEATURE_COMPAT(SHA512, "sha512", 0) X86_FEATURE_COMPAT(SM4, "sm4", 0) X86_FEATURE (EGPR, "egpr") X86_FEATURE_COMPAT(USERMSR, "usermsr", 0) -X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 0) -X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 0) +X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36) +X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37) // These features aren't really CPU features, but the frontend can set them. 
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 290e5a1cc337f3..65d43775bdc1d1 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -88,7 +88,7 @@ class JumpThreadingPass : public PassInfoMixin { std::optional BPI; bool ChangedSinceLastAnalysisUpdate = false; bool HasGuards = false; -#if !LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifndef LLVM_ENABLE_ABI_BREAKING_CHECKS SmallPtrSet LoopHeaders; #else SmallSet, 16> LoopHeaders; diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 227f6dfb8362cb..6aab1f98e67816 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -286,7 +286,7 @@ class LPMUpdater { } void setParentLoop(Loop *L) { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS ParentL = L; #endif } @@ -377,7 +377,7 @@ class LPMUpdater { const bool LoopNestMode; bool LoopNestChanged; -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS // In debug builds we also track the parent loop to implement asserts even in // the face of loop deletion. Loop *ParentL; diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 468b50092efcf7..62c1e15a9a60e1 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -167,7 +167,7 @@ class SCEVExpander : public SCEVVisitor { /// consistent when instructions are moved. SmallVector InsertPointGuards; -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS const char *DebugType; #endif @@ -183,7 +183,7 @@ class SCEVExpander : public SCEVVisitor { Builder(se.getContext(), InstSimplifyFolder(DL), IRBuilderCallbackInserter( [this](Instruction *I) { rememberInstruction(I); })) { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS DebugType = ""; #endif } @@ -193,7 +193,7 @@ class SCEVExpander : public SCEVVisitor { assert(InsertPointGuards.empty()); } -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS void setDebugType(const char *s) { DebugType = s; } #endif diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index eb1e3e494a42f4..f0fde9ae4df5c3 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -74,6 +74,23 @@ static std::string getTypeString(Type *T) { return Tmp.str(); } +// Whatever debug info format we parsed, we should convert to the expected debug +// info format immediately afterwards. +bool LLParser::finalizeDebugInfoFormat(Module *M) { + // We should have already returned an error if we observed both intrinsics and + // records in this IR. 
+ assert(!(SeenNewDbgInfoFormat && SeenOldDbgInfoFormat) && + "Mixed debug intrinsics/records seen without a parsing error?"); + if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) { + UseNewDbgInfoFormat = SeenNewDbgInfoFormat; + WriteNewDbgInfoFormatToBitcode = SeenNewDbgInfoFormat; + WriteNewDbgInfoFormat = SeenNewDbgInfoFormat; + } else if (M) { + M->setIsNewDbgInfoFormat(false); + } + return false; +} + /// Run: module ::= toplevelentity* bool LLParser::Run(bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback) { @@ -91,7 +108,7 @@ bool LLParser::Run(bool UpgradeDebugInfo, } return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) || - validateEndOfIndex(); + validateEndOfIndex() || finalizeDebugInfoFormat(M); } bool LLParser::parseStandaloneConstantValue(Constant *&C, @@ -190,18 +207,6 @@ void LLParser::dropUnknownMetadataReferences() { bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { if (!M) return false; - - // We should have already returned an error if we observed both intrinsics and - // records in this IR. - assert(!(SeenNewDbgInfoFormat && SeenOldDbgInfoFormat) && - "Mixed debug intrinsics/records seen without a parsing error?"); - if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) { - UseNewDbgInfoFormat = SeenNewDbgInfoFormat; - WriteNewDbgInfoFormatToBitcode = SeenNewDbgInfoFormat; - WriteNewDbgInfoFormat = SeenNewDbgInfoFormat; - M->setNewDbgInfoFormatFlag(SeenNewDbgInfoFormat); - } - // Handle any function attribute group forward references. for (const auto &RAG : ForwardRefAttrGroups) { Value *V = RAG.first; @@ -434,9 +439,6 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { UpgradeModuleFlags(*M); UpgradeSectionAttributes(*M); - if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) - M->setIsNewDbgInfoFormat(UseNewDbgInfoFormat); - if (!Slots) return false; // Initialize the slot mapping. 
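One thing to keep in mind about the `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` to `#ifdef` edits in the three headers above: `#if` evaluates the macro's value (an undefined identifier counts as 0), while `#ifdef` tests only whether the macro is defined at all. The two agree when the macro is undefined or defined to a nonzero value, but diverge for a macro a config header defines to 0. A two-minute demonstration:

#include <cstdio>

#define CHECKS_A 0 // defined, but with value 0
// CHECKS_B is not defined at all

int main() {
#if CHECKS_A
  std::puts("#if CHECKS_A");    // not printed: value is 0
#endif
#ifdef CHECKS_A
  std::puts("#ifdef CHECKS_A"); // printed: the macro *is* defined
#endif
#if CHECKS_B
  std::puts("#if CHECKS_B");    // not printed: undefined evaluates as 0
#endif
#ifdef CHECKS_B
  std::puts("#ifdef CHECKS_B"); // not printed: not defined
#endif
}

The same distinction applies to `#if !X` versus `#ifndef X` in the JumpThreading hunk.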
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 026595bdc63405..c667913005cd80 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2128,6 +2128,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::SanitizeThread; case bitc::ATTR_KIND_SANITIZE_MEMORY: return Attribute::SanitizeMemory; + case bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY: + return Attribute::SanitizeNumericalStability; case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING: return Attribute::SpeculativeLoadHardening; case bitc::ATTR_KIND_SWIFT_ERROR: @@ -4355,7 +4357,7 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) { TheModule->IsNewDbgInfoFormat = UseNewDbgInfoFormat && - LoadBitcodeIntoNewDbgInfoFormat != cl::boolOrDefault::BOU_FALSE; + LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_TRUE; } this->ValueTypeCallback = std::move(Callbacks.ValueType); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 35ea3c11396e7e..b08d5c50e5ae3e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -828,6 +828,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_SANITIZE_THREAD; case Attribute::SanitizeMemory: return bitc::ATTR_KIND_SANITIZE_MEMORY; + case Attribute::SanitizeNumericalStability: + return bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY; case Attribute::SpeculativeLoadHardening: return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING; case Attribute::SwiftError: diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e8bab26907b7e1..2943b270cd5df1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3181,6 +3181,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { if (const ConstantInt *CI = dyn_cast(CV)) return MCConstantExpr::create(CI->getZExtValue(), Ctx); + if (const ConstantPtrAuth *CPA = dyn_cast(CV)) + return lowerConstantPtrAuth(*CPA); + if (const GlobalValue *GV = dyn_cast(CV)) return MCSymbolRefExpr::create(getSymbol(GV), Ctx); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 86eb259c090152..9cc6d9b9fa715f 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -223,8 +223,8 @@ namespace { void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop, MachineBasicBlock *CurPreheader); - void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, SmallSet &StoredFIs, + void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers, + SmallSet &StoredFIs, SmallVectorImpl &Candidates, MachineLoop *CurLoop); @@ -423,10 +423,47 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { return false; } +static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI, + BitVector &RUs, + const uint32_t *Mask) { + // Iterate over the RegMask raw to avoid constructing a BitVector, which is + // expensive as it implies dynamically allocating memory. + // + // We also work backwards. + const unsigned NumRegs = TRI.getNumRegs(); + const unsigned MaskWords = (NumRegs + 31) / 32; + for (unsigned K = 0; K < MaskWords; ++K) { + // We want to set the bits that aren't in RegMask, so flip it. 
+ uint32_t Word = ~Mask[K]; + + // Iterate all set bits, starting from the right. + while (Word) { + const unsigned SetBitIdx = countr_zero(Word); + + // The bits are numbered from the LSB in each word. + const unsigned PhysReg = (K * 32) + SetBitIdx; + + // Clear the bit at SetBitIdx. Doing it this way appears to generate fewer + // instructions on x86. This works because negating a number will flip all + // the bits after SetBitIdx. So (Word & -Word) == (1 << SetBitIdx), but + // faster. + Word ^= Word & -Word; + + if (PhysReg == NumRegs) + return; + + if (PhysReg) { + for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI) + RUs.set(*RUI); + } + } + } +} + /// Examine the instruction for a potential LICM candidate. Also /// gather register def and frame object update information. -void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, +void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &RUDefs, + BitVector &RUClobbers, SmallSet<int, 32> &StoredFIs, SmallVectorImpl<CandidateInfo> &Candidates, MachineLoop *CurLoop) { @@ -448,7 +485,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, // We can't hoist an instruction defining a physreg that is clobbered in // the loop. if (MO.isRegMask()) { - PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); + applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, MO.getRegMask()); continue; } @@ -460,16 +497,22 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, assert(Reg.isPhysical() && "Not expecting virtual register!"); if (!MO.isDef()) { - if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) - // If it's using a non-loop-invariant register, then it's obviously not - // safe to hoist. - HasNonInvariantUse = true; + if (!HasNonInvariantUse) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + // If it's using a non-loop-invariant register, then it's obviously + // not safe to hoist. + if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) { + HasNonInvariantUse = true; + break; + } + } + } continue; } if (MO.isImplicit()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - PhysRegClobbers.set(*AI); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + RUClobbers.set(*RUI); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -488,19 +531,18 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, // If we have already seen another instruction that defines the same // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. - for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { - if (PhysRegDefs.test(*AS)) - PhysRegClobbers.set(*AS); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + if (RUDefs.test(*RUI)) { + RUClobbers.set(*RUI); + RuledOut = true; + } else if (RUClobbers.test(*RUI)) { + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; + } + + RUDefs.set(*RUI); } - // Need a second loop because MCRegAliasIterator can visit the same - // register twice. - for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) - PhysRegDefs.set(*AS); - - if (PhysRegClobbers.test(Reg)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate.
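The scan loop above combines two classic bit tricks: countr_zero finds the index of the lowest set bit, and Word & -Word isolates that bit so XOR-ing clears exactly it (the same effect as the better-known Word &= Word - 1). A standalone sketch of the loop shape:

#include <bit>
#include <cstdint>
#include <cstdio>

// Print the index of every set bit in Word, lowest first.
void forEachSetBit(uint32_t Word, unsigned WordIdx) {
  while (Word) {
    unsigned SetBitIdx = std::countr_zero(Word); // index of lowest set bit
    std::printf("bit %u\n", WordIdx * 32 + SetBitIdx);
    Word ^= Word & -Word; // clear lowest set bit; same as Word &= Word - 1
  }
}

int main() {
  forEachSetBit(0b1010'0100u, 0); // prints bits 2, 5, 7
}

Working on ~Mask[K] word by word is what turns "bits not in the regmask" into "set bits to visit" without materializing a temporary BitVector.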
- RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -521,9 +563,9 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, if (!Preheader) return; - unsigned NumRegs = TRI->getNumRegs(); - BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. - BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. + unsigned NumRegUnits = TRI->getNumRegUnits(); + BitVector RUDefs(NumRegUnits); // RUs defined once in the loop. + BitVector RUClobbers(NumRegUnits); // RUs defined more than once. SmallVector Candidates; SmallSet StoredFIs; @@ -540,22 +582,21 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, // FIXME: That means a reload that're reused in successor block(s) will not // be LICM'ed. for (const auto &LI : BB->liveins()) { - for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) - PhysRegDefs.set(*AI); + for (MCRegUnitIterator RUI(LI.PhysReg, TRI); RUI.isValid(); ++RUI) + RUDefs.set(*RUI); } // Funclet entry blocks will clobber all registers if (const uint32_t *Mask = BB->getBeginClobberMask(TRI)) - PhysRegClobbers.setBitsNotInMask(Mask); + applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, Mask); SpeculationState = SpeculateUnknown; for (MachineInstr &MI : *BB) - ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates, - CurLoop); + ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop); } // Gather the registers read / clobbered by the terminator. - BitVector TermRegs(NumRegs); + BitVector TermRUs(NumRegUnits); MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); if (TI != Preheader->end()) { for (const MachineOperand &MO : TI->operands()) { @@ -564,8 +605,8 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, Register Reg = MO.getReg(); if (!Reg) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - TermRegs.set(*AI); + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + TermRUs.set(*RUI); } } @@ -583,24 +624,36 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop, continue; unsigned Def = Candidate.Def; - if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { - bool Safe = true; - MachineInstr *MI = Candidate.MI; - for (const MachineOperand &MO : MI->all_uses()) { - if (!MO.getReg()) - continue; - Register Reg = MO.getReg(); - if (PhysRegDefs.test(Reg) || - PhysRegClobbers.test(Reg)) { + bool Safe = true; + for (MCRegUnitIterator RUI(Def, TRI); RUI.isValid(); ++RUI) { + if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) { + Safe = false; + break; + } + } + + if (!Safe) + continue; + + MachineInstr *MI = Candidate.MI; + for (const MachineOperand &MO : MI->all_uses()) { + if (!MO.getReg()) + continue; + for (MCRegUnitIterator RUI(MO.getReg(), TRI); RUI.isValid(); ++RUI) { + if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. 
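The switch from MCRegAliasIterator to MCRegUnitIterator throughout this pass deserves a gloss: register units give each minimal slice of the register file its own ID, so two registers overlap exactly when their unit sets intersect, and one BitVector indexed by unit replaces pairwise alias enumeration. A toy model, with a register file invented for the example and far simpler than any real target's:

#include <bitset>
#include <cstdio>
#include <vector>

// Toy file: AL and AH are the two halves of AX; EAX covers AX.
// Units: 0 = low byte, 1 = high byte (real EAX has more units).
struct Reg { const char *Name; std::vector<unsigned> Units; };
static const Reg Regs[] = {
    {"AL", {0}}, {"AH", {1}}, {"AX", {0, 1}}, {"EAX", {0, 1}}};

int main() {
  std::bitset<2> Defined;          // like RUDefs, indexed by unit
  for (unsigned U : Regs[0].Units) // define AL
    Defined.set(U);

  for (const Reg &R : Regs) {
    bool Overlaps = false;         // R overlaps iff it shares a unit
    for (unsigned U : R.Units)
      Overlaps |= Defined.test(U);
    std::printf("%-3s %s\n", R.Name,
                Overlaps ? "overlaps AL" : "independent");
  }
}

This is also why the old "second loop because MCRegAliasIterator can visit the same register twice" workaround disappears: unit iteration visits each unit exactly once.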
Safe = false; break; } } - if (Safe) - HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader); + + if (!Safe) + break; } + + if (Safe) + HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader); } } diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 1941643bba9e66..06cf0828ea79b3 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -425,7 +425,7 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, ZeroCostFoldedReloads || Copies); } - void add(RAGreedyStats other) { + void add(const RAGreedyStats &other) { Reloads += other.Reloads; FoldedReloads += other.FoldedReloads; ZeroCostFoldedReloads += other.ZeroCostFoldedReloads; diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 9fdc8a338b52a5..eb7a113b575f75 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -64,6 +65,7 @@ namespace { MachineFrameInfo *MFI = nullptr; const TargetInstrInfo *TII = nullptr; const MachineBlockFrequencyInfo *MBFI = nullptr; + SlotIndexes *Indexes = nullptr; // SSIntervals - Spill slot intervals. std::vector SSIntervals; @@ -152,6 +154,14 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addPreservedID(MachineDominatorsID); + + // In some Target's pipeline, register allocation (RA) might be + // split into multiple phases based on register class. So, this pass + // may be invoked multiple times requiring it to save these analyses to be + // used by RA later. 
+ AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); } @@ -496,8 +506,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (MachineInstr *MI : toErase) + for (MachineInstr *MI : toErase) { + if (Indexes) + Indexes->removeMachineInstrFromMaps(*MI); MI->eraseFromParent(); + } return changed; } @@ -515,6 +528,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); LS = &getAnalysis(); MBFI = &getAnalysis(); + Indexes = &getAnalysis(); bool Changed = false; diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index aea9425ebebaab..29f2cbf611fa3a 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -181,7 +181,7 @@ template class llvm::SymbolTableListTraits(getRawLocation())) || - (getNumVariableLocationOps() == 0 && !getExpression()->isComplex()) || + return (getNumVariableLocationOps() == 0 && + !getExpression()->isComplex()) || any_of(location_ops(), [](Value *V) { return isa(V); }); } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 9360e6d7d274c8..3f735020e87402 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -83,8 +83,6 @@ static cl::opt NonGlobalValueMaxNameSize( "non-global-value-max-name-size", cl::Hidden, cl::init(1024), cl::desc("Maximum size for the name of non-global values.")); -extern cl::opt UseNewDbgInfoFormat; - void Function::convertToNewDbgValues() { IsNewDbgInfoFormat = true; for (auto &BB : *this) { @@ -443,7 +441,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, : GlobalObject(Ty, Value::FunctionVal, OperandTraits::op_begin(this), 0, Linkage, name, computeAddrSpace(AddrSpace, ParentModule)), - NumArgs(Ty->getNumParams()), IsNewDbgInfoFormat(UseNewDbgInfoFormat) { + NumArgs(Ty->getNumParams()), IsNewDbgInfoFormat(false) { assert(FunctionType::isValidReturnType(getReturnType()) && "invalid return type"); setGlobalObjectSubClassData(0); diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 55c282cb25e793..f97dd18c736c51 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -54,8 +54,6 @@ using namespace llvm; -extern cl::opt UseNewDbgInfoFormat; - //===----------------------------------------------------------------------===// // Methods to implement the globals and functions lists. // @@ -74,7 +72,7 @@ template class llvm::SymbolTableListTraits; Module::Module(StringRef MID, LLVMContext &C) : Context(C), ValSymTab(std::make_unique(-1)), ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL(""), - IsNewDbgInfoFormat(UseNewDbgInfoFormat) { + IsNewDbgInfoFormat(false) { Context.addModule(this); } diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index a7157e7a37b439..8490853eda87c2 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -820,7 +820,7 @@ void MCAssembler::layout(MCAsmLayout &Layout) { for (MCSection &Sec : *this) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. 
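The StackSlotColoring changes above illustrate the legacy-pass-manager contract: addPreserved is a promise, and it only holds because every erased instruction is first dropped from the SlotIndexes maps. A hedged sketch of that pairing; the class names in the addPreserved calls lost their angle brackets in transit, so SlotIndexes, LiveIntervals and LiveDebugVariables below are a best guess reconstructed from the added #include and the surrounding code:

// Sketch: claim preservation only for what the pass actually maintains.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addPreserved<LiveIntervals>();      // honest only with the protocol below
  AU.addPreserved<LiveDebugVariables>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

// The matching maintenance: unmap each instruction before deleting it.
for (MachineInstr *MI : toErase) {
  if (Indexes)
    Indexes->removeMachineInstrFromMaps(*MI);
  MI->eraseFromParent();
}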
- if (Sec.getFragmentList().empty()) + if (Sec.empty()) new MCDataFragment(&Sec); Sec.setOrdinal(SectionIndex++); diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 771ca9c6006ca0..15900547179602 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -44,6 +44,7 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -264,13 +265,9 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry *Name, } MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix, - bool CanBeUnnamed) { - if (CanBeUnnamed && !UseNamesOnTempLabels) - return createSymbolImpl(nullptr, true); - + bool IsTemporary) { // Determine whether this is a user written assembler temporary or normal // label, if used. - bool IsTemporary = CanBeUnnamed; if (AllowTemporaryLabels && !IsTemporary) IsTemporary = Name.starts_with(MAI->getPrivateGlobalPrefix()); @@ -298,6 +295,9 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix, } MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) { + if (!UseNamesOnTempLabels) + return createSymbolImpl(nullptr, /*IsTemporary=*/true); + SmallString<128> NameSV; raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name; return createSymbol(NameSV, AlwaysAddSuffix, true); @@ -548,16 +548,42 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type, if (GroupSym) Group = GroupSym->getName(); assert(!(LinkedToSym && LinkedToSym->getName().empty())); - // Do the lookup, if we have a hit, return it. - auto IterBool = ELFUniquingMap.insert(std::make_pair( - ELFSectionKey{Section.str(), Group, - LinkedToSym ? LinkedToSym->getName() : "", UniqueID}, - nullptr)); - auto &Entry = *IterBool.first; - if (!IterBool.second) - return Entry.second; - StringRef CachedName = Entry.first.SectionName; + // Sections are differentiated by the quadruple (section_name, group_name, + // unique_id, link_to_symbol_name). Sections sharing the same quadruple are + // combined into one section. As an optimization, non-unique sections without + // group or linked-to symbol have a shorter unique-ing key. 
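The comment above describes the key the code just below assembles: section name, NUL, group name, NUL, linked-to symbol name, then the raw bytes of the unique ID. NUL is a safe separator because it cannot occur inside a name, and recording SectionLen lets the section name be recovered later as the key's prefix, which is what take_front(SectionLen) relies on. A self-contained version of the same construction:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

// Build a map key equivalent to the tuple (Name, Group, LinkedTo, UniqueID).
std::string makeSectionKey(const std::string &Name, const std::string &Group,
                           const std::string &LinkedTo, uint32_t UniqueID) {
  std::string Key = Name;
  Key.push_back('\0'); // separator: cannot appear in a name
  Key += Group;
  Key.push_back('\0');
  Key += LinkedTo;
  char Buf[sizeof(UniqueID)];
  std::memcpy(Buf, &UniqueID, sizeof(UniqueID)); // fixed width, native endian
  Key.append(Buf, sizeof(UniqueID));
  return Key;
}

int main() {
  // Distinct tuples give distinct keys, even when fields could be confused.
  std::cout << (makeSectionKey(".text", "grp", "", 1) !=
                makeSectionKey(".text", "", "grp", 1))
            << '\n'; // 1
}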
+ std::pair::iterator, bool> EntryNewPair; + // Length of the section name, which are the first SectionLen bytes of the key + unsigned SectionLen; + if (GroupSym || LinkedToSym || UniqueID != MCSection::NonUniqueID) { + SmallString<128> Buffer; + Section.toVector(Buffer); + SectionLen = Buffer.size(); + Buffer.push_back(0); // separator which cannot occur in the name + if (GroupSym) + Buffer.append(GroupSym->getName()); + Buffer.push_back(0); // separator which cannot occur in the name + if (LinkedToSym) + Buffer.append(LinkedToSym->getName()); + support::endian::write(Buffer, UniqueID, endianness::native); + StringRef UniqueMapKey = StringRef(Buffer); + EntryNewPair = ELFUniquingMap.insert(std::make_pair(UniqueMapKey, nullptr)); + } else if (!Section.isSingleStringRef()) { + SmallString<128> Buffer; + StringRef UniqueMapKey = Section.toStringRef(Buffer); + SectionLen = UniqueMapKey.size(); + EntryNewPair = ELFUniquingMap.insert(std::make_pair(UniqueMapKey, nullptr)); + } else { + StringRef UniqueMapKey = Section.getSingleStringRef(); + SectionLen = UniqueMapKey.size(); + EntryNewPair = ELFUniquingMap.insert(std::make_pair(UniqueMapKey, nullptr)); + } + + if (!EntryNewPair.second) + return EntryNewPair.first->second; + + StringRef CachedName = EntryNewPair.first->getKey().take_front(SectionLen); SectionKind Kind; if (Flags & ELF::SHF_ARM_PURECODE) @@ -601,7 +627,7 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type, MCSectionELF *Result = createELFSectionImpl(CachedName, Type, Flags, Kind, EntrySize, GroupSym, IsComdat, UniqueID, LinkedToSym); - Entry.second = Result; + EntryNewPair.first->second = Result; recordELFMergeableSectionInfo(Result->getName(), Result->getFlags(), Result->getUniqueID(), Result->getEntrySize()); diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 0ccade91677a41..ae4e6915fa294c 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -175,8 +175,11 @@ void MCObjectStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi, } void MCObjectStreamer::reset() { - if (Assembler) + if (Assembler) { Assembler->reset(); + if (getContext().getTargetOptions()) + Assembler->setRelaxAll(getContext().getTargetOptions()->MCRelaxAll); + } CurInsertionPoint = MCSection::iterator(); EmitEHFrame = true; EmitDebugFrame = false; @@ -199,7 +202,7 @@ void MCObjectStreamer::emitFrames(MCAsmBackend *MAB) { MCFragment *MCObjectStreamer::getCurrentFragment() const { assert(getCurrentSectionOnly() && "No current section!"); - if (CurInsertionPoint != getCurrentSectionOnly()->getFragmentList().begin()) + if (CurInsertionPoint != getCurrentSectionOnly()->begin()) return &*std::prev(CurInsertionPoint); return nullptr; diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 12e69f70537b78..9848d7fafe764a 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -130,10 +130,13 @@ LLVM_DUMP_METHOD void MCSection::dump() const { OS << "dump(); + F.dump(); } OS << "]>"; } diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index d17e6e125d8727..de8bde4211b49b 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -767,11 +767,9 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, if (!Asm.CGProfile.empty()) { MCSection *CGProfileSection = Asm.getContext().getMachOSection( "__LLVM", "__cg_profile", 0, SectionKind::getMetadata()); - MCDataFragment *Frag = dyn_cast_or_null( - 
&*CGProfileSection->getFragmentList().begin()); - assert(Frag && "call graph profile section not reserved"); - Frag->getContents().clear(); - raw_svector_ostream OS(Frag->getContents()); + auto &Frag = cast(*CGProfileSection->begin()); + Frag.getContents().clear(); + raw_svector_ostream OS(Frag.getContents()); for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); uint32_t ToIndex = CGPE.To->getSymbol().getIndex(); diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 788e92f94b2689..451269608f1799 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1857,14 +1857,9 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, report_fatal_error(".fini_array sections are unsupported"); if (!WS.getName().starts_with(".init_array")) continue; - if (WS.getFragmentList().empty()) - continue; - - // init_array is expected to contain a single non-empty data fragment - if (WS.getFragmentList().size() != 3) - report_fatal_error("only one .init_array section fragment supported"); - auto IT = WS.begin(); + if (IT == WS.end()) + continue; const MCFragment &EmptyFrag = *IT; if (EmptyFrag.getKind() != MCFragment::FT_Data) report_fatal_error(".init_array section should be aligned"); diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 3c9ff71b6b0622..a2b6c4e5c3a5c5 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -354,7 +354,7 @@ void WinCOFFWriter::defineSection(const MCSectionCOFF &MCSec, Section->MCSection = &MCSec; SectionMap[&MCSec] = Section; - if (UseOffsetLabels && !MCSec.getFragmentList().empty()) { + if (UseOffsetLabels && !MCSec.empty()) { const uint32_t Interval = 1 << OffsetLabelIntervalBits; uint32_t N = 1; for (uint32_t Off = Interval, E = Layout.getSectionAddressSize(&MCSec); diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h index f72c109b6009e8..2b1895a30b41ed 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObject.h +++ b/llvm/lib/ObjCopy/ELF/ELFObject.h @@ -910,7 +910,7 @@ class RelocationSection public: RelocationSection(const Object &O) : Obj(O) {} - void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } + void addRelocation(const Relocation &Rel) { Relocations.push_back(Rel); } Error accept(SectionVisitor &Visitor) const override; Error accept(MutableSectionVisitor &Visitor) override; Error removeSectionReferences( diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index b23d5fe245481c..c7adc7668b9a17 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -1357,7 +1357,7 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks( bool Registered = false; PIC.registerBeforeNonSkippedPassCallback([this, &MAM, Registered]( StringRef P, Any IR) mutable { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS assert(&PassStack.emplace_back(P)); #endif (void)this; @@ -1386,7 +1386,7 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks( PIC.registerAfterPassInvalidatedCallback( [this](StringRef P, const PreservedAnalyses &PassPA) { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS assert(PassStack.pop_back_val() == P && "Before and After callbacks must correspond"); #endif @@ -1395,7 +1395,7 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks( 
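Two small API changes in this stretch (RAGreedyStats::add and RelocationSection::addRelocation) turn by-value parameters into const references, dropping one aggregate copy per call. A copy-counting type makes the difference visible; the struct here is a stand-in, the real ones are larger:

#include <cstdio>

struct Stats {
  static inline int Copies = 0;
  int Reloads = 0;
  Stats() = default;
  Stats(const Stats &O) : Reloads(O.Reloads) { ++Copies; }
};

void addByValue(Stats S) {}           // copies its argument
void addByConstRef(const Stats &S) {} // binds directly, no copy

int main() {
  Stats S;
  addByValue(S);
  addByConstRef(S);
  std::printf("copies: %d\n", Stats::Copies); // 1 (only the by-value call)
}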
PIC.registerAfterPassCallback([this, &MAM](StringRef P, Any IR, const PreservedAnalyses &PassPA) { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS assert(PassStack.pop_back_val() == P && "Before and After callbacks must correspond"); #endif diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 4ca868722ec4e3..6d784053f877d4 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -394,7 +394,7 @@ Expected readMemProfSchema(const unsigned char *&Buffer) { } Result.push_back(static_cast(Tag)); } - // Advace the buffer to one past the schema if we succeeded. + // Advance the buffer to one past the schema if we succeeded. Buffer = Ptr; return Result; } diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 283fcc153b33aa..1209bf71a287d7 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -68,6 +68,10 @@ enum class fltNonfiniteBehavior { // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available // encodings do not distinguish between signalling and quiet NaN. NanOnly, + + // This behavior is present in Float6E3M2FN and Float6E2M3FN types, + // which do not support Inf or NaN values. + FiniteOnly, }; // How NaN values are represented. This is curently only used in combination @@ -139,6 +143,10 @@ static constexpr fltSemantics semFloat8E4M3FNUZ = { static constexpr fltSemantics semFloat8E4M3B11FNUZ = { 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; +static constexpr fltSemantics semFloat6E3M2FN = { + 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; +static constexpr fltSemantics semFloat6E2M3FN = { + 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly}; static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; static constexpr fltSemantics semBogus = {0, 0, 0, 0}; @@ -206,6 +214,10 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { return Float8E4M3B11FNUZ(); case S_FloatTF32: return FloatTF32(); + case S_Float6E3M2FN: + return Float6E3M2FN(); + case S_Float6E2M3FN: + return Float6E2M3FN(); case S_x87DoubleExtended: return x87DoubleExtended(); } @@ -238,6 +250,10 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { return S_Float8E4M3B11FNUZ; else if (&Sem == &llvm::APFloat::FloatTF32()) return S_FloatTF32; + else if (&Sem == &llvm::APFloat::Float6E3M2FN()) + return S_Float6E3M2FN; + else if (&Sem == &llvm::APFloat::Float6E2M3FN()) + return S_Float6E2M3FN; else if (&Sem == &llvm::APFloat::x87DoubleExtended()) return S_x87DoubleExtended; else @@ -260,6 +276,8 @@ const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { return semFloat8E4M3B11FNUZ; } const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } +const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } +const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } const fltSemantics &APFloatBase::x87DoubleExtended() { return semX87DoubleExtended; } @@ -878,6 +896,9 @@ void IEEEFloat::copySignificand(const IEEEFloat &rhs) { for the significand. If double or longer, this is a signalling NaN, which may not be ideal. If float, this is QNaN(0). 
*/ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) + llvm_unreachable("This floating point format does not support NaN"); + category = fcNaN; sign = Negative; exponent = exponentNaN(); @@ -1499,16 +1520,18 @@ static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, /* Handle overflow. Sign is preserved. We either become infinity or the largest finite number. */ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { - /* Infinity? */ - if (rounding_mode == rmNearestTiesToEven || - rounding_mode == rmNearestTiesToAway || - (rounding_mode == rmTowardPositive && !sign) || - (rounding_mode == rmTowardNegative && sign)) { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) - makeNaN(false, sign); - else - category = fcInfinity; - return (opStatus) (opOverflow | opInexact); + if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) { + /* Infinity? */ + if (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway || + (rounding_mode == rmTowardPositive && !sign) || + (rounding_mode == rmTowardNegative && sign)) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + makeNaN(false, sign); + else + category = fcInfinity; + return static_cast(opOverflow | opInexact); + } } /* Otherwise we become the largest finite number. */ @@ -3518,13 +3541,15 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const { myexponent = ::exponentZero(S) + bias; mysignificand.fill(0); } else if (category == fcInfinity) { - if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || + S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) llvm_unreachable("semantics don't support inf!"); - } myexponent = ::exponentInf(S) + bias; mysignificand.fill(0); } else { assert(category == fcNaN && "Unknown category!"); + if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) + llvm_unreachable("semantics don't support NaN!"); myexponent = ::exponentNaN(S) + bias; std::copy_n(significandParts(), mysignificand.size(), mysignificand.begin()); @@ -3605,6 +3630,16 @@ APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { return convertIEEEFloatToAPInt(); } +APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt(); +} + +APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt(); +} + // This function creates an APInt that is just a bit map of the floating // point constant as it would appear in memory. It is not a conversion, // and treating the result as a normal integer is unlikely to be useful. 
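For the two FiniteOnly formats it helps to make "largest finite number" concrete. A binary format with p significand bits (implicit bit included) and maximum exponent E tops out at (2 - 2^(1-p)) * 2^E; plugging in the semantics defined above gives 28 for Float6E3M2FN and 7.5 for Float6E2M3FN, and per the handleOverflow change these formats saturate to that value instead of producing an infinity. A quick computation:

#include <cmath>
#include <cstdio>

// Largest finite value of a binary float with the given precision (total
// significand bits, counting the implicit leading 1) and max exponent.
double largestFinite(int Precision, int MaxExponent) {
  double MaxSignificand = 2.0 - std::ldexp(1.0, 1 - Precision); // 1.11...1b
  return std::ldexp(MaxSignificand, MaxExponent);
}

int main() {
  std::printf("Float6E3M2FN max = %g\n", largestFinite(3, 4)); // 28
  std::printf("Float6E2M3FN max = %g\n", largestFinite(4, 2)); // 7.5
}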
@@ -3646,6 +3681,12 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics *)&semFloatTF32) return convertFloatTF32APFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) + return convertFloat6E3M2FNAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN) + return convertFloat6E2M3FNAPFloatToAPInt(); + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && "unknown format!"); return convertF80LongDoubleAPFloatToAPInt(); @@ -3862,6 +3903,14 @@ void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { initFromIEEEAPInt(api); } +void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) { + initFromIEEEAPInt(api); +} + +void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) { + initFromIEEEAPInt(api); +} + /// Treat api as containing the bits of a floating point number. void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { assert(api.getBitWidth() == Sem->sizeInBits); @@ -3891,6 +3940,10 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { return initFromFloat8E4M3B11FNUZAPInt(api); if (Sem == &semFloatTF32) return initFromFloatTF32APInt(api); + if (Sem == &semFloat6E3M2FN) + return initFromFloat6E3M2FNAPInt(api); + if (Sem == &semFloat6E2M3FN) + return initFromFloat6E2M3FNAPInt(api); llvm_unreachable(nullptr); } @@ -4328,7 +4381,8 @@ int IEEEFloat::getExactLog2Abs() const { bool IEEEFloat::isSignaling() const { if (!isNaN()) return false; - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || + semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) return false; // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the @@ -4387,6 +4441,10 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { // nextUp(getLargest()) == NAN makeNaN(); break; + } else if (semantics->nonFiniteBehavior == + fltNonfiniteBehavior::FiniteOnly) { + // nextUp(getLargest()) == getLargest() + break; } else { // nextUp(getLargest()) == INFINITY APInt::tcSet(significandParts(), 0, partCount()); @@ -4477,6 +4535,9 @@ APFloatBase::ExponentType IEEEFloat::exponentZero() const { } void IEEEFloat::makeInf(bool Negative) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) + llvm_unreachable("This floating point format does not support Inf"); + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { // There is no Inf, so make NaN instead. 
makeNaN(false, Negative); diff --git a/llvm/lib/Support/CodeGenCoverage.cpp b/llvm/lib/Support/CodeGenCoverage.cpp index 4d41c42e527e2e..2e35019e12c168 100644 --- a/llvm/lib/Support/CodeGenCoverage.cpp +++ b/llvm/lib/Support/CodeGenCoverage.cpp @@ -21,8 +21,6 @@ using namespace llvm; -static sys::SmartMutex OutputMutex; - CodeGenCoverage::CodeGenCoverage() = default; void CodeGenCoverage::setCovered(uint64_t RuleID) { @@ -79,6 +77,7 @@ bool CodeGenCoverage::parse(MemoryBuffer &Buffer, StringRef BackendName) { bool CodeGenCoverage::emit(StringRef CoveragePrefix, StringRef BackendName) const { if (!CoveragePrefix.empty() && !RuleCoverage.empty()) { + static sys::SmartMutex OutputMutex; sys::SmartScopedLock Lock(OutputMutex); // We can handle locking within a process easily enough but we don't want to diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 7da540f8ef8e50..da11539eab348f 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -90,6 +90,8 @@ class AArch64AsmPrinter : public AsmPrinter { return MCInstLowering.lowerOperand(MO, MCOp); } + const MCExpr *lowerConstantPtrAuth(const ConstantPtrAuth &CPA) override; + void emitStartOfAsmFile(Module &M) override; void emitJumpTableInfo() override; std::tuplegetInstrInfo()->getInstSizeInBytes(*MI) >= InstsEmitted * 4); } +const MCExpr * +AArch64AsmPrinter::lowerConstantPtrAuth(const ConstantPtrAuth &CPA) { + MCContext &Ctx = OutContext; + + // Figure out the base symbol and the addend, if any. + APInt Offset(64, 0); + const Value *BaseGV = CPA.getPointer()->stripAndAccumulateConstantOffsets( + getDataLayout(), Offset, /*AllowNonInbounds=*/true); + + auto *BaseGVB = dyn_cast(BaseGV); + + // If we can't understand the referenced ConstantExpr, there's nothing + // else we can do: emit an error. + if (!BaseGVB) { + BaseGV->getContext().emitError( + "cannot resolve target base/addend of ptrauth constant"); + return nullptr; + } + + // If there is an addend, turn that into the appropriate MCExpr. + const MCExpr *Sym = MCSymbolRefExpr::create(getSymbol(BaseGVB), Ctx); + if (Offset.sgt(0)) + Sym = MCBinaryExpr::createAdd( + Sym, MCConstantExpr::create(Offset.getSExtValue(), Ctx), Ctx); + else if (Offset.slt(0)) + Sym = MCBinaryExpr::createSub( + Sym, MCConstantExpr::create((-Offset).getSExtValue(), Ctx), Ctx); + + uint64_t KeyID = CPA.getKey()->getZExtValue(); + // We later rely on valid KeyID value in AArch64PACKeyIDToString call from + // AArch64AuthMCExpr::printImpl, so fail fast. + if (KeyID > AArch64PACKey::LAST) + report_fatal_error("AArch64 PAC Key ID '" + Twine(KeyID) + + "' out of range [0, " + + Twine((unsigned)AArch64PACKey::LAST) + "]"); + + uint64_t Disc = CPA.getDiscriminator()->getZExtValue(); + if (!isUInt<16>(Disc)) + report_fatal_error("AArch64 PAC Discriminator '" + Twine(Disc) + + "' out of range [0, 0xFFFF]"); + + // Finally build the complete @AUTH expr. + return AArch64AuthMCExpr::create(Sym, Disc, AArch64PACKey::ID(KeyID), + CPA.hasAddressDiscriminator(), Ctx); +} + // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. 
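The addend handling in lowerConstantPtrAuth above is the fiddly part: a signed offset must be rendered as `sym + c` or `sym - c` with the magnitude after the sign, and the key and discriminator are range-checked up front. A plain-C++ sketch of the same formatting and checks; the helper is hypothetical and the printed @AUTH spelling only approximates the AArch64 assembly syntax:

#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <string>

// Keys 0..3 map to ia/ib/da/db; the discriminator is a 16-bit immediate.
std::string formatAuthExpr(const std::string &Sym, int64_t Offset,
                           unsigned Key, uint64_t Disc, bool AddrDisc) {
  static const char *KeyNames[] = {"ia", "ib", "da", "db"};
  if (Key > 3)
    throw std::runtime_error("PAC key ID out of range [0, 3]");
  if (Disc > 0xFFFF)
    throw std::runtime_error("discriminator out of range [0, 0xFFFF]");
  std::string S = Sym;
  if (Offset > 0)
    S += " + " + std::to_string(Offset);
  else if (Offset < 0)                    // print the magnitude after '-'
    S += " - " + std::to_string(-Offset); // (assumes Offset != INT64_MIN)
  S += "@AUTH(" + std::string(KeyNames[Key]) + "," + std::to_string(Disc);
  if (AddrDisc)
    S += ",addr";
  return S + ")";
}

int main() {
  std::puts(formatAuthExpr("_vtable", -16, 2, 42, true).c_str());
  // _vtable - 16@AUTH(da,42,addr)
}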
#include "AArch64GenMCPseudoLowering.inc" diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index cd532671f50189..cf617c7e92a70a 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -431,8 +431,16 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo(); uint64_t NumBytes = AFI->getLocalStackSize(); + // If neither NEON or SVE are available, a COPY from one Q-reg to + // another requires a spill -> reload sequence. We can do that + // using a pre-decrementing store/post-decrementing load, but + // if we do so, we can't use the Red Zone. + bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() && + !Subtarget.isNeonAvailable() && + !Subtarget.hasSVE(); + return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || - getSVEStackSize(MF)); + getSVEStackSize(MF) || LowerQRegCopyThroughMem); } /// hasFP - Return true if the specified function should have a dedicated frame diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 48bf648b005227..c4f819f5fcdd29 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -728,14 +728,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote); } - for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, - ISD::FCOS, ISD::FSIN, ISD::FSINCOS, - ISD::FTAN, ISD::FEXP, ISD::FEXP2, - ISD::FEXP10, ISD::FLOG, ISD::FLOG2, - ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW, - ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN, - ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG, - ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) { + for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, + ISD::FCOS, ISD::FSIN, ISD::FSINCOS, + ISD::FTAN, ISD::FEXP, ISD::FEXP2, + ISD::FEXP10, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW, + ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN, + ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG, + ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) { setOperationAction(Op, MVT::f16, Promote); setOperationAction(Op, MVT::v4f16, Expand); setOperationAction(Op, MVT::v8f16, Expand); @@ -17720,6 +17720,47 @@ static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM); } +// Transform vector add(zext i8 to i32, zext i8 to i32) +// into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32) +// This allows extra uses of saddl/uaddl at the lower vector widths, and less +// extends. 
+static SDValue performVectorExtCombine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 || + (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) || + (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) || + N->getOperand(0).getOperand(0).getValueType() != + N->getOperand(1).getOperand(0).getValueType()) + return SDValue(); + + if (N->getOpcode() == ISD::MUL && + N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) + return SDValue(); + + SDValue N0 = N->getOperand(0).getOperand(0); + SDValue N1 = N->getOperand(1).getOperand(0); + EVT InVT = N0.getValueType(); + + EVT S1 = InVT.getScalarType(); + EVT S2 = VT.getScalarType(); + if ((S2 == MVT::i32 && S1 == MVT::i8) || + (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) { + SDLoc DL(N); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), + S2.getHalfSizedIntegerVT(*DAG.getContext()), + VT.getVectorElementCount()); + SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0); + SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1); + SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1); + return DAG.getNode(N->getOpcode() == ISD::MUL ? N->getOperand(0).getOpcode() + : (unsigned)ISD::SIGN_EXTEND, + DL, VT, NewOp); + } + return SDValue(); +} + static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -17728,6 +17769,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, return Ext; if (SDValue Ext = performMulVectorCmpZeroCombine(N, DAG)) return Ext; + if (SDValue Ext = performVectorExtCombine(N, DAG)) + return Ext; if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -19604,41 +19647,6 @@ static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond); } -// Transform vector add(zext i8 to i32, zext i8 to i32) -// into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32) -// This allows extra uses of saddl/uaddl at the lower vector widths, and less -// extends. 
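The renamed combine above now covers multiplies as well, but the underlying identity is unchanged and easy to check exhaustively in scalar form: an add of two zero-extended i8 values computed at i16 can be sign-extended to i32, since the i16 sum 0..510 is never negative, while a multiply must be re-extended with the operands' own extension because 255 * 255 = 65025 exceeds the signed i16 range. That is exactly why the code picks the operand's extend opcode for ISD::MUL. A brute-force check:

#include <cstdint>
#include <cstdio>

int main() {
  for (int A = 0; A < 256; ++A)
    for (int B = 0; B < 256; ++B) {
      // add(zext8->32, zext8->32) == sext32(add(zext8->16, zext8->16))
      int32_t Wide = int32_t(uint32_t(uint8_t(A)) + uint32_t(uint8_t(B)));
      int16_t Narrow = int16_t(uint16_t(uint8_t(A)) + uint16_t(uint8_t(B)));
      if (Wide != int32_t(Narrow)) {
        std::printf("add mismatch at %d,%d\n", A, B);
        return 1;
      }
      // mul(zext8->32, zext8->32) == zext32(mul(zext8->16, zext8->16))
      uint32_t WideM = uint32_t(uint8_t(A)) * uint32_t(uint8_t(B));
      uint16_t NarrowM = uint16_t(uint16_t(uint8_t(A)) * uint16_t(uint8_t(B)));
      if (WideM != uint32_t(NarrowM)) {
        std::printf("mul mismatch at %d,%d\n", A, B);
        return 1;
      }
    }
  std::puts("identities hold for all i8 pairs");
}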
-static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 || - (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && - N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) || - (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && - N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) || - N->getOperand(0).getOperand(0).getValueType() != - N->getOperand(1).getOperand(0).getValueType()) - return SDValue(); - - SDValue N0 = N->getOperand(0).getOperand(0); - SDValue N1 = N->getOperand(1).getOperand(0); - EVT InVT = N0.getValueType(); - - EVT S1 = InVT.getScalarType(); - EVT S2 = VT.getScalarType(); - if ((S2 == MVT::i32 && S1 == MVT::i8) || - (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) { - SDLoc DL(N); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), - S2.getHalfSizedIntegerVT(*DAG.getContext()), - VT.getVectorElementCount()); - SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0); - SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1); - SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewOp); - } - return SDValue(); -} - static SDValue performBuildVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -20260,7 +20268,7 @@ static SDValue performAddSubCombine(SDNode *N, return Val; if (SDValue Val = performNegCSelCombine(N, DCI.DAG)) return Val; - if (SDValue Val = performVectorAddSubExtCombine(N, DCI.DAG)) + if (SDValue Val = performVectorExtCombine(N, DCI.DAG)) return Val; if (SDValue Val = performAddCombineForShiftedOperands(N, DCI.DAG)) return Val; diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index a759efcd9441ec..cc33765307fb42 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -741,11 +741,12 @@ def ProcessorFeatures { FeatureNEON, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSHA3]; list AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8, - FeatureNEON, FeaturePerfMon, FeatureFRInt3264, - FeatureSpecRestrict, FeatureSSBS, FeatureSB, - FeaturePredRes, FeatureCacheDeepPersist, + FeatureNEON, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSHA3, - FeatureAltFPCmp]; + // ArmV8.5-a extensions, excluding BTI: + FeatureAltFPCmp, FeatureFRInt3264, + FeatureSpecRestrict, FeatureSSBS, FeatureSB, + FeaturePredRes, FeatureCacheDeepPersist]; list AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML]; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 270474f80767aa..322bde3da67631 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1022,7 +1022,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, if (!IsTailCall) { if (!PAI) - return IsIndirect ? getBLRCallOpcode(CallerF) : AArch64::BL; + return IsIndirect ? 
getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL; assert(IsIndirect && "Direct call should not be authenticated"); assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 375643b7f5197a..18193d88075971 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -42,8 +42,10 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { if (StoreSize <= 32) return EVT::getIntegerVT(Ctx, StoreSize); - assert(StoreSize % 32 == 0 && "Store size not a multiple of 32"); - return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); + if (StoreSize % 32 == 0) + return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); + + return VT; } unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index de7449a400a741..b6cecccf3572dd 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -329,7 +329,7 @@ void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx, MCSectionELF *TextSection = static_cast(Ctx.getObjectFileInfo()->getTextSection()); if (Sec.getKind().isExecuteOnly() && !TextSection->hasInstructions()) { - for (auto &F : TextSection->getFragmentList()) + for (auto &F : *TextSection) if (auto *DF = dyn_cast(&F)) if (!DF->getContents().empty()) return; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index f9a0ba3608e6dc..3b6ea81cdf10ed 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -713,15 +713,14 @@ class HexagonAsmBackend : public MCAsmBackend { void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override { for (auto *I : Layout.getSectionOrder()) { - auto &Fragments = I->getFragmentList(); - for (auto &J : Fragments) { + for (auto &J : *I) { switch (J.getKind()) { default: break; case MCFragment::FT_Align: { auto Size = Asm.computeFragmentSize(Layout, J); for (auto K = J.getIterator(); - K != Fragments.begin() && Size >= HEXAGON_PACKET_SIZE;) { + K != I->begin() && Size >= HEXAGON_PACKET_SIZE;) { --K; switch (K->getKind()) { default: diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 82770f8660850c..ca077d41d36bac 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1845,6 +1845,10 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) { size_t NumBytes = (Val.getBitWidth() + 7) / 8; SmallVector Buf(NumBytes); + // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the + // input's bit width, and i1 arrays may not have a length that is a multuple + // of 8. We handle the last byte separately, so we never request out of + // bounds bits. 
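The NVPTX comment above guards a real edge case: for an i1 array whose bit count is not a multiple of 8, extracting a full 8-bit chunk at the last position would read past the value's width, so the final byte must be assembled from only the bits that exist. The same split-loop shape on a plain bit vector, standing in for APInt:

#include <cstdio>
#include <vector>

// Pack Bits (LSB-first) into little-endian bytes without over-reading.
std::vector<unsigned char> packBits(const std::vector<bool> &Bits) {
  if (Bits.empty())
    return {};
  size_t NumBytes = (Bits.size() + 7) / 8;
  std::vector<unsigned char> Buf(NumBytes, 0);
  for (size_t I = 0; I + 1 < NumBytes; ++I) // all full bytes
    for (int B = 0; B < 8; ++B)
      Buf[I] |= (unsigned char)(Bits[I * 8 + B] << B);
  // Last byte separately: only the bits that actually exist.
  for (size_t B = (NumBytes - 1) * 8; B < Bits.size(); ++B)
    Buf[NumBytes - 1] |= (unsigned char)(Bits[B] << (B % 8));
  return Buf;
}

int main() {
  for (unsigned char C : packBits({1, 1, 0, 0, 0, 0, 0, 0, 1, 1}))
    std::printf("0x%02x ", (unsigned)C); // 0x03 0x03
}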
for (unsigned I = 0; I < NumBytes - 1; ++I) { Buf[I] = Val.extractBitsAsZExtValue(8, I * 8); } diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 5906a2cdb3bfa1..8ac1cdf0a7a9ce 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2155,6 +2155,16 @@ bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State, break; if (!RISCVVType::isValidLMUL(Lmul, Fractional)) break; + + if (Fractional) { + unsigned ELEN = STI->hasFeature(RISCV::FeatureStdExtZve64x) ? 64 : 32; + unsigned MinLMUL = ELEN / 8; + if (Lmul > MinLMUL) + Warning(Tok.getLoc(), + "use of vtype encodings with LMUL < SEWMIN/ELEN == mf" + + Twine(MinLMUL) + " is reserved"); + } + State = VTypeState_TailPolicy; return false; } @@ -2194,6 +2204,7 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { bool MaskAgnostic = false; VTypeState State = VTypeState_SEW; + SMLoc SEWLoc = S; if (parseVTypeToken(getTok(), State, Sew, Lmul, Fractional, TailAgnostic, MaskAgnostic)) @@ -2211,6 +2222,16 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { if (getLexer().is(AsmToken::EndOfStatement) && State == VTypeState_Done) { RISCVII::VLMUL VLMUL = RISCVVType::encodeLMUL(Lmul, Fractional); + if (Fractional) { + unsigned ELEN = STI->hasFeature(RISCV::FeatureStdExtZve64x) ? 64 : 32; + unsigned MaxSEW = ELEN / Lmul; + // If MaxSEW < 8, we should have printed warning about reserved LMUL. + if (MaxSEW >= 8 && Sew > MaxSEW) + Warning(SEWLoc, + "use of vtype encodings with SEW > " + Twine(MaxSEW) + + " and LMUL == mf" + Twine(Lmul) + + " may not be compatible with all RVV implementations"); + } unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index beee9405de02ac..2bfee45852b200 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -102,9 +102,14 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler { void assignValueToReg(Register ValVReg, Register PhysReg, const CCValAssign &VA) override { - // If we're passing an f32 value into an i64, anyextend before copying. - if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) - ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0); + // If we're passing a smaller fp value into a larger integer register, + // anyextend before copying. 
+ if ((VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) || + ((VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::i64) && + VA.getValVT() == MVT::f16)) { + LLT DstTy = LLT::scalar(VA.getLocVT().getSizeInBits()); + ValVReg = MIRBuilder.buildAnyExt(DstTy, ValVReg).getReg(0); + } Register ExtReg = extendRegister(ValVReg, VA); MIRBuilder.buildCopy(PhysReg, ExtReg); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 9bf06850483d84..011edca019fd60 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -477,6 +477,14 @@ def HasStdExtZbs : Predicate<"Subtarget->hasStdExtZbs()">, // Bitmanip Extensions for Cryptography Extensions +def FeatureStdExtB + : RISCVExtension<"b", 1, 0, + "'B' (the collection of the Zba, Zbb, Zbs extensions)", + [FeatureStdExtZba, FeatureStdExtZbb, FeatureStdExtZbs]>; +def HasStdExtB : Predicate<"Subtarget->hasStdExtB()">, + AssemblerPredicate<(all_of FeatureStdExtB), + "'B' (the collection of the Zba, Zbb, Zbs extensions)">; + def FeatureStdExtZbkb : RISCVExtension<"zbkb", 1, 0, "'Zbkb' (Bitmanip instructions for Cryptography)">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 77d3644687fddd..a206974e53e510 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -67,11 +67,6 @@ /// that terminology in code frequently refers to these as "TA" which is /// confusing. We're in the process of migrating away from this /// representation. -/// * _TU w/o policy operand -- Has a passthrough operand, and always -/// represents the tail undisturbed state. -/// * _TU w/policy operand - Can represent all three policy states. If -/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". -/// Otherwise, policy operand and tablegen flags drive the interpretation. /// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp index 2744c25d1bc754..52354281cdd7e3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp @@ -17,6 +17,8 @@ #include "SPIRVSubtarget.h" #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/IR/CFG.h" @@ -71,7 +73,7 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { /// terminator will take. llvm::Value *createExitVariable( BasicBlock *BB, - const std::unordered_map &TargetToValue) { + const DenseMap &TargetToValue) { auto *T = BB->getTerminator(); if (isa(T)) return nullptr; @@ -103,7 +105,7 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { /// Replaces |BB|'s branch targets present in |ToReplace| with |NewTarget|. void replaceBranchTargets(BasicBlock *BB, - const std::unordered_set ToReplace, + const SmallPtrSet &ToReplace, BasicBlock *NewTarget) { auto *T = BB->getTerminator(); if (isa(T)) @@ -133,7 +135,7 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { bool runOnConvergenceRegionNoRecurse(LoopInfo &LI, const SPIRV::ConvergenceRegion *CR) { // Gather all the exit targets for this region. 
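The new RISC-V parser warnings a little above reason directly about vtype fields, so the layout is worth restating: bits 2:0 hold vlmul (fractional LMULs mf8, mf4, mf2 use encodings 5, 6 and 7), bits 5:3 hold vsew with SEW = 8 << vsew, bit 6 is vta and bit 7 is vma. A minimal encoder in the spirit of RISCVVType::encodeVTYPE, field layout per the RVV 1.0 spec; the helper itself is hypothetical:

#include <cassert>
#include <cstdio>

// vtype[2:0] = vlmul, vtype[5:3] = vsew, vtype[6] = vta, vtype[7] = vma.
unsigned encodeVType(unsigned VLMul, unsigned SEW, bool TailAgnostic,
                     bool MaskAgnostic) {
  assert(SEW >= 8 && (SEW & (SEW - 1)) == 0 && "SEW must be a power of two");
  unsigned VSEW = 0;
  for (unsigned S = 8; S < SEW; S <<= 1) // SEW = 8 << vsew
    ++VSEW;
  return (MaskAgnostic << 7) | (TailAgnostic << 6) | (VSEW << 3) | VLMul;
}

int main() {
  // e32, m1, ta, ma -> vsew=2, vlmul=0, vta=1, vma=1 -> 0xD0
  std::printf("0x%X\n", encodeVType(0, 32, true, true));
}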
- std::unordered_set<BasicBlock *> ExitTargets; + SmallPtrSet<BasicBlock *, 4> ExitTargets; for (BasicBlock *Exit : CR->Exits) { for (BasicBlock *Target : gatherSuccessors(Exit)) { if (CR->Blocks.count(Target) == 0) @@ -164,9 +166,10 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { // Creating one constant per distinct exit target. This will be routed to the // correct target. - std::unordered_map<BasicBlock *, ConstantInt *> TargetToValue; + DenseMap<BasicBlock *, ConstantInt *> TargetToValue; for (BasicBlock *Target : SortedExitTargets) - TargetToValue.emplace(Target, Builder.getInt32(TargetToValue.size())); + TargetToValue.insert( + std::make_pair(Target, Builder.getInt32(TargetToValue.size()))); // Creating one variable per exit node, set to the constant matching the // targeted external block. @@ -184,12 +187,12 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { } // Creating the switch to jump to the correct exit target. - std::vector<std::pair<BasicBlock *, ConstantInt *>> CasesList( - TargetToValue.begin(), TargetToValue.end()); - llvm::SwitchInst *Sw = - Builder.CreateSwitch(node, CasesList[0].first, CasesList.size() - 1); - for (size_t i = 1; i < CasesList.size(); i++) - Sw->addCase(CasesList[i].second, CasesList[i].first); + llvm::SwitchInst *Sw = Builder.CreateSwitch(node, SortedExitTargets[0], + SortedExitTargets.size() - 1); + for (size_t i = 1; i < SortedExitTargets.size(); i++) { + BasicBlock *BB = SortedExitTargets[i]; + Sw->addCase(TargetToValue[BB], BB); + } // Fix exit branches to redirect to the new exit. for (auto Exit : CR->Exits) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 472f34a4efdb47..bc2eb6dcd541c7 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -125,9 +125,10 @@ class X86AsmBackend : public MCAsmBackend { unsigned TargetPrefixMax = 0; MCInst PrevInst; + unsigned PrevInstOpcode = 0; MCBoundaryAlignFragment *PendingBA = nullptr; std::pair<MCFragment *, size_t> PrevInstPosition; - bool CanPadInst = false; + bool IsRightAfterData = false; uint8_t determinePaddingPrefix(const MCInst &Inst) const; bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; @@ -267,8 +268,8 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { } /// Check if the instruction is a prefix. -static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { - return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); +static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) { + return X86II::isPrefix(MCII.get(Opcode).TSFlags); } /// Check if the instruction is valid as the first instruction in macro fusion. @@ -382,9 +383,9 @@ bool X86AsmBackend::allowEnhancedRelaxation() const { /// X86 has certain instructions which enable interrupts exactly one /// instruction *after* the instruction which stores to SS. Return true if the -/// given instruction has such an interrupt delay slot. -static bool hasInterruptDelaySlot(const MCInst &Inst) { - switch (Inst.getOpcode()) { +/// given instruction may have such an interrupt delay slot. +static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) { + switch (InstOpcode) { case X86::POPSS16: case X86::POPSS32: case X86::STI: @@ -394,9 +395,9 @@ static bool hasInterruptDelaySlot(const MCInst &Inst) { case X86::MOV32sr: case X86::MOV64sr: case X86::MOV16sm: - if (Inst.getOperand(0).getReg() == X86::SS) - return true; - break; + // In fact, this is only the case if the first operand is SS. However, as + // segment moves occur extremely rarely, this is just a minor pessimization.
+ return true; } return false; } @@ -406,16 +407,10 @@ static bool isRightAfterData(MCFragment *CurrentFragment, const std::pair<MCFragment *, size_t> &PrevInstPosition) { MCFragment *F = CurrentFragment; - // Empty data fragments may be created to prevent further data being - // added into the previous fragment, we need to skip them since they - // have no contents. - for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) - if (cast<MCDataFragment>(F)->getContents().size() != 0) - break; - // Since data is always emitted into a DataFragment, our check strategy is // simple here. // - If the fragment is a DataFragment + // - If it's empty (section start or data after align), return false. // - If it's not the fragment where the previous instruction is, // returns true. // - If it's the fragment holding the previous instruction but its // size has changed since the previous instruction was emitted into it, // returns true. // - Otherwise returns false. // - If the fragment is not a DataFragment, returns false. if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) - return DF != PrevInstPosition.first || - DF->getContents().size() != PrevInstPosition.second; + return DF->getContents().size() && + (DF != PrevInstPosition.first || + DF->getContents().size() != PrevInstPosition.second); return false; } @@ -455,22 +451,22 @@ bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { // TLSCALL). return false; - if (hasInterruptDelaySlot(PrevInst)) + if (mayHaveInterruptDelaySlot(PrevInstOpcode)) // If this instruction follows an interrupt enabling instruction with a one // instruction delay, inserting a nop would change behavior. return false; - if (isPrefix(PrevInst, *MCII)) + if (isPrefix(PrevInstOpcode, *MCII)) // If this instruction follows a prefix, inserting a nop/prefix would change // semantics. return false; - if (isPrefix(Inst, *MCII)) + if (isPrefix(Inst.getOpcode(), *MCII)) // If this instruction is a prefix, inserting a prefix would change // semantics. return false; - if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) + if (IsRightAfterData) // If this instruction follows any data, there is no clear // instruction boundary, inserting a nop/prefix would change semantics. return false; @@ -514,19 +510,27 @@ bool X86AsmBackend::needAlign(const MCInst &Inst) const { /// Insert BoundaryAlignFragment before instructions to align branches. void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, const MCSubtargetInfo &STI) { - CanPadInst = canPadInst(Inst, OS); + // Used by canPadInst. Done here, because in emitInstructionEnd, the current + // fragment will have changed. + IsRightAfterData = + isRightAfterData(OS.getCurrentFragment(), PrevInstPosition); if (!canPadBranches(OS)) return; + // NB: PrevInst is only valid if canPadBranches is true. if (!isMacroFused(PrevInst, Inst)) // Macro fusion doesn't actually happen; clear the pending fragment. PendingBA = nullptr; - if (!CanPadInst) + // When branch padding is enabled (basically the skx102 erratum => unlikely), + // we call canPadInst (not cheap) twice. However, in the common case, we can + // avoid unnecessary calls to that, as this is otherwise only used for + // relaxable fragments. + if (!canPadInst(Inst, OS)) return; - if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) { + if (PendingBA && PendingBA->getNextNode() == OS.getCurrentFragment()) { // Macro fusion actually happens and there is no other fragment inserted // after the previous instruction.
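// A compilable model of the isRightAfterData check above (toy types, not the
// MC API): the previous instruction's position is remembered as a
// (fragment, size) pair, and the current point counts as "right after data"
// iff it sits in a non-empty data fragment at any other position.
#include <cstddef>
#include <utility>
static bool rightAfterDataSketch(const void *CurFrag, std::size_t CurSize,
                                 std::pair<const void *, std::size_t> Prev) {
  return CurSize != 0 && (CurFrag != Prev.first || CurSize != Prev.second);
}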
// @@ -557,16 +561,22 @@ } /// Set the last fragment to be aligned for the BoundaryAlignFragment. -void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { - PrevInst = Inst; +void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, + const MCInst &Inst) { MCFragment *CF = OS.getCurrentFragment(); - PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) - F->setAllowAutoPadding(CanPadInst); + F->setAllowAutoPadding(canPadInst(Inst, OS)); + + // Update PrevInstOpcode here, since canPadInst() reads it. + PrevInstOpcode = Inst.getOpcode(); + PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); if (!canPadBranches(OS)) return; + // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap. + PrevInst = Inst; + if (!needAlign(Inst) || !PendingBA) return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2aec14e93d082d..2ed79385272fa8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -50184,12 +50184,12 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, /// If this is an add or subtract where one operand is produced by a cmp+setcc, /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} /// with CMP+{ADC, SBB}. -static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { +static SDValue combineAddOrSubToADCOrSBB(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { bool IsSub = N->getOpcode() == ISD::SUB; SDValue X = N->getOperand(0); SDValue Y = N->getOperand(1); EVT VT = N->getValueType(0); - SDLoc DL(N); if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG)) return ADCOrSBB; @@ -52718,7 +52718,7 @@ static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); } -static SDValue combineXorSubCTLZ(SDNode *N, SelectionDAG &DAG, +static SDValue combineXorSubCTLZ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::SUB) && "Invalid opcode for combining with CTLZ"); @@ -52758,7 +52758,6 @@ static SDValue combineXorSubCTLZ(SDNode *N, SelectionDAG &DAG, if (C->getZExtValue() != uint64_t(OpCTLZ.getValueSizeInBits() - 1)) return SDValue(); - SDLoc DL(N); EVT OpVT = VT; SDValue Op = OpCTLZ.getOperand(0); if (VT == MVT::i8) { @@ -52781,11 +52780,12 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + SDLoc DL(N);
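// Why the xor/sub-of-ctlz shapes handled by combineXorSubCTLZ above can
// lower to a plain BSR: for nonzero x, clz(x) fits inside the set bits of
// bw-1, so (bw-1) - clz(x) == (bw-1) ^ clz(x) == index of the most
// significant set bit. A standalone check (assumes GCC/Clang builtins):
#include <cassert>
static void checkMsbIdentity(unsigned x) {
  if (x == 0)
    return;
  int Clz = __builtin_clz(x); // 0..31, so it fits in 31's five one-bits
  assert(31 - Clz == (31 ^ Clz));
}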
// If this is SSE1 only, convert to FXOR to avoid scalarization. if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { return DAG.getBitcast(MVT::v4i32, - DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32, + DAG.getNode(X86ISD::FXOR, DL, MVT::v4f32, DAG.getBitcast(MVT::v4f32, N0), DAG.getBitcast(MVT::v4f32, N1))); } @@ -52805,7 +52805,7 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget)) return FPLogic; - if (SDValue R = combineXorSubCTLZ(N, DAG, Subtarget)) + if (SDValue R = combineXorSubCTLZ(N, DL, DAG, Subtarget)) return R; if (DCI.isBeforeLegalizeOps()) @@ -52826,8 +52826,8 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, N0.getOperand(0).getValueType().isVector() && N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) { - return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0), - N0.getOperand(0).getValueType())); + return DAG.getBitcast( + VT, DAG.getNOT(DL, N0.getOperand(0), N0.getOperand(0).getValueType())); } // Handle AVX512 mask widening. @@ -52837,8 +52837,8 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() && TLI.isTypeLegal(N0.getOperand(1).getValueType())) { return DAG.getNode( - ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), - DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()), + ISD::INSERT_SUBVECTOR, DL, VT, N0.getOperand(0), + DAG.getNOT(DL, N0.getOperand(1), N0.getOperand(1).getValueType()), N0.getOperand(2)); } @@ -52851,7 +52851,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, auto *N1C = dyn_cast<ConstantSDNode>(N1); auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.getOperand(1)); if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) { - SDLoc DL(N); SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT); SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT); return DAG.getNode(ISD::XOR, DL, VT, LHS, @@ -55419,7 +55418,8 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1, /// Try to fold those constants into an 'add' instruction to reduce instruction /// count. We do this with CMOV rather than the generic 'select' because there are /// earlier folds that may be used to turn select-of-constants into logic hacks. -static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG, +static SDValue pushAddIntoCmovOfConsts(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget) { // If an operand is zero, add-of-0 gets simplified away, so that's clearly // better because we eliminate 1-2 instructions.
This transform is still @@ -55451,7 +55451,6 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = N->getValueType(0); - SDLoc DL(N); SDValue FalseOp = Cmov.getOperand(0); SDValue TrueOp = Cmov.getOperand(1); @@ -55492,7 +55491,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); SDLoc DL(N); - if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget)) + if (SDValue Select = pushAddIntoCmovOfConsts(N, DL, DAG, Subtarget)) return Select; if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget)) @@ -55550,7 +55549,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, Op0.getOperand(0), Op0.getOperand(2)); } - return combineAddOrSubToADCOrSBB(N, DAG); + return combineAddOrSubToADCOrSBB(N, DL, DAG); } // Try to fold (sub Y, cmovns X, -X) -> (add Y, cmovns -X, X) if the cmov @@ -55626,6 +55625,7 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); + SDLoc DL(N); // TODO: Add NoOpaque handling to isConstantIntBuildVectorOrConstantInt. auto IsNonOpaqueConstant = [&](SDValue Op) { @@ -55645,7 +55645,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) && !isNullConstant(Op0) && IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) { - SDLoc DL(N); EVT VT = Op0.getValueType(); SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT)); @@ -55676,14 +55675,14 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use"); SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0, Op1.getOperand(1), Op1.getOperand(2)); - return DAG.getNode(ISD::SUB, SDLoc(N), Op0.getValueType(), ADC.getValue(0), + return DAG.getNode(ISD::SUB, DL, Op0.getValueType(), ADC.getValue(0), Op1.getOperand(0)); } - if (SDValue V = combineXorSubCTLZ(N, DAG, Subtarget)) + if (SDValue V = combineXorSubCTLZ(N, DL, DAG, Subtarget)) return V; - if (SDValue V = combineAddOrSubToADCOrSBB(N, DAG)) + if (SDValue V = combineAddOrSubToADCOrSBB(N, DL, DAG)) return V; return combineSubSetcc(N, DAG); diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp index 2e88e01ce7fdfa..7b57f7c23bf4da 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -13,6 +13,14 @@ using namespace llvm; +yaml::X86MachineFunctionInfo::X86MachineFunctionInfo( + const llvm::X86MachineFunctionInfo &MFI) + : AMXProgModel(MFI.getAMXProgModel()) {} + +void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { + MappingTraits::mapping(YamlIO, *this); +} + MachineFunctionInfo *X86MachineFunctionInfo::clone( BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap &Src2DstMBB) @@ -20,6 +28,11 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone( return DestMF.cloneInfo(*this); } +void X86MachineFunctionInfo::initializeBaseYamlFields( + const yaml::X86MachineFunctionInfo &YamlMFI) { + AMXProgModel = YamlMFI.AMXProgModel; +} + void X86MachineFunctionInfo::anchor() { } void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) { diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 8aaa49945f9d44..af2de2e73dc368 100644 --- 
a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -16,13 +16,43 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/YAMLTraits.h" #include namespace llvm { enum AMXProgModelEnum { None = 0, DirectReg = 1, ManagedRA = 2 }; +class X86MachineFunctionInfo; + +namespace yaml { +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &YamlIO, AMXProgModelEnum &Value) { + YamlIO.enumCase(Value, "None", AMXProgModelEnum::None); + YamlIO.enumCase(Value, "DirectReg", AMXProgModelEnum::DirectReg); + YamlIO.enumCase(Value, "ManagedRA", AMXProgModelEnum::ManagedRA); + } +}; + +struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo { + AMXProgModelEnum AMXProgModel; + + X86MachineFunctionInfo() = default; + X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI); + + void mappingImpl(yaml::IO &YamlIO) override; + ~X86MachineFunctionInfo() = default; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) { + YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel); + } +}; +} // end namespace yaml + /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -160,6 +190,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { const DenseMap &Src2DstMBB) const override; + void initializeBaseYamlFields(const yaml::X86MachineFunctionInfo &YamlMFI); + bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 27542e54829bff..d4e642c7df9cf9 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -31,6 +31,8 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/MIRParser/MIParser.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" @@ -344,6 +346,24 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +yaml::MachineFunctionInfo *X86TargetMachine::createDefaultFuncInfoYAML() const { + return new yaml::X86MachineFunctionInfo(); +} + +yaml::MachineFunctionInfo * +X86TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { + const auto *MFI = MF.getInfo(); + return new yaml::X86MachineFunctionInfo(*MFI); +} + +bool X86TargetMachine::parseMachineFunctionInfo( + const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, + SMDiagnostic &Error, SMRange &SourceRange) const { + const auto &YamlMFI = static_cast(MFI); + PFS.MF.getInfo()->initializeBaseYamlFields(YamlMFI); + return false; +} + bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { assert(SrcAS != DestAS && "Expected different address spaces!"); diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h index 4a5f20fcc01726..916445c74bb903 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ 
b/llvm/lib/Target/X86/X86TargetMachine.h @@ -58,6 +58,14 @@ class X86TargetMachine final : public LLVMTargetMachine { createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const override; + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; + yaml::MachineFunctionInfo * + convertFuncInfoToYAML(const MachineFunction &MF) const override; + bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, + PerFunctionMIParsingState &PFS, + SMDiagnostic &Error, + SMRange &SourceRange) const override; + void registerPassBuilderCallbacks(PassBuilder &PB, bool PopulateClassToPassNames) override; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0a23bf2516763a..74948778ccf853 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4061,7 +4061,7 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::i8, { 1, 1, 2, 2 } }, // popcnt(zext()) }; static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets - { ISD::ABS, MVT::i64, { 1, 2, 3, 4 } }, // SUB+CMOV + { ISD::ABS, MVT::i64, { 1, 2, 3, 3 } }, // SUB+CMOV { ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } }, { ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } }, { ISD::CTLZ, MVT::i64, { 4 } }, // BSR+XOR or BSR+XOR+CMOV @@ -4082,9 +4082,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::UMULO, MVT::i64, { 2 } }, // mulq + seto }; static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets - { ISD::ABS, MVT::i32, { 1, 2, 3, 4 } }, // SUB+XOR+SRA or SUB+CMOV - { ISD::ABS, MVT::i16, { 2, 2, 3, 4 } }, // SUB+XOR+SRA or SUB+CMOV - { ISD::ABS, MVT::i8, { 2, 4, 4, 4 } }, // SUB+XOR+SRA + { ISD::ABS, MVT::i32, { 1, 2, 3, 3 } }, // SUB+XOR+SRA or SUB+CMOV + { ISD::ABS, MVT::i16, { 2, 2, 3, 3 } }, // SUB+XOR+SRA or SUB+CMOV + { ISD::ABS, MVT::i8, { 2, 4, 4, 3 } }, // SUB+XOR+SRA { ISD::BITREVERSE, MVT::i32, { 9, 12, 17, 19 } }, { ISD::BITREVERSE, MVT::i16, { 9, 12, 17, 19 } }, { ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } }, diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index d1cc306790522d..ca356ec82bf1f9 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -122,14 +122,6 @@ AArch64::parseArchExtension(StringRef ArchExt) { return {}; } -std::optional -AArch64::targetFeatureToExtension(StringRef TargetFeature) { - for (const auto &E : Extensions) - if (TargetFeature == E.Feature) - return E; - return {}; -} - std::optional AArch64::parseCpu(StringRef Name) { // Resolve aliases first. 
Name = resolveCPUAlias(Name); @@ -221,6 +213,21 @@ void AArch64::ExtensionSet::disable(ArchExtKind E) { disable(Dep.Later); } +void AArch64::ExtensionSet::toLLVMFeatureList( + std::vector &Features) const { + if (BaseArch && !BaseArch->ArchFeature.empty()) + Features.push_back(BaseArch->ArchFeature); + + for (const auto &E : Extensions) { + if (E.Feature.empty() || !Touched.test(E.ID)) + continue; + if (Enabled.test(E.ID)) + Features.push_back(E.Feature); + else + Features.push_back(E.NegFeature); + } +} + void AArch64::ExtensionSet::addCPUDefaults(const CpuInfo &CPU) { LLVM_DEBUG(llvm::dbgs() << "addCPUDefaults(" << CPU.Name << ")\n"); BaseArch = &CPU.Arch; @@ -240,18 +247,11 @@ void AArch64::ExtensionSet::addArchDefaults(const ArchInfo &Arch) { enable(E.ID); } -bool AArch64::ExtensionSet::parseModifier(StringRef Modifier, - const bool AllowNoDashForm) { +bool AArch64::ExtensionSet::parseModifier(StringRef Modifier) { LLVM_DEBUG(llvm::dbgs() << "parseModifier(" << Modifier << ")\n"); - size_t NChars = 0; - // The "no-feat" form is allowed in the target attribute but nowhere else. - if (AllowNoDashForm && Modifier.starts_with("no-")) - NChars = 3; - else if (Modifier.starts_with("no")) - NChars = 2; - bool IsNegated = NChars != 0; - StringRef ArchExt = Modifier.drop_front(NChars); + bool IsNegated = Modifier.starts_with("no"); + StringRef ArchExt = IsNegated ? Modifier.drop_front(2) : Modifier; if (auto AE = parseArchExtension(ArchExt)) { if (AE->Feature.empty() || AE->NegFeature.empty()) @@ -265,21 +265,6 @@ bool AArch64::ExtensionSet::parseModifier(StringRef Modifier, return false; } -void AArch64::ExtensionSet::reconstructFromParsedFeatures( - const std::vector &Features) { - assert(Touched.none() && "Bitset already initialized"); - for (auto &F : Features) { - bool IsNegated = F[0] == '-'; - if (auto AE = targetFeatureToExtension(F)) { - Touched.set(AE->ID); - if (IsNegated) - Enabled.reset(AE->ID); - else - Enabled.set(AE->ID); - } - } -} - const AArch64::ExtensionInfo & AArch64::getExtensionByID(AArch64::ArchExtKind ExtID) { return lookupExtensionByID(ExtID); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index e3802380d2beea..eda0c7f5da15ab 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -748,13 +748,13 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) { #ifndef NDEBUG // Check that priorities are set properly in the .def file. We expect that // "compat" features are assigned non-duplicate consecutive priorities - // starting from one (1, ..., 35) and multiple zeros. + // starting from one (1, ..., 37) and multiple zeros. #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY, unsigned Priorities[] = { #include "llvm/TargetParser/X86TargetParser.def" }; std::array HelperList; - const size_t MaxPriority = 35; + const size_t MaxPriority = 37; std::iota(HelperList.begin(), HelperList.begin() + MaxPriority + 1, 0); for (size_t i = MaxPriority + 1; i != std::size(Priorities); ++i) HelperList[i] = 0; diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index b58b906465e569..1cbf9c957f5c2b 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -568,13 +568,13 @@ class CallsiteContextGraph { /// unioning their recorded alloc types. 
uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds); - /// Returns the alloction type of the intersection of the contexts of two + /// Returns the allocation type of the intersection of the contexts of two /// nodes (based on their provided context id sets), optimized for the case /// when Node1Ids is smaller than Node2Ids. uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids); - /// Returns the alloction type of the intersection of the contexts of two + /// Returns the allocation type of the intersection of the contexts of two /// nodes (based on their provided context id sets). uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0632f3cfc6dd23..436cdbff756699 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2618,6 +2618,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } + // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0) + // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0) + Value *ExtSrc; + if (match(Exp, m_ZExt(m_Value(ExtSrc))) && + ExtSrc->getType()->getScalarSizeInBits() == 1) { + Value *Select = + Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0), + ConstantFP::get(II->getType(), 1.0)); + return BinaryOperator::CreateFMulFMF(Src, Select, II); + } + if (match(Exp, m_SExt(m_Value(ExtSrc))) && + ExtSrc->getType()->getScalarSizeInBits() == 1) { + Value *Select = + Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5), + ConstantFP::get(II->getType(), 1.0)); + return BinaryOperator::CreateFMulFMF(Src, Select, II); + } + break; } case Intrinsic::ptrauth_auth: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 3fce4a3c20b335..34b0f8b8604974 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5549,8 +5549,8 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && - match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_And(m_Value(C), m_Value(D)))) { Value *X = nullptr, *Y = nullptr, *Z = nullptr; if (A == C) { @@ -5571,10 +5571,26 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { Z = B; } - if (X) { // Build (X^Y) & Z - Op1 = Builder.CreateXor(X, Y); - Op1 = Builder.CreateAnd(Op1, Z); - return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType())); + if (X) { + // If X^Y is a negative power of two, then `icmp eq/ne (Z & NegP2), 0` + // will fold to `icmp ult/uge Z, -NegP2`, incurring no additional + // instructions. + const APInt *C0, *C1; + bool XorIsNegP2 = match(X, m_APInt(C0)) && match(Y, m_APInt(C1)) && + (*C0 ^ *C1).isNegatedPowerOf2(); + + // If both Op0 and Op1 are one use, or X^Y will constant fold and one of + // Op0/Op1 is one use, proceed. In those cases we are instruction neutral + // but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B`.
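// Worked instance of the NegP2 case above: with X = -16 and Y = 0, X^Y is
// -16, a negated power of two, so (Z & -16) == 0 later folds to the single
// compare Z u< 16. A standalone check over unsigned arithmetic:
#include <cassert>
#include <cstdint>
static void checkNegP2Fold(uint32_t Z) {
  assert(((Z & 0xFFFFFFF0u) == 0) == (Z < 16u));
}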
+ int UseCnt = + int(Op0->hasOneUse()) + int(Op1->hasOneUse()) + + (int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant()))); + if (XorIsNegP2 || UseCnt >= 2) { + // Build (X^Y) & Z + Op1 = Builder.CreateXor(X, Y); + Op1 = Builder.CreateAnd(Op1, Z); + return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType())); + } } } diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index aac57231ba2ed6..8a12fa19a3dedf 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -839,7 +839,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, }; // Now walk the instructions, looking up the associated profile data using - // dbug locations. + // debug locations. for (auto &BB : F) { for (auto &I : BB) { if (I.isDebugOrPseudoInst()) continue; @@ -937,7 +937,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, // Add callsite metadata for the instruction's location list so that // it is simpler later on to identify which part of the MIB contexts // are from this particular instruction (including during inlining, - // when the callsite metdata will be updated appropriately). + // when the callsite metadata will be updated appropriately). // FIXME: can this be changed to strip out the matching stack // context ids from the MIB contexts and not add any callsite // metadata here to save space? diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b352558a1c0d25..ffb4e5b94c20b6 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4481,15 +4481,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void visitSelectInst(SelectInst &I) { - IRBuilder<> IRB(&I); // a = select b, c, d Value *B = I.getCondition(); Value *C = I.getTrueValue(); Value *D = I.getFalseValue(); + + handleSelectLikeInst(I, B, C, D); + } + + void handleSelectLikeInst(Instruction &I, Value *B, Value *C, Value *D) { + IRBuilder<> IRB(&I); + Value *Sb = getShadow(B); Value *Sc = getShadow(C); Value *Sd = getShadow(D); + Value *Ob = MS.TrackOrigins ? getOrigin(B) : nullptr; + Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr; + Value *Od = MS.TrackOrigins ? getOrigin(D) : nullptr; + // Result shadow if condition shadow is 0. Value *Sa0 = IRB.CreateSelect(B, Sc, Sd); Value *Sa1; @@ -4522,10 +4532,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } // a = select b, c, d // Oa = Sb ? Ob : (b ?
Oc : Od) - setOrigin( - &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()), - IRB.CreateSelect(B, getOrigin(I.getTrueValue()), - getOrigin(I.getFalseValue())))); + setOrigin(&I, IRB.CreateSelect(Sb, Ob, IRB.CreateSelect(B, Oc, Od))); } } diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 73ed611e8de8c4..3a98e257367b25 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1256,7 +1256,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - Instruction *Fixup = nullptr); + Instruction *Fixup = nullptr, + int64_t ScalableOffset = 0); static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { if (isa(Reg) || isa(Reg)) @@ -1675,16 +1676,18 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - Instruction *Fixup/*= nullptr*/) { + Instruction *Fixup /* = nullptr */, + int64_t ScalableOffset) { switch (Kind) { case LSRUse::Address: return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset, - HasBaseReg, Scale, AccessTy.AddrSpace, Fixup); + HasBaseReg, Scale, AccessTy.AddrSpace, + Fixup, ScalableOffset); case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to // fold a GV into an ICmp. - if (BaseGV) + if (BaseGV || ScalableOffset != 0) return false; // ICmp only has two operands; don't allow more than two non-trivial parts. @@ -1715,11 +1718,12 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, case LSRUse::Basic: // Only handle single-register values. - return !BaseGV && Scale == 0 && BaseOffset == 0; + return !BaseGV && Scale == 0 && BaseOffset == 0 && ScalableOffset == 0; case LSRUse::Special: // Special case Basic to handle -1 scales. - return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; + return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0 && + ScalableOffset == 0; } llvm_unreachable("Invalid LSRUse Kind!"); @@ -1843,7 +1847,7 @@ static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg) { + bool HasBaseReg, int64_t ScalableOffset = 0) { // Fast-path: zero is always foldable. if (BaseOffset == 0 && !BaseGV) return true; @@ -1859,7 +1863,7 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, } return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, - HasBaseReg, Scale); + HasBaseReg, Scale, nullptr, ScalableOffset); } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, @@ -3165,16 +3169,30 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI) { const SCEVConstant *IncConst = dyn_cast(IncExpr); - if (!IncConst || !isAddressUse(TTI, UserInst, Operand)) - return false; + int64_t IncOffset = 0; + int64_t ScalableOffset = 0; + if (IncConst) { + if (IncConst && IncConst->getAPInt().getSignificantBits() > 64) + return false; + IncOffset = IncConst->getValue()->getSExtValue(); + } else { + // Look for mul(vscale, constant), to detect ScalableOffset. 
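// The shape matched below is (Const * vscale). A toy standalone matcher
// (assumed expression type; LLVM's real SCEV API differs):
#include <cstdint>
struct ToyExpr {
  bool IsMul = false, IsVScale = false, IsConst = false;
  int64_t Value = 0;
  const ToyExpr *LHS = nullptr, *RHS = nullptr;
};
static bool matchConstTimesVScale(const ToyExpr &E, int64_t &ScalableOffset) {
  if (!E.IsMul || !E.LHS || !E.RHS || !E.LHS->IsConst || !E.RHS->IsVScale)
    return false;
  ScalableOffset = E.LHS->Value; // e.g. (4 * vscale) -> 4
  return true;
}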
+ auto *IncVScale = dyn_cast(IncExpr); + if (!IncVScale || IncVScale->getNumOperands() != 2 || + !isa(IncVScale->getOperand(1))) + return false; + auto *Scale = dyn_cast(IncVScale->getOperand(0)); + if (!Scale || Scale->getType()->getScalarSizeInBits() > 64) + return false; + ScalableOffset = Scale->getValue()->getSExtValue(); + } - if (IncConst->getAPInt().getSignificantBits() > 64) + if (!isAddressUse(TTI, UserInst, Operand)) return false; MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand); - int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, - IncOffset, /*HasBaseReg=*/false)) + IncOffset, /*HasBaseReg=*/false, ScalableOffset)) return false; return true; @@ -3220,6 +3238,10 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, Type *IVTy = IVSrc->getType(); Type *IntTy = SE.getEffectiveSCEVType(IVTy); const SCEV *LeftOverExpr = nullptr; + const SCEV *Accum = SE.getZero(IntTy); + SmallVector> Bases; + Bases.emplace_back(Accum, IVSrc); + for (const IVInc &Inc : Chain) { Instruction *InsertPt = Inc.UserInst; if (isa(InsertPt)) @@ -3232,10 +3254,31 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, // IncExpr was the result of subtraction of two narrow values, so must // be signed. const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy); + Accum = SE.getAddExpr(Accum, IncExpr); LeftOverExpr = LeftOverExpr ? SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr; } - if (LeftOverExpr && !LeftOverExpr->isZero()) { + + // Look through each base to see if any can produce a nice addressing mode. + bool FoundBase = false; + for (auto [MapScev, MapIVOper] : reverse(Bases)) { + const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev); + if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) { + if (!Remainder->isZero()) { + Rewriter.clearPostInc(); + Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt); + const SCEV *IVOperExpr = + SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV)); + IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); + } else { + IVOper = MapIVOper; + } + + FoundBase = true; + break; + } + } + if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) { // Expand the IV increment. Rewriter.clearPostInc(); Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt); @@ -3246,6 +3289,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, // If an IV increment can't be folded, use it as the next IV value. 
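// A standalone sketch of the base search above (toy types): previously
// materialized IV values are recorded with their accumulated offsets, and
// the newest base whose remaining delta folds into the addressing mode is
// reused instead of expanding the whole increment.
#include <cstdlib>
#include <utility>
#include <vector>
static int pickBaseSketch(const std::vector<std::pair<long long, int>> &Bases,
                          long long Accum, long long MaxFoldable) {
  for (auto It = Bases.rbegin(); It != Bases.rend(); ++It) {
    long long Remainder = Accum - It->first;
    if (std::llabs(Remainder) <= MaxFoldable)
      return It->second; // reuse this IV value, fold Remainder as an offset
  }
  return -1; // nothing folds; fall back to expanding LeftOverExpr
}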
if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); + Bases.emplace_back(Accum, IVOper); IVSrc = IVOper; LeftOverExpr = nullptr; } diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f2672b8e9118f5..b2775eb6c6c7a4 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -954,6 +954,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::ShadowCallStack: case Attribute::SanitizeAddress: case Attribute::SanitizeMemory: + case Attribute::SanitizeNumericalStability: case Attribute::SanitizeThread: case Attribute::SanitizeHWAddress: case Attribute::SanitizeMemTag: diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 6220f850930969..9fe655e548c22c 100644 --- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -467,7 +467,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); MDNode *BranchWeights = - MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + MDBuilder(CI->getContext()).createUnlikelyBranchWeights(); Instruction *NewInst = SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, &DTU); diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 09e6739fa9533c..c437a44dda8d30 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -28,7 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS #define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X) #else #define SCEV_DEBUG_WITH_TYPE(TYPE, X) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index eb1224abf00e29..a91c3ff9306160 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2376,7 +2376,13 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { hasFloatVersion(M, Name)) Ret = optimizeUnaryDoubleFP(CI, B, TLI, true); - const bool UseIntrinsic = CI->doesNotAccessMemory(); + // If we have an llvm.exp2 intrinsic, emit the llvm.ldexp intrinsic. If we + // have the libcall, emit the libcall. + // + // TODO: In principle we should be able to just always use the intrinsic for + // any doesNotAccessMemory callsite. + + const bool UseIntrinsic = Callee->isIntrinsic(); // Bail out for vectors because the code below only expects scalars. 
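// The rewrite below leans on the identity exp2(n) == ldexp(1.0, n) for
// integral n within the exponent range; both sides are the exact power of
// two 2^n. A standalone spot check against libm:
#include <cassert>
#include <cmath>
static void checkExp2Ldexp() {
  for (int n = -10; n <= 10; ++n)
    assert(std::exp2(static_cast<double>(n)) == std::ldexp(1.0, n));
}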
Type *Ty = CI->getType(); if (!UseIntrinsic && Ty->isVectorTy()) @@ -2386,12 +2392,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize Value *Op = CI->getArgOperand(0); if ((isa(Op) || isa(Op)) && - hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + (UseIntrinsic || + hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl))) { if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) { Constant *One = ConstantFP::get(Ty, 1.0); - // TODO: Emitting the intrinsic should not depend on whether the libcall - // is available. if (UseIntrinsic) { return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp, {Ty, Exp->getType()}, diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 90bbf2d5d99faf..eca5d1d4c5e1de 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -9,6 +9,7 @@ #include "VPlanAnalysis.h" #include "VPlan.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/IR/Instruction.h" using namespace llvm; @@ -26,7 +27,24 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) { } Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { - switch (R->getOpcode()) { + // Set the result type from the first operand, check if the types for all + // other operands match and cache them. + auto SetResultTyFromOp = [this, R]() { + Type *ResTy = inferScalarType(R->getOperand(0)); + for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) { + VPValue *OtherV = R->getOperand(Op); + assert(inferScalarType(OtherV) == ResTy && + "different types inferred for different operands"); + CachedTypes[OtherV] = ResTy; + } + return ResTy; + }; + + unsigned Opcode = R->getOpcode(); + if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) + return SetResultTyFromOp(); + + switch (Opcode) { case Instruction::Select: { Type *ResTy = inferScalarType(R->getOperand(1)); VPValue *OtherV = R->getOperand(2); @@ -35,28 +53,16 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { CachedTypes[OtherV] = ResTy; return ResTy; } - case Instruction::Or: case Instruction::ICmp: - case VPInstruction::FirstOrderRecurrenceSplice: { - Type *ResTy = inferScalarType(R->getOperand(0)); - VPValue *OtherV = R->getOperand(1); - assert(inferScalarType(OtherV) == ResTy && - "different types inferred for different operands"); - CachedTypes[OtherV] = ResTy; - return ResTy; - } + case VPInstruction::FirstOrderRecurrenceSplice: + case VPInstruction::Not: + return SetResultTyFromOp(); case VPInstruction::ExtractFromEnd: { Type *BaseTy = inferScalarType(R->getOperand(0)); if (auto *VecTy = dyn_cast(BaseTy)) return VecTy->getElementType(); return BaseTy; } - case VPInstruction::Not: { - Type *ResTy = inferScalarType(R->getOperand(0)); - assert(IntegerType::get(Ctx, 1) == ResTy && - "unexpected scalar type inferred for operand"); - return ResTy; - } case VPInstruction::LogicalAnd: return IntegerType::get(Ctx, 1); case VPInstruction::PtrAdd: diff --git a/llvm/test/Analysis/CostModel/X86/abs-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/abs-sizelatency.ll index 4498d499745aa8..5d41228fdce186 100644 --- a/llvm/test/Analysis/CostModel/X86/abs-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/abs-sizelatency.ll @@ -14,42 +14,42 @@ define void @cost_abs_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 
'cost_abs_i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false) ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false) ; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 false) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'cost_abs_i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'cost_abs_i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'cost_abs_i64' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'cost_abs_i64' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false) ; AVX512DQ-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 false) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'cost_abs_i64' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 false) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 false) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 false) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 false) @@ -64,56 +64,56 @@ define void @cost_abs_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> define void @cost_abs_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE2-LABEL: 'cost_abs_i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'cost_abs_i32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'cost_abs_i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
 ret void
;
; AVX1-LABEL: 'cost_abs_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i32'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i32'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i32'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 false)
@@ -128,56 +128,56 @@ define void @cost_abs_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32>
 define void @cost_abs_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_abs_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_abs_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_abs_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_abs_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 false)
@@ -192,56 +192,56 @@ define void @cost_abs_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16
 define void @cost_abs_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
; SSE2-LABEL: 'cost_abs_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_abs_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_abs_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_abs_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 false)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 false)
@@ -260,42 +260,42 @@ define void @cost_abs_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a5
 define void @cost_abs_i64_poison(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE-LABEL: 'cost_abs_i64_poison'
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 true)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_abs_i64_poison'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i64_poison'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i64_poison'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i64_poison'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i64_poison'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.abs.i64(i64 %a64, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a128, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a256, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %a512, i1 true)
@@ -310,56 +310,56 @@ define void @cost_abs_i64_poison(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 define void @cost_abs_i32_poison(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_abs_i32_poison'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_abs_i32_poison'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_abs_i32_poison'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_abs_i32_poison'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i32_poison'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i32_poison'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i32_poison'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i32_poison'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 %a32, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a128, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a256, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %a512, i1 true)
@@ -374,56 +374,56 @@ define void @cost_abs_i32_poison(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
 define void @cost_abs_i16_poison(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_abs_i16_poison'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_abs_i16_poison'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_abs_i16_poison'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_abs_i16_poison'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i16_poison'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i16_poison'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i16_poison'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i16_poison'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 %a16, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a128, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a256, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %a512, i1 true)
@@ -438,56 +438,56 @@ define void @cost_abs_i16_poison(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 define void @cost_abs_i8_poison(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
; SSE2-LABEL: 'cost_abs_i8_poison'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_abs_i8_poison'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_abs_i8_poison'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_abs_i8_poison'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_abs_i8_poison'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_abs_i8_poison'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_abs_i8_poison'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_abs_i8_poison'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 %a8, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a128, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a256, i1 true)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %a512, i1 true)
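For context on the scalar entries above: a cost of 3 is consistent with the standard three-instruction branchless expansion of integer abs, sketched here in plain LLVM IR (an illustration under that assumption; the function name is hypothetical and not part of the test suite).

define i32 @abs_i32_sketch(i32 %x) {
  ; ashr broadcasts the sign bit, xor conditionally inverts,
  ; sub conditionally adds one: three scalar ALU operations.
  %sign = ashr i32 %x, 31
  %flip = xor i32 %x, %sign
  %abs = sub i32 %flip, %sign
  ret i32 %abs
}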
diff --git a/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll b/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll
index a4846dade4dbd5..7d2b2a3b25d573 100644
--- a/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll
+++ b/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll
@@ -1,5 +1,4 @@
; REQUIRES: aarch64-registered-target
-; REQUIRES: shell
; Test IPA over a single combined file
; RUN: llvm-as %s -o %t0.bc
@@ -14,27 +13,27 @@
; RUN: opt -module-summary %s -o %t.summ0.bc
; RUN: opt -module-summary %S/Inputs/ipa-alias.ll -o %t.summ1.bc
-; RUN: echo > %t.res.txt \
-; RUN: -r %t.summ0.bc,AliasCall,px \
-; RUN: -r %t.summ0.bc,AliasToBitcastAliasWrite1, \
-; RUN: -r %t.summ0.bc,AliasToPreemptableAliasWrite1, \
-; RUN: -r %t.summ0.bc,AliasWrite1, \
-; RUN: -r %t.summ0.bc,BitcastAliasCall,px \
-; RUN: -r %t.summ0.bc,BitcastAliasWrite1, \
-; RUN: -r %t.summ0.bc,InterposableAliasCall,px \
-; RUN: -r %t.summ0.bc,InterposableAliasWrite1, \
-; RUN: -r %t.summ0.bc,PreemptableAliasCall,px \
-; RUN: -r %t.summ0.bc,PreemptableAliasWrite1, \
-; RUN: -r %t.summ1.bc,AliasToBitcastAliasWrite1,px \
-; RUN: -r %t.summ1.bc,AliasToPreemptableAliasWrite1,px \
-; RUN: -r %t.summ1.bc,AliasWrite1,px \
-; RUN: -r %t.summ1.bc,BitcastAliasWrite1,px \
-; RUN: -r %t.summ1.bc,InterposableAliasWrite1,px \
-; RUN: -r %t.summ1.bc,PreemptableAliasWrite1,px \
-; RUN: -r %t.summ1.bc,Write1,px
+; DEFINE: %{res} = \
+; DEFINE: -r %t.summ0.bc,AliasCall,px \
+; DEFINE: -r %t.summ0.bc,AliasToBitcastAliasWrite1, \
+; DEFINE: -r %t.summ0.bc,AliasToPreemptableAliasWrite1, \
+; DEFINE: -r %t.summ0.bc,AliasWrite1, \
+; DEFINE: -r %t.summ0.bc,BitcastAliasCall,px \
+; DEFINE: -r %t.summ0.bc,BitcastAliasWrite1, \
+; DEFINE: -r %t.summ0.bc,InterposableAliasCall,px \
+; DEFINE: -r %t.summ0.bc,InterposableAliasWrite1, \
+; DEFINE: -r %t.summ0.bc,PreemptableAliasCall,px \
+; DEFINE: -r %t.summ0.bc,PreemptableAliasWrite1, \
+; DEFINE: -r %t.summ1.bc,AliasToBitcastAliasWrite1,px \
+; DEFINE: -r %t.summ1.bc,AliasToPreemptableAliasWrite1,px \
+; DEFINE: -r %t.summ1.bc,AliasWrite1,px \
+; DEFINE: -r %t.summ1.bc,BitcastAliasWrite1,px \
+; DEFINE: -r %t.summ1.bc,InterposableAliasWrite1,px \
+; DEFINE: -r %t.summ1.bc,PreemptableAliasWrite1,px \
+; DEFINE: -r %t.summ1.bc,Write1,px
; RUN: llvm-lto2 run %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-print -stack-safety-run -save-temps -thinlto-threads 1 -O0 \
-; RUN: $(cat %t.res.txt) \
+; RUN: %{res} \
; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL,LTO
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
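A note on the mechanism used above: lit's DEFINE: directive creates a local substitution (%{res}) that is expanded textually into later RUN: lines, so the resolution list no longer has to be written to a file and re-read with a shell's $(cat ...); that is why REQUIRES: shell could be dropped. A minimal, hypothetical illustration (file and symbol names are made up):

; DEFINE: %{args} = -r %t.bc,foo,px \
; DEFINE:   -r %t.bc,bar,
; RUN: llvm-lto2 run %t.bc -o %t.out %{args}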
diff --git a/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll b/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll
index 8de56adc2bbd1c..0e2a08f3ae8df7 100644
--- a/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll
+++ b/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll
@@ -1,5 +1,4 @@
; REQUIRES: aarch64-registered-target
-; REQUIRES: shell
; RUN: llvm-as %s -o %t0.bc
; RUN: llvm-as %S/Inputs/ipa.ll -o %t1.bc
@@ -17,78 +16,79 @@
; RUN: llvm-dis %t.summ0.bc -o - > %t.ids.txt
; RUN: llvm-dis %t.summ1.bc -o - >> %t.ids.txt
-; RUN: echo > %t.res.txt \
-; RUN: -r %t.summ0.bc,ExternalCall, \
-; RUN: -r %t.summ0.bc,f1,px \
-; RUN: -r %t.summ0.bc,f2,px \
-; RUN: -r %t.summ0.bc,f3,px \
-; RUN: -r %t.summ0.bc,f4,px \
-; RUN: -r %t.summ0.bc,f5,px \
-; RUN: -r %t.summ0.bc,f6,px \
-; RUN: -r %t.summ0.bc,f7,px \
-; RUN: -r %t.summ0.bc,f8left,px \
-; RUN: -r %t.summ0.bc,f8oobleft,px \
-; RUN: -r %t.summ0.bc,f8oobright,px \
-; RUN: -r %t.summ0.bc,f8right,px \
-; RUN: -r %t.summ0.bc,InterposableCall,px \
-; RUN: -r %t.summ0.bc,InterposableWrite1, \
-; RUN: -r %t.summ0.bc,PreemptableCall,px \
-; RUN: -r %t.summ0.bc,PreemptableWrite1, \
-; RUN: -r %t.summ0.bc,PrivateCall,px \
-; RUN: -r %t.summ0.bc,Rec2, \
-; RUN: -r %t.summ0.bc,RecursiveNoOffset, \
-; RUN: -r %t.summ0.bc,RecursiveWithOffset, \
-; RUN: -r %t.summ0.bc,ReturnDependent, \
-; RUN: -r %t.summ0.bc,TestCrossModuleConflict,px \
-; RUN: -r %t.summ0.bc,TestCrossModuleOnce,px \
-; RUN: -r %t.summ0.bc,TestCrossModuleTwice,px \
-; RUN: -r %t.summ0.bc,TestCrossModuleWeak,px \
-; RUN: -r %t.summ0.bc,TestRecursiveNoOffset,px \
-; RUN: -r %t.summ0.bc,TestRecursiveWithOffset,px \
-; RUN: -r %t.summ0.bc,TestUpdateArg,px \
-; RUN: -r %t.summ0.bc,TwoArguments,px \
-; RUN: -r %t.summ0.bc,TwoArgumentsOOBBoth,px \
-; RUN: -r %t.summ0.bc,TwoArgumentsOOBOne,px \
-; RUN: -r %t.summ0.bc,TwoArgumentsOOBOther,px \
-; RUN: -r %t.summ0.bc,Weak,x \
-; RUN: -r %t.summ0.bc,Write1, \
-; RUN: -r %t.summ0.bc,Write1DiffModule,x \
-; RUN: -r %t.summ0.bc,Write1Module0,px \
-; RUN: -r %t.summ0.bc,Write1Private,x \
-; RUN: -r %t.summ0.bc,Write1SameModule,x \
-; RUN: -r %t.summ0.bc,Write1Weak,x \
-; RUN: -r %t.summ0.bc,Write4_2, \
-; RUN: -r %t.summ0.bc,Write4, \
-; RUN: -r %t.summ0.bc,Write8, \
-; RUN: -r %t.summ0.bc,WriteAndReturn8, \
-; RUN: -r %t.summ1.bc,ExternalCall,px \
-; RUN: -r %t.summ1.bc,InterposableWrite1,px \
-; RUN: -r %t.summ1.bc,PreemptableWrite1,px \
-; RUN: -r %t.summ1.bc,Rec0,px \
-; RUN: -r %t.summ1.bc,Rec1,px \
-; RUN: -r %t.summ1.bc,Rec2,px \
-; RUN: -r %t.summ1.bc,RecursiveNoOffset,px \
-; RUN: -r %t.summ1.bc,RecursiveWithOffset,px \
-; RUN: -r %t.summ1.bc,ReturnAlloca,px \
-; RUN: -r %t.summ1.bc,ReturnDependent,px \
-; RUN: -r %t.summ1.bc,Weak,x \
-; RUN: -r %t.summ1.bc,Write1,px \
-; RUN: -r %t.summ1.bc,Write1DiffModule,px \
-; RUN: -r %t.summ1.bc,Write1Module0,x \
-; RUN: -r %t.summ1.bc,Write1Private,px \
-; RUN: -r %t.summ1.bc,Write1SameModule,px \
-; RUN: -r %t.summ1.bc,Write1Weak,px \
-; RUN: -r %t.summ1.bc,Write4_2,px \
-; RUN: -r %t.summ1.bc,Write4,px \
-; RUN: -r %t.summ1.bc,Write8,px \
-; RUN: -r %t.summ1.bc,WriteAndReturn8,px
+; DEFINE: %{res} = \
+; DEFINE: -r %t.summ0.bc,ExternalCall, \
+; DEFINE: -r %t.summ0.bc,f1,px \
+; DEFINE: -r %t.summ0.bc,f2,px \
+; DEFINE: -r %t.summ0.bc,f3,px \
+; DEFINE: -r %t.summ0.bc,f4,px \
+; DEFINE: -r %t.summ0.bc,f5,px \
+; DEFINE: -r %t.summ0.bc,f6,px \
+; DEFINE: -r %t.summ0.bc,f7,px \
+; DEFINE: -r %t.summ0.bc,f8left,px \
+; DEFINE: -r %t.summ0.bc,f8oobleft,px \
+; DEFINE: -r %t.summ0.bc,f8oobright,px \
+; DEFINE: -r %t.summ0.bc,f8right,px \
+; DEFINE: -r %t.summ0.bc,InterposableCall,px \
+; DEFINE: -r %t.summ0.bc,InterposableWrite1, \
+; DEFINE: -r %t.summ0.bc,PreemptableCall,px \
+; DEFINE: -r %t.summ0.bc,PreemptableWrite1, \
+; DEFINE: -r %t.summ0.bc,PrivateCall,px \
+; DEFINE: -r %t.summ0.bc,Rec2, \
+; DEFINE: -r %t.summ0.bc,RecursiveNoOffset, \
+; DEFINE: -r %t.summ0.bc,RecursiveWithOffset, \
+; DEFINE: -r %t.summ0.bc,ReturnDependent, \
+; DEFINE: -r %t.summ0.bc,TestCrossModuleConflict,px \
+; DEFINE: -r %t.summ0.bc,TestCrossModuleOnce,px \
+; DEFINE: -r %t.summ0.bc,TestCrossModuleTwice,px \
+; DEFINE: -r %t.summ0.bc,TestCrossModuleWeak,px \
+; DEFINE: -r %t.summ0.bc,TestRecursiveNoOffset,px \
+; DEFINE: -r %t.summ0.bc,TestRecursiveWithOffset,px \
+; DEFINE: -r %t.summ0.bc,TestUpdateArg,px \
+; DEFINE: -r %t.summ0.bc,TwoArguments,px \
+; DEFINE: -r %t.summ0.bc,TwoArgumentsOOBBoth,px \
+; DEFINE: -r %t.summ0.bc,TwoArgumentsOOBOne,px \
+; DEFINE: -r %t.summ0.bc,TwoArgumentsOOBOther,px \
+; DEFINE: -r %t.summ0.bc,Weak,x \
+; DEFINE: -r %t.summ0.bc,Write1, \
+; DEFINE: -r %t.summ0.bc,Write1DiffModule,x \
+; DEFINE: -r %t.summ0.bc,Write1Module0,px \
+; DEFINE: -r %t.summ0.bc,Write1Private,x \
+; DEFINE: -r %t.summ0.bc,Write1SameModule,x \
+; DEFINE: -r %t.summ0.bc,Write1Weak,x \
+; DEFINE: -r %t.summ0.bc,Write4_2, \
+; DEFINE: -r %t.summ0.bc,Write4, \
+; DEFINE: -r %t.summ0.bc,Write8, \
+; DEFINE: -r %t.summ0.bc,WriteAndReturn8, \
+; DEFINE: -r %t.summ1.bc,ExternalCall,px \
+; DEFINE: -r %t.summ1.bc,InterposableWrite1,px \
+; DEFINE: -r %t.summ1.bc,PreemptableWrite1,px \
+; DEFINE: -r %t.summ1.bc,Rec0,px \
+; DEFINE: -r %t.summ1.bc,Rec1,px \
+; DEFINE: -r %t.summ1.bc,Rec2,px \
+; DEFINE: -r %t.summ1.bc,RecursiveNoOffset,px \
+; DEFINE: -r %t.summ1.bc,RecursiveWithOffset,px \
+; DEFINE: -r %t.summ1.bc,ReturnAlloca,px \
+; DEFINE: -r %t.summ1.bc,ReturnDependent,px \
+; DEFINE: -r %t.summ1.bc,Weak,x \
+; DEFINE: -r %t.summ1.bc,Write1,px \
+; DEFINE: -r %t.summ1.bc,Write1DiffModule,px \
+; DEFINE: -r %t.summ1.bc,Write1Module0,x \
+; DEFINE: -r %t.summ1.bc,Write1Private,px \
+; DEFINE: -r %t.summ1.bc,Write1SameModule,px \
+; DEFINE: -r %t.summ1.bc,Write1Weak,px \
+; DEFINE: -r %t.summ1.bc,Write4_2,px \
+; DEFINE: -r %t.summ1.bc,Write4,px \
+; DEFINE: -r %t.summ1.bc,Write8,px \
+; DEFINE: -r %t.summ1.bc,WriteAndReturn8,px
; RUN: llvm-lto2 run %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-print -stack-safety-run -save-temps -thinlto-threads 1 -O0 \
-; RUN: $(cat %t.res.txt) \
+; RUN: %{res} \
; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL,LTO
-; RUN: llvm-lto2 run %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-run -thinlto-distributed-indexes -thinlto-threads 1 -O0 $(cat %t.res.txt)
-; RUN: (cat %t.ids.txt ; llvm-dis %t.summ1.bc.thinlto.bc -o -) | FileCheck --check-prefixes=INDEX %s
+; RUN: llvm-lto2 run %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-run -thinlto-distributed-indexes -thinlto-threads 1 -O0 %{res}
+; RUN: llvm-dis %t.summ1.bc.thinlto.bc -o - >> %t.ids.txt
+; RUN: FileCheck --check-prefixes=INDEX %s < %t.ids.txt
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux"
diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll
index 6546d1a275c99f..613630e1a2b4d2 100644
--- a/llvm/test/Assembler/fp-intrinsics-attr.ll
+++ b/llvm/test/Assembler/fp-intrinsics-attr.ll
@@ -85,6 +85,11 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp {
 metadata !"round.dynamic", metadata !"fpexcept.strict")
+ %tan = call double @llvm.experimental.constrained.tan.f64(
+ double %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
 %pow = call double @llvm.experimental.constrained.pow.f64(
 double %a, double %b,
 metadata !"round.dynamic",
@@ -244,6 +249,9 @@ declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata
 declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
; CHECK: @llvm.experimental.constrained.cos.f64({{.*}}) #[[ATTR1]]
+declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)
+; CHECK: @llvm.experimental.constrained.tan.f64({{.*}}) #[[ATTR1]]
+
 declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata)
; CHECK: @llvm.experimental.constrained.pow.f64({{.*}}) #[[ATTR1]]
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 2a846e036924c7..e437c37d8d1c87 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1564,7 +1564,7 @@ exit:
; CHECK: select <2 x i1> , <2 x i8> , <2 x i8>
 call void @f.nobuiltin() builtin
-; CHECK: call void @f.nobuiltin() #51
+; CHECK: call void @f.nobuiltin() #52
 call fastcc noalias ptr @f.noalias() noinline
; CHECK: call fastcc noalias ptr @f.noalias() #12
@@ -1988,6 +1988,8 @@ declare void @f.nosanitize_bounds() nosanitize_bounds
 declare void @f.allockind() allockind("alloc,uninitialized")
; CHECK: declare void @f.allockind() #50
+declare void @f.sanitize_numerical_stability() sanitize_numerical_stability
+; CHECK: declare void @f.sanitize_numerical_stability() #51
; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
 declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
@@ -2110,7 +2112,8 @@ define float @nofpclass_callsites(float %arg) {
; CHECK: attributes #48 = { allocsize(1,0) }
; CHECK: attributes #49 = { nosanitize_bounds }
; CHECK: attributes #50 = { allockind("alloc,uninitialized") }
-; CHECK: attributes #51 = { builtin }
+; CHECK: attributes #51 = { sanitize_numerical_stability }
+; CHECK: attributes #52 = { builtin }
;; Metadata
diff --git a/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
index 4c0d1efb99498f..410c2d9021d6d5 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
@@ -28,14 +28,12 @@ entry:
 define <16 x i32> @mul_i32(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-LABEL: mul_i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-SD-NEXT: ushll v4.8h, v1.8b, #0
-; CHECK-SD-NEXT: ushll2 v5.8h, v0.16b, #0
-; CHECK-SD-NEXT: ushll2 v6.8h, v1.16b, #0
-; CHECK-SD-NEXT: umull v0.4s, v2.4h, v4.4h
-; CHECK-SD-NEXT: umull2 v1.4s, v2.8h, v4.8h
-; CHECK-SD-NEXT: umull2 v3.4s, v5.8h, v6.8h
-; CHECK-SD-NEXT: umull v2.4s, v5.4h, v6.4h
+; CHECK-SD-NEXT: umull v2.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: umull2 v4.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ushll v0.4s, v2.4h, #0
+; CHECK-SD-NEXT: ushll2 v3.4s, v4.8h, #0
+; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v4.4h, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mul_i32:
@@ -59,26 +57,20 @@ entry:
 define <16 x i64> @mul_i64(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-LABEL: mul_i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-SD-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-SD-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-SD-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0
-; CHECK-SD-NEXT: ushll v6.4s, v3.4h, #0
+; CHECK-SD-NEXT: umull v2.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ushll v3.4s, v2.4h, #0
; CHECK-SD-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-SD-NEXT: ushll v16.4s, v1.4h, #0
-; CHECK-SD-NEXT: ushll2 v7.4s, v3.8h, #0
-; CHECK-SD-NEXT: ushll2 v17.4s, v0.8h, #0
-; CHECK-SD-NEXT: ushll2 v18.4s, v1.8h, #0
-; CHECK-SD-NEXT: umull2 v1.2d, v4.4s, v6.4s
-; CHECK-SD-NEXT: umull v0.2d, v4.2s, v6.2s
-; CHECK-SD-NEXT: umull2 v3.2d, v2.4s, v7.4s
-; CHECK-SD-NEXT: umull v2.2d, v2.2s, v7.2s
-; CHECK-SD-NEXT: umull v4.2d, v5.2s, v16.2s
-; CHECK-SD-NEXT: umull2 v7.2d, v17.4s, v18.4s
-; CHECK-SD-NEXT: umull2 v5.2d, v5.4s, v16.4s
-; CHECK-SD-NEXT: umull v6.2d, v17.2s, v18.2s
+; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v6.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v3.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mul_i64:
@@ -139,17 +131,12 @@ entry:
 define <16 x i32> @mla_i32(<16 x i8> %a, <16 x i8> %b, <16 x i32> %c) {
; CHECK-SD-LABEL: mla_i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ushll v6.8h, v0.8b, #0
-; CHECK-SD-NEXT: ushll v7.8h, v1.8b, #0
-; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-SD-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-SD-NEXT: umlal v2.4s, v6.4h, v7.4h
-; CHECK-SD-NEXT: umlal2 v3.4s, v6.8h, v7.8h
-; CHECK-SD-NEXT: umlal2 v5.4s, v0.8h, v1.8h
-; CHECK-SD-NEXT: umlal v4.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: mov v2.16b, v4.16b
+; CHECK-SD-NEXT: umull2 v7.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: umull v6.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: uaddw2 v5.4s, v5.4s, v7.8h
+; CHECK-SD-NEXT: uaddw v0.4s, v2.4s, v6.4h
+; CHECK-SD-NEXT: uaddw2 v1.4s, v3.4s, v6.8h
+; CHECK-SD-NEXT: uaddw v2.4s, v4.4s, v7.4h
; CHECK-SD-NEXT: mov v3.16b, v5.16b
; CHECK-SD-NEXT: ret
;
@@ -179,35 +166,22 @@ entry:
 define <16 x i64> @mla_i64(<16 x i8> %a, <16 x i8> %b, <16 x i64> %c) {
; CHECK-SD-LABEL: mla_i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov v17.16b, v7.16b
-; CHECK-SD-NEXT: mov v16.16b, v6.16b
-; CHECK-SD-NEXT: ushll v6.8h, v0.8b, #0
-; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-SD-NEXT: ushll v7.8h, v1.8b, #0
-; CHECK-SD-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-SD-NEXT: ushll v18.4s, v6.4h, #0
-; CHECK-SD-NEXT: ushll2 v21.4s, v6.8h, #0
-; CHECK-SD-NEXT: ushll v19.4s, v0.4h, #0
-; CHECK-SD-NEXT: ushll v20.4s, v7.4h, #0
-; CHECK-SD-NEXT: ushll v22.4s, v1.4h, #0
-; CHECK-SD-NEXT: ushll2 v23.4s, v7.8h, #0
-; CHECK-SD-NEXT: ldp q6, q7, [sp]
-; CHECK-SD-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-SD-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-SD-NEXT: umlal2 v3.2d, v18.4s, v20.4s
-; CHECK-SD-NEXT: umlal v2.2d, v18.2s, v20.2s
-; CHECK-SD-NEXT: umlal v16.2d, v19.2s, v22.2s
-; CHECK-SD-NEXT: umlal2 v5.2d, v21.4s, v23.4s
-; CHECK-SD-NEXT: umlal v4.2d, v21.2s, v23.2s
-; CHECK-SD-NEXT: umlal2 v17.2d, v19.4s, v22.4s
-; CHECK-SD-NEXT: umlal2 v7.2d, v0.4s, v1.4s
-; CHECK-SD-NEXT: umlal v6.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: mov v2.16b, v4.16b
-; CHECK-SD-NEXT: mov v3.16b, v5.16b
-; CHECK-SD-NEXT: mov v4.16b, v16.16b
-; CHECK-SD-NEXT: mov v5.16b, v17.16b
+; CHECK-SD-NEXT: umull v16.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ldp q20, q21, [sp]
+; CHECK-SD-NEXT: ushll v17.4s, v16.4h, #0
+; CHECK-SD-NEXT: ushll2 v16.4s, v16.8h, #0
+; CHECK-SD-NEXT: ushll2 v19.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v18.4s, v0.4h, #0
+; CHECK-SD-NEXT: uaddw2 v1.2d, v3.2d, v17.4s
+; CHECK-SD-NEXT: uaddw v0.2d, v2.2d, v17.2s
+; CHECK-SD-NEXT: uaddw2 v3.2d, v5.2d, v16.4s
+; CHECK-SD-NEXT: uaddw v2.2d, v4.2d, v16.2s
+; CHECK-SD-NEXT: uaddw2 v16.2d, v21.2d, v19.4s
+; CHECK-SD-NEXT: uaddw v4.2d, v6.2d, v18.2s
+; CHECK-SD-NEXT: uaddw2 v5.2d, v7.2d, v18.4s
+; CHECK-SD-NEXT: uaddw v6.2d, v20.2d, v19.2s
+; CHECK-SD-NEXT: mov v7.16b, v16.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mla_i64:
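The CHECK churn in aarch64-wide-mul.ll reflects a single selection improvement: a multiply of zero/sign-extended operands is now emitted as the long multiply (umull/smull) on the narrow element type, with the extension applied to the product afterwards, and the mla cases additionally fold the accumulation into uaddw/saddw. A minimal sketch of the IR shape involved, assuming zero-extended i8 operands (illustrative only, not taken from the tests):

define <8 x i32> @widen_mul_sketch(<8 x i8> %a, <8 x i8> %b) {
  ; Previously: both operands were widened with ushll before a plain mul.
  ; Now: one umull (i8 x i8 -> i16, exact since 255*255 < 2^16) followed
  ; by ushll/ushll2 to i32, saving the up-front operand extensions.
  %za = zext <8 x i8> %a to <8 x i32>
  %zb = zext <8 x i8> %b to <8 x i32>
  %m = mul <8 x i32> %za, %zb
  ret <8 x i32> %m
}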
diff --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
index 7cc5041d59f49e..fb96d11acc275a 100644
--- a/llvm/test/CodeGen/AArch64/addp-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -136,15 +136,13 @@ define <4 x double> @deinterleave_shuffle_v8f64(<8 x double> %a) {
 define <4 x i32> @udot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: udot:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0
-; CHECK-NEXT: umull2 v5.4s, v3.8h, v4.8h
-; CHECK-NEXT: umull v3.4s, v3.4h, v4.4h
-; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
-; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
-; CHECK-NEXT: addp v2.4s, v3.4s, v5.4s
+; CHECK-NEXT: umull v3.8h, v1.8b, v2.8b
+; CHECK-NEXT: umull2 v1.8h, v1.16b, v2.16b
+; CHECK-NEXT: ushll2 v2.4s, v3.8h, #0
+; CHECK-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-NEXT: ushll2 v4.4s, v1.8h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
; CHECK-NEXT: addp v1.4s, v1.4s, v4.4s
; CHECK-NEXT: addp v1.4s, v2.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
@@ -165,15 +163,13 @@ define <4 x i32> @udot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) {
 define <4 x i32> @sdot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sdot:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v3.8h, v1.8b, #0
-; CHECK-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NEXT: sshll2 v1.8h, v1.16b, #0
-; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NEXT: smull2 v5.4s, v3.8h, v4.8h
-; CHECK-NEXT: smull v3.4s, v3.4h, v4.4h
-; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
-; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
-; CHECK-NEXT: addp v2.4s, v3.4s, v5.4s
+; CHECK-NEXT: smull v3.8h, v1.8b, v2.8b
+; CHECK-NEXT: smull2 v1.8h, v1.16b, v2.16b
+; CHECK-NEXT: sshll2 v2.4s, v3.8h, #0
+; CHECK-NEXT: sshll v3.4s, v3.4h, #0
+; CHECK-NEXT: sshll2 v4.4s, v1.8h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
; CHECK-NEXT: addp v1.4s, v1.4s, v4.4s
; CHECK-NEXT: addp v1.4s, v2.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
diff --git a/llvm/test/CodeGen/AArch64/arm64-redzone.ll b/llvm/test/CodeGen/AArch64/arm64-redzone.ll
index fe30a1a98521e1..d001bc2a8dbe4e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-redzone.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-redzone.ll
@@ -16,3 +16,16 @@ define i32 @foo(i32 %a, i32 %b) nounwind ssp {
 %tmp2 = load i32, ptr %x, align 4
 ret i32 %tmp2
 }
+
+; We disable red-zone if NEON is available because copies of Q-regs
+; require a spill/fill and dynamic allocation. But we only need to do
+; this when FP registers are enabled.
+define void @bar(fp128 %f) "target-features"="-fp-armv8" {
+; CHECK-LABEL: bar:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x0, x1, [sp, #-16]
+; CHECK-NEXT: ret
+ %ptr = alloca fp128
+ store fp128 %f, ptr %ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
index 48062c9a54b5d3..b09ed8d3eb764e 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
@@ -338,6 +338,21 @@ define half @cos_f16(half %x) #0 {
 ret half %val
 }
+define half @tan_f16(half %x) #0 {
+; CHECK-LABEL: tan_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl tanf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %val = call half @llvm.experimental.constrained.tan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret half %val
+}
+
 define half @pow_f16(half %x, half %y) #0 {
; CHECK-LABEL: pow_f16:
; CHECK: // %bb.0:
@@ -1147,6 +1162,7 @@ declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata)
 declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata)
+declare half @llvm.experimental.constrained.tan.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata)
 declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index 685efbb7cad431..67d0b63f4076f6 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -146,6 +146,13 @@ define float @cos_f32(float %x) #0 {
 ret float %val
 }
+; CHECK-LABEL: tan_f32:
+; CHECK: bl tanf
+define float @tan_f32(float %x) #0 {
+ %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %val
+}
+
; CHECK-LABEL: pow_f32:
; CHECK: bl powf
 define float @pow_f32(float %x, float %y) #0 {
@@ -630,6 +637,13 @@ define double @cos_f64(double %x) #0 {
 ret double %val
 }
+; CHECK-LABEL: tan_f64:
+; CHECK: bl tan
+define double @tan_f64(double %x) #0 {
+ %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %val
+}
+
; CHECK-LABEL: pow_f64:
; CHECK: bl pow
 define double @pow_f64(double %x, double %y) #0 {
@@ -1114,6 +1128,13 @@ define fp128 @cos_f128(fp128 %x) #0 {
 ret fp128 %val
 }
+; CHECK-LABEL: tan_f128:
+; CHECK: bl tanl
+define fp128 @tan_f128(fp128 %x) #0 {
+ %val = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret fp128 %val
+}
+
; CHECK-LABEL: pow_f128:
; CHECK: bl powl
 define fp128 @pow_f128(fp128 %x, fp128 %y) #0 {
@@ -1491,6 +1512,13 @@ define <1 x double> @cos_v1f64(<1 x double> %x, <1 x double> %y) #0 {
 ret <1 x double> %val
 }
+; CHECK-LABEL: tan_v1f64:
+; CHECK: bl tan
+define <1 x double> @tan_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+ %val = call <1 x double> @llvm.experimental.constrained.tan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <1 x double> %val
+}
+
; CHECK-LABEL: pow_v1f64:
; CHECK: bl pow
 define <1 x double> @pow_v1f64(<1 x double> %x, <1 x double> %y) #0 {
@@ -1555,6 +1583,7 @@ declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata)
@@ -1599,6 +1628,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat
 declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata)
@@ -1643,6 +1673,7 @@ declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata
 declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.pow.f128(fp128, fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.log.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.log10.f128(fp128, metadata, metadata)
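The tan tests above follow the existing pattern for constrained FP intrinsics: within a strictfp function the call carries rounding-mode and exception-behavior metadata, and AArch64 lowers it to the matching libm call (tanf, tan, tanl by type). A minimal usage sketch (illustrative; the function name is hypothetical):

define double @tan_sketch(double %x) strictfp {
  ; Lowers to a call to libm's tan; the metadata keeps the
  ; default rounding mode and strict exception semantics explicit.
  %r = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") strictfp
  ret double %r
}
declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)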
CHECK-NEXT: uaddw v0.4s, v1.4s, v0.4h +; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -176,13 +175,12 @@ define i32 @test_sdot_v5i8(ptr nocapture readonly %a, ptr nocapture readonly %b, ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: smull2 v2.4s, v1.8h, v0.8h -; CHECK-NEXT: mov v3.s[0], v2.s[0] -; CHECK-NEXT: smlal v3.4s, v1.4h, v0.4h -; CHECK-NEXT: addv s0, v3.4s +; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-NEXT: mov v1.s[0], v2.s[0] +; CHECK-NEXT: saddw v0.4s, v1.4s, v0.4h +; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -200,19 +198,17 @@ entry: define i32 @test_sdot_v5i8_double(<5 x i8> %a, <5 x i8> %b, <5 x i8> %c, <5 x i8> %d) { ; CHECK-LABEL: test_sdot_v5i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: movi v6.2d, #0000000000000000 -; CHECK-NEXT: smull2 v4.4s, v0.8h, v1.8h -; CHECK-NEXT: smull2 v7.4s, v2.8h, v3.8h -; CHECK-NEXT: mov v6.s[0], v4.s[0] -; CHECK-NEXT: mov v5.s[0], v7.s[0] -; CHECK-NEXT: smlal v6.4s, v0.4h, v1.4h -; CHECK-NEXT: smlal v5.4s, v2.4h, v3.4h -; CHECK-NEXT: add v0.4s, v6.4s, v5.4s +; CHECK-NEXT: smull v2.8h, v2.8b, v3.8b +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: sshll2 v4.4s, v0.8h, #0 +; CHECK-NEXT: sshll2 v5.4s, v2.8h, #0 +; CHECK-NEXT: mov v3.s[0], v4.s[0] +; CHECK-NEXT: mov v1.s[0], v5.s[0] +; CHECK-NEXT: saddw v0.4s, v3.4s, v0.4h +; CHECK-NEXT: saddw v1.4s, v1.4s, v2.4h +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -998,27 +994,21 @@ entry: define i32 @test_udot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v25i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q0, [x0] -; CHECK-NEXT: ldp q5, q1, [x1] -; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0 -; CHECK-NEXT: ushll v6.8h, v2.8b, #0 -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v4.8h, v1.16b, #0 -; CHECK-NEXT: ushll v7.8h, v5.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: umull v3.4s, v4.4h, v3.4h -; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: umull2 v16.4s, v7.8h, v6.8h -; CHECK-NEXT: umull v6.4s, v7.4h, v6.4h -; CHECK-NEXT: mov v4.s[0], v3.s[0] -; CHECK-NEXT: ushll2 v3.8h, v5.16b, #0 -; CHECK-NEXT: umlal2 v16.4s, v1.8h, v0.8h -; CHECK-NEXT: umlal v6.4s, v1.4h, v0.4h -; CHECK-NEXT: umlal v4.4s, v3.4h, v2.4h -; CHECK-NEXT: umlal2 v16.4s, v3.8h, v2.8h -; CHECK-NEXT: add v0.4s, v6.4s, v4.4s -; CHECK-NEXT: add v0.4s, v0.4s, v16.4s +; CHECK-NEXT: ldp q3, q0, [x1] +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ldp q2, q1, [x0] +; CHECK-NEXT: umull2 v4.8h, v0.16b, v1.16b +; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: umull v1.8h, v3.8b, v2.8b +; CHECK-NEXT: umull2 v2.8h, v3.16b, v2.16b +; CHECK-NEXT: ushll v3.4s, v4.4h, #0 +; CHECK-NEXT: uaddl2 v4.4s, v1.8h, v0.8h +; CHECK-NEXT: uaddl v0.4s, v1.4h, v0.4h +; CHECK-NEXT: mov 
v5.s[0], v3.s[0] +; CHECK-NEXT: uaddw2 v1.4s, v4.4s, v2.8h +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uaddw v2.4s, v5.4s, v2.4h +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1063,27 +1053,21 @@ entry: define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v25i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q0, [x0] -; CHECK-NEXT: ldp q5, q1, [x1] -; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0 -; CHECK-NEXT: sshll v6.8h, v2.8b, #0 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll2 v4.8h, v1.16b, #0 -; CHECK-NEXT: sshll v7.8h, v5.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: smull v3.4s, v4.4h, v3.4h -; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: smull2 v16.4s, v7.8h, v6.8h -; CHECK-NEXT: smull v6.4s, v7.4h, v6.4h -; CHECK-NEXT: mov v4.s[0], v3.s[0] -; CHECK-NEXT: sshll2 v3.8h, v5.16b, #0 -; CHECK-NEXT: smlal2 v16.4s, v1.8h, v0.8h -; CHECK-NEXT: smlal v6.4s, v1.4h, v0.4h -; CHECK-NEXT: smlal v4.4s, v3.4h, v2.4h -; CHECK-NEXT: smlal2 v16.4s, v3.8h, v2.8h -; CHECK-NEXT: add v0.4s, v6.4s, v4.4s -; CHECK-NEXT: add v0.4s, v0.4s, v16.4s +; CHECK-NEXT: ldp q3, q0, [x1] +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ldp q2, q1, [x0] +; CHECK-NEXT: smull2 v4.8h, v0.16b, v1.16b +; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: smull v1.8h, v3.8b, v2.8b +; CHECK-NEXT: smull2 v2.8h, v3.16b, v2.16b +; CHECK-NEXT: sshll v3.4s, v4.4h, #0 +; CHECK-NEXT: saddl2 v4.4s, v1.8h, v0.8h +; CHECK-NEXT: saddl v0.4s, v1.4h, v0.4h +; CHECK-NEXT: mov v5.s[0], v3.s[0] +; CHECK-NEXT: saddw2 v1.4s, v4.4s, v2.8h +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1105,222 +1089,210 @@ define i32 @test_sdot_v25i8_double(<25 x i8> %a, <25 x i8> %b, <25 x i8> %c, <25 ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldr b1, [sp, #16] -; CHECK-NEXT: ldr b0, [sp, #80] -; CHECK-NEXT: add x11, sp, #24 -; CHECK-NEXT: ldr b3, [sp, #216] -; CHECK-NEXT: add x10, sp, #88 -; CHECK-NEXT: ldr b2, [sp, #280] -; CHECK-NEXT: ld1 { v1.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #224 -; CHECK-NEXT: ldr b4, [sp, #152] -; CHECK-NEXT: ldr b6, [sp, #480] -; CHECK-NEXT: ld1 { v0.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #288 -; CHECK-NEXT: add x12, sp, #160 -; CHECK-NEXT: ld1 { v3.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #488 -; CHECK-NEXT: ld1 { v2.b }[1], [x10] -; CHECK-NEXT: ld1 { v4.b }[1], [x12] -; CHECK-NEXT: ld1 { v6.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #32 -; CHECK-NEXT: add x9, sp, #96 -; CHECK-NEXT: add x8, sp, #104 -; CHECK-NEXT: ld1 { v1.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #232 -; CHECK-NEXT: ld1 { v0.b }[2], [x9] +; CHECK-NEXT: ldr b0, [sp, #280] +; CHECK-NEXT: add x8, sp, #288 +; CHECK-NEXT: ldr b1, [sp, #80] +; CHECK-NEXT: ldr b2, [sp, #152] ; CHECK-NEXT: add x9, sp, #296 -; CHECK-NEXT: ld1 { v3.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #168 +; CHECK-NEXT: ldr b4, [sp, #216] +; CHECK-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: add x10, sp, #320 +; CHECK-NEXT: ld1 { v1.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: add x12, sp, #192 +; CHECK-NEXT: ld1 { v2.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #304 +; CHECK-NEXT: add x11, sp, #328 +; CHECK-NEXT: ld1 { v0.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: ldr b5, [sp, #16] +; CHECK-NEXT: ld1 { v1.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: ldr b6, [sp, #680] ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: ld1 { v4.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #40 -; CHECK-NEXT: ld1 { v1.b }[3], [x11] +; CHECK-NEXT: add x9, sp, #104 +; CHECK-NEXT: ldr b7, [sp, #480] ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #304 -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x8, sp, #312 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: ld1 { v1.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: ldr b19, [sp, #552] +; CHECK-NEXT: ld1 { v2.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #112 +; CHECK-NEXT: ldr b22, [sp, #744] +; CHECK-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-NEXT: add x8, sp, #336 +; CHECK-NEXT: mov v3.b[1], w1 +; CHECK-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #184 +; CHECK-NEXT: ldr b23, [sp, #544] +; CHECK-NEXT: ld1 { v2.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #224 +; CHECK-NEXT: ldr b20, [sp, #352] +; CHECK-NEXT: ld1 { v0.b }[5], [x10] +; CHECK-NEXT: ld1 { v4.b }[1], [x9] +; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: ld1 { v1.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #128 +; CHECK-NEXT: add x9, sp, #136 +; CHECK-NEXT: ld1 { v2.b }[5], [x12] +; CHECK-NEXT: add x12, sp, #232 +; CHECK-NEXT: mov v3.b[2], w2 +; CHECK-NEXT: ld1 { v0.b }[6], [x11] +; CHECK-NEXT: ld1 { v4.b }[2], [x12] ; CHECK-NEXT: add x11, sp, #240 -; CHECK-NEXT: add x13, sp, #56 -; CHECK-NEXT: ld1 { v2.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: ld1 { v3.b }[3], [x11] -; CHECK-NEXT: ld1 { v1.b }[4], [x8] -; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x15, sp, #312 -; CHECK-NEXT: add x12, sp, #120 +; CHECK-NEXT: add x12, sp, #24 +; CHECK-NEXT: ld1 { v1.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #200 +; CHECK-NEXT: ld1 { v5.b }[1], [x12] +; CHECK-NEXT: ld1 { v2.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #256 +; CHECK-NEXT: ld1 { v0.b }[7], [x8] +; CHECK-NEXT: 
ld1 { v4.b }[3], [x11] +; CHECK-NEXT: add x8, sp, #688 +; CHECK-NEXT: ld1 { v6.b }[1], [x8] +; CHECK-NEXT: add x11, sp, #32 ; CHECK-NEXT: add x8, sp, #248 -; CHECK-NEXT: add x11, sp, #64 -; CHECK-NEXT: ld1 { v2.b }[4], [x15] -; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x15, sp, #320 -; CHECK-NEXT: ld1 { v1.b }[5], [x13] -; CHECK-NEXT: ld1 { v0.b }[5], [x12] -; CHECK-NEXT: ldr b18, [sp, #552] -; CHECK-NEXT: add x14, sp, #128 -; CHECK-NEXT: add x16, sp, #256 -; CHECK-NEXT: ldr b16, [sp, #352] -; CHECK-NEXT: ld1 { v2.b }[5], [x15] -; CHECK-NEXT: add x15, sp, #176 -; CHECK-NEXT: ld1 { v3.b }[5], [x16] -; CHECK-NEXT: ld1 { v1.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #560 -; CHECK-NEXT: ld1 { v0.b }[6], [x14] -; CHECK-NEXT: add x16, sp, #360 -; CHECK-NEXT: ld1 { v4.b }[3], [x15] -; CHECK-NEXT: ld1 { v18.b }[1], [x11] +; CHECK-NEXT: ld1 { v5.b }[2], [x11] +; CHECK-NEXT: ld1 { v1.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #40 +; CHECK-NEXT: ld1 { v4.b }[4], [x8] +; CHECK-NEXT: add x8, sp, #696 +; CHECK-NEXT: ldr b21, [sp, #616] +; CHECK-NEXT: ld1 { v6.b }[2], [x8] +; CHECK-NEXT: add x8, sp, #208 +; CHECK-NEXT: smull v23.8h, v23.8b, v22.8b +; CHECK-NEXT: ld1 { v5.b }[3], [x9] +; CHECK-NEXT: ld1 { v2.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #704 +; CHECK-NEXT: ld1 { v4.b }[5], [x10] +; CHECK-NEXT: add x9, sp, #48 +; CHECK-NEXT: add x10, sp, #56 +; CHECK-NEXT: ld1 { v6.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #264 +; CHECK-NEXT: ldr b22, [sp, #416] +; CHECK-NEXT: ld1 { v5.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #488 +; CHECK-NEXT: mov v3.b[3], w3 +; CHECK-NEXT: ld1 { v4.b }[6], [x8] +; CHECK-NEXT: add x8, sp, #712 +; CHECK-NEXT: ld1 { v7.b }[1], [x9] +; CHECK-NEXT: ld1 { v6.b }[4], [x8] +; CHECK-NEXT: add x9, sp, #720 +; CHECK-NEXT: add x8, sp, #64 +; CHECK-NEXT: ld1 { v5.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #496 +; CHECK-NEXT: add x11, sp, #576 +; CHECK-NEXT: ld1 { v7.b }[2], [x10] ; CHECK-NEXT: add x10, sp, #72 -; CHECK-NEXT: ld1 { v16.b }[1], [x16] -; CHECK-NEXT: add x9, sp, #136 -; CHECK-NEXT: add x14, sp, #184 -; CHECK-NEXT: ld1 { v1.b }[7], [x10] +; CHECK-NEXT: mov v3.b[4], w4 +; CHECK-NEXT: ld1 { v6.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #272 +; CHECK-NEXT: ldr b16, [sp, #344] +; CHECK-NEXT: ld1 { v5.b }[6], [x8] +; CHECK-NEXT: add x8, sp, #728 +; CHECK-NEXT: ld1 { v4.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #504 +; CHECK-NEXT: ldr b17, [sp, #144] +; CHECK-NEXT: sshll v23.4s, v23.4h, #0 +; CHECK-NEXT: ld1 { v6.b }[6], [x8] +; CHECK-NEXT: ld1 { v7.b }[3], [x9] +; CHECK-NEXT: add x8, sp, #736 +; CHECK-NEXT: add x9, sp, #512 +; CHECK-NEXT: ld1 { v5.b }[7], [x10] ; CHECK-NEXT: add x10, sp, #568 -; CHECK-NEXT: ld1 { v0.b }[7], [x9] -; CHECK-NEXT: ld1 { v4.b }[4], [x14] +; CHECK-NEXT: mov v3.b[5], w5 +; CHECK-NEXT: smull v16.8h, v17.8b, v16.8b +; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v6.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #560 +; CHECK-NEXT: ld1 { v7.b }[4], [x9] +; CHECK-NEXT: ld1 { v19.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #360 +; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: ld1 { v20.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: ld1 { v22.b }[1], [x9] +; CHECK-NEXT: ld1 { v21.b }[1], [x8] ; CHECK-NEXT: add x9, sp, #368 -; CHECK-NEXT: ld1 { v18.b }[2], [x10] -; CHECK-NEXT: add x11, sp, #496 -; CHECK-NEXT: ld1 { v16.b }[2], [x9] -; CHECK-NEXT: fmov s5, w0 -; CHECK-NEXT: add x9, sp, #192 -; CHECK-NEXT: ld1 { v6.b }[2], [x11] -; CHECK-NEXT: add x10, sp, #576 -; CHECK-NEXT: ld1 { v4.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #376 -; CHECK-NEXT: ld1 
{ v18.b }[3], [x10] -; CHECK-NEXT: add x11, sp, #504 -; CHECK-NEXT: ld1 { v16.b }[3], [x9] -; CHECK-NEXT: mov v5.b[1], w1 -; CHECK-NEXT: ldr b7, [sp, #144] -; CHECK-NEXT: ldr b17, [sp, #344] -; CHECK-NEXT: add x9, sp, #200 -; CHECK-NEXT: ld1 { v6.b }[3], [x11] +; CHECK-NEXT: add x8, sp, #520 +; CHECK-NEXT: ld1 { v19.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #432 +; CHECK-NEXT: ld1 { v7.b }[5], [x8] +; CHECK-NEXT: ld1 { v20.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #632 +; CHECK-NEXT: ld1 { v22.b }[2], [x10] +; CHECK-NEXT: ld1 { v21.b }[2], [x9] +; CHECK-NEXT: add x8, sp, #376 +; CHECK-NEXT: add x9, sp, #440 +; CHECK-NEXT: ld1 { v19.b }[3], [x11] ; CHECK-NEXT: add x10, sp, #584 -; CHECK-NEXT: ld1 { v4.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #384 -; CHECK-NEXT: ld1 { v18.b }[4], [x10] -; CHECK-NEXT: sshll v7.8h, v7.8b, #0 -; CHECK-NEXT: sshll v17.8h, v17.8b, #0 -; CHECK-NEXT: add x11, sp, #512 -; CHECK-NEXT: ld1 { v16.b }[4], [x9] -; CHECK-NEXT: ld1 { v6.b }[4], [x11] ; CHECK-NEXT: add x11, sp, #592 -; CHECK-NEXT: mov v5.b[2], w2 -; CHECK-NEXT: add x10, sp, #392 -; CHECK-NEXT: ldr b19, [sp, #680] -; CHECK-NEXT: ld1 { v18.b }[5], [x11] -; CHECK-NEXT: smull v7.4s, v7.4h, v17.4h -; CHECK-NEXT: ldr b17, [sp, #416] -; CHECK-NEXT: ld1 { v16.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #688 -; CHECK-NEXT: add x12, sp, #328 -; CHECK-NEXT: add x9, sp, #424 -; CHECK-NEXT: ld1 { v19.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #600 -; CHECK-NEXT: ldr b20, [sp, #616] -; CHECK-NEXT: ld1 { v2.b }[6], [x12] -; CHECK-NEXT: ld1 { v17.b }[1], [x9] -; CHECK-NEXT: add x11, sp, #400 -; CHECK-NEXT: ld1 { v18.b }[6], [x10] -; CHECK-NEXT: add x12, sp, #624 -; CHECK-NEXT: mov v5.b[3], w3 -; CHECK-NEXT: ld1 { v16.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #696 -; CHECK-NEXT: ld1 { v20.b }[1], [x12] -; CHECK-NEXT: add x9, sp, #432 -; CHECK-NEXT: ld1 { v19.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #608 -; CHECK-NEXT: ld1 { v17.b }[2], [x9] -; CHECK-NEXT: add x10, sp, #408 -; CHECK-NEXT: ld1 { v18.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #632 -; CHECK-NEXT: ld1 { v16.b }[7], [x10] -; CHECK-NEXT: ld1 { v20.b }[2], [x11] -; CHECK-NEXT: mov v5.b[4], w4 -; CHECK-NEXT: add x10, sp, #704 -; CHECK-NEXT: add x12, sp, #440 -; CHECK-NEXT: ld1 { v19.b }[3], [x10] +; CHECK-NEXT: ld1 { v20.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #640 +; CHECK-NEXT: ld1 { v22.b }[3], [x9] +; CHECK-NEXT: ld1 { v21.b }[3], [x8] +; CHECK-NEXT: add x9, sp, #384 +; CHECK-NEXT: add x8, sp, #528 +; CHECK-NEXT: ld1 { v19.b }[4], [x10] ; CHECK-NEXT: add x10, sp, #448 -; CHECK-NEXT: ld1 { v17.b }[3], [x12] -; CHECK-NEXT: add x12, sp, #640 -; CHECK-NEXT: sshll v21.8h, v16.8b, #0 -; CHECK-NEXT: ld1 { v20.b }[3], [x12] -; CHECK-NEXT: sshll v18.8h, v18.8b, #0 -; CHECK-NEXT: add x11, sp, #712 -; CHECK-NEXT: mov v5.b[5], w5 -; CHECK-NEXT: ld1 { v19.b }[4], [x11] -; CHECK-NEXT: add x9, sp, #520 -; CHECK-NEXT: ld1 { v17.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #648 -; CHECK-NEXT: ldr b22, [sp, #544] -; CHECK-NEXT: ld1 { v20.b }[4], [x10] -; CHECK-NEXT: smull2 v16.4s, v21.8h, v18.8h -; CHECK-NEXT: smull v18.4s, v21.4h, v18.4h -; CHECK-NEXT: ldr b21, [sp, #744] -; CHECK-NEXT: add x11, sp, #720 -; CHECK-NEXT: ld1 { v6.b }[5], [x9] +; CHECK-NEXT: ld1 { v7.b }[6], [x8] +; CHECK-NEXT: ld1 { v20.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #648 +; CHECK-NEXT: ld1 { v22.b }[4], [x10] +; CHECK-NEXT: ld1 { v21.b }[4], [x9] +; CHECK-NEXT: add x8, sp, #392 ; CHECK-NEXT: add x9, sp, #456 ; CHECK-NEXT: ld1 { v19.b }[5], [x11] -; CHECK-NEXT: mov v5.b[6], w6 -; CHECK-NEXT: ld1 { v17.b }[5], [x9] 
-; CHECK-NEXT: add x9, sp, #656 -; CHECK-NEXT: sshll v22.8h, v22.8b, #0 -; CHECK-NEXT: sshll v21.8h, v21.8b, #0 -; CHECK-NEXT: ld1 { v20.b }[5], [x9] -; CHECK-NEXT: add x10, sp, #528 -; CHECK-NEXT: add x11, sp, #728 -; CHECK-NEXT: ld1 { v6.b }[6], [x10] +; CHECK-NEXT: mov v3.b[6], w6 +; CHECK-NEXT: add x10, sp, #600 +; CHECK-NEXT: ld1 { v20.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #656 +; CHECK-NEXT: ld1 { v22.b }[5], [x9] +; CHECK-NEXT: ld1 { v21.b }[5], [x8] +; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x8, sp, #536 +; CHECK-NEXT: ld1 { v19.b }[6], [x10] ; CHECK-NEXT: add x10, sp, #464 -; CHECK-NEXT: ld1 { v19.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #664 -; CHECK-NEXT: ld1 { v17.b }[6], [x10] -; CHECK-NEXT: smull v21.4s, v22.4h, v21.4h -; CHECK-NEXT: movi v22.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v20.b }[6], [x11] -; CHECK-NEXT: mov v5.b[7], w7 -; CHECK-NEXT: add x9, sp, #536 -; CHECK-NEXT: add x10, sp, #736 -; CHECK-NEXT: add x11, sp, #208 -; CHECK-NEXT: add x13, sp, #264 -; CHECK-NEXT: ld1 { v6.b }[7], [x9] -; CHECK-NEXT: ld1 { v19.b }[7], [x10] -; CHECK-NEXT: ld1 { v4.b }[7], [x11] +; CHECK-NEXT: ld1 { v7.b }[7], [x8] +; CHECK-NEXT: ld1 { v20.b }[6], [x9] +; CHECK-NEXT: add x9, sp, #664 +; CHECK-NEXT: ld1 { v22.b }[6], [x10] +; CHECK-NEXT: ld1 { v21.b }[6], [x9] +; CHECK-NEXT: add x8, sp, #408 +; CHECK-NEXT: mov v3.b[7], w7 +; CHECK-NEXT: sshll v18.4s, v16.4h, #0 +; CHECK-NEXT: movi v16.2d, #0000000000000000 +; CHECK-NEXT: add x11, sp, #608 +; CHECK-NEXT: ld1 { v20.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #672 ; CHECK-NEXT: add x9, sp, #472 -; CHECK-NEXT: add x10, sp, #672 -; CHECK-NEXT: ld1 { v3.b }[6], [x13] -; CHECK-NEXT: ld1 { v17.b }[7], [x9] -; CHECK-NEXT: ld1 { v20.b }[7], [x10] -; CHECK-NEXT: add x8, sp, #336 -; CHECK-NEXT: mov v22.s[0], v21.s[0] -; CHECK-NEXT: movi v21.2d, #0000000000000000 -; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: sshll v19.8h, v19.8b, #0 -; CHECK-NEXT: ld1 { v2.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #272 -; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: sshll v17.8h, v17.8b, #0 -; CHECK-NEXT: sshll v20.8h, v20.8b, #0 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: smlal v18.4s, v6.4h, v19.4h -; CHECK-NEXT: smlal2 v16.4s, v6.8h, v19.8h -; CHECK-NEXT: mov v21.s[0], v7.s[0] -; CHECK-NEXT: smull v6.4s, v5.4h, v4.4h -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: smlal v22.4s, v17.4h, v20.4h -; CHECK-NEXT: smull2 v4.4s, v5.8h, v4.8h -; CHECK-NEXT: smlal v21.4s, v1.4h, v3.4h -; CHECK-NEXT: smlal2 v16.4s, v17.8h, v20.8h -; CHECK-NEXT: smlal v6.4s, v0.4h, v2.4h -; CHECK-NEXT: add v5.4s, v18.4s, v22.4s -; CHECK-NEXT: smlal2 v4.4s, v0.8h, v2.8h -; CHECK-NEXT: add v0.4s, v6.4s, v21.4s -; CHECK-NEXT: add v2.4s, v5.4s, v16.4s -; CHECK-NEXT: smlal2 v4.4s, v1.8h, v3.8h -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-NEXT: ld1 { v19.b }[7], [x11] +; CHECK-NEXT: ld1 { v21.b }[7], [x8] +; CHECK-NEXT: ld1 { v22.b }[7], [x9] +; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b +; CHECK-NEXT: smull v1.8h, v3.8b, v2.8b +; CHECK-NEXT: smull v2.8h, v5.8b, v4.8b +; CHECK-NEXT: mov v17.s[0], v18.s[0] +; CHECK-NEXT: smull v3.8h, v7.8b, v6.8b +; CHECK-NEXT: mov v16.s[0], v23.s[0] +; CHECK-NEXT: smull v4.8h, v20.8b, v19.8b +; CHECK-NEXT: smull v5.8h, v22.8b, v21.8b +; CHECK-NEXT: saddl v7.4s, v1.4h, v0.4h +; CHECK-NEXT: saddl2 v0.4s, v1.8h, v0.8h +; CHECK-NEXT: saddw v6.4s, 
v17.4s, v2.4h +; CHECK-NEXT: saddl v1.4s, v4.4h, v3.4h +; CHECK-NEXT: saddl2 v3.4s, v4.8h, v3.8h +; CHECK-NEXT: saddw v4.4s, v16.4s, v5.4h +; CHECK-NEXT: saddw2 v0.4s, v0.4s, v2.8h +; CHECK-NEXT: add v6.4s, v7.4s, v6.4s +; CHECK-NEXT: add v1.4s, v1.4s, v4.4s +; CHECK-NEXT: saddw2 v2.4s, v3.4s, v5.8h +; CHECK-NEXT: add v0.4s, v6.4s, v0.4s +; CHECK-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -1586,32 +1558,24 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr b0, [x0, #32] ; CHECK-NEXT: ldr b1, [x1, #32] -; CHECK-NEXT: ldp q2, q4, [x0] -; CHECK-NEXT: ldp q3, q6, [x1] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v5.8h, v2.8b, #0 -; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: ushll2 v16.8h, v4.16b, #0 -; CHECK-NEXT: ushll v7.8h, v3.8b, #0 -; CHECK-NEXT: ushll2 v3.8h, v3.16b, #0 -; CHECK-NEXT: ushll v4.8h, v4.8b, #0 -; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: ushll2 v19.8h, v6.16b, #0 -; CHECK-NEXT: ushll v6.8h, v6.8b, #0 -; CHECK-NEXT: umull2 v17.4s, v7.8h, v5.8h -; CHECK-NEXT: umull2 v18.4s, v3.8h, v2.8h -; CHECK-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NEXT: umull v0.4s, v3.4h, v2.4h -; CHECK-NEXT: umlal2 v18.4s, v19.8h, v16.8h -; CHECK-NEXT: umlal2 v17.4s, v6.8h, v4.8h -; CHECK-NEXT: umlal v1.4s, v7.4h, v5.4h -; CHECK-NEXT: umlal v0.4s, v19.4h, v16.4h -; CHECK-NEXT: add v2.4s, v17.4s, v18.4s -; CHECK-NEXT: umlal v1.4s, v6.4h, v4.4h -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ldp q4, q2, [x1] +; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b +; CHECK-NEXT: ldp q3, q1, [x0] +; CHECK-NEXT: umull v6.8h, v2.8b, v1.8b +; CHECK-NEXT: umull2 v1.8h, v2.16b, v1.16b +; CHECK-NEXT: umull v2.8h, v4.8b, v3.8b +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: umull2 v3.8h, v4.16b, v3.16b +; CHECK-NEXT: mov v5.s[0], v0.s[0] +; CHECK-NEXT: uaddl2 v4.4s, v2.8h, v6.8h +; CHECK-NEXT: uaddl2 v0.4s, v3.8h, v1.8h +; CHECK-NEXT: uaddl v1.4s, v3.4h, v1.4h +; CHECK-NEXT: add v0.4s, v4.4s, v0.4s +; CHECK-NEXT: uaddw v2.4s, v5.4s, v2.4h +; CHECK-NEXT: uaddw v2.4s, v2.4s, v6.4h ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1662,32 +1626,24 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr b0, [x0, #32] ; CHECK-NEXT: ldr b1, [x1, #32] -; CHECK-NEXT: ldp q2, q4, [x0] -; CHECK-NEXT: ldp q3, q6, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll v5.8h, v2.8b, #0 -; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: sshll2 v16.8h, v4.16b, #0 -; CHECK-NEXT: sshll v7.8h, v3.8b, #0 -; CHECK-NEXT: sshll2 v3.8h, v3.16b, #0 -; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sshll2 v19.8h, v6.16b, #0 -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: smull2 v17.4s, v7.8h, v5.8h -; CHECK-NEXT: smull2 v18.4s, v3.8h, v2.8h -; CHECK-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NEXT: smull v0.4s, v3.4h, v2.4h -; CHECK-NEXT: smlal2 v18.4s, v19.8h, v16.8h -; CHECK-NEXT: smlal2 v17.4s, v6.8h, v4.8h -; CHECK-NEXT: smlal v1.4s, v7.4h, v5.4h 
-; CHECK-NEXT: smlal v0.4s, v19.4h, v16.4h -; CHECK-NEXT: add v2.4s, v17.4s, v18.4s -; CHECK-NEXT: smlal v1.4s, v6.4h, v4.4h -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ldp q4, q2, [x1] +; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b +; CHECK-NEXT: ldp q3, q1, [x0] +; CHECK-NEXT: smull v6.8h, v2.8b, v1.8b +; CHECK-NEXT: smull2 v1.8h, v2.16b, v1.16b +; CHECK-NEXT: smull v2.8h, v4.8b, v3.8b +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: smull2 v3.8h, v4.16b, v3.16b +; CHECK-NEXT: mov v5.s[0], v0.s[0] +; CHECK-NEXT: saddl2 v4.4s, v2.8h, v6.8h +; CHECK-NEXT: saddl2 v0.4s, v3.8h, v1.8h +; CHECK-NEXT: saddl v1.4s, v3.4h, v1.4h +; CHECK-NEXT: add v0.4s, v4.4s, v0.4s +; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h +; CHECK-NEXT: saddw v2.4s, v2.4s, v6.4h ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1709,291 +1665,275 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: fmov s4, w0 -; CHECK-NEXT: ldr b0, [sp, #80] -; CHECK-NEXT: add x8, sp, #88 -; CHECK-NEXT: ldr b1, [sp, #144] -; CHECK-NEXT: add x10, sp, #152 -; CHECK-NEXT: ldr b6, [sp, #16] +; CHECK-NEXT: ldr b0, [sp, #344] +; CHECK-NEXT: add x8, sp, #352 +; CHECK-NEXT: ldr b2, [sp, #80] +; CHECK-NEXT: add x9, sp, #88 +; CHECK-NEXT: ldr b3, [sp, #216] +; CHECK-NEXT: add x10, sp, #232 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x9, sp, #96 -; CHECK-NEXT: ldr b2, [sp, #344] -; CHECK-NEXT: mov v4.b[1], w1 -; CHECK-NEXT: ld1 { v1.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #24 -; CHECK-NEXT: ld1 { v6.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #352 -; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: add x8, sp, #224 +; CHECK-NEXT: ld1 { v2.b }[1], [x9] +; CHECK-NEXT: add x9, sp, #360 +; CHECK-NEXT: ld1 { v3.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #96 +; CHECK-NEXT: add x11, sp, #376 +; CHECK-NEXT: ldr b4, [sp, #408] +; CHECK-NEXT: add x12, sp, #384 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #160 -; CHECK-NEXT: ld1 { v2.b }[1], [x10] -; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x10, sp, #32 -; CHECK-NEXT: add x11, sp, #112 -; CHECK-NEXT: mov v4.b[2], w2 -; CHECK-NEXT: ld1 { v6.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #168 -; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: ldr b5, [sp, #216] -; CHECK-NEXT: add x13, sp, #224 -; CHECK-NEXT: ld1 { v1.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #40 -; CHECK-NEXT: add x12, sp, #120 -; CHECK-NEXT: ld1 { v6.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #176 -; CHECK-NEXT: ld1 { v5.b }[1], [x13] -; CHECK-NEXT: mov v4.b[3], w3 -; CHECK-NEXT: ld1 { v0.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #48 -; CHECK-NEXT: add x8, sp, #360 -; CHECK-NEXT: ld1 { v1.b }[4], [x10] -; CHECK-NEXT: add x13, sp, #56 -; CHECK-NEXT: ld1 { v6.b }[4], [x11] -; CHECK-NEXT: ldr b7, [sp, #280] +; CHECK-NEXT: add x9, sp, #368 ; CHECK-NEXT: ld1 { v2.b }[2], [x8] -; CHECK-NEXT: add x15, sp, #232 +; CHECK-NEXT: ld1 { v3.b }[2], [x10] +; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: add x14, sp, #248 +; CHECK-NEXT: add x10, sp, #392 +; CHECK-NEXT: ldr b5, [sp, #144] +; CHECK-NEXT: ldr b6, [sp, #280] +; CHECK-NEXT: ld1 { v0.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #240 +; CHECK-NEXT: ld1 { v2.b }[3], [x8] +; CHECK-NEXT: ld1 { v3.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #112 +; 
CHECK-NEXT: add x8, sp, #400 +; CHECK-NEXT: add x13, sp, #128 +; CHECK-NEXT: ldr b17, [sp, #744] +; CHECK-NEXT: ldr b19, [sp, #480] +; CHECK-NEXT: ld1 { v0.b }[4], [x11] +; CHECK-NEXT: ld1 { v2.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #416 +; CHECK-NEXT: ld1 { v4.b }[1], [x9] +; CHECK-NEXT: ld1 { v3.b }[4], [x14] +; CHECK-NEXT: add x11, sp, #120 +; CHECK-NEXT: add x9, sp, #136 +; CHECK-NEXT: ldr b21, [sp, #936] +; CHECK-NEXT: ldr b22, [sp, #672] ; CHECK-NEXT: ld1 { v0.b }[5], [x12] -; CHECK-NEXT: add x14, sp, #184 -; CHECK-NEXT: mov v4.b[4], w4 -; CHECK-NEXT: ld1 { v5.b }[2], [x15] -; CHECK-NEXT: add x9, sp, #128 -; CHECK-NEXT: ld1 { v6.b }[5], [x13] -; CHECK-NEXT: add x13, sp, #288 -; CHECK-NEXT: add x10, sp, #368 -; CHECK-NEXT: ld1 { v7.b }[1], [x13] -; CHECK-NEXT: ld1 { v1.b }[5], [x14] -; CHECK-NEXT: ld1 { v2.b }[3], [x10] -; CHECK-NEXT: add x15, sp, #240 -; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #296 -; CHECK-NEXT: mov v4.b[5], w5 -; CHECK-NEXT: add x11, sp, #192 -; CHECK-NEXT: ld1 { v5.b }[3], [x15] -; CHECK-NEXT: ldr b3, [sp, #408] -; CHECK-NEXT: ld1 { v7.b }[2], [x9] -; CHECK-NEXT: add x12, sp, #64 -; CHECK-NEXT: add x13, sp, #376 -; CHECK-NEXT: ld1 { v1.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #416 -; CHECK-NEXT: ld1 { v6.b }[6], [x12] -; CHECK-NEXT: add x12, sp, #248 -; CHECK-NEXT: ld1 { v3.b }[1], [x11] -; CHECK-NEXT: mov v4.b[6], w6 -; CHECK-NEXT: ld1 { v2.b }[4], [x13] -; CHECK-NEXT: add x11, sp, #304 -; CHECK-NEXT: ld1 { v5.b }[4], [x12] -; CHECK-NEXT: ld1 { v7.b }[3], [x11] -; CHECK-NEXT: add x8, sp, #136 -; CHECK-NEXT: add x15, sp, #384 -; CHECK-NEXT: add x9, sp, #424 -; CHECK-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-NEXT: ld1 { v3.b }[2], [x9] -; CHECK-NEXT: ld1 { v2.b }[5], [x15] -; CHECK-NEXT: add x8, sp, #312 -; CHECK-NEXT: mov v4.b[7], w7 -; CHECK-NEXT: add x9, sp, #256 -; CHECK-NEXT: add x10, sp, #200 -; CHECK-NEXT: ld1 { v7.b }[4], [x8] -; CHECK-NEXT: ld1 { v5.b }[5], [x9] -; CHECK-NEXT: add x14, sp, #72 -; CHECK-NEXT: ld1 { v1.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #432 -; CHECK-NEXT: add x8, sp, #392 -; CHECK-NEXT: ld1 { v6.b }[7], [x14] -; CHECK-NEXT: ld1 { v3.b }[3], [x10] -; CHECK-NEXT: ld1 { v2.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #320 -; CHECK-NEXT: add x9, sp, #264 -; CHECK-NEXT: sshll v21.8h, v4.8b, #0 -; CHECK-NEXT: ldr b4, [sp, #208] -; CHECK-NEXT: ld1 { v7.b }[5], [x8] -; CHECK-NEXT: ld1 { v5.b }[6], [x9] +; CHECK-NEXT: ld1 { v2.b }[5], [x11] +; CHECK-NEXT: add x11, sp, #424 +; CHECK-NEXT: add x12, sp, #256 +; CHECK-NEXT: ld1 { v4.b }[2], [x11] +; CHECK-NEXT: add x11, sp, #152 +; CHECK-NEXT: ld1 { v3.b }[5], [x12] +; CHECK-NEXT: ld1 { v5.b }[1], [x11] +; CHECK-NEXT: add x11, sp, #432 +; CHECK-NEXT: ld1 { v0.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #264 +; CHECK-NEXT: ld1 { v2.b }[6], [x13] +; CHECK-NEXT: ld1 { v4.b }[3], [x11] +; CHECK-NEXT: add x11, sp, #160 +; CHECK-NEXT: ldr b7, [sp, #472] +; CHECK-NEXT: ld1 { v3.b }[6], [x10] +; CHECK-NEXT: ld1 { v5.b }[2], [x11] ; CHECK-NEXT: add x10, sp, #440 -; CHECK-NEXT: add x8, sp, #400 -; CHECK-NEXT: sshll v16.8h, v6.8b, #0 -; CHECK-NEXT: sshll v6.8h, v4.8b, #0 -; CHECK-NEXT: ld1 { v3.b }[4], [x10] -; CHECK-NEXT: ld1 { v2.b }[7], [x8] +; CHECK-NEXT: ld1 { v0.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #288 +; CHECK-NEXT: add x11, sp, #168 +; CHECK-NEXT: ld1 { v6.b }[1], [x8] ; CHECK-NEXT: add x8, sp, #272 -; CHECK-NEXT: add x9, sp, #328 -; CHECK-NEXT: ldr b4, [sp, #608] -; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: ld1 { v5.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #616 -; CHECK-NEXT: 
add x10, sp, #448 -; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: ldr b18, [sp, #480] -; CHECK-NEXT: ld1 { v3.b }[5], [x10] -; CHECK-NEXT: add x9, sp, #336 -; CHECK-NEXT: ldr b17, [sp, #472] -; CHECK-NEXT: add x8, sp, #488 -; CHECK-NEXT: ld1 { v7.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #624 -; CHECK-NEXT: ld1 { v18.b }[1], [x8] -; CHECK-NEXT: sshll v22.8h, v5.8b, #0 -; CHECK-NEXT: add x8, sp, #456 -; CHECK-NEXT: sshll v5.8h, v17.8b, #0 -; CHECK-NEXT: ld1 { v4.b }[2], [x9] -; CHECK-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #496 -; CHECK-NEXT: sshll v17.8h, v7.8b, #0 -; CHECK-NEXT: add x10, sp, #632 -; CHECK-NEXT: ld1 { v18.b }[2], [x8] -; CHECK-NEXT: add x9, sp, #464 -; CHECK-NEXT: add x8, sp, #504 -; CHECK-NEXT: smull v19.4s, v6.4h, v5.4h -; CHECK-NEXT: movi v5.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v4.b }[3], [x10] -; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: smull v6.4s, v16.4h, v17.4h -; CHECK-NEXT: add x9, sp, #640 -; CHECK-NEXT: ld1 { v18.b }[3], [x8] -; CHECK-NEXT: smull2 v16.4s, v16.8h, v17.8h -; CHECK-NEXT: ldr b17, [sp, #672] -; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #680 -; CHECK-NEXT: ldr b20, [sp, #544] -; CHECK-NEXT: mov v5.s[0], v19.s[0] -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ld1 { v17.b }[1], [x9] -; CHECK-NEXT: add x11, sp, #552 -; CHECK-NEXT: add x10, sp, #648 -; CHECK-NEXT: ld1 { v18.b }[4], [x8] -; CHECK-NEXT: ld1 { v20.b }[1], [x11] -; CHECK-NEXT: ld1 { v4.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #688 -; CHECK-NEXT: add x9, sp, #520 -; CHECK-NEXT: ld1 { v17.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #560 -; CHECK-NEXT: smull2 v7.4s, v21.8h, v22.8h -; CHECK-NEXT: ld1 { v18.b }[5], [x9] -; CHECK-NEXT: smlal v5.4s, v21.4h, v22.4h -; CHECK-NEXT: ld1 { v20.b }[2], [x10] -; CHECK-NEXT: ldr b21, [sp, #736] -; CHECK-NEXT: ldr b22, [sp, #1000] -; CHECK-NEXT: add x8, sp, #656 -; CHECK-NEXT: add x9, sp, #696 -; CHECK-NEXT: add x11, sp, #568 -; CHECK-NEXT: ld1 { v4.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #528 -; CHECK-NEXT: ld1 { v17.b }[3], [x9] -; CHECK-NEXT: sshll v21.8h, v21.8b, #0 -; CHECK-NEXT: sshll v24.8h, v22.8b, #0 -; CHECK-NEXT: ld1 { v18.b }[6], [x8] -; CHECK-NEXT: ld1 { v20.b }[3], [x11] -; CHECK-NEXT: add x10, sp, #704 -; CHECK-NEXT: ldr b23, [sp, #808] -; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: add x9, sp, #536 -; CHECK-NEXT: ld1 { v17.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #576 -; CHECK-NEXT: ldr b22, [sp, #744] -; CHECK-NEXT: add x11, sp, #816 -; CHECK-NEXT: smull v24.4s, v21.4h, v24.4h -; CHECK-NEXT: ld1 { v18.b }[7], [x9] -; CHECK-NEXT: ld1 { v20.b }[4], [x10] +; CHECK-NEXT: ld1 { v4.b }[4], [x10] +; CHECK-NEXT: ld1 { v3.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #296 +; CHECK-NEXT: ld1 { v5.b }[3], [x11] +; CHECK-NEXT: ld1 { v2.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #448 +; CHECK-NEXT: add x10, sp, #176 +; CHECK-NEXT: ld1 { v6.b }[2], [x8] +; CHECK-NEXT: ld1 { v4.b }[5], [x9] +; CHECK-NEXT: add x8, sp, #304 +; CHECK-NEXT: ld1 { v5.b }[4], [x10] +; CHECK-NEXT: add x9, sp, #456 +; CHECK-NEXT: add x10, sp, #184 +; CHECK-NEXT: add x11, sp, #192 +; CHECK-NEXT: ldr b16, [sp, #208] +; CHECK-NEXT: add x12, sp, #784 +; CHECK-NEXT: ld1 { v6.b }[3], [x8] +; CHECK-NEXT: ld1 { v4.b }[6], [x9] +; CHECK-NEXT: add x9, sp, #312 +; CHECK-NEXT: ld1 { v5.b }[5], [x10] ; CHECK-NEXT: add x10, sp, #752 -; CHECK-NEXT: ld1 { v23.b }[1], [x11] -; CHECK-NEXT: add x9, sp, #712 +; CHECK-NEXT: smull v7.8h, v16.8b, v7.8b +; CHECK-NEXT: ld1 { v17.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #760 +; CHECK-NEXT: ldr b16, 
[sp, #16] +; CHECK-NEXT: ld1 { v6.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #320 +; CHECK-NEXT: ldr b18, [sp, #1000] +; CHECK-NEXT: ld1 { v5.b }[6], [x11] +; CHECK-NEXT: add x11, sp, #768 +; CHECK-NEXT: ldr b20, [sp, #736] +; CHECK-NEXT: ld1 { v17.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #680 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ld1 { v6.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #488 ; CHECK-NEXT: ld1 { v22.b }[1], [x10] -; CHECK-NEXT: ld1 { v17.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #584 -; CHECK-NEXT: add x10, sp, #824 -; CHECK-NEXT: sshll v21.8h, v18.8b, #0 -; CHECK-NEXT: ld1 { v20.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #760 -; CHECK-NEXT: ldr b18, [sp, #936] -; CHECK-NEXT: ld1 { v23.b }[2], [x10] -; CHECK-NEXT: mov v19.s[0], v24.s[0] -; CHECK-NEXT: ldr b24, [sp, #872] -; CHECK-NEXT: ld1 { v22.b }[2], [x9] +; CHECK-NEXT: ld1 { v19.b }[1], [x9] ; CHECK-NEXT: add x9, sp, #944 -; CHECK-NEXT: add x11, sp, #880 -; CHECK-NEXT: add x10, sp, #768 -; CHECK-NEXT: ld1 { v18.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #832 -; CHECK-NEXT: ld1 { v24.b }[1], [x11] -; CHECK-NEXT: ld1 { v23.b }[3], [x9] -; CHECK-NEXT: ld1 { v22.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #952 -; CHECK-NEXT: add x12, sp, #888 -; CHECK-NEXT: add x9, sp, #592 +; CHECK-NEXT: add x10, sp, #688 +; CHECK-NEXT: ld1 { v21.b }[1], [x9] +; CHECK-NEXT: add x9, sp, #496 +; CHECK-NEXT: ld1 { v17.b }[3], [x11] +; CHECK-NEXT: ld1 { v22.b }[2], [x10] ; CHECK-NEXT: add x11, sp, #776 -; CHECK-NEXT: ld1 { v18.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #840 -; CHECK-NEXT: ld1 { v24.b }[2], [x12] -; CHECK-NEXT: ld1 { v23.b }[4], [x10] -; CHECK-NEXT: ld1 { v22.b }[4], [x11] -; CHECK-NEXT: ld1 { v20.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #960 -; CHECK-NEXT: add x11, sp, #896 -; CHECK-NEXT: add x10, sp, #784 -; CHECK-NEXT: ld1 { v18.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #848 -; CHECK-NEXT: ld1 { v24.b }[3], [x11] -; CHECK-NEXT: ld1 { v23.b }[5], [x9] -; CHECK-NEXT: ld1 { v22.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #968 -; CHECK-NEXT: add x12, sp, #904 -; CHECK-NEXT: add x9, sp, #600 +; CHECK-NEXT: add x10, sp, #504 +; CHECK-NEXT: ld1 { v19.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #952 +; CHECK-NEXT: smull v20.8h, v20.8b, v18.8b +; CHECK-NEXT: ld1 { v21.b }[2], [x9] +; CHECK-NEXT: ld1 { v17.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #696 +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: ld1 { v22.b }[3], [x11] ; CHECK-NEXT: add x11, sp, #792 -; CHECK-NEXT: ld1 { v18.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #856 -; CHECK-NEXT: ld1 { v24.b }[4], [x12] -; CHECK-NEXT: ld1 { v23.b }[6], [x10] -; CHECK-NEXT: ld1 { v22.b }[6], [x11] -; CHECK-NEXT: ld1 { v20.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #976 -; CHECK-NEXT: add x11, sp, #912 -; CHECK-NEXT: add x10, sp, #800 -; CHECK-NEXT: ld1 { v18.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #864 -; CHECK-NEXT: ld1 { v24.b }[5], [x11] -; CHECK-NEXT: ld1 { v23.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #720 -; CHECK-NEXT: ld1 { v22.b }[7], [x10] +; CHECK-NEXT: ld1 { v19.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #960 +; CHECK-NEXT: ld1 { v16.b }[1], [x9] +; CHECK-NEXT: ld1 { v21.b }[3], [x10] +; CHECK-NEXT: add x9, sp, #512 +; CHECK-NEXT: ld1 { v17.b }[5], [x12] +; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: add x12, sp, #800 +; CHECK-NEXT: movi v18.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v19.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #968 +; CHECK-NEXT: ld1 { v22.b }[4], [x10] +; CHECK-NEXT: ld1 { v21.b }[4], [x9] +; CHECK-NEXT: add x10, sp, #520 +; CHECK-NEXT: ld1 { v17.b }[6], [x11] +; CHECK-NEXT: add x11, sp, #712 +; 
CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: sshll v23.4s, v20.4h, #0 +; CHECK-NEXT: ld1 { v19.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #976 +; CHECK-NEXT: ld1 { v22.b }[5], [x11] +; CHECK-NEXT: ld1 { v21.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #528 +; CHECK-NEXT: add x11, sp, #720 +; CHECK-NEXT: ld1 { v16.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #536 +; CHECK-NEXT: ld1 { v17.b }[7], [x12] +; CHECK-NEXT: ld1 { v19.b }[6], [x10] ; CHECK-NEXT: add x10, sp, #984 -; CHECK-NEXT: ld1 { v17.b }[6], [x9] +; CHECK-NEXT: ld1 { v22.b }[6], [x11] +; CHECK-NEXT: ld1 { v21.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #992 +; CHECK-NEXT: add x11, sp, #728 +; CHECK-NEXT: mov v1.b[1], w1 +; CHECK-NEXT: ldr b20, [sp, #872] +; CHECK-NEXT: mov v18.s[0], v23.s[0] +; CHECK-NEXT: ld1 { v19.b }[7], [x9] +; CHECK-NEXT: ld1 { v22.b }[7], [x11] +; CHECK-NEXT: add x9, sp, #328 +; CHECK-NEXT: ld1 { v21.b }[7], [x10] +; CHECK-NEXT: add x10, sp, #40 +; CHECK-NEXT: ldr b23, [sp, #608] +; CHECK-NEXT: ld1 { v16.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #816 +; CHECK-NEXT: add x11, sp, #552 +; CHECK-NEXT: smull v17.8h, v19.8b, v17.8b +; CHECK-NEXT: ld1 { v6.b }[6], [x9] +; CHECK-NEXT: add x9, sp, #880 +; CHECK-NEXT: smull v19.8h, v22.8b, v21.8b +; CHECK-NEXT: ldr b21, [sp, #808] +; CHECK-NEXT: ldr b22, [sp, #544] +; CHECK-NEXT: add x12, sp, #616 +; CHECK-NEXT: mov v1.b[2], w2 +; CHECK-NEXT: ld1 { v20.b }[1], [x9] +; CHECK-NEXT: ld1 { v21.b }[1], [x10] +; CHECK-NEXT: ld1 { v22.b }[1], [x11] +; CHECK-NEXT: ld1 { v23.b }[1], [x12] +; CHECK-NEXT: add x11, sp, #824 +; CHECK-NEXT: add x12, sp, #560 +; CHECK-NEXT: add x9, sp, #888 +; CHECK-NEXT: add x13, sp, #624 +; CHECK-NEXT: add x10, sp, #48 +; CHECK-NEXT: ld1 { v20.b }[2], [x9] +; CHECK-NEXT: ld1 { v21.b }[2], [x11] +; CHECK-NEXT: ld1 { v22.b }[2], [x12] +; CHECK-NEXT: ld1 { v23.b }[2], [x13] +; CHECK-NEXT: mov v1.b[3], w3 +; CHECK-NEXT: ld1 { v16.b }[4], [x10] +; CHECK-NEXT: add x10, sp, #832 +; CHECK-NEXT: add x11, sp, #568 +; CHECK-NEXT: add x9, sp, #896 +; CHECK-NEXT: add x12, sp, #632 +; CHECK-NEXT: ld1 { v21.b }[3], [x10] +; CHECK-NEXT: ld1 { v22.b }[3], [x11] +; CHECK-NEXT: ld1 { v20.b }[3], [x9] +; CHECK-NEXT: ld1 { v23.b }[3], [x12] +; CHECK-NEXT: add x11, sp, #840 +; CHECK-NEXT: add x12, sp, #576 +; CHECK-NEXT: mov v1.b[4], w4 +; CHECK-NEXT: add x9, sp, #904 +; CHECK-NEXT: add x13, sp, #640 +; CHECK-NEXT: ld1 { v21.b }[4], [x11] +; CHECK-NEXT: ld1 { v22.b }[4], [x12] +; CHECK-NEXT: add x10, sp, #56 +; CHECK-NEXT: ld1 { v20.b }[4], [x9] +; CHECK-NEXT: ld1 { v23.b }[4], [x13] +; CHECK-NEXT: ld1 { v16.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #848 +; CHECK-NEXT: add x11, sp, #584 +; CHECK-NEXT: add x9, sp, #912 +; CHECK-NEXT: add x12, sp, #648 +; CHECK-NEXT: ld1 { v21.b }[5], [x10] +; CHECK-NEXT: ld1 { v22.b }[5], [x11] +; CHECK-NEXT: mov v1.b[5], w5 +; CHECK-NEXT: ld1 { v20.b }[5], [x9] +; CHECK-NEXT: ld1 { v23.b }[5], [x12] +; CHECK-NEXT: add x11, sp, #856 +; CHECK-NEXT: add x12, sp, #592 ; CHECK-NEXT: add x9, sp, #920 -; CHECK-NEXT: ld1 { v18.b }[6], [x10] -; CHECK-NEXT: ld1 { v24.b }[6], [x9] -; CHECK-NEXT: add x10, sp, #728 -; CHECK-NEXT: add x8, sp, #664 -; CHECK-NEXT: sshll v20.8h, v20.8b, #0 -; CHECK-NEXT: sshll v22.8h, v22.8b, #0 -; CHECK-NEXT: sshll v23.8h, v23.8b, #0 -; CHECK-NEXT: add x9, sp, #992 -; CHECK-NEXT: ld1 { v17.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #928 -; CHECK-NEXT: ld1 { v18.b }[7], [x9] +; CHECK-NEXT: add x13, sp, #656 +; CHECK-NEXT: ld1 { v21.b }[6], [x11] +; CHECK-NEXT: ld1 { v22.b }[6], [x12] +; CHECK-NEXT: add x10, sp, #64 +; CHECK-NEXT: 
ld1 { v20.b }[6], [x9] +; CHECK-NEXT: ld1 { v23.b }[6], [x13] +; CHECK-NEXT: mov v1.b[6], w6 +; CHECK-NEXT: ld1 { v16.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #864 +; CHECK-NEXT: add x11, sp, #600 +; CHECK-NEXT: add x9, sp, #928 +; CHECK-NEXT: add x12, sp, #664 +; CHECK-NEXT: ld1 { v21.b }[7], [x10] +; CHECK-NEXT: ld1 { v22.b }[7], [x11] +; CHECK-NEXT: add x8, sp, #464 +; CHECK-NEXT: ld1 { v20.b }[7], [x9] +; CHECK-NEXT: ld1 { v23.b }[7], [x12] ; CHECK-NEXT: ld1 { v4.b }[7], [x8] -; CHECK-NEXT: ld1 { v24.b }[7], [x10] -; CHECK-NEXT: smlal v19.4s, v21.4h, v22.4h -; CHECK-NEXT: smull2 v21.4s, v21.8h, v22.8h -; CHECK-NEXT: smull v22.4s, v20.4h, v23.4h -; CHECK-NEXT: smull2 v20.4s, v20.8h, v23.8h -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: sshll v17.8h, v17.8b, #0 -; CHECK-NEXT: sshll v18.8h, v18.8b, #0 -; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: sshll v23.8h, v24.8b, #0 -; CHECK-NEXT: smlal2 v16.4s, v1.8h, v3.8h -; CHECK-NEXT: smlal v6.4s, v1.4h, v3.4h -; CHECK-NEXT: smlal2 v7.4s, v0.8h, v2.8h -; CHECK-NEXT: smlal v5.4s, v0.4h, v2.4h -; CHECK-NEXT: smlal2 v20.4s, v17.8h, v18.8h -; CHECK-NEXT: smlal v22.4s, v17.4h, v18.4h -; CHECK-NEXT: smlal2 v21.4s, v4.8h, v23.8h -; CHECK-NEXT: smlal v19.4s, v4.4h, v23.4h -; CHECK-NEXT: add v0.4s, v7.4s, v16.4s -; CHECK-NEXT: add v1.4s, v5.4s, v6.4s -; CHECK-NEXT: add v2.4s, v21.4s, v20.4s -; CHECK-NEXT: add v3.4s, v19.4s, v22.4s -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: add v1.4s, v3.4s, v2.4s +; CHECK-NEXT: add x8, sp, #200 +; CHECK-NEXT: mov v1.b[7], w7 +; CHECK-NEXT: add x10, sp, #336 +; CHECK-NEXT: ld1 { v5.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #72 +; CHECK-NEXT: smull v21.8h, v22.8b, v21.8b +; CHECK-NEXT: movi v22.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v6.b }[7], [x10] +; CHECK-NEXT: ld1 { v16.b }[7], [x8] +; CHECK-NEXT: smull v20.8h, v23.8b, v20.8b +; CHECK-NEXT: sshll v7.4s, v7.4h, #0 +; CHECK-NEXT: smull v0.8h, v2.8b, v0.8b +; CHECK-NEXT: saddw v2.4s, v18.4s, v17.4h +; CHECK-NEXT: smull v1.8h, v1.8b, v3.8b +; CHECK-NEXT: smull v3.8h, v5.8b, v4.8b +; CHECK-NEXT: smull v4.8h, v16.8b, v6.8b +; CHECK-NEXT: saddl2 v5.4s, v21.8h, v19.8h +; CHECK-NEXT: mov v22.s[0], v7.s[0] +; CHECK-NEXT: saddl v7.4s, v21.4h, v19.4h +; CHECK-NEXT: saddl2 v6.4s, v17.8h, v20.8h +; CHECK-NEXT: saddw v2.4s, v2.4s, v20.4h +; CHECK-NEXT: saddl2 v17.4s, v1.8h, v0.8h +; CHECK-NEXT: saddl2 v16.4s, v4.8h, v3.8h +; CHECK-NEXT: saddl v3.4s, v4.4h, v3.4h +; CHECK-NEXT: saddw v1.4s, v22.4s, v1.4h +; CHECK-NEXT: add v5.4s, v6.4s, v5.4s +; CHECK-NEXT: add v2.4s, v2.4s, v7.4s +; CHECK-NEXT: add v6.4s, v17.4s, v16.4s +; CHECK-NEXT: saddw v0.4s, v1.4s, v0.4h +; CHECK-NEXT: add v1.4s, v2.4s, v5.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v1.4s, v6.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/neon-extmul.ll b/llvm/test/CodeGen/AArch64/neon-extmul.ll index 502673fa76694b..3dbc033dfab964 100644 --- a/llvm/test/CodeGen/AArch64/neon-extmul.ll +++ b/llvm/test/CodeGen/AArch64/neon-extmul.ll @@ -5,10 +5,9 @@ define <8 x i32> @extmuls_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-SD-LABEL: extmuls_v8i8_i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT: sshll v2.8h, v1.8b, #0 -; CHECK-SD-NEXT: smull2 v1.4s, v0.8h, v2.8h -; CHECK-SD-NEXT: smull v0.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: smull v0.8h, 
v0.8b, v1.8b +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: extmuls_v8i8_i32: @@ -28,10 +27,9 @@ entry: define <8 x i32> @extmulu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-SD-LABEL: extmulu_v8i8_i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT: ushll v2.8h, v1.8b, #0 -; CHECK-SD-NEXT: umull2 v1.4s, v0.8h, v2.8h -; CHECK-SD-NEXT: umull v0.4s, v0.4h, v2.4h +; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: extmulu_v8i8_i32: @@ -78,12 +76,9 @@ entry: define <8 x i32> @extmuladds_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1, <8 x i32> %b) { ; CHECK-SD-LABEL: extmuladds_v8i8_i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT: smlal2 v3.4s, v0.8h, v1.8h -; CHECK-SD-NEXT: smlal v2.4s, v0.4h, v1.4h -; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: mov v1.16b, v3.16b +; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: saddw2 v1.4s, v3.4s, v0.8h +; CHECK-SD-NEXT: saddw v0.4s, v2.4s, v0.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: extmuladds_v8i8_i32: @@ -106,12 +101,9 @@ entry: define <8 x i32> @extmuladdu_v8i8_i32(<8 x i8> %s0, <8 x i8> %s1, <8 x i32> %b) { ; CHECK-SD-LABEL: extmuladdu_v8i8_i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT: umlal2 v3.4s, v0.8h, v1.8h -; CHECK-SD-NEXT: umlal v2.4s, v0.4h, v1.4h -; CHECK-SD-NEXT: mov v0.16b, v2.16b -; CHECK-SD-NEXT: mov v1.16b, v3.16b +; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: uaddw2 v1.4s, v3.4s, v0.8h +; CHECK-SD-NEXT: uaddw v0.4s, v2.4s, v0.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: extmuladdu_v8i8_i32: @@ -168,16 +160,13 @@ entry: define <8 x i64> @extmuls_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-SD-LABEL: extmuls_v8i8_i64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-SD-NEXT: sshll v2.4s, v0.4h, #0 -; CHECK-SD-NEXT: sshll v4.4s, v1.4h, #0 -; CHECK-SD-NEXT: sshll2 v5.4s, v0.8h, #0 -; CHECK-SD-NEXT: sshll2 v6.4s, v1.8h, #0 -; CHECK-SD-NEXT: smull v0.2d, v2.2s, v4.2s -; CHECK-SD-NEXT: smull2 v1.2d, v2.4s, v4.4s -; CHECK-SD-NEXT: smull2 v3.2d, v5.4s, v6.4s -; CHECK-SD-NEXT: smull v2.2d, v5.2s, v6.2s +; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: extmuls_v8i8_i64: @@ -203,16 +192,13 @@ entry: define <8 x i64> @extmulu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1) { ; CHECK-SD-LABEL: extmulu_v8i8_i64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-SD-NEXT: ushll v2.4s, v0.4h, #0 -; CHECK-SD-NEXT: ushll v4.4s, v1.4h, #0 -; CHECK-SD-NEXT: ushll2 v5.4s, v0.8h, #0 -; CHECK-SD-NEXT: ushll2 v6.4s, v1.8h, #0 -; CHECK-SD-NEXT: umull v0.2d, v2.2s, v4.2s -; CHECK-SD-NEXT: umull2 v1.2d, v2.4s, v4.4s -; CHECK-SD-NEXT: umull2 v3.2d, v5.4s, v6.4s -; CHECK-SD-NEXT: umull v2.2d, v5.2s, v6.2s +; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 
v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: extmulu_v8i8_i64:
@@ -309,19 +295,13 @@ entry:
 define <8 x i64> @extmuladds_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1, <8 x i64> %b) {
 ; CHECK-SD-LABEL: extmuladds_v8i8_i64:
 ; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-SD-NEXT: sshll v6.4s, v0.4h, #0
-; CHECK-SD-NEXT: sshll v7.4s, v1.4h, #0
-; CHECK-SD-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-SD-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-SD-NEXT: smlal v2.2d, v6.2s, v7.2s
-; CHECK-SD-NEXT: smlal2 v3.2d, v6.4s, v7.4s
-; CHECK-SD-NEXT: smlal2 v5.2d, v0.4s, v1.4s
-; CHECK-SD-NEXT: smlal v4.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: mov v2.16b, v4.16b
+; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll2 v6.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT: saddw2 v5.2d, v5.2d, v6.4s
+; CHECK-SD-NEXT: saddw v0.2d, v2.2d, v1.2s
+; CHECK-SD-NEXT: saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-SD-NEXT: saddw v2.2d, v4.2d, v6.2s
 ; CHECK-SD-NEXT: mov v3.16b, v5.16b
 ; CHECK-SD-NEXT: ret
 ;
@@ -353,19 +333,13 @@ entry:
 define <8 x i64> @extmuladdu_v8i8_i64(<8 x i8> %s0, <8 x i8> %s1, <8 x i64> %b) {
 ; CHECK-SD-LABEL: extmuladdu_v8i8_i64:
 ; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-SD-NEXT: ushll v6.4s, v0.4h, #0
-; CHECK-SD-NEXT: ushll v7.4s, v1.4h, #0
-; CHECK-SD-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-SD-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-SD-NEXT: umlal v2.2d, v6.2s, v7.2s
-; CHECK-SD-NEXT: umlal2 v3.2d, v6.4s, v7.4s
-; CHECK-SD-NEXT: umlal2 v5.2d, v0.4s, v1.4s
-; CHECK-SD-NEXT: umlal v4.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: mov v1.16b, v3.16b
-; CHECK-SD-NEXT: mov v2.16b, v4.16b
+; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll2 v6.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT: uaddw2 v5.2d, v5.2d, v6.4s
+; CHECK-SD-NEXT: uaddw v0.2d, v2.2d, v1.2s
+; CHECK-SD-NEXT: uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-SD-NEXT: uaddw v2.2d, v4.2d, v6.2s
 ; CHECK-SD-NEXT: mov v3.16b, v5.16b
 ; CHECK-SD-NEXT: ret
 ;
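The new ptrauth-reloc.ll test below pins down how signed pointer constants are emitted as data. A ptrauth constant carries the pointer, an i32 signing key, an optional i64 discriminator, and an optional address-discriminator slot; the asm printer folds these into an @AUTH specifier on the emitted .xword (ELF) or .quad (Mach-O) directive. The err-key.ll and err-disc.ll parts further down confirm the accepted ranges: keys 0 through 3, naming ia, ib, da and db, and 16-bit discriminators. As a rough sketch with a made-up global (not taken from the test):

@f = external global i32

; key 3 is db, discriminator 7, no address diversity; by analogy with the
; checks below this would be expected to emit ".xword f@AUTH(db,7)" on ELF
; and ".quad _f@AUTH(db,7)" on Mach-O
@f.signed = constant ptr ptrauth (ptr @f, i32 3, i64 7)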
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-reloc.ll b/llvm/test/CodeGen/AArch64/ptrauth-reloc.ll
new file mode 100644
index 00000000000000..b7304b957a0013
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-reloc.ll
@@ -0,0 +1,176 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- ok.ll
+
+; RUN: llc < ok.ll -mtriple arm64e-apple-darwin \
+; RUN: | FileCheck %s --check-prefix=CHECK-MACHO
+; RUN: llc < ok.ll -mtriple aarch64-elf -mattr=+pauth \
+; RUN: | FileCheck %s --check-prefix=CHECK-ELF
+
+; RUN: llc < ok.ll -mtriple arm64e-apple-darwin \
+; RUN: -global-isel -verify-machineinstrs -global-isel-abort=1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-MACHO
+; RUN: llc < ok.ll -mtriple aarch64-elf -mattr=+pauth \
+; RUN: -global-isel -verify-machineinstrs -global-isel-abort=1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ELF
+
+@g = external global i32
+
+@g_weak = extern_weak global i32
+
+@g_strong_def = constant i32 42
+
+; CHECK-ELF-LABEL: .globl g.ref.ia.0
+; CHECK-ELF-NEXT: .p2align 4
+; CHECK-ELF-NEXT: g.ref.ia.0:
+; CHECK-ELF-NEXT: .xword 5
+; CHECK-ELF-NEXT: .xword g@AUTH(ia,0)
+; CHECK-ELF-NEXT: .xword 6
+
+; CHECK-MACHO-LABEL: .section __DATA,__const
+; CHECK-MACHO-NEXT: .globl _g.ref.ia.0
+; CHECK-MACHO-NEXT: .p2align 4
+; CHECK-MACHO-NEXT: _g.ref.ia.0:
+; CHECK-MACHO-NEXT: .quad 5
+; CHECK-MACHO-NEXT: .quad _g@AUTH(ia,0)
+; CHECK-MACHO-NEXT: .quad 6
+
+@g.ref.ia.0 = constant { i64, ptr, i64 } { i64 5, ptr ptrauth (ptr @g, i32 0), i64 6 }
+
+; CHECK-ELF-LABEL: .globl g.ref.ia.42
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g.ref.ia.42:
+; CHECK-ELF-NEXT: .xword g@AUTH(ia,42)
+
+; CHECK-MACHO-LABEL: .globl _g.ref.ia.42
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g.ref.ia.42:
+; CHECK-MACHO-NEXT: .quad _g@AUTH(ia,42)
+
+@g.ref.ia.42 = constant ptr ptrauth (ptr @g, i32 0, i64 42)
+
+; CHECK-ELF-LABEL: .globl g.ref.ib.0
+; CHECK-ELF-NEXT: .p2align 4
+; CHECK-ELF-NEXT: g.ref.ib.0:
+; CHECK-ELF-NEXT: .xword 5
+; CHECK-ELF-NEXT: .xword g@AUTH(ib,0)
+; CHECK-ELF-NEXT: .xword 6
+
+; CHECK-MACHO-LABEL: .globl _g.ref.ib.0
+; CHECK-MACHO-NEXT: .p2align 4
+; CHECK-MACHO-NEXT: _g.ref.ib.0:
+; CHECK-MACHO-NEXT: .quad 5
+; CHECK-MACHO-NEXT: .quad _g@AUTH(ib,0)
+; CHECK-MACHO-NEXT: .quad 6
+
+@g.ref.ib.0 = constant { i64, ptr, i64 } { i64 5, ptr ptrauth (ptr @g, i32 1, i64 0), i64 6 }
+
+; CHECK-ELF-LABEL: .globl g.ref.da.42.addr
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g.ref.da.42.addr:
+; CHECK-ELF-NEXT: .xword g@AUTH(da,42,addr)
+
+; CHECK-MACHO-LABEL: .globl _g.ref.da.42.addr
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g.ref.da.42.addr:
+; CHECK-MACHO-NEXT: .quad _g@AUTH(da,42,addr)
+
+@g.ref.da.42.addr = constant ptr ptrauth (ptr @g, i32 2, i64 42, ptr @g.ref.da.42.addr)
+
+; CHECK-ELF-LABEL: .globl g.offset.ref.da.0
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g.offset.ref.da.0:
+; CHECK-ELF-NEXT: .xword (g+16)@AUTH(da,0)
+
+; CHECK-MACHO-LABEL: .globl _g.offset.ref.da.0
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g.offset.ref.da.0:
+; CHECK-MACHO-NEXT: .quad (_g+16)@AUTH(da,0)
+
+@g.offset.ref.da.0 = constant ptr ptrauth (ptr getelementptr (i8, ptr @g, i64 16), i32 2)
+
+; CHECK-ELF-LABEL: .globl g.big_offset.ref.da.0
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g.big_offset.ref.da.0:
+; CHECK-ELF-NEXT: .xword (g+2147549185)@AUTH(da,0)
+
+; CHECK-MACHO-LABEL: .globl _g.big_offset.ref.da.0
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g.big_offset.ref.da.0:
+; CHECK-MACHO-NEXT: .quad (_g+2147549185)@AUTH(da,0)
+
+@g.big_offset.ref.da.0 = constant ptr ptrauth (ptr getelementptr (i8, ptr @g, i64 add (i64 2147483648, i64 65537)), i32 2)
+
+; CHECK-ELF-LABEL: .globl g.weird_ref.da.0
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g.weird_ref.da.0:
+; CHECK-ELF-NEXT: .xword (g+16)@AUTH(da,0)
+
+; CHECK-MACHO-LABEL: .globl _g.weird_ref.da.0
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g.weird_ref.da.0:
+; CHECK-MACHO-NEXT: .quad (_g+16)@AUTH(da,0)
+
+@g.weird_ref.da.0 = constant i64 ptrtoint (ptr inttoptr (i64 ptrtoint (ptr ptrauth (ptr getelementptr (i8, ptr @g, i64 16), i32 2) to i64) to ptr) to i64)
+
+; CHECK-ELF-LABEL: .globl g_weak.ref.ia.42
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g_weak.ref.ia.42:
+; CHECK-ELF-NEXT: .xword g_weak@AUTH(ia,42)
+
+; CHECK-MACHO-LABEL: .globl _g_weak.ref.ia.42
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g_weak.ref.ia.42:
+; CHECK-MACHO-NEXT: .quad _g_weak@AUTH(ia,42)
+
+@g_weak.ref.ia.42 = constant ptr ptrauth (ptr @g_weak, i32 0, i64 42)
+
+; CHECK-ELF-LABEL: .globl g_strong_def.ref.da.0
+; CHECK-ELF-NEXT: .p2align 3
+; CHECK-ELF-NEXT: g_strong_def.ref.da.0:
+; CHECK-ELF-NEXT: .xword g_strong_def@AUTH(da,0)
+
+; CHECK-MACHO-LABEL: .globl _g_strong_def.ref.da.0
+; CHECK-MACHO-NEXT: .p2align 3
+; CHECK-MACHO-NEXT: _g_strong_def.ref.da.0:
+; CHECK-MACHO-NEXT: .quad _g_strong_def@AUTH(da,0)
+
+@g_strong_def.ref.da.0 = constant ptr ptrauth (ptr @g_strong_def, i32 2)
+
+;--- err-key.ll
+
+; RUN: not --crash llc < err-key.ll -mtriple arm64e-apple-darwin 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-KEY
+; RUN: not --crash llc < err-key.ll -mtriple aarch64-elf -mattr=+pauth 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-KEY
+
+; RUN: not --crash llc < err-key.ll -mtriple arm64e-apple-darwin \
+; RUN: -global-isel -verify-machineinstrs -global-isel-abort=1 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-KEY
+; RUN: not --crash llc < err-key.ll -mtriple aarch64-elf -mattr=+pauth \
+; RUN: -global-isel -verify-machineinstrs -global-isel-abort=1 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-KEY
+
+; CHECK-ERR-KEY: LLVM ERROR: AArch64 PAC Key ID '4' out of range [0, 3]
+
+@g = external global i32
+@g.ref.4.0 = constant ptr ptrauth (ptr @g, i32 4, i64 0)
+
+;--- err-disc.ll
+
+; RUN: not --crash llc < err-disc.ll -mtriple arm64e-apple-darwin 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-DISC
+; RUN: not --crash llc < err-disc.ll -mtriple aarch64-elf -mattr=+pauth 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-DISC
+
+; RUN: not --crash llc < err-disc.ll -mtriple arm64e-apple-darwin \
+; RUN: -global-isel -verify-machineinstrs -global-isel-abort=1 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-DISC
+; RUN: not --crash llc < err-disc.ll -mtriple aarch64-elf -mattr=+pauth \
+; RUN: -global-isel -verify-machineinstrs -global-isel-abort=1 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-ERR-DISC
+
+; CHECK-ERR-DISC: LLVM ERROR: AArch64 PAC Discriminator '65536' out of range [0, 0xFFFF]
+
+@g = external global i32
+@g.ref.ia.65536 = constant ptr ptrauth (ptr @g, i32 0, i64 65536)
diff --git a/llvm/test/CodeGen/AArch64/sve-lsrchain.ll b/llvm/test/CodeGen/AArch64/sve-lsrchain.ll
index 9c7bffb921ce29..1931cfc2ef51de 100644
--- a/llvm/test/CodeGen/AArch64/sve-lsrchain.ll
+++ b/llvm/test/CodeGen/AArch64/sve-lsrchain.ll
@@ -14,24 +14,22 @@ define void @test(ptr nocapture noundef readonly %kernel, i32 noundef %kw, float
 ; CHECK-NEXT: // %bb.2: // %for.body.us.preheader
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: add x11, x2, x11, lsl #1
-; CHECK-NEXT: mov x12, #-16 // =0xfffffffffffffff0
-; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: mov x9, xzr
 ; CHECK-NEXT: mov w10, wzr
-; CHECK-NEXT: addvl x12, x12, #1
-; CHECK-NEXT: mov x13, #4 // =0x4
-; CHECK-NEXT: mov x14, #8 // =0x8
+; CHECK-NEXT: mov x12, #4 // =0x4
+; CHECK-NEXT: mov x13, #8 // =0x8
 ; CHECK-NEXT: .LBB0_3: // %for.body.us
 ; CHECK-NEXT: // =>This Loop Header: Depth=1
 ; CHECK-NEXT: // Child Loop BB0_4 Depth 2
-; CHECK-NEXT: add x15, x0, x9, lsl #2
-; CHECK-NEXT: sbfiz x16, x8, #1, #32
-; CHECK-NEXT: mov x17, x2
-; CHECK-NEXT: ldp s0, s1, [x15]
-; CHECK-NEXT: add x16, x16, #8
-; CHECK-NEXT: ldp s2, s3, [x15, #8]
-; CHECK-NEXT: ubfiz x15, x8, #1, #32
+; CHECK-NEXT: add x14, x0, x9, lsl #2
+; CHECK-NEXT: sbfiz x15, x8, #1, #32
+; CHECK-NEXT: mov x16, x2
+; CHECK-NEXT: ldp s0, s1, [x14]
+; CHECK-NEXT: add x15, x15, #8
+; CHECK-NEXT: ldp s2, s3, [x14, #8]
+; CHECK-NEXT: ubfiz x14, x8, #1, #32
 ; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: fcvt h1, s1
 ; CHECK-NEXT: fcvt h2, s2
@@ -43,56 +41,52 @@ define void @test(ptr nocapture noundef readonly %kernel, 
i32 noundef %kw, float ; CHECK-NEXT: .LBB0_4: // %for.cond.i.preheader.us ; CHECK-NEXT: // Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: // => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ld1b { z4.b }, p1/z, [x17, x15] -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17] -; CHECK-NEXT: add x18, x17, x16 -; CHECK-NEXT: add x3, x17, x15 +; CHECK-NEXT: ld1b { z4.b }, p1/z, [x16, x14] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x16] +; CHECK-NEXT: add x17, x16, x15 +; CHECK-NEXT: add x18, x16, x14 +; CHECK-NEXT: add x3, x17, #8 +; CHECK-NEXT: add x4, x17, #16 ; CHECK-NEXT: fmad z4.h, p0/m, z0.h, z5.h -; CHECK-NEXT: ld1b { z5.b }, p1/z, [x17, x16] +; CHECK-NEXT: ld1b { z5.b }, p1/z, [x16, x15] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z1.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x13, lsl #1] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, x12, lsl #1] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z2.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x14, lsl #1] -; CHECK-NEXT: add x18, x18, #16 +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, x13, lsl #1] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z3.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, #1, mul vl] -; CHECK-NEXT: st1h { z4.h }, p0, [x17] -; CHECK-NEXT: ld1h { z4.h }, p0/z, [x3, #1, mul vl] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x16, #1, mul vl] +; CHECK-NEXT: st1h { z4.h }, p0, [x16] +; CHECK-NEXT: ld1h { z4.h }, p0/z, [x18, #1, mul vl] ; CHECK-NEXT: fmad z4.h, p0/m, z0.h, z5.h -; CHECK-NEXT: ld1b { z5.b }, p1/z, [x18, x12] -; CHECK-NEXT: add x18, x18, x12 +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, #1, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z1.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x13, lsl #1] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x3, #1, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z2.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x14, lsl #1] -; CHECK-NEXT: add x18, x18, #16 +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x4, #1, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z3.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, #2, mul vl] -; CHECK-NEXT: st1h { z4.h }, p0, [x17, #1, mul vl] -; CHECK-NEXT: ld1h { z4.h }, p0/z, [x3, #2, mul vl] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x16, #2, mul vl] +; CHECK-NEXT: st1h { z4.h }, p0, [x16, #1, mul vl] +; CHECK-NEXT: ld1h { z4.h }, p0/z, [x18, #2, mul vl] ; CHECK-NEXT: fmad z4.h, p0/m, z0.h, z5.h -; CHECK-NEXT: ld1b { z5.b }, p1/z, [x18, x12] -; CHECK-NEXT: add x18, x18, x12 +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, #2, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z1.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x13, lsl #1] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x3, #2, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z2.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x14, lsl #1] -; CHECK-NEXT: add x18, x18, #16 +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x4, #2, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z3.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, #3, mul vl] -; CHECK-NEXT: st1h { z4.h }, p0, [x17, #2, mul vl] -; CHECK-NEXT: ld1h { z4.h }, p0/z, [x3, #3, mul vl] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x16, #3, mul vl] +; CHECK-NEXT: st1h { z4.h }, p0, [x16, #2, mul vl] +; CHECK-NEXT: ld1h { z4.h }, p0/z, [x18, #3, mul vl] ; CHECK-NEXT: fmad z4.h, p0/m, z0.h, z5.h -; CHECK-NEXT: ld1b { z5.b }, p1/z, [x18, x12] -; CHECK-NEXT: add x18, x18, x12 +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x17, #3, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z1.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x13, lsl #1] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x3, #3, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z2.h -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x18, x14, lsl #1] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x4, 
#3, mul vl] ; CHECK-NEXT: fmla z4.h, p0/m, z5.h, z3.h -; CHECK-NEXT: st1h { z4.h }, p0, [x17, #3, mul vl] -; CHECK-NEXT: addvl x17, x17, #4 -; CHECK-NEXT: cmp x17, x11 +; CHECK-NEXT: st1h { z4.h }, p0, [x16, #3, mul vl] +; CHECK-NEXT: addvl x16, x16, #4 +; CHECK-NEXT: cmp x16, x11 ; CHECK-NEXT: b.lo .LBB0_4 ; CHECK-NEXT: // %bb.5: // %while.cond.i..exit_crit_edge.us ; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index c9fe89aec8ad9b..c81fd26a775256 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -1925,11 +1925,8 @@ entry: define i32 @test_udot_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-BASE-LABEL: test_udot_v8i8: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: umull v2.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: umlal2 v2.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: addv s0, v2.4s +; CHECK-SD-BASE-NEXT: umull v0.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: uaddlv s0, v0.8h ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret ; @@ -1969,15 +1966,11 @@ entry: define i32 @test_udot_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-BASE-LABEL: test_udot_v16i8: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: ushll v2.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v3.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-SD-BASE-NEXT: umull v4.4s, v3.4h, v2.4h -; CHECK-SD-BASE-NEXT: umull2 v2.4s, v3.8h, v2.8h -; CHECK-SD-BASE-NEXT: umlal2 v2.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: umlal v4.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: add v0.4s, v4.4s, v2.4s +; CHECK-SD-BASE-NEXT: umull2 v2.8h, v1.16b, v0.16b +; CHECK-SD-BASE-NEXT: umull v0.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: uaddl2 v1.4s, v0.8h, v2.8h +; CHECK-SD-BASE-NEXT: uaddl v0.4s, v0.4h, v2.4h +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-SD-BASE-NEXT: addv s0, v0.4s ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret @@ -2025,21 +2018,16 @@ define i32 @test_udot_v24i8(ptr %p1, ptr %p2) { ; CHECK-SD-BASE: // %bb.0: // %entry ; CHECK-SD-BASE-NEXT: ldr q0, [x0] ; CHECK-SD-BASE-NEXT: ldr q1, [x1] -; CHECK-SD-BASE-NEXT: ldr d4, [x0, #16] -; CHECK-SD-BASE-NEXT: ldr d5, [x1, #16] -; CHECK-SD-BASE-NEXT: ushll v2.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v3.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v0.8h, v0.16b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-SD-BASE-NEXT: umull v6.4s, v3.4h, v2.4h -; CHECK-SD-BASE-NEXT: umull2 v2.4s, v3.8h, v2.8h -; CHECK-SD-BASE-NEXT: ushll v3.8h, v4.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v4.8h, v5.8b, #0 -; CHECK-SD-BASE-NEXT: umlal2 v2.4s, v4.8h, v3.8h -; CHECK-SD-BASE-NEXT: umlal v6.4s, v4.4h, v3.4h -; CHECK-SD-BASE-NEXT: umlal2 v2.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: umlal v6.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: add v0.4s, v6.4s, v2.4s +; CHECK-SD-BASE-NEXT: ldr d2, [x0, #16] +; CHECK-SD-BASE-NEXT: ldr d3, [x1, #16] +; CHECK-SD-BASE-NEXT: umull v2.8h, v3.8b, v2.8b +; CHECK-SD-BASE-NEXT: umull v3.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: umull2 v0.8h, v1.16b, v0.16b +; CHECK-SD-BASE-NEXT: uaddl2 v1.4s, v3.8h, v2.8h +; CHECK-SD-BASE-NEXT: uaddl v2.4s, v3.4h, v2.4h +; CHECK-SD-BASE-NEXT: uaddw2 v1.4s, v1.4s, v0.8h +; CHECK-SD-BASE-NEXT: uaddw v0.4s, v2.4s, v0.4h +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-SD-BASE-NEXT: addv s0, v0.4s ; 
CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret @@ -2125,37 +2113,27 @@ entry: define i32 @test_udot_v48i8(ptr %p1, ptr %p2) { ; CHECK-SD-BASE-LABEL: test_udot_v48i8: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: ldp q0, q4, [x1] -; CHECK-SD-BASE-NEXT: ldr q2, [x0, #32] -; CHECK-SD-BASE-NEXT: ldp q1, q3, [x0] -; CHECK-SD-BASE-NEXT: ldr q7, [x1, #32] -; CHECK-SD-BASE-NEXT: ushll2 v16.8h, v2.16b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v6.8h, v0.16b, #0 -; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v17.8h, v7.16b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v5.8h, v1.16b, #0 -; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: umull2 v18.4s, v6.8h, v5.8h -; CHECK-SD-BASE-NEXT: umull v19.4s, v0.4h, v1.4h -; CHECK-SD-BASE-NEXT: umull v5.4s, v6.4h, v5.4h -; CHECK-SD-BASE-NEXT: umull2 v0.4s, v0.8h, v1.8h -; CHECK-SD-BASE-NEXT: ushll v1.8h, v2.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v2.8h, v7.8b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v6.8h, v3.16b, #0 -; CHECK-SD-BASE-NEXT: ushll2 v7.8h, v4.16b, #0 -; CHECK-SD-BASE-NEXT: umlal2 v18.4s, v17.8h, v16.8h -; CHECK-SD-BASE-NEXT: umlal v5.4s, v17.4h, v16.4h -; CHECK-SD-BASE-NEXT: umlal v19.4s, v2.4h, v1.4h -; CHECK-SD-BASE-NEXT: umlal2 v0.4s, v2.8h, v1.8h -; CHECK-SD-BASE-NEXT: ushll v1.8h, v3.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v2.8h, v4.8b, #0 -; CHECK-SD-BASE-NEXT: umlal2 v18.4s, v7.8h, v6.8h -; CHECK-SD-BASE-NEXT: umlal v5.4s, v7.4h, v6.4h -; CHECK-SD-BASE-NEXT: umlal v19.4s, v2.4h, v1.4h -; CHECK-SD-BASE-NEXT: umlal2 v0.4s, v2.8h, v1.8h -; CHECK-SD-BASE-NEXT: add v1.4s, v19.4s, v5.4s -; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v18.4s -; CHECK-SD-BASE-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-SD-BASE-NEXT: ldp q4, q0, [x0, #16] +; CHECK-SD-BASE-NEXT: ldr q2, [x1, #32] +; CHECK-SD-BASE-NEXT: ldp q1, q5, [x1] +; CHECK-SD-BASE-NEXT: ldr q3, [x0] +; CHECK-SD-BASE-NEXT: umull2 v6.8h, v2.16b, v0.16b +; CHECK-SD-BASE-NEXT: umull v0.8h, v2.8b, v0.8b +; CHECK-SD-BASE-NEXT: umull2 v7.8h, v1.16b, v3.16b +; CHECK-SD-BASE-NEXT: umull v1.8h, v1.8b, v3.8b +; CHECK-SD-BASE-NEXT: umull2 v2.8h, v5.16b, v4.16b +; CHECK-SD-BASE-NEXT: umull v3.8h, v5.8b, v4.8b +; CHECK-SD-BASE-NEXT: uaddl2 v4.4s, v7.8h, v6.8h +; CHECK-SD-BASE-NEXT: uaddl2 v5.4s, v1.8h, v0.8h +; CHECK-SD-BASE-NEXT: uaddl v6.4s, v7.4h, v6.4h +; CHECK-SD-BASE-NEXT: uaddl v0.4s, v1.4h, v0.4h +; CHECK-SD-BASE-NEXT: uaddw2 v1.4s, v4.4s, v2.8h +; CHECK-SD-BASE-NEXT: uaddw2 v4.4s, v5.4s, v3.8h +; CHECK-SD-BASE-NEXT: uaddw v2.4s, v6.4s, v2.4h +; CHECK-SD-BASE-NEXT: uaddw v0.4s, v0.4s, v3.4h +; CHECK-SD-BASE-NEXT: add v1.4s, v4.4s, v1.4s +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-SD-BASE-NEXT: addv s0, v0.4s ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret @@ -2275,11 +2253,8 @@ entry: define i32 @test_sdot_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-BASE-LABEL: test_sdot_v8i8: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: smull v2.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: smlal2 v2.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: addv s0, v2.4s +; CHECK-SD-BASE-NEXT: smull v0.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: saddlv s0, v0.8h ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret ; @@ -2319,15 +2294,11 @@ entry: define i32 @test_sdot_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-BASE-LABEL: test_sdot_v16i8: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: sshll v2.8h, v0.8b, #0 -; 
CHECK-SD-BASE-NEXT: sshll v3.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v0.8h, v0.16b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v1.16b, #0 -; CHECK-SD-BASE-NEXT: smull v4.4s, v3.4h, v2.4h -; CHECK-SD-BASE-NEXT: smull2 v2.4s, v3.8h, v2.8h -; CHECK-SD-BASE-NEXT: smlal2 v2.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: smlal v4.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: add v0.4s, v4.4s, v2.4s +; CHECK-SD-BASE-NEXT: smull2 v2.8h, v1.16b, v0.16b +; CHECK-SD-BASE-NEXT: smull v0.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: saddl2 v1.4s, v0.8h, v2.8h +; CHECK-SD-BASE-NEXT: saddl v0.4s, v0.4h, v2.4h +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-SD-BASE-NEXT: addv s0, v0.4s ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret @@ -2375,21 +2346,16 @@ define i32 @test_sdot_v24i8(ptr %p1, ptr %p2) { ; CHECK-SD-BASE: // %bb.0: // %entry ; CHECK-SD-BASE-NEXT: ldr q0, [x0] ; CHECK-SD-BASE-NEXT: ldr q1, [x1] -; CHECK-SD-BASE-NEXT: ldr d4, [x0, #16] -; CHECK-SD-BASE-NEXT: ldr d5, [x1, #16] -; CHECK-SD-BASE-NEXT: sshll v2.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: sshll v3.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v0.8h, v0.16b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v1.8h, v1.16b, #0 -; CHECK-SD-BASE-NEXT: smull v6.4s, v3.4h, v2.4h -; CHECK-SD-BASE-NEXT: smull2 v2.4s, v3.8h, v2.8h -; CHECK-SD-BASE-NEXT: sshll v3.8h, v4.8b, #0 -; CHECK-SD-BASE-NEXT: sshll v4.8h, v5.8b, #0 -; CHECK-SD-BASE-NEXT: smlal2 v2.4s, v4.8h, v3.8h -; CHECK-SD-BASE-NEXT: smlal v6.4s, v4.4h, v3.4h -; CHECK-SD-BASE-NEXT: smlal2 v2.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: smlal v6.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: add v0.4s, v6.4s, v2.4s +; CHECK-SD-BASE-NEXT: ldr d2, [x0, #16] +; CHECK-SD-BASE-NEXT: ldr d3, [x1, #16] +; CHECK-SD-BASE-NEXT: smull v2.8h, v3.8b, v2.8b +; CHECK-SD-BASE-NEXT: smull v3.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: smull2 v0.8h, v1.16b, v0.16b +; CHECK-SD-BASE-NEXT: saddl2 v1.4s, v3.8h, v2.8h +; CHECK-SD-BASE-NEXT: saddl v2.4s, v3.4h, v2.4h +; CHECK-SD-BASE-NEXT: saddw2 v1.4s, v1.4s, v0.8h +; CHECK-SD-BASE-NEXT: saddw v0.4s, v2.4s, v0.4h +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-SD-BASE-NEXT: addv s0, v0.4s ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret @@ -2475,37 +2441,27 @@ entry: define i32 @test_sdot_v48i8(ptr %p1, ptr %p2) { ; CHECK-SD-BASE-LABEL: test_sdot_v48i8: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: ldp q0, q4, [x1] -; CHECK-SD-BASE-NEXT: ldr q2, [x0, #32] -; CHECK-SD-BASE-NEXT: ldp q1, q3, [x0] -; CHECK-SD-BASE-NEXT: ldr q7, [x1, #32] -; CHECK-SD-BASE-NEXT: sshll2 v16.8h, v2.16b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v6.8h, v0.16b, #0 -; CHECK-SD-BASE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v17.8h, v7.16b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v5.8h, v1.16b, #0 -; CHECK-SD-BASE-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: smull2 v18.4s, v6.8h, v5.8h -; CHECK-SD-BASE-NEXT: smull v19.4s, v0.4h, v1.4h -; CHECK-SD-BASE-NEXT: smull v5.4s, v6.4h, v5.4h -; CHECK-SD-BASE-NEXT: smull2 v0.4s, v0.8h, v1.8h -; CHECK-SD-BASE-NEXT: sshll v1.8h, v2.8b, #0 -; CHECK-SD-BASE-NEXT: sshll v2.8h, v7.8b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v6.8h, v3.16b, #0 -; CHECK-SD-BASE-NEXT: sshll2 v7.8h, v4.16b, #0 -; CHECK-SD-BASE-NEXT: smlal2 v18.4s, v17.8h, v16.8h -; CHECK-SD-BASE-NEXT: smlal v5.4s, v17.4h, v16.4h -; CHECK-SD-BASE-NEXT: smlal v19.4s, v2.4h, v1.4h -; CHECK-SD-BASE-NEXT: smlal2 v0.4s, v2.8h, v1.8h -; CHECK-SD-BASE-NEXT: sshll v1.8h, v3.8b, #0 -; CHECK-SD-BASE-NEXT: sshll v2.8h, v4.8b, #0 -; CHECK-SD-BASE-NEXT: smlal2 v18.4s, v7.8h, v6.8h -; CHECK-SD-BASE-NEXT: smlal 
v5.4s, v7.4h, v6.4h -; CHECK-SD-BASE-NEXT: smlal v19.4s, v2.4h, v1.4h -; CHECK-SD-BASE-NEXT: smlal2 v0.4s, v2.8h, v1.8h -; CHECK-SD-BASE-NEXT: add v1.4s, v19.4s, v5.4s -; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v18.4s -; CHECK-SD-BASE-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-SD-BASE-NEXT: ldp q4, q0, [x0, #16] +; CHECK-SD-BASE-NEXT: ldr q2, [x1, #32] +; CHECK-SD-BASE-NEXT: ldp q1, q5, [x1] +; CHECK-SD-BASE-NEXT: ldr q3, [x0] +; CHECK-SD-BASE-NEXT: smull2 v6.8h, v2.16b, v0.16b +; CHECK-SD-BASE-NEXT: smull v0.8h, v2.8b, v0.8b +; CHECK-SD-BASE-NEXT: smull2 v7.8h, v1.16b, v3.16b +; CHECK-SD-BASE-NEXT: smull v1.8h, v1.8b, v3.8b +; CHECK-SD-BASE-NEXT: smull2 v2.8h, v5.16b, v4.16b +; CHECK-SD-BASE-NEXT: smull v3.8h, v5.8b, v4.8b +; CHECK-SD-BASE-NEXT: saddl2 v4.4s, v7.8h, v6.8h +; CHECK-SD-BASE-NEXT: saddl2 v5.4s, v1.8h, v0.8h +; CHECK-SD-BASE-NEXT: saddl v6.4s, v7.4h, v6.4h +; CHECK-SD-BASE-NEXT: saddl v0.4s, v1.4h, v0.4h +; CHECK-SD-BASE-NEXT: saddw2 v1.4s, v4.4s, v2.8h +; CHECK-SD-BASE-NEXT: saddw2 v4.4s, v5.4s, v3.8h +; CHECK-SD-BASE-NEXT: saddw v2.4s, v6.4s, v2.4h +; CHECK-SD-BASE-NEXT: saddw v0.4s, v0.4s, v3.4h +; CHECK-SD-BASE-NEXT: add v1.4s, v4.4s, v1.4s +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-SD-BASE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-SD-BASE-NEXT: addv s0, v0.4s ; CHECK-SD-BASE-NEXT: fmov w0, s0 ; CHECK-SD-BASE-NEXT: ret @@ -2626,26 +2582,22 @@ entry: define i32 @test_udot_v8i8_multi_use(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-BASE-LABEL: test_udot_v8i8_multi_use: ; CHECK-SD-BASE: // %bb.0: // %entry -; CHECK-SD-BASE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-SD-BASE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-SD-BASE-NEXT: umull v2.4s, v1.4h, v0.4h -; CHECK-SD-BASE-NEXT: mov v3.16b, v2.16b -; CHECK-SD-BASE-NEXT: fmov w8, s2 -; CHECK-SD-BASE-NEXT: umlal2 v3.4s, v1.8h, v0.8h -; CHECK-SD-BASE-NEXT: addv s0, v3.4s +; CHECK-SD-BASE-NEXT: umull v0.8h, v1.8b, v0.8b +; CHECK-SD-BASE-NEXT: uaddlv s1, v0.8h +; CHECK-SD-BASE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-BASE-NEXT: fmov w9, s0 -; CHECK-SD-BASE-NEXT: add w0, w9, w8 +; CHECK-SD-BASE-NEXT: fmov w8, s1 +; CHECK-SD-BASE-NEXT: add w0, w8, w9 ; CHECK-SD-BASE-NEXT: ret ; ; CHECK-SD-DOT-LABEL: test_udot_v8i8_multi_use: ; CHECK-SD-DOT: // %bb.0: // %entry ; CHECK-SD-DOT-NEXT: movi v2.2d, #0000000000000000 -; CHECK-SD-DOT-NEXT: ushll v3.8h, v0.8b, #0 -; CHECK-SD-DOT-NEXT: ushll v4.8h, v1.8b, #0 +; CHECK-SD-DOT-NEXT: umull v3.8h, v1.8b, v0.8b ; CHECK-SD-DOT-NEXT: udot v2.2s, v1.8b, v0.8b -; CHECK-SD-DOT-NEXT: umull v0.4s, v4.4h, v3.4h -; CHECK-SD-DOT-NEXT: addp v1.2s, v2.2s, v2.2s +; CHECK-SD-DOT-NEXT: ushll v0.4s, v3.4h, #0 ; CHECK-SD-DOT-NEXT: fmov w9, s0 +; CHECK-SD-DOT-NEXT: addp v1.2s, v2.2s, v2.2s ; CHECK-SD-DOT-NEXT: fmov w8, s1 ; CHECK-SD-DOT-NEXT: add w0, w8, w9 ; CHECK-SD-DOT-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 7799b9509ceb03..da8aa544698355 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -886,12 +886,12 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s62, 30 ; GCN-NEXT: v_writelane_b32 v40, s63, 31 ; GCN-NEXT: s_mov_b64 s[6:7], exec -; GCN-NEXT: s_movk_i32 s4, 0x7b ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v0 ; GCN-NEXT: v_readfirstlane_b32 s9, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] ; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GCN-NEXT: s_movk_i32 s4, 0x7b ; GCN-NEXT: s_swappc_b64 
s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] @@ -980,12 +980,12 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s62, 30 ; GISEL-NEXT: v_writelane_b32 v40, s63, 31 ; GISEL-NEXT: s_mov_b64 s[6:7], exec -; GISEL-NEXT: s_movk_i32 s4, 0x7b ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0 ; GISEL-NEXT: v_readfirstlane_b32 s9, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] ; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_movk_i32 s4, 0x7b ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll new file mode 100644 index 00000000000000..de484e3db18ab5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load-last-use.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 | FileCheck %s --check-prefix=GCN +;RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 | FileCheck %s --check-prefix=GCN + +define float @raw_buffer_load(<4 x i32> inreg) { +; GCN-LABEL: raw_buffer_load: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_expcnt 0x0 +; GCN-NEXT: s_wait_samplecnt 0x0 +; GCN-NEXT: s_wait_bvhcnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: buffer_load_b32 v0, off, s[0:3], null th:TH_LOAD_LU +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: s_setpc_b64 s[30:31] +main_body: + %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %0, i32 0, i32 0, i32 3) + ret float %data +} + +define float @struct_buffer_load(<4 x i32> inreg) { +; GCN-LABEL: struct_buffer_load: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_wait_loadcnt_dscnt 0x0 +; GCN-NEXT: s_wait_expcnt 0x0 +; GCN-NEXT: s_wait_samplecnt 0x0 +; GCN-NEXT: s_wait_bvhcnt 0x0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], null idxen th:TH_LOAD_LU +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: s_setpc_b64 s[30:31] +main_body: + %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 3) + ret float %data +} + diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll new file mode 100644 index 00000000000000..d7dd0ce58a08f3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX12PLUS + +declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32) + +; GCN-LABEL: {{^}}global_atomic_csub_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN +define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) { +main_body: + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr 
addrspace(1) %ptr, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_no_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] +define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 { +main_body: + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] offset:4 th:TH_ATOMIC_RETURN +define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) { +main_body: + %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1 + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] offset:4 +define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 { +main_body: + %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1 + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data) + ret void +} + +attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.gfx90a.ll new file mode 100644 index 00000000000000..af841057471891 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.gfx90a.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A + +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) + +; GFX90A-LABEL: {{^}}global_atomic_add_f32: +; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc +define amdgpu_ps float @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) { +main_body: + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret float %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_add_f32_off4: +; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:4 glc +define amdgpu_ps float @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret float %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_add_f32_offneg4: +; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:-4 glc +define amdgpu_ps float @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 -1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret float %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16: +; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc +define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) + ret <2 x half> %ret +} + +; 
GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_off4: +; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:4 glc +define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret <2 x half> %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4: +; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-4 glc +define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret <2 x half> %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll new file mode 100644 index 00000000000000..0c3ce3308dd8fe --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN + +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr, float) + +; GCN-LABEL: {{^}}global_atomic_add_f32: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +define amdgpu_kernel void @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) { +main_body: + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_add_f32_off4: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +define amdgpu_kernel void @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4 +define amdgpu_kernel void @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 -1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16: +; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4: +; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1 
+ %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4: +; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}} +define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret void +} + +; Make sure this artificially selects with an incorrect subtarget, but +; with the required feature set. +; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) #0 { + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret void +} + +; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget: +; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} +define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) #1 { + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) + ret void +} + +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"} +attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll index 5c917c97e261f8..5d9daae69e7865 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -1,67 +1,58 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck %s -check-prefix=CHECK +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=CHECK %s -define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +define void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s6 offen -; CHECK-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 24) +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 offen offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 24 + %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) ret void } -define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { +define void 
@raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_add_f32 v0, off, s[8:11], s6 -; CHECK-NEXT: s_endpgm +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_add_f32 v0, off, s[4:7], s8 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void } -define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +define void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[8:11], s6 offen -; CHECK-NEXT: s_endpgm +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[4:7], s8 offen +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } -define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +define void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_pk_add_f16 v0, off, s[8:11], s6 offset:92 -; CHECK-NEXT: s_endpgm +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_pk_add_f16 v0, off, s[4:7], s8 offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0) ret void } -define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +define void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s6 offen slc -; CHECK-NEXT: s_endpgm +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 offen slc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.bf16.xfail.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.bf16.xfail.ll new file mode 100644 index 00000000000000..c8273b3527bb84 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.bf16.xfail.ll @@ -0,0 +1,8 @@ +; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: Do not know how to widen the result of this operator! + +define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) { + %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) + ret <6 x bfloat> %val +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.xfail.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.xfail.ll new file mode 100644 index 00000000000000..e636c10c256f4a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.xfail.ll @@ -0,0 +1,11 @@ +; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s + +; FIXME: This should be handled + +; CHECK: LLVM ERROR: Do not know how to widen this operator's operand! + + +define void @buffer_store_v6bf16(ptr addrspace(8) inreg %rsrc, <6 x bfloat> %data, i32 %offset) { + call void @llvm.amdgcn.raw.ptr.buffer.store.v6bf16(<6 x bfloat> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll index 0bdb21f767191e..5401de0b082883 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -1,57 +1,50 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck %s -check-prefix=CHECK +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck %s -check-prefix=CHECK - -define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +define void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_add_f32 v0, v[1:2], s[8:11], s6 idxen offen -; CHECK-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_add_f32 v0, v[1:2], s[4:7], s8 idxen offen offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 
%voffset, 24 + %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) ret void } ; Natural mapping, no voffset -define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { +define void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { ; CHECK-LABEL: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_add_f32 v0, v1, s[8:11], s6 idxen -; CHECK-NEXT: s_endpgm +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_add_f32 v0, v1, s[4:7], s8 idxen +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } -define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +define void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_add_f32 v0, v[1:2], s[8:11], s6 idxen offen slc -; CHECK-NEXT: s_endpgm +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_add_f32 v0, v[1:2], s[4:7], s8 idxen offen slc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } -define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +define void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_mov_b32 s11, s5 -; CHECK-NEXT: s_mov_b32 s10, s4 -; CHECK-NEXT: s_mov_b32 s9, s3 -; CHECK-NEXT: s_mov_b32 s8, s2 -; CHECK-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[8:11], s6 idxen offen -; CHECK-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[4:7], s8 idxen offen offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 24 + %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 
%voffset.add, i32 %soffset, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll index e7c6044b3fb6b0..fb40274cac1bac 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) { ; GFX12-LABEL: flat_last_use_load_0: @@ -107,6 +107,3 @@ entry: !0 = !{i32 1} declare i32 @llvm.amdgcn.workitem.id.x() -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-CU: {{.*}} -; GFX12-WGP: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll index c889c67a5ca37d..7a9cb992a0cd16 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; GFX12-LABEL: global_last_use_load_0: @@ -92,6 +92,3 @@ entry: } !0 = !{i32 1} declare i32 @llvm.amdgcn.workitem.id.x() -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX12-CU: {{.*}} -; GFX12-WGP: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll index 1f835349b12b0b..61cec731feb565 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr addrspace(1) %out) { ; GFX12-LABEL: private_last_use_load_0: @@ -85,6 +85,3 @@ entry: !0 = !{i32 1} declare i32 @llvm.amdgcn.workitem.id.x() -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-CU: {{.*}} -; GFX12-WGP: {{.*}} diff --git a/llvm/test/CodeGen/ARM/apple-version-min.ll b/llvm/test/CodeGen/ARM/apple-version-min.ll new file mode 100644 index 00000000000000..6b4af21d74c00d --- /dev/null +++ b/llvm/test/CodeGen/ARM/apple-version-min.ll @@ -0,0 +1,9 @@ +; Test emitting version_min directives. + +; RUN: llc %s -filetype=asm -o - --mtriple arm64-apple-tvos9.0.0 | FileCheck %s --check-prefix=TVOS +; RUN: llc %s -filetype=asm -o - --mtriple thumbv7s-apple-ios7.0.0 | FileCheck %s --check-prefix=IOS +; RUN: llc %s -filetype=asm -o - --mtriple thumbv7k-apple-watchos2.0.0 | FileCheck %s --check-prefix=WATCHOS + +; TVOS: .tvos_version_min 9, 0 +; IOS: .ios_version_min 7, 0 +; WATCHOS: .watchos_version_min 2, 0 diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index 64b22a5cc71bcc..e286eb3226e46f 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -139,6 +139,13 @@ define float @cos_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: tan_f32: +; CHECK: bl tanf +define float @tan_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: pow_f32: ; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { @@ -596,6 +603,13 @@ define double @cos_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: tan_f64: +; CHECK: bl tan +define double @tan_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: pow_f64: ; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { @@ -1023,6 +1037,7 @@ declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float 
@llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) @@ -1056,6 +1071,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll index 50ebe0471dceac..402ecb763d5b33 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll @@ -83,5 +83,55 @@ exit: ret void } +; Check constrained ops converted to call +define void @testTan(ptr %cast) strictfp { +; CHECK-LABEL: testTan: +; CHECK: # %bb.0: # %root +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: stdu 1, -64(1) +; CHECK-NEXT: addi 30, 3, -8 +; CHECK-NEXT: li 29, 255 +; CHECK-NEXT: std 0, 80(1) +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB2_1: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lfdu 1, 8(30) +; CHECK-NEXT: bl tan +; CHECK-NEXT: nop +; CHECK-NEXT: addi 29, 29, -1 +; CHECK-NEXT: stfd 1, 0(30) +; CHECK-NEXT: cmpldi 29, 0 +; CHECK-NEXT: bc 12, 1, .LBB2_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: addi 1, 1, 64 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +root: + br label %for.body + +exit: + ret void + +for.body: + %i = phi i64 [ 0, %root ], [ %next, %for.body ] + %idx = getelementptr inbounds double, ptr %cast, i64 %i + %val = load double, ptr %idx + %tan = tail call nnan ninf nsz arcp double @llvm.experimental.constrained.tan.f64(double %val, metadata !"round.dynamic", metadata !"fpexcept.strict") + store double %tan, ptr %idx, align 8 + %next = add nuw nsw i64 %i, 1 + %cond = icmp eq i64 %next, 255 + br i1 %cond, label %exit, label %for.body +} + declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 42972fe069df6a..76f3dea5b7751d 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -2066,6 +2066,50 @@ entry: ret i1 %conv } +define ppc_fp128 @test_tan_ppc_fp128(ppc_fp128 %first) #0 { +; PC64LE-LABEL: test_tan_ppc_fp128: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: std 0, 48(1) +; PC64LE-NEXT: bl tanl +; PC64LE-NEXT: 
nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: test_tan_ppc_fp128: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: std 0, 48(1) +; PC64LE9-NEXT: bl tanl +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +; +; PC64-LABEL: test_tan_ppc_fp128: +; PC64: # %bb.0: # %entry +; PC64-NEXT: mflr 0 +; PC64-NEXT: stdu 1, -112(1) +; PC64-NEXT: std 0, 128(1) +; PC64-NEXT: bl tanl +; PC64-NEXT: nop +; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: mtlr 0 +; PC64-NEXT: blr +entry: + %tan = call ppc_fp128 @llvm.experimental.constrained.tan.ppcf128( + ppc_fp128 %first, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret ppc_fp128 %tan +} + attributes #0 = { nounwind strictfp } attributes #1 = { strictfp } @@ -2096,6 +2140,7 @@ declare ppc_fp128 @llvm.experimental.constrained.round.ppcf128(ppc_fp128, metada declare ppc_fp128 @llvm.experimental.constrained.sin.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.sqrt.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.fsub.ppcf128(ppc_fp128, ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.tan.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.trunc.ppcf128(ppc_fp128, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 9cabe0c17d849d..f217162782bfd9 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -8302,6 +8302,357 @@ entry: ret <4 x float> %result } +define <1 x float> @constrained_vector_tan_v1f32(<1 x float> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v1f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: std 0, 48(1) +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v1f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: std 0, 48(1) +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( + <1 x float> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <1 x float> %tan +} + +define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v2f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 96(1) +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 2 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 62, 1, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; 
PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 34, 62, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v2f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 2 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 62, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 34, 62, 1 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( + <2 x double> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <2 x double> %tan +} + +define <3 x float> @constrained_vector_tan_v3f32(<3 x float> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v3f32: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 96(1) +; PC64LE-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 2 +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxswapd 0, 63 +; PC64LE-NEXT: fmr 31, 1 +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxsldwi 0, 63, 63, 3 +; PC64LE-NEXT: fmr 30, 1 +; PC64LE-NEXT: xscvspdpn 1, 0 +; PC64LE-NEXT: bl tanf +; PC64LE-NEXT: nop +; PC64LE-NEXT: xscvdpspn 0, 1 +; PC64LE-NEXT: xscvdpspn 1, 30 +; PC64LE-NEXT: addis 3, 2, .LCPI189_0@toc@ha +; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE-NEXT: xscvdpspn 36, 31 +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: addi 3, 3, .LCPI189_0@toc@l +; PC64LE-NEXT: xxmrghw 34, 1, 0 +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: xxswapd 35, 0 +; PC64LE-NEXT: vperm 2, 4, 2, 3 +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v3f32: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1 +; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 2 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxswapd 0, 63 +; PC64LE9-NEXT: fmr 31, 1 +; PC64LE9-NEXT: xscvspdpn 1, 0 +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xxsldwi 0, 63, 63, 3 +; PC64LE9-NEXT: fmr 30, 1 +; PC64LE9-NEXT: 
xscvspdpn 1, 0 +; PC64LE9-NEXT: bl tanf +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscvdpspn 0, 1 +; PC64LE9-NEXT: xscvdpspn 1, 30 +; PC64LE9-NEXT: addis 3, 2, .LCPI189_0@toc@ha +; PC64LE9-NEXT: xscvdpspn 34, 31 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload +; PC64LE9-NEXT: addi 3, 3, .LCPI189_0@toc@l +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xxmrghw 35, 1, 0 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 35, 0 +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( + <3 x float> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <3 x float> %tan +} + +define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v3f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: std 0, 96(1) +; PC64LE-NEXT: stfd 30, 64(1) # 8-byte Folded Spill +; PC64LE-NEXT: fmr 30, 2 +; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill +; PC64LE-NEXT: fmr 31, 3 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 63, 1, 1 +; PC64LE-NEXT: fmr 1, 30 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 63, 1, 63 +; PC64LE-NEXT: fmr 1, 31 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: fmr 3, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload +; PC64LE-NEXT: xxlor 2, 63, 63 +; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v3f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill +; PC64LE9-NEXT: fmr 31, 3 +; PC64LE9-NEXT: fmr 30, 2 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 63, 1, 1 +; PC64LE9-NEXT: fmr 1, 30 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 63, 1, 63 +; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: fmr 3, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: xscpsgndp 2, 63, 63 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( + <3 x double> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <3 x double> %tan +} + +define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 { +; PC64LE-LABEL: constrained_vector_tan_v4f64: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: stdu 1, -96(1) +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: 
std 0, 112(1) +; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 30, 2 +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: xxlor 1, 62, 62 +; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PC64LE-NEXT: vmr 31, 3 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 61, 1, 1 +; PC64LE-NEXT: xxswapd 1, 62 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 62, 61, 1 +; PC64LE-NEXT: xxlor 1, 63, 63 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: xxlor 61, 1, 1 +; PC64LE-NEXT: xxswapd 1, 63 +; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE-NEXT: bl tan +; PC64LE-NEXT: nop +; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 2, 30 +; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE-NEXT: xxmrghd 35, 61, 1 +; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload +; PC64LE-NEXT: addi 1, 1, 96 +; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: blr +; +; PC64LE9-LABEL: constrained_vector_tan_v4f64: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 30, 2 +; PC64LE9-NEXT: xscpsgndp 1, 62, 62 +; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 61, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 62 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 62, 61, 1 +; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: xscpsgndp 61, 1, 1 +; PC64LE9-NEXT: xxswapd 1, 63 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: bl tan +; PC64LE9-NEXT: nop +; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; PC64LE9-NEXT: xxmrghd 35, 61, 1 +; PC64LE9-NEXT: vmr 2, 30 +; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 61, 32(1) # 16-byte Folded Reload +; PC64LE9-NEXT: addi 1, 1, 80 +; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: blr +entry: + %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( + <4 x double> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #1 + ret <4 x double> %tan +} + attributes #0 = { nounwind strictfp noimplicitfloat } attributes #1 = { strictfp } @@ -8316,6 +8667,7 @@ declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> 
@llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -8361,6 +8713,7 @@ declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x f declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -8414,6 +8767,8 @@ declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -8470,6 +8825,7 @@ declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll index 0a0828e51893f8..04fa62b1950763 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-half.ll @@ -1,10 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator < %s \ +; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv32 -mattr=+f -global-isel -stop-after=irtranslator < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+f -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IF %s -; RUN: llc -mtriple=riscv32 -mattr=+zfh -global-isel 
-stop-after=irtranslator < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+zfh -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+f -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IF %s +; RUN: llc -mtriple=riscv64 -mattr=+zfh -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s define half @callee_half_in_regs(half %x) nounwind { ; RV32I-LABEL: name: callee_half_in_regs @@ -34,6 +40,34 @@ define half @callee_half_in_regs(half %x) nounwind { ; RV32IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: callee_half_in_regs + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: callee_half_in_regs + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $f10_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: callee_half_in_regs + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $f10_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h ret half %x } @@ -84,6 +118,53 @@ define half @caller_half_in_regs(half %x) nounwind { ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: caller_half_in_regs + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) @caller_half_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT1]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: caller_half_in_regs + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $f10_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV64IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV64IF-NEXT: PseudoCALL target-flags(riscv-call) @caller_half_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_f, implicit-def $f10_f + ; RV64IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV64IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT1]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: caller_half_in_regs + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $f10_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV64IZFH-NEXT: PseudoCALL target-flags(riscv-call) @caller_half_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_h, implicit-def $f10_h + ; RV64IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h %y = call half @caller_half_in_regs(half %x) ret half %y } @@ -119,6 +200,40 @@ define half @callee_half_mixed_with_int(i32 %x0, half %x) nounwind { ; RV32IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: callee_half_mixed_with_int + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: callee_half_mixed_with_int + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $f10_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: callee_half_mixed_with_int + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $x10, $f10_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h ret half %x } @@ -175,6 +290,65 @@ define half @caller_half_mixed_with_int(half %x, i32 %x0) nounwind { ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: caller_half_mixed_with_int + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 
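The G_TRUNC/G_ANYEXT pairs in these RV64 checks are the whole lp64 story for half: the caller widens the s16 value into a 64-bit GPR with G_ANYEXT, and the callee narrows it back with G_TRUNC before use. As a rough sketch of a minimal reproducer — the file and function names here are illustrative, not part of this patch — an identity function run with the same flags these new RUN lines add is enough to show the pattern:

; RUN: llc -mtriple=riscv64 -global-isel -stop-after=irtranslator -verify-machineinstrs half-abi.ll
define half @half_identity(half %x) nounwind {
  ; expected irtranslator output on plain rv64i: COPY from $x10 (s64),
  ; G_TRUNC to s16, G_ANYEXT back to s64, COPY to $x10, PseudoRET
  ret half %x
}
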
+ ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s32) + ; RV64I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: $x11 = COPY [[ANYEXT1]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_mixed_with_int, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; RV64I-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT2]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: caller_half_mixed_with_int + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $f10_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; RV64IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s32) + ; RV64IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT1]](s32) + ; RV64IF-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_mixed_with_int, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $f10_f, implicit-def $f10_f + ; RV64IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV64IF-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT2]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: caller_half_mixed_with_int + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $x10, $f10_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; RV64IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32) + ; RV64IZFH-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV64IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_mixed_with_int, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $f10_h, implicit-def $f10_h + ; RV64IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h %y = call half @callee_half_mixed_with_int(i32 %x0, half %x) ret half %y } @@ -232,6 +406,83 @@ define half @callee_half_return_stack1(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 % ; RV32IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY8]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: 
callee_half_return_stack1 + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12 + ; RV64I-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13 + ; RV64I-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) + ; RV64I-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x14 + ; RV64I-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; RV64I-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x15 + ; RV64I-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) + ; RV64I-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x16 + ; RV64I-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY6]](s64) + ; RV64I-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x17 + ; RV64I-NEXT: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY7]](s64) + ; RV64I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %fixed-stack.0, align 16) + ; RV64I-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s64) + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC8]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: callee_half_return_stack1 + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $f10_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12 + ; RV64IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; RV64IF-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13 + ; RV64IF-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) + ; RV64IF-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x14 + ; RV64IF-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; RV64IF-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x15 + ; RV64IF-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) + ; RV64IF-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x16 + ; RV64IF-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY6]](s64) + ; RV64IF-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x17 + ; RV64IF-NEXT: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY7]](s64) + ; RV64IF-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC8]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: callee_half_return_stack1 + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $f10_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64IZFH-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; RV64IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12 + ; RV64IZFH-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; 
RV64IZFH-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13 + ; RV64IZFH-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY3]](s64) + ; RV64IZFH-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x14 + ; RV64IZFH-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; RV64IZFH-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x15 + ; RV64IZFH-NEXT: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY5]](s64) + ; RV64IZFH-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x16 + ; RV64IZFH-NEXT: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[COPY6]](s64) + ; RV64IZFH-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x17 + ; RV64IZFH-NEXT: [[TRUNC7:%[0-9]+]]:_(s32) = G_TRUNC [[COPY7]](s64) + ; RV64IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY8]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h ret half %x } @@ -333,6 +584,131 @@ define half @caller_half_return_stack1(i32 %v1, half %x) nounwind { ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: caller_half_return_stack1 + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64I-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64I-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64I-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32) + ; RV64I-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64I-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[C4]](s32) + ; RV64I-NEXT: [[ANYEXT6:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32) + ; RV64I-NEXT: [[ANYEXT7:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32) + ; RV64I-NEXT: [[ANYEXT8:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s16) + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C7]](s64) + ; RV64I-NEXT: G_STORE [[ANYEXT8]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: $x11 = COPY [[ANYEXT1]](s64) + ; RV64I-NEXT: $x12 = COPY [[ANYEXT2]](s64) + ; RV64I-NEXT: $x13 = COPY [[ANYEXT3]](s64) + ; RV64I-NEXT: $x14 = COPY [[ANYEXT4]](s64) + ; RV64I-NEXT: $x15 = COPY [[ANYEXT5]](s64) + ; RV64I-NEXT: $x16 = COPY [[ANYEXT6]](s64) + ; RV64I-NEXT: $x17 = COPY [[ANYEXT7]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack1, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s64) + 
; RV64I-NEXT: [[ANYEXT9:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT9]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: caller_half_return_stack1 + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $f10_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV64IF-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64IF-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64IF-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV64IF-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; RV64IF-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; RV64IF-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV64IF-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64IF-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64IF-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32) + ; RV64IF-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64IF-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[C4]](s32) + ; RV64IF-NEXT: [[ANYEXT6:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32) + ; RV64IF-NEXT: [[ANYEXT7:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32) + ; RV64IF-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV64IF-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64IF-NEXT: $x11 = COPY [[ANYEXT1]](s64) + ; RV64IF-NEXT: $x12 = COPY [[ANYEXT2]](s64) + ; RV64IF-NEXT: $x13 = COPY [[ANYEXT3]](s64) + ; RV64IF-NEXT: $x14 = COPY [[ANYEXT4]](s64) + ; RV64IF-NEXT: $x15 = COPY [[ANYEXT5]](s64) + ; RV64IF-NEXT: $x16 = COPY [[ANYEXT6]](s64) + ; RV64IF-NEXT: $x17 = COPY [[ANYEXT7]](s64) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT8]](s32) + ; RV64IF-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack1, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $f10_f, implicit-def $f10_f + ; RV64IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV64IF-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT9]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: caller_half_return_stack1 + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $x10, $f10_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64IZFH-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64IZFH-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV64IZFH-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; RV64IZFH-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; RV64IZFH-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV64IZFH-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = 
G_ANYEXT [[C]](s32) + ; RV64IZFH-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64IZFH-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64IZFH-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32) + ; RV64IZFH-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64IZFH-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[C4]](s32) + ; RV64IZFH-NEXT: [[ANYEXT6:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32) + ; RV64IZFH-NEXT: [[ANYEXT7:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32) + ; RV64IZFH-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64IZFH-NEXT: $x11 = COPY [[ANYEXT1]](s64) + ; RV64IZFH-NEXT: $x12 = COPY [[ANYEXT2]](s64) + ; RV64IZFH-NEXT: $x13 = COPY [[ANYEXT3]](s64) + ; RV64IZFH-NEXT: $x14 = COPY [[ANYEXT4]](s64) + ; RV64IZFH-NEXT: $x15 = COPY [[ANYEXT5]](s64) + ; RV64IZFH-NEXT: $x16 = COPY [[ANYEXT6]](s64) + ; RV64IZFH-NEXT: $x17 = COPY [[ANYEXT7]](s64) + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY1]](s16) + ; RV64IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack1, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $f10_h, implicit-def $f10_h + ; RV64IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h %y = call half @callee_half_return_stack1(i32 0, i32 1, i32 2, i32 %v1, i32 5, i32 6, i32 7, i32 8, half %x) ret half %y } @@ -407,6 +783,76 @@ define half @callee_half_return_stack2(half %v1, half %v2, half %v3, half %v4, h ; RV32IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) ; RV32IZFH-NEXT: $f10_h = COPY [[TRUNC]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: callee_half_return_stack2 + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12 + ; RV64I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) + ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13 + ; RV64I-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s64) + ; RV64I-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x14 + ; RV64I-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s64) + ; RV64I-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x15 + ; RV64I-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s64) + ; RV64I-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x16 + ; RV64I-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s64) + ; RV64I-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x17 + ; RV64I-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s64) + ; RV64I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %fixed-stack.0, align 16) + ; RV64I-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s64) + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC8]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: callee_half_return_stack2 + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f, $f14_f, $f15_f, $f16_f, $f17_f + ; 
RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f11_f + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f12_f + ; RV64IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV64IF-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $f13_f + ; RV64IF-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; RV64IF-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $f14_f + ; RV64IF-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; RV64IF-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $f15_f + ; RV64IF-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; RV64IF-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $f16_f + ; RV64IF-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; RV64IF-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $f17_f + ; RV64IF-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; RV64IF-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IF-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s64) + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC8]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: callee_half_return_stack2 + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $x10, $f10_h, $f11_h, $f12_h, $f13_h, $f14_h, $f15_h, $f16_h, $f17_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h + ; RV64IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f12_h + ; RV64IZFH-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY $f13_h + ; RV64IZFH-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY $f14_h + ; RV64IZFH-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY $f15_h + ; RV64IZFH-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY $f16_h + ; RV64IZFH-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY $f17_h + ; RV64IZFH-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64IZFH-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s64) + ; RV64IZFH-NEXT: $f10_h = COPY [[TRUNC]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h ret half %x } @@ -505,12 +951,116 @@ define half @caller_half_return_stack2(half %x, half %y) nounwind { ; RV32IZFH-NEXT: $f15_h = COPY [[COPY1]](s16) ; RV32IZFH-NEXT: $f16_h = COPY [[COPY1]](s16) ; RV32IZFH-NEXT: $f17_h = COPY [[COPY1]](s16) - ; RV32IZFH-NEXT: $x10 = COPY [[COPY]](s16) + ; RV32IZFH-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; RV32IZFH-NEXT: $x10 = COPY [[ANYEXT]](s32) ; RV32IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_h, implicit $f11_h, implicit $f12_h, implicit $f13_h, implicit $f14_h, implicit $f15_h, implicit $f16_h, implicit $f17_h, implicit $x10, implicit-def $f10_h ; RV32IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; RV32IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h ; RV32IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) ; RV32IZFH-NEXT: PseudoRET implicit $f10_h + ; + ; RV64I-LABEL: name: caller_half_return_stack2 + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; RV64I-NEXT: 
[[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 + ; RV64I-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s16) + ; RV64I-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s16) + ; RV64I-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: [[ANYEXT5:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s16) + ; RV64I-NEXT: [[ANYEXT6:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s16) + ; RV64I-NEXT: [[ANYEXT7:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s16) + ; RV64I-NEXT: [[ANYEXT8:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s16) + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s64) + ; RV64I-NEXT: G_STORE [[ANYEXT8]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64I-NEXT: $x11 = COPY [[ANYEXT1]](s64) + ; RV64I-NEXT: $x12 = COPY [[ANYEXT2]](s64) + ; RV64I-NEXT: $x13 = COPY [[ANYEXT3]](s64) + ; RV64I-NEXT: $x14 = COPY [[ANYEXT4]](s64) + ; RV64I-NEXT: $x15 = COPY [[ANYEXT5]](s64) + ; RV64I-NEXT: $x16 = COPY [[ANYEXT6]](s64) + ; RV64I-NEXT: $x17 = COPY [[ANYEXT7]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s64) + ; RV64I-NEXT: [[ANYEXT9:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s16) + ; RV64I-NEXT: $x10 = COPY [[ANYEXT9]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64IF-LABEL: name: caller_half_return_stack2 + ; RV64IF: bb.1 (%ir-block.0): + ; RV64IF-NEXT: liveins: $f10_f, $f11_f + ; RV64IF-NEXT: {{ $}} + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f11_f + ; RV64IF-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; RV64IF-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; RV64IF-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 + ; RV64IF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; RV64IF-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16) + ; RV64IF-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV64IF-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV64IF-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16) + ; RV64IF-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT]](s32) + ; RV64IF-NEXT: $f11_f = COPY [[ANYEXT1]](s32) + ; RV64IF-NEXT: $f12_f = COPY [[ANYEXT2]](s32) + ; RV64IF-NEXT: $f13_f = COPY [[ANYEXT3]](s32) + ; RV64IF-NEXT: $f14_f = COPY [[ANYEXT4]](s32) + ; RV64IF-NEXT: $f15_f = COPY [[ANYEXT5]](s32) + ; RV64IF-NEXT: $f16_f = COPY [[ANYEXT6]](s32) + ; RV64IF-NEXT: $f17_f = 
COPY [[ANYEXT7]](s32) + ; RV64IF-NEXT: [[ANYEXT9:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT8]](s32) + ; RV64IF-NEXT: $x10 = COPY [[ANYEXT9]](s64) + ; RV64IF-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_f, implicit $f11_f, implicit $f12_f, implicit $f13_f, implicit $f14_f, implicit $f15_f, implicit $f16_f, implicit $f17_f, implicit $x10, implicit-def $f10_f + ; RV64IF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $f10_f + ; RV64IF-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; RV64IF-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; RV64IF-NEXT: $f10_f = COPY [[ANYEXT10]](s32) + ; RV64IF-NEXT: PseudoRET implicit $f10_f + ; + ; RV64IZFH-LABEL: name: caller_half_return_stack2 + ; RV64IZFH: bb.1 (%ir-block.0): + ; RV64IZFH-NEXT: liveins: $f10_h, $f11_h + ; RV64IZFH-NEXT: {{ $}} + ; RV64IZFH-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h + ; RV64IZFH-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; RV64IZFH-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4200 + ; RV64IZFH-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY]](s16) + ; RV64IZFH-NEXT: $f11_h = COPY [[C]](s16) + ; RV64IZFH-NEXT: $f12_h = COPY [[COPY]](s16) + ; RV64IZFH-NEXT: $f13_h = COPY [[C1]](s16) + ; RV64IZFH-NEXT: $f14_h = COPY [[COPY]](s16) + ; RV64IZFH-NEXT: $f15_h = COPY [[COPY1]](s16) + ; RV64IZFH-NEXT: $f16_h = COPY [[COPY1]](s16) + ; RV64IZFH-NEXT: $f17_h = COPY [[COPY1]](s16) + ; RV64IZFH-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) + ; RV64IZFH-NEXT: $x10 = COPY [[ANYEXT]](s64) + ; RV64IZFH-NEXT: PseudoCALL target-flags(riscv-call) @callee_half_return_stack2, csr_ilp32f_lp64f, implicit-def $x1, implicit $f10_h, implicit $f11_h, implicit $f12_h, implicit $f13_h, implicit $f14_h, implicit $f15_h, implicit $f16_h, implicit $f17_h, implicit $x10, implicit-def $f10_h + ; RV64IZFH-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IZFH-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f10_h + ; RV64IZFH-NEXT: $f10_h = COPY [[COPY2]](s16) + ; RV64IZFH-NEXT: PseudoRET implicit $f10_h %z = call half @callee_half_return_stack2(half %x, half 1.0, half %x, half 3.0, half %x, half %y, half %y, half %y, half %x) ret half %z } diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 1c9356fb1a05a7..86b557700347e1 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV32ZMMUL %s ; RUN: llc -mtriple=riscv32 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV32MZMMUL %s ; RUN: llc -mtriple=riscv32 -mattr=+a %s -o - | FileCheck --check-prefixes=CHECK,RV32A %s +; RUN: llc -mtriple=riscv32 -mattr=+b %s -o - | FileCheck --check-prefixes=CHECK,RV32B %s ; RUN: llc -mtriple=riscv32 -mattr=+f %s -o - | FileCheck --check-prefixes=CHECK,RV32F %s ; RUN: llc -mtriple=riscv32 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV32D %s ; RUN: llc -mtriple=riscv32 -mattr=+c %s -o - | FileCheck --check-prefixes=CHECK,RV32C %s @@ -131,6 +132,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64ZMMUL %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64MZMMUL %s ; RUN: llc -mtriple=riscv64 
-mattr=+a %s -o - | FileCheck --check-prefixes=CHECK,RV64A %s +; RUN: llc -mtriple=riscv64 -mattr=+b %s -o - | FileCheck --check-prefixes=CHECK,RV64B %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefixes=CHECK,RV64F %s ; RUN: llc -mtriple=riscv64 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV64D %s ; RUN: llc -mtriple=riscv64 -mattr=+c %s -o - | FileCheck --check-prefixes=CHECK,RV64C %s @@ -277,6 +279,7 @@ ; RV32ZMMUL: .attribute 5, "rv32i2p1_zmmul1p0" ; RV32MZMMUL: .attribute 5, "rv32i2p1_m2p0_zmmul1p0" ; RV32A: .attribute 5, "rv32i2p1_a2p1" +; RV32B: .attribute 5, "rv32i2p1_b1p0_zba1p0_zbb1p0_zbs1p0" ; RV32F: .attribute 5, "rv32i2p1_f2p2_zicsr2p0" ; RV32D: .attribute 5, "rv32i2p1_f2p2_d2p2_zicsr2p0" ; RV32C: .attribute 5, "rv32i2p1_c2p0" @@ -402,6 +405,7 @@ ; RV64ZMMUL: .attribute 5, "rv64i2p1_zmmul1p0" ; RV64MZMMUL: .attribute 5, "rv64i2p1_m2p0_zmmul1p0" ; RV64A: .attribute 5, "rv64i2p1_a2p1" +; RV64B: .attribute 5, "rv64i2p1_b1p0_zba1p0_zbb1p0_zbs1p0" ; RV64F: .attribute 5, "rv64i2p1_f2p2_zicsr2p0" ; RV64D: .attribute 5, "rv64i2p1_f2p2_d2p2_zicsr2p0" ; RV64C: .attribute 5, "rv64i2p1_c2p0" diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll index 38215860193eaf..4cb6191e7322e9 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -375,6 +375,66 @@ define double @sincos_f64(double %a) nounwind strictfp { ret double %3 } +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) + +define double @tan_f64(double %a) nounwind strictfp { +; RV32IFD-LABEL: tan_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call tan +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: tan_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: call tan +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: tan_f64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: call tan +; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: tan_f64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINXZDINX-NEXT: call tan +; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: ret +; +; RV32I-LABEL: tan_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call tan +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: tan_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call tan +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.tan.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + declare double 
@llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) define double @pow_f64(double %a, double %b) nounwind strictfp { diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll index 626db1985bfc7e..e4be5074cb800a 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll @@ -354,6 +354,66 @@ define float @sincos_f32(float %a) nounwind strictfp { ret float %3 } +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) + +define float @tan_f32(float %a) nounwind strictfp { +; RV32IF-LABEL: tan_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call tanf +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: tan_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call tanf +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32IZFINX-LABEL: tan_f32: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: addi sp, sp, -16 +; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFINX-NEXT: call tanf +; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFINX-NEXT: addi sp, sp, 16 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: tan_f32: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: addi sp, sp, -16 +; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINX-NEXT: call tanf +; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFINX-NEXT: addi sp, sp, 16 +; RV64IZFINX-NEXT: ret +; +; RV32I-LABEL: tan_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call tanf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: tan_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call tanf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.tan.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) define float @pow_f32(float %a, float %b) nounwind strictfp { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll index b3fcdc978625f9..e7b1b441405f61 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll @@ -66,7 +66,7 @@ while.end: ; CHECK: %[[#new_end]] = OpLabel ; CHECK: %[[#route:]] = OpPhi %[[#int_ty]] %[[#int_1]] %[[#while_cond]] %[[#int_0]] %[[#while_body]] -; CHECK: OpSwitch %[[#route]] %[[#while_end_loopexit]] 0 %[[#if_then]] +; CHECK: OpSwitch %[[#route]] %[[#if_then]] 1 %[[#while_end_loopexit]] } declare token @llvm.experimental.convergence.entry() #2 diff --git a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll index a67c58fdd5749f..593e3631c02b9d 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll +++ 
b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll @@ -75,7 +75,7 @@ while.end: ; CHECK: %[[#new_end]] = OpLabel ; CHECK: %[[#route:]] = OpPhi %[[#int_ty]] %[[#int_0]] %[[#while_cond]] %[[#int_1]] %[[#tail]] -; CHECK: OpSwitch %[[#route]] %[[#while_end]] 0 %[[#while_end_loopexit]] +; CHECK: OpSwitch %[[#route]] %[[#while_end_loopexit]] 1 %[[#while_end]] } declare token @llvm.experimental.convergence.entry() #2 diff --git a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll index 32a97553df05e3..9806dd7955468e 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll @@ -85,7 +85,7 @@ while.end: ; CHECK: %[[#new_end]] = OpLabel ; CHECK: %[[#route:]] = OpPhi %[[#int_ty]] %[[#int_2]] %[[#while_cond]] %[[#int_0]] %[[#while_body]] %[[#int_1]] %[[#if_end]] -; CHECK: OpSwitch %[[#route]] %[[#while_end_loopexit]] 1 %[[#if_then2]] 0 %[[#if_then]] +; CHECK: OpSwitch %[[#route]] %[[#if_then]] 1 %[[#if_then2]] 2 %[[#while_end_loopexit]] } declare token @llvm.experimental.convergence.entry() #2 diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 9d77744f18ca1a..4a109ee96a3d3e 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -6222,6 +6222,323 @@ entry: ret void } +define <1 x float> @constrained_vector_tan_v1f32() #0 { +; S390X-LABEL: constrained_vector_tan_v1f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -160 +; S390X-NEXT: .cfi_def_cfa_offset 320 +; S390X-NEXT: larl %r1, .LCPI119_0 +; S390X-NEXT: le %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: lmg %r14, %r15, 272(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v1f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -160 +; SZ13-NEXT: .cfi_def_cfa_offset 320 +; SZ13-NEXT: larl %r1, .LCPI119_0 +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vlr %v24, %v0 +; SZ13-NEXT: lmg %r14, %r15, 272(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( + <1 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %tan +} + +define <2 x double> @constrained_vector_tan_v2f64() #0 { +; S390X-LABEL: constrained_vector_tan_v2f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -168 +; S390X-NEXT: .cfi_def_cfa_offset 328 +; S390X-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: larl %r1, .LCPI120_0 +; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI120_1 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldr %f8, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f2, %f8 +; S390X-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 280(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: 
constrained_vector_tan_v2f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -176 +; SZ13-NEXT: .cfi_def_cfa_offset 336 +; SZ13-NEXT: larl %r1, .LCPI120_0 +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: larl %r1, .LCPI120_1 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 288(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %tan +} + +define <3 x float> @constrained_vector_tan_v3f32() #0 { +; S390X-LABEL: constrained_vector_tan_v3f32: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -176 +; S390X-NEXT: .cfi_def_cfa_offset 336 +; S390X-NEXT: std %f8, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: larl %r1, .LCPI121_0 +; S390X-NEXT: le %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: larl %r1, .LCPI121_1 +; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: ler %f8, %f0 +; S390X-NEXT: ler %f0, %f1 +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: larl %r1, .LCPI121_2 +; S390X-NEXT: le %f1, 0(%r1) +; S390X-NEXT: ler %f9, %f0 +; S390X-NEXT: ler %f0, %f1 +; S390X-NEXT: brasl %r14, tanf@PLT +; S390X-NEXT: ler %f2, %f9 +; S390X-NEXT: ler %f4, %f8 +; S390X-NEXT: ld %f8, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v3f32: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -192 +; SZ13-NEXT: .cfi_def_cfa_offset 352 +; SZ13-NEXT: larl %r1, .LCPI121_0 +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: larl %r1, .LCPI121_1 +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: larl %r1, .LCPI121_2 +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: lde %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tanf@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0s killed $f0s def $v0 +; SZ13-NEXT: vmrhf %v0, %v1, %v0 +; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vrepf %v1, %v1, 0 +; SZ13-NEXT: vmrhg %v24, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( + <3 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %tan +} + +define void @constrained_vector_tan_v3f64(ptr %a) #0 { +; S390X-LABEL: constrained_vector_tan_v3f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r13, %r15, 104(%r15) 
+; S390X-NEXT: .cfi_offset %r13, -56 +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 +; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: lgr %r13, %r2 +; S390X-NEXT: ld %f8, 0(%r2) +; S390X-NEXT: ld %f0, 16(%r2) +; S390X-NEXT: ld %f9, 8(%r2) +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f10, %f0 +; S390X-NEXT: ldr %f0, %f9 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f8 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: std %f0, 0(%r13) +; S390X-NEXT: std %f9, 8(%r13) +; S390X-NEXT: std %f10, 16(%r13) +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r13, %r15, 288(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v3f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r13, %r15, 104(%r15) +; SZ13-NEXT: .cfi_offset %r13, -56 +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -200 +; SZ13-NEXT: .cfi_def_cfa_offset 360 +; SZ13-NEXT: std %f8, 192(%r15) # 8-byte Folded Spill +; SZ13-NEXT: .cfi_offset %f8, -168 +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: ld %f8, 16(%r2) +; SZ13-NEXT: lgr %r13, %r2 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vrepg %v0, %v0, 1 +; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v1, %v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ldr %f0, %f8 +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: std %f0, 16(%r13) +; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Folded Reload +; SZ13-NEXT: vst %v0, 0(%r13), 4 +; SZ13-NEXT: lmg %r13, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %b = load <3 x double>, ptr %a + %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( + <3 x double> %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + store <3 x double> %tan, ptr %a + ret void +} + +define <4 x double> @constrained_vector_tan_v4f64() #0 { +; S390X-LABEL: constrained_vector_tan_v4f64: +; S390X: # %bb.0: # %entry +; S390X-NEXT: stmg %r14, %r15, 112(%r15) +; S390X-NEXT: .cfi_offset %r14, -48 +; S390X-NEXT: .cfi_offset %r15, -40 +; S390X-NEXT: aghi %r15, -184 +; S390X-NEXT: .cfi_def_cfa_offset 344 +; S390X-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f9, 168(%r15) # 8-byte Folded Spill +; S390X-NEXT: std %f10, 160(%r15) # 8-byte Folded Spill +; S390X-NEXT: .cfi_offset %f8, -168 +; S390X-NEXT: .cfi_offset %f9, -176 +; S390X-NEXT: .cfi_offset %f10, -184 +; S390X-NEXT: larl %r1, .LCPI123_0 +; S390X-NEXT: ld %f0, 0(%r1) +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI123_1 +; S390X-NEXT: ld %f1, 0(%r1) 
+; S390X-NEXT: ldr %f8, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI123_2 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldr %f9, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: larl %r1, .LCPI123_3 +; S390X-NEXT: ld %f1, 0(%r1) +; S390X-NEXT: ldr %f10, %f0 +; S390X-NEXT: ldr %f0, %f1 +; S390X-NEXT: brasl %r14, tan@PLT +; S390X-NEXT: ldr %f2, %f10 +; S390X-NEXT: ldr %f4, %f9 +; S390X-NEXT: ldr %f6, %f8 +; S390X-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f9, 168(%r15) # 8-byte Folded Reload +; S390X-NEXT: ld %f10, 160(%r15) # 8-byte Folded Reload +; S390X-NEXT: lmg %r14, %r15, 296(%r15) +; S390X-NEXT: br %r14 +; +; SZ13-LABEL: constrained_vector_tan_v4f64: +; SZ13: # %bb.0: # %entry +; SZ13-NEXT: stmg %r14, %r15, 112(%r15) +; SZ13-NEXT: .cfi_offset %r14, -48 +; SZ13-NEXT: .cfi_offset %r15, -40 +; SZ13-NEXT: aghi %r15, -192 +; SZ13-NEXT: .cfi_def_cfa_offset 352 +; SZ13-NEXT: larl %r1, .LCPI123_0 +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: larl %r1, .LCPI123_1 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: larl %r1, .LCPI123_2 +; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: larl %r1, .LCPI123_3 +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Folded Spill +; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: brasl %r14, tan@PLT +; SZ13-NEXT: vl %v1, 176(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: vl %v24, 160(%r15), 3 # 16-byte Folded Reload +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v26, %v0, %v1 +; SZ13-NEXT: lmg %r14, %r15, 304(%r15) +; SZ13-NEXT: br %r14 +entry: + %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( + <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %tan +} + attributes #0 = { strictfp } declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) @@ -6234,6 +6551,7 @@ declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -6260,6 +6578,7 @@ declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x f declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata,
metadata) +declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -6296,6 +6615,8 @@ declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -6335,6 +6656,7 @@ declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir index e79f4d1f989a91..eef1f43b278d9d 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir @@ -87,7 +87,8 @@ liveins: - { reg: '$rsi', virtual-reg: '%14' } frameInfo: maxAlignment: 1 -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | ; CHECK-LABEL: name: foo ; CHECK: bb.0.entry: diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir index d47bda0044115e..5843366baab6d1 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir @@ -34,7 +34,8 @@ liveins: - { reg: '$edi', virtual-reg: '%12' } frameInfo: maxAlignment: 1 -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | ; CHECK-LABEL: name: foo ; CHECK: bb.0.entry: diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir index 15d3eb6bdfebb6..4eb8b950851895 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir @@ -35,7 +35,8 @@ liveins: - { reg: '$edi', virtual-reg: '%12' } frameInfo: maxAlignment: 1 -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | ; CHECK-LABEL: name: foo ; CHECK: bb.0.entry: diff --git 
a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir index 98744bbe8e1473..1ed4328bf132a1 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir @@ -23,7 +23,8 @@ frameInfo: stack: - { id: 0, size: 1024, alignment: 16 } - { id: 1, size: 64, alignment: 4 } -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | ; CHECK-LABEL: name: foo ; CHECK: bb.0.entry: @@ -100,7 +101,8 @@ frameInfo: stack: - { id: 0, size: 1024, alignment: 16 } - { id: 1, size: 64, alignment: 4 } -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | ; CHECK-LABEL: name: copy ; CHECK: bb.0.entry: diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir index 84fc47a3a91202..561ba6f2f49709 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir @@ -77,7 +77,8 @@ liveins: - { reg: '$edx', virtual-reg: '%11' } frameInfo: maxAlignment: 1 -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | ; CHECK-LABEL: name: test_api ; CHECK: bb.0.entry: diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir index 40566520b79f01..0d56feac626814 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir @@ -23,7 +23,8 @@ frameInfo: stack: - { id: 0, size: 1024, alignment: 16 } - { id: 1, size: 64, alignment: 4 } -machineFunctionInfo: {} +machineFunctionInfo: + amxProgModel: ManagedRA body: | bb.0.entry: ; CHECK-LABEL: name: main @@ -79,6 +80,8 @@ registers: liveins: - { reg: '$rdi', virtual-reg: '' } - { reg: '$rsi', virtual-reg: '' } +machineFunctionInfo: + amxProgModel: ManagedRA body: | bb.1.entry: liveins: $rdi, $rsi diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 5969aae43f82e8..dde877c5bb61e5 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -709,3 +709,134 @@ define i128 @test_sextinreg_i128(i128 %a) nounwind { %res = call i128 @llvm.abs.i128(i128 %ashr, i1 true) ret i128 %res } + +define i8 @test_minsigned_i8(i8 %a0, i8 %a1) nounwind { +; X64-LABEL: test_minsigned_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb $7, %al +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: xorb %al, %cl +; X64-NEXT: subb %al, %cl +; X64-NEXT: cmpb $-128, %dil +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: test_minsigned_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpb $-128, %al +; X86-NEXT: jne .LBB17_1 +; X86-NEXT: # %bb.2: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl +; X86-NEXT: .LBB17_1: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: sarb $7, %cl +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: retl + %lim = icmp eq i8 %a0, -128 + %abs = tail call i8 @llvm.abs.i8(i8 %a0, i1 false) + %res = select i1 %lim, i8 %a1, i8 %abs + ret i8 %res +} + +define i16 @test_minsigned_i16(i16 %a0, i16 %a1) nounwind { +; X64-LABEL: test_minsigned_i16: +; X64: # %bb.0: +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: negw %ax +; X64-NEXT: cmovsw %cx, %ax +; X64-NEXT: cmpl $32768, 
%ecx # imm = 0x8000 +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: test_minsigned_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negw %ax +; X86-NEXT: cmovsw %cx, %ax +; X86-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X86-NEXT: jne .LBB18_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .LBB18_2: +; X86-NEXT: retl + %lim = icmp eq i16 %a0, -32768 + %abs = tail call i16 @llvm.abs.i16(i16 %a0, i1 false) + %res = select i1 %lim, i16 %a1, i16 %abs + ret i16 %res +} + +define i32 @test_minsigned_i32(i32 %a0, i32 %a1) nounwind { +; X64-LABEL: test_minsigned_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmovsl %edi, %eax +; X64-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000 +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: retq +; +; X86-LABEL: test_minsigned_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: cmovsl %ecx, %eax +; X86-NEXT: cmpl $-2147483648, %ecx # imm = 0x80000000 +; X86-NEXT: jne .LBB19_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: .LBB19_2: +; X86-NEXT: retl + %lim = icmp eq i32 %a0, -2147483648 + %abs = tail call i32 @llvm.abs.i32(i32 %a0, i1 false) + %res = select i1 %lim, i32 %a1, i32 %abs + ret i32 %res +} + +define i64 @test_minsigned_i64(i64 %a0, i64 %a1) nounwind { +; X64-LABEL: test_minsigned_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmovsq %rdi, %rax +; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; X64-NEXT: cmpq %rcx, %rdi +; X64-NEXT: cmoveq %rsi, %rax +; X64-NEXT: retq +; +; X86-LABEL: test_minsigned_i64: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal -2147483648(%edx), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: jne .LBB20_1 +; X86-NEXT: # %bb.2: # %select.end +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl +; X86-NEXT: .LBB20_1: # %select.false.sink +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: retl + %lim = icmp eq i64 %a0, -9223372036854775808 + %abs = tail call i64 @llvm.abs.i64(i64 %a0, i1 false) + %res = select i1 %lim, i64 %a1, i64 %abs + ret i64 %res +} diff --git a/llvm/test/CodeGen/X86/apple-version-min.ll b/llvm/test/CodeGen/X86/apple-version-min.ll new file mode 100644 index 00000000000000..fde10ac3b42483 --- /dev/null +++ b/llvm/test/CodeGen/X86/apple-version-min.ll @@ -0,0 +1,12 @@ +; Test emitting version_min directives. 
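The test drives llc over a module that contains no IR at all, so the directive is derived from the target triple alone. A minimal sketch of the equivalent module-level form, assuming llc's usual fallback to the module triple when --mtriple is omitted (this module is illustrative, not part of the patch):

; version-min.ll (hypothetical): an empty module suffices, since the
; directive depends only on the triple.
target triple = "x86_64-apple-tvos9.0.0"
; llc -filetype=asm on this module should print the directive checked
; below: .tvos_version_min 9, 0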
+ +; RUN: llc %s -filetype=asm -o - --mtriple x86_64-apple-tvos9.0.0-simulator | FileCheck %s --check-prefix=TVOS +; RUN: llc %s -filetype=asm -o - --mtriple x86_64-apple-tvos9.0.0 | FileCheck %s --check-prefix=TVOS +; RUN: llc %s -filetype=asm -o - --mtriple x86_64-apple-driverkit19.0.0 | FileCheck %s --check-prefix=DRIVERKIT +; RUN: llc %s -filetype=asm -o - --mtriple i386-apple-ios7.0.0-simulator | FileCheck %s --check-prefix=IOS +; RUN: llc %s -filetype=asm -o - --mtriple i386-apple-watchos2.0.0-simulator | FileCheck %s --check-prefix=WATCHOS + +; TVOS: .tvos_version_min 9, 0 +; DRIVERKIT: .build_version driverkit, 19, 0 +; IOS: .ios_version_min 7, 0 +; WATCHOS: .watchos_version_min 2, 0 diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index d2b45ee1e03e63..8c48e6f9da80a7 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2758,6 +2758,58 @@ entry: ret float %result } +; Verify that tan(42.0) isn't simplified when the rounding mode is unknown. +define double @ftan() #0 { +; X87-LABEL: ftan: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll tan +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: ftan: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll tan +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: ftan: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq tan@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: ftan: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.tan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + + attributes #0 = { strictfp } @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" @@ -2771,6 +2823,7 @@ declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll index 1bc308bef8cccf..cfec52c0e68863 100644 --- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll +++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll @@ -160,6 +160,23 @@ define float @sin(float %x) #0 { ret 
float %result } +define float @tan(float %x) #0 { +; CHECK-LABEL: tan: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait +; CHECK-NEXT: calll _tan +; CHECK-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl + %result = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %result +} + attributes #0 = { strictfp } declare float @llvm.experimental.constrained.ceil.f32(float, metadata) @@ -171,3 +188,4 @@ declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index f1d473f81a9fa1..bd51f553587db7 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1047,6 +1047,46 @@ entry: ret fp128 %sqrt } +define fp128 @tan(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: tan: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq tanl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: tan: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq tanf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: tan: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll tanl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %tan = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %tan +} + define fp128 @trunc(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: trunc: ; ANDROID: # %bb.0: # %entry @@ -1663,6 +1703,7 @@ declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata) declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll index 4d50b15e5c185b..89729975cfd61b 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll @@ -504,6 +504,31 @@ entry: ret x86_fp80 %sin } 
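Each tan test added in this patch passes the same two metadata operands to the constrained intrinsic. As a reminder of what those operands assert (general constrained-FP semantics from LangRef, nothing specific to this change; @tan_strict is a hypothetical name), a scalar sketch:

; !"round.dynamic"   - the rounding mode is unknown at compile time, so a
;                      call such as tan(42.0) may not be constant-folded.
; !"fpexcept.strict" - floating-point exception side effects are observable,
;                      so the call may not be speculated, reordered, or removed.
define double @tan_strict(double %x) strictfp {
  %r = call double @llvm.experimental.constrained.tan.f64(double %x,
           metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
  ret double %r
}
declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)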
+define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: tan: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll tanl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: tan: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq tanl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %tan = call x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %tan +} + define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: trunc: ; X86: # %bb.0: # %entry @@ -650,6 +675,7 @@ declare x86_fp80 @llvm.experimental.constrained.rint.f80(x86_fp80, metadata, met declare x86_fp80 @llvm.experimental.constrained.round.f80(x86_fp80, metadata) declare x86_fp80 @llvm.experimental.constrained.roundeven.f80(x86_fp80, metadata) declare x86_fp80 @llvm.experimental.constrained.sin.f80(x86_fp80, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80, metadata, metadata) declare i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll index 2046d790cc57e4..999be0f98b6fc5 100644 --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -1,1336 +1,1414 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefix=CHECK-32 -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-GENERIC +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+ndd | FileCheck %s -check-prefixes=X64,X64-NDD define i1 @isnan_f(float %x) { -; CHECK-32-LABEL: isnan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-64-NEXT: setp %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %al +; X86-NEXT: retl +; +; X64-LABEL: isnan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: setp %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; "nan" ret i1 %0 } define i1 @isnot_nan_f(float %x) { -; CHECK-32-LABEL: isnot_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setnp %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnot_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ucomiss %xmm0, %xmm0 -; 
CHECK-64-NEXT: setnp %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnot_nan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %al +; X86-NEXT: retl +; +; X64-LABEL: isnot_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: setnp %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1020) ; 0x3fc = "zero|subnormal|normal|inf" ret i1 %0 } define i1 @issignaling_f(float %x) { -; CHECK-32-LABEL: issignaling_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issignaling_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %cl -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: issignaling_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setl %cl +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: issignaling_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %cl +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" ret i1 %0 } define i1 @not_issignaling_f(float %x) { -; CHECK-32-LABEL: not_issignaling_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issignaling_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issignaling_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %cl +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setl %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_issignaling_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; 
X64-NEXT: setge %cl +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setl %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1022) ; ~"snan" ret i1 %0 } define i1 @isquiet_f(float %x) { -; CHECK-32-LABEL: isquiet_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isquiet_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: retq +; X86-LABEL: isquiet_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-LABEL: isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" ret i1 %0 } define i1 @not_isquiet_f(float %x) { -; CHECK-32-LABEL: not_isquiet_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isquiet_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isquiet_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-LABEL: not_isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" ret i1 %0 } define i1 @isinf_f(float %x) { -; CHECK-32-LABEL: isinf_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isinf_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: isinf_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: 
sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" ret i1 %0 } define i1 @not_isinf_f(float %x) { -; CHECK-32-LABEL: not_isinf_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isinf_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isinf_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" ret i1 %0 } define i1 @is_plus_inf_f(float %x) { -; CHECK-32-LABEL: is_plus_inf_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_inf_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_inf_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" ret i1 %0 } define i1 @is_minus_inf_f(float %x) { -; CHECK-32-LABEL: is_minus_inf_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_inf_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_minus_inf_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" ret i1 %0 } define i1 @not_is_minus_inf_f(float %x) { -; CHECK-32-LABEL: not_is_minus_inf_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_inf_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_inf_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl 
$-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" ret i1 %0 } define i1 @isfinite_f(float %x) { -; CHECK-32-LABEL: isfinite_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isfinite_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: retq +; X86-LABEL: isfinite_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-LABEL: isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" ret i1 %0 } define i1 @not_isfinite_f(float %x) { -; CHECK-32-LABEL: not_isfinite_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isfinite_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isfinite_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-LABEL: not_isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" ret i1 %0 } define i1 @is_plus_finite_f(float %x) { -; CHECK-32-LABEL: is_plus_finite_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_finite_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_finite_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_finite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setb %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, 
i32 448) ; 0x1c0 = "+finite" ret i1 %0 } define i1 @not_is_plus_finite_f(float %x) { -; CHECK-32-LABEL: not_is_plus_finite_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_plus_finite_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_plus_finite_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: setae %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_plus_finite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setae %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 575) ; ~0x1c0 = ~"+finite" ret i1 %0 } define i1 @is_minus_finite_f(float %x) { -; CHECK-32-LABEL: is_minus_finite_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sets %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_finite_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sets %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_minus_finite_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sets %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_minus_finite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 = "-finite" ret i1 %0 } define i1 @not_is_minus_finite_f(float %x) { -; CHECK-32-LABEL: not_is_minus_finite_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: setns %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_finite_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: setns %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_finite_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setns %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095040, %eax # 
imm = 0x7F800000 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_minus_finite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setns %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 967) ; ~0x38 = ~"-finite" ret i1 %0 } define i1 @isnormal_f(float %x) #1 { -; CHECK-32-LABEL: isnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: isnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NEXT: setb %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 264) ; 0x108 = "normal" ret i1 %0 } define i1 @not_isnormal_f(float %x) #1 { -; CHECK-32-LABEL: not_isnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setae %al +; X86-NEXT: retl +; +; X64-LABEL: not_isnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NEXT: setae %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 759) ; ~0x108 = "~normal" ret i1 %0 } define i1 @is_plus_normal_f(float %x) { -; CHECK-32-LABEL: is_plus_normal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: setns %cl -; CHECK-32-NEXT: andl $2147483647, %eax 
# imm = 0x7FFFFFFF -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_normal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: setns %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_normal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setns %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_normal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setns %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NEXT: setb %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 256) ; 0x100 = "+normal" ret i1 %0 } define i1 @issubnormal_f(float %x) { -; CHECK-32-LABEL: issubnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setb %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 144) ; 0x90 = "subnormal" ret i1 %0 } define i1 @issubnormal_f_daz(float %x) #0 { -; CHECK-32-LABEL: issubnormal_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setb %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 144) ; 0x90 = "subnormal" ret i1 %0 } define i1 @issubnormal_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: issubnormal_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setb %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 144) ; 0x90 = "subnormal" ret i1 %0 } define i1 @not_issubnormal_f(float %x) { -; CHECK-32-LABEL: not_issubnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setae %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setae %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 879) ; ~0x90 = "~subnormal" ret i1 %0 } define i1 @not_issubnormal_f_daz(float %x) #0 { -; CHECK-32-LABEL: not_issubnormal_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax 
-; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setae %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setae %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 879) ; ~0x90 = "~subnormal" ret i1 %0 } define i1 @not_issubnormal_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: not_issubnormal_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setae %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setae %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 879) ; ~0x90 = "~subnormal" ret i1 %0 } define i1 @is_plus_subnormal_f(float %x) { -; CHECK-32-LABEL: is_plus_subnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_subnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_subnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_subnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setb %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 128) ; 0x80 = "+subnormal" ret i1 %0 } define i1 @not_is_plus_subnormal_f(float %x) { -; CHECK-32-LABEL: not_is_plus_subnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: 
not_is_plus_subnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_plus_subnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setae %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_plus_subnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setae %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 895) ; ~0x80 = ~"+subnormal" ret i1 %0 } define i1 @is_minus_subnormal_f(float %x) { -; CHECK-32-LABEL: is_minus_subnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sets %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_subnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sets %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_minus_subnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sets %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setb %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_minus_subnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setb %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 16) ; 0x10 = "-subnormal" ret i1 %0 } define i1 @not_is_minus_subnormal_f(float %x) { -; CHECK-32-LABEL: not_is_minus_subnormal_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: setns %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setae %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_subnormal_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: setns %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setae %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_subnormal_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setns %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: 
setae %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_minus_subnormal_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setns %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setae %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1007) ; ~0x10 = ~"-subnormal" ret i1 %0 } define i1 @iszero_f(float %x) { -; CHECK-32-LABEL: iszero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 96) ; 0x60 = "zero" ret i1 %0 } define i1 @iszero_f_daz(float %x) #0 { -; CHECK-32-LABEL: iszero_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 96) ; 0x60 = "zero" ret i1 %0 } define i1 @iszero_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: iszero_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 96) ; 0x60 = "zero" ret i1 %0 } define i1 @not_iszero_f(float %x) { -; CHECK-32-LABEL: not_iszero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: setne %al 
-; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 927) ; ~0x60 = "~zero" ret i1 %0 } define i1 @not_iszero_f_daz(float %x) #0 { -; CHECK-32-LABEL: not_iszero_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 927) ; ~0x60 = "~zero" ret i1 %0 } define i1 @not_iszero_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: not_iszero_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 927) ; ~0x60 = "~zero" ret i1 %0 } define i1 @issubnormal_or_zero_f(float %x) { -; CHECK-32-LABEL: issubnormal_or_zero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_zero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_zero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero" ret i1 %0 } define i1 @issubnormal_or_zero_f_daz(float %x) #0 { -; CHECK-32-LABEL: issubnormal_or_zero_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_zero_f_daz: -; CHECK-64: # %bb.0: # 
%entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_zero_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero" ret i1 %0 } define i1 @issubnormal_or_zero_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: issubnormal_or_zero_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_zero_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_zero_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 240) ; 0xf0 = "subnormal|zero" ret i1 %0 } define i1 @not_issubnormal_or_zero_f(float %x) { -; CHECK-32-LABEL: not_issubnormal_or_zero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_or_zero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" ret i1 %0 } define i1 @not_issubnormal_or_zero_f_daz(float %x) #0 { -; CHECK-32-LABEL: not_issubnormal_or_zero_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_or_zero_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" ret i1 %0 } define 
i1 @not_issubnormal_or_zero_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: not_issubnormal_or_zero_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_or_zero_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" ret i1 %0 } define i1 @is_plus_zero_f(float %x) { -; CHECK-32-LABEL: is_plus_zero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_zero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_zero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_zero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 64) ; 0x40 = "+zero" ret i1 %0 } define i1 @not_is_plus_zero_f(float %x) { -; CHECK-32-LABEL: not_is_plus_zero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_plus_zero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_plus_zero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_plus_zero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 959) ; ~0x40 = ~"+zero" ret i1 %0 } define i1 @is_minus_zero_f(float %x) { -; CHECK-32-LABEL: is_minus_zero_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_zero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_minus_zero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: is_minus_zero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 32) ; 0x20 = "-zero" ret i1 %0 } define i1 @not_is_minus_zero_f(float %x) { -; CHECK-32-LABEL: not_is_minus_zero_f: -; 
CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_zero_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_zero_f: +; X86: # %bb.0: # %entry +; X86-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_minus_zero_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 991) ; ~0x20 = ~"-zero" ret i1 %0 } define i1 @isnan_f_strictfp(float %x) strictfp { -; CHECK-32-LABEL: isnan_f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-LABEL: isnan_f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 3) strictfp ; "nan" ret i1 %0 } define i1 @not_isnan_f_strictfp(float %x) strictfp { -; CHECK-32-LABEL: not_isnan_f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isnan_f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isnan_f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-LABEL: not_isnan_f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setl %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1020) strictfp ; ~"nan" ret i1 %0 } define i1 @isfinite_f_strictfp(float %x) strictfp { -; CHECK-32-LABEL: isfinite_f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl 
%al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isfinite_f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: retq +; X86-LABEL: isfinite_f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-LABEL: isfinite_f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) strictfp ; 0x1f8 = "finite" ret i1 %0 } define i1 @not_isfinite_f_strictfp(float %x) strictfp { -; CHECK-32-LABEL: not_isfinite_f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isfinite_f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isfinite_f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-LABEL: not_isfinite_f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) strictfp ; ~0x1f8 = ~"finite" ret i1 %0 } define i1 @iszero_f_strictfp(float %x) strictfp { -; CHECK-32-LABEL: iszero_f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 96) strictfp ; 0x60 = "zero" ret i1 %0 } define i1 @not_iszero_f_strictfp(float %x) strictfp { -; CHECK-32-LABEL: not_iszero_f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; 
X86-LABEL: not_iszero_f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 927) strictfp ; ~0x60 = ~"zero" ret i1 %0 } define i1 @isnan_d(double %x) { -; CHECK-32-LABEL: isnan_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: fldl {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ucomisd %xmm0, %xmm0 -; CHECK-64-NEXT: setp %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_d: +; X86: # %bb.0: # %entry +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %al +; X86-NEXT: retl +; +; X64-LABEL: isnan_d: +; X64: # %bb.0: # %entry +; X64-NEXT: ucomisd %xmm0, %xmm0 +; X64-NEXT: setp %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; "nan" ret i1 %0 } define i1 @isinf_d(double %x) { -; CHECK-32-LABEL: isinf_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: xorl $2146435072, %eax # imm = 0x7FF00000 -; CHECK-32-NEXT: orl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isinf_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: isinf_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl $2146435072, %eax # imm = 0x7FF00000 +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: isinf_d: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: sete %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: isinf_d: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000 +; X64-NDD-NEXT: cmpq %rcx, %rax +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 516) ; 0x204 = "inf" ret i1 %0 } define i1 @isfinite_d(double %x) { -; CHECK-32-LABEL: isfinite_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000 -; CHECK-32-NEXT: setl %al -; 
CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isfinite_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: retq +; X86-LABEL: isfinite_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000 +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: isfinite_d: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: setl %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: isfinite_d: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000 +; X64-NDD-NEXT: cmpq %rcx, %rax +; X64-NDD-NEXT: setl %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite" ret i1 %0 } define i1 @isnormal_d(double %x) { -; CHECK-32-LABEL: isnormal_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: addl $-1048576, %eax # imm = 0xFFF00000 -; CHECK-32-NEXT: shrl $21, %eax -; CHECK-32-NEXT: cmpl $1023, %eax # imm = 0x3FF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnormal_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: movabsq $-4503599627370496, %rax # imm = 0xFFF0000000000000 -; CHECK-64-NEXT: addq %rcx, %rax -; CHECK-64-NEXT: shrq $53, %rax -; CHECK-64-NEXT: cmpl $1023, %eax # imm = 0x3FF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnormal_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl $-1048576, %eax # imm = 0xFFF00000 +; X86-NEXT: shrl $21, %eax +; X86-NEXT: cmpl $1023, %eax # imm = 0x3FF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: isnormal_d: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: movabsq $-4503599627370496, %rax # imm = 0xFFF0000000000000 +; X64-GENERIC-NEXT: addq %rcx, %rax +; X64-GENERIC-NEXT: shrq $53, %rax +; X64-GENERIC-NEXT: cmpl $1023, %eax # imm = 0x3FF +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: isnormal_d: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: movabsq $-4503599627370496, %rcx # imm = 0xFFF0000000000000 +; X64-NDD-NEXT: addq %rcx, %rax +; X64-NDD-NEXT: shrq $53, %rax +; X64-NDD-NEXT: cmpl $1023, %eax # 
imm = 0x3FF +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 264) ; 0x108 = "normal" ret i1 %0 } define i1 @issubnormal_d(double %x) { -; CHECK-32-LABEL: issubnormal_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: addl $-1, %eax -; CHECK-32-NEXT: adcl $-1, %ecx -; CHECK-32-NEXT: cmpl $-1, %eax -; CHECK-32-NEXT: sbbl $1048575, %ecx # imm = 0xFFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: decq %rcx -; CHECK-64-NEXT: movabsq $4503599627370495, %rax # imm = 0xFFFFFFFFFFFFF -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: addl $-1, %eax +; X86-NEXT: adcl $-1, %ecx +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: sbbl $1048575, %ecx # imm = 0xFFFFF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: issubnormal_d: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: decq %rcx +; X64-GENERIC-NEXT: movabsq $4503599627370495, %rax # imm = 0xFFFFFFFFFFFFF +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: issubnormal_d: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: decq %rax +; X64-NDD-NEXT: movabsq $4503599627370495, %rcx # imm = 0xFFFFFFFFFFFFF +; X64-NDD-NEXT: cmpq %rcx, %rax +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 144) ; 0x90 = "subnormal" ret i1 %0 } define i1 @iszero_d(double %x) { -; CHECK-32-LABEL: iszero_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: orl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: shlq %rax -; CHECK-64-NEXT: testq %rax, %rax -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_d: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: shlq %rax +; X64-NEXT: testq %rax, %rax +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 96) ; 0x60 = "zero" ret i1 %0 } define i1 @issignaling_d(double %x) { -; CHECK-32-LABEL: issignaling_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: xorl %ecx, %ecx -; 
CHECK-32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: movl $2146435072, %ecx # imm = 0x7FF00000 -; CHECK-32-NEXT: sbbl %eax, %ecx -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: cmpl $2146959360, %eax # imm = 0x7FF80000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issignaling_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: setg %al -; CHECK-64-NEXT: andb %dl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: issignaling_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $2146435072, %ecx # imm = 0x7FF00000 +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: setl %cl +; X86-NEXT: cmpl $2146959360, %eax # imm = 0x7FF80000 +; X86-NEXT: setl %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: issignaling_d: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: setg %al +; X64-GENERIC-NEXT: andb %dl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: issignaling_d: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: movabsq $9221120237041090560, %rcx # imm = 0x7FF8000000000000 +; X64-NDD-NEXT: cmpq %rcx, %rax +; X64-NDD-NEXT: setl %cl +; X64-NDD-NEXT: movabsq $9218868437227405312, %rdx # imm = 0x7FF0000000000000 +; X64-NDD-NEXT: cmpq %rdx, %rax +; X64-NDD-NEXT: setg %al +; X64-NDD-NEXT: andb %cl, %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1) ; "snan" ret i1 %0 } define i1 @isquiet_d(double %x) { -; CHECK-32-LABEL: isquiet_d: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2146959360, %eax # imm = 0x7FF80000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isquiet_d: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: movabsq $9221120237041090559, %rax # imm = 0x7FF7FFFFFFFFFFFF -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: setg %al -; CHECK-64-NEXT: retq +; X86-LABEL: isquiet_d: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2146959360, %eax # imm = 0x7FF80000 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: isquiet_d: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: 
movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: movabsq $9221120237041090559, %rax # imm = 0x7FF7FFFFFFFFFFFF +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: setg %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: isquiet_d: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: movabsq $9221120237041090559, %rcx # imm = 0x7FF7FFFFFFFFFFFF +; X64-NDD-NEXT: cmpq %rcx, %rax +; X64-NDD-NEXT: setg %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 2) ; "qnan" ret i1 %0 } define i1 @isnan_d_strictfp(double %x) strictfp { -; CHECK-32-LABEL: isnan_d_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: xorl %ecx, %ecx -; CHECK-32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: movl $2146435072, %ecx # imm = 0x7FF00000 -; CHECK-32-NEXT: sbbl %eax, %ecx -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_d_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-64-NEXT: andq %rax, %rcx -; CHECK-64-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 -; CHECK-64-NEXT: cmpq %rax, %rcx -; CHECK-64-NEXT: setg %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_d_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $2146435072, %ecx # imm = 0x7FF00000 +; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: isnan_d_strictfp: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movq %xmm0, %rax +; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-GENERIC-NEXT: andq %rax, %rcx +; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; X64-GENERIC-NEXT: cmpq %rax, %rcx +; X64-GENERIC-NEXT: setg %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: isnan_d_strictfp: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movq %xmm0, %rax +; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; X64-NDD-NEXT: andq %rcx, %rax +; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000 +; X64-NDD-NEXT: cmpq %rcx, %rax +; X64-NDD-NEXT: setg %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 3) strictfp ; "nan" ret i1 %0 } define i1 @iszero_d_strictfp(double %x) strictfp { -; CHECK-32-LABEL: iszero_d_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: orl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_d_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: shlq %rax -; CHECK-64-NEXT: testq %rax, %rax -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_d_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_d_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: shlq %rax +; X64-NEXT: testq %rax, %rax +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 96) strictfp ; 0x60 = "zero" ret i1 %0 @@ -1339,70 +1417,70 @@ entry: define <1 x i1> @isnan_v1f(<1 x float> %x) { -; CHECK-32-LABEL: isnan_v1f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_v1f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-64-NEXT: setp %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_v1f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %al +; X86-NEXT: retl +; +; X64-LABEL: isnan_v1f: +; X64: # %bb.0: # %entry +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: setp %al +; X64-NEXT: retq entry: %0 = tail call <1 x i1> @llvm.is.fpclass.v1f32(<1 x float> %x, i32 3) ; "nan" ret <1 x i1> %0 } define <1 x i1> @isnan_v1f_strictfp(<1 x float> %x) strictfp { -; CHECK-32-LABEL: isnan_v1f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_v1f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_v1f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-LABEL: isnan_v1f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: retq entry: %0 = tail call <1 x i1> @llvm.is.fpclass.v1f32(<1 x float> %x, i32 3) strictfp ; "nan" ret <1 x i1> %0 } define <2 x i1> @isnan_v2f(<2 x float> %x) { -; CHECK-32-LABEL: isnan_v2f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %cl -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %dl -; CHECK-32-NEXT: movl %ecx, %eax -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_v2f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: cmpunordps %xmm0, %xmm0 -; CHECK-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_v2f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah 
killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %cl +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %dl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: isnan_v2f: +; X64: # %bb.0: # %entry +; X64-NEXT: cmpunordps %xmm0, %xmm0 +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = tail call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 3) ; "nan" ret <2 x i1> %0 @@ -1410,173 +1488,173 @@ entry: define <2 x i1> @isnot_nan_v2f(<2 x float> %x) { -; CHECK-32-LABEL: isnot_nan_v2f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setnp %cl -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setnp %dl -; CHECK-32-NEXT: movl %ecx, %eax -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnot_nan_v2f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: cmpordps %xmm0, %xmm0 -; CHECK-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; CHECK-64-NEXT: retq +; X86-LABEL: isnot_nan_v2f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %cl +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setnp %dl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl +; +; X64-LABEL: isnot_nan_v2f: +; X64: # %bb.0: # %entry +; X64-NEXT: cmpordps %xmm0, %xmm0 +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = tail call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 1020) ; 0x3fc = "zero|subnormal|normal|inf" ret <2 x i1> %0 } define <2 x i1> @isnan_v2f_strictfp(<2 x float> %x) strictfp { -; CHECK-32-LABEL: isnan_v2f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: andl %ecx, %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dl -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isnan_v2f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; CHECK-64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-64-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_v2f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl %ecx, %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %dl +; X86-NEXT: retl +; +; X64-LABEL: isnan_v2f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: retq entry: %0 = tail call <2 x i1> 
@llvm.is.fpclass.v2f32(<2 x float> %x, i32 3) strictfp ; "nan" ret <2 x i1> %0 } define <4 x i1> @isnan_v4f(<4 x float> %x) { -; CHECK-32-LABEL: isnan_v4f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %dh -; CHECK-32-NEXT: shlb $2, %dh -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %dl -; CHECK-32-NEXT: shlb $3, %dl -; CHECK-32-NEXT: orb %dh, %dl -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %dh -; CHECK-32-NEXT: fucomp %st(0) -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setp %al -; CHECK-32-NEXT: addb %al, %al -; CHECK-32-NEXT: orb %dh, %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: movb %al, (%ecx) -; CHECK-32-NEXT: movl %ecx, %eax -; CHECK-32-NEXT: retl $4 -; -; CHECK-64-LABEL: isnan_v4f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: cmpunordps %xmm0, %xmm0 -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_v4f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %dh +; X86-NEXT: shlb $2, %dh +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %dl +; X86-NEXT: shlb $3, %dl +; X86-NEXT: orb %dh, %dl +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %dh +; X86-NEXT: fucomp %st(0) +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setp %al +; X86-NEXT: addb %al, %al +; X86-NEXT: orb %dh, %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: movb %al, (%ecx) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl $4 +; +; X64-LABEL: isnan_v4f: +; X64: # %bb.0: # %entry +; X64-NEXT: cmpunordps %xmm0, %xmm0 +; X64-NEXT: retq entry: %0 = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 3) ; "nan" ret <4 x i1> %0 } define <4 x i1> @isnan_v4f_strictfp(<4 x float> %x) strictfp { -; CHECK-32-LABEL: isnan_v4f_strictfp: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: pushl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 8 -; CHECK-32-NEXT: .cfi_offset %esi, -8 -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-32-NEXT: andl %ecx, %edx -; CHECK-32-NEXT: cmpl $2139095041, %edx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dh -; CHECK-32-NEXT: shlb $2, %dh -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-32-NEXT: andl %ecx, %esi -; CHECK-32-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dl -; CHECK-32-NEXT: shlb $3, %dl -; CHECK-32-NEXT: orb %dh, %dl -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi -; 
CHECK-32-NEXT: andl %ecx, %esi -; CHECK-32-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dh -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %ecx -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: addb %cl, %cl -; CHECK-32-NEXT: orb %dh, %cl -; CHECK-32-NEXT: orb %dl, %cl -; CHECK-32-NEXT: movb %cl, (%eax) -; CHECK-32-NEXT: popl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 4 -; CHECK-32-NEXT: retl $4 -; -; CHECK-64-LABEL: isnan_v4f_strictfp: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-64-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-64-NEXT: retq +; X86-LABEL: isnan_v4f_strictfp: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: cmpl $2139095041, %edx # imm = 0x7F800001 +; X86-NEXT: setge %dh +; X86-NEXT: shlb $2, %dh +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001 +; X86-NEXT: setge %dl +; X86-NEXT: shlb $3, %dl +; X86-NEXT: orb %dh, %dl +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: andl %ecx, %esi +; X86-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001 +; X86-NEXT: setge %dh +; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: addb %cl, %cl +; X86-NEXT: orb %dh, %cl +; X86-NEXT: orb %dl, %cl +; X86-NEXT: movb %cl, (%eax) +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: isnan_v4f_strictfp: +; X64: # %bb.0: # %entry +; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: retq entry: %0 = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 3) strictfp ; "nan" ret <4 x i1> %0 } define i1 @isnone_f(float %x) { -; CHECK-32-LABEL: isnone_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: xorl %eax, %eax -; CHECK-32-NEXT: retl +; X86-LABEL: isnone_f: +; X86: # %bb.0: # %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: retl ; -; CHECK-64-LABEL: isnone_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: xorl %eax, %eax -; CHECK-64-NEXT: retq +; X64-LABEL: isnone_f: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) ret i1 %0 } define i1 @isany_f(float %x) { -; CHECK-32-LABEL: isany_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movb $1, %al -; CHECK-32-NEXT: retl +; X86-LABEL: isany_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movb $1, %al +; X86-NEXT: retl ; -; CHECK-64-LABEL: isany_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movb $1, %al -; CHECK-64-NEXT: retq +; X64-LABEL: isany_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) ret i1 %0 @@ -1584,1056 +1662,1263 @@ entry: define i1 @iszero_or_nan_f(float %x) { -; CHECK-32-LABEL: iszero_or_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fldz -; CHECK-32-NEXT: fucompp -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: 
iszero_or_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: xorps %xmm1, %xmm1 -; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_or_nan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fldz +; X86-NEXT: fucompp +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_or_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: ucomiss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 99) ; 0x60|0x3 = "zero|nan" ret i1 %0 } define i1 @iszero_or_nan_f_daz(float %x) #0 { -; CHECK-32-LABEL: iszero_or_nan_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_or_nan_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_or_nan_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_or_nan_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %cl +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 99) ; 0x60|0x3 = "zero|nan" ret i1 %0 } define i1 @iszero_or_nan_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: iszero_or_nan_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_or_nan_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_or_nan_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_or_nan_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm 
= 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %cl +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 99) ; 0x60|0x3 = "zero|nan" ret i1 %0 } define i1 @not_iszero_or_nan_f(float %x) { -; CHECK-32-LABEL: not_iszero_or_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fldz -; CHECK-32-NEXT: fucompp -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_or_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: xorps %xmm1, %xmm1 -; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_or_nan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fldz +; X86-NEXT: fucompp +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_or_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: ucomiss %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" ret i1 %0 } define i1 @not_iszero_or_nan_f_daz(float %x) #0 { -; CHECK-32-LABEL: not_iszero_or_nan_f_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_or_nan_f_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setl %cl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_or_nan_f_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setl %cl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_or_nan_f_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setl %cl +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" ret i1 %0 } define i1 @not_iszero_or_nan_f_maybe_daz(float %x) #1 { -; CHECK-32-LABEL: not_iszero_or_nan_f_maybe_daz: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_or_nan_f_maybe_daz: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -;
CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setl %cl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_or_nan_f_maybe_daz: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setl %cl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_iszero_or_nan_f_maybe_daz: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setl %cl +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" ret i1 %0 } define i1 @iszero_or_qnan_f(float %x) { -; CHECK-32-LABEL: iszero_or_qnan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_or_qnan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_or_qnan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %cl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: iszero_or_qnan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %cl +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 98) ; 0x60|0x2 = "zero|qnan" ret i1 %0 } define i1 @iszero_or_snan_f(float %x) { -; CHECK-32-LABEL: iszero_or_snan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dl -; CHECK-32-NEXT: andb %cl, %dl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: iszero_or_snan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %cl -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %dl -; CHECK-64-NEXT: andb %cl, %dl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sete %al 
-; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: iszero_or_snan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setl %cl +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %dl +; X86-NEXT: andb %cl, %dl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: iszero_or_snan_f: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %cl +; X64-GENERIC-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %dl +; X64-GENERIC-NEXT: andb %cl, %dl +; X64-GENERIC-NEXT: testl %eax, %eax +; X64-GENERIC-NEXT: sete %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: iszero_or_snan_f: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %cl +; X64-NDD-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NDD-NEXT: setge %dl +; X64-NDD-NEXT: andb %dl, %cl +; X64-NDD-NEXT: testl %eax, %eax +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 97) ; 0x60|0x1 = "zero|snan" ret i1 %0 } define i1 @not_iszero_or_qnan_f(float %x) { -; CHECK-32-LABEL: not_iszero_or_qnan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: pushl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 8 -; CHECK-32-NEXT: .cfi_offset %esi, -8 -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dl -; CHECK-32-NEXT: andb %cl, %dl -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: leal -1(%eax), %esi -; CHECK-32-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %ch -; CHECK-32-NEXT: orb %cl, %ch -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: orb %ch, %al -; CHECK-32-NEXT: popl %esi -; CHECK-32-NEXT: .cfi_def_cfa_offset 4 -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_or_qnan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %cl -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %dl -; CHECK-64-NEXT: andb %cl, %dl -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: leal -1(%rax), %esi -; CHECK-64-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %sil -; CHECK-64-NEXT: orb %cl, %sil -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: orb %sil, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_or_qnan_f: +; 
X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setl %cl +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %dl +; X86-NEXT: andb %cl, %dl +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: leal -1(%eax), %esi +; X86-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF +; X86-NEXT: setb %ch +; X86-NEXT: orb %cl, %ch +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: orb %ch, %al +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_iszero_or_qnan_f: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %cl +; X64-GENERIC-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %dl +; X64-GENERIC-NEXT: andb %cl, %dl +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %cl +; X64-GENERIC-NEXT: leal -1(%rax), %esi +; X64-GENERIC-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF +; X64-GENERIC-NEXT: setb %sil +; X64-GENERIC-NEXT: orb %cl, %sil +; X64-GENERIC-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: orb %sil, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_iszero_or_qnan_f: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %cl +; X64-NDD-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NDD-NEXT: setge %dl +; X64-NDD-NEXT: andb %dl, %cl +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %dl +; X64-NDD-NEXT: decl %eax, %esi +; X64-NDD-NEXT: cmpl $8388607, %esi # imm = 0x7FFFFF +; X64-NDD-NEXT: setb %sil +; X64-NDD-NEXT: orb %sil, %dl +; X64-NDD-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: orb %dl, %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 925) ; ~(0x60|0x2) = "~(zero|qnan)" ret i1 %0 } define i1 @not_iszero_or_snan_f(float %x) { -; CHECK-32-LABEL: not_iszero_or_snan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: leal -1(%eax), %edx -; CHECK-32-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %dl -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %ch -; CHECK-32-NEXT: orb %cl, %ch -; CHECK-32-NEXT: orb %dl, %ch -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %ch, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_iszero_or_snan_f: -; CHECK-64: # %bb.0: # %entry 
-; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: leal -1(%rax), %edx -; CHECK-64-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %dl -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %sil -; CHECK-64-NEXT: orb %cl, %sil -; CHECK-64-NEXT: orb %dl, %sil -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %sil, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_iszero_or_snan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: leal -1(%eax), %edx +; X86-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF +; X86-NEXT: setb %dl +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %ch +; X86-NEXT: orb %cl, %ch +; X86-NEXT: orb %dl, %ch +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: orb %ch, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_iszero_or_snan_f: +; X64-GENERIC: # %bb.0: # %entry +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %cl +; X64-GENERIC-NEXT: leal -1(%rax), %edx +; X64-GENERIC-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF +; X64-GENERIC-NEXT: setb %dl +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setge %sil +; X64-GENERIC-NEXT: orb %cl, %sil +; X64-GENERIC-NEXT: orb %dl, %sil +; X64-GENERIC-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: orb %sil, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_iszero_or_snan_f: +; X64-NDD: # %bb.0: # %entry +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %cl +; X64-NDD-NEXT: decl %eax, %edx +; X64-NDD-NEXT: cmpl $8388607, %edx # imm = 0x7FFFFF +; X64-NDD-NEXT: setb %dl +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setge %sil +; X64-NDD-NEXT: orb %sil, %cl +; X64-NDD-NEXT: orb %dl, %cl +; X64-NDD-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 926) ; ~(0x60|0x1) = "~(zero|snan)" ret i1 %0 } define i1 @isinf_or_nan_f(float %x) { -; CHECK-32-LABEL: isinf_or_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isinf_or_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: retq +; X86-LABEL: isinf_or_nan_f: +; X86: # %bb.0: # %entry +; 
X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setge %al +; X86-NEXT: retl +; +; X64-LABEL: isinf_or_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; 0x204|0x3 = "inf|nan" ret i1 %0 } define i1 @not_isinf_or_nan_f(float %x) { -; CHECK-32-LABEL: not_isinf_or_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isinf_or_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_isinf_or_nan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: retl +; +; X64-LABEL: not_isinf_or_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)" ret i1 %0 } define i1 @isfinite_or_nan_f(float %x) { -; CHECK-32-LABEL: isfinite_or_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: isfinite_or_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: isfinite_or_nan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: isfinite_or_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; 0x1f8|0x3 = "finite|nan" ret i1 %0 } define i1 @not_isfinite_or_nan_f(float %x) { -; CHECK-32-LABEL: not_isfinite_or_nan_f: -; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_isfinite_or_nan_f: -; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq 
+; X86-LABEL: not_isfinite_or_nan_f: +; X86: # %bb.0: # %entry +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: not_isfinite_or_nan_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; ~(0x1f8|0x3) = "~(finite|nan)" ret i1 %0 } define i1 @is_plus_inf_or_nan_f(float %x) { -; CHECK-32-LABEL: is_plus_inf_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_inf_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_inf_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_inf_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 515) ; 0x200|0x3 = "+inf|nan" ret i1 %class } define i1 @is_minus_inf_or_nan_f(float %x) { -; CHECK-32-LABEL: is_minus_inf_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_inf_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_minus_inf_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_minus_inf_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; 
X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 7) ; "-inf|nan" ret i1 %class } define i1 @not_is_plus_inf_or_nan_f(float %x) { -; CHECK-32-LABEL: not_is_plus_inf_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_plus_inf_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_plus_inf_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_plus_inf_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 508) ; ~(0x200|0x3) = "~(+inf|nan)" ret i1 %class } define i1 @not_is_minus_inf_or_nan_f(float %x) { -; CHECK-32-LABEL: not_is_minus_inf_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_inf_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_inf_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_is_minus_inf_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1016) ; "~(-inf|nan)" ret i1 
%class } define i1 @is_plus_inf_or_snan_f(float %x) { -; CHECK-32-LABEL: is_plus_inf_or_snan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl %eax, %ecx -; CHECK-32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: andb %dl, %cl -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_inf_or_snan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: movl %eax, %ecx -; CHECK-64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: andb %dl, %cl -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_inf_or_snan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X86-NEXT: setl %dl +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: andb %dl, %cl +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: is_plus_inf_or_snan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: movl %eax, %ecx +; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %cl +; X64-GENERIC-NEXT: andb %dl, %cl +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %al +; X64-GENERIC-NEXT: orb %cl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: is_plus_inf_or_snan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-NDD-NEXT: setge %cl +; X64-NDD-NEXT: andb %dl, %cl +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 513) ; 0x200|0x1 = "+inf|snan" ret i1 %class } define i1 @is_plus_inf_or_qnan_f(float %x) { -; CHECK-32-LABEL: is_plus_inf_or_qnan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_plus_inf_or_qnan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, 
%eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_plus_inf_or_qnan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_plus_inf_or_qnan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 514) ; 0x200|0x2 = "+inf|qnan" ret i1 %class } define i1 @not_is_plus_inf_or_snan_f(float %x) { -; CHECK-32-LABEL: not_is_plus_inf_or_snan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_plus_inf_or_snan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_plus_inf_or_snan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %dl +; X86-NEXT: orb %cl, %dl +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_is_plus_inf_or_snan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: sete %cl +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: orb %cl, %dl +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setge %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_is_plus_inf_or_snan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: sete %cl +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: orb %dl, %cl +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setge %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 510) ;
~(+inf|snan) ret i1 %class } define i1 @not_is_plus_inf_or_qnan_f(float %x) { -; CHECK-32-LABEL: not_is_plus_inf_or_qnan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl %eax, %ecx -; CHECK-32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dh -; CHECK-32-NEXT: andb %dl, %dh -; CHECK-32-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: sete %dl -; CHECK-32-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: orb %dh, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_plus_inf_or_qnan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: movl %eax, %ecx -; CHECK-64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-64-NEXT: setge %sil -; CHECK-64-NEXT: andb %dl, %sil -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %dl -; CHECK-64-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: orb %sil, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_plus_inf_or_qnan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X86-NEXT: setl %dl +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %dh +; X86-NEXT: andb %dl, %dh +; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: sete %dl +; X86-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: orb %dh, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_is_plus_inf_or_qnan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: movl %eax, %ecx +; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %sil +; X64-GENERIC-NEXT: andb %dl, %sil +; X64-GENERIC-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: sete %dl +; X64-GENERIC-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GENERIC-NEXT: setl %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: orb %sil, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_is_plus_inf_or_qnan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-NDD-NEXT: setge %sil +; X64-NDD-NEXT: andb %sil, %dl +; X64-NDD-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-NDD-NEXT: setl %cl +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: orb %dl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 509) ; ~(+inf|qnan) ret i1 %class } define i1 @is_minus_inf_or_snan_f(float %x) { -; CHECK-32-LABEL: is_minus_inf_or_snan_f: -; CHECK-32: # %bb.0: -; 
CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl %eax, %ecx -; CHECK-32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: andb %dl, %cl -; CHECK-32-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_inf_or_snan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: movl %eax, %ecx -; CHECK-64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: andb %dl, %cl -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: is_minus_inf_or_snan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X86-NEXT: setl %dl +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: andb %dl, %cl +; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: sete %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: is_minus_inf_or_snan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: movl %eax, %ecx +; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %cl +; X64-GENERIC-NEXT: andb %dl, %cl +; X64-GENERIC-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: sete %al +; X64-GENERIC-NEXT: orb %cl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: is_minus_inf_or_snan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-NDD-NEXT: setge %cl +; X64-NDD-NEXT: andb %dl, %cl +; X64-NDD-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 5) ; "-inf|snan" ret i1 %class } define i1 @is_minus_inf_or_qnan_f(float %x) { -; CHECK-32-LABEL: is_minus_inf_or_qnan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: is_minus_inf_or_qnan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: 
is_minus_inf_or_qnan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: is_minus_inf_or_qnan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 6) ; "-inf|qnan" ret i1 %class } define i1 @not_is_minus_inf_or_snan_f(float %x) { -; CHECK-32-LABEL: not_is_minus_inf_or_snan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_inf_or_snan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_inf_or_snan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setl %dl +; X86-NEXT: orb %cl, %dl +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_is_minus_inf_or_snan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %cl +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: orb %cl, %dl +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setge %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_is_minus_inf_or_snan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %cl +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: orb %dl, %cl +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setge %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1018) ; "~(-inf|snan)" ret i1 %class } define i1 @not_is_minus_inf_or_qnan_f(float %x) { -; CHECK-32-LABEL: not_is_minus_inf_or_qnan_f: 
-; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl %eax, %ecx -; CHECK-32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dh -; CHECK-32-NEXT: andb %dl, %dh -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %dl -; CHECK-32-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: orb %dh, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_is_minus_inf_or_qnan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: movl %eax, %ecx -; CHECK-64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-64-NEXT: setge %sil -; CHECK-64-NEXT: andb %dl, %sil -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %dl -; CHECK-64-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: orb %sil, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_is_minus_inf_or_qnan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X86-NEXT: setl %dl +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %dh +; X86-NEXT: andb %dl, %dh +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %dl +; X86-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-NEXT: setl %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: orb %dh, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_is_minus_inf_or_qnan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: movl %eax, %ecx +; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %sil +; X64-GENERIC-NEXT: andb %dl, %sil +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %dl +; X64-GENERIC-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GENERIC-NEXT: setl %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: orb %sil, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_is_minus_inf_or_qnan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-NDD-NEXT: setge %sil +; X64-NDD-NEXT: andb %sil, %dl +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-NDD-NEXT: setl %cl +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: orb %dl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1017) ; "~(-inf|qnan)" ret i1 %class } define i1 @issubnormal_or_nan_f(float %x) { -; CHECK-32-LABEL: issubnormal_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -;
CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: decl %eax -; CHECK-32-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: decl %eax -; CHECK-64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: decl %eax +; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X86-NEXT: setb %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %cl +; X64-NEXT: decl %eax +; X64-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF +; X64-NEXT: setb %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 147) ; 0x90|0x3 = "subnormal|nan" ret i1 %class } define i1 @issubnormal_or_zero_or_nan_f(float %x) { -; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_zero_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan" ret i1 %class } define i1 @issubnormal_or_zero_or_nan_f_daz(float %x) #0 { -; CHECK-32-LABEL: issubnormal_or_zero_or_nan_f_daz: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fldz -; CHECK-32-NEXT: fucompp -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: retl -; 
-; CHECK-64-LABEL: issubnormal_or_zero_or_nan_f_daz: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: xorps %xmm1, %xmm1 -; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_or_nan_f_daz: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fldz +; X86-NEXT: fucompp +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_zero_or_nan_f_daz: +; X64: # %bb.0: +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: ucomiss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan" ret i1 %class } define i1 @issubnormal_or_zero_or_snan_f(float %x) { -; CHECK-32-LABEL: issubnormal_or_zero_or_snan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: movl %eax, %ecx -; CHECK-32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %dl -; CHECK-32-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: andb %dl, %cl -; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_zero_or_snan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: movl %eax, %ecx -; CHECK-64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %dl -; CHECK-64-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: andb %dl, %cl -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_or_snan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X86-NEXT: setl %dl +; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X86-NEXT: setge %cl +; X86-NEXT: andb %dl, %cl +; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: issubnormal_or_zero_or_snan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: movl %eax, %ecx +; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %dl +; X64-GENERIC-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %cl +; X64-GENERIC-NEXT: andb %dl, %cl +; X64-GENERIC-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %al +; X64-GENERIC-NEXT: orb %cl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: issubnormal_or_zero_or_snan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %dl +; X64-NDD-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001 +; X64-NDD-NEXT: setge %cl +; X64-NDD-NEXT: andb %dl, %cl +; X64-NDD-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 
@llvm.is.fpclass.f32(float %x, i32 241) ; 0xf0|0x1 = "subnormal|zero|snan" ret i1 %class } define i1 @issubnormal_or_zero_or_qnan_f(float %x) { -; CHECK-32-LABEL: issubnormal_or_zero_or_qnan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: issubnormal_or_zero_or_qnan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: issubnormal_or_zero_or_qnan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: issubnormal_or_zero_or_qnan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 242) ; 0xf0|0x2 = "subnormal|zero|qnan" ret i1 %class } define i1 @not_issubnormal_or_nan_f(float %x) { -; CHECK-32-LABEL: not_issubnormal_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: testl %eax, %eax -; CHECK-32-NEXT: sete %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: testl %eax, %eax -; CHECK-64-NEXT: sete %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %dl +; X86-NEXT: orb %cl, %dl +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_issubnormal_or_nan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm =
0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %cl +; X64-GENERIC-NEXT: testl %eax, %eax +; X64-GENERIC-NEXT: sete %dl +; X64-GENERIC-NEXT: orb %cl, %dl +; X64-GENERIC-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_issubnormal_or_nan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %cl +; X64-NDD-NEXT: testl %eax, %eax +; X64-NDD-NEXT: sete %dl +; X64-NDD-NEXT: orb %dl, %cl +; X64-NDD-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 876) ; ~(0x90|0x3) = ~"subnormal|nan" ret i1 %class } define i1 @not_issubnormal_or_zero_or_nan_f(float %x) { -; CHECK-32-LABEL: not_issubnormal_or_zero_or_nan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: setne %cl -; CHECK-32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setl %al -; CHECK-32-NEXT: andb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_or_nan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: setne %cl -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setl %al -; CHECK-64-NEXT: andb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_or_nan_f: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: setne %cl +; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setl %al +; X86-NEXT: andb %cl, %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_or_zero_or_nan_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %cl +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setl %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan" ret i1 %class } define i1 @not_issubnormal_or_zero_or_nan_f_daz(float %x) #0 { -; CHECK-32-LABEL: not_issubnormal_or_zero_or_nan_f_daz: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-32-NEXT: fldz -; CHECK-32-NEXT: fucompp -; CHECK-32-NEXT: fnstsw %ax -; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-32-NEXT: sahf -; CHECK-32-NEXT: setne %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_or_nan_f_daz: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: xorps %xmm1, %xmm1 -; CHECK-64-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-64-NEXT: setne %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_or_nan_f_daz: +; X86: # %bb.0: +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: fldz +; X86-NEXT: fucompp +; X86-NEXT: fnstsw %ax +; X86-NEXT: # kill: 
def $ah killed $ah killed $ax +; X86-NEXT: sahf +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: not_issubnormal_or_zero_or_nan_f_daz: +; X64: # %bb.0: +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: ucomiss %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan" ret i1 %class } define i1 @not_issubnormal_or_zero_or_snan_f(float %x) { -; CHECK-32-LABEL: not_issubnormal_or_zero_or_snan_f: -; CHECK-32: # %bb.0: -; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setge %cl -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %dl -; CHECK-32-NEXT: orb %cl, %dl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %dl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_or_snan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setge %cl -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %dl -; CHECK-64-NEXT: orb %cl, %dl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %dl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_or_snan_f: +; X86: # %bb.0: +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setge %cl +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %dl +; X86-NEXT: orb %cl, %dl +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: orb %dl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_issubnormal_or_zero_or_snan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setge %cl +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %dl +; X64-GENERIC-NEXT: orb %cl, %dl +; X64-GENERIC-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: orb %dl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_issubnormal_or_zero_or_snan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setge %cl +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %dl +; X64-NDD-NEXT: orb %dl, %cl +; X64-NDD-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 782) ; ~(0x90|0x1) = ~"subnormal|snan" ret i1 %class } define i1 @not_issubnormal_or_zero_or_qnan_f(float %x) { -; CHECK-32-LABEL: not_issubnormal_or_zero_or_qnan_f: -; CHECK-32: # %bb.0: 
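A reading aid for the class-mask comments in these tests: the i32 operand of llvm.is.fpclass is a 10-bit class-test mask (per the LangRef), where 0x3 selects the two NaN kinds, 0x60 the two zeroes, and 0x90 the two subnormals; for example, 0x90|0x60|0x2 = 242 is exactly the subnormal|zero|qnan set tested by issubnormal_or_zero_or_qnan_f above. A minimal standalone sketch, assuming only the LangRef bit layout (the function name is illustrative, not from the patch):

define i1 @is_nan_or_zero(float %x) {
  ; 0x3|0x60 = 0x63 = 99: snan|qnan|negzero|poszero
  %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 99)
  ret i1 %class
}
declare i1 @llvm.is.fpclass.f32(float, i32)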
-; CHECK-32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-32-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-32-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-32-NEXT: setl %cl -; CHECK-32-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-32-NEXT: setge %dl -; CHECK-32-NEXT: andb %cl, %dl -; CHECK-32-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-32-NEXT: sete %cl -; CHECK-32-NEXT: orb %dl, %cl -; CHECK-32-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-32-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-32-NEXT: setb %al -; CHECK-32-NEXT: orb %cl, %al -; CHECK-32-NEXT: retl -; -; CHECK-64-LABEL: not_issubnormal_or_zero_or_qnan_f: -; CHECK-64: # %bb.0: -; CHECK-64-NEXT: movd %xmm0, %eax -; CHECK-64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF -; CHECK-64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; CHECK-64-NEXT: setl %cl -; CHECK-64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; CHECK-64-NEXT: setge %dl -; CHECK-64-NEXT: andb %cl, %dl -; CHECK-64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; CHECK-64-NEXT: sete %cl -; CHECK-64-NEXT: orb %dl, %cl -; CHECK-64-NEXT: addl $-8388608, %eax # imm = 0xFF800000 -; CHECK-64-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 -; CHECK-64-NEXT: setb %al -; CHECK-64-NEXT: orb %cl, %al -; CHECK-64-NEXT: retq +; X86-LABEL: not_issubnormal_or_zero_or_qnan_f: +; X86: # %bb.0: +; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-NEXT: setl %cl +; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-NEXT: setge %dl +; X86-NEXT: andb %cl, %dl +; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-NEXT: sete %cl +; X86-NEXT: orb %dl, %cl +; X86-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X86-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X86-NEXT: setb %al +; X86-NEXT: orb %cl, %al +; X86-NEXT: retl +; +; X64-GENERIC-LABEL: not_issubnormal_or_zero_or_qnan_f: +; X64-GENERIC: # %bb.0: +; X64-GENERIC-NEXT: movd %xmm0, %eax +; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GENERIC-NEXT: setl %cl +; X64-GENERIC-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-GENERIC-NEXT: setge %dl +; X64-GENERIC-NEXT: andb %cl, %dl +; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GENERIC-NEXT: sete %cl +; X64-GENERIC-NEXT: orb %dl, %cl +; X64-GENERIC-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-GENERIC-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-GENERIC-NEXT: setb %al +; X64-GENERIC-NEXT: orb %cl, %al +; X64-GENERIC-NEXT: retq +; +; X64-NDD-LABEL: not_issubnormal_or_zero_or_qnan_f: +; X64-NDD: # %bb.0: +; X64-NDD-NEXT: movd %xmm0, %eax +; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NDD-NEXT: setl %cl +; X64-NDD-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NDD-NEXT: setge %dl +; X64-NDD-NEXT: andb %dl, %cl +; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NDD-NEXT: sete %dl +; X64-NDD-NEXT: orb %dl, %cl +; X64-NDD-NEXT: addl $-8388608, %eax # imm = 0xFF800000 +; X64-NDD-NEXT: cmpl $2130706432, %eax # imm = 0x7F000000 +; X64-NDD-NEXT: setb %al +; X64-NDD-NEXT: orb %cl, %al +; X64-NDD-NEXT: retq %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 781) ; ~(0x90|0x2) = ~"subnormal|qnan" ret i1 %class } diff --git 
a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 0adb9ddfc426a8..d71fd470651cf9 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -7771,6 +7771,234 @@ define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i3 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i ret <16 x float> %r } + +define <1 x float> @constrained_vector_tan_v1f32() #0 { +; CHECK-LABEL: constrained_vector_tan_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( + <1 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <1 x float> %tan +} + +define <2 x double> @constrained_vector_tan_v2f64() #0 { +; CHECK-LABEL: constrained_vector_tan_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: addq $24, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %tan +} + +define <3 x float> @constrained_vector_tan_v3f32() #0 { +; CHECK-LABEL: constrained_vector_tan_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanf@PLT +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; 
CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanf@PLT +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( + <3 x float> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x float> %tan +} + +define <3 x double> @constrained_vector_tan_v3f64() #0 { +; CHECK-LABEL: constrained_vector_tan_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait +; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vzeroupper +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( + <3 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %tan +} + +define <4 x double> @constrained_vector_tan_v4f64() #0 { +; CHECK-LABEL: constrained_vector_tan_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = 
[4.2E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: callq tan@PLT +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tan_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tan@PLT +; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( + <4 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %tan +} + + + declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) attributes #0 = { strictfp } @@ -7786,6 +8014,7 @@ declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) @@ -7829,6 +8058,7 @@ declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x f declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) declare <1 x float> 
@llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) @@ -7882,6 +8112,8 @@ declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metada declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) @@ -7938,6 +8170,7 @@ declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) diff --git a/llvm/test/DebugInfo/debug_frame_symbol.ll b/llvm/test/DebugInfo/debug_frame_symbol.ll index fed080c2f74a88..56ac55e46a825e 100644 --- a/llvm/test/DebugInfo/debug_frame_symbol.ll +++ b/llvm/test/DebugInfo/debug_frame_symbol.ll @@ -22,7 +22,7 @@ entry: ret void, !dbg !12 } -attributes #0 = { noinline nounwind optnone ssp "frame-pointer"="non-leaf" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #0 = { noinline nounwind optnone ssp "frame-pointer"="non-leaf" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2, !3, !4, !5, !6} diff --git a/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test b/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test index 9e46570783c93c..5141ff6ce322cc 100644 --- a/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test +++ b/llvm/test/DebugInfo/symbolize-gnu-debuglink-no-realpath.test @@ -1,3 +1,4 @@ +# REQUIRES: shell # Ensure that no realpath assumptions are made about .gnu_debuglink paths. 
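The constrained-intrinsic calls in the tan tests above carry the FP environment in their two metadata operands: !"round.dynamic" marks the rounding mode as unknown at compile time, and !"fpexcept.strict" requires exception semantics to be preserved, which is what keeps such calls from being constant-folded. A minimal scalar sketch under those assumptions (the function name is illustrative; the f64 intrinsic is the one declared in the fp-intrinsics.ll hunk below):

define double @tan42() strictfp {
entry:
  ; Must not be folded to a constant: rounding mode unknown, exceptions strict.
  %t = call double @llvm.experimental.constrained.tan.f64(double 42.0,
                     metadata !"round.dynamic",
                     metadata !"fpexcept.strict") strictfp
  ret double %t
}
declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata)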
# Copy inputs to some other location with arbitrary names, with the original diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll index b92408a1bf1cd5..78275a16d3e8f7 100644 --- a/llvm/test/Feature/fp-intrinsics.ll +++ b/llvm/test/Feature/fp-intrinsics.ll @@ -151,6 +151,17 @@ entry: ret double %result } +; Verify that tan(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: ftan +; CHECK: call double @llvm.experimental.constrained.tan +define double @ftan() #0 { +entry: + %result = call double @llvm.experimental.constrained.tan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that exp(42.0) isn't simplified when the rounding mode is unknown. ; CHECK-LABEL: f10 ; CHECK: call double @llvm.experimental.constrained.exp @@ -407,6 +418,7 @@ declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll b/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll index 3d67778049430f..64fcfdcc6127eb 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/calls-only-smallfn.ll @@ -24,5 +24,5 @@ entry: store i8 2, ptr %arrayidx1, align 1 ret void } -attributes #0 = { noinline nounwind optnone sanitize_address ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #0 = { noinline nounwind optnone sanitize_address ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } diff --git a/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll b/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll index fa491105e017d7..90e1ab35d9c36c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/calls-only.ll @@ -51,5 +51,5 @@ entry: ; CHECK-NOT: store i64 -723401728380766731, ptr %126, align 1 ret void } -attributes #0 = { noinline nounwind optnone sanitize_address ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #0 = { noinline nounwind optnone 
sanitize_address ssp uwtable(sync) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index a93f841649282e..bc61970ecc4b77 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -287,7 +287,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]]) @@ -308,7 +308,7 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]]) @@ -329,7 +329,7 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]]) @@ -350,7 +350,7 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]]) @@ -371,7 +371,7 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]]) @@ -396,14 +396,14 @@ define <8 x float> 
@test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 -18) ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES]] ; - %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 -18) ; <<8 x float>> [#uses=1] ret <8 x float> %res } declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone @@ -484,7 +484,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr [[A0]]) @@ -508,7 +508,7 @@ define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 { ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] ; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 5: ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]]) @@ -532,7 +532,7 @@ define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 { ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] ; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 5: ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]]) @@ -556,7 +556,7 @@ define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 { ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] ; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 5: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]]) @@ -580,7 +580,7 @@ define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 { ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] ; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 5: ; CHECK-NEXT: 
[[RES:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]]) @@ -608,7 +608,7 @@ define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] ; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 7: ; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]], <2 x double> [[A2:%.*]]) @@ -635,7 +635,7 @@ define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] ; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 7: ; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]], <4 x double> [[A2:%.*]]) @@ -662,7 +662,7 @@ define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] ; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 7: ; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[A2:%.*]]) @@ -689,7 +689,7 @@ define void @test_x86_avx_maskstore_ps_256(ptr %a0, <8 x i32> %mask, <8 x float> ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] ; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 7: ; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]], <8 x float> [[A2:%.*]]) @@ -773,7 +773,7 @@ define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> [[A0:%.*]]) @@ -794,7 +794,7 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> [[A0:%.*]]) @@ -886,7 +886,7 @@ define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; 
CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> [[A0:%.*]], i32 7) @@ -907,7 +907,7 @@ define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> [[A0:%.*]], i32 7) @@ -945,7 +945,7 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]]) @@ -970,7 +970,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]]) @@ -990,7 +990,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> ) @@ -1013,7 +1013,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) # ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]]) @@ -1031,7 +1031,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A2:%.*]] = load <4 x i32>, ptr [[A1:%.*]], align 16 @@ -1046,7 +1046,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] ; CHECK: 10: -; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 11: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]]) @@ -1072,7 +1072,7 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]]) @@ -1348,14 +1348,14 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal !1 +; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META1:![0-9]+]] ; CHECK-NEXT: ret void ; %a2 = add <2 x i64> %a1, @@ -1373,14 +1373,14 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal !1 +; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META1]] ; CHECK-NEXT: ret void ; tail call void @llvm.x86.avx.movnt.ps.256(ptr %p, <8 x float> %a) nounwind @@ -1399,14 +1399,14 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal !1 +; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META1]] ; CHECK-NEXT: ret void ; %a2 = fadd <4 x double> %a1, diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll 
index 666c84e62da208..36f83750783350 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -52,14 +52,14 @@ define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) #0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 -18) ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[RES]] ; - %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 -18) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone @@ -77,14 +77,14 @@ define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 -18) ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; - %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 -18) ; <<4 x float>> [#uses=1] ret <4 x float> %res } declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone @@ -102,7 +102,7 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 17) @@ -128,7 +128,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label 
[[TMP6:%.*]], !prof [[PROF0]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], i8 7) @@ -145,10 +145,10 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 16 @@ -157,13 +157,13 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] ; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 11: ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0]], <16 x i8> [[A1:%.*]], i8 7) @@ -305,7 +305,7 @@ define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> [[A0:%.*]], i32 7) @@ -326,7 +326,7 @@ define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[A0:%.*]], i32 7) @@ -363,7 +363,7 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1B:%.*]] = load <2 x double>, ptr [[A1:%.*]], align 16 @@ -390,7 +390,7 @@ define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 { ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1B:%.*]] = load <4 x float>, ptr [[A1:%.*]], align 16 diff --git a/llvm/test/LTO/X86/triple-init2.ll b/llvm/test/LTO/X86/triple-init2.ll index 2638180ef33c66..bc5ecf9785a287 100644 --- a/llvm/test/LTO/X86/triple-init2.ll +++ b/llvm/test/LTO/X86/triple-init2.ll @@ -11,21 +11,20 @@ ; RUN: llvm-lto2 run -r %t1,main,plx -o %t2 %t1 ; RUN: llvm-nm %t2.1 | FileCheck %s -; We check that LTO will be aware of target triple and prevent exp2 to ldexpf +; We check that LTO is aware of the target triple and prevents the pow to exp10 ; transformation on Windows. -; CHECK: U exp2f +; CHECK: U powf target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.11.0" +declare float @llvm.pow.f32(float, float) + define dso_local i32 @main(i32 %argc, ptr nocapture readnone %argv) local_unnamed_addr { entry: %conv = sitofp i32 %argc to float - %exp2 = tail call float @llvm.exp2.f32(float %conv) + %exp2 = tail call float @llvm.pow.f32(float 10.0, float %conv) %conv1 = fptosi float %exp2 to i32 ret i32 %conv1 } -; Function Attrs: nounwind readnone speculatable -declare float @llvm.exp2.f32(float) - diff --git a/llvm/test/MC/RISCV/rvv/vsetvl.s b/llvm/test/MC/RISCV/rvv/vsetvl.s index c9197d8917a472..2741def0eeff21 100644 --- a/llvm/test/MC/RISCV/rvv/vsetvl.s +++ b/llvm/test/MC/RISCV/rvv/vsetvl.s @@ -1,5 +1,7 @@ # RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+zve32x %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ZVE32X # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ @@ -71,18 +73,21 @@ vsetvli a2, a0, e32, m8, ta, ma vsetvli a2, a0, e32, mf2, ta, ma # CHECK-INST: vsetvli a2, a0, e32, mf2, ta, ma +# CHECK-ZVE32X: :[[#@LINE-2]]:17: warning: use of vtype encodings with SEW > 16 and LMUL == mf2 may not be compatible with all RVV implementations{{$}} # CHECK-ENCODING: [0x57,0x76,0x75,0x0d] # CHECK-ERROR: instruction requires the following: 'V' (Vector Extension for Application Processors), 'Zve32x' (Vector Extensions for Embedded Processors){{$}} # CHECK-UNKNOWN: 0d757657 vsetvli a2, a0, e32, mf4, ta, ma # CHECK-INST: vsetvli a2, a0, e32, mf4, ta, ma +# CHECK-ZVE32X: :[[#@LINE-2]]:17: warning: use of vtype encodings with SEW > 8 and LMUL == mf4 may not be compatible with all RVV implementations{{$}} # CHECK-ENCODING: [0x57,0x76,0x65,0x0d] # CHECK-ERROR: instruction requires the following: 'V' (Vector Extension for Application Processors), 'Zve32x' (Vector Extensions for Embedded Processors){{$}} # CHECK-UNKNOWN: 0d657657 vsetvli a2, a0, e32, mf8, ta, ma # CHECK-INST: vsetvli a2, a0, e32, mf8, ta, ma +# CHECK-ZVE32X: :[[#@LINE-2]]:22: warning: use of vtype encodings with LMUL <
SEWMIN/ELEN == mf4 is reserved{{$}} # CHECK-ENCODING: [0x57,0x76,0x55,0x0d] # CHECK-ERROR: instruction requires the following: 'V' (Vector Extension for Application Processors), 'Zve32x' (Vector Extensions for Embedded Processors){{$}} # CHECK-UNKNOWN: 0d557657 diff --git a/llvm/test/Other/lit-unicode.txt b/llvm/test/Other/lit-unicode.txt index b375fc505b737c..2f40001451688a 100644 --- a/llvm/test/Other/lit-unicode.txt +++ b/llvm/test/Other/lit-unicode.txt @@ -1,4 +1,5 @@ FIXME: See if we can fix this in lit by using Unicode strings. +REQUIRES: shell RUN: echo "ようこそ" | FileCheck %s CHECK: {{^}}ようこそ{{$}} diff --git a/llvm/test/Transforms/Inline/inline-switch-default.ll b/llvm/test/Transforms/Inline/inline-switch-default.ll index 288d414fe0e0eb..1cc2f2ef10f569 100644 --- a/llvm/test/Transforms/Inline/inline-switch-default.ll +++ b/llvm/test/Transforms/Inline/inline-switch-default.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt %s -S -passes=inline -inline-threshold=16 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE ; RUN: opt %s -S -passes=inline -inline-threshold=11 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH -; REQUIRES: x86_64-linux +; REQUIRES: x86-registered-target target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll index 9bc02603a2699c..707c34be50124e 100644 --- a/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll +++ b/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll @@ -1,9 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s ; PR2389 -; CHECK: xor - define i1 @test(i1 %a, i1 %b) { - %A = add i1 %a, %b - ret i1 %A +; CHECK-LABEL: define i1 @test( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = xor i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[R]] +; + %r = add i1 %a, %b + ret i1 %r } diff --git a/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll b/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll index e7dd74b06d9447..fa7c5424c203f6 100644 --- a/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll +++ b/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll @@ -1,24 +1,40 @@ -; RUN: opt < %s -passes=instcombine -S > %t -; RUN: grep "xor" %t -; RUN: grep "and" %t -; RUN: not grep "div" %t +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i1 @foo1(i1 %a, i1 %b) { - %A = sub i1 %a, %b - ret i1 %A +; CHECK-LABEL: define i1 @foo1( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = xor i1 [[B]], [[A]] +; CHECK-NEXT: ret i1 [[R]] +; + %r = sub i1 %a, %b + ret i1 %r } define i1 @foo2(i1 %a, i1 %b) { - %A = mul i1 %a, %b - ret i1 %A +; CHECK-LABEL: define i1 @foo2( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = and i1 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[R]] +; + %r = mul i1 %a, %b + ret i1 %r } define i1 @foo3(i1 %a, i1 %b) { - %A = udiv i1 %a, %b - ret i1 %A +; CHECK-LABEL: define i1 @foo3( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: ret i1 [[A]] +; + %r = udiv i1 %a, %b + ret i1 %r } define i1 @foo4(i1 %a, i1 %b) { - %A = sdiv i1 %a, %b - ret i1 %A +; 
CHECK-LABEL: define i1 @foo4( +; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) { +; CHECK-NEXT: ret i1 [[A]] +; + %r = sdiv i1 %a, %b + ret i1 %r } diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll index 14379ebf3a9052..5a9767a64a2ced 100644 --- a/llvm/test/Transforms/InstCombine/and-compare.ll +++ b/llvm/test/Transforms/InstCombine/and-compare.ll @@ -4,6 +4,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +declare void @use.i8(i8) + ; Should be optimized to one and. define i1 @test1(i32 %a, i32 %b) { ; CHECK-LABEL: @test1( @@ -75,3 +77,98 @@ define <2 x i1> @test3vec(<2 x i64> %A) { ret <2 x i1> %cmp } +define i1 @test_ne_cp2(i8 %x, i8 %yy) { +; CHECK-LABEL: @test_ne_cp2( +; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16 +; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 16 +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]]) +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]]) +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], 31 +; CHECK-NEXT: ret i1 [[R]] +; + %and_x_neg_y = and i8 %x, -16 + %and_x_y = and i8 %x, 16 + call void @use.i8(i8 %and_x_neg_y) + call void @use.i8(i8 %and_x_y) + %r = icmp ne i8 %and_x_neg_y, %and_x_y + ret i1 %r +} + +define i1 @test_ne_cp2_2(i8 %x, i8 %yy) { +; CHECK-LABEL: @test_ne_cp2_2( +; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -4 +; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 4 +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]]) +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]]) +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X]], 8 +; CHECK-NEXT: ret i1 [[R]] +; + %and_x_neg_y = and i8 %x, -4 + %and_x_y = and i8 %x, 4 + call void @use.i8(i8 %and_x_neg_y) + call void @use.i8(i8 %and_x_y) + %r = icmp eq i8 %and_x_y, %and_x_neg_y + ret i1 %r +} + +define i1 @test_ne_cp2_other_okay_all_ones(i8 %x, i8 %yy) { +; CHECK-LABEL: @test_ne_cp2_other_okay_all_ones( +; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -17 +; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 16 +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]]) +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]]) +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %and_x_neg_y = and i8 %x, -17 + %and_x_y = and i8 %x, 16 + call void @use.i8(i8 %and_x_neg_y) + call void @use.i8(i8 %and_x_y) + %r = icmp ne i8 %and_x_neg_y, %and_x_y + ret i1 %r +} + +define i1 @test_ne_cp2_other_fail2(i8 %x, i8 %yy) { +; CHECK-LABEL: @test_ne_cp2_other_fail2( +; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16 +; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 17 +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]]) +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]]) +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]] +; CHECK-NEXT: ret i1 [[R]] +; + %and_x_neg_y = and i8 %x, -16 + %and_x_y = and i8 %x, 17 + call void @use.i8(i8 %and_x_neg_y) + call void @use.i8(i8 %and_x_y) + %r = icmp ne i8 %and_x_neg_y, %and_x_y + ret i1 %r +} + +define i1 @test_ne_cp2_other_okay(i8 %x, i8 %yy) { +; CHECK-LABEL: @test_ne_cp2_other_okay( +; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X:%.*]], 16 +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]]) +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %and_x_neg_y = and i8 %x, -17 + %and_x_y = and i8 %x, 16 + call void @use.i8(i8 %and_x_y) + %r = icmp ne i8 %and_x_neg_y, %and_x_y + ret i1 %r +} + +define i1 @test_ne_cp2_other_okay2(i8 %x, i8 %yy) { +; CHECK-LABEL: @test_ne_cp2_other_okay2( +; CHECK-NEXT: [[AND_X_Y:%.*]] = 
and i8 [[X:%.*]], 16 +; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]]) +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %and_x_neg_y = and i8 %x, -17 + %and_x_y = and i8 %x, 16 + call void @use.i8(i8 %and_x_y) + %r = icmp ne i8 %and_x_y, %and_x_neg_y + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll index 2dff0b08ecf97b..d8bd0a4d8159db 100644 --- a/llvm/test/Transforms/InstCombine/exp2-1.ll +++ b/llvm/test/Transforms/InstCombine/exp2-1.ll @@ -242,8 +242,8 @@ define double @test_simplify9(i8 zeroext %x) { ; NOLDEXPF-NEXT: ret double [[RET]] ; ; NOLDEXP-LABEL: @test_simplify9( -; NOLDEXP-NEXT: [[CONV:%.*]] = uitofp i8 [[X:%.*]] to double -; NOLDEXP-NEXT: [[RET:%.*]] = call double @llvm.exp2.f64(double [[CONV]]) +; NOLDEXP-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 +; NOLDEXP-NEXT: [[RET:%.*]] = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[TMP1]]) ; NOLDEXP-NEXT: ret double [[RET]] ; %conv = uitofp i8 %x to double @@ -263,13 +263,13 @@ define float @test_simplify10(i8 zeroext %x) { ; LDEXP16-NEXT: ret float [[RET]] ; ; NOLDEXPF-LABEL: @test_simplify10( -; NOLDEXPF-NEXT: [[CONV:%.*]] = uitofp i8 [[X:%.*]] to float -; NOLDEXPF-NEXT: [[RET:%.*]] = call float @llvm.exp2.f32(float [[CONV]]) +; NOLDEXPF-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 +; NOLDEXPF-NEXT: [[RET:%.*]] = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) ; NOLDEXPF-NEXT: ret float [[RET]] ; ; NOLDEXP-LABEL: @test_simplify10( -; NOLDEXP-NEXT: [[CONV:%.*]] = uitofp i8 [[X:%.*]] to float -; NOLDEXP-NEXT: [[RET:%.*]] = call float @llvm.exp2.f32(float [[CONV]]) +; NOLDEXP-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 +; NOLDEXP-NEXT: [[RET:%.*]] = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) ; NOLDEXP-NEXT: ret float [[RET]] ; %conv = uitofp i8 %x to float @@ -289,13 +289,13 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) { ; LDEXP16-NEXT: ret float [[R]] ; ; NOLDEXPF-LABEL: @sitofp_scalar_intrinsic_with_FMF( -; NOLDEXPF-NEXT: [[S:%.*]] = sitofp i8 [[X:%.*]] to float -; NOLDEXPF-NEXT: [[R:%.*]] = tail call nnan float @llvm.exp2.f32(float [[S]]) +; NOLDEXPF-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 +; NOLDEXPF-NEXT: [[R:%.*]] = tail call nnan float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) ; NOLDEXPF-NEXT: ret float [[R]] ; ; NOLDEXP-LABEL: @sitofp_scalar_intrinsic_with_FMF( -; NOLDEXP-NEXT: [[S:%.*]] = sitofp i8 [[X:%.*]] to float -; NOLDEXP-NEXT: [[R:%.*]] = tail call nnan float @llvm.exp2.f32(float [[S]]) +; NOLDEXP-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 +; NOLDEXP-NEXT: [[R:%.*]] = tail call nnan float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) ; NOLDEXP-NEXT: ret float [[R]] ; %s = sitofp i8 %x to float @@ -317,9 +317,14 @@ define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) { ; LDEXP16-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> , <2 x i16> [[TMP1]]) ; LDEXP16-NEXT: ret <2 x float> [[R]] ; +; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF( +; NOLDEXPF-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> +; NOLDEXPF-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; NOLDEXPF-NEXT: ret <2 x float> [[R]] +; ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF( -; NOLDEXP-NEXT: [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float> -; NOLDEXP-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]]) +; NOLDEXP-NEXT: 
[[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> +; NOLDEXP-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) ; NOLDEXP-NEXT: ret <2 x float> [[R]] ; %s = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll index 6e5be5a19d6da5..969020140cb339 100644 --- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll @@ -1,19 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S -passes=instcombine %s | FileCheck -check-prefixes=CHECK,LDEXP %s -; RUN: opt -S -passes=instcombine -disable-builtin=ldexpf -disable-builtin=ldexp -disable-builtin=ldexpl %s | FileCheck -check-prefixes=CHECK,NOLDEXP %s +; RUN: opt -S -passes=instcombine %s | FileCheck %s +; RUN: opt -S -passes=instcombine -disable-builtin=ldexpf -disable-builtin=ldexp -disable-builtin=ldexpl %s | FileCheck %s define float @exp2_f32_sitofp_i8(i8 %x) { -; LDEXP-LABEL: define float @exp2_f32_sitofp_i8( -; LDEXP-SAME: i8 [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 -; LDEXP-NEXT: [[LDEXPF:%.*]] = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) -; LDEXP-NEXT: ret float [[LDEXPF]] -; -; NOLDEXP-LABEL: define float @exp2_f32_sitofp_i8( -; NOLDEXP-SAME: i8 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i8 [[X]] to float -; NOLDEXP-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[ITOFP]]) -; NOLDEXP-NEXT: ret float [[EXP2]] +; CHECK-LABEL: define float @exp2_f32_sitofp_i8( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 +; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: ret float [[EXP2]] ; %itofp = sitofp i8 %x to float %exp2 = call float @llvm.exp2.f32(float %itofp) @@ -21,17 +15,11 @@ define float @exp2_f32_sitofp_i8(i8 %x) { } define float @exp2_f32_sitofp_i8_flags(i8 %x) { -; LDEXP-LABEL: define float @exp2_f32_sitofp_i8_flags( -; LDEXP-SAME: i8 [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 -; LDEXP-NEXT: [[LDEXPF:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) -; LDEXP-NEXT: ret float [[LDEXPF]] -; -; NOLDEXP-LABEL: define float @exp2_f32_sitofp_i8_flags( -; NOLDEXP-SAME: i8 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i8 [[X]] to float -; NOLDEXP-NEXT: [[EXP2:%.*]] = call nnan ninf float @llvm.exp2.f32(float [[ITOFP]]) -; NOLDEXP-NEXT: ret float [[EXP2]] +; CHECK-LABEL: define float @exp2_f32_sitofp_i8_flags( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 +; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: ret float [[EXP2]] ; %itofp = sitofp i8 %x to float %exp2 = call nnan ninf float @llvm.exp2.f32(float %itofp) @@ -39,17 +27,11 @@ define float @exp2_f32_sitofp_i8_flags(i8 %x) { } define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) { -; LDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8( -; LDEXP-SAME: <2 x i8> [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32> -; LDEXP-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) -; LDEXP-NEXT: ret <2 x float> [[EXP2]] -; -; NOLDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8( -; NOLDEXP-SAME: <2 x i8> [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp 
<2 x i8> [[X]] to <2 x float> -; NOLDEXP-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]]) -; NOLDEXP-NEXT: ret <2 x float> [[EXP2]] +; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8( +; CHECK-SAME: <2 x i8> [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32> +; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i8> %x to <2 x float> %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %itofp) @@ -57,17 +39,11 @@ define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) { } define float @exp2_f32_uitofp_i8(i8 %x) { -; LDEXP-LABEL: define float @exp2_f32_uitofp_i8( -; LDEXP-SAME: i8 [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = zext i8 [[X]] to i32 -; LDEXP-NEXT: [[LDEXPF:%.*]] = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) -; LDEXP-NEXT: ret float [[LDEXPF]] -; -; NOLDEXP-LABEL: define float @exp2_f32_uitofp_i8( -; NOLDEXP-SAME: i8 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = uitofp i8 [[X]] to float -; NOLDEXP-NEXT: [[EXP2:%.*]] = call float @llvm.exp2.f32(float [[ITOFP]]) -; NOLDEXP-NEXT: ret float [[EXP2]] +; CHECK-LABEL: define float @exp2_f32_uitofp_i8( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X]] to i32 +; CHECK-NEXT: [[EXP2:%.*]] = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: ret float [[EXP2]] ; %itofp = uitofp i8 %x to float %exp2 = call float @llvm.exp2.f32(float %itofp) @@ -77,8 +53,8 @@ define float @exp2_f32_uitofp_i8(i8 %x) { define half @exp2_f16_sitofp_i8(i8 %x) { ; CHECK-LABEL: define half @exp2_f16_sitofp_i8( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[ITOFP:%.*]] = sitofp i8 [[X]] to half -; CHECK-NEXT: [[EXP2:%.*]] = call half @llvm.exp2.f16(half [[ITOFP]]) +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 +; CHECK-NEXT: [[EXP2:%.*]] = call half @llvm.ldexp.f16.i32(half 0xH3C00, i32 [[TMP1]]) ; CHECK-NEXT: ret half [[EXP2]] ; %itofp = sitofp i8 %x to half @@ -87,17 +63,11 @@ define half @exp2_f16_sitofp_i8(i8 %x) { } define double @exp2_f64_sitofp_i8(i8 %x) { -; LDEXP-LABEL: define double @exp2_f64_sitofp_i8( -; LDEXP-SAME: i8 [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 -; LDEXP-NEXT: [[LDEXP:%.*]] = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[TMP1]]) -; LDEXP-NEXT: ret double [[LDEXP]] -; -; NOLDEXP-LABEL: define double @exp2_f64_sitofp_i8( -; NOLDEXP-SAME: i8 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i8 [[X]] to double -; NOLDEXP-NEXT: [[EXP2:%.*]] = call double @llvm.exp2.f64(double [[ITOFP]]) -; NOLDEXP-NEXT: ret double [[EXP2]] +; CHECK-LABEL: define double @exp2_f64_sitofp_i8( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 +; CHECK-NEXT: [[EXP2:%.*]] = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[TMP1]]) +; CHECK-NEXT: ret double [[EXP2]] ; %itofp = sitofp i8 %x to double %exp2 = call double @llvm.exp2.f64(double %itofp) @@ -105,17 +75,11 @@ define double @exp2_f64_sitofp_i8(i8 %x) { } define fp128 @exp2_fp128_sitofp_i8(i8 %x) { -; LDEXP-LABEL: define fp128 @exp2_fp128_sitofp_i8( -; LDEXP-SAME: i8 [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 -; LDEXP-NEXT: [[LDEXPL:%.*]] = call fp128 @llvm.ldexp.f128.i32(fp128 0xL00000000000000003FFF000000000000, i32 [[TMP1]]) -; LDEXP-NEXT: ret fp128 [[LDEXPL]] -; -; NOLDEXP-LABEL: define fp128 @exp2_fp128_sitofp_i8( -; NOLDEXP-SAME: i8 [[X:%.*]]) { -; 
NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i8 [[X]] to fp128 -; NOLDEXP-NEXT: [[EXP2:%.*]] = call fp128 @llvm.exp2.f128(fp128 [[ITOFP]]) -; NOLDEXP-NEXT: ret fp128 [[EXP2]] +; CHECK-LABEL: define fp128 @exp2_fp128_sitofp_i8( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X]] to i32 +; CHECK-NEXT: [[EXP2:%.*]] = call fp128 @llvm.ldexp.f128.i32(fp128 0xL00000000000000003FFF000000000000, i32 [[TMP1]]) +; CHECK-NEXT: ret fp128 [[EXP2]] ; %itofp = sitofp i8 %x to fp128 %exp2 = call fp128 @llvm.exp2.fp128(fp128 %itofp) @@ -123,17 +87,11 @@ define fp128 @exp2_fp128_sitofp_i8(i8 %x) { } define @exp2_nxv4f32_sitofp_i8( %x) { -; LDEXP-LABEL: define @exp2_nxv4f32_sitofp_i8( -; LDEXP-SAME: [[X:%.*]]) { -; LDEXP-NEXT: [[TMP1:%.*]] = sext [[X]] to -; LDEXP-NEXT: [[EXP2:%.*]] = call @llvm.ldexp.nxv4f32.nxv4i32( shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer), [[TMP1]]) -; LDEXP-NEXT: ret [[EXP2]] -; -; NOLDEXP-LABEL: define @exp2_nxv4f32_sitofp_i8( -; NOLDEXP-SAME: [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp [[X]] to -; NOLDEXP-NEXT: [[EXP2:%.*]] = call @llvm.exp2.nxv4f32( [[ITOFP]]) -; NOLDEXP-NEXT: ret [[EXP2]] +; CHECK-LABEL: define @exp2_nxv4f32_sitofp_i8( +; CHECK-SAME: [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext [[X]] to +; CHECK-NEXT: [[EXP2:%.*]] = call @llvm.ldexp.nxv4f32.nxv4i32( shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer), [[TMP1]]) +; CHECK-NEXT: ret [[EXP2]] ; %itofp = sitofp %x to %exp2 = call @llvm.exp2.nxv4f32( %itofp) diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll index 304874645d5dc9..7ff111c42a9e06 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll @@ -399,3 +399,13 @@ define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) { %r = icmp eq i8 %yy, %or ret i1 %r } + +define i1 @PR38139(i8 %arg) { +; CHECK-LABEL: @PR38139( +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[ARG:%.*]], -64 +; CHECK-NEXT: ret i1 [[R]] +; + %masked = or i8 %arg, 192 + %r = icmp ne i8 %masked, %arg + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/ldexp-ext.ll b/llvm/test/Transforms/InstCombine/ldexp-ext.ll new file mode 100644 index 00000000000000..4608553eb88743 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ldexp-ext.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define float @ldexp_zext_float(float %x, i1 %bool) { +; CHECK-LABEL: @ldexp_zext_float( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL:%.*]], float 2.000000e+00, float 1.000000e+00 +; CHECK-NEXT: [[LDEXP:%.*]] = fmul float [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret float [[LDEXP]] +; + %zext = zext i1 %bool to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %zext) + ret float %ldexp +} + +define float @ldexp_zext_float_negative(float %x, i8 %y) { +; CHECK-LABEL: @ldexp_zext_float_negative( +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[Y:%.*]] to i32 +; CHECK-NEXT: [[LDEXP:%.*]] = call float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[ZEXT]]) +; CHECK-NEXT: ret float [[LDEXP]] +; + %zext = zext i8 %y to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %zext) + ret float %ldexp +} + +define double @ldexp_zext_double(double %x, i1 %bool) { +; CHECK-LABEL: @ldexp_zext_double( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL:%.*]], double 2.000000e+00, double 1.000000e+00 +; CHECK-NEXT: 
[[LDEXP:%.*]] = fmul double [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret double [[LDEXP]] +; + %zext = zext i1 %bool to i32 + %ldexp = call double @llvm.ldexp.f64.i32(double %x, i32 %zext) + ret double %ldexp +} + +define double @ldexp_zext_double_fast_math(double %x, i1 %bool) { +; CHECK-LABEL: @ldexp_zext_double_fast_math( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL:%.*]], double 2.000000e+00, double 1.000000e+00 +; CHECK-NEXT: [[LDEXP:%.*]] = fmul reassoc double [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret double [[LDEXP]] +; + %zext = zext i1 %bool to i32 + %ldexp = call reassoc double @llvm.ldexp.f64.i32(double %x, i32 %zext) + ret double %ldexp +} + +define <2 x float> @ldexp_zext_float_vector(<2 x float> %x, <2 x i1> %bool) { +; CHECK-LABEL: @ldexp_zext_float_vector( +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: [[LDEXP:%.*]] = fmul <2 x float> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret <2 x float> [[LDEXP]] +; + %zext = zext <2 x i1> %bool to <2 x i32> + %ldexp = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %zext) + ret <2 x float> %ldexp +} + +define float @ldexp_sext_float(float %x, i1 %bool) { +; CHECK-LABEL: @ldexp_sext_float( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL:%.*]], float 5.000000e-01, float 1.000000e+00 +; CHECK-NEXT: [[LDEXP:%.*]] = fmul float [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret float [[LDEXP]] +; + %sext = sext i1 %bool to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %sext) + ret float %ldexp +} + +define float @ldexp_sext_float_negative(float %x, i8 %y) { +; CHECK-LABEL: @ldexp_sext_float_negative( +; CHECK-NEXT: [[SEXT:%.*]] = sext i8 [[Y:%.*]] to i32 +; CHECK-NEXT: [[LDEXP:%.*]] = call float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[SEXT]]) +; CHECK-NEXT: ret float [[LDEXP]] +; + %sext = sext i8 %y to i32 + %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %sext) + ret float %ldexp +} + +define double @ldexp_sext_double(double %x, i1 %bool) { +; CHECK-LABEL: @ldexp_sext_double( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL:%.*]], double 5.000000e-01, double 1.000000e+00 +; CHECK-NEXT: [[LDEXP:%.*]] = fmul double [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret double [[LDEXP]] +; + %sext = sext i1 %bool to i32 + %ldexp = call double @llvm.ldexp.f64.i32(double %x, i32 %sext) + ret double %ldexp +} + +define double @ldexp_sext_double_fast_math(double %x, i1 %bool) { +; CHECK-LABEL: @ldexp_sext_double_fast_math( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[BOOL:%.*]], double 5.000000e-01, double 1.000000e+00 +; CHECK-NEXT: [[LDEXP:%.*]] = fmul reassoc double [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret double [[LDEXP]] +; + %sext = sext i1 %bool to i32 + %ldexp = call reassoc double @llvm.ldexp.f64.i32(double %x, i32 %sext) + ret double %ldexp +} + +define <2 x float> @ldexp_sext_float_vector(<2 x float> %x, <2 x i1> %bool) { +; CHECK-LABEL: @ldexp_sext_float_vector( +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: [[LDEXP:%.*]] = fmul <2 x float> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: ret <2 x float> [[LDEXP]] +; + %sext = sext <2 x i1> %bool to <2 x i32> + %ldexp = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %sext) + ret <2 x float> %ldexp +} diff --git a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll index b61f8809bd2598..cb51e920932635 100644 --- a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll @@ -5,16 +5,10 @@ 
define float @pow_sitofp_f32_const_base_2(i32 %x) { -; LDEXP-LABEL: define float @pow_sitofp_f32_const_base_2( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXPF:%.*]] = tail call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[X]]) -; LDEXP-NEXT: ret float [[LDEXPF]] -; -; NOLDEXP-LABEL: define float @pow_sitofp_f32_const_base_2( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to float -; NOLDEXP-NEXT: [[POW:%.*]] = tail call float @llvm.exp2.f32(float [[ITOFP]]) -; NOLDEXP-NEXT: ret float [[POW]] +; CHECK-LABEL: define float @pow_sitofp_f32_const_base_2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[X]]) +; CHECK-NEXT: ret float [[EXP2]] ; %itofp = sitofp i32 %x to float %pow = tail call float @llvm.pow.f32(float 2.000000e+00, float %itofp) @@ -22,16 +16,10 @@ define float @pow_sitofp_f32_const_base_2(i32 %x) { } define float @pow_sitofp_f32_const_base_2__flags(i32 %x) { -; LDEXP-LABEL: define float @pow_sitofp_f32_const_base_2__flags( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXPF:%.*]] = tail call nnan nsz float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[X]]) -; LDEXP-NEXT: ret float [[LDEXPF]] -; -; NOLDEXP-LABEL: define float @pow_sitofp_f32_const_base_2__flags( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to float -; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call nnan nsz float @llvm.exp2.f32(float [[ITOFP]]) -; NOLDEXP-NEXT: ret float [[EXP2]] +; CHECK-LABEL: define float @pow_sitofp_f32_const_base_2__flags( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nnan nsz float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[X]]) +; CHECK-NEXT: ret float [[EXP2]] ; %itofp = sitofp i32 %x to float %pow = tail call nsz nnan float @llvm.pow.f32(float 2.000000e+00, float %itofp) @@ -115,16 +103,10 @@ define float @pow_sitofp_f32_const_base_16(i32 %x) { } define double @pow_sitofp_f64_const_base_2(i32 %x) { -; LDEXP-LABEL: define double @pow_sitofp_f64_const_base_2( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXP:%.*]] = tail call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[X]]) -; LDEXP-NEXT: ret double [[LDEXP]] -; -; NOLDEXP-LABEL: define double @pow_sitofp_f64_const_base_2( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to double -; NOLDEXP-NEXT: [[POW:%.*]] = tail call double @llvm.exp2.f64(double [[ITOFP]]) -; NOLDEXP-NEXT: ret double [[POW]] +; CHECK-LABEL: define double @pow_sitofp_f64_const_base_2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[X]]) +; CHECK-NEXT: ret double [[EXP2]] ; %itofp = sitofp i32 %x to double %pow = tail call double @llvm.pow.f64(double 2.000000e+00, double %itofp) @@ -144,16 +126,10 @@ define half @pow_sitofp_f16_const_base_2(i32 %x) { } define <2 x float> @pow_sitofp_v2f32_const_base_2(<2 x i32> %x) { -; LDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2( -; LDEXP-SAME: <2 x i32> [[X:%.*]]) { -; LDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) -; LDEXP-NEXT: ret <2 x float> [[EXP2]] -; -; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2( -; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; NOLDEXP-NEXT: [[POW:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]]) -; NOLDEXP-NEXT: ret <2 
x float> [[POW]] +; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) +; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x float> %pow = tail call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %itofp) @@ -199,16 +175,10 @@ define <2 x float> @pow_sitofp_v2f32_const_base_mixed_2(<2 x i32> %x) { } define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) { -; LDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags( -; LDEXP-SAME: <2 x i32> [[X:%.*]]) { -; LDEXP-NEXT: [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) -; LDEXP-NEXT: ret <2 x float> [[EXP2]] -; -; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags( -; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; NOLDEXP-NEXT: [[POW:%.*]] = tail call nsz afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]]) -; NOLDEXP-NEXT: ret <2 x float> [[POW]] +; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) +; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x float> %pow = tail call nsz afn <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %itofp) @@ -216,16 +186,10 @@ define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) { } define @pow_sitofp_nxv4f32_const_base_2( %x) { -; LDEXP-LABEL: define @pow_sitofp_nxv4f32_const_base_2( -; LDEXP-SAME: [[X:%.*]]) { -; LDEXP-NEXT: [[EXP2:%.*]] = tail call @llvm.ldexp.nxv4f32.nxv4i32( shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer), [[X]]) -; LDEXP-NEXT: ret [[EXP2]] -; -; NOLDEXP-LABEL: define @pow_sitofp_nxv4f32_const_base_2( -; NOLDEXP-SAME: [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp [[X]] to -; NOLDEXP-NEXT: [[POW:%.*]] = tail call @llvm.exp2.nxv4f32( [[ITOFP]]) -; NOLDEXP-NEXT: ret [[POW]] +; CHECK-LABEL: define @pow_sitofp_nxv4f32_const_base_2( +; CHECK-SAME: [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call @llvm.ldexp.nxv4f32.nxv4i32( shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer), [[X]]) +; CHECK-NEXT: ret [[EXP2]] ; %itofp = sitofp %x to %pow = tail call @llvm.pow.nxv4f32( splat (float 2.0), %itofp) @@ -233,16 +197,10 @@ define @pow_sitofp_nxv4f32_const_base_2( } define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) { -; LDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2( -; LDEXP-SAME: <2 x i32> [[X:%.*]]) { -; LDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> , <2 x i32> [[X]]) -; LDEXP-NEXT: ret <2 x half> [[EXP2]] -; -; NOLDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2( -; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[ITOFP]]) -; NOLDEXP-NEXT: ret <2 x half> [[EXP2]] +; CHECK-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> , <2 x i32> [[X]]) +; CHECK-NEXT: ret <2 x half> [[EXP2]] ; %itofp = 
sitofp <2 x i32> %x to <2 x half> %pow = tail call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> %itofp) @@ -250,16 +208,10 @@ define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) { } define <2 x double> @pow_sitofp_v2f64_const_base_2(<2 x i32> %x) { -; LDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2( -; LDEXP-SAME: <2 x i32> [[X:%.*]]) { -; LDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> , <2 x i32> [[X]]) -; LDEXP-NEXT: ret <2 x double> [[EXP2]] -; -; NOLDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2( -; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[ITOFP]]) -; NOLDEXP-NEXT: ret <2 x double> [[EXP2]] +; CHECK-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2( +; CHECK-SAME: <2 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> , <2 x i32> [[X]]) +; CHECK-NEXT: ret <2 x double> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x double> %pow = tail call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %itofp) @@ -333,16 +285,10 @@ define <2 x double> @pow_sitofp_v2f64_const_base_8(<2 x i32> %x) { } define fp128 @pow_sitofp_fp128_const_base_2(i32 %x) { -; LDEXP-LABEL: define fp128 @pow_sitofp_fp128_const_base_2( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXPL:%.*]] = tail call fp128 @llvm.ldexp.f128.i32(fp128 0xL00000000000000003FFF000000000000, i32 [[X]]) -; LDEXP-NEXT: ret fp128 [[LDEXPL]] -; -; NOLDEXP-LABEL: define fp128 @pow_sitofp_fp128_const_base_2( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to fp128 -; NOLDEXP-NEXT: [[POW:%.*]] = tail call fp128 @llvm.exp2.f128(fp128 [[ITOFP]]) -; NOLDEXP-NEXT: ret fp128 [[POW]] +; CHECK-LABEL: define fp128 @pow_sitofp_fp128_const_base_2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fp128 @llvm.ldexp.f128.i32(fp128 0xL00000000000000003FFF000000000000, i32 [[X]]) +; CHECK-NEXT: ret fp128 [[EXP2]] ; %itofp = sitofp i32 %x to fp128 %pow = tail call fp128 @llvm.pow.fp128(fp128 0xL00000000000000004000000000000000, fp128 %itofp) @@ -412,16 +358,10 @@ define float @libcall_powf_sitofp_f32_const_base_2__flags(i32 %x) { } define float @readnone_libcall_powf_sitofp_f32_const_base_2(i32 %x) { -; LDEXP-LABEL: define float @readnone_libcall_powf_sitofp_f32_const_base_2( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXPF:%.*]] = tail call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[X]]) -; LDEXP-NEXT: ret float [[LDEXPF]] -; -; NOLDEXP-LABEL: define float @readnone_libcall_powf_sitofp_f32_const_base_2( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to float -; NOLDEXP-NEXT: [[POW:%.*]] = tail call float @llvm.exp2.f32(float [[ITOFP]]) -; NOLDEXP-NEXT: ret float [[POW]] +; CHECK-LABEL: define float @readnone_libcall_powf_sitofp_f32_const_base_2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 [[X]]) +; CHECK-NEXT: ret float [[EXP2]] ; %itofp = sitofp i32 %x to float %pow = tail call float @powf(float 2.000000e+00, float %itofp) memory(none) @@ -429,16 +369,10 @@ define float @readnone_libcall_powf_sitofp_f32_const_base_2(i32 %x) { } define double @readnone_libcall_pow_sitofp_f32_const_base_2(i32 %x) { -; LDEXP-LABEL: define double 
@readnone_libcall_pow_sitofp_f32_const_base_2( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXP:%.*]] = tail call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[X]]) -; LDEXP-NEXT: ret double [[LDEXP]] -; -; NOLDEXP-LABEL: define double @readnone_libcall_pow_sitofp_f32_const_base_2( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to double -; NOLDEXP-NEXT: [[POW:%.*]] = tail call double @llvm.exp2.f64(double [[ITOFP]]) -; NOLDEXP-NEXT: ret double [[POW]] +; CHECK-LABEL: define double @readnone_libcall_pow_sitofp_f32_const_base_2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 [[X]]) +; CHECK-NEXT: ret double [[EXP2]] ; %itofp = sitofp i32 %x to double %pow = tail call double @pow(double 2.000000e+00, double %itofp) memory(none) @@ -446,16 +380,10 @@ define double @readnone_libcall_pow_sitofp_f32_const_base_2(i32 %x) { } define fp128 @readnone_libcall_powl_sitofp_fp128_const_base_2(i32 %x) { -; LDEXP-LABEL: define fp128 @readnone_libcall_powl_sitofp_fp128_const_base_2( -; LDEXP-SAME: i32 [[X:%.*]]) { -; LDEXP-NEXT: [[LDEXPL:%.*]] = tail call fp128 @llvm.ldexp.f128.i32(fp128 0xL00000000000000003FFF000000000000, i32 [[X]]) -; LDEXP-NEXT: ret fp128 [[LDEXPL]] -; -; NOLDEXP-LABEL: define fp128 @readnone_libcall_powl_sitofp_fp128_const_base_2( -; NOLDEXP-SAME: i32 [[X:%.*]]) { -; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp i32 [[X]] to fp128 -; NOLDEXP-NEXT: [[POW:%.*]] = tail call fp128 @llvm.exp2.f128(fp128 [[ITOFP]]) -; NOLDEXP-NEXT: ret fp128 [[POW]] +; CHECK-LABEL: define fp128 @readnone_libcall_powl_sitofp_fp128_const_base_2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[EXP2:%.*]] = tail call fp128 @llvm.ldexp.f128.i32(fp128 0xL00000000000000003FFF000000000000, i32 [[X]]) +; CHECK-NEXT: ret fp128 [[EXP2]] ; %itofp = sitofp i32 %x to fp128 %pow = tail call fp128 @powl(fp128 0xL00000000000000004000000000000000, fp128 %itofp) memory(none) diff --git a/llvm/test/Transforms/Util/libcalls-shrinkwrap-double.ll b/llvm/test/Transforms/Util/libcalls-shrinkwrap-double.ll index 1866fc98fc8d8b..4ac216f85c74c0 100644 --- a/llvm/test/Transforms/Util/libcalls-shrinkwrap-double.ll +++ b/llvm/test/Transforms/Util/libcalls-shrinkwrap-double.ll @@ -448,4 +448,4 @@ define void @test_pow_strictfp(i32 %int_val, double %exp) strictfp { declare double @pow(double, double) -; CHECK: ![[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 2000} +; CHECK: ![[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/Transforms/Util/libcalls-shrinkwrap-float.ll b/llvm/test/Transforms/Util/libcalls-shrinkwrap-float.ll index 2a08041e2b1e5f..f4dc79759d17ed 100644 --- a/llvm/test/Transforms/Util/libcalls-shrinkwrap-float.ll +++ b/llvm/test/Transforms/Util/libcalls-shrinkwrap-float.ll @@ -350,4 +350,4 @@ declare float @log2f(float) declare float @logbf(float) declare float @log1pf(float) -; CHECK: ![[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 2000} +; CHECK: ![[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/Transforms/Util/libcalls-shrinkwrap-long-double.ll b/llvm/test/Transforms/Util/libcalls-shrinkwrap-long-double.ll index 076e958086daaf..c2b981c81c75d7 100644 --- a/llvm/test/Transforms/Util/libcalls-shrinkwrap-long-double.ll +++ b/llvm/test/Transforms/Util/libcalls-shrinkwrap-long-double.ll @@ -351,4 +351,4 @@ declare x86_fp80 @log2l(x86_fp80) declare x86_fp80 @logbl(x86_fp80) declare x86_fp80 @log1pl(x86_fp80) -; CHECK: 
![[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 2000} +; CHECK: ![[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/tools/dsymutil/MachOUtils.cpp b/llvm/tools/dsymutil/MachOUtils.cpp index 8e144d640ed01f..b52ab1ce6d2949 100644 --- a/llvm/tools/dsymutil/MachOUtils.cpp +++ b/llvm/tools/dsymutil/MachOUtils.cpp @@ -630,7 +630,7 @@ bool generateDsymCompanion( // Emit the Dwarf sections contents. for (const MCSection &Sec : MCAsm) { - if (Sec.begin() == Sec.end()) + if (Sec.empty()) continue; uint64_t Pos = OutFile.tell(); diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index 0958e16c2197ac..e48e3f4d22c123 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -142,10 +142,11 @@ int main(int argc, char **argv) { } // Convert to new debug format if requested. - M->setIsNewDbgInfoFormat(UseNewDbgInfoFormat && - WriteNewDbgInfoFormatToBitcode); - if (M->IsNewDbgInfoFormat) + assert(!M->IsNewDbgInfoFormat && "Unexpectedly in new debug mode"); + if (UseNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode) { + M->convertToNewDbgValues(); M->removeDebugIntrinsicDeclarations(); + } std::unique_ptr Index = std::move(ModuleAndIndex.Index); diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index d28af85bc739eb..fbbb5506e43e05 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -258,7 +258,7 @@ int main(int argc, char **argv) { // All that llvm-dis does is write the assembly to a file. if (!DontPrint) { if (M) { - M->setIsNewDbgInfoFormat(WriteNewDbgInfoFormat); + ScopedDbgInfoFormatSetter FormatSetter(*M, WriteNewDbgInfoFormat); if (WriteNewDbgInfoFormat) M->removeDebugIntrinsicDeclarations(); M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder); diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index b84469d1c757f8..7794f2d81ed064 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -489,6 +489,12 @@ int main(int argc, char **argv) { if (LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_UNSET) LoadBitcodeIntoNewDbgInfoFormat = cl::boolOrDefault::BOU_TRUE; + // RemoveDIs debug-info transition: tests may request that we /try/ to use the + // new debug-info format. + if (TryUseNewDbgInfoFormat) { + // Turn the new debug-info format on. + UseNewDbgInfoFormat = true; + } // Since llvm-link collects multiple IR modules together, for simplicity's // sake we disable the "PreserveInputDbgFormat" flag to enforce a single // debug info format. 
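The llvm-as change above is the core of this RemoveDIs hand-off: a module is parsed in the old intrinsic-based debug-info format, asserted to still be in that format, and converted to record-based form only when both flags request it. Below is a minimal sketch of that gating, assuming only the Module API already visible in these hunks (IsNewDbgInfoFormat, convertToNewDbgValues, removeDebugIntrinsicDeclarations); the helper name maybeConvertDbgFormat is hypothetical, not part of the patch:

#include "llvm/IR/Module.h"
#include <cassert>

// Hypothetical helper mirroring the llvm-as hunk: convert a freshly parsed
// module to record-based debug info only when both flags ask for it.
static void maybeConvertDbgFormat(llvm::Module &M, bool UseNewDbgInfoFormat,
                                  bool WriteNewDbgInfoFormatToBitcode) {
  assert(!M.IsNewDbgInfoFormat && "Unexpectedly in new debug mode");
  if (UseNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode) {
    M.convertToNewDbgValues();            // llvm.dbg.* intrinsic calls -> #dbg_* records
    M.removeDebugIntrinsicDeclarations(); // drop the now-unreferenced declarations
  }
}

llvm-dis, by contrast, only needs the new format for the duration of printing, which is why its hunk switches to the RAII ScopedDbgInfoFormatSetter instead of permanently mutating the module's format flag.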
@@ -550,7 +556,7 @@ int main(int argc, char **argv) { SetFormat(WriteNewDbgInfoFormat); Composite->print(Out.os(), nullptr, PreserveAssemblyUseListOrder); } else if (Force || !CheckBitcodeOutputToConsole(Out.os())) { - SetFormat(UseNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode); + SetFormat(WriteNewDbgInfoFormatToBitcode); WriteBitcodeToFile(*Composite, Out.os(), PreserveBitcodeUseListOrder); } diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 6e4dda8351a1b1..7007d944801a75 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -723,11 +723,13 @@ TEST(APFloatTest, IsSmallestNormalized) { EXPECT_FALSE(APFloat::getZero(Semantics, false).isSmallestNormalized()); EXPECT_FALSE(APFloat::getZero(Semantics, true).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized()); + if (APFloat::hasNanOrInf(Semantics)) { + EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized()); + EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized()); + EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized()); + EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized()); + } EXPECT_FALSE(APFloat::getLargest(Semantics).isSmallestNormalized()); EXPECT_FALSE(APFloat::getLargest(Semantics, true).isSmallestNormalized()); @@ -1823,6 +1825,9 @@ TEST(APFloatTest, getLargest) { 30, APFloat::getLargest(APFloat::Float8E4M3B11FNUZ()).convertToDouble()); EXPECT_EQ(3.40116213421e+38f, APFloat::getLargest(APFloat::FloatTF32()).convertToFloat()); + EXPECT_EQ(28, APFloat::getLargest(APFloat::Float6E3M2FN()).convertToDouble()); + EXPECT_EQ(7.5, + APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble()); } TEST(APFloatTest, getSmallest) { @@ -1881,6 +1886,20 @@ TEST(APFloatTest, getSmallest) { EXPECT_TRUE(test.isFiniteNonZero()); EXPECT_TRUE(test.isDenormal()); EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + test = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); + expected = APFloat(APFloat::Float6E3M2FN(), "0x0.1p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + test = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); + expected = APFloat(APFloat::Float6E2M3FN(), "0x0.2p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); } TEST(APFloatTest, getSmallestNormalized) { @@ -1963,6 +1982,21 @@ TEST(APFloatTest, getSmallestNormalized) { EXPECT_FALSE(test.isDenormal()); EXPECT_TRUE(test.bitwiseIsEqual(expected)); EXPECT_TRUE(test.isSmallestNormalized()); + test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); + expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_FALSE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + EXPECT_TRUE(test.isSmallestNormalized()); + + test = APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); + expected = APFloat(APFloat::Float6E2M3FN(), "0x1p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_FALSE(test.isDenormal()); + 
EXPECT_TRUE(test.bitwiseIsEqual(expected)); + EXPECT_TRUE(test.isSmallestNormalized()); } TEST(APFloatTest, getZero) { @@ -1996,7 +2030,11 @@ TEST(APFloatTest, getZero) { {&APFloat::Float8E4M3B11FNUZ(), false, false, {0, 0}, 1}, {&APFloat::Float8E4M3B11FNUZ(), true, false, {0, 0}, 1}, {&APFloat::FloatTF32(), false, true, {0, 0}, 1}, - {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1}}; + {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1}, + {&APFloat::Float6E3M2FN(), false, true, {0, 0}, 1}, + {&APFloat::Float6E3M2FN(), true, true, {0x20ULL, 0}, 1}, + {&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1}, + {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1}}; const unsigned NumGetZeroTests = std::size(GetZeroTest); for (unsigned i = 0; i < NumGetZeroTests; ++i) { APFloat test = APFloat::getZero(*GetZeroTest[i].semantics, @@ -5161,6 +5199,90 @@ TEST(APFloatTest, Float8ExhaustivePair) { } } +TEST(APFloatTest, Float6ExhaustivePair) { + // Test each pair of 6-bit floats with non-standard semantics + for (APFloat::Semantics Sem : + {APFloat::S_Float6E3M2FN, APFloat::S_Float6E2M3FN}) { + const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); + for (int i = 1; i < 64; i++) { + for (int j = 1; j < 64; j++) { + SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) + + ",j=" + std::to_string(j)); + APFloat x(S, APInt(6, i)); + APFloat y(S, APInt(6, j)); + + bool losesInfo; + APFloat x16 = x; + x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_FALSE(losesInfo); + APFloat y16 = y; + y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_FALSE(losesInfo); + + // Add + APFloat z = x; + z.add(y, APFloat::rmNearestTiesToEven); + APFloat z16 = x16; + z16.add(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Subtract + z = x; + z.subtract(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.subtract(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Multiply + z = x; + z.multiply(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.multiply(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Skip divide by 0 + if (j == 0 || j == 32) + continue; + + // Divide + z = x; + z.divide(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.divide(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Mod + z = x; + z.mod(y); + z16 = x16; + z16.mod(y16); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Remainder + z = x; + z.remainder(y); + z16 = x16; + z16.remainder(y16); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + } + } + } +} + TEST(APFloatTest, ConvertE4M3FNToE5M2) { bool losesInfo; APFloat test(APFloat::Float8E4M3FN(), "1.0"); @@ -6620,28 +6742,39 @@ TEST(APFloatTest, getExactLog2) { EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2()); EXPECT_EQ(INT_MIN, 
APFloat(Semantics, "3.0").getExactLog2Abs()); EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2Abs()); - EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2()); - EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2()); - EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2()); - EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs()); - EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs()); - EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs()); + + if (I == APFloat::S_Float6E2M3FN) { + EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat(Semantics, "-4.0").getExactLog2()); + EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2Abs()); + EXPECT_EQ(2, APFloat(Semantics, "-4.0").getExactLog2Abs()); + } else { + EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2()); + EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2()); + EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2()); + EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs()); + EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs()); + EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs()); + } EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2()); EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2Abs()); EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs()); + + if (APFloat::hasNanOrInf(Semantics)) { + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2()); + + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs()); + } EXPECT_EQ(INT_MIN, scalbn(One, MinExp - Precision - 1, APFloat::rmNearestTiesToEven) @@ -6660,4 +6793,311 @@ TEST(APFloatTest, getExactLog2) { } } +TEST(APFloatTest, Float6E3M2FNFromString) { + // Exactly representable + EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "28").convertToDouble()); + // Round down to maximum value + EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "32").convertToDouble()); + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG + EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "inf"), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "nan"), + "This floating point format does not support 
NaN"); +#endif +#endif + + EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "0").isPosZero()); + EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "-0").isNegZero()); +} + +TEST(APFloatTest, Float6E2M3FNFromString) { + // Exactly representable + EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "7.5").convertToDouble()); + // Round down to maximum value + EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "32").convertToDouble()); + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG + EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "inf"), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "nan"), + "This floating point format does not support NaN"); +#endif +#endif + + EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "0").isPosZero()); + EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "-0").isNegZero()); +} + +TEST(APFloatTest, ConvertE3M2FToE2M3F) { + bool losesInfo; + APFloat test(APFloat::Float6E3M2FN(), "1.0"); + APFloat::opStatus status = test.convert( + APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(1.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + test = APFloat(APFloat::Float6E3M2FN(), "0.0"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + // Test overflow + losesInfo = false; + test = APFloat(APFloat::Float6E3M2FN(), "28"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(7.5f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opInexact); + + // Test underflow + test = APFloat(APFloat::Float6E3M2FN(), ".0625"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0., test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); + + // Testing inexact rounding to denormal number + losesInfo = false; + test = APFloat(APFloat::Float6E3M2FN(), "0.1875"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.25, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); +} + +TEST(APFloatTest, ConvertE2M3FToE3M2F) { + bool losesInfo; + APFloat test(APFloat::Float6E2M3FN(), "1.0"); + APFloat::opStatus status = test.convert( + APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(1.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + test = APFloat(APFloat::Float6E2M3FN(), "0.0"); + status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + test = APFloat(APFloat::Float6E2M3FN(), ".125"); + status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(.125, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + // Test inexact rounding + losesInfo = false; + test = APFloat(APFloat::Float6E2M3FN(), "7.5"); + status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(8, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opInexact); +} + +TEST(APFloatTest, Float6E3M2FNNext) { + APFloat 
test(APFloat::Float6E3M2FN(), APFloat::uninitialized); + APFloat expected(APFloat::Float6E3M2FN(), APFloat::uninitialized); + + // 1. NextUp of largest bit pattern is the same + test = APFloat::getLargest(APFloat::Float6E3M2FN()); + expected = APFloat::getLargest(APFloat::Float6E3M2FN()); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 2. NextUp of smallest negative denormal is -0 + test = APFloat::getSmallest(APFloat::Float6E3M2FN(), true); + expected = APFloat::getZero(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_TRUE(test.isNegZero()); + EXPECT_FALSE(test.isPosZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 3. nextDown of negative of largest value is the same + test = APFloat::getLargest(APFloat::Float6E3M2FN(), true); + expected = test; + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_FALSE(test.isNaN()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 4. nextDown of +0 is smallest negative denormal + test = APFloat::getZero(APFloat::Float6E3M2FN(), false); + expected = APFloat::getSmallest(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); +} + +TEST(APFloatTest, Float6E2M3FNNext) { + APFloat test(APFloat::Float6E2M3FN(), APFloat::uninitialized); + APFloat expected(APFloat::Float6E2M3FN(), APFloat::uninitialized); + + // 1. NextUp of largest bit pattern is the same + test = APFloat::getLargest(APFloat::Float6E2M3FN()); + expected = APFloat::getLargest(APFloat::Float6E2M3FN()); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 2. NextUp of smallest negative denormal is -0 + test = APFloat::getSmallest(APFloat::Float6E2M3FN(), true); + expected = APFloat::getZero(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_TRUE(test.isNegZero()); + EXPECT_FALSE(test.isPosZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 3. nextDown of negative of largest value is the same + test = APFloat::getLargest(APFloat::Float6E2M3FN(), true); + expected = test; + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_FALSE(test.isNaN()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 4. 
nextDown of +0 is smallest negative denormal + test = APFloat::getZero(APFloat::Float6E2M3FN(), false); + expected = APFloat::getSmallest(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); +} + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG +TEST(APFloatTest, Float6E3M2FNGetInfNaN) { + EXPECT_DEATH(APFloat::getInf(APFloat::Float6E3M2FN()), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E3M2FN()), + "This floating point format does not support NaN"); +} + +TEST(APFloatTest, Float6E2M3FNGetInfNaN) { + EXPECT_DEATH(APFloat::getInf(APFloat::Float6E2M3FN()), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E2M3FN()), + "This floating point format does not support NaN"); +} +#endif +#endif + +TEST(APFloatTest, Float6E3M2FNToDouble) { + APFloat One(APFloat::Float6E3M2FN(), "1.0"); + EXPECT_EQ(1.0, One.convertToDouble()); + APFloat Two(APFloat::Float6E3M2FN(), "2.0"); + EXPECT_EQ(2.0, Two.convertToDouble()); + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(28., PosLargest.convertToDouble()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-28., NegLargest.convertToDouble()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(0x1p-2, PosSmallest.convertToDouble()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-0x1p-2, NegSmallest.convertToDouble()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToDouble()); +} + +TEST(APFloatTest, Float6E2M3FNToDouble) { + APFloat One(APFloat::Float6E2M3FN(), "1.0"); + EXPECT_EQ(1.0, One.convertToDouble()); + APFloat Two(APFloat::Float6E2M3FN(), "2.0"); + EXPECT_EQ(2.0, Two.convertToDouble()); + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(7.5, PosLargest.convertToDouble()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-7.5, NegLargest.convertToDouble()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(0x1p0, PosSmallest.convertToDouble()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-0x1p0, NegSmallest.convertToDouble()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToDouble()); +} + +TEST(APFloatTest, Float6E3M2FNToFloat) { + APFloat PosZero = APFloat::getZero(APFloat::Float6E3M2FN()); + APFloat PosZeroToFloat(PosZero.convertToFloat()); + EXPECT_TRUE(PosZeroToFloat.isPosZero()); + APFloat NegZero = APFloat::getZero(APFloat::Float6E3M2FN(), true); + APFloat NegZeroToFloat(NegZero.convertToFloat()); + EXPECT_TRUE(NegZeroToFloat.isNegZero()); + + APFloat One(APFloat::Float6E3M2FN(), "1.0"); + EXPECT_EQ(1.0F, One.convertToFloat()); + APFloat Two(APFloat::Float6E3M2FN(), "2.0"); + EXPECT_EQ(2.0F, Two.convertToFloat()); + + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(28., PosLargest.convertToFloat()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true); + 
EXPECT_EQ(-28., NegLargest.convertToFloat()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(0x1p-2, PosSmallest.convertToFloat()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-0x1p-2, NegSmallest.convertToFloat()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToFloat()); +} + +TEST(APFloatTest, Float6E2M3FNToFloat) { + APFloat PosZero = APFloat::getZero(APFloat::Float6E2M3FN()); + APFloat PosZeroToFloat(PosZero.convertToFloat()); + EXPECT_TRUE(PosZeroToFloat.isPosZero()); + APFloat NegZero = APFloat::getZero(APFloat::Float6E2M3FN(), true); + APFloat NegZeroToFloat(NegZero.convertToFloat()); + EXPECT_TRUE(NegZeroToFloat.isNegZero()); + + APFloat One(APFloat::Float6E2M3FN(), "1.0"); + EXPECT_EQ(1.0F, One.convertToFloat()); + APFloat Two(APFloat::Float6E2M3FN(), "2.0"); + EXPECT_EQ(2.0F, Two.convertToFloat()); + + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(7.5, PosLargest.convertToFloat()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-7.5, NegLargest.convertToFloat()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(0x1p0, PosSmallest.convertToFloat()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-0x1p0, NegSmallest.convertToFloat()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToFloat()); +} } // namespace diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp index 24f4f11db9a8b9..f6a053792f8529 100644 --- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/IRSimilarityIdentifier.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -23,11 +22,6 @@ using namespace llvm; using namespace IRSimilarity; -extern llvm::cl::opt UseNewDbgInfoFormat; -extern cl::opt PreserveInputDbgFormat; -extern bool WriteNewDbgInfoFormatToBitcode; -extern cl::opt WriteNewDbgInfoFormat; - static std::unique_ptr makeLLVMModule(LLVMContext &Context, StringRef ModuleStr) { SMDiagnostic Err; @@ -1312,18 +1306,19 @@ TEST(IRInstructionMapper, CallBrInstIllegal) { ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber); } -// Checks that an debuginfo records are mapped to be invisible. Since they +// Checks that debug-info intrinsics are mapped to be invisible. Since they // do not semantically change the program, they can be recognized as similar.
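To make "invisible" concrete: instructions are filtered before mapping, so two regions that differ only in `llvm.dbg.*` calls produce identical instruction sequences. The sketch below is illustrative only, not the mapper's actual implementation; `collectMappableInsts` is a hypothetical helper.

```c++
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IntrinsicInst.h"
#include <vector>

// Hypothetical helper (not from this patch): gather the instructions that
// participate in similarity mapping, skipping debug intrinsics such as
// llvm.dbg.value and llvm.dbg.declare.
static std::vector<const llvm::Instruction *>
collectMappableInsts(const llvm::BasicBlock &BB) {
  std::vector<const llvm::Instruction *> Out;
  for (const llvm::Instruction &I : BB)
    if (!llvm::isa<llvm::DbgInfoIntrinsic>(&I)) // debug info is "invisible"
      Out.push_back(&I);
  return Out;
}
```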
TEST(IRInstructionMapper, DebugInfoInvisible) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { then: - %0 = add i32 %a, %b - #dbg_value(i32 0, !0, !0, !0) - %1 = add i32 %a, %b + %0 = add i32 %a, %b + call void @llvm.dbg.value(metadata !0) + %1 = add i32 %a, %b ret i32 0 } + declare void @llvm.dbg.value(metadata) !0 = distinct !{!"test\00", i32 10})"; LLVMContext Context; std::unique_ptr M = makeLLVMModule(Context, ModuleString); @@ -1919,19 +1914,19 @@ TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) { ASSERT_FALSE(longSimCandCompare(InstrList)); } -// Check that debug records do not impact similarity. They are marked as +// Check that debug instructions do not impact similarity. They are marked as // invisible. TEST(IRSimilarityCandidate, IdenticalWithDebug) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { bb0: %0 = add i32 %a, %b - #dbg_value(i32 0, !0, !0, !0) + call void @llvm.dbg.value(metadata !0) %1 = add i32 %b, %a ret i32 0 bb1: %2 = add i32 %a, %b - #dbg_value(i32 1, !1, !1, !1) + call void @llvm.dbg.value(metadata !1) %3 = add i32 %b, %a ret i32 0 bb2: @@ -1940,6 +1935,7 @@ TEST(IRSimilarityCandidate, IdenticalWithDebug) { ret i32 0 } + declare void @llvm.dbg.value(metadata) !0 = distinct !{!"test\00", i32 10} !1 = distinct !{!"test\00", i32 11})"; LLVMContext Context; diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp index 91a0745a0cc76e..f873bbd4293af5 100644 --- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp +++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp @@ -25,6 +25,8 @@ using namespace llvm; +extern cl::opt UseNewDbgInfoFormat; + static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { SMDiagnostic Err; std::unique_ptr Mod = parseAssemblyString(IR, Err, C); @@ -42,6 +44,8 @@ namespace { // by DbgVariableRecords, the dbg.value replacement. TEST(BasicBlockDbgInfoTest, InsertAfterSelf) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11 @@ -68,6 +72,8 @@ TEST(BasicBlockDbgInfoTest, InsertAfterSelf) { !11 = !DILocation(line: 1, column: 1, scope: !6) )"); + // Convert the module to "new" form debug-info. + M->convertToNewDbgValues(); // Fetch the entry block. 
BasicBlock &BB = M->getFunction("f")->getEntryBlock(); @@ -97,10 +103,16 @@ TEST(BasicBlockDbgInfoTest, InsertAfterSelf) { EXPECT_TRUE(RetInst->hasDbgRecords()); auto Range2 = RetInst->getDbgRecordRange(); EXPECT_EQ(std::distance(Range2.begin(), Range2.end()), 1u); + + M->convertFromNewDbgValues(); + + UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"---( define dso_local void @func() #0 !dbg !10 { %1 = alloca i32, align 4 @@ -138,6 +150,8 @@ TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) { )---"); ASSERT_TRUE(M); + M->convertToNewDbgValues(); + Function *F = M->getFunction("func"); BasicBlock &BB = F->getEntryBlock(); @@ -147,10 +161,14 @@ TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) { BasicBlock &BBBefore = F->getEntryBlock(); auto I2 = std::prev(BBBefore.end(), 2); ASSERT_TRUE(I2->hasDbgRecords()); + + UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, MarkerOperations) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11 @@ -178,6 +196,8 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) { // Fetch the entry block, BasicBlock &BB = M->getFunction("f")->getEntryBlock(); + // Convert the module to "new" form debug-info. + M->convertToNewDbgValues(); EXPECT_EQ(BB.size(), 2u); // Fetch out our two markers, @@ -275,10 +295,14 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) { // Teardown, Instr1->insertBefore(BB, BB.begin()); + + UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, HeadBitOperations) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { %b = add i16 %a, 1, !dbg !11 @@ -308,6 +332,8 @@ TEST(BasicBlockDbgInfoTest, HeadBitOperations) { // Test that the movement of debug-data when using moveBefore etc and // insertBefore etc are governed by the "head" bit of iterators. BasicBlock &BB = M->getFunction("f")->getEntryBlock(); + // Convert the module to "new" form debug-info. + M->convertToNewDbgValues(); // Test that the head bit behaves as expected: it should be set when the // code wants the _start_ of the block, but not otherwise. @@ -378,10 +404,14 @@ TEST(BasicBlockDbgInfoTest, HeadBitOperations) { DInst->DebugMarker->StoredDbgRecords.empty()); EXPECT_FALSE(CInst->DebugMarker->StoredDbgRecords.empty()); EXPECT_EQ(&*BB.begin(), CInst); + + UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, InstrDbgAccess) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { %b = add i16 %a, 1, !dbg !11 @@ -411,6 +441,8 @@ TEST(BasicBlockDbgInfoTest, InstrDbgAccess) { // Check that DbgVariableRecords can be accessed from Instructions without // digging into the depths of DbgMarkers. BasicBlock &BB = M->getFunction("f")->getEntryBlock(); + // Convert the module to "new" form debug-info. 
+ M->convertToNewDbgValues(); Instruction *BInst = &*BB.begin(); Instruction *CInst = BInst->getNextNode(); @@ -451,6 +483,8 @@ TEST(BasicBlockDbgInfoTest, InstrDbgAccess) { CInst->dropOneDbgRecord(DVR1); EXPECT_FALSE(CInst->hasDbgRecords()); EXPECT_EQ(CInst->DebugMarker->StoredDbgRecords.size(), 0u); + + UseNewDbgInfoFormat = false; } /* Let's recall the big illustration from BasicBlock::spliceDebugInfo: @@ -543,7 +577,9 @@ class DbgSpliceTest : public ::testing::Test { DbgVariableRecord *DVRA, *DVRB, *DVRConst; void SetUp() override { + UseNewDbgInfoFormat = true; M = parseIR(C, SpliceTestIR.c_str()); + M->convertToNewDbgValues(); BBEntry = &M->getFunction("f")->getEntryBlock(); BBExit = BBEntry->getNextNode(); @@ -563,6 +599,8 @@ class DbgSpliceTest : public ::testing::Test { cast(&*CInst->DebugMarker->StoredDbgRecords.begin()); } + void TearDown() override { UseNewDbgInfoFormat = false; } + bool InstContainsDbgVariableRecord(Instruction *I, DbgVariableRecord *DVR) { for (DbgRecord &D : I->getDbgRecordRange()) { if (&D == DVR) { @@ -1149,6 +1187,8 @@ metadata !9, metadata !DIExpression()), !dbg !11 Dest %c = add i16 %b, 1, // then the trailing DbgVariableRecords should get flushed back out. TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1179,6 +1219,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { BasicBlock &Entry = M->getFunction("f")->getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); + M->convertToNewDbgValues(); // Begin by forcing entry block to have dangling DbgVariableRecord. Entry.getTerminator()->eraseFromParent(); @@ -1193,6 +1234,8 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { Instruction *BInst = &*Entry.begin(); ASSERT_TRUE(BInst->DebugMarker); EXPECT_EQ(BInst->DebugMarker->StoredDbgRecords.size(), 1u); + + UseNewDbgInfoFormat = false; } // When we remove instructions from the program, adjacent DbgVariableRecords @@ -1201,6 +1244,8 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { // dbg.values. Test that this can be replicated correctly by DbgVariableRecords TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1228,6 +1273,7 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) { )"); BasicBlock &Entry = M->getFunction("f")->getEntryBlock(); + M->convertToNewDbgValues(); // Fetch the relevant instructions from the converted function. Instruction *SubInst = &*Entry.begin(); @@ -1270,12 +1316,16 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) { EXPECT_EQ(std::distance(R4.begin(), R4.end()), 1u); auto R5 = RetInst->getDbgRecordRange(); EXPECT_EQ(std::distance(R5.begin(), R5.end()), 1u); + + UseNewDbgInfoFormat = false; } // Test instruction removal and re-insertion, this time with one // DbgVariableRecord that should hop up one instruction. TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1302,6 +1352,7 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { )"); BasicBlock &Entry = M->getFunction("f")->getEntryBlock(); + M->convertToNewDbgValues(); // Fetch the relevant instructions from the converted function. 
Instruction *SubInst = &*Entry.begin(); @@ -1340,6 +1391,8 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { EXPECT_FALSE(RetInst->hasDbgRecords()); auto R3 = AddInst->getDbgRecordRange(); EXPECT_EQ(std::distance(R3.begin(), R3.end()), 1u); + + UseNewDbgInfoFormat = false; } // Similar to the above, what if we splice into an empty block with debug-info, @@ -1348,6 +1401,8 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { // of the i16 0 dbg.value. TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1381,6 +1436,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) { Function &F = *M->getFunction("f"); BasicBlock &Entry = F.getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); + M->convertToNewDbgValues(); // Begin by forcing entry block to have dangling DbgVariableRecord. Entry.getTerminator()->eraseFromParent(); @@ -1407,12 +1463,16 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) { // No trailing DbgVariableRecords in the entry block now. EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr); + + UseNewDbgInfoFormat = false; } // Similar test again, but this time: splice the contents of exit into entry, // with the intention of leaving the first dbg.value (i16 0) behind. TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1446,6 +1506,7 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) { Function &F = *M->getFunction("f"); BasicBlock &Entry = F.getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); + M->convertToNewDbgValues(); // Begin by forcing entry block to have dangling DbgVariableRecord. Entry.getTerminator()->eraseFromParent(); @@ -1476,12 +1537,16 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) { EXPECT_FALSE(Exit.getTrailingDbgRecords()->empty()); Exit.getTrailingDbgRecords()->eraseFromParent(); Exit.deleteTrailingDbgRecords(); + + UseNewDbgInfoFormat = false; } // What if we moveBefore end() -- there might be no debug-info there, in which // case we shouldn't crash. TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) { LLVMContext C; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1511,6 +1576,7 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) { Function &F = *M->getFunction("f"); BasicBlock &Entry = F.getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); + M->convertToNewDbgValues(); // Move the return to the end of the entry block. Instruction *Br = Entry.getTerminator(); @@ -1523,6 +1589,8 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) { EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr); EXPECT_EQ(Exit.getTrailingDbgRecords(), nullptr); EXPECT_FALSE(Ret->hasDbgRecords()); + + UseNewDbgInfoFormat = false; } } // End anonymous namespace. diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index cac8acbe15a79d..ec3f33318f8cdb 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -156,7 +156,7 @@ TEST(StripTest, LoopMetadata) { EXPECT_FALSE(BrokenDebugInfo); } -TEST(MetadataTest, DeleteInstUsedByDbgRecord) { +TEST(MetadataTest, DeleteInstUsedByDbgValue) { LLVMContext C; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { @@ -187,13 +187,12 @@ TEST(MetadataTest, DeleteInstUsedByDbgRecord) { // Find the dbg.value using %b. 
SmallVector DVIs; - SmallVector DVRs; - findDbgValues(DVIs, &I, &DVRs); + findDbgValues(DVIs, &I); // Delete %b. The dbg.value should now point to undef. I.eraseFromParent(); - EXPECT_EQ(DVRs[0]->getNumVariableLocationOps(), 1u); - EXPECT_TRUE(isa(DVRs[0]->getValue(0))); + EXPECT_EQ(DVIs[0]->getNumVariableLocationOps(), 1u); + EXPECT_TRUE(isa(DVIs[0]->getValue(0))); } TEST(DbgVariableIntrinsic, EmptyMDIsKillLocation) { @@ -231,8 +230,8 @@ TEST(DbgVariableIntrinsic, EmptyMDIsKillLocation) { // Get the dbg.declare. Function &F = *cast(M->getNamedValue("fun")); - DbgVariableRecord *DbgDeclare = - cast(&*F.front().front().getDbgRecordRange().begin()); + DbgVariableIntrinsic *DbgDeclare = + cast(&F.front().front()); // Check that this form counts as a "no location" marker. EXPECT_TRUE(DbgDeclare->isKillLocation()); } @@ -240,9 +239,6 @@ TEST(DbgVariableIntrinsic, EmptyMDIsKillLocation) { // Duplicate of above test, but in DbgVariableRecord representation. TEST(MetadataTest, DeleteInstUsedByDbgVariableRecord) { LLVMContext C; - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { %b = add i16 %a, 1, !dbg !11 @@ -268,7 +264,10 @@ TEST(MetadataTest, DeleteInstUsedByDbgVariableRecord) { !11 = !DILocation(line: 1, column: 1, scope: !6) )"); + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = true; Instruction &I = *M->getFunction("f")->getEntryBlock().getFirstNonPHI(); + M->convertToNewDbgValues(); // Find the DbgVariableRecords using %b. SmallVector DVIs; @@ -290,8 +289,6 @@ TEST(MetadataTest, DeleteInstUsedByDbgVariableRecord) { // Ensure that the order of dbg.value intrinsics returned by findDbgValues, and // their corresponding DbgVariableRecord representation, are consistent. TEST(MetadataTest, OrderingOfDbgVariableRecords) { - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = false; LLVMContext C; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { @@ -319,6 +316,8 @@ TEST(MetadataTest, OrderingOfDbgVariableRecords) { !12 = !DILocalVariable(name: "bar", scope: !6, file: !1, line: 1, type: !10) )"); + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = true; Instruction &I = *M->getFunction("f")->getEntryBlock().getFirstNonPHI(); SmallVector DVIs; @@ -516,15 +515,14 @@ TEST(DbgAssignIntrinsicTest, replaceVariableLocationOp) { Value *V1 = Fun.getArg(0); Value *P1 = Fun.getArg(1); Value *P2 = Fun.getArg(2); - DbgVariableRecord *DbgAssign = cast( - &*Fun.front().front().getDbgRecordRange().begin()); - ASSERT_TRUE(V1 == DbgAssign->getVariableLocationOp(0)); - ASSERT_TRUE(P1 == DbgAssign->getAddress()); + DbgAssignIntrinsic *DAI = cast(Fun.begin()->begin()); + ASSERT_TRUE(V1 == DAI->getVariableLocationOp(0)); + ASSERT_TRUE(P1 == DAI->getAddress()); #define TEST_REPLACE(Old, New, ExpectedValue, ExpectedAddr) \ - DbgAssign->replaceVariableLocationOp(Old, New); \ - EXPECT_EQ(DbgAssign->getVariableLocationOp(0), ExpectedValue); \ - EXPECT_EQ(DbgAssign->getAddress(), ExpectedAddr); + DAI->replaceVariableLocationOp(Old, New); \ + EXPECT_EQ(DAI->getVariableLocationOp(0), ExpectedValue); \ + EXPECT_EQ(DAI->getAddress(), ExpectedAddr); // Replace address only. TEST_REPLACE(/*Old*/ P1, /*New*/ P2, /*Value*/ V1, /*Address*/ P2); @@ -535,8 +533,8 @@ TEST(DbgAssignIntrinsicTest, replaceVariableLocationOp) { // Replace address only, value uses a DIArgList. // Value = {DIArgList(V1)}, Addr = P1. 
- DbgAssign->setRawLocation(DIArgList::get(C, ValueAsMetadata::get(V1))); - DbgAssign->setExpression(DIExpression::get( + DAI->setRawLocation(DIArgList::get(C, ValueAsMetadata::get(V1))); + DAI->setExpression(DIExpression::get( C, {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_stack_value})); TEST_REPLACE(/*Old*/ P1, /*New*/ P2, /*Value*/ V1, /*Address*/ P2); #undef TEST_REPLACE @@ -622,11 +620,11 @@ TEST(AssignmentTrackingTest, Utils) { // // Check there are two llvm.dbg.assign intrinsics linked to Alloca. auto CheckFun1Mapping = [&Alloca]() { - auto Markers = at::getDVRAssignmentMarkers(&Alloca); + auto Markers = at::getAssignmentMarkers(&Alloca); EXPECT_TRUE(std::distance(Markers.begin(), Markers.end()) == 2); // Check those two entries are distinct. - DbgVariableRecord *First = *Markers.begin(); - DbgVariableRecord *Second = *std::next(Markers.begin()); + DbgAssignIntrinsic *First = *Markers.begin(); + DbgAssignIntrinsic *Second = *std::next(Markers.begin()); EXPECT_NE(First, Second); // Check that we can get back to Alloca from each llvm.dbg.assign. @@ -662,7 +660,7 @@ TEST(AssignmentTrackingTest, Utils) { DIAssignID *Fun2ID = cast_or_null( Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID)); EXPECT_NE(New, Fun2ID); - auto Fun2Markers = at::getDVRAssignmentMarkers(&Fun2Alloca); + auto Fun2Markers = at::getAssignmentMarkers(&Fun2Alloca); ASSERT_TRUE(std::distance(Fun2Markers.begin(), Fun2Markers.end()) == 1); auto Fun2Insts = at::getAssignmentInsts(*Fun2Markers.begin()); ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); @@ -671,10 +669,10 @@ TEST(AssignmentTrackingTest, Utils) { // 3. Check that deleting dbg.assigns from a specific instruction works. Instruction &Fun3Alloca = *M->getFunction("fun3")->getEntryBlock().getFirstNonPHIOrDbg(); - auto Fun3Markers = at::getDVRAssignmentMarkers(&Fun3Alloca); + auto Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); ASSERT_TRUE(std::distance(Fun3Markers.begin(), Fun3Markers.end()) == 1); at::deleteAssignmentMarkers(&Fun3Alloca); - Fun3Markers = at::getDVRAssignmentMarkers(&Fun3Alloca); + Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); EXPECT_EQ(Fun3Markers.empty(), true); // 4. Check that deleting works and applies only to the target function. @@ -685,7 +683,7 @@ TEST(AssignmentTrackingTest, Utils) { // llvm.dbg.assign. EXPECT_EQ(Fun2ID, cast_or_null( Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID))); - EXPECT_FALSE(at::getDVRAssignmentMarkers(&Fun2Alloca).empty()); + EXPECT_FALSE(at::getAssignmentMarkers(&Fun2Alloca).empty()); } TEST(IRBuilder, GetSetInsertionPointWithEmptyBasicBlock) { @@ -771,12 +769,12 @@ TEST(AssignmentTrackingTest, InstrMethods) { // Use SetVectors to check that the attachments and markers are unique // (another test requirement). SetVector OrigIDs; - SetVector Markers; + SetVector Markers; for (const Instruction *SI : Stores) { Metadata *ID = SI->getMetadata(LLVMContext::MD_DIAssignID); ASSERT_TRUE(OrigIDs.insert(ID)); ASSERT_TRUE(ID != nullptr); - auto Range = at::getDVRAssignmentMarkers(SI); + auto Range = at::getAssignmentMarkers(SI); ASSERT_TRUE(std::distance(Range.begin(), Range.end()) == 1); ASSERT_TRUE(Markers.insert(*Range.begin())); } @@ -869,8 +867,6 @@ TEST(AssignmentTrackingTest, InstrMethods) { // dbg.values that have been converted to a non-instruction format. 
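The conversion tests that follow all perform the same set/convert/reset dance around `UseNewDbgInfoFormat`. Condensed into one helper, and assuming only the symbols already visible in this diff (the wrapper itself is hypothetical), the pattern looks roughly like this:

```c++
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"

// Declared by LLVM; the unit tests in this patch toggle it directly.
extern llvm::cl::opt<bool> UseNewDbgInfoFormat;

// Rough sketch of the recurring test pattern: enable record-based debug
// info, convert the module, run the checks, then restore the old mode so
// later tests see the original global state.
template <typename Callable>
void withNewDbgFormat(llvm::Module &M, Callable RunChecks) {
  bool Old = UseNewDbgInfoFormat;
  UseNewDbgInfoFormat = true;
  M.convertToNewDbgValues();
  RunChecks();
  M.convertFromNewDbgValues();
  UseNewDbgInfoFormat = Old;
}
```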
TEST(MetadataTest, ConvertDbgToDbgVariableRecord) { LLVMContext C; - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = false; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11 @@ -1045,14 +1041,14 @@ TEST(MetadataTest, ConvertDbgToDbgVariableRecord) { // The record of those trailing DbgVariableRecords would dangle and cause an // assertion failure if it lived until the end of the LLVMContext. ExitBlock->deleteTrailingDbgRecords(); - UseNewDbgInfoFormat = OldDbgValueMode; } TEST(MetadataTest, DbgVariableRecordConversionRoutines) { LLVMContext C; - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = false; + // For the purpose of this test, set and un-set the command line option + // corresponding to UseNewDbgInfoFormat. + UseNewDbgInfoFormat = true; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { @@ -1083,11 +1079,6 @@ TEST(MetadataTest, DbgVariableRecordConversionRoutines) { !11 = !DILocation(line: 1, column: 1, scope: !6) )"); - // For the purpose of this test, set and un-set the command line option - // corresponding to UseNewDbgInfoFormat, but only after parsing, to ensure - // that the IR starts off in the old format. - UseNewDbgInfoFormat = true; - // Check that the conversion routines and utilities between dbg.value // debug-info format and DbgVariableRecords works. Function *F = M->getFunction("f"); @@ -1192,7 +1183,7 @@ TEST(MetadataTest, DbgVariableRecordConversionRoutines) { EXPECT_EQ(DVI2->getVariable(), DLV2); EXPECT_EQ(DVI2->getExpression(), Expr2); - UseNewDbgInfoFormat = OldDbgValueMode; + UseNewDbgInfoFormat = false; } // Test that the hashing function for DISubprograms representing methods produce diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp index ff96df85812002..2001df090aed53 100644 --- a/llvm/unittests/IR/IRBuilderTest.cpp +++ b/llvm/unittests/IR/IRBuilderTest.cpp @@ -994,17 +994,17 @@ TEST_F(IRBuilderTest, DIBuilder) { EXPECT_TRUE(verifyModule(*M)); }; - // Test in new-debug mode. - EXPECT_TRUE(M->IsNewDbgInfoFormat); + // Test in old-debug mode. + EXPECT_FALSE(M->IsNewDbgInfoFormat); RunTest(); - // Test in old-debug mode. - // Reset the test then call convertFromNewDbgValues to flip the flag + // Test in new-debug mode. + // Reset the test then call convertToNewDbgValues to flip the flag // on the test's Module, Function and BasicBlock. 
TearDown(); SetUp(); - M->convertFromNewDbgValues(); - EXPECT_FALSE(M->IsNewDbgInfoFormat); + M->convertToNewDbgValues(); + EXPECT_TRUE(M->IsNewDbgInfoFormat); RunTest(); } diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index b6044b28629204..b47c73f0b329ae 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -25,15 +25,12 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "llvm-c/Core.h" #include "gmock/gmock-matchers.h" #include "gtest/gtest.h" #include -extern llvm::cl::opt UseNewDbgInfoFormat; - namespace llvm { namespace { @@ -1463,8 +1460,6 @@ TEST(InstructionsTest, GetSplat) { TEST(InstructionsTest, SkipDebug) { LLVMContext C; - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = false; std::unique_ptr M = parseIR(C, R"( declare void @llvm.dbg.value(metadata, metadata, metadata) @@ -1500,7 +1495,6 @@ TEST(InstructionsTest, SkipDebug) { // After the terminator, there are no non-debug instructions. EXPECT_EQ(nullptr, Term->getNextNonDebugInstruction()); - UseNewDbgInfoFormat = OldDbgValueMode; } TEST(InstructionsTest, PhiMightNotBeFPMathOperator) { diff --git a/llvm/unittests/IR/ValueTest.cpp b/llvm/unittests/IR/ValueTest.cpp index 33a86d510d45cb..246c2fc7fe4063 100644 --- a/llvm/unittests/IR/ValueTest.cpp +++ b/llvm/unittests/IR/ValueTest.cpp @@ -13,7 +13,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" using namespace llvm; @@ -256,8 +255,6 @@ TEST(ValueTest, getLocalSlotDeath) { TEST(ValueTest, replaceUsesOutsideBlock) { // Check that Value::replaceUsesOutsideBlock(New, BB) replaces uses outside // BB, including dbg.* uses of MetadataAsValue(ValueAsMetadata(this)). - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = false; const auto *IR = R"( define i32 @f() !dbg !6 { entry: @@ -318,7 +315,6 @@ TEST(ValueTest, replaceUsesOutsideBlock) { // These users are outside Entry so should be changed. ASSERT_TRUE(ExitDbg->getValue(0) == cast(B)); ASSERT_TRUE(Ret->getOperand(0) == cast(B)); - UseNewDbgInfoFormat = OldDbgValueMode; } TEST(ValueTest, replaceUsesOutsideBlockDbgVariableRecord) { @@ -363,6 +359,10 @@ TEST(ValueTest, replaceUsesOutsideBlockDbgVariableRecord) { if (!M) Err.print("ValueTest", errs()); + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = true; + M->convertToNewDbgValues(); + auto GetNext = [](auto *I) { return &*++I->getIterator(); }; Function *F = M->getFunction("f"); @@ -389,6 +389,7 @@ TEST(ValueTest, replaceUsesOutsideBlockDbgVariableRecord) { EXPECT_TRUE(DVR1->getVariableLocationOp(0) == cast(A)); // These users are outside Entry so should be changed. 
EXPECT_TRUE(DVR2->getVariableLocationOp(0) == cast(B)); + UseNewDbgInfoFormat = OldDbgValueMode; } } // end anonymous namespace diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 22a8a05ef3719a..128321fc3ae731 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -312,8 +312,6 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { } TEST(ParseArchString, RejectsUnrecognizedExtensionNamesByDefault) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64ib", true).takeError()), - "unsupported standard user-level extension 'b'"); EXPECT_EQ( toString( RISCVISAInfo::parseArchString("rv32i_zmadeup", true).takeError()), @@ -326,9 +324,6 @@ TEST(ParseArchString, RejectsUnrecognizedExtensionNamesByDefault) { toString( RISCVISAInfo::parseArchString("rv64g_xmadeup", true).takeError()), "unsupported non-standard user-level extension 'xmadeup'"); - EXPECT_EQ( - toString(RISCVISAInfo::parseArchString("rv64ib1p0", true).takeError()), - "unsupported standard user-level extension 'b'"); EXPECT_EQ( toString( RISCVISAInfo::parseArchString("rv32i_zmadeup1p0", true).takeError()), @@ -344,8 +339,7 @@ TEST(ParseArchString, RejectsUnrecognizedExtensionNamesByDefault) { } TEST(ParseArchString, IgnoresUnrecognizedExtensionNamesWithIgnoreUnknown) { - for (StringRef Input : {"rv32ib", "rv32i_zmadeup", - "rv64i_smadeup", "rv64i_xmadeup"}) { + for (StringRef Input : {"rv32i_zmadeup", "rv64i_smadeup", "rv64i_xmadeup"}) { auto MaybeISAInfo = RISCVISAInfo::parseArchString(Input, true, false, true); ASSERT_THAT_EXPECTED(MaybeISAInfo, Succeeded()); RISCVISAInfo &Info = **MaybeISAInfo; @@ -913,6 +907,7 @@ R"(All available -march extensions for RISC-V f 2.2 d 2.2 c 2.0 + b 1.0 v 1.0 h 1.0 zic64b 1.0 diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index b2e57f2cca13ed..df5ee8a33350e7 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1536,7 +1536,7 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_FCMA, AArch64::AEK_PAUTH}), "8.4-A"), ARMCPUTestParams( - "apple-a14", "armv8.5-a", "crypto-neon-fp-armv8", + "apple-a14", "armv8.4-a", "crypto-neon-fp-armv8", AArch64::ExtensionBitset( {AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, AArch64::AEK_FP, AArch64::AEK_SIMD, @@ -1544,7 +1544,7 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_RCPC, AArch64::AEK_DOTPROD, AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_SHA3, AArch64::AEK_JSCVT, AArch64::AEK_FCMA, AArch64::AEK_PAUTH}), - "8.5-A"), + "8.4-A"), ARMCPUTestParams( "apple-a15", "armv8.6-a", "crypto-neon-fp-armv8", AArch64::ExtensionBitset( @@ -1579,7 +1579,7 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_PAUTH}), "8.6-A"), ARMCPUTestParams( - "apple-m1", "armv8.5-a", "crypto-neon-fp-armv8", + "apple-m1", "armv8.4-a", "crypto-neon-fp-armv8", AArch64::ExtensionBitset( {AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, AArch64::AEK_FP, AArch64::AEK_SIMD, @@ -1587,7 +1587,7 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_RCPC, AArch64::AEK_DOTPROD, AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_SHA3, AArch64::AEK_JSCVT, AArch64::AEK_FCMA, AArch64::AEK_PAUTH}), - "8.5-A"), + "8.4-A"), ARMCPUTestParams( "apple-m2", "armv8.6-a", "crypto-neon-fp-armv8", AArch64::ExtensionBitset( diff --git a/llvm/unittests/Transforms/Utils/CloningTest.cpp 
b/llvm/unittests/Transforms/Utils/CloningTest.cpp index 1d0d56a2099ceb..5e302d9c0a0d3e 100644 --- a/llvm/unittests/Transforms/Utils/CloningTest.cpp +++ b/llvm/unittests/Transforms/Utils/CloningTest.cpp @@ -844,9 +844,8 @@ TEST(CloneFunction, CloneFunctionWithInlinedSubprograms) { EXPECT_FALSE(verifyModule(*ImplModule, &errs())); // Check that DILexicalBlock of inlined function was not cloned. - auto DbgDeclareI = Func->begin()->begin()->getDbgRecordRange().begin(); - auto ClonedDbgDeclareI = - ClonedFunc->begin()->begin()->getDbgRecordRange().begin(); + auto DbgDeclareI = Func->begin()->begin(); + auto ClonedDbgDeclareI = ClonedFunc->begin()->begin(); const DebugLoc &DbgLoc = DbgDeclareI->getDebugLoc(); const DebugLoc &ClonedDbgLoc = ClonedDbgDeclareI->getDebugLoc(); EXPECT_NE(DbgLoc.get(), ClonedDbgLoc.get()); diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp index 316d59a9d22969..9b1176765c17f1 100644 --- a/llvm/unittests/Transforms/Utils/LocalTest.cpp +++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/PostDominators.h" @@ -27,27 +26,6 @@ using namespace llvm; -extern llvm::cl::opt UseNewDbgInfoFormat; -extern cl::opt PreserveInputDbgFormat; -extern bool WriteNewDbgInfoFormatToBitcode; -extern cl::opt WriteNewDbgInfoFormat; - -// Backup all of the existing settings that may be modified when -// PreserveInputDbgFormat=true, so that when the test is finished we return them -// (and the "preserve" setting) to their original values. 
-static auto SaveDbgInfoFormat() { - return make_scope_exit( - [OldPreserveInputDbgFormat = PreserveInputDbgFormat.getValue(), - OldUseNewDbgInfoFormat = UseNewDbgInfoFormat.getValue(), - OldWriteNewDbgInfoFormatToBitcode = WriteNewDbgInfoFormatToBitcode, - OldWriteNewDbgInfoFormat = WriteNewDbgInfoFormat.getValue()] { - PreserveInputDbgFormat = OldPreserveInputDbgFormat; - UseNewDbgInfoFormat = OldUseNewDbgInfoFormat; - WriteNewDbgInfoFormatToBitcode = OldWriteNewDbgInfoFormatToBitcode; - WriteNewDbgInfoFormat = OldWriteNewDbgInfoFormat; - }); -} - TEST(Local, RecursivelyDeleteDeadPHINodes) { LLVMContext C; @@ -138,6 +116,7 @@ static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { TEST(Local, ReplaceDbgDeclare) { LLVMContext C; + // Original C source to get debug info for a local variable: // void f() { int x; } std::unique_ptr M = parseIR(C, @@ -145,11 +124,11 @@ TEST(Local, ReplaceDbgDeclare) { define void @f() !dbg !8 { entry: %x = alloca i32, align 4 - #dbg_declare(ptr %x, !11, !DIExpression(), !13) - #dbg_declare(ptr %x, !11, !DIExpression(), !13) + call void @llvm.dbg.declare(metadata i32* %x, metadata !11, metadata !DIExpression()), !dbg !13 + call void @llvm.dbg.declare(metadata i32* %x, metadata !11, metadata !DIExpression()), !dbg !13 ret void, !dbg !14 } - + declare void @llvm.dbg.declare(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) @@ -172,18 +151,20 @@ TEST(Local, ReplaceDbgDeclare) { Instruction *Inst = &F->front().front(); auto *AI = dyn_cast(Inst); ASSERT_TRUE(AI); - + Inst = Inst->getNextNode()->getNextNode(); + ASSERT_TRUE(Inst); + auto *DII = dyn_cast(Inst); + ASSERT_TRUE(DII); Value *NewBase = Constant::getNullValue(PointerType::getUnqual(C)); DIBuilder DIB(*M); replaceDbgDeclare(AI, NewBase, DIB, DIExpression::ApplyOffset, 0); - // There should be exactly two dbg.declares, attached to the terminator. - Inst = F->front().getTerminator(); - ASSERT_TRUE(Inst); - EXPECT_TRUE(Inst->hasDbgRecords()); - EXPECT_EQ(range_size(Inst->getDbgRecordRange()), 2u); - for (DbgVariableRecord &DVR : filterDbgVars(Inst->getDbgRecordRange())) - EXPECT_EQ(DVR.getAddress(), NewBase); + // There should be exactly two dbg.declares. + int Declares = 0; + for (const Instruction &I : F->front()) + if (isa(I)) + Declares++; + EXPECT_EQ(2, Declares); } /// Build the dominator tree for the function and run the Test. 
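The `ReplaceDbgDeclare` hunk above centers on a single utility call; restated as a minimal sketch (`M`, `AI`, and `NewBase` are the names used in the test, while the wrapper function is hypothetical):

```c++
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"

// Redirect every dbg.declare that points at AI so it describes NewBase
// instead, applying a zero byte offset to each location expression.
void redirectDeclares(llvm::Module &M, llvm::AllocaInst *AI,
                      llvm::Value *NewBase) {
  llvm::DIBuilder DIB(M);
  llvm::replaceDbgDeclare(AI, NewBase, DIB, llvm::DIExpression::ApplyOffset,
                          /*Offset=*/0);
}
```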
@@ -518,10 +499,11 @@ struct SalvageDebugInfoTest : ::testing::Test { entry: %x = add i32 0, 1 %y = add i32 %x, 2 - #dbg_value(i32 %x, !11, !DIExpression(), !13) - #dbg_value(i32 %y, !11, !DIExpression(), !13) + call void @llvm.dbg.value(metadata i32 %x, metadata !11, metadata !DIExpression()), !dbg !13 + call void @llvm.dbg.value(metadata i32 %y, metadata !11, metadata !DIExpression()), !dbg !13 ret void, !dbg !14 } + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) @@ -544,47 +526,48 @@ struct SalvageDebugInfoTest : ::testing::Test { ASSERT_TRUE(F); } - bool doesDebugValueDescribeX(const DbgVariableRecord &DVR) { - if (DVR.getNumVariableLocationOps() != 1u) + bool doesDebugValueDescribeX(const DbgValueInst &DI) { + if (DI.getNumVariableLocationOps() != 1u) return false; - const auto &CI = *cast(DVR.getValue(0)); + const auto &CI = *cast(DI.getValue(0)); if (CI.isZero()) - return DVR.getExpression()->getElements().equals( + return DI.getExpression()->getElements().equals( {dwarf::DW_OP_plus_uconst, 1, dwarf::DW_OP_stack_value}); else if (CI.isOneValue()) - return DVR.getExpression()->getElements().empty(); + return DI.getExpression()->getElements().empty(); return false; } - bool doesDebugValueDescribeY(const DbgVariableRecord &DVR) { - if (DVR.getNumVariableLocationOps() != 1u) + bool doesDebugValueDescribeY(const DbgValueInst &DI) { + if (DI.getNumVariableLocationOps() != 1u) return false; - const auto &CI = *cast(DVR.getVariableLocationOp(0)); + const auto &CI = *cast(DI.getVariableLocationOp(0)); if (CI.isZero()) - return DVR.getExpression()->getElements().equals( + return DI.getExpression()->getElements().equals( {dwarf::DW_OP_plus_uconst, 3, dwarf::DW_OP_stack_value}); else if (CI.isOneValue()) - return DVR.getExpression()->getElements().equals( + return DI.getExpression()->getElements().equals( {dwarf::DW_OP_plus_uconst, 2, dwarf::DW_OP_stack_value}); return false; } void verifyDebugValuesAreSalvaged() { - // The function should only contain debug values and a terminator. - EXPECT_EQ(F->size(), 1u); - EXPECT_TRUE(F->begin()->begin()->isTerminator()); - // Check that the debug values for %x and %y are preserved. bool FoundX = false; bool FoundY = false; - for (DbgVariableRecord &DVR : - filterDbgVars(F->begin()->begin()->getDbgRecordRange())) { - EXPECT_EQ(DVR.getVariable()->getName(), "x"); - FoundX |= doesDebugValueDescribeX(DVR); - FoundY |= doesDebugValueDescribeY(DVR); + for (const Instruction &I : F->front()) { + auto DI = dyn_cast(&I); + if (!DI) { + // The function should only contain debug values and a terminator. + ASSERT_TRUE(I.isTerminator()); + continue; + } + EXPECT_EQ(DI->getVariable()->getName(), "x"); + FoundX |= doesDebugValueDescribeX(*DI); + FoundY |= doesDebugValueDescribeY(*DI); } - EXPECT_TRUE(FoundX); - EXPECT_TRUE(FoundY); + ASSERT_TRUE(FoundX); + ASSERT_TRUE(FoundY); } }; @@ -607,12 +590,6 @@ TEST_F(SalvageDebugInfoTest, RecursiveBlockSimplification) { TEST(Local, wouldInstructionBeTriviallyDead) { LLVMContext Ctx; - // FIXME: PreserveInputDbgFormat is set to true because this test has - // been written to expect debug intrinsics rather than debug records. - // TODO: This test doesn't have a DbgRecord equivalent form so delete - // it when debug intrinsics are removed. 
- auto SettingGuard = SaveDbgInfoFormat(); - PreserveInputDbgFormat = cl::boolOrDefault::BOU_TRUE; std::unique_ptr M = parseIR(Ctx, R"( define dso_local void @fun() local_unnamed_addr #0 !dbg !9 { @@ -706,10 +683,12 @@ TEST(Local, FindDbgUsers) { R"( define dso_local void @fun(ptr %a) #0 !dbg !11 { entry: - #dbg_assign(ptr %a, !16, !DIExpression(), !15, ptr %a, !DIExpression(), !19) + call void @llvm.dbg.assign(metadata ptr %a, metadata !16, metadata !DIExpression(), metadata !15, metadata ptr %a, metadata !DIExpression()), !dbg !19 ret void } + declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2, !3, !9} !llvm.ident = !{!10} @@ -736,13 +715,9 @@ TEST(Local, FindDbgUsers) { verifyModule(*M, &errs(), &BrokenDebugInfo); ASSERT_FALSE(BrokenDebugInfo); - // Convert to debug intrinsics as we want to test findDbgUsers and - // findDbgValue's debug-intrinsic-finding code here. - // TODO: Remove this test when debug intrinsics are removed. - M->convertFromNewDbgValues(); - Function &Fun = *cast(M->getNamedValue("fun")); Value *Arg = Fun.getArg(0); + SmallVector Users; // Arg (%a) is used twice by a single dbg.assign. Check findDbgUsers returns // only 1 pointer to it rather than 2. @@ -763,7 +738,7 @@ TEST(Local, FindDbgRecords) { R"( define dso_local void @fun(ptr %a) #0 !dbg !11 { entry: - #dbg_assign(ptr %a, !16, !DIExpression(), !15, ptr %a, !DIExpression(), !19) + call void @llvm.dbg.assign(metadata ptr %a, metadata !16, metadata !DIExpression(), metadata !15, metadata ptr %a, metadata !DIExpression()), !dbg !19 ret void } @@ -792,6 +767,9 @@ TEST(Local, FindDbgRecords) { bool BrokenDebugInfo = true; verifyModule(*M, &errs(), &BrokenDebugInfo); ASSERT_FALSE(BrokenDebugInfo); + bool NewDbgInfoFormat = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = true; + M->convertToNewDbgValues(); Function &Fun = *cast(M->getNamedValue("fun")); Value *Arg = Fun.getArg(0); @@ -811,10 +789,12 @@ TEST(Local, FindDbgRecords) { findDbgValues(Vals, Arg, &Records); EXPECT_EQ(Vals.size(), 0u); EXPECT_EQ(Records.size(), 1u); + UseNewDbgInfoFormat = NewDbgInfoFormat; } TEST(Local, ReplaceAllDbgUsesWith) { using namespace llvm::dwarf; + LLVMContext Ctx; // Note: The datalayout simulates Darwin/x86_64. 
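For orientation, the contract the next hunk exercises: `replaceAllDbgUsesWith` rewrites the debug users of one value to describe another, adjusting `DIExpression`s on width changes and falling back to kill ("undef") locations when a rewrite is impossible. A minimal sketch, with `From`/`To`/`DomPoint` as placeholders for the test's instructions:

```c++
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

// Point all debug users of From at To, where DomPoint is the instruction
// whose dominance decides which rewritten uses remain valid.
bool redirectDebugUsers(llvm::Instruction &From, llvm::Instruction &To,
                        llvm::Instruction &DomPoint,
                        llvm::DominatorTree &DT) {
  return llvm::replaceAllDbgUsesWith(From, To, DomPoint, DT);
}
```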
@@ -827,36 +807,39 @@ TEST(Local, ReplaceAllDbgUsesWith) { define void @f() !dbg !6 { entry: %a = add i32 0, 1, !dbg !15 + call void @llvm.dbg.value(metadata i32 %a, metadata !9, metadata !DIExpression()), !dbg !15 - #dbg_value(i32 %a, !9, !DIExpression(), !15) %b = add i64 0, 1, !dbg !16 + call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression()), !dbg !16 + call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul)), !dbg !16 + call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value)), !dbg !16 + call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)), !dbg !16 + call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_LLVM_fragment, 0, 8)), !dbg !16 + call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 8)), !dbg !16 - #dbg_value(i64 %b, !11, !DIExpression(), !16) - #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul), !16) - #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value), !16) - #dbg_value(i64 %b, !11, !DIExpression(DW_OP_LLVM_fragment, 0, 8), !16) - #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_LLVM_fragment, 0, 8), !16) - #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 8), !16) - %c = inttoptr i64 0 to ptr, !dbg !17 + %c = inttoptr i64 0 to i64*, !dbg !17 + call void @llvm.dbg.declare(metadata i64* %c, metadata !13, metadata !DIExpression()), !dbg !17 - #dbg_declare(ptr %c, !13, !DIExpression(), !17) - %d = inttoptr i64 0 to ptr, !dbg !18 + %d = inttoptr i64 0 to i32*, !dbg !18 + call void @llvm.dbg.declare(metadata i32* %d, metadata !20, metadata !DIExpression()), !dbg !18 - #dbg_declare(ptr %d, !20, !DIExpression(), !18) %e = add <2 x i16> zeroinitializer, zeroinitializer + call void @llvm.dbg.value(metadata <2 x i16> %e, metadata !14, metadata !DIExpression()), !dbg !18 - #dbg_value(<2 x i16> %e, !14, !DIExpression(), !18) %f = call i32 @escape(i32 0) + call void @llvm.dbg.value(metadata i32 %f, metadata !9, metadata !DIExpression()), !dbg !15 - #dbg_value(i32 %f, !9, !DIExpression(), !15) %barrier = call i32 @escape(i32 0) %g = call i32 @escape(i32 %f) + call void @llvm.dbg.value(metadata i32 %g, metadata !9, metadata !DIExpression()), !dbg !15 - #dbg_value(i32 %g, !9, !DIExpression(), !15) ret void, !dbg !19 } + declare void @llvm.dbg.declare(metadata, metadata, metadata) + declare void @llvm.dbg.value(metadata, metadata, metadata) + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!5} @@ -911,47 +894,38 @@ TEST(Local, ReplaceAllDbgUsesWith) { EXPECT_TRUE(replaceAllDbgUsesWith(D, C, C, DT)); SmallVector CDbgVals; - SmallVector CDbgRecords; - findDbgUsers(CDbgVals, &C, &CDbgRecords); - EXPECT_EQ(0U, CDbgVals.size()); - EXPECT_EQ(2U, CDbgRecords.size()); - EXPECT_TRUE(all_of( - CDbgRecords, [](DbgVariableRecord *DVR) { return DVR->isDbgDeclare(); })); + findDbgUsers(CDbgVals, &C); + EXPECT_EQ(2U, CDbgVals.size()); + EXPECT_TRUE(all_of(CDbgVals, [](DbgVariableIntrinsic *DII) { + return isa(DII); + })); EXPECT_TRUE(replaceAllDbgUsesWith(C, D, D, DT)); SmallVector DDbgVals; - SmallVector DDbgRecords; - findDbgUsers(DDbgVals, &D, &DDbgRecords); - EXPECT_EQ(0U, DDbgVals.size()); - EXPECT_EQ(2U, DDbgRecords.size()); - EXPECT_TRUE(all_of( - 
DDbgRecords, [](DbgVariableRecord *DVR) { return DVR->isDbgDeclare(); })); + findDbgUsers(DDbgVals, &D); + EXPECT_EQ(2U, DDbgVals.size()); + EXPECT_TRUE(all_of(DDbgVals, [](DbgVariableIntrinsic *DII) { + return isa(DII); + })); // Introduce a use-before-def. Check that the dbg.value for %a is salvaged. EXPECT_TRUE(replaceAllDbgUsesWith(A, F_, F_, DT)); - EXPECT_FALSE(A.hasDbgRecords()); - EXPECT_TRUE(B.hasDbgRecords()); - DbgVariableRecord *BDbgVal = - cast(&*B.getDbgRecordRange().begin()); - EXPECT_EQ(BDbgVal->getNumVariableLocationOps(), 1u); - EXPECT_EQ(ConstantInt::get(A.getType(), 0), - BDbgVal->getVariableLocationOp(0)); + auto *ADbgVal = cast(A.getNextNode()); + EXPECT_EQ(ADbgVal->getNumVariableLocationOps(), 1u); + EXPECT_EQ(ConstantInt::get(A.getType(), 0), ADbgVal->getVariableLocationOp(0)); // Introduce a use-before-def. Check that the dbg.values for %f become undef. EXPECT_TRUE(replaceAllDbgUsesWith(F_, G, G, DT)); - DbgVariableRecord *BarrierDbgVal = - cast(&*Barrier.getDbgRecordRange().begin()); - EXPECT_EQ(BarrierDbgVal->getNumVariableLocationOps(), 1u); - EXPECT_TRUE(BarrierDbgVal->isKillLocation()); + auto *FDbgVal = cast(F_.getNextNode()); + EXPECT_EQ(FDbgVal->getNumVariableLocationOps(), 1u); + EXPECT_TRUE(FDbgVal->isKillLocation()); - SmallVector BarrierDbgVals; - SmallVector BarrierDbgRecs; - findDbgValues(BarrierDbgVals, &F_, &BarrierDbgRecs); - EXPECT_EQ(0U, BarrierDbgVals.size()); - EXPECT_EQ(0U, BarrierDbgRecs.size()); + SmallVector FDbgVals; + findDbgValues(FDbgVals, &F_); + EXPECT_EQ(0U, FDbgVals.size()); // Simulate i32 -> i64 conversion to test sign-extension. Here are some // interesting cases to handle: @@ -961,15 +935,13 @@ TEST(Local, ReplaceAllDbgUsesWith) { // 4-6) like (1-3), but with a fragment EXPECT_TRUE(replaceAllDbgUsesWith(B, A, A, DT)); - SmallVector BDbgVals; - SmallVector BDbgRecs; - findDbgValues(BDbgVals, &A, &BDbgRecs); - EXPECT_EQ(0U, BDbgVals.size()); - EXPECT_EQ(6U, BDbgRecs.size()); + SmallVector ADbgVals; + findDbgValues(ADbgVals, &A); + EXPECT_EQ(6U, ADbgVals.size()); // Check that %a has a dbg.value with a DIExpression matching \p Ops. auto hasADbgVal = [&](ArrayRef Ops) { - return any_of(BDbgRecs, [&](DbgVariableRecord *DVI) { + return any_of(ADbgVals, [&](DbgValueInst *DVI) { assert(DVI->getVariable()->getName() == "2"); return DVI->getExpression()->getElements() == Ops; }); @@ -1372,11 +1344,6 @@ TEST(Local, ExpressionForConstant) { TEST(Local, ReplaceDbgVariableRecord) { LLVMContext C; - // FIXME: PreserveInputDbgFormat is set to true because this test has - // been written to expect debug intrinsics rather than debug records; use the - // intrinsic format until we update the test checks. - auto SettingGuard = SaveDbgInfoFormat(); - PreserveInputDbgFormat = cl::boolOrDefault::BOU_TRUE; // Test that RAUW also replaces the operands of DbgVariableRecord objects, // i.e. non-instruction stored debugging information. 
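Several of the LocalTest changes above revert to the two-argument, intrinsic-based finder APIs. Their post-patch shape, sketched with a placeholder value `V` and a hypothetical wrapper:

```c++
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IntrinsicInst.h"

// Collect the debug intrinsics that use V as a variable location.
void inspectDebugUsers(llvm::Value &V) {
  llvm::SmallVector<llvm::DbgVariableIntrinsic *> Users;
  llvm::findDbgUsers(Users, &V); // dbg.value, dbg.declare and dbg.assign
  llvm::SmallVector<llvm::DbgValueInst *> Vals;
  llvm::findDbgValues(Vals, &V); // dbg.value only
}
```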
diff --git a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn index e16ca31b81a8d3..780a69f1f3299b 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn @@ -18,6 +18,7 @@ unittest("ClangAnalysisFlowSensitiveTests") { "//llvm/lib/Testing/Support", ] sources = [ + "ASTOpsTest.cpp", "ArenaTest.cpp", "CFGMatchSwitchTest.cpp", "ChromiumCheckModelTest.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index ab3ae4ce8186c6..9d1ec8d2f7d312 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -347,6 +347,7 @@ if (current_toolchain == default_toolchain) { "__chrono/year_month.h", "__chrono/year_month_day.h", "__chrono/year_month_weekday.h", + "__chrono/zoned_time.h", "__compare/common_comparison_category.h", "__compare/compare_partial_order_fallback.h", "__compare/compare_strong_order_fallback.h", diff --git a/llvm/utils/lit/tests/xunit-output.py b/llvm/utils/lit/tests/xunit-output.py index 92b693256c69b2..67d99849fe36d9 100644 --- a/llvm/utils/lit/tests/xunit-output.py +++ b/llvm/utils/lit/tests/xunit-output.py @@ -1,4 +1,4 @@ -# REQUIRES: shell +# UNSUPPORTED: system-windows # Check xunit output # RUN: rm -rf %t.xunit.xml diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index a355d5a90e4d1b..69781bb868bbf8 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -372,19 +372,23 @@ class TypeConverter { From the perspective of type conversion, the types of block arguments are a bit special. Throughout the conversion process, blocks may move between regions of different operations. Given this, the conversion of the types for blocks must be -done explicitly via a conversion pattern. To convert the types of block -arguments within a Region, a custom hook on the `ConversionPatternRewriter` must -be invoked; `convertRegionTypes`. This hook uses a provided type converter to -apply type conversions to all blocks within a given region, and all blocks that -move into that region. As noted above, the conversions performed by this method -use the argument materialization hook on the `TypeConverter`. This hook also -takes an optional `TypeConverter::SignatureConversion` parameter that applies a -custom conversion to the entry block of the region. The types of the entry block -arguments are often tied semantically to details on the operation, e.g. func::FuncOp, -AffineForOp, etc. To convert the signature of just the region entry block, and -not any other blocks within the region, the `applySignatureConversion` hook may -be used instead. A signature conversion, `TypeConverter::SignatureConversion`, -can be built programmatically: +done explicitly via a conversion pattern. + +To convert the types of block arguments within a Region, a custom hook on the +`ConversionPatternRewriter` must be invoked: `convertRegionTypes`. This hook +uses a provided type converter to apply type conversions to all blocks of a +given region. As noted above, the conversions performed by this method use the +argument materialization hook on the `TypeConverter`. This hook also takes an +optional `TypeConverter::SignatureConversion` parameter that applies a custom +conversion to the entry block of the region.
The types of the entry block +arguments are often tied semantically to the operation, e.g., +`func::FuncOp`, `AffineForOp`, etc. + +To convert the signature of just one given block, the +`applySignatureConversion` hook can be used. + +A signature conversion, `TypeConverter::SignatureConversion`, can be built +programmatically: ```c++ class SignatureConversion { diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h index 48b7835ae5fca6..2d5e9d27c5bdfc 100644 --- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h +++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h @@ -46,9 +46,6 @@ class LLVMDialect; #define GEN_PASS_DECL_GPUTOLLVMCONVERSIONPASS #include "mlir/Conversion/Passes.h.inc" -using OwnedBlob = std::unique_ptr>; -using BlobGenerator = - std::function; using LoweringCallback = std::function( Operation *, llvm::LLVMContext &, StringRef)>; @@ -66,10 +63,9 @@ struct FunctionCallBuilder { /// Collect a set of patterns to convert from the GPU dialect to LLVM and /// populate converter for gpu types. -void populateGpuToLLVMConversionPatterns( - LLVMTypeConverter &converter, RewritePatternSet &patterns, - StringRef gpuBinaryAnnotation = {}, bool kernelBarePtrCallConv = false, - SymbolTable *cachedModuleTable = nullptr); +void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, + RewritePatternSet &patterns, + bool kernelBarePtrCallConv = false); /// A function that maps a MemorySpace enum to a target-specific integer value. using MemorySpaceMapping = std::function; diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index eb58f4adc31d36..db67d6a5ff1287 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -478,11 +478,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> { /*default=*/"false", "Use bare pointers to pass memref arguments to kernels. " "The kernel must use the same setting for this option." - >, - Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string", - /*default=*/"gpu::getDefaultGpuBinaryAnnotation()", - "Annotation attribute string for GPU binary" - > + > ]; let dependentDialects = [ diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td index 550c5c0cf4f60f..1517f71f1a7c90 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td @@ -40,9 +40,14 @@ def ArithIntRangeOpts : Pass<"int-range-optimizations"> { let summary = "Do optimizations based on integer range analysis"; let description = [{ This pass runs integer range analysis and applies optimizations based on its - results. e.g. replace arith.cmpi with const if it can be inferred from - args ranges. + results. It replaces operations with known-constant results with said constants, + and rewrites `(0 <= %x < D) mod D` to `%x`. }]; + // Explicitly depend on "arith" because this pass could create operations in + // `arith` out of thin air in some cases.
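To illustrate what "out of thin air" means in the comment above (the dialect dependency it justifies is declared just below): once the range analysis proves a result is a known constant, a rewrite materializes a fresh `arith.constant`. A hypothetical sketch, not the pass's actual code:

```c++
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/PatternMatch.h"

// Replace an op whose single index-typed result is provably the constant
// Val with a newly created arith.constant; creating that op is why the
// pass must declare a dependency on the arith dialect.
void materializeKnownConstant(mlir::PatternRewriter &rewriter,
                              mlir::Operation *op, int64_t Val) {
  rewriter.replaceOpWithNewOp<mlir::arith::ConstantIndexOp>(op, Val);
}
```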
+ let dependentDialects = [ + "::mlir::arith::ArithDialect" + ]; } def ArithEmulateUnsupportedFloats : Pass<"arith-emulate-unsupported-floats"> { diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h index 3cf81d2e58f21c..04a6386a199de4 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h @@ -17,9 +17,13 @@ #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/TensorEncoding.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "llvm/ADT/bit.h" + //===----------------------------------------------------------------------===// // // Type aliases to help code be more self-documenting. Unfortunately @@ -54,6 +58,42 @@ struct COOSegment { } }; +/// A simple wrapper to encode a bitset of (at most 64) levels, currently used +/// by the `sparse_tensor.iterate` operation for the set of levels on which the +/// coordinates should be loaded. +class LevelSet { + uint64_t bits = 0; + +public: + LevelSet() = default; + explicit LevelSet(uint64_t bits) : bits(bits) {} + operator uint64_t() const { return bits; } + + LevelSet &set(unsigned i) { + assert(i < 64); + bits |= static_cast<uint64_t>(0x01u) << i; + return *this; + } + + LevelSet &operator|=(LevelSet lhs) { + bits |= static_cast<uint64_t>(lhs); + return *this; + } + + LevelSet &lshift(unsigned offset) { + bits = bits << offset; + return *this; + } + + bool operator[](unsigned i) const { + assert(i < 64); + return (bits & (static_cast<uint64_t>(0x01u) << i)) != 0; + } + + unsigned count() const { return llvm::popcount(bits); } + bool empty() const { return bits == 0; } +}; + } // namespace sparse_tensor } // namespace mlir diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td index 53dd8e39438cc6..69b212cce4ceba 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td @@ -19,6 +19,21 @@ class SparseTensor_Attr<string name, list<Trait> traits = []> : AttrDef<SparseTensor_Dialect, name, traits>; +//===----------------------------------------------------------------------===// +// A simple bitset attribute wrapped around a single int64_t to encode a set of +// sparse tensor levels.
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index 53dd8e39438cc6..69b212cce4ceba 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -19,6 +19,21 @@ class SparseTensor_Attr<string name,
                         list<Trait> traits = []>
     : AttrDef<SparseTensor_Dialect, name, traits>;
 
+//===----------------------------------------------------------------------===//
+// A simple bitset attribute wrapped around a single int64_t to encode a set of
+// sparse tensor levels.
+//===----------------------------------------------------------------------===//
+
+def LevelSetAttr :
+    TypedAttrBase<
+      I64, "IntegerAttr",
+      And<[CPred<"::llvm::isa<::mlir::IntegerAttr>($_self)">,
+           CPred<"::llvm::cast<::mlir::IntegerAttr>($_self).getType().isInteger(64)">]>,
+      "LevelSet attribute"> {
+  let returnType = [{::mlir::sparse_tensor::LevelSet}];
+  let convertFromStorage = [{::mlir::sparse_tensor::LevelSet($_self.getValue().getZExtValue())}];
+}
+
 //===----------------------------------------------------------------------===//
 // These attributes are just like `IndexAttr` except that they clarify whether
 // the index refers to a dimension (an axis of the semantic tensor) or a level
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index 4e4441c640ed95..5ae6f9f3443f8c 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -15,6 +15,8 @@ include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td"
 include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/LoopLikeInterface.td"
 
 //===----------------------------------------------------------------------===//
 // Base class.
@@ -1304,7 +1306,7 @@ def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu
 
 def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator,
     ParentOneOf<["BinaryOp", "UnaryOp", "ReduceOp", "SelectOp",
-                 "ForeachOp"]>]> {
+                 "ForeachOp", "IterateOp"]>]> {
   let summary = "Yield from sparse_tensor set-like operations";
   let description = [{
     Yields a value from within a `binary`, `unary`, `reduce`,
@@ -1476,7 +1478,7 @@ def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space",
     the returned iteration space covers. `hiLvl - loLvl` defines the dimension
     of the iteration space.
 
-    The type of returned the value is automatically inferred to
+    The type of the returned value must be
     `!sparse_tensor.iter_space<#INPUT_ENCODING, lvls = $loLvl to $hiLvl>`.
     The returned iteration space can then be iterated over by
     `sparse_tensor.iterate` operations to visit every stored element
@@ -1487,6 +1489,7 @@ def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space",
    ```mlir
    // Extracts a 1-D iteration space from a COO tensor at level 1.
    %space = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
      : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+     -> !sparse_tensor.iter_space<#COO, lvls = 1>
    ```
   }];
 
@@ -1499,20 +1502,120 @@ def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space",
       return getHiLvl() - getLoLvl();
     }
     ArrayRef<::mlir::sparse_tensor::LevelType> getSpaceLvlTypes() {
-      return getResultSpace().getType().getLvlTypes();
+      return getExtractedSpace().getType().getLvlTypes();
     }
   }];
 
   let arguments = (ins AnySparseTensor:$tensor,
                        Optional<AnySparseIterator>:$parentIter,
                        LevelAttr:$loLvl, LevelAttr:$hiLvl);
-  let results = (outs AnySparseIterSpace:$resultSpace);
+  let results = (outs AnySparseIterSpace:$extractedSpace);
   let assemblyFormat = "$tensor (`at` $parentIter^)? `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) "
-                       " attr-dict `:` type($tensor) (`,` type($parentIter)^)?";
+                       " attr-dict `:` type($tensor) (`,` type($parentIter)^)? "
+                       "`->` qualified(type($extractedSpace))";
 
   let hasVerifier = 1;
 }
 
+def IterateOp : SparseTensor_Op<"iterate",
+    [RecursiveMemoryEffects, RecursivelySpeculatable,
+     DeclareOpInterfaceMethods<LoopLikeOpInterface>,
+     DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+     SingleBlockImplicitTerminator<"sparse_tensor::YieldOp">]> {
+
+  let summary = "Iterates over a sparse iteration space";
+  let description = [{
+      The `sparse_tensor.iterate` operation represents a loop (nest) over
+      the provided iteration space extracted from a specific sparse tensor.
+      The operation defines an SSA value for a sparse iterator that points
+      to the current stored element in the sparse tensor and SSA values
+      for coordinates of the stored element. The coordinates are always
+      converted to `index` type regardless of the underlying sparse tensor
+      storage. When coordinates are not used, the SSA values can be skipped
+      by `_` symbols, which usually leads to simpler generated code after
+      sparsification. For example:
+
+      ```mlir
+      // The coordinate for level 0 is not used when iterating over a 2-D
+      // iteration space.
+      sparse_tensor.iterate %iterator in %space at(_, %crd_1)
+        : !sparse_tensor.iter_space<#CSR, lvls = 0 to 2>
+      ```
+
+      `sparse_tensor.iterate` can also operate on loop-carried variables.
+      It returns the final values after loop termination.
+      The initial values of the variables are passed as additional SSA
+      operands after the iteration space operand. The operation region has
+      an argument for the iterator and variadic arguments for the specified
+      (used) coordinates, followed by one argument for each loop-carried
+      variable, representing the value of the variable at the current
+      iteration.
+      The body region must contain exactly one block that terminates with
+      `sparse_tensor.yield`.
+
+      The results of a `sparse_tensor.iterate` hold the final values after
+      the last iteration. If the `sparse_tensor.iterate` defines any values,
+      a yield must be explicitly present.
+      The number and types of the `sparse_tensor.iterate` results must match
+      the initial values in the iter_args binding and the yield operands.
+
+      A nested `sparse_tensor.iterate` example that prints all the coordinates
+      stored in the sparse input:
+
+      ```mlir
+      func.func @nested_iterate(%sp : tensor<4x8xf32, #COO>) {
+        // Iterates over the first level of %sp
+        %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0
+            : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0 to 1>
+        sparse_tensor.iterate %it1 in %l1 at (%coord0)
+            : !sparse_tensor.iter_space<#COO, lvls = 0 to 1> {
+          // Iterates over the second level of %sp
+          %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
+              : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1>
+              -> !sparse_tensor.iter_space<#COO, lvls = 1 to 2>
+          sparse_tensor.iterate %it2 in %l2 at (%coord1)
+              : !sparse_tensor.iter_space<#COO, lvls = 1 to 2> {
+            vector.print %coord0 : index
+            vector.print %coord1 : index
+          }
+        }
+      }
+      ```
+  }];
+
+  let arguments = (ins AnySparseIterSpace:$iterSpace,
+                       Variadic<AnyType>:$initArgs,
+                       LevelSetAttr:$crdUsedLvls);
+  let results = (outs Variadic<AnyType>:$results);
+  let regions = (region SizedRegion<1>:$region);
+
+  let extraClassDeclaration = [{
+    unsigned getSpaceDim() {
+      return getIterSpace().getType().getSpaceDim();
+    }
+    BlockArgument getIterator() {
+      return getRegion().getArguments().front();
+    }
+    Block::BlockArgListType getCrds() {
+      // The first block argument is the iterator, the remaining arguments are
+      // the referenced coordinates.
+      return getRegion().getArguments().slice(1, getCrdUsedLvls().count());
+    }
+    unsigned getNumRegionIterArgs() {
+      return getRegion().getArguments().size() - 1 - getCrdUsedLvls().count();
+    }
+  }];
+
+  let hasVerifier = 1;
+  let hasRegionVerifier = 1;
+  let hasCustomAssemblyFormat = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Sparse Tensor Debugging and Test-Only Operations.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h
index f6c51499f271c5..f83f3a3fdf9929 100644
--- a/mlir/include/mlir/Transforms/DialectConversion.h
+++ b/mlir/include/mlir/Transforms/DialectConversion.h
@@ -247,7 +247,8 @@ class TypeConverter {
   /// Attempts a 1-1 type conversion, expecting the result type to be
   /// `TargetType`. Returns the converted type cast to `TargetType` on success,
   /// and a null type on conversion or cast failure.
-  template <typename TargetType> TargetType convertType(Type t) const {
+  template <typename TargetType>
+  TargetType convertType(Type t) const {
     return dyn_cast_or_null<TargetType>(convertType(t));
   }
 
@@ -661,42 +662,42 @@ class ConversionPatternRewriter final : public PatternRewriter {
 public:
   ~ConversionPatternRewriter() override;
 
-  /// Apply a signature conversion to the entry block of the given region. This
-  /// replaces the entry block with a new block containing the updated
-  /// signature. The new entry block to the region is returned for convenience.
-  /// If no block argument types are changing, the entry original block will be
+  /// Apply a signature conversion to the given block. This replaces the block
+  /// with a new block containing the updated signature. The operations of the
+  /// given block are inlined into the newly-created block, which is returned.
+  ///
+  /// If no block argument types are changing, the original block will be
   /// left in place and returned.
   ///
-  /// If provided, `converter` will be used for any materializations.
+  /// A signature conversion must be provided. (Type converters can construct
+  /// a signature conversion with `convertBlockSignature`.)
+  ///
+  /// Optionally, a type converter can be provided to build materializations.
+  /// Note: If no type converter was provided or the type converter does not
+  /// specify any suitable argument/target materialization rules, the dialect
+  /// conversion may fail to legalize unresolved materializations.
   Block *
-  applySignatureConversion(Region *region,
+  applySignatureConversion(Block *block,
                            TypeConverter::SignatureConversion &conversion,
                            const TypeConverter *converter = nullptr);
 
-  /// Convert the types of block arguments within the given region. This
+  /// Apply a signature conversion to each block in the given region. This
   /// replaces each block with a new block containing the updated signature. If
   /// an updated signature would match the current signature, the respective
-  /// block is left in place as is.
+  /// block is left in place as is. (See `applySignatureConversion` for
+  /// details.) The new entry block of the region is returned.
+  ///
+  /// SignatureConversions are computed with the specified type converter.
+  /// This function returns "failure" if the type converter failed to compute
+  /// a SignatureConversion for at least one block.
   ///
-  /// The entry block may have a special conversion if `entryConversion` is
-  /// provided. On success, the new entry block to the region is returned for
-  /// convenience. Otherwise, failure is returned.
+ /// Optionally, a special SignatureConversion can be specified for the entry + /// block. This is because the types of the entry block arguments are often + /// tied semantically to the operation. FailureOr convertRegionTypes( Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion = nullptr); - /// Convert the types of block arguments within the given region except for - /// the entry region. This replaces each non-entry block with a new block - /// containing the updated signature. If an updated signature would match the - /// current signature, the respective block is left in place as is. - /// - /// If special conversion behavior is needed for the non-entry blocks (for - /// example, we need to convert only a subset of a BB arguments), such - /// behavior can be specified in blockConversions. - LogicalResult convertNonEntryRegionTypes( - Region *region, const TypeConverter &converter, - ArrayRef blockConversions); - /// Replace all the uses of the block argument `from` with value `to`. void replaceUsesOfBlockArgument(BlockArgument from, Value to); diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index d8150aeb828a59..6656be830989a4 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -40,31 +40,35 @@ Value computeAbs(Value real, Value imag, arith::FastMathFlags fmf, Value max = b.create(absReal, absImag, fmf); Value min = b.create(absReal, absImag, fmf); - Value ratio = b.create(min, max, fmf); - Value ratioSq = b.create(ratio, ratio, fmf); - Value ratioSqPlusOne = b.create(ratioSq, one, fmf); + + // The lowering below requires NaNs and infinities to work correctly. + arith::FastMathFlags fmfWithNaNInf = arith::bitEnumClear( + fmf, arith::FastMathFlags::nnan | arith::FastMathFlags::ninf); + Value ratio = b.create(min, max, fmfWithNaNInf); + Value ratioSq = b.create(ratio, ratio, fmfWithNaNInf); + Value ratioSqPlusOne = b.create(ratioSq, one, fmfWithNaNInf); Value result; if (fn == AbsFn::rsqrt) { - ratioSqPlusOne = b.create(ratioSqPlusOne, fmf); - min = b.create(min, fmf); - max = b.create(max, fmf); + ratioSqPlusOne = b.create(ratioSqPlusOne, fmfWithNaNInf); + min = b.create(min, fmfWithNaNInf); + max = b.create(max, fmfWithNaNInf); } if (fn == AbsFn::sqrt) { Value quarter = b.create( real.getType(), b.getFloatAttr(real.getType(), 0.25)); // sqrt(sqrt(a*b)) would avoid the pow, but will overflow more easily. 
- Value sqrt = b.create(max, fmf); - Value p025 = b.create(ratioSqPlusOne, quarter, fmf); - result = b.create(sqrt, p025, fmf); + Value sqrt = b.create(max, fmfWithNaNInf); + Value p025 = b.create(ratioSqPlusOne, quarter, fmfWithNaNInf); + result = b.create(sqrt, p025, fmfWithNaNInf); } else { - Value sqrt = b.create(ratioSqPlusOne, fmf); - result = b.create(max, sqrt, fmf); + Value sqrt = b.create(ratioSqPlusOne, fmfWithNaNInf); + result = b.create(max, sqrt, fmfWithNaNInf); } - Value isNaN = - b.create(arith::CmpFPredicate::UNO, result, result, fmf); + Value isNaN = b.create(arith::CmpFPredicate::UNO, result, + result, fmfWithNaNInf); return b.create(isNaN, min, result); } @@ -595,17 +599,20 @@ struct Log1pOpConversion : public OpConversionPattern { Value maxMinusOne = b.create(maxAbs, one, fmf); Value maxAbsOfRealPlusOneAndImagMinusOne = b.create(useReal, real, maxMinusOne); - Value minMaxRatio = b.create(minAbs, maxAbs, fmf); + arith::FastMathFlags fmfWithNaNInf = arith::bitEnumClear( + fmf, arith::FastMathFlags::nnan | arith::FastMathFlags::ninf); + Value minMaxRatio = b.create(minAbs, maxAbs, fmfWithNaNInf); Value logOfMaxAbsOfRealPlusOneAndImag = b.create(maxAbsOfRealPlusOneAndImagMinusOne, fmf); Value logOfSqrtPart = b.create( - b.create(minMaxRatio, minMaxRatio, fmf), fmf); + b.create(minMaxRatio, minMaxRatio, fmfWithNaNInf), + fmfWithNaNInf); Value r = b.create( - b.create(half, logOfSqrtPart, fmf), - logOfMaxAbsOfRealPlusOneAndImag, fmf); + b.create(half, logOfSqrtPart, fmfWithNaNInf), + logOfMaxAbsOfRealPlusOneAndImag, fmfWithNaNInf); Value resultReal = b.create( - b.create(arith::CmpFPredicate::UNO, r, r, fmf), minAbs, - r); + b.create(arith::CmpFPredicate::UNO, r, r, fmfWithNaNInf), + minAbs, r); Value resultImag = b.create(imag, realPlusOne, fmf); rewriter.replaceOpWithNewOp(op, type, resultReal, resultImag); diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 82bfa9514a8841..92b28ff9c58737 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -49,8 +49,6 @@ namespace mlir { using namespace mlir; -static constexpr const char *kGpuBinaryStorageSuffix = "_gpubin_cst"; - namespace { class GpuToLLVMConversionPass : public impl::GpuToLLVMConversionPassBase { @@ -97,36 +95,6 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern { Type llvmIntPtrType = IntegerType::get( context, this->getTypeConverter()->getPointerBitwidth(0)); - FunctionCallBuilder moduleLoadCallBuilder = { - "mgpuModuleLoad", - llvmPointerType /* void *module */, - {llvmPointerType /* void *cubin */, llvmInt64Type /* size_t size */}}; - FunctionCallBuilder moduleUnloadCallBuilder = { - "mgpuModuleUnload", llvmVoidType, {llvmPointerType /* void *module */}}; - FunctionCallBuilder moduleGetFunctionCallBuilder = { - "mgpuModuleGetFunction", - llvmPointerType /* void *function */, - { - llvmPointerType, /* void *module */ - llvmPointerType /* char *name */ - }}; - FunctionCallBuilder launchKernelCallBuilder = { - "mgpuLaunchKernel", - llvmVoidType, - { - llvmPointerType, /* void* f */ - llvmIntPtrType, /* intptr_t gridXDim */ - llvmIntPtrType, /* intptr_t gridyDim */ - llvmIntPtrType, /* intptr_t gridZDim */ - llvmIntPtrType, /* intptr_t blockXDim */ - llvmIntPtrType, /* intptr_t blockYDim */ - llvmIntPtrType, /* intptr_t blockZDim */ - llvmInt32Type, /* unsigned int sharedMemBytes */ - llvmPointerType, /* void *hstream */ - llvmPointerType, /* 
void **kernelParams */
-        llvmPointerType, /* void **extra */
-        llvmInt64Type   /* size_t paramsCount */
-    }};
   FunctionCallBuilder streamCreateCallBuilder = {
       "mgpuStreamCreate", llvmPointerType /* void *stream */, {}};
   FunctionCallBuilder streamDestroyCallBuilder = {
@@ -451,55 +419,21 @@ class ConvertWaitAsyncOpToGpuRuntimeCallPattern
                   ConversionPatternRewriter &rewriter) const override;
 };
 
-/// A rewrite patter to convert gpu.launch_func operations into a sequence of
-/// GPU runtime calls. Currently it supports CUDA and ROCm (HIP).
-///
-/// In essence, a gpu.launch_func operations gets compiled into the following
-/// sequence of runtime calls:
-///
-/// * moduleLoad -- loads the module given the cubin / hsaco data
-/// * moduleGetFunction -- gets a handle to the actual kernel function
-/// * getStreamHelper -- initializes a new compute stream on GPU
-/// * launchKernel -- launches the kernel on a stream
-/// * streamSynchronize -- waits for operations on the stream to finish
-///
-/// Intermediate data structures are allocated on the stack.
-class ConvertLaunchFuncOpToGpuRuntimeCallPattern
+/// A rewrite pattern to legalize gpu.launch_func with LLVM types.
+class LegalizeLaunchFuncOpPattern
     : public ConvertOpToGpuRuntimeCallPattern<gpu::LaunchFuncOp> {
 public:
-  ConvertLaunchFuncOpToGpuRuntimeCallPattern(
-      const LLVMTypeConverter &typeConverter, StringRef gpuBinaryAnnotation,
-      bool kernelBarePtrCallConv, SymbolTable *cachedModuleTable)
+  LegalizeLaunchFuncOpPattern(const LLVMTypeConverter &typeConverter,
+                              bool kernelBarePtrCallConv)
       : ConvertOpToGpuRuntimeCallPattern<gpu::LaunchFuncOp>(typeConverter),
-        gpuBinaryAnnotation(gpuBinaryAnnotation),
-        kernelBarePtrCallConv(kernelBarePtrCallConv),
-        cachedModuleTable(cachedModuleTable) {}
+        kernelBarePtrCallConv(kernelBarePtrCallConv) {}
 
 private:
-  Value generateParamsArray(gpu::LaunchFuncOp launchOp, OpAdaptor adaptor,
-                            OpBuilder &builder) const;
-  Value generateKernelNameConstant(StringRef moduleName, StringRef name,
-                                   Location loc, OpBuilder &builder) const;
-
   LogicalResult
   matchAndRewrite(gpu::LaunchFuncOp launchOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;
 
-  llvm::SmallString<32> gpuBinaryAnnotation;
   bool kernelBarePtrCallConv;
-  SymbolTable *cachedModuleTable;
-};
-
-class EraseGpuModuleOpPattern : public OpRewritePattern<gpu::GPUModuleOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(gpu::GPUModuleOp op,
-                                PatternRewriter &rewriter) const override {
-    // GPU kernel modules are no longer necessary since we have a global
-    // constant with the CUBIN, or HSACO data.
-    rewriter.eraseOp(op);
-    return success();
-  }
 };
 
 /// A rewrite pattern to convert gpu.memcpy operations into a GPU runtime
@@ -587,7 +521,6 @@ DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SetCsrPointersOp)
 
 void GpuToLLVMConversionPass::runOnOperation() {
   MLIRContext *context = &getContext();
-  SymbolTable symbolTable = SymbolTable(getOperation());
   LowerToLLVMOptions options(context);
   options.useBarePtrCallConv = hostBarePtrCallConv;
   RewritePatternSet patterns(context);
@@ -604,30 +537,20 @@ void GpuToLLVMConversionPass::runOnOperation() {
     iface->populateConvertToLLVMConversionPatterns(target, converter, patterns);
   }
 
-  // Preserve GPU modules if they have target attributes.
-  target.addDynamicallyLegalOp<gpu::GPUModuleOp>(
-      [](gpu::GPUModuleOp module) -> bool {
-        return module.getTargetsAttr() != nullptr;
-      });
-  // Accept as legal LaunchFuncOps if they refer to GPU Modules with targets and
-  // the operands have been lowered.
+  // Preserve GPU modules and binaries. 
Modules are preserved as they can be + // converted later by `gpu-module-to-binary`. + target.addLegalOp(); + // Accept as legal LaunchFuncOps if the operands have been lowered. target.addDynamicallyLegalOp( - [&](gpu::LaunchFuncOp op) -> bool { - auto module = - symbolTable.lookup(op.getKernelModuleName()); - return converter.isLegal(op->getOperandTypes()) && - converter.isLegal(op->getResultTypes()) && - (module && module.getTargetsAttr() && - !module.getTargetsAttr().empty()); - }); + [&](gpu::LaunchFuncOp op) -> bool { return converter.isLegal(op); }); // These aren't covered by the ConvertToLLVMPatternInterface right now. populateVectorToLLVMConversionPatterns(converter, patterns); populateFinalizeMemRefToLLVMConversionPatterns(converter, patterns); populateAsyncStructuralTypeConversionsAndLegality(converter, patterns, target); - populateGpuToLLVMConversionPatterns(converter, patterns, gpuBinaryAnnotation, - kernelBarePtrCallConv, &symbolTable); + populateGpuToLLVMConversionPatterns(converter, patterns, + kernelBarePtrCallConv); if (failed( applyPartialConversion(getOperation(), target, std::move(patterns)))) @@ -1002,100 +925,8 @@ LogicalResult ConvertWaitAsyncOpToGpuRuntimeCallPattern::matchAndRewrite( return success(); } -// Creates a struct containing all kernel parameters on the stack and returns -// an array of type-erased pointers to the fields of the struct. The array can -// then be passed to the CUDA / ROCm (HIP) kernel launch calls. -// The generated code is essentially as follows: -// -// %struct = alloca(sizeof(struct { Parameters... })) -// %array = alloca(NumParameters * sizeof(void *)) -// for (i : [0, NumParameters)) -// %fieldPtr = llvm.getelementptr %struct[0, i] -// llvm.store parameters[i], %fieldPtr -// %elementPtr = llvm.getelementptr %array[i] -// llvm.store %fieldPtr, %elementPtr -// return %array -Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateParamsArray( - gpu::LaunchFuncOp launchOp, OpAdaptor adaptor, OpBuilder &builder) const { - auto loc = launchOp.getLoc(); - auto numKernelOperands = launchOp.getNumKernelOperands(); - // Note: If `useBarePtrCallConv` is set in the type converter's options, - // the value of `kernelBarePtrCallConv` will be ignored. 
- SmallVector arguments = getTypeConverter()->promoteOperands( - loc, launchOp.getOperands().take_back(numKernelOperands), - adaptor.getOperands().take_back(numKernelOperands), builder, - /*useBarePtrCallConv=*/kernelBarePtrCallConv); - auto numArguments = arguments.size(); - SmallVector argumentTypes; - argumentTypes.reserve(numArguments); - for (auto argument : arguments) - argumentTypes.push_back(argument.getType()); - auto structType = LLVM::LLVMStructType::getNewIdentified(context, StringRef(), - argumentTypes); - auto one = builder.create(loc, llvmInt32Type, 1); - auto structPtr = - builder.create(loc, llvmPointerType, structType, one, - /*alignment=*/0); - auto arraySize = - builder.create(loc, llvmInt32Type, numArguments); - auto arrayPtr = builder.create( - loc, llvmPointerType, llvmPointerType, arraySize, /*alignment=*/0); - for (const auto &en : llvm::enumerate(arguments)) { - const auto index = static_cast(en.index()); - Value fieldPtr = - builder.create(loc, llvmPointerType, structType, structPtr, - ArrayRef{0, index}); - builder.create(loc, en.value(), fieldPtr); - auto elementPtr = - builder.create(loc, llvmPointerType, llvmPointerType, - arrayPtr, ArrayRef{index}); - builder.create(loc, fieldPtr, elementPtr); - } - return arrayPtr; -} - -// Generates an LLVM IR dialect global that contains the name of the given -// kernel function as a C string, and returns a pointer to its beginning. -// The code is essentially: -// -// llvm.global constant @kernel_name("function_name\00") -// func(...) { -// %0 = llvm.addressof @kernel_name -// %1 = llvm.constant (0 : index) -// %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*"> -// } -Value ConvertLaunchFuncOpToGpuRuntimeCallPattern::generateKernelNameConstant( - StringRef moduleName, StringRef name, Location loc, - OpBuilder &builder) const { - // Make sure the trailing zero is included in the constant. - std::vector kernelName(name.begin(), name.end()); - kernelName.push_back('\0'); - - std::string globalName = - std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name)); - return LLVM::createGlobalString( - loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()), - LLVM::Linkage::Internal); -} - -// Emits LLVM IR to launch a kernel function. Expects the module that contains -// the compiled kernel function as a cubin in the 'nvvm.cubin' attribute, or a -// hsaco in the 'rocdl.hsaco' attribute of the kernel function in the IR. -// -// %0 = call %binarygetter -// %1 = call %moduleLoad(%0) -// %2 = -// %3 = call %moduleGetFunction(%1, %2) -// %4 = call %streamCreate() -// %5 = -// call %launchKernel(%3, , 0, %4, %5, nullptr) -// call %streamSynchronize(%4) -// call %streamDestroy(%4) -// call %moduleUnload(%1) -// -// If the op is async, the stream corresponds to the (single) async dependency -// as well as the async token the op produces. -LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( +// Legalize the op's operands. +LogicalResult LegalizeLaunchFuncOpPattern::matchAndRewrite( gpu::LaunchFuncOp launchOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { if (failed(areAllLLVMTypes(launchOp, adaptor.getOperands(), rewriter))) @@ -1114,123 +945,37 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite( Location loc = launchOp.getLoc(); - // Create an LLVM global with CUBIN extracted from the kernel annotation and - // obtain a pointer to the first byte in it. 
- gpu::GPUModuleOp kernelModule; - if (cachedModuleTable) - kernelModule = cachedModuleTable->lookup( - launchOp.getKernelModuleName()); - else - kernelModule = SymbolTable::lookupNearestSymbolFrom( - launchOp, launchOp.getKernelModuleName()); - assert(kernelModule && "expected a kernel module"); - - // If the module has Targets then just update the op operands. - if (ArrayAttr targets = kernelModule.getTargetsAttr()) { - Value stream = Value(); - if (!adaptor.getAsyncDependencies().empty()) - stream = adaptor.getAsyncDependencies().front(); - // If the async keyword is present and there are no dependencies, then a - // stream must be created to pass to subsequent operations. - else if (launchOp.getAsyncToken()) - stream = streamCreateCallBuilder.create(loc, rewriter, {}).getResult(); - - // Lower the kernel operands to match kernel parameters. - // Note: If `useBarePtrCallConv` is set in the type converter's options, - // the value of `kernelBarePtrCallConv` will be ignored. - SmallVector arguments = getTypeConverter()->promoteOperands( - loc, launchOp.getKernelOperands(), adaptor.getKernelOperands(), - rewriter, /*useBarePtrCallConv=*/kernelBarePtrCallConv); - - std::optional clusterSize = std::nullopt; - if (launchOp.hasClusterSize()) { - clusterSize = - gpu::KernelDim3{adaptor.getClusterSizeX(), adaptor.getClusterSizeY(), - adaptor.getClusterSizeZ()}; - } - rewriter.create( - launchOp.getLoc(), launchOp.getKernelAttr(), - gpu::KernelDim3{adaptor.getGridSizeX(), adaptor.getGridSizeY(), - adaptor.getGridSizeZ()}, - gpu::KernelDim3{adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), - adaptor.getBlockSizeZ()}, - adaptor.getDynamicSharedMemorySize(), arguments, stream, clusterSize); - if (launchOp.getAsyncToken()) - rewriter.replaceOp(launchOp, {stream}); - else - rewriter.eraseOp(launchOp); - return success(); - } + Value stream = Value(); + if (!adaptor.getAsyncDependencies().empty()) + stream = adaptor.getAsyncDependencies().front(); + // If the async keyword is present and there are no dependencies, then a + // stream must be created to pass to subsequent operations. + else if (launchOp.getAsyncToken()) + stream = streamCreateCallBuilder.create(loc, rewriter, {}).getResult(); + // Lower the kernel operands to match kernel parameters. + // Note: If `useBarePtrCallConv` is set in the type converter's options, + // the value of `kernelBarePtrCallConv` will be ignored. + SmallVector arguments = getTypeConverter()->promoteOperands( + loc, launchOp.getKernelOperands(), adaptor.getKernelOperands(), rewriter, + /*useBarePtrCallConv=*/kernelBarePtrCallConv); - auto binaryAttr = - kernelModule->getAttrOfType(gpuBinaryAnnotation); - if (!binaryAttr) { - kernelModule.emitOpError() - << "missing " << gpuBinaryAnnotation << " attribute"; - return failure(); + std::optional clusterSize = std::nullopt; + if (launchOp.hasClusterSize()) { + clusterSize = + gpu::KernelDim3{adaptor.getClusterSizeX(), adaptor.getClusterSizeY(), + adaptor.getClusterSizeZ()}; } - - SmallString<128> nameBuffer(kernelModule.getName()); - nameBuffer.append(kGpuBinaryStorageSuffix); - Value data = - LLVM::createGlobalString(loc, rewriter, nameBuffer.str(), - binaryAttr.getValue(), LLVM::Linkage::Internal); - - // Pass the binary size. SPIRV requires binary size. 
- auto gpuBlob = binaryAttr.getValue(); - auto gpuBlobSize = rewriter.create( - loc, llvmInt64Type, - mlir::IntegerAttr::get(llvmInt64Type, - static_cast(gpuBlob.size()))); - - auto module = - moduleLoadCallBuilder.create(loc, rewriter, {data, gpuBlobSize}); - - // Pass the count of the parameters to runtime wrappers - auto paramsCount = rewriter.create( - loc, llvmInt64Type, - mlir::IntegerAttr::get( - llvmInt64Type, - static_cast(launchOp.getNumKernelOperands()))); - - // Get the function from the module. The name corresponds to the name of - // the kernel function. - auto kernelName = generateKernelNameConstant( - launchOp.getKernelModuleName().getValue(), - launchOp.getKernelName().getValue(), loc, rewriter); - auto function = moduleGetFunctionCallBuilder.create( - loc, rewriter, {module.getResult(), kernelName}); - Value zero = rewriter.create(loc, llvmInt32Type, 0); - Value stream = - adaptor.getAsyncDependencies().empty() - ? streamCreateCallBuilder.create(loc, rewriter, {}).getResult() - : adaptor.getAsyncDependencies().front(); - // Create array of pointers to kernel arguments. - auto kernelParams = generateParamsArray(launchOp, adaptor, rewriter); - auto nullpointer = rewriter.create(loc, llvmPointerType); - Value dynamicSharedMemorySize = launchOp.getDynamicSharedMemorySize() - ? launchOp.getDynamicSharedMemorySize() - : zero; - launchKernelCallBuilder.create( - loc, rewriter, - {function.getResult(), adaptor.getGridSizeX(), adaptor.getGridSizeY(), - adaptor.getGridSizeZ(), adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), - adaptor.getBlockSizeZ(), dynamicSharedMemorySize, stream, kernelParams, - /*extra=*/nullpointer, paramsCount}); - - if (launchOp.getAsyncToken()) { - // Async launch: make dependent ops use the same stream. + rewriter.create( + launchOp.getLoc(), launchOp.getKernelAttr(), + gpu::KernelDim3{adaptor.getGridSizeX(), adaptor.getGridSizeY(), + adaptor.getGridSizeZ()}, + gpu::KernelDim3{adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), + adaptor.getBlockSizeZ()}, + adaptor.getDynamicSharedMemorySize(), arguments, stream, clusterSize); + if (launchOp.getAsyncToken()) rewriter.replaceOp(launchOp, {stream}); - } else { - // Synchronize with host and destroy stream. This must be the stream created - // above (with no other uses) because we check that the synchronous version - // does not have any async dependencies. 
- streamSynchronizeCallBuilder.create(loc, rewriter, stream); - streamDestroyCallBuilder.create(loc, rewriter, stream); + else rewriter.eraseOp(launchOp); - } - moduleUnloadCallBuilder.create(loc, rewriter, module.getResult()); - return success(); } @@ -1978,9 +1723,7 @@ LogicalResult ConvertCreateBsrOpToGpuRuntimeCallPattern::matchAndRewrite( void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns, - StringRef gpuBinaryAnnotation, - bool kernelBarePtrCallConv, - SymbolTable *cachedModuleTable) { + bool kernelBarePtrCallConv) { addOpaquePointerConversion(converter); addOpaquePointerConversion(converter); addOpaquePointerConversion(converter); @@ -2017,7 +1760,5 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, ConvertSpGEMMCopyOpToGpuRuntimeCallPattern, ConvertSpMatGetSizeOpToGpuRuntimeCallPattern, ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter); - patterns.add( - converter, gpuBinaryAnnotation, kernelBarePtrCallConv, cachedModuleTable); - patterns.add(&converter.getContext()); + patterns.add(converter, kernelBarePtrCallConv); } diff --git a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp index d90cf931385fcc..f62de1f17a6668 100644 --- a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp +++ b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp @@ -162,7 +162,7 @@ struct ForOpConversion final : SCFToSPIRVPattern { signatureConverter.remapInput(0, newIndVar); for (unsigned i = 1, e = body->getNumArguments(); i < e; i++) signatureConverter.remapInput(i, header->getArgument(i)); - body = rewriter.applySignatureConversion(&forOp.getRegion(), + body = rewriter.applySignatureConversion(&forOp.getRegion().front(), signatureConverter); // Move the blocks from the forOp into the loopOp. This is the body of the diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 5797c5681a5fdd..2f6647a2a27b15 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2544,7 +2544,6 @@ std::optional mlir::arith::getNeutralElement(Operation *op) { .Case([](arith::MulIOp op) { return AtomicRMWKind::muli; }) .Default([](Operation *op) { return std::nullopt; }); if (!maybeKind) { - op->emitError() << "Unknown neutral element for: " << *op; return std::nullopt; } diff --git a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp index 2473169962b956..8005f9103b2356 100644 --- a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp @@ -8,11 +8,17 @@ #include +#include "mlir/Analysis/DataFlowFramework.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" #include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Transforms/FoldUtils.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" namespace mlir::arith { @@ -24,88 +30,50 @@ using namespace mlir; using namespace mlir::arith; using namespace mlir::dataflow; -/// Returns true if 2 integer ranges have intersection. 
-static bool intersects(const ConstantIntRanges &lhs,
-                       const ConstantIntRanges &rhs) {
-  return !((lhs.smax().slt(rhs.smin()) || lhs.smin().sgt(rhs.smax())) &&
-           (lhs.umax().ult(rhs.umin()) || lhs.umin().ugt(rhs.umax())));
+static std::optional<APInt> getMaybeConstantValue(DataFlowSolver &solver,
+                                                  Value value) {
+  auto *maybeInferredRange =
+      solver.lookupState<dataflow::IntegerValueRangeLattice>(value);
+  if (!maybeInferredRange || maybeInferredRange->getValue().isUninitialized())
+    return std::nullopt;
+  const ConstantIntRanges &inferredRange =
+      maybeInferredRange->getValue().getValue();
+  return inferredRange.getConstantValue();
 }
 
-static FailureOr<bool> handleEq(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  if (!intersects(lhs, rhs))
-    return false;
-
-  return failure();
-}
-
-static FailureOr<bool> handleNe(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  if (!intersects(lhs, rhs))
-    return true;
-
-  return failure();
-}
-
-static FailureOr<bool> handleSlt(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  if (lhs.smax().slt(rhs.smin()))
-    return true;
-
-  if (lhs.smin().sge(rhs.smax()))
-    return false;
-
-  return failure();
-}
-
-static FailureOr<bool> handleSle(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  if (lhs.smax().sle(rhs.smin()))
-    return true;
-
-  if (lhs.smin().sgt(rhs.smax()))
-    return false;
-
-  return failure();
-}
-
-static FailureOr<bool> handleSgt(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  return handleSlt(std::move(rhs), std::move(lhs));
-}
-
-static FailureOr<bool> handleSge(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  return handleSle(std::move(rhs), std::move(lhs));
-}
-
-static FailureOr<bool> handleUlt(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  if (lhs.umax().ult(rhs.umin()))
-    return true;
-
-  if (lhs.umin().uge(rhs.umax()))
-    return false;
-
-  return failure();
-}
-
-static FailureOr<bool> handleUle(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  if (lhs.umax().ule(rhs.umin()))
-    return true;
-
-  if (lhs.umin().ugt(rhs.umax()))
-    return false;
-
-  return failure();
-}
-
-static FailureOr<bool> handleUgt(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  return handleUlt(std::move(rhs), std::move(lhs));
-}
-
-static FailureOr<bool> handleUge(ConstantIntRanges lhs, ConstantIntRanges rhs) {
-  return handleUle(std::move(rhs), std::move(lhs));
+/// Patterned after SCCP.
+static LogicalResult maybeReplaceWithConstant(DataFlowSolver &solver,
+                                              PatternRewriter &rewriter,
+                                              Value value) {
+  if (value.use_empty())
+    return failure();
+  std::optional<APInt> maybeConstValue = getMaybeConstantValue(solver, value);
+  if (!maybeConstValue.has_value())
+    return failure();
+
+  Operation *maybeDefiningOp = value.getDefiningOp();
+  Dialect *valueDialect =
+      maybeDefiningOp ? maybeDefiningOp->getDialect()
+                      : value.getParentRegion()->getParentOp()->getDialect();
+  Attribute constAttr =
+      rewriter.getIntegerAttr(value.getType(), *maybeConstValue);
+  Operation *constOp = valueDialect->materializeConstant(
+      rewriter, constAttr, value.getType(), value.getLoc());
+  // Fall back to arith.constant if the dialect materializer doesn't know what
+  // to do with an integer constant.
+  if (!constOp)
+    constOp = rewriter.getContext()
+                  ->getLoadedDialect<ArithDialect>()
+                  ->materializeConstant(rewriter, constAttr, value.getType(),
+                                        value.getLoc());
+  if (!constOp)
+    return failure();
+
+  rewriter.replaceAllUsesWith(value, constOp->getResult(0));
+  return success();
 }
 
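`getMaybeConstantValue` above yields a value only when the inferred range collapses to a single point, which `ConstantIntRanges::getConstantValue` checks internally. A standalone sketch of that collapse test, assuming a closed unsigned interval models the lattice value (names hypothetical):

```c++
#include <cstdint>
#include <iostream>
#include <optional>

// A closed unsigned range as the analysis might infer it (stand-in for
// ConstantIntRanges).
struct URange {
  uint64_t umin;
  uint64_t umax;
};

// The range denotes a single known constant only when it collapses to one
// point, i.e. umin == umax; otherwise there is nothing to materialize.
std::optional<uint64_t> getConstantValue(const URange &r) {
  if (r.umin == r.umax)
    return r.umin;
  return std::nullopt;
}

int main() {
  std::cout << *getConstantValue({5, 5}) << '\n';            // 5: foldable
  std::cout << getConstantValue({0, 7}).has_value() << '\n'; // 0: keep the op
}
```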
 namespace {
-/// This class listens on IR transformations performed during a pass relying on
-/// information from a `DataflowSolver`. It erases state associated with the
-/// erased operation and its results from the `DataFlowSolver` so that Patterns
-/// do not accidentally query old state information for newly created Ops.
 class DataFlowListener : public RewriterBase::Listener {
 public:
   DataFlowListener(DataFlowSolver &s) : s(s) {}
 
@@ -120,52 +88,95 @@ class DataFlowListener : public RewriterBase::Listener {
   DataFlowSolver &s;
 };
 
-struct ConvertCmpOp : public OpRewritePattern<arith::CmpIOp> {
+/// Replace the uses of any results of `op` that were inferred to be constant
+/// integers with that constant. Return success() if all results were thus
+/// replaced and the operation is erased. Also replace any block arguments with
+/// their constant values.
+struct MaterializeKnownConstantValues : public RewritePattern {
+  MaterializeKnownConstantValues(MLIRContext *context, DataFlowSolver &s)
+      : RewritePattern(Pattern::MatchAnyOpTypeTag(), /*benefit=*/1, context),
+        solver(s) {}
+
+  LogicalResult match(Operation *op) const override {
+    if (matchPattern(op, m_Constant()))
+      return failure();
 
-  ConvertCmpOp(MLIRContext *context, DataFlowSolver &s)
-      : OpRewritePattern(context), solver(s) {}
+    auto needsReplacing = [&](Value v) {
+      return getMaybeConstantValue(solver, v).has_value() && !v.use_empty();
+    };
+    bool hasConstantResults = llvm::any_of(op->getResults(), needsReplacing);
+    if (op->getNumRegions() == 0)
+      return success(hasConstantResults);
+    bool hasConstantRegionArgs = false;
+    for (Region &region : op->getRegions()) {
+      for (Block &block : region.getBlocks()) {
+        hasConstantRegionArgs |=
+            llvm::any_of(block.getArguments(), needsReplacing);
+      }
+    }
+    return success(hasConstantResults || hasConstantRegionArgs);
+  }
 
-  LogicalResult matchAndRewrite(arith::CmpIOp op,
+  void rewrite(Operation *op, PatternRewriter &rewriter) const override {
+    bool replacedAll = (op->getNumResults() != 0);
+    for (Value v : op->getResults())
+      replacedAll &=
+          (succeeded(maybeReplaceWithConstant(solver, rewriter, v)) ||
+           v.use_empty());
+    if (replacedAll && isOpTriviallyDead(op)) {
+      rewriter.eraseOp(op);
+      return;
+    }
+
+    PatternRewriter::InsertionGuard guard(rewriter);
+    for (Region &region : op->getRegions()) {
+      for (Block &block : region.getBlocks()) {
+        rewriter.setInsertionPointToStart(&block);
+        for (BlockArgument &arg : block.getArguments()) {
+          (void)maybeReplaceWithConstant(solver, rewriter, arg);
+        }
+      }
+    }
+  }
+
+private:
+  DataFlowSolver &solver;
+};
+
+template <typename RemOp>
+struct DeleteTrivialRem : public OpRewritePattern<RemOp> {
+  DeleteTrivialRem(MLIRContext *context, DataFlowSolver &s)
+      : OpRewritePattern<RemOp>(context), solver(s) {}
+
+  LogicalResult matchAndRewrite(RemOp op,
                                 PatternRewriter &rewriter) const override {
-    auto *lhsResult =
-        solver.lookupState<dataflow::IntegerValueRangeLattice>(op.getLhs());
-    if (!lhsResult || lhsResult->getValue().isUninitialized())
+    Value lhs = op.getOperand(0);
+    Value rhs = op.getOperand(1);
+    auto maybeModulus = getConstantIntValue(rhs);
+    if (!maybeModulus.has_value())
      return failure();
-
-    auto *rhsResult =
-        solver.lookupState<dataflow::IntegerValueRangeLattice>(op.getRhs());
-    if (!rhsResult || rhsResult->getValue().isUninitialized())
+    int64_t modulus = *maybeModulus;
+    if (modulus <= 0)
       return failure();
-
-    using HandlerFunc =
-        FailureOr<bool> (*)(ConstantIntRanges, ConstantIntRanges);
-    std::array<HandlerFunc, arith::getMaxEnumValForCmpIPredicate() + 1>
-        handlers{};
-    using Pred = arith::CmpIPredicate;
-    handlers[static_cast<size_t>(Pred::eq)] = &handleEq;
-    handlers[static_cast<size_t>(Pred::ne)] = &handleNe;
-    handlers[static_cast<size_t>(Pred::slt)] = &handleSlt;
-    handlers[static_cast<size_t>(Pred::sle)] = &handleSle;
-    handlers[static_cast<size_t>(Pred::sgt)] = &handleSgt;
-    handlers[static_cast<size_t>(Pred::sge)] = &handleSge;
-    handlers[static_cast<size_t>(Pred::ult)] = &handleUlt;
-    handlers[static_cast<size_t>(Pred::ule)] = &handleUle;
-    handlers[static_cast<size_t>(Pred::ugt)] = &handleUgt;
-    handlers[static_cast<size_t>(Pred::uge)] = &handleUge;
-
-    HandlerFunc handler = handlers[static_cast<size_t>(op.getPredicate())];
-    if (!handler)
+    auto *maybeLhsRange =
+        solver.lookupState<dataflow::IntegerValueRangeLattice>(lhs);
+    if (!maybeLhsRange || maybeLhsRange->getValue().isUninitialized())
       return failure();
-
-    ConstantIntRanges lhsValue = lhsResult->getValue().getValue();
-    ConstantIntRanges rhsValue = rhsResult->getValue().getValue();
-    FailureOr<bool> result = handler(lhsValue, rhsValue);
-
-    if (failed(result))
+    const ConstantIntRanges &lhsRange = maybeLhsRange->getValue().getValue();
+    const APInt &min = isa<RemUIOp>(op) ? lhsRange.umin() : lhsRange.smin();
+    const APInt &max = isa<RemUIOp>(op) ? lhsRange.umax() : lhsRange.smax();
+    // The minima and maxima here are given as closed ranges; the input must be
+    // strictly less than the modulus.
+    if (min.isNegative() || min.uge(modulus))
+      return failure();
+    if (max.isNegative() || max.uge(modulus))
+      return failure();
+    if (!min.ule(max))
       return failure();
 
-    rewriter.replaceOpWithNewOp<arith::ConstantIntOp>(
-        op, static_cast<int64_t>(*result), /*width*/ 1);
+    // With all those conditions out of the way, we know that this invocation
+    // of a remainder is a noop because the input is strictly within the range
+    // [0, modulus), so get rid of it.
+    rewriter.replaceOp(op, ValueRange{lhs});
     return success();
   }
 
@@ -201,7 +212,8 @@ struct IntRangeOptimizationsPass
 
 void mlir::arith::populateIntRangeOptimizationsPatterns(
     RewritePatternSet &patterns, DataFlowSolver &solver) {
-  patterns.add<ConvertCmpOp>(patterns.getContext(), solver);
+  patterns.add<MaterializeKnownConstantValues, DeleteTrivialRem<RemUIOp>,
+               DeleteTrivialRem<RemSIOp>>(patterns.getContext(), solver);
 }
 
 std::unique_ptr<Pass> mlir::arith::createIntRangeOptimizationsPass() {
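`maybeReplaceWithConstant` above first asks the dialect that owns the value for a constant and only then falls back to a generic `arith`-style constant. A standalone sketch of that two-step fallback; the callback type and all names are hypothetical, not the MLIR API:

```c++
#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <string>

// A dialect-specific materializer may decline (return nullopt); the caller
// then falls back to a generic integer constant builder.
using Materializer = std::function<std::optional<std::string>(int64_t)>;

std::optional<std::string> materialize(int64_t value,
                                       const Materializer &dialectSpecific,
                                       const Materializer &fallback) {
  if (auto op = dialectSpecific(value))
    return op; // the value's own dialect knew how to build a constant
  return fallback(value); // otherwise try the generic arith-style constant
}

int main() {
  Materializer declines = [](int64_t) { return std::nullopt; };
  Materializer arithLike = [](int64_t v) {
    return std::optional<std::string>("arith.constant " + std::to_string(v));
  };
  // Prints "arith.constant 42": the fallback materializer is used.
  std::cout << *materialize(42, declines, arithLike) << '\n';
}
```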
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
index 22968096a68913..af38485291182f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp
@@ -106,27 +106,23 @@ struct FunctionNonEntryBlockConversion
                   ConversionPatternRewriter &rewriter) const override {
     rewriter.startOpModification(op);
     Region &region = op.getFunctionBody();
-    SmallVector<TypeConverter::SignatureConversion> conversions;
-    for (Block &block : llvm::drop_begin(region, 1)) {
-      conversions.emplace_back(block.getNumArguments());
-      TypeConverter::SignatureConversion &back = conversions.back();
+    for (Block &block :
+         llvm::make_early_inc_range(llvm::drop_begin(region, 1))) {
+      TypeConverter::SignatureConversion conversion(
+          /*numOrigInputs=*/block.getNumArguments());
 
       for (BlockArgument blockArgument : block.getArguments()) {
         int idx = blockArgument.getArgNumber();
 
         if (blockArgsToDetensor.count(blockArgument))
-          back.addInputs(idx, {getTypeConverter()->convertType(
-                                  block.getArgumentTypes()[idx])});
+          conversion.addInputs(idx, {getTypeConverter()->convertType(
+                                        block.getArgumentTypes()[idx])});
         else
-          back.addInputs(idx, {block.getArgumentTypes()[idx]});
+          conversion.addInputs(idx, {block.getArgumentTypes()[idx]});
       }
-    }
 
-    if (failed(rewriter.convertNonEntryRegionTypes(&region, *typeConverter,
-                                                   conversions))) {
-      rewriter.cancelOpModification(op);
-      return failure();
+      rewriter.applySignatureConversion(&block, conversion, getTypeConverter());
     }
 
     rewriter.finalizeOpModification(op);
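The detensorize pattern above now builds one `TypeConverter::SignatureConversion` per non-entry block and applies it directly. A standalone model of the conversion table that such an object fills in, with strings standing in for MLIR types (class and method names hypothetical):

```c++
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Standalone model of a signature conversion table: for each original block
// argument index, record the (possibly type-changed) replacement types.
class SignatureConversionSketch {
  std::vector<std::vector<std::string>> remapped; // orig index -> new types

public:
  explicit SignatureConversionSketch(size_t numOrigInputs)
      : remapped(numOrigInputs) {}

  // Map original argument `origIdx` to a single converted type, as the
  // detensorize pattern above does per block argument.
  void addInput(size_t origIdx, std::string newType) {
    assert(origIdx < remapped.size());
    remapped[origIdx] = {std::move(newType)};
  }

  // Flattened new signature, in original argument order.
  std::vector<std::string> newSignature() const {
    std::vector<std::string> out;
    for (const auto &types : remapped)
      out.insert(out.end(), types.begin(), types.end());
    return out;
  }
};

int main() {
  SignatureConversionSketch conv(/*numOrigInputs=*/2);
  conv.addInput(0, "f32");           // detensorized: tensor<f32> -> f32
  conv.addInput(1, "tensor<4xf32>"); // left unchanged
  assert(conv.newSignature().size() == 2);
}
```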
diff --git a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt
index 6f59b69bddce86..fe2cc90688d085 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt
@@ -45,11 +45,13 @@ add_mlir_dialect_library(MLIRSparseTensorDialect
 
   LINK_LIBS PUBLIC
   MLIRArithDialect
+  MLIRControlFlowInterfaces
   MLIRComplexDialect
   MLIRDialect
   MLIRDialectUtils
   MLIRIR
   MLIRInferTypeOpInterface
+  MLIRLoopLikeInterface
   MLIRSupport
   MLIRSparseTensorEnums
 )
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 4adb1c19096a24..232d25d718c652 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -2130,6 +2130,106 @@ static void printLevelRange(OpAsmPrinter &p, Operation *, IntegerAttr lvlLo,
   printLevelRange(p, lo, hi);
 }
 
+static ParseResult
+parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state,
+                     SmallVectorImpl<OpAsmParser::Argument> &iterators,
+                     SmallVectorImpl<OpAsmParser::Argument> &iterArgs) {
+  SmallVector<OpAsmParser::UnresolvedOperand> spaces;
+  SmallVector<OpAsmParser::UnresolvedOperand> initArgs;
+
+  // Parse "%iters, ... in %spaces, ..."
+  if (parser.parseArgumentList(iterators) || parser.parseKeyword("in") ||
+      parser.parseOperandList(spaces))
+    return failure();
+
+  if (iterators.size() != spaces.size())
+    return parser.emitError(
+        parser.getNameLoc(),
+        "mismatch in number of sparse iterators and sparse spaces");
+
+  // Parse "at(%crd0, _, ...)"
+  LevelSet crdUsedLvlSet;
+  bool hasUsedCrds = succeeded(parser.parseOptionalKeyword("at"));
+  unsigned lvlCrdCnt = 0;
+  if (hasUsedCrds) {
+    ParseResult crdList = parser.parseCommaSeparatedList(
+        OpAsmParser::Delimiter::Paren, [&]() -> ParseResult {
+          if (parser.parseOptionalKeyword("_")) {
+            if (parser.parseArgument(iterArgs.emplace_back()))
+              return failure();
+            // Always use IndexType for the coordinate.
+            crdUsedLvlSet.set(lvlCrdCnt);
+            iterArgs.back().type = parser.getBuilder().getIndexType();
+          }
+          lvlCrdCnt += 1;
+          return success();
+        });
+    if (failed(crdList)) {
+      return parser.emitError(
+          parser.getNameLoc(),
+          "expecting SSA value or \"_\" for level coordinates");
+    }
+  }
+  // Set the CrdUsedLvl bitset.
+  state.addAttribute("crdUsedLvls",
+                     parser.getBuilder().getI64IntegerAttr(crdUsedLvlSet));
+
+  // Parse "iter_args(%arg = %init, ...)"
+  bool hasIterArgs = succeeded(parser.parseOptionalKeyword("iter_args"));
+  if (hasIterArgs)
+    if (parser.parseAssignmentList(iterArgs, initArgs))
+      return failure();
+
+  SmallVector<Type> iterSpaceTps;
+  // Parse ": sparse_tensor.iter_space -> ret".
+  if (parser.parseColon() || parser.parseTypeList(iterSpaceTps))
+    return failure();
+  if (iterSpaceTps.size() != spaces.size())
+    return parser.emitError(parser.getNameLoc(),
+                            "mismatch in number of iteration space operands "
+                            "and iteration space types");
+
+  for (auto [it, tp] : llvm::zip_equal(iterators, iterSpaceTps)) {
+    IterSpaceType spaceTp = llvm::dyn_cast<IterSpaceType>(tp);
+    if (!spaceTp)
+      return parser.emitError(parser.getNameLoc(),
+                              "expected sparse_tensor.iter_space type for "
+                              "iteration space operands");
+    if (hasUsedCrds && spaceTp.getSpaceDim() != lvlCrdCnt)
+      return parser.emitError(parser.getNameLoc(),
+                              "mismatch in number of iteration space dimensions "
+                              "and specified coordinates");
+    it.type = spaceTp.getIteratorType();
+  }
+
+  if (hasIterArgs)
+    if (parser.parseArrowTypeList(state.types))
+      return failure();
+
+  // Resolve input operands.
+  if (parser.resolveOperands(spaces, iterSpaceTps, parser.getNameLoc(),
+                             state.operands))
+    return failure();
+
+  if (hasIterArgs) {
+    unsigned numCrds = crdUsedLvlSet.count();
+    // Strip off the leading args that are used for coordinates.
+    MutableArrayRef<OpAsmParser::Argument> args =
+        MutableArrayRef(iterArgs).drop_front(numCrds);
+    if (args.size() != initArgs.size() || args.size() != state.types.size()) {
+      return parser.emitError(
+          parser.getNameLoc(),
+          "mismatch in number of iteration arguments and return values");
+    }
+
+    for (auto [it, init, tp] : llvm::zip_equal(args, initArgs, state.types)) {
+      it.type = tp;
+      if (parser.resolveOperand(init, tp, state.operands))
+        return failure();
+    }
+  }
+  return success();
+}
+
 LogicalResult ExtractIterSpaceOp::inferReturnTypes(
     MLIRContext *ctx, std::optional<Location> loc, ValueRange ops,
     DictionaryAttr attr, OpaqueProperties prop, RegionRange region,
@@ -2153,7 +2253,7 @@ LogicalResult ExtractIterSpaceOp::verify() {
   }
 
   if (pIter) {
-    IterSpaceType spaceTp = getResultSpace().getType();
+    IterSpaceType spaceTp = getExtractedSpace().getType();
     if (pIter.getType().getEncoding() != spaceTp.getEncoding())
       return emitOpError(
           "mismatch in parent iterator encoding and iteration space encoding.");
@@ -2166,6 +2266,161 @@ LogicalResult ExtractIterSpaceOp::verify() {
   return success();
 }
 
+ParseResult IterateOp::parse(OpAsmParser &parser, OperationState &result) {
+  OpAsmParser::Argument iterator;
+  OpAsmParser::UnresolvedOperand iterSpace;
+
+  SmallVector<OpAsmParser::Argument> iters, iterArgs;
+  if (parseSparseSpaceLoop(parser, result, iters, iterArgs))
+    return failure();
+  if (iters.size() != 1)
+    return parser.emitError(parser.getNameLoc(),
+                            "expected only one iterator/iteration space");
+
+  iters.append(iterArgs);
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, iters))
+    return failure();
+
+  IterateOp::ensureTerminator(*body, parser.getBuilder(), result.location);
+
+  // Parse the optional attribute list.
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+
+  return success();
+}
+
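In `parseSparseSpaceLoop` above, each position of the `at(...)` clause maps to one level: `_` leaves the bit clear, an SSA id sets it, and the resulting count tells the parser how many leading region arguments are coordinates rather than iter_args. A token-level sketch of that bitset construction (standalone; names hypothetical):

```c++
#include <bit>
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Standalone model of how the `at(%crd0, _, ...)` clause becomes a level
// bitset: position i sets bit i unless the token is "_".
uint64_t buildCrdUsedLvls(const std::vector<std::string> &tokens) {
  uint64_t crdUsedLvls = 0;
  for (unsigned lvl = 0; lvl < tokens.size(); ++lvl) {
    assert(lvl < 64 && "at most 64 levels");
    if (tokens[lvl] != "_")
      crdUsedLvls |= uint64_t{1} << lvl; // coordinate at this level is used
  }
  return crdUsedLvls;
}

int main() {
  // at(_, %crd1): only the level-1 coordinate is loaded.
  uint64_t set = buildCrdUsedLvls({"_", "%crd1"});
  assert(set == 0b10);
  assert(std::popcount(set) == 1); // one region argument is a coordinate
}
```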
+/// Prints the initialization list in the form of
+/// (%inner = %outer, %inner2 = %outer2, <...>)
+/// where 'inner' values are assumed to be region arguments and 'outer' values
+/// are regular SSA values.
+static void printInitializationList(OpAsmPrinter &p,
+                                    Block::BlockArgListType blocksArgs,
+                                    ValueRange initializers,
+                                    StringRef prefix = "") {
+  assert(blocksArgs.size() == initializers.size() &&
+         "expected same length of arguments and initializers");
+  if (initializers.empty())
+    return;
+
+  p << prefix << '(';
+  llvm::interleaveComma(llvm::zip(blocksArgs, initializers), p, [&](auto it) {
+    p << std::get<0>(it) << " = " << std::get<1>(it);
+  });
+  p << ")";
+}
+
+static void printUsedCrdsList(OpAsmPrinter &p, unsigned spaceDim,
+                              Block::BlockArgListType blocksArgs,
+                              LevelSet crdUsedLvls) {
+  if (crdUsedLvls.empty())
+    return;
+
+  p << " at(";
+  for (unsigned i = 0; i < spaceDim; i++) {
+    if (crdUsedLvls[i]) {
+      p << blocksArgs.front();
+      blocksArgs = blocksArgs.drop_front();
+    } else {
+      p << "_";
+    }
+    if (i != spaceDim - 1)
+      p << ", ";
+  }
+  assert(blocksArgs.empty());
+  p << ")";
+}
+
+void IterateOp::print(OpAsmPrinter &p) {
+  p << " " << getIterator() << " in " << getIterSpace();
+  printUsedCrdsList(p, getSpaceDim(), getCrds(), getCrdUsedLvls());
+  printInitializationList(p, getRegionIterArgs(), getInitArgs(), " iter_args");
+
+  p << " : " << getIterSpace().getType() << " ";
+  if (!getInitArgs().empty())
+    p << "-> (" << getInitArgs().getTypes() << ") ";
+
+  p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
+                /*printBlockTerminators=*/!getInitArgs().empty());
+}
+
+LogicalResult IterateOp::verify() {
+  if (getInitArgs().size() != getNumResults()) {
+    return emitOpError(
+        "mismatch in number of loop-carried values and defined values");
+  }
+  return success();
+}
+
+LogicalResult IterateOp::verifyRegions() {
+  if (getIterator().getType() != getIterSpace().getType().getIteratorType())
+    return emitOpError("mismatch in iterator and iteration space type");
+  if (getNumRegionIterArgs() != getNumResults())
+    return emitOpError(
+        "mismatch in number of basic block args and defined values");
+
+  auto initArgs = getInitArgs();
+  auto iterArgs = getRegionIterArgs();
+  auto yieldVals = getYieldedValues();
+  auto opResults = getResults();
+  if (!llvm::all_equal({initArgs.size(), iterArgs.size(), yieldVals.size(),
+                        opResults.size()})) {
+    return emitOpError() << "number mismatch between iter args and results.";
+  }
+
+  for (auto [i, init, iter, yield, ret] :
+       llvm::enumerate(initArgs, iterArgs, yieldVals, opResults)) {
+    if (init.getType() != ret.getType())
+      return emitOpError() << "type mismatch between the #" << i
+                           << " iter operand and defined value";
+    if (iter.getType() != ret.getType())
+      return emitOpError() << "type mismatch between the #" << i
+                           << " iter region arg and defined value";
+    if (yield.getType() != ret.getType())
+      return emitOpError() << "type mismatch between the #" << i
+                           << " yield value and defined value";
+  }
+
+  return success();
+}
+
+/// OpInterfaces' methods implemented by IterateOp.
+SmallVector IterateOp::getLoopRegions() { return {&getRegion()}; } + +MutableArrayRef IterateOp::getInitsMutable() { + return getInitArgsMutable(); +} + +Block::BlockArgListType IterateOp::getRegionIterArgs() { + return getRegion().getArguments().take_back(getNumRegionIterArgs()); +} + +std::optional> IterateOp::getYieldedValuesMutable() { + return cast( + getRegion().getBlocks().front().getTerminator()) + .getResultsMutable(); +} + +std::optional IterateOp::getLoopResults() { return getResults(); } + +OperandRange IterateOp::getEntrySuccessorOperands(RegionBranchPoint point) { + return getInitArgs(); +} + +void IterateOp::getSuccessorRegions(RegionBranchPoint point, + SmallVectorImpl ®ions) { + // Both the operation itself and the region may be branching into the body or + // back into the operation itself. + regions.push_back(RegionSuccessor(&getRegion(), getRegionIterArgs())); + // It is possible for loop not to enter the body. + regions.push_back(RegionSuccessor(getResults())); +} + +//===----------------------------------------------------------------------===// +// Sparse Tensor Dialect Setups. +//===----------------------------------------------------------------------===// + /// Materialize a single constant operation from a given attribute value with /// the desired resultant type. Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder, diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 6a362afc52f259..2c43a6f15aa83f 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -999,9 +999,13 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer, bool allowTrailingDigit = true) { assert(!name.empty() && "Shouldn't have an empty name here"); + auto validChar = [&](char ch) { + return llvm::isAlnum(ch) || allowedPunctChars.contains(ch); + }; + auto copyNameToBuffer = [&] { for (char ch : name) { - if (llvm::isAlnum(ch) || allowedPunctChars.contains(ch)) + if (validChar(ch)) buffer.push_back(ch); else if (ch == ' ') buffer.push_back('_'); @@ -1013,7 +1017,7 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer, // Check to see if this name is valid. If it starts with a digit, then it // could conflict with the autogenerated numeric ID's, so add an underscore // prefix to avoid problems. - if (isdigit(name[0])) { + if (isdigit(name[0]) || (!validChar(name[0]) && name[0] != ' ')) { buffer.push_back('_'); copyNameToBuffer(); return buffer; @@ -1029,7 +1033,7 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer, // Check to see that the name consists of only valid identifier characters. for (char ch : name) { - if (!llvm::isAlnum(ch) && !allowedPunctChars.contains(ch)) { + if (!validChar(ch)) { copyNameToBuffer(); return buffer; } diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index d407d60334c70d..2f0efe1b1e454e 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -839,27 +839,6 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { // Type Conversion //===--------------------------------------------------------------------===// - /// Attempt to convert the signature of the given block, if successful a new - /// block is returned containing the new arguments. Returns `block` if it did - /// not require conversion. 
-  FailureOr<Block *> convertBlockSignature(
-      ConversionPatternRewriter &rewriter, Block *block,
-      const TypeConverter *converter,
-      TypeConverter::SignatureConversion *conversion = nullptr);
-
-  /// Convert the types of non-entry block arguments within the given region.
-  LogicalResult convertNonEntryRegionTypes(
-      ConversionPatternRewriter &rewriter, Region *region,
-      const TypeConverter &converter,
-      ArrayRef<TypeConverter::SignatureConversion> blockConversions = {});
-
-  /// Apply a signature conversion on the given region, using `converter` for
-  /// materializations if not null.
-  Block *
-  applySignatureConversion(ConversionPatternRewriter &rewriter, Region *region,
-                           TypeConverter::SignatureConversion &conversion,
-                           const TypeConverter *converter);
-
   /// Convert the types of block arguments within the given region.
   FailureOr<Block *>
   convertRegionTypes(ConversionPatternRewriter &rewriter, Region *region,
@@ -1294,34 +1273,6 @@ bool ConversionPatternRewriterImpl::wasOpReplaced(Operation *op) const {
 //===----------------------------------------------------------------------===//
 // Type Conversion
 
-FailureOr<Block *> ConversionPatternRewriterImpl::convertBlockSignature(
-    ConversionPatternRewriter &rewriter, Block *block,
-    const TypeConverter *converter,
-    TypeConverter::SignatureConversion *conversion) {
-  if (conversion)
-    return applySignatureConversion(rewriter, block, converter, *conversion);
-
-  // If a converter wasn't provided, and the block wasn't already converted,
-  // there is nothing we can do.
-  if (!converter)
-    return failure();
-
-  // Try to convert the signature for the block with the provided converter.
-  if (auto conversion = converter->convertBlockSignature(block))
-    return applySignatureConversion(rewriter, block, converter, *conversion);
-  return failure();
-}
-
-Block *ConversionPatternRewriterImpl::applySignatureConversion(
-    ConversionPatternRewriter &rewriter, Region *region,
-    TypeConverter::SignatureConversion &conversion,
-    const TypeConverter *converter) {
-  if (!region->empty())
-    return *convertBlockSignature(rewriter, &region->front(), converter,
-                                  &conversion);
-  return nullptr;
-}
-
 FailureOr<Block *> ConversionPatternRewriterImpl::convertRegionTypes(
     ConversionPatternRewriter &rewriter, Region *region,
     const TypeConverter &converter,
@@ -1330,42 +1281,29 @@ FailureOr<Block *> ConversionPatternRewriterImpl::convertRegionTypes(
   if (region->empty())
     return nullptr;
 
-  if (failed(convertNonEntryRegionTypes(rewriter, region, converter)))
-    return failure();
-
-  FailureOr<Block *> newEntry = convertBlockSignature(
-      rewriter, &region->front(), &converter, entryConversion);
-  return newEntry;
-}
-
-LogicalResult ConversionPatternRewriterImpl::convertNonEntryRegionTypes(
-    ConversionPatternRewriter &rewriter, Region *region,
-    const TypeConverter &converter,
-    ArrayRef<TypeConverter::SignatureConversion> blockConversions) {
-  regionToConverter[region] = &converter;
-  if (region->empty())
-    return success();
-
-  // Convert the arguments of each block within the region.
-  int blockIdx = 0;
-  assert((blockConversions.empty() ||
-          blockConversions.size() == region->getBlocks().size() - 1) &&
-         "expected either to provide no SignatureConversions at all or to "
-         "provide a SignatureConversion for each non-entry block");
-
+  // Convert the arguments of each non-entry block within the region.
   for (Block &block :
        llvm::make_early_inc_range(llvm::drop_begin(*region, 1))) {
-    TypeConverter::SignatureConversion *blockConversion =
-        blockConversions.empty()
-            ? nullptr
-            : const_cast<TypeConverter::SignatureConversion *>(
-                  &blockConversions[blockIdx++]);
-
-    if (failed(convertBlockSignature(rewriter, &block, &converter,
-                                     blockConversion)))
+    // Compute the signature for the block with the provided converter.
+    std::optional<TypeConverter::SignatureConversion> conversion =
+        converter.convertBlockSignature(&block);
+    if (!conversion)
       return failure();
-  }
-  return success();
+    // Convert the block with the computed signature.
+    applySignatureConversion(rewriter, &block, &converter, *conversion);
+  }
+
+  // Convert the entry block. If an entry signature conversion was provided,
+  // use that one. Otherwise, compute the signature with the type converter.
+  if (entryConversion)
+    return applySignatureConversion(rewriter, &region->front(), &converter,
+                                    *entryConversion);
+  std::optional<TypeConverter::SignatureConversion> conversion =
+      converter.convertBlockSignature(&region->front());
+  if (!conversion)
+    return failure();
+  return applySignatureConversion(rewriter, &region->front(), &converter,
+                                  *conversion);
 }
 
 Block *ConversionPatternRewriterImpl::applySignatureConversion(
@@ -1676,12 +1614,12 @@ void ConversionPatternRewriter::eraseBlock(Block *block) {
 }
 
 Block *ConversionPatternRewriter::applySignatureConversion(
-    Region *region, TypeConverter::SignatureConversion &conversion,
+    Block *block, TypeConverter::SignatureConversion &conversion,
     const TypeConverter *converter) {
-  assert(!impl->wasOpReplaced(region->getParentOp()) &&
+  assert(!impl->wasOpReplaced(block->getParentOp()) &&
          "attempting to apply a signature conversion to a block within a "
         "replaced/erased op");
-  return impl->applySignatureConversion(*this, region, conversion, converter);
+  return impl->applySignatureConversion(*this, block, converter, conversion);
 }
 
 FailureOr<Block *> ConversionPatternRewriter::convertRegionTypes(
@@ -1693,16 +1631,6 @@ FailureOr<Block *> ConversionPatternRewriter::convertRegionTypes(
   return impl->convertRegionTypes(*this, region, converter, entryConversion);
 }
 
-LogicalResult ConversionPatternRewriter::convertNonEntryRegionTypes(
-    Region *region, const TypeConverter &converter,
-    ArrayRef<TypeConverter::SignatureConversion> blockConversions) {
-  assert(!impl->wasOpReplaced(region->getParentOp()) &&
-         "attempting to apply a signature conversion to a block within a "
-         "replaced/erased op");
-  return impl->convertNonEntryRegionTypes(*this, region, converter,
-                                          blockConversions);
-}
-
 void ConversionPatternRewriter::replaceUsesOfBlockArgument(BlockArgument from,
                                                            Value to) {
   LLVM_DEBUG({
@@ -2231,11 +2159,14 @@ LogicalResult OperationLegalizer::legalizePatternBlockRewrites(
 
     // If the region of the block has a type converter, try to convert the block
    // directly.
    if (auto *converter = impl.regionToConverter.lookup(block->getParent())) {
-      if (failed(impl.convertBlockSignature(rewriter, block, converter))) {
+      std::optional<TypeConverter::SignatureConversion> conversion =
+          converter->convertBlockSignature(block);
+      if (!conversion) {
        LLVM_DEBUG(logFailure(impl.logger, "failed to convert types of moved "
                                           "block"));
        return failure();
      }
+      impl.applySignatureConversion(rewriter, block, converter, *conversion);
      continue;
    }
 
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index 6dafe29e2e5f69..d7767bda08435f 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s --convert-complex-to-standard --split-input-file |\
-// RUN: FileCheck %s --dump-input=always
+// RUN: FileCheck %s
 
 // CHECK-LABEL: func @complex_abs
 // CHECK-SAME: %[[ARG:.*]]: complex<f32>
@@ -703,14 +703,14 @@ func.func @complex_sqrt_nnan_ninf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[ABSIM:.*]] = math.absf %[[IM]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABSRE]], %[[ABSIM]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABSRE]], %[[ABSIM]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] : f32
 // CHECK: %[[QUARTER:.*]] = arith.constant 2.500000e-01 : f32
-// CHECK: %[[SQRT_MAX:.*]] = math.sqrt %[[MAX]] fastmath : f32
-// CHECK: %[[POW:.*]] = math.powf %[[RATIO_SQ_PLUS_ONE]], %[[QUARTER]] fastmath : f32
-// CHECK: %[[SQRT_ABS_OR_NAN:.*]] = arith.mulf %[[SQRT_MAX]], %[[POW]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[SQRT_ABS_OR_NAN]], %[[SQRT_ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[SQRT_MAX:.*]] = math.sqrt %[[MAX]] : f32
+// CHECK: %[[POW:.*]] = math.powf %[[RATIO_SQ_PLUS_ONE]], %[[QUARTER]] : f32
+// CHECK: %[[SQRT_ABS_OR_NAN:.*]] = arith.mulf %[[SQRT_MAX]], %[[POW]] : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[SQRT_ABS_OR_NAN]], %[[SQRT_ABS_OR_NAN]] : f32
 // CHECK: %[[SQRT_ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[SQRT_ABS_OR_NAN]] : f32
 // CHECK: %[[ARGARG:.*]] = math.atan2 %[[IM]], %[[RE]] fastmath : f32
 // CHECK: %[[SQRTARG:.*]] = arith.mulf %[[ARGARG]], %[[HALF]] fastmath : f32
@@ -819,12 +819,12 @@ func.func @complex_abs_with_fmf(%arg: complex<f32>) -> f32 {
 // CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
-// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
-// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
+// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
 // CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32
 // CHECK: return %[[ABS]] : f32
@@ -918,12 +918,12 @@ func.func @complex_log_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
-// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
-// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
+// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
 // CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32
 // CHECK: %[[RESULT_REAL:.*]] = math.log %[[ABS]] fastmath : f32
 // CHECK: %[[REAL2:.*]] = complex.re %[[ARG]] : complex<f32>
@@ -952,14 +952,14 @@ func.func @complex_log1p_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32
 // CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32
 // CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %[[ONE]] fastmath : f32
-// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32
-// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %[[REAL]], %[[MAX_MINUS_ONE]] : f32
+// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
 // CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] fastmath : f32
-// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] fastmath : f32
-// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] fastmath : f32
-// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] fastmath : f32
-// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] fastmath : f32
+// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] fastmath : f32
+// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] fastmath : f32
+// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] fastmath : f32
+// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] fastmath : f32
 // CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32
 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] fastmath : f32
 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
@@ -1298,14 +1298,14 @@ func.func @complex_atan2_with_fmf(%lhs: complex<f32>,
 // CHECK: %[[ABSIM:.*]] = math.absf %[[IM]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABSRE]], %[[ABSIM]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABSRE]], %[[ABSIM]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
 // CHECK: %[[QUARTER:.*]] = arith.constant 2.500000e-01 : f32
-// CHECK: %[[SQRT_MAX:.*]] = math.sqrt %[[MAX]] fastmath : f32
-// CHECK: %[[POW:.*]] = math.powf %[[RATIO_SQ_PLUS_ONE]], %[[QUARTER]] fastmath : f32
-// CHECK: %[[SQRT_ABS_OR_NAN:.*]] = arith.mulf %[[SQRT_MAX]], %[[POW]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[SQRT_ABS_OR_NAN]], %[[SQRT_ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[SQRT_MAX:.*]] = math.sqrt %[[MAX]] fastmath : f32
+// CHECK: %[[POW:.*]] = math.powf %[[RATIO_SQ_PLUS_ONE]], %[[QUARTER]] fastmath : f32
+// CHECK: %[[SQRT_ABS_OR_NAN:.*]] = arith.mulf %[[SQRT_MAX]], %[[POW]] fastmath : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[SQRT_ABS_OR_NAN]], %[[SQRT_ABS_OR_NAN]] fastmath : f32
 // CHECK: %[[SQRT_ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[SQRT_ABS_OR_NAN]] : f32
 // CHECK: %[[ARGARG:.*]] = math.atan2 %[[IM]], %[[RE]] fastmath : f32
 // CHECK: %[[SQRTARG:.*]] = arith.mulf %[[ARGARG]], %[[HALF]] fastmath : f32
@@ -1539,12 +1539,12 @@ func.func @complex_atan2_with_fmf(%lhs: complex<f32>,
 // CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
-// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
-// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
+// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
 // CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32
 // CHECK: %[[VAR436:.*]] = math.log %[[ABS]] fastmath : f32
 // CHECK: %[[VAR437:.*]] = complex.re %[[VAR415]] : complex<f32>
@@ -1778,14 +1778,14 @@ func.func @complex_sqrt_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[ABSIM:.*]] = math.absf %[[IM]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABSRE]], %[[ABSIM]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABSRE]], %[[ABSIM]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
 // CHECK: %[[QUARTER:.*]] = arith.constant 2.500000e-01 : f32
-// CHECK: %[[SQRT_MAX:.*]] = math.sqrt %[[MAX]] fastmath : f32
-// CHECK: %[[POW:.*]] = math.powf %[[RATIO_SQ_PLUS_ONE]], %[[QUARTER]] fastmath : f32
-// CHECK: %[[SQRT_ABS_OR_NAN:.*]] = arith.mulf %[[SQRT_MAX]], %[[POW]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[SQRT_ABS_OR_NAN]], %[[SQRT_ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[SQRT_MAX:.*]] = math.sqrt %[[MAX]] fastmath : f32
+// CHECK: %[[POW:.*]] = math.powf %[[RATIO_SQ_PLUS_ONE]], %[[QUARTER]] fastmath : f32
+// CHECK: %[[SQRT_ABS_OR_NAN:.*]] = arith.mulf %[[SQRT_MAX]], %[[POW]] fastmath : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[SQRT_ABS_OR_NAN]], %[[SQRT_ABS_OR_NAN]] fastmath : f32
 // CHECK: %[[SQRT_ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[SQRT_ABS_OR_NAN]] : f32
 // CHECK: %[[ARGARG:.*]] = math.atan2 %[[IM]], %[[RE]] fastmath : f32
 // CHECK: %[[SQRTARG:.*]] = arith.mulf %[[ARGARG]], %[[HALF]] fastmath : f32
@@ -1886,12 +1886,12 @@ func.func @complex_sign_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG2]] fastmath : f32
 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
 // CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL]], %[[ABS_IMAG]] fastmath : f32
-// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
-// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
-// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
-// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
-// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
-// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
+// CHECK: %[[RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32
+// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[RATIO]], %[[RATIO]] fastmath : f32
+// CHECK: %[[RATIO_SQ_PLUS_ONE:.*]] = arith.addf %[[RATIO_SQ]], %[[ONE]] fastmath : f32
+// CHECK: %[[SQRT:.*]] = math.sqrt %[[RATIO_SQ_PLUS_ONE]] fastmath : f32
+// CHECK: %[[ABS_OR_NAN:.*]] = arith.mulf %[[MAX]], %[[SQRT]] fastmath : f32
+// CHECK: %[[IS_NAN:.*]] = arith.cmpf uno, %[[ABS_OR_NAN]], %[[ABS_OR_NAN]] fastmath : f32
 // CHECK: %[[ABS:.*]] = arith.select %[[IS_NAN]], %[[MIN]], %[[ABS_OR_NAN]] : f32
 // CHECK: %[[REAL_SIGN:.*]] = arith.divf %[[REAL]], %[[ABS]] fastmath : f32
 // CHECK: %[[IMAG_SIGN:.*]] = arith.divf %[[IMAG]], %[[ABS]] fastmath : f32
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index c0b05ef0860333..6c5c1e09c0eb5f 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -1,15 +1,8 @@
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" -split-input-file | FileCheck %s
-// RUN: mlir-opt %s --gpu-to-llvm="gpu-binary-annotation=rocdl.hsaco" -split-input-file | FileCheck %s --check-prefix=ROCDL
+// RUN: mlir-opt %s --gpu-to-llvm -split-input-file | FileCheck %s
 
 module attributes {gpu.container_module} {
-
-  // CHECK: llvm.mlir.global internal constant @[[KERNEL_NAME:.*]]("kernel\00")
-  // CHECK: llvm.mlir.global internal constant @[[GLOBAL:.*]]("CUBIN")
-  // ROCDL: llvm.mlir.global internal constant @[[GLOBAL:.*]]("HSACO")
-
-  gpu.module @kernel_module attributes {
-      nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"
-  } {
+  // CHECK: gpu.module
+  gpu.module @kernel_module [#nvvm.target] {
     llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
         %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
         %arg5: i64) attributes {gpu.kernel} {
@@ -18,9 +11,17 @@ module attributes {gpu.container_module} {
   }
 
   func.func @foo(%buffer: memref<?xf32>) {
+    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
+    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
+    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
     %c8 = arith.constant 8 : index
     %c32 = arith.constant 32 : i32
     %c256 = arith.constant 256 : i32
+
+    // CHECK: gpu.launch_func @kernel_module::@kernel
+    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
+    // CHECK: dynamic_shared_memory_size [[C256]]
+    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
     gpu.launch_func @kernel_module::@kernel
         blocks in (%c8, %c8, %c8)
         threads in (%c8, %c8, %c8)
@@ -28,46 +29,13 @@ module attributes {gpu.container_module} {
         args(%c32 : i32, %buffer : memref<?xf32>)
     return
   }
-
-  // CHECK-DAG: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
-  // CHECK-DAG: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
-  // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]
-  // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}0, 0]
-  // CHECK-SAME: -> !llvm.ptr
-  // CHECK: [[BINARYSIZE:%.*]] = llvm.mlir.constant
-  // CHECK: [[MODULE:%.*]] = llvm.call @mgpuModuleLoad([[BINARY]], [[BINARYSIZE]])
-  // CHECK: [[PARAMSCOUNT:%.*]] = llvm.mlir.constant
-  // CHECK: [[FUNC:%.*]] = llvm.call @mgpuModuleGetFunction([[MODULE]], {{.*}})
-
-  // CHECK: [[STREAM:%.*]] = llvm.call @mgpuStreamCreate
-
-  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32)
-  // CHECK: %[[MEMREF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct[[STRUCT_BODY:<.*>]]
-  // CHECK: [[NUM_PARAMS:%.*]] = llvm.mlir.constant(6 : i32) : i32
-  // CHECK-NEXT: [[PARAMS:%.*]] = llvm.alloca [[NUM_PARAMS]] x !llvm.ptr
-
-  // CHECK: llvm.getelementptr %[[MEMREF]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct[[STRUCT_BODY:<.*>]]
-  // CHECK: llvm.getelementptr %[[MEMREF]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct[[STRUCT_BODY:<.*>]]
-  // CHECK: llvm.getelementptr %[[MEMREF]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct[[STRUCT_BODY:<.*>]]
-  // CHECK: llvm.getelementptr %[[MEMREF]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct[[STRUCT_BODY:<.*>]]
-  // CHECK: llvm.getelementptr %[[MEMREF]][0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct[[STRUCT_BODY:<.*>]]
-  // CHECK: llvm.getelementptr %[[MEMREF]][0, 5] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct[[STRUCT_BODY:<.*>]]
-
-  // CHECK: [[EXTRA_PARAMS:%.*]] = llvm.mlir.zero : !llvm.ptr
-
-  // CHECK: llvm.call @mgpuLaunchKernel([[FUNC]], [[C8]], [[C8]], [[C8]],
-  // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C256]], [[STREAM]],
-  // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]], [[PARAMSCOUNT]])
-  // CHECK: llvm.call @mgpuStreamSynchronize
-  // CHECK: llvm.call @mgpuStreamDestroy
-  // CHECK: llvm.call @mgpuModuleUnload
 }
+
 // -----
 
 module attributes {gpu.container_module} {
   // CHECK: gpu.module
-  // ROCDL: gpu.module
   gpu.module @kernel_module [#nvvm.target] {
     llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
         %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
@@ -80,15 +48,19 @@ module attributes {gpu.container_module} {
     // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
     // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
     // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
-    %c8 = arith.constant 8 : index
+    // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
+    %c8 = arith.constant 8 : index
     %c32 = arith.constant 32 : i32
     %c256 = arith.constant 256 : i32
+    %c2 = arith.constant 2 : index
 
     // CHECK: gpu.launch_func @kernel_module::@kernel
+    // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
     // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
     gpu.launch_func @kernel_module::@kernel
+        clusters in (%c2, %c2, %c2)
         blocks in (%c8, %c8, %c8)
         threads in (%c8, %c8, %c8)
         dynamic_shared_memory_size %c256
@@ -97,18 +69,11 @@ module attributes {gpu.container_module} {
   }
 }
 
-// -----
 
 module attributes {gpu.container_module} {
-  // CHECK: gpu.module
-  gpu.module @kernel_module [#nvvm.target] {
-    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
-        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
-        %arg5: i64) attributes {gpu.kernel} {
-      llvm.return
-    }
-  }
+  // CHECK: gpu.binary
+  gpu.binary @kernel_module [#gpu.object<#rocdl.target, "blob">]
 
   func.func @foo(%buffer: memref<?xf32>) {
     // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
diff --git a/mlir/test/Conversion/IndexToSPRIV/index-to-spirv.mlir b/mlir/test/Conversion/IndexToSPIRV/index-to-spirv.mlir
similarity index 100%
rename from mlir/test/Conversion/IndexToSPRIV/index-to-spirv.mlir
rename to mlir/test/Conversion/IndexToSPIRV/index-to-spirv.mlir
diff --git a/mlir/test/Dialect/Arith/int-range-interface.mlir b/mlir/test/Dialect/Arith/int-range-interface.mlir
index 60f0ab41afa48d..e00b7692fe3968 100644
--- a/mlir/test/Dialect/Arith/int-range-interface.mlir
+++ b/mlir/test/Dialect/Arith/int-range-interface.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-int-range-inference -canonicalize %s | FileCheck %s
+// RUN: mlir-opt -int-range-optimizations -canonicalize %s | FileCheck %s
 
 // CHECK-LABEL: func @add_min_max
 // CHECK: %[[c3:.*]] = arith.constant 3 : index
diff --git a/mlir/test/Dialect/Arith/int-range-opts.mlir b/mlir/test/Dialect/Arith/int-range-opts.mlir
index dd62a481a12464..ea5969a1002580 100644
--- a/mlir/test/Dialect/Arith/int-range-opts.mlir
+++ b/mlir/test/Dialect/Arith/int-range-opts.mlir
@@ -96,3 +96,39 @@ func.func @test() -> i8 {
   return %1: i8
 }
 
+// -----
+
+// CHECK-LABEL: func @trivial_rem
+// CHECK: [[val:%.+]] = test.with_bounds
+// CHECK: return [[val]]
+func.func @trivial_rem() -> i8 {
+  %c64 = arith.constant 64 : i8
+  %val = test.with_bounds { umin = 0 : ui8, umax = 63 : ui8, smin = 0 : si8, smax = 63 : si8 } : i8
+  %mod = arith.remsi %val, %c64 : i8
+  return %mod : i8
+}
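The `trivial_rem` case folds because the inferred range of `%val` is [0, 63] and the divisor is the constant 64: every value in that range satisfies `val rem 64 == val`, so the range analysis can replace the `arith.remsi` with its left-hand operand, and the then-dead constant is cleaned up. A hand-written sketch of the expected optimized body (consistent with the CHECK lines above; the pass output may differ in ordering):

```mlir
func.func @trivial_rem() -> i8 {
  %val = test.with_bounds { umin = 0 : ui8, umax = 63 : ui8,
                            smin = 0 : si8, smax = 63 : si8 } : i8
  return %val : i8
}
```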
+
+// -----
+
+// CHECK-LABEL: func @non_const_rhs
+// CHECK: [[mod:%.+]] = arith.remui
+// CHECK: return [[mod]]
+func.func @non_const_rhs() -> i8 {
+  %c64 = arith.constant 64 : i8
+  %val = test.with_bounds { umin = 0 : ui8, umax = 2 : ui8, smin = 0 : si8, smax = 2 : si8 } : i8
+  %rhs = test.with_bounds { umin = 63 : ui8, umax = 64 : ui8, smin = 63 : si8, smax = 64 : si8 } : i8
+  %mod = arith.remui %val, %rhs : i8
+  return %mod : i8
+}
+
+// -----
+
+// CHECK-LABEL: func @wraps
+// CHECK: [[mod:%.+]] = arith.remsi
+// CHECK: return [[mod]]
+func.func @wraps() -> i8 {
+  %c64 = arith.constant 64 : i8
+  %val = test.with_bounds { umin = 63 : ui8, umax = 65 : ui8, smin = 63 : si8, smax = 65 : si8 } : i8
+  %mod = arith.remsi %val, %c64 : i8
+  return %mod : i8
+}
diff --git a/mlir/test/Dialect/GPU/int-range-interface.mlir b/mlir/test/Dialect/GPU/int-range-interface.mlir
index 980f7e5873e0cb..a0917a2fdf1100 100644
--- a/mlir/test/Dialect/GPU/int-range-interface.mlir
+++ b/mlir/test/Dialect/GPU/int-range-interface.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-int-range-inference -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -int-range-optimizations -split-input-file %s | FileCheck %s
 
 // CHECK-LABEL: func @launch_func
 func.func @launch_func(%arg0 : index) {
diff --git a/mlir/test/Dialect/Index/int-range-inference.mlir b/mlir/test/Dialect/Index/int-range-inference.mlir
index 2784d5fd5cf701..951624d573a646 100644
--- a/mlir/test/Dialect/Index/int-range-inference.mlir
+++ b/mlir/test/Dialect/Index/int-range-inference.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-int-range-inference -canonicalize %s | FileCheck %s
+// RUN: mlir-opt -int-range-optimizations -canonicalize %s | FileCheck %s
 
 // Most operations are covered by the `arith` tests, which use the same code
 // Here, we add a few tests to ensure the "index can be 32- or 64-bit" handling
diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
index f3cf7c4dffa05f..8feb3c2a2c306a 100644
--- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
@@ -355,7 +355,6 @@ module {
   %0 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) outs(%arg1 : tensor) {
   ^bb0(%in: f32, %out: f32):
     %1 = llvm.fmul %in, %in : f32
-    // expected-error @below {{Unknown neutral element for:}}
     %2 = llvm.fadd %1, %out : f32
     linalg.yield %2 : f32
   } -> tensor
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index 3fa696e1600a93..eb0dc01be25b93 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -1025,6 +1025,7 @@ func.func @sparse_print(%arg0: tensor<10x10xf64>) {
 func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 2>) {
   // expected-error@+1 {{'sparse_tensor.extract_iteration_space' expect larger level upper bound than lower bound}}
   %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 to 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 2>
+    -> !sparse_tensor.iter_space<#COO, lvls = 0 to 2>
   return
 }
 
@@ -1040,6 +1041,7 @@ func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse
 func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
   // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
   %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+    -> !sparse_tensor.iter_space<#COO, lvls = 1>
   return
 }
 
@@ -1054,7 +1056,7 @@ func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse
 
 func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>) {
   // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 1 : tensor<4x8xf32, #COO>
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 1 : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 1>
   return
 }
 
@@ -1077,6 +1079,7 @@ func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>) {
 func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#CSR, lvls = 0>) {
   // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op mismatch in parent iterator encoding and iteration space encoding.}}
   %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#CSR, lvls = 0>
+    -> !sparse_tensor.iter_space<#COO, lvls = 1>
   return
 }
 
@@ -1092,5 +1095,63 @@ func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse
 func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
   // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be used to extract an iteration space from a consecutive level.}}
   %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+    -> !sparse_tensor.iter_space<#COO, lvls = 2>
   return
 }
+
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0>
+  // expected-error @+1 {{'sparse_tensor.iterate' op different number of region iter_args and yielded values: 2 != 1}}
+  %r1, %r2 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%si = %i, %sj = %j): !sparse_tensor.iter_space<#COO, lvls = 0> -> (index, index) {
+    sparse_tensor.yield %si : index
+  }
+  return %r1 : index
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// expected-note@+1 {{prior use here}}
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index) -> f32 {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0>
+  // expected-error @+1 {{use of value '%i' expects different type than prior uses: 'f32' vs 'index'}}
+  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0> -> f32 {
+    sparse_tensor.yield %outer : f32
+  }
+  return %r1 : f32
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0>
+  // expected-error @+1 {{'sparse_tensor.iterate' op 0-th region iter_arg and 0-th yielded value have different type: 'index' != 'f32'}}
+  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%si = %i): !sparse_tensor.iter_space<#COO, lvls = 0> -> index {
+    %y = arith.constant 1.0 : f32
+    sparse_tensor.yield %y : f32
+  }
+  return %r1 : index
+}
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index d34071279e5129..bce0b41a99828a 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -758,8 +758,37 @@ func.func @sparse_has_runtime() -> i1 {
 func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>)
   -> (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) {
   // Extracting the iteration space for the first level needs no parent iterator.
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0>
   // Extracting the iteration space for the second level needs a parent iterator.
   %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+    -> !sparse_tensor.iter_space<#COO, lvls = 1>
   return %l1, %l2 : !sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>
 }
+
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// CHECK-LABEL:   func.func @sparse_iterate(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>,
+// CHECK-SAME:      %[[VAL_1:.*]]: index,
+// CHECK-SAME:      %[[VAL_2:.*]]: index) -> index {
+// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 : tensor<4x8xf32, #sparse{{[0-9]*}}>
+// CHECK:           %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]]) iter_args(%[[VAL_7:.*]] = %[[VAL_1]]) : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> -> (index) {
+// CHECK:             sparse_tensor.yield %[[VAL_7]] : index
+// CHECK:           }
+// CHECK:           return %[[VAL_4]] : index
+// CHECK:         }
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO> -> !sparse_tensor.iter_space<#COO, lvls = 0>
+  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index {
+    sparse_tensor.yield %outer : index
+  }
+  return %r1 : index
+}
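The new LICM test that follows only pins the relative order of the ops with CHECK lines. For orientation, the post-LICM shape it expects looks roughly like this (a hand-written sketch, not actual pass output; value names are illustrative and the types match the reconstructed test below):

```mlir
// The three pure sparse_tensor queries end up hoisted above the loop.
%v = sparse_tensor.values %sp : tensor<?x?xf32, #CSR> to memref<?xf32>
%p = sparse_tensor.positions %sp { level = 1 : index } : tensor<?x?xf32, #CSR> to memref<?xindex>
%c = sparse_tensor.coordinates %sp { level = 1 : index } : tensor<?x?xf32, #CSR> to memref<?xindex>
sparse_tensor.iterate %it1 in %l1 at (%crd) : !sparse_tensor.iter_space<#CSR, lvls = 0> {
  "test.op"(%v, %p, %c) : (memref<?xf32>, memref<?xindex>, memref<?xindex>) -> ()
}
```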
diff --git a/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir b/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir
new file mode 100644
index 00000000000000..f70fab3b7251df
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir
@@ -0,0 +1,27 @@
+// RUN: mlir-opt %s --loop-invariant-code-motion | FileCheck %s
+
+#CSR = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : dense,
+    j : compressed
+  )
+}>
+
+// Make sure that pure instructions are hoisted outside the loop.
+//
+// CHECK: sparse_tensor.values
+// CHECK: sparse_tensor.positions
+// CHECK: sparse_tensor.coordinate
+// CHECK: sparse_tensor.iterate
+func.func @sparse_iterate(%sp : tensor<?x?xf32, #CSR>) {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<?x?xf32, #CSR>
+     -> !sparse_tensor.iter_space<#CSR, lvls = 0>
+  sparse_tensor.iterate %it1 in %l1 at (%crd) : !sparse_tensor.iter_space<#CSR, lvls = 0> {
+    %0 = sparse_tensor.values %sp : tensor<?x?xf32, #CSR> to memref<?xf32>
+    %1 = sparse_tensor.positions %sp { level = 1 : index } : tensor<?x?xf32, #CSR> to memref<?xindex>
+    %2 = sparse_tensor.coordinates %sp { level = 1 : index } : tensor<?x?xf32, #CSR> to memref<?xindex>
+    "test.op"(%0, %1, %2) : (memref<?xf32>, memref<?xindex>, memref<?xindex>) -> ()
+  }
+
+  return
+}
diff --git a/mlir/test/IR/print-attr-type-aliases.mlir b/mlir/test/IR/print-attr-type-aliases.mlir
index 162eacd0022832..27c5a75addbb59 100644
--- a/mlir/test/IR/print-attr-type-aliases.mlir
+++ b/mlir/test/IR/print-attr-type-aliases.mlir
@@ -11,6 +11,9 @@
 // CHECK-DAG: #_0_test_alias = "alias_test:prefixed_digit"
 "test.op"() {alias_test = "alias_test:prefixed_digit"} : () -> ()
 
+// CHECK-DAG: #_25test = "alias_test:prefixed_symbol"
+"test.op"() {alias_test = "alias_test:prefixed_symbol"} : () -> ()
+
 // CHECK-DAG: #test_alias_conflict0_ = "alias_test:sanitize_conflict_a"
 // CHECK-DAG: #test_alias_conflict0_1 = "alias_test:sanitize_conflict_b"
 "test.op"() {alias_test = ["alias_test:sanitize_conflict_a", "alias_test:sanitize_conflict_b"]} : () -> ()
diff --git a/mlir/test/Interfaces/InferIntRangeInterface/infer-int-range-test-ops.mlir b/mlir/test/Interfaces/InferIntRangeInterface/infer-int-range-test-ops.mlir
index 2106eeefdca4d4..1ec3441b1fde81 100644
--- a/mlir/test/Interfaces/InferIntRangeInterface/infer-int-range-test-ops.mlir
+++ b/mlir/test/Interfaces/InferIntRangeInterface/infer-int-range-test-ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-int-range-inference %s | FileCheck %s
+// RUN: mlir-opt -int-range-optimizations %s | FileCheck %s
 
 // CHECK-LABEL: func @constant
 // CHECK: %[[cst:.*]] = "test.constant"() <{value = 3 : index}
@@ -103,13 +103,11 @@ func.func @func_args_unbound(%arg0 : index) -> index {
 
 // CHECK-LABEL: func @propagate_across_while_loop_false()
 func.func @propagate_across_while_loop_false() -> index {
-  // CHECK-DAG: %[[C0:.*]] = "test.constant"() <{value = 0
-  // CHECK-DAG: %[[C1:.*]] = "test.constant"() <{value = 1
+  // CHECK: %[[C1:.*]] = "test.constant"() <{value = 1
   %0 = test.with_bounds { umin = 0 : index, umax = 0 : index, smin = 0 : index, smax = 0 : index } : index
   %1 = scf.while : () -> index {
     %false = arith.constant false
-    // CHECK: scf.condition(%{{.*}}) %[[C0]]
     scf.condition(%false) %0 : index
   } do {
   ^bb0(%i1: index):
@@ -122,12 +120,10 @@ func.func @propagate_across_while_loop_false() -> index {
 
 // CHECK-LABEL: func @propagate_across_while_loop
 func.func @propagate_across_while_loop(%arg0 : i1) -> index {
-  // CHECK-DAG: %[[C0:.*]] = "test.constant"() <{value = 0
-  // CHECK-DAG: %[[C1:.*]] = "test.constant"() <{value = 1
+  // CHECK: %[[C1:.*]] = "test.constant"() <{value = 1
   %0 = test.with_bounds { umin = 0 : index, umax = 0 : index, smin = 0 : index, smax = 0 : index } : index
   %1 = scf.while : () -> index {
-    // CHECK: scf.condition(%{{.*}}) %[[C0]]
     scf.condition(%arg0) %0 : index
   } do {
   ^bb0(%i1: index):
diff --git a/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp b/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
index a3a8913d5964c6..64add8cef36986 100644
--- a/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
@@ -188,6 +188,7 @@ struct TestOpAsmInterface : public OpAsmDialectInterface {
           .Case("alias_test:dot_in_name", StringRef("test.alias"))
           .Case("alias_test:trailing_digit", StringRef("test_alias0"))
           .Case("alias_test:prefixed_digit", StringRef("0_test_alias"))
+          .Case("alias_test:prefixed_symbol", StringRef("%test"))
           .Case("alias_test:sanitize_conflict_a",
                 StringRef("test_alias_conflict0"))
           .Case("alias_test:sanitize_conflict_b",
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index f9f7d4eacf948a..a14a5da3410980 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -1516,8 +1516,9 @@ struct TestTestSignatureConversionNoConverter
     if (failed(
             converter.convertSignatureArgs(entry->getArgumentTypes(), result)))
       return failure();
-    rewriter.modifyOpInPlace(
-        op, [&] { rewriter.applySignatureConversion(&region, result); });
+    rewriter.modifyOpInPlace(op, [&] {
+      rewriter.applySignatureConversion(&region.front(), result);
+    });
     return success();
   }
 
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 975a41ac3d5fea..66b1faf78e2d8b 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -24,7 +24,6 @@ add_mlir_library(MLIRTestTransforms
   TestConstantFold.cpp
   TestControlFlowSink.cpp
   TestInlining.cpp
-  TestIntRangeInference.cpp
  TestMakeIsolatedFromAbove.cpp
  ${MLIRTestTransformsPDLSrc}
diff --git a/mlir/test/lib/Transforms/TestIntRangeInference.cpp b/mlir/test/lib/Transforms/TestIntRangeInference.cpp
deleted file mode 100644
index 5758f6acf2f0ff..00000000000000
--- a/mlir/test/lib/Transforms/TestIntRangeInference.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-//===- TestIntRangeInference.cpp - Create consts from range inference ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// TODO: This pass is needed to test integer range inference until that
-// functionality has been integrated into SCCP.
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
-#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
-#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
-#include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassRegistry.h"
-#include "mlir/Support/TypeID.h"
-#include "mlir/Transforms/FoldUtils.h"
-#include <optional>
-
-using namespace mlir;
-using namespace mlir::dataflow;
-
-/// Patterned after SCCP
-static LogicalResult replaceWithConstant(DataFlowSolver &solver, OpBuilder &b,
-                                         OperationFolder &folder, Value value) {
-  auto *maybeInferredRange =
-      solver.lookupState<IntegerValueRangeLattice>(value);
-  if (!maybeInferredRange || maybeInferredRange->getValue().isUninitialized())
-    return failure();
-  const ConstantIntRanges &inferredRange =
-      maybeInferredRange->getValue().getValue();
-  std::optional<APInt> maybeConstValue = inferredRange.getConstantValue();
-  if (!maybeConstValue.has_value())
-    return failure();
-
-  Operation *maybeDefiningOp = value.getDefiningOp();
-  Dialect *valueDialect =
-      maybeDefiningOp ? maybeDefiningOp->getDialect()
-                      : value.getParentRegion()->getParentOp()->getDialect();
-  Attribute constAttr = b.getIntegerAttr(value.getType(), *maybeConstValue);
-  Value constant = folder.getOrCreateConstant(
-      b.getInsertionBlock(), valueDialect, constAttr, value.getType());
-  if (!constant)
-    return failure();
-
-  value.replaceAllUsesWith(constant);
-  return success();
-}
-
-static void rewrite(DataFlowSolver &solver, MLIRContext *context,
-                    MutableArrayRef<Region> initialRegions) {
-  SmallVector<Block *> worklist;
-  auto addToWorklist = [&](MutableArrayRef<Region> regions) {
-    for (Region &region : regions)
-      for (Block &block : llvm::reverse(region))
-        worklist.push_back(&block);
-  };
-
-  OpBuilder builder(context);
-  OperationFolder folder(context);
-
-  addToWorklist(initialRegions);
-  while (!worklist.empty()) {
-    Block *block = worklist.pop_back_val();
-
-    for (Operation &op : llvm::make_early_inc_range(*block)) {
-      builder.setInsertionPoint(&op);
-
-      // Replace any result with constants.
-      bool replacedAll = op.getNumResults() != 0;
-      for (Value res : op.getResults())
-        replacedAll &=
-            succeeded(replaceWithConstant(solver, builder, folder, res));
-
-      // If all of the results of the operation were replaced, try to erase
-      // the operation completely.
-      if (replacedAll && wouldOpBeTriviallyDead(&op)) {
-        assert(op.use_empty() && "expected all uses to be replaced");
-        op.erase();
-        continue;
-      }
-
-      // Add any the regions of this operation to the worklist.
-      addToWorklist(op.getRegions());
-    }
-
-    // Replace any block arguments with constants.
-    builder.setInsertionPointToStart(block);
-    for (BlockArgument arg : block->getArguments())
-      (void)replaceWithConstant(solver, builder, folder, arg);
-  }
-}
-
-namespace {
-struct TestIntRangeInference
-    : PassWrapper<TestIntRangeInference, OperationPass<>> {
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestIntRangeInference)
-
-  StringRef getArgument() const final { return "test-int-range-inference"; }
-  StringRef getDescription() const final {
-    return "Test integer range inference analysis";
-  }
-
-  void runOnOperation() override {
-    Operation *op = getOperation();
-    DataFlowSolver solver;
-    solver.load<DeadCodeAnalysis>();
-    solver.load<SparseConstantPropagation>();
-    solver.load<IntegerRangeAnalysis>();
-    if (failed(solver.initializeAndRun(op)))
-      return signalPassFailure();
-    rewrite(solver, op->getContext(), op->getRegions());
-  }
-};
-} // end anonymous namespace
-
-namespace mlir {
-namespace test {
-void registerTestIntRangeInference() {
-  PassRegistration<TestIntRangeInference>();
-}
-} // end namespace test
-} // end namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index d2ba3d06835fb1..d0de74dd6eaf4e 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -97,9 +97,11 @@ void registerTestDynamicPipelinePass();
 void registerTestEmulateNarrowTypePass();
 void registerTestExpandMathPass();
 void registerTestFooAnalysisPass();
+void registerTestComposeSubView();
+void registerTestMultiBuffering();
+void registerTestIRVisitorsPass();
 void registerTestGenericIRVisitorsPass();
 void registerTestInterfaces();
-void registerTestIntRangeInference();
 void registerTestIRVisitorsPass();
 void registerTestLastModifiedPass();
 void registerTestLinalgDecomposeOps();
@@ -226,9 +228,11 @@ void registerTestPasses() {
   mlir::test::registerTestEmulateNarrowTypePass();
   mlir::test::registerTestExpandMathPass();
   mlir::test::registerTestFooAnalysisPass();
+  mlir::test::registerTestComposeSubView();
+  mlir::test::registerTestMultiBuffering();
+  mlir::test::registerTestIRVisitorsPass();
   mlir::test::registerTestGenericIRVisitorsPass();
   mlir::test::registerTestInterfaces();
-  mlir::test::registerTestIntRangeInference();
   mlir::test::registerTestIRVisitorsPass();
   mlir::test::registerTestLastModifiedPass();
   mlir::test::registerTestLinalgDecomposeOps();
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel
index ff3b035f64ade5..41b85d2a523686 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel
@@ -28,6 +28,7 @@ libc_test(
     deps = [
         "//libc:__support_big_int",
         "//libc:__support_fputil_dyadic_float",
+        "//libc:__support_macros_properties_types",
         "//libc:__support_uint128",
         "//libc/test/UnitTest:fp_test_helpers",
         "//libc/utils/MPFRWrapper:mpfr_wrapper",
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 0fc791ef23bbd7..0254e127980e58 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -2903,7 +2903,9 @@ td_library(
     ]),
     includes = ["include"],
     deps = [
+        ":ControlFlowInterfacesTdFiles",
         ":InferTypeOpInterfaceTdFiles",
+        ":LoopLikeInterfaceTdFiles",
         ":OpBaseTdFiles",
         ":SideEffectInterfacesTdFiles",
     ],
@@ -3091,9 +3093,11 @@ cc_library(
     ":BufferizationInterfaces",
     ":BytecodeOpInterface",
     ":ComplexDialect",
+    ":ControlFlowInterfaces",
     ":DialectUtils",
     ":IR",
     ":InferTypeOpInterface",
+    ":LoopLikeInterface",
     ":SideEffectInterfaces",
     ":SparseTensorAttrDefsIncGen",
     ":SparseTensorEnums",
@@ -12713,14 +12717,15 @@ cc_library(
     ":ArithDialect",
     ":ArithPassIncGen",
     ":ArithUtils",
-    ":BufferizationDialect",
     ":BufferizationInterfaces",
     ":BufferizationTransforms",
+    ":DialectUtils",
     ":FuncDialect",
     ":FuncTransforms",
     ":IR",
     ":MemRefDialect",
     ":Pass",
+    ":SideEffectInterfaces",
     ":Support",
     ":TensorDialect",
    ":TransformUtils",