Diffstat (limited to 'clang-r353983e/include/llvm/Analysis')
104 files changed, 34483 insertions, 0 deletions
diff --git a/clang-r353983e/include/llvm/Analysis/AliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/AliasAnalysis.h new file mode 100644 index 00000000..4e55f017 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/AliasAnalysis.h @@ -0,0 +1,1105 @@ +//===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the generic AliasAnalysis interface, which is used as the +// common interface used by all clients of alias analysis information, and +// implemented by all alias analysis implementations. Mod/Ref information is +// also captured by this interface. +// +// Implementations of this interface must implement the various virtual methods, +// which automatically provides functionality for the entire suite of client +// APIs. +// +// This API identifies memory regions with the MemoryLocation class. The pointer +// component specifies the base memory address of the region. The Size specifies +// the maximum size (in address units) of the memory region, or +// MemoryLocation::UnknownSize if the size is not known. The TBAA tag +// identifies the "type" of the memory reference; see the +// TypeBasedAliasAnalysis class for details. +// +// Some non-obvious details include: +// - Pointers that point to two completely different objects in memory never +// alias, regardless of the value of the Size component. +// - NoAlias doesn't imply inequal pointers. The most obvious example of this +// is two pointers to constant memory. Even if they are equal, constant +// memory is never stored to, so there will never be any dependencies. +// In this and other situations, the pointers may be both NoAlias and +// MustAlias at the same time. The current API can only return one result, +// though this is rarely a problem in practice. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H +#define LLVM_ANALYSIS_ALIASANALYSIS_H + +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <cstdint> +#include <functional> +#include <memory> +#include <vector> + +namespace llvm { + +class AnalysisUsage; +class BasicAAResult; +class BasicBlock; +class DominatorTree; +class OrderedBasicBlock; +class Value; + +/// The possible results of an alias query. +/// +/// These results are always computed between two MemoryLocation objects as +/// a query to some alias analysis. +/// +/// Note that these are unscoped enumerations because we would like to support +/// implicitly testing a result for the existence of any possible aliasing with +/// a conversion to bool, but an "enum class" doesn't support this. The +/// canonical names from the literature are suffixed and unique anyways, and so +/// they serve as global constants in LLVM for these results. +/// +/// See docs/AliasAnalysis.html for more information on the specific meanings +/// of these values. 
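The implicit boolean conversion mentioned above (NoAlias is the only value that converts to false) is the usual way clients consume an AliasResult. A minimal, hypothetical usage sketch, assuming an AAResults reference named AA and two pointer Values P1 and P2 supplied by the caller:

  // Illustrative only; AA, P1 and P2 are assumed to exist in the caller.
  if (AA.alias(P1, P2)) {
    // MayAlias, PartialAlias or MustAlias: a dependence is possible.
  } else {
    // NoAlias: the two accesses can be treated as independent.
  }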
+enum AliasResult : uint8_t { + /// The two locations do not alias at all. + /// + /// This value is arranged to convert to false, while all other values + /// convert to true. This allows a boolean context to convert the result to + /// a binary flag indicating whether there is the possibility of aliasing. + NoAlias = 0, + /// The two locations may or may not alias. This is the least precise result. + MayAlias, + /// The two locations alias, but only due to a partial overlap. + PartialAlias, + /// The two locations precisely alias each other. + MustAlias, +}; + +/// << operator for AliasResult. +raw_ostream &operator<<(raw_ostream &OS, AliasResult AR); + +/// Flags indicating whether a memory access modifies or references memory. +/// +/// This is no access at all, a modification, a reference, or both +/// a modification and a reference. These are specifically structured such that +/// they form a three bit matrix and bit-tests for 'mod' or 'ref' or 'must' +/// work with any of the possible values. +enum class ModRefInfo : uint8_t { + /// Must is provided for completeness, but no routines will return only + /// Must today. See definition of Must below. + Must = 0, + /// The access may reference the value stored in memory, + /// a mustAlias relation was found, and no mayAlias or partialAlias found. + MustRef = 1, + /// The access may modify the value stored in memory, + /// a mustAlias relation was found, and no mayAlias or partialAlias found. + MustMod = 2, + /// The access may reference, modify or both the value stored in memory, + /// a mustAlias relation was found, and no mayAlias or partialAlias found. + MustModRef = MustRef | MustMod, + /// The access neither references nor modifies the value stored in memory. + NoModRef = 4, + /// The access may reference the value stored in memory. + Ref = NoModRef | MustRef, + /// The access may modify the value stored in memory. + Mod = NoModRef | MustMod, + /// The access may reference and may modify the value stored in memory. + ModRef = Ref | Mod, + + /// About Must: + /// Must is set in a best effort manner. + /// We usually do not try our best to infer Must, instead it is merely + /// another piece of "free" information that is presented when available. + /// Must set means there was certainly a MustAlias found. For calls, + /// where multiple arguments are checked (argmemonly), this translates to + /// only MustAlias or NoAlias was found. + /// Must is not set for RAR accesses, even if the two locations must + /// alias. The reason is that two read accesses translate to an early return + /// of NoModRef. An additional alias check to set Must may be + /// expensive. Other cases may also not set Must(e.g. callCapturesBefore). + /// We refer to Must being *set* when the most significant bit is *cleared*. + /// Conversely we *clear* Must information by *setting* the Must bit to 1. 
+}; + +LLVM_NODISCARD inline bool isNoModRef(const ModRefInfo MRI) { + return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) == + static_cast<int>(ModRefInfo::Must); +} +LLVM_NODISCARD inline bool isModOrRefSet(const ModRefInfo MRI) { + return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef); +} +LLVM_NODISCARD inline bool isModAndRefSet(const ModRefInfo MRI) { + return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustModRef)) == + static_cast<int>(ModRefInfo::MustModRef); +} +LLVM_NODISCARD inline bool isModSet(const ModRefInfo MRI) { + return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustMod); +} +LLVM_NODISCARD inline bool isRefSet(const ModRefInfo MRI) { + return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::MustRef); +} +LLVM_NODISCARD inline bool isMustSet(const ModRefInfo MRI) { + return !(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::NoModRef)); +} + +LLVM_NODISCARD inline ModRefInfo setMod(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) | + static_cast<int>(ModRefInfo::MustMod)); +} +LLVM_NODISCARD inline ModRefInfo setRef(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) | + static_cast<int>(ModRefInfo::MustRef)); +} +LLVM_NODISCARD inline ModRefInfo setMust(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) & + static_cast<int>(ModRefInfo::MustModRef)); +} +LLVM_NODISCARD inline ModRefInfo setModAndRef(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) | + static_cast<int>(ModRefInfo::MustModRef)); +} +LLVM_NODISCARD inline ModRefInfo clearMod(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref)); +} +LLVM_NODISCARD inline ModRefInfo clearRef(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod)); +} +LLVM_NODISCARD inline ModRefInfo clearMust(const ModRefInfo MRI) { + return ModRefInfo(static_cast<int>(MRI) | + static_cast<int>(ModRefInfo::NoModRef)); +} +LLVM_NODISCARD inline ModRefInfo unionModRef(const ModRefInfo MRI1, + const ModRefInfo MRI2) { + return ModRefInfo(static_cast<int>(MRI1) | static_cast<int>(MRI2)); +} +LLVM_NODISCARD inline ModRefInfo intersectModRef(const ModRefInfo MRI1, + const ModRefInfo MRI2) { + return ModRefInfo(static_cast<int>(MRI1) & static_cast<int>(MRI2)); +} + +/// The locations at which a function might access memory. +/// +/// These are primarily used in conjunction with the \c AccessKind bits to +/// describe both the nature of access and the locations of access for a +/// function call. +enum FunctionModRefLocation { + /// Base case is no access to memory. + FMRL_Nowhere = 0, + /// Access to memory via argument pointers. + FMRL_ArgumentPointees = 8, + /// Memory that is inaccessible via LLVM IR. + FMRL_InaccessibleMem = 16, + /// Access to any memory. + FMRL_Anywhere = 32 | FMRL_InaccessibleMem | FMRL_ArgumentPointees +}; + +/// Summary of how a function affects memory in the program. +/// +/// Loads from constant globals are not considered memory accesses for this +/// interface. Also, functions may freely modify stack space local to their +/// invocation without having to report it through these interfaces. +enum FunctionModRefBehavior { + /// This function does not perform any non-local loads or stores to memory. + /// + /// This property corresponds to the GCC 'const' attribute. + /// This property corresponds to the LLVM IR 'readnone' attribute. 
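As a rough, hypothetical illustration of the bit encoding and the helper predicates defined above (the commented results follow directly from the enum definition):

  ModRefInfo MRI = ModRefInfo::Ref;        // NoModRef | MustRef
  bool R = isRefSet(MRI);                  // true: the Ref bit is set
  bool M = isModSet(MRI);                  // false: the Mod bit is clear
  bool K = isMustSet(MRI);                 // false: the NoModRef bit is set, so no Must info
  MRI = unionModRef(MRI, ModRefInfo::Mod); // ModRef: both Mod and Ref are now set
  MRI = clearMod(MRI);                     // back to ModRefInfo::Ref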
+ /// This property corresponds to the IntrNoMem LLVM intrinsic flag. + FMRB_DoesNotAccessMemory = + FMRL_Nowhere | static_cast<int>(ModRefInfo::NoModRef), + + /// The only memory references in this function (if it has any) are + /// non-volatile loads from objects pointed to by its pointer-typed + /// arguments, with arbitrary offsets. + /// + /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag. + FMRB_OnlyReadsArgumentPointees = + FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Ref), + + /// The only memory references in this function (if it has any) are + /// non-volatile loads and stores from objects pointed to by its + /// pointer-typed arguments, with arbitrary offsets. + /// + /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag. + FMRB_OnlyAccessesArgumentPointees = + FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::ModRef), + + /// The only memory references in this function (if it has any) are + /// references of memory that is otherwise inaccessible via LLVM IR. + /// + /// This property corresponds to the LLVM IR inaccessiblememonly attribute. + FMRB_OnlyAccessesInaccessibleMem = + FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::ModRef), + + /// The function may perform non-volatile loads and stores of objects + /// pointed to by its pointer-typed arguments, with arbitrary offsets, and + /// it may also perform loads and stores of memory that is otherwise + /// inaccessible via LLVM IR. + /// + /// This property corresponds to the LLVM IR + /// inaccessiblemem_or_argmemonly attribute. + FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem | + FMRL_ArgumentPointees | + static_cast<int>(ModRefInfo::ModRef), + + /// This function does not perform any non-local stores or volatile loads, + /// but may read from any memory location. + /// + /// This property corresponds to the GCC 'pure' attribute. + /// This property corresponds to the LLVM IR 'readonly' attribute. + /// This property corresponds to the IntrReadMem LLVM intrinsic flag. + FMRB_OnlyReadsMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Ref), + + // This function does not read from memory anywhere, but may write to any + // memory location. + // + // This property corresponds to the LLVM IR 'writeonly' attribute. + // This property corresponds to the IntrWriteMem LLVM intrinsic flag. + FMRB_DoesNotReadMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Mod), + + /// This indicates that the function could not be classified into one of the + /// behaviors above. + FMRB_UnknownModRefBehavior = + FMRL_Anywhere | static_cast<int>(ModRefInfo::ModRef) +}; + +// Wrapper method strips bits significant only in FunctionModRefBehavior, +// to obtain a valid ModRefInfo. The benefit of using the wrapper is that if +// ModRefInfo enum changes, the wrapper can be updated to & with the new enum +// entry with all bits set to 1. +LLVM_NODISCARD inline ModRefInfo +createModRefInfo(const FunctionModRefBehavior FMRB) { + return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef)); +} + +class AAResults { +public: + // Make these results default constructable and movable. We have to spell + // these out because MSVC won't synthesize them. + AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {} + AAResults(AAResults &&Arg); + ~AAResults(); + + /// Register a specific AA result. + template <typename AAResultT> void addAAResult(AAResultT &AAResult) { + // FIXME: We should use a much lighter weight system than the usual + // polymorphic pattern because we don't own AAResult. 
It should + // ideally involve two pointers and no separate allocation. + AAs.emplace_back(new Model<AAResultT>(AAResult, *this)); + } + + /// Register a function analysis ID that the results aggregation depends on. + /// + /// This is used in the new pass manager to implement the invalidation logic + /// where we must invalidate the results aggregation if any of our component + /// analyses become invalid. + void addAADependencyID(AnalysisKey *ID) { AADeps.push_back(ID); } + + /// Handle invalidation events in the new pass manager. + /// + /// The aggregation is invalidated if any of the underlying analyses is + /// invalidated. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + //===--------------------------------------------------------------------===// + /// \name Alias Queries + /// @{ + + /// The main low level interface to the alias analysis implementation. + /// Returns an AliasResult indicating whether the two pointers are aliased to + /// each other. This is the interface that must be implemented by specific + /// alias analysis implementations. + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + /// A convenience wrapper around the primary \c alias interface. + AliasResult alias(const Value *V1, LocationSize V1Size, const Value *V2, + LocationSize V2Size) { + return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); + } + + /// A convenience wrapper around the primary \c alias interface. + AliasResult alias(const Value *V1, const Value *V2) { + return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown()); + } + + /// A trivial helper function to check to see if the specified pointers are + /// no-alias. + bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return alias(LocA, LocB) == NoAlias; + } + + /// A convenience wrapper around the \c isNoAlias helper interface. + bool isNoAlias(const Value *V1, LocationSize V1Size, const Value *V2, + LocationSize V2Size) { + return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); + } + + /// A convenience wrapper around the \c isNoAlias helper interface. + bool isNoAlias(const Value *V1, const Value *V2) { + return isNoAlias(MemoryLocation(V1), MemoryLocation(V2)); + } + + /// A trivial helper function to check to see if the specified pointers are + /// must-alias. + bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return alias(LocA, LocB) == MustAlias; + } + + /// A convenience wrapper around the \c isMustAlias helper interface. + bool isMustAlias(const Value *V1, const Value *V2) { + return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) == + MustAlias; + } + + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false); + + /// A convenience wrapper around the primary \c pointsToConstantMemory + /// interface. + bool pointsToConstantMemory(const Value *P, bool OrLocal = false) { + return pointsToConstantMemory(MemoryLocation(P), OrLocal); + } + + /// @} + //===--------------------------------------------------------------------===// + /// \name Simple mod/ref information + /// @{ + + /// Get the ModRef info associated with a pointer argument of a call. The + /// result's bits are set to indicate the allowed aliasing ModRef kinds. 
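A hedged sketch of the basic query wrappers above, assuming AA is an AAResults reference and Loc1/Loc2 are MemoryLocations built by the caller:

  if (AA.isNoAlias(Loc1, Loc2)) {
    // Accesses to Loc1 and Loc2 are independent of each other.
  } else if (AA.isMustAlias(Loc1, Loc2)) {
    // Both locations name exactly the same memory.
  }
  if (AA.pointsToConstantMemory(Loc1, /*OrLocal=*/true)) {
    // No store in the program can change what a load from Loc1 sees
    // (or Loc1 is a function-local alloca).
  }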
Note + /// that these bits do not necessarily account for the overall behavior of + /// the function, but rather only provide additional per-argument + /// information. This never sets ModRefInfo::Must. + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx); + + /// Return the behavior of the given call site. + FunctionModRefBehavior getModRefBehavior(const CallBase *Call); + + /// Return the behavior when calling the given function. + FunctionModRefBehavior getModRefBehavior(const Function *F); + + /// Checks if the specified call is known to never read or write memory. + /// + /// Note that if the call only reads from known-constant memory, it is also + /// legal to return true. Also, calls that unwind the stack are legal for + /// this predicate. + /// + /// Many optimizations (such as CSE and LICM) can be performed on such calls + /// without worrying about aliasing properties, and many calls have this + /// property (e.g. calls to 'sin' and 'cos'). + /// + /// This property corresponds to the GCC 'const' attribute. + bool doesNotAccessMemory(const CallBase *Call) { + return getModRefBehavior(Call) == FMRB_DoesNotAccessMemory; + } + + /// Checks if the specified function is known to never read or write memory. + /// + /// Note that if the function only reads from known-constant memory, it is + /// also legal to return true. Also, function that unwind the stack are legal + /// for this predicate. + /// + /// Many optimizations (such as CSE and LICM) can be performed on such calls + /// to such functions without worrying about aliasing properties, and many + /// functions have this property (e.g. 'sin' and 'cos'). + /// + /// This property corresponds to the GCC 'const' attribute. + bool doesNotAccessMemory(const Function *F) { + return getModRefBehavior(F) == FMRB_DoesNotAccessMemory; + } + + /// Checks if the specified call is known to only read from non-volatile + /// memory (or not access memory at all). + /// + /// Calls that unwind the stack are legal for this predicate. + /// + /// This property allows many common optimizations to be performed in the + /// absence of interfering store instructions, such as CSE of strlen calls. + /// + /// This property corresponds to the GCC 'pure' attribute. + bool onlyReadsMemory(const CallBase *Call) { + return onlyReadsMemory(getModRefBehavior(Call)); + } + + /// Checks if the specified function is known to only read from non-volatile + /// memory (or not access memory at all). + /// + /// Functions that unwind the stack are legal for this predicate. + /// + /// This property allows many common optimizations to be performed in the + /// absence of interfering store instructions, such as CSE of strlen calls. + /// + /// This property corresponds to the GCC 'pure' attribute. + bool onlyReadsMemory(const Function *F) { + return onlyReadsMemory(getModRefBehavior(F)); + } + + /// Checks if functions with the specified behavior are known to only read + /// from non-volatile memory (or not access memory at all). + static bool onlyReadsMemory(FunctionModRefBehavior MRB) { + return !isModSet(createModRefInfo(MRB)); + } + + /// Checks if functions with the specified behavior are known to only write + /// memory (or not access memory at all). + static bool doesNotReadMemory(FunctionModRefBehavior MRB) { + return !isRefSet(createModRefInfo(MRB)); + } + + /// Checks if functions with the specified behavior are known to read and + /// write at most from objects pointed to by their pointer-typed arguments + /// (with arbitrary offsets). 
+ static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) { + return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); + } + + /// Checks if functions with the specified behavior are known to potentially + /// read or write from objects pointed to be their pointer-typed arguments + /// (with arbitrary offsets). + static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { + return isModOrRefSet(createModRefInfo(MRB)) && + (MRB & FMRL_ArgumentPointees); + } + + /// Checks if functions with the specified behavior are known to read and + /// write at most from memory that is inaccessible from LLVM IR. + static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) { + return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem); + } + + /// Checks if functions with the specified behavior are known to potentially + /// read or write from memory that is inaccessible from LLVM IR. + static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) { + return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem); + } + + /// Checks if functions with the specified behavior are known to read and + /// write at most from memory that is inaccessible from LLVM IR or objects + /// pointed to by their pointer-typed arguments (with arbitrary offsets). + static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) { + return !(MRB & FMRL_Anywhere & + ~(FMRL_InaccessibleMem | FMRL_ArgumentPointees)); + } + + /// getModRefInfo (for call sites) - Return information about whether + /// a particular call site modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); + + /// getModRefInfo (for call sites) - A convenience wrapper. + ModRefInfo getModRefInfo(const CallBase *Call, const Value *P, + LocationSize Size) { + return getModRefInfo(Call, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for loads) - Return information about whether + /// a particular load modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); + + /// getModRefInfo (for loads) - A convenience wrapper. + ModRefInfo getModRefInfo(const LoadInst *L, const Value *P, + LocationSize Size) { + return getModRefInfo(L, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for stores) - Return information about whether + /// a particular store modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); + + /// getModRefInfo (for stores) - A convenience wrapper. + ModRefInfo getModRefInfo(const StoreInst *S, const Value *P, + LocationSize Size) { + return getModRefInfo(S, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for fences) - Return information about whether + /// a particular store modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc); + + /// getModRefInfo (for fences) - A convenience wrapper. + ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, + LocationSize Size) { + return getModRefInfo(S, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for cmpxchges) - Return information about whether + /// a particular cmpxchg modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, + const MemoryLocation &Loc); + + /// getModRefInfo (for cmpxchges) - A convenience wrapper. 
+ ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P, + LocationSize Size) { + return getModRefInfo(CX, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for atomicrmws) - Return information about whether + /// a particular atomicrmw modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc); + + /// getModRefInfo (for atomicrmws) - A convenience wrapper. + ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P, + LocationSize Size) { + return getModRefInfo(RMW, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for va_args) - Return information about whether + /// a particular va_arg modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for va_args) - A convenience wrapper. + ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P, + LocationSize Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for catchpads) - Return information about whether + /// a particular catchpad modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for catchpads) - A convenience wrapper. + ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P, + LocationSize Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for catchrets) - Return information about whether + /// a particular catchret modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for catchrets) - A convenience wrapper. + ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P, + LocationSize Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// Check whether or not an instruction may read or write the optionally + /// specified memory location. + /// + /// + /// An instruction that doesn't read or write memory may be trivially LICM'd + /// for example. + /// + /// For function calls, this delegates to the alias-analysis specific + /// call-site mod-ref behavior queries. Otherwise it delegates to the specific + /// helpers above. + ModRefInfo getModRefInfo(const Instruction *I, + const Optional<MemoryLocation> &OptLoc) { + if (OptLoc == None) { + if (const auto *Call = dyn_cast<CallBase>(I)) { + return createModRefInfo(getModRefBehavior(Call)); + } + } + + const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); + + switch (I->getOpcode()) { + case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc); + case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc); + case Instruction::Store: return getModRefInfo((const StoreInst*)I, Loc); + case Instruction::Fence: return getModRefInfo((const FenceInst*)I, Loc); + case Instruction::AtomicCmpXchg: + return getModRefInfo((const AtomicCmpXchgInst*)I, Loc); + case Instruction::AtomicRMW: + return getModRefInfo((const AtomicRMWInst*)I, Loc); + case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc); + case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc); + case Instruction::CatchPad: + return getModRefInfo((const CatchPadInst *)I, Loc); + case Instruction::CatchRet: + return getModRefInfo((const CatchReturnInst *)I, Loc); + default: + return ModRefInfo::NoModRef; + } + } + + /// A convenience wrapper for constructing the memory location. 
+ ModRefInfo getModRefInfo(const Instruction *I, const Value *P, + LocationSize Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// Return information about whether a call and an instruction may refer to + /// the same memory locations. + ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call); + + /// Return information about whether two call sites may refer to the same set + /// of memory locations. See the AA documentation for details: + /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2); + + /// Return information about whether a particular call site modifies + /// or reads the specified memory location \p MemLoc before instruction \p I + /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up + /// instruction ordering queries inside the BasicBlock containing \p I. + /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being + /// set. + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, DominatorTree *DT, + OrderedBasicBlock *OBB = nullptr); + + /// A convenience wrapper to synthesize a memory location. + ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, + LocationSize Size, DominatorTree *DT, + OrderedBasicBlock *OBB = nullptr) { + return callCapturesBefore(I, MemoryLocation(P, Size), DT, OBB); + } + + /// @} + //===--------------------------------------------------------------------===// + /// \name Higher level methods for querying mod/ref information. + /// @{ + + /// Check if it is possible for execution of the specified basic block to + /// modify the location Loc. + bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc); + + /// A convenience wrapper synthesizing a memory location. + bool canBasicBlockModify(const BasicBlock &BB, const Value *P, + LocationSize Size) { + return canBasicBlockModify(BB, MemoryLocation(P, Size)); + } + + /// Check if it is possible for the execution of the specified instructions + /// to mod\ref (according to the mode) the location Loc. + /// + /// The instructions to consider are all of the instructions in the range of + /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block. + bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, + const MemoryLocation &Loc, + const ModRefInfo Mode); + + /// A convenience wrapper synthesizing a memory location. + bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, + const Value *Ptr, LocationSize Size, + const ModRefInfo Mode) { + return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode); + } + +private: + class Concept; + + template <typename T> class Model; + + template <typename T> friend class AAResultBase; + + const TargetLibraryInfo &TLI; + + std::vector<std::unique_ptr<Concept>> AAs; + + std::vector<AnalysisKey *> AADeps; +}; + +/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis +/// pointer or reference. +using AliasAnalysis = AAResults; + +/// A private abstract base class describing the concept of an individual alias +/// analysis implementation. +/// +/// This interface is implemented by any \c Model instantiation. It is also the +/// interface which a type used to instantiate the model must provide. +/// +/// All of these methods model methods by the same name in the \c +/// AAResults class. Only differences and specifics to how the +/// implementations are called are documented here. 
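A small, hypothetical example of the instruction-level mod/ref query dispatched above; AA, I (a const Instruction *) and Loc (a MemoryLocation) are assumed to come from the caller:

  ModRefInfo MRI = AA.getModRefInfo(I, Loc);
  if (!isModSet(MRI)) {
    // I never writes the memory named by Loc, so the value a load from
    // Loc would read is not changed by I.
  }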
+class AAResults::Concept { +public: + virtual ~Concept() = 0; + + /// An update API used internally by the AAResults to provide + /// a handle back to the top level aggregation. + virtual void setAAResults(AAResults *NewAAR) = 0; + + //===--------------------------------------------------------------------===// + /// \name Alias Queries + /// @{ + + /// The main low level interface to the alias analysis implementation. + /// Returns an AliasResult indicating whether the two pointers are aliased to + /// each other. This is the interface that must be implemented by specific + /// alias analysis implementations. + virtual AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) = 0; + + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. + virtual bool pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) = 0; + + /// @} + //===--------------------------------------------------------------------===// + /// \name Simple mod/ref information + /// @{ + + /// Get the ModRef info associated with a pointer argument of a callsite. The + /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note + /// that these bits do not necessarily account for the overall behavior of + /// the function, but rather only provide additional per-argument + /// information. + virtual ModRefInfo getArgModRefInfo(const CallBase *Call, + unsigned ArgIdx) = 0; + + /// Return the behavior of the given call site. + virtual FunctionModRefBehavior getModRefBehavior(const CallBase *Call) = 0; + + /// Return the behavior when calling the given function. + virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0; + + /// getModRefInfo (for call sites) - Return information about whether + /// a particular call site modifies or reads the specified memory location. + virtual ModRefInfo getModRefInfo(const CallBase *Call, + const MemoryLocation &Loc) = 0; + + /// Return information about whether two call sites may refer to the same set + /// of memory locations. See the AA documentation for details: + /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo + virtual ModRefInfo getModRefInfo(const CallBase *Call1, + const CallBase *Call2) = 0; + + /// @} +}; + +/// A private class template which derives from \c Concept and wraps some other +/// type. +/// +/// This models the concept by directly forwarding each interface point to the +/// wrapped type which must implement a compatible interface. This provides +/// a type erased binding. 
+template <typename AAResultT> class AAResults::Model final : public Concept { + AAResultT &Result; + +public: + explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) { + Result.setAAResults(&AAR); + } + ~Model() override = default; + + void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); } + + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override { + return Result.alias(LocA, LocB); + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) override { + return Result.pointsToConstantMemory(Loc, OrLocal); + } + + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override { + return Result.getArgModRefInfo(Call, ArgIdx); + } + + FunctionModRefBehavior getModRefBehavior(const CallBase *Call) override { + return Result.getModRefBehavior(Call); + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) override { + return Result.getModRefBehavior(F); + } + + ModRefInfo getModRefInfo(const CallBase *Call, + const MemoryLocation &Loc) override { + return Result.getModRefInfo(Call, Loc); + } + + ModRefInfo getModRefInfo(const CallBase *Call1, + const CallBase *Call2) override { + return Result.getModRefInfo(Call1, Call2); + } +}; + +/// A CRTP-driven "mixin" base class to help implement the function alias +/// analysis results concept. +/// +/// Because of the nature of many alias analysis implementations, they often +/// only implement a subset of the interface. This base class will attempt to +/// implement the remaining portions of the interface in terms of simpler forms +/// of the interface where possible, and otherwise provide conservatively +/// correct fallback implementations. +/// +/// Implementors of an alias analysis should derive from this CRTP, and then +/// override specific methods that they wish to customize. There is no need to +/// use virtual anywhere, the CRTP base class does static dispatch to the +/// derived type passed into it. +template <typename DerivedT> class AAResultBase { + // Expose some parts of the interface only to the AAResults::Model + // for wrapping. Specifically, this allows the model to call our + // setAAResults method without exposing it as a fully public API. + friend class AAResults::Model<DerivedT>; + + /// A pointer to the AAResults object that this AAResult is + /// aggregated within. May be null if not aggregated. + AAResults *AAR; + + /// Helper to dispatch calls back through the derived type. + DerivedT &derived() { return static_cast<DerivedT &>(*this); } + + /// A setter for the AAResults pointer, which is used to satisfy the + /// AAResults::Model contract. + void setAAResults(AAResults *NewAAR) { AAR = NewAAR; } + +protected: + /// This proxy class models a common pattern where we delegate to either the + /// top-level \c AAResults aggregation if one is registered, or to the + /// current result if none are registered. + class AAResultsProxy { + AAResults *AAR; + DerivedT &CurrentResult; + + public: + AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult) + : AAR(AAR), CurrentResult(CurrentResult) {} + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB); + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal) + : CurrentResult.pointsToConstantMemory(Loc, OrLocal); + } + + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { + return AAR ? 
AAR->getArgModRefInfo(Call, ArgIdx) + : CurrentResult.getArgModRefInfo(Call, ArgIdx); + } + + FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { + return AAR ? AAR->getModRefBehavior(Call) + : CurrentResult.getModRefBehavior(Call); + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) { + return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F); + } + + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) { + return AAR ? AAR->getModRefInfo(Call, Loc) + : CurrentResult.getModRefInfo(Call, Loc); + } + + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) { + return AAR ? AAR->getModRefInfo(Call1, Call2) + : CurrentResult.getModRefInfo(Call1, Call2); + } + }; + + explicit AAResultBase() = default; + + // Provide all the copy and move constructors so that derived types aren't + // constrained. + AAResultBase(const AAResultBase &Arg) {} + AAResultBase(AAResultBase &&Arg) {} + + /// Get a proxy for the best AA result set to query at this time. + /// + /// When this result is part of a larger aggregation, this will proxy to that + /// aggregation. When this result is used in isolation, it will just delegate + /// back to the derived class's implementation. + /// + /// Note that callers of this need to take considerable care to not cause + /// performance problems when they use this routine, in the case of a large + /// number of alias analyses being aggregated, it can be expensive to walk + /// back across the chain. + AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); } + +public: + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return MayAlias; + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + return false; + } + + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { + return ModRefInfo::ModRef; + } + + FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { + return FMRB_UnknownModRefBehavior; + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) { + return FMRB_UnknownModRefBehavior; + } + + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) { + return ModRefInfo::ModRef; + } + + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) { + return ModRefInfo::ModRef; + } +}; + +/// Return true if this pointer is returned by a noalias function. +bool isNoAliasCall(const Value *V); + +/// Return true if this is an argument with the noalias attribute. +bool isNoAliasArgument(const Value *V); + +/// Return true if this pointer refers to a distinct and identifiable object. +/// This returns true for: +/// Global Variables and Functions (but not Global Aliases) +/// Allocas +/// ByVal and NoAlias Arguments +/// NoAlias returns (e.g. calls to malloc) +/// +bool isIdentifiedObject(const Value *V); + +/// Return true if V is umabigously identified at the function-level. +/// Different IdentifiedFunctionLocals can't alias. +/// Further, an IdentifiedFunctionLocal can not alias with any function +/// arguments other than itself, which is not necessarily true for +/// IdentifiedObjects. +bool isIdentifiedFunctionLocal(const Value *V); + +/// A manager for alias analyses. +/// +/// This class can have analyses registered with it and when run, it will run +/// all of them and aggregate their results into single AA results interface +/// that dispatches across all of the alias analysis results available. 
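A minimal, hypothetical analysis built on the CRTP mixin above; every query that is not overridden falls back to the conservative defaults. The class name and the "null pointers alias nothing" rule are illustrative only, not an existing LLVM analysis:

  class NullPointerAA : public AAResultBase<NullPointerAA> {
    friend AAResultBase<NullPointerAA>;
  public:
    AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
      // Illustrative rule only: a location based on a literal null pointer
      // is assumed not to alias anything.
      if (isa<ConstantPointerNull>(LocA.Ptr) || isa<ConstantPointerNull>(LocB.Ptr))
        return NoAlias;
      // Defer everything else to the rest of the aggregation / the defaults.
      return AAResultBase::alias(LocA, LocB);
    }
  };

Such a result would then be added to an AAResults aggregation via addAAResult(), or exposed as a function analysis and registered with an AAManager.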
+/// +/// Note that the order in which analyses are registered is very significant. +/// That is the order in which the results will be aggregated and queried. +/// +/// This manager effectively wraps the AnalysisManager for registering alias +/// analyses. When you register your alias analysis with this manager, it will +/// ensure the analysis itself is registered with its AnalysisManager. +class AAManager : public AnalysisInfoMixin<AAManager> { +public: + using Result = AAResults; + + /// Register a specific AA result. + template <typename AnalysisT> void registerFunctionAnalysis() { + ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); + } + + /// Register a specific AA result. + template <typename AnalysisT> void registerModuleAnalysis() { + ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>); + } + + Result run(Function &F, FunctionAnalysisManager &AM) { + Result R(AM.getResult<TargetLibraryAnalysis>(F)); + for (auto &Getter : ResultGetters) + (*Getter)(F, AM, R); + return R; + } + +private: + friend AnalysisInfoMixin<AAManager>; + + static AnalysisKey Key; + + SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM, + AAResults &AAResults), + 4> ResultGetters; + + template <typename AnalysisT> + static void getFunctionAAResultImpl(Function &F, + FunctionAnalysisManager &AM, + AAResults &AAResults) { + AAResults.addAAResult(AM.template getResult<AnalysisT>(F)); + AAResults.addAADependencyID(AnalysisT::ID()); + } + + template <typename AnalysisT> + static void getModuleAAResultImpl(Function &F, FunctionAnalysisManager &AM, + AAResults &AAResults) { + auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); + auto &MAM = MAMProxy.getManager(); + if (auto *R = MAM.template getCachedResult<AnalysisT>(*F.getParent())) { + AAResults.addAAResult(*R); + MAMProxy + .template registerOuterAnalysisInvalidation<AnalysisT, AAManager>(); + } + } +}; + +/// A wrapper pass to provide the legacy pass manager access to a suitably +/// prepared AAResults object. +class AAResultsWrapperPass : public FunctionPass { + std::unique_ptr<AAResults> AAR; + +public: + static char ID; + + AAResultsWrapperPass(); + + AAResults &getAAResults() { return *AAR; } + const AAResults &getAAResults() const { return *AAR; } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// A wrapper pass for external alias analyses. This just squirrels away the +/// callback used to run any analyses and register their results. +struct ExternalAAWrapperPass : ImmutablePass { + using CallbackT = std::function<void(Pass &, Function &, AAResults &)>; + + CallbackT CB; + + static char ID; + + ExternalAAWrapperPass() : ImmutablePass(ID) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + explicit ExternalAAWrapperPass(CallbackT CB) + : ImmutablePass(ID), CB(std::move(CB)) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +FunctionPass *createAAResultsWrapperPass(); + +/// A wrapper pass around a callback which can be used to populate the +/// AAResults in the AAResultsWrapperPass from an external AA. +/// +/// The callback provided here will be used each time we prepare an AAResults +/// object, and will receive a reference to the function wrapper pass, the +/// function, and the AAResults object to populate. 
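A hedged sketch of wiring the AAManager above into a new-pass-manager FunctionAnalysisManager; the surrounding setup (registering TargetLibraryAnalysis, BasicAA and the other function analyses with the manager) is assumed to happen elsewhere, e.g. through PassBuilder:

  FunctionAnalysisManager FAM;
  // ... TargetLibraryAnalysis, BasicAA, etc. are assumed registered with FAM ...
  AAManager AA;
  AA.registerFunctionAnalysis<BasicAA>();  // registration order is query order
  FAM.registerPass([AA] { return AA; });   // AAManager produces the AAResults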
This should be used when +/// setting up a custom pass pipeline to inject a hook into the AA results. +ImmutablePass *createExternalAAWrapperPass( + std::function<void(Pass &, Function &, AAResults &)> Callback); + +/// A helper for the legacy pass manager to create a \c AAResults +/// object populated to the best of our ability for a particular function when +/// inside of a \c ModulePass or a \c CallGraphSCCPass. +/// +/// If a \c ModulePass or a \c CallGraphSCCPass calls \p +/// createLegacyPMAAResults, it also needs to call \p addUsedAAAnalyses in \p +/// getAnalysisUsage. +AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); + +/// A helper for the legacy pass manager to populate \p AU to add uses to make +/// sure the analyses required by \p createLegacyPMAAResults are available. +void getAAResultsAnalysisUsage(AnalysisUsage &AU); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_ALIASANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/AliasAnalysisEvaluator.h b/clang-r353983e/include/llvm/Analysis/AliasAnalysisEvaluator.h new file mode 100644 index 00000000..972eceaa --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/AliasAnalysisEvaluator.h @@ -0,0 +1,73 @@ +//===- AliasAnalysisEvaluator.h - Alias Analysis Accuracy Evaluator -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a simple N^2 alias analysis accuracy evaluator. The +/// analysis result is a set of statistics of how many times the AA +/// infrastructure provides each kind of alias result and mod/ref result when +/// queried with all pairs of pointers in the function. +/// +/// It can be used to evaluate a change in an alias analysis implementation, +/// algorithm, or the AA pipeline infrastructure itself. It acts like a stable +/// and easily tested consumer of all AA information exposed. +/// +/// This is inspired and adapted from code by: Naveen Neelakantam, Francesco +/// Spadini, and Wojciech Stryjewski. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H +#define LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class AAResults; + +class AAEvaluator : public PassInfoMixin<AAEvaluator> { + int64_t FunctionCount; + int64_t NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount; + int64_t NoModRefCount, ModCount, RefCount, ModRefCount; + int64_t MustCount, MustRefCount, MustModCount, MustModRefCount; + +public: + AAEvaluator() + : FunctionCount(), NoAliasCount(), MayAliasCount(), PartialAliasCount(), + MustAliasCount(), NoModRefCount(), ModCount(), RefCount(), + ModRefCount(), MustCount(), MustRefCount(), MustModCount(), + MustModRefCount() {} + AAEvaluator(AAEvaluator &&Arg) + : FunctionCount(Arg.FunctionCount), NoAliasCount(Arg.NoAliasCount), + MayAliasCount(Arg.MayAliasCount), + PartialAliasCount(Arg.PartialAliasCount), + MustAliasCount(Arg.MustAliasCount), NoModRefCount(Arg.NoModRefCount), + ModCount(Arg.ModCount), RefCount(Arg.RefCount), + ModRefCount(Arg.ModRefCount), MustCount(Arg.MustCount), + MustRefCount(Arg.MustRefCount), MustModCount(Arg.MustModCount), + MustModRefCount(Arg.MustModRefCount) { + Arg.FunctionCount = 0; + } + ~AAEvaluator(); + + /// Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + // Allow the legacy pass to run this using an internal API. + friend class AAEvalLegacyPass; + + void runInternal(Function &F, AAResults &AA); +}; + +/// Create a wrapper of the above for the legacy pass manager. +FunctionPass *createAAEvalPass(); + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/AliasSetTracker.h b/clang-r353983e/include/llvm/Analysis/AliasSetTracker.h new file mode 100644 index 00000000..34a509b7 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/AliasSetTracker.h @@ -0,0 +1,466 @@ +//===- llvm/Analysis/AliasSetTracker.h - Build Alias Sets -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines two classes: AliasSetTracker and AliasSet. These interfaces +// are used to classify a collection of pointer references into a maximal number +// of disjoint sets. Each AliasSet object constructed by the AliasSetTracker +// object refers to memory disjoint from the other sets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ALIASSETTRACKER_H +#define LLVM_ANALYSIS_ALIASSETTRACKER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <vector> + +namespace llvm { + +class AliasSetTracker; +class BasicBlock; +class LoadInst; +class Loop; +class MemorySSA; +class AnyMemSetInst; +class AnyMemTransferInst; +class raw_ostream; +class StoreInst; +class VAArgInst; +class Value; + +class AliasSet : public ilist_node<AliasSet> { + friend class AliasSetTracker; + + class PointerRec { + Value *Val; // The pointer this record corresponds to. + PointerRec **PrevInList = nullptr; + PointerRec *NextInList = nullptr; + AliasSet *AS = nullptr; + LocationSize Size = LocationSize::mapEmpty(); + AAMDNodes AAInfo; + + // Whether the size for this record has been set at all. This makes no + // guarantees about the size being known. + bool isSizeSet() const { return Size != LocationSize::mapEmpty(); } + + public: + PointerRec(Value *V) + : Val(V), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {} + + Value *getValue() const { return Val; } + + PointerRec *getNext() const { return NextInList; } + bool hasAliasSet() const { return AS != nullptr; } + + PointerRec** setPrevInList(PointerRec **PIL) { + PrevInList = PIL; + return &NextInList; + } + + bool updateSizeAndAAInfo(LocationSize NewSize, const AAMDNodes &NewAAInfo) { + bool SizeChanged = false; + if (NewSize != Size) { + LocationSize OldSize = Size; + Size = isSizeSet() ? Size.unionWith(NewSize) : NewSize; + SizeChanged = OldSize != Size; + } + + if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey()) + // We don't have a AAInfo yet. Set it to NewAAInfo. + AAInfo = NewAAInfo; + else { + AAMDNodes Intersection(AAInfo.intersect(NewAAInfo)); + if (!Intersection) { + // NewAAInfo conflicts with AAInfo. + AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey(); + return SizeChanged; + } + AAInfo = Intersection; + } + return SizeChanged; + } + + LocationSize getSize() const { + assert(isSizeSet() && "Getting an unset size!"); + return Size; + } + + /// Return the AAInfo, or null if there is no information or conflicting + /// information. + AAMDNodes getAAInfo() const { + // If we have missing or conflicting AAInfo, return null. + if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey() || + AAInfo == DenseMapInfo<AAMDNodes>::getTombstoneKey()) + return AAMDNodes(); + return AAInfo; + } + + AliasSet *getAliasSet(AliasSetTracker &AST) { + assert(AS && "No AliasSet yet!"); + if (AS->Forward) { + AliasSet *OldAS = AS; + AS = OldAS->getForwardedTarget(AST); + AS->addRef(); + OldAS->dropRef(AST); + } + return AS; + } + + void setAliasSet(AliasSet *as) { + assert(!AS && "Already have an alias set!"); + AS = as; + } + + void eraseFromList() { + if (NextInList) NextInList->PrevInList = PrevInList; + *PrevInList = NextInList; + if (AS->PtrListEnd == &NextInList) { + AS->PtrListEnd = PrevInList; + assert(*AS->PtrListEnd == nullptr && "List not terminated right!"); + } + delete this; + } + }; + + // Doubly linked list of nodes. + PointerRec *PtrList = nullptr; + PointerRec **PtrListEnd; + // Forwarding pointer. 
+ AliasSet *Forward = nullptr; + + /// All instructions without a specific address in this alias set. + /// In rare cases this vector can have a null'ed out WeakVH + /// instances (can happen if some other loop pass deletes an + /// instruction in this list). + std::vector<WeakVH> UnknownInsts; + + /// Number of nodes pointing to this AliasSet plus the number of AliasSets + /// forwarding to it. + unsigned RefCount : 27; + + // Signifies that this set should be considered to alias any pointer. + // Use when the tracker holding this set is saturated. + unsigned AliasAny : 1; + + /// The kinds of access this alias set models. + /// + /// We keep track of whether this alias set merely refers to the locations of + /// memory (and not any particular access), whether it modifies or references + /// the memory, or whether it does both. The lattice goes from "NoAccess" to + /// either RefAccess or ModAccess, then to ModRefAccess as necessary. + enum AccessLattice { + NoAccess = 0, + RefAccess = 1, + ModAccess = 2, + ModRefAccess = RefAccess | ModAccess + }; + unsigned Access : 2; + + /// The kind of alias relationship between pointers of the set. + /// + /// These represent conservatively correct alias results between any members + /// of the set. We represent these independently of the values of alias + /// results in order to pack it into a single bit. Lattice goes from + /// MustAlias to MayAlias. + enum AliasLattice { + SetMustAlias = 0, SetMayAlias = 1 + }; + unsigned Alias : 1; + + unsigned SetSize = 0; + + void addRef() { ++RefCount; } + + void dropRef(AliasSetTracker &AST) { + assert(RefCount >= 1 && "Invalid reference count detected!"); + if (--RefCount == 0) + removeFromTracker(AST); + } + + Instruction *getUnknownInst(unsigned i) const { + assert(i < UnknownInsts.size()); + return cast_or_null<Instruction>(UnknownInsts[i]); + } + +public: + AliasSet(const AliasSet &) = delete; + AliasSet &operator=(const AliasSet &) = delete; + + /// Accessors... + bool isRef() const { return Access & RefAccess; } + bool isMod() const { return Access & ModAccess; } + bool isMustAlias() const { return Alias == SetMustAlias; } + bool isMayAlias() const { return Alias == SetMayAlias; } + + /// Return true if this alias set should be ignored as part of the + /// AliasSetTracker object. + bool isForwardingAliasSet() const { return Forward; } + + /// Merge the specified alias set into this alias set. + void mergeSetIn(AliasSet &AS, AliasSetTracker &AST); + + // Alias Set iteration - Allow access to all of the pointers which are part of + // this alias set. + class iterator; + iterator begin() const { return iterator(PtrList); } + iterator end() const { return iterator(); } + bool empty() const { return PtrList == nullptr; } + + // Unfortunately, ilist::size() is linear, so we have to add code to keep + // track of the list's exact size. + unsigned size() { return SetSize; } + + /// If this alias set is known to contain a single instruction and *only* a + /// single unique instruction, return it. Otherwise, return nullptr. + Instruction* getUniqueInstruction(); + + void print(raw_ostream &OS) const; + void dump() const; + + /// Define an iterator for alias sets... this is just a forward iterator. 
+ class iterator : public std::iterator<std::forward_iterator_tag, + PointerRec, ptrdiff_t> { + PointerRec *CurNode; + + public: + explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} + + bool operator==(const iterator& x) const { + return CurNode == x.CurNode; + } + bool operator!=(const iterator& x) const { return !operator==(x); } + + value_type &operator*() const { + assert(CurNode && "Dereferencing AliasSet.end()!"); + return *CurNode; + } + value_type *operator->() const { return &operator*(); } + + Value *getPointer() const { return CurNode->getValue(); } + LocationSize getSize() const { return CurNode->getSize(); } + AAMDNodes getAAInfo() const { return CurNode->getAAInfo(); } + + iterator& operator++() { // Preincrement + assert(CurNode && "Advancing past AliasSet.end()!"); + CurNode = CurNode->getNext(); + return *this; + } + iterator operator++(int) { // Postincrement + iterator tmp = *this; ++*this; return tmp; + } + }; + +private: + // Can only be created by AliasSetTracker. + AliasSet() + : PtrListEnd(&PtrList), RefCount(0), AliasAny(false), Access(NoAccess), + Alias(SetMustAlias) {} + + PointerRec *getSomePointer() const { + return PtrList; + } + + /// Return the real alias set this represents. If this has been merged with + /// another set and is forwarding, return the ultimate destination set. This + /// also implements the union-find collapsing as well. + AliasSet *getForwardedTarget(AliasSetTracker &AST) { + if (!Forward) return this; + + AliasSet *Dest = Forward->getForwardedTarget(AST); + if (Dest != Forward) { + Dest->addRef(); + Forward->dropRef(AST); + Forward = Dest; + } + return Dest; + } + + void removeFromTracker(AliasSetTracker &AST); + + void addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size, + const AAMDNodes &AAInfo, bool KnownMustAlias = false, + bool SkipSizeUpdate = false); + void addUnknownInst(Instruction *I, AliasAnalysis &AA); + + void removeUnknownInst(AliasSetTracker &AST, Instruction *I) { + bool WasEmpty = UnknownInsts.empty(); + for (size_t i = 0, e = UnknownInsts.size(); i != e; ++i) + if (UnknownInsts[i] == I) { + UnknownInsts[i] = UnknownInsts.back(); + UnknownInsts.pop_back(); + --i; --e; // Revisit the moved entry. + } + if (!WasEmpty && UnknownInsts.empty()) + dropRef(AST); + } + +public: + /// If the specified pointer "may" (or must) alias one of the members in the + /// set return the appropriate AliasResult. Otherwise return NoAlias. + AliasResult aliasesPointer(const Value *Ptr, LocationSize Size, + const AAMDNodes &AAInfo, AliasAnalysis &AA) const; + bool aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const; +}; + +inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { + AS.print(OS); + return OS; +} + +class AliasSetTracker { + /// A CallbackVH to arrange for AliasSetTracker to be notified whenever a + /// Value is deleted. + class ASTCallbackVH final : public CallbackVH { + AliasSetTracker *AST; + + void deleted() override; + void allUsesReplacedWith(Value *) override; + + public: + ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr); + + ASTCallbackVH &operator=(Value *V); + }; + /// Traits to tell DenseMap that tell us how to compare and hash the value + /// handle. 
+ struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {}; + + AliasAnalysis &AA; + MemorySSA *MSSA; + Loop *L; + ilist<AliasSet> AliasSets; + + using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *, + ASTCallbackVHDenseMapInfo>; + + // Map from pointers to their node + PointerMapType PointerMap; + +public: + /// Create an empty collection of AliasSets, and use the specified alias + /// analysis object to disambiguate load and store addresses. + explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {} + explicit AliasSetTracker(AliasAnalysis &aa, MemorySSA *mssa, Loop *l) + : AA(aa), MSSA(mssa), L(l) {} + ~AliasSetTracker() { clear(); } + + /// These methods are used to add different types of instructions to the alias + /// sets. Adding a new instruction can result in one of three actions + /// happening: + /// + /// 1. If the instruction doesn't alias any other sets, create a new set. + /// 2. If the instruction aliases exactly one set, add it to the set + /// 3. If the instruction aliases multiple sets, merge the sets, and add + /// the instruction to the result. + /// + /// These methods return true if inserting the instruction resulted in the + /// addition of a new alias set (i.e., the pointer did not alias anything). + /// + void add(Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo); // Add a loc + void add(LoadInst *LI); + void add(StoreInst *SI); + void add(VAArgInst *VAAI); + void add(AnyMemSetInst *MSI); + void add(AnyMemTransferInst *MTI); + void add(Instruction *I); // Dispatch to one of the other add methods... + void add(BasicBlock &BB); // Add all instructions in basic block + void add(const AliasSetTracker &AST); // Add alias relations from another AST + void addUnknown(Instruction *I); + void addAllInstructionsInLoopUsingMSSA(); + + void clear(); + + /// Return the alias sets that are active. + const ilist<AliasSet> &getAliasSets() const { return AliasSets; } + + /// Return the alias set which contains the specified memory location. If + /// the memory location aliases two or more existing alias sets, will have + /// the effect of merging those alias sets before the single resulting alias + /// set is returned. + AliasSet &getAliasSetFor(const MemoryLocation &MemLoc); + + /// Return the underlying alias analysis object used by this tracker. + AliasAnalysis &getAliasAnalysis() const { return AA; } + + /// This method is used to remove a pointer value from the AliasSetTracker + /// entirely. It should be used when an instruction is deleted from the + /// program to update the AST. If you don't use this, you would have dangling + /// pointers to deleted instructions. + void deleteValue(Value *PtrVal); + + /// This method should be used whenever a preexisting value in the program is + /// copied or cloned, introducing a new value. Note that it is ok for clients + /// that use this method to introduce the same value multiple times: if the + /// tracker already knows about a value, it will ignore the request. + void copyValue(Value *From, Value *To); + + using iterator = ilist<AliasSet>::iterator; + using const_iterator = ilist<AliasSet>::const_iterator; + + const_iterator begin() const { return AliasSets.begin(); } + const_iterator end() const { return AliasSets.end(); } + + iterator begin() { return AliasSets.begin(); } + iterator end() { return AliasSets.end(); } + + void print(raw_ostream &OS) const; + void dump() const; + +private: + friend class AliasSet; + + // The total number of pointers contained in all "may" alias sets. 
+ unsigned TotalMayAliasSetSize = 0; + + // A non-null value signifies this AST is saturated. A saturated AST lumps + // all pointers into a single "May" set. + AliasSet *AliasAnyAS = nullptr; + + void removeAliasSet(AliasSet *AS); + + /// Just like operator[] on the map, except that it creates an entry for the + /// pointer if it doesn't already exist. + AliasSet::PointerRec &getEntryFor(Value *V) { + AliasSet::PointerRec *&Entry = PointerMap[ASTCallbackVH(V, this)]; + if (!Entry) + Entry = new AliasSet::PointerRec(V); + return *Entry; + } + + AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E); + AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size, + const AAMDNodes &AAInfo, + bool &MustAliasAll); + + /// Merge all alias sets into a single set that is considered to alias any + /// pointer. + AliasSet &mergeAllAliasSets(); + + AliasSet *findAliasSetForUnknownInst(Instruction *Inst); +}; + +inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) { + AST.print(OS); + return OS; +} + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_ALIASSETTRACKER_H diff --git a/clang-r353983e/include/llvm/Analysis/AssumptionCache.h b/clang-r353983e/include/llvm/Analysis/AssumptionCache.h new file mode 100644 index 00000000..b4284647 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/AssumptionCache.h @@ -0,0 +1,239 @@ +//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that keeps track of @llvm.assume intrinsics in +// the functions of a module (allowing assumptions within any function to be +// found cheaply by other parts of the optimizer). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ASSUMPTIONCACHE_H +#define LLVM_ANALYSIS_ASSUMPTIONCACHE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <memory> + +namespace llvm { + +class CallInst; +class Function; +class raw_ostream; +class Value; + +/// A cache of \@llvm.assume calls within a function. +/// +/// This cache provides fast lookup of assumptions within a function by caching +/// them and amortizing the cost of scanning for them across all queries. Passes +/// that create new assumptions are required to call registerAssumption() to +/// register any new \@llvm.assume calls that they create. Deletions of +/// \@llvm.assume calls do not require special handling. +class AssumptionCache { + /// The function for which this cache is handling assumptions. + /// + /// We track this to lazily populate our assumptions. + Function &F; + + /// Vector of weak value handles to calls of the \@llvm.assume + /// intrinsic. 
+ SmallVector<WeakTrackingVH, 4> AssumeHandles; + + class AffectedValueCallbackVH final : public CallbackVH { + AssumptionCache *AC; + + void deleted() override; + void allUsesReplacedWith(Value *) override; + + public: + using DMI = DenseMapInfo<Value *>; + + AffectedValueCallbackVH(Value *V, AssumptionCache *AC = nullptr) + : CallbackVH(V), AC(AC) {} + }; + + friend AffectedValueCallbackVH; + + /// A map of values about which an assumption might be providing + /// information to the relevant set of assumptions. + using AffectedValuesMap = + DenseMap<AffectedValueCallbackVH, SmallVector<WeakTrackingVH, 1>, + AffectedValueCallbackVH::DMI>; + AffectedValuesMap AffectedValues; + + /// Get the vector of assumptions which affect a value from the cache. + SmallVector<WeakTrackingVH, 1> &getOrInsertAffectedValues(Value *V); + + /// Copy affected values in the cache for OV to be affected values for NV. + void copyAffectedValuesInCache(Value *OV, Value *NV); + + /// Flag tracking whether we have scanned the function yet. + /// + /// We want to be as lazy about this as possible, and so we scan the function + /// at the last moment. + bool Scanned = false; + + /// Scan the function for assumptions and add them to the cache. + void scanFunction(); + +public: + /// Construct an AssumptionCache from a function by scanning all of + /// its instructions. + AssumptionCache(Function &F) : F(F) {} + + /// This cache is designed to be self-updating and so it should never be + /// invalidated. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + + /// Add an \@llvm.assume intrinsic to this function's cache. + /// + /// The call passed in must be an instruction within this function and must + /// not already be in the cache. + void registerAssumption(CallInst *CI); + + /// Remove an \@llvm.assume intrinsic from this function's cache if it has + /// been added to the cache earlier. + void unregisterAssumption(CallInst *CI); + + /// Update the cache of values being affected by this assumption (i.e. + /// the values about which this assumption provides information). + void updateAffectedValues(CallInst *CI); + + /// Clear the cache of \@llvm.assume intrinsics for a function. + /// + /// It will be re-scanned the next time it is requested. + void clear() { + AssumeHandles.clear(); + AffectedValues.clear(); + Scanned = false; + } + + /// Access the list of assumption handles currently tracked for this + /// function. + /// + /// Note that these produce weak handles that may be null. The caller must + /// handle that case. + /// FIXME: We should replace this with pointee_iterator<filter_iterator<...>> + /// when we can write that to filter out the null values. Then caller code + /// will become simpler. + MutableArrayRef<WeakTrackingVH> assumptions() { + if (!Scanned) + scanFunction(); + return AssumeHandles; + } + + /// Access the list of assumptions which affect this value. + MutableArrayRef<WeakTrackingVH> assumptionsFor(const Value *V) { + if (!Scanned) + scanFunction(); + + auto AVI = AffectedValues.find_as(const_cast<Value *>(V)); + if (AVI == AffectedValues.end()) + return MutableArrayRef<WeakTrackingVH>(); + + return AVI->second; + } +}; + +/// A function analysis which provides an \c AssumptionCache. +/// +/// This analysis is intended for use with the new pass manager and will vend +/// assumption caches for a given function. 
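+///
+/// A minimal usage sketch under the new pass manager (\c FAM, \c F, and \c V
+/// are hypothetical names for an analysis manager, a function, and a value):
+/// \code
+///   AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
+///   for (auto &VH : AC.assumptionsFor(V))
+///     if (auto *Assume = cast_or_null<CallInst>(VH))
+///       inspect(Assume); // inspect is a placeholder
+/// \endcode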
+class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> { + friend AnalysisInfoMixin<AssumptionAnalysis>; + + static AnalysisKey Key; + +public: + using Result = AssumptionCache; + + AssumptionCache run(Function &F, FunctionAnalysisManager &) { + return AssumptionCache(F); + } +}; + +/// Printer pass for the \c AssumptionAnalysis results. +class AssumptionPrinterPass : public PassInfoMixin<AssumptionPrinterPass> { + raw_ostream &OS; + +public: + explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// An immutable pass that tracks lazily created \c AssumptionCache +/// objects. +/// +/// This is essentially a workaround for the legacy pass manager's weaknesses +/// which associates each assumption cache with Function and clears it if the +/// function is deleted. The nature of the AssumptionCache is that it is not +/// invalidated by any changes to the function body and so this is sufficient +/// to be conservatively correct. +class AssumptionCacheTracker : public ImmutablePass { + /// A callback value handle applied to function objects, which we use to + /// delete our cache of intrinsics for a function when it is deleted. + class FunctionCallbackVH final : public CallbackVH { + AssumptionCacheTracker *ACT; + + void deleted() override; + + public: + using DMI = DenseMapInfo<Value *>; + + FunctionCallbackVH(Value *V, AssumptionCacheTracker *ACT = nullptr) + : CallbackVH(V), ACT(ACT) {} + }; + + friend FunctionCallbackVH; + + using FunctionCallsMap = + DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>, + FunctionCallbackVH::DMI>; + + FunctionCallsMap AssumptionCaches; + +public: + /// Get the cached assumptions for a function. + /// + /// If no assumptions are cached, this will scan the function. Otherwise, the + /// existing cache will be returned. + AssumptionCache &getAssumptionCache(Function &F); + + /// Return the cached assumptions for a function if it has already been + /// scanned. Otherwise return nullptr. + AssumptionCache *lookupAssumptionCache(Function &F); + + AssumptionCacheTracker(); + ~AssumptionCacheTracker() override; + + void releaseMemory() override { + verifyAnalysis(); + AssumptionCaches.shrink_and_clear(); + } + + void verifyAnalysis() const override; + + bool doFinalization(Module &) override { + verifyAnalysis(); + return false; + } + + static char ID; // Pass identification, replacement for typeid +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_ASSUMPTIONCACHE_H diff --git a/clang-r353983e/include/llvm/Analysis/BasicAliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/BasicAliasAnalysis.h new file mode 100644 index 00000000..29f584ce --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/BasicAliasAnalysis.h @@ -0,0 +1,278 @@ +//===- BasicAliasAnalysis.h - Stateless, local Alias Analysis ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's primary stateless and local alias analysis. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H +#define LLVM_ANALYSIS_BASICALIASANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <utility> + +namespace llvm { + +struct AAMDNodes; +class APInt; +class AssumptionCache; +class BasicBlock; +class DataLayout; +class DominatorTree; +class Function; +class GEPOperator; +class LoopInfo; +class PHINode; +class SelectInst; +class TargetLibraryInfo; +class PhiValues; +class Value; + +/// This is the AA result object for the basic, local, and stateless alias +/// analysis. It implements the AA query interface in an entirely stateless +/// manner. As one consequence, it is never invalidated due to IR changes. +/// While it does retain some storage, that is used as an optimization and not +/// to preserve information from query to query. However it does retain handles +/// to various other analyses and must be recomputed when those analyses are. +class BasicAAResult : public AAResultBase<BasicAAResult> { + friend AAResultBase<BasicAAResult>; + + const DataLayout &DL; + const Function &F; + const TargetLibraryInfo &TLI; + AssumptionCache &AC; + DominatorTree *DT; + LoopInfo *LI; + PhiValues *PV; + +public: + BasicAAResult(const DataLayout &DL, const Function &F, + const TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, + PhiValues *PV = nullptr) + : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), PV(PV) + {} + + BasicAAResult(const BasicAAResult &Arg) + : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), + DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} + BasicAAResult(BasicAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), + AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} + + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &Fn, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); + + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2); + + /// Chases pointers until we find a (constant global) or not. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx); + + /// Returns the behavior when calling the given call site. + FunctionModRefBehavior getModRefBehavior(const CallBase *Call); + + /// Returns the behavior when calling the given function. For use when the + /// call site is not known. + FunctionModRefBehavior getModRefBehavior(const Function *Fn); + +private: + // A linear transformation of a Value; this class represents ZExt(SExt(V, + // SExtBits), ZExtBits) * Scale + Offset. + struct VariableGEPIndex { + // An opaque Value - we can't decompose this further. 
+ const Value *V; + + // We need to track what extensions we've done as we consider the same Value + // with different extensions as different variables in a GEP's linear + // expression; + // e.g.: if V == -1, then sext(x) != zext(x). + unsigned ZExtBits; + unsigned SExtBits; + + APInt Scale; + + bool operator==(const VariableGEPIndex &Other) const { + return V == Other.V && ZExtBits == Other.ZExtBits && + SExtBits == Other.SExtBits && Scale == Other.Scale; + } + + bool operator!=(const VariableGEPIndex &Other) const { + return !operator==(Other); + } + }; + + // Represents the internal structure of a GEP, decomposed into a base pointer, + // constant offsets, and variable scaled indices. + struct DecomposedGEP { + // Base pointer of the GEP + const Value *Base; + // Total constant offset w.r.t the base from indexing into structs + APInt StructOffset; + // Total constant offset w.r.t the base from indexing through + // pointers/arrays/vectors + APInt OtherOffset; + // Scaled variable (non-constant) indices. + SmallVector<VariableGEPIndex, 4> VarIndices; + }; + + /// Track alias queries to guard against recursion. + using LocPair = std::pair<MemoryLocation, MemoryLocation>; + using AliasCacheTy = SmallDenseMap<LocPair, AliasResult, 8>; + AliasCacheTy AliasCache; + using IsCapturedCacheTy = SmallDenseMap<const Value *, bool, 8>; + IsCapturedCacheTy IsCapturedCache; + + /// Tracks phi nodes we have visited. + /// + /// When interpret "Value" pointer equality as value equality we need to make + /// sure that the "Value" is not part of a cycle. Otherwise, two uses could + /// come from different "iterations" of a cycle and see different values for + /// the same "Value" pointer. + /// + /// The following example shows the problem: + /// %p = phi(%alloca1, %addr2) + /// %l = load %ptr + /// %addr1 = gep, %alloca2, 0, %l + /// %addr2 = gep %alloca2, 0, (%l + 1) + /// alias(%p, %addr1) -> MayAlias ! + /// store %l, ... + SmallPtrSet<const BasicBlock *, 8> VisitedPhiBBs; + + /// Tracks instructions visited by pointsToConstantMemory. + SmallPtrSet<const Value *, 16> Visited; + + static const Value * + GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset, + unsigned &ZExtBits, unsigned &SExtBits, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, + DominatorTree *DT, bool &NSW, bool &NUW); + + static bool DecomposeGEPExpression(const Value *V, DecomposedGEP &Decomposed, + const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); + + static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, + const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, + LocationSize ObjectAccessSize); + + /// A Heuristic for aliasGEP that searches for a constant offset + /// between the variables. + /// + /// GetLinearExpression has some limitations, as generally zext(%x + 1) + /// != zext(%x) + zext(1) if the arithmetic overflows. GetLinearExpression + /// will therefore conservatively refuse to decompose these expressions. + /// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if + /// the addition overflows. 
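+  ///
+  /// For example, with i8 %x == 255: zext(%x + 1) is zext(i8 0) == 0, whereas
+  /// zext(%x) + zext(1) is 255 + 1 == 256, so the two decompositions differ;
+  /// yet zext(%x) == 255 and zext(%x + 1) == 0 are still distinct values, as
+  /// the heuristic relies on.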
+ bool + constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices, + LocationSize V1Size, LocationSize V2Size, + APInt BaseOffset, AssumptionCache *AC, + DominatorTree *DT); + + bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); + + void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src); + + AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size, + const AAMDNodes &V1AAInfo, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2); + + AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize, + const AAMDNodes &PNAAInfo, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderV2); + + AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize, + const AAMDNodes &SIAAInfo, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderV2); + + AliasResult aliasCheck(const Value *V1, LocationSize V1Size, + AAMDNodes V1AATag, const Value *V2, + LocationSize V2Size, AAMDNodes V2AATag, + const Value *O1 = nullptr, const Value *O2 = nullptr); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class BasicAA : public AnalysisInfoMixin<BasicAA> { + friend AnalysisInfoMixin<BasicAA>; + + static AnalysisKey Key; + +public: + using Result = BasicAAResult; + + BasicAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the BasicAAResult object. +class BasicAAWrapperPass : public FunctionPass { + std::unique_ptr<BasicAAResult> Result; + + virtual void anchor(); + +public: + static char ID; + + BasicAAWrapperPass(); + + BasicAAResult &getResult() { return *Result; } + const BasicAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +FunctionPass *createBasicAAWrapperPass(); + +/// A helper for the legacy pass manager to create a \c BasicAAResult object +/// populated to the best of our ability for a particular function when inside +/// of a \c ModulePass or a \c CallGraphSCCPass. +BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F); + +/// This class is a functor to be used in legacy module or SCC passes for +/// computing AA results for a function. We store the results in fields so that +/// they live long enough to be queried, but we re-use them each time. +class LegacyAARGetter { + Pass &P; + Optional<BasicAAResult> BAR; + Optional<AAResults> AAR; + +public: + LegacyAARGetter(Pass &P) : P(P) {} + AAResults &operator()(Function &F) { + BAR.emplace(createLegacyPMBasicAAResult(P, F)); + AAR.emplace(createLegacyPMAAResults(P, F, *BAR)); + return *AAR; + } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_BASICALIASANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/BlockFrequencyInfo.h b/clang-r353983e/include/llvm/Analysis/BlockFrequencyInfo.h new file mode 100644 index 00000000..c73c7fab --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/BlockFrequencyInfo.h @@ -0,0 +1,155 @@ +//===- BlockFrequencyInfo.h - Block Frequency Analysis ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFO_H
+#define LLVM_ANALYSIS_BLOCKFREQUENCYINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include <cstdint>
+#include <memory>
+
+namespace llvm {
+
+class BasicBlock;
+class BranchProbabilityInfo;
+class Function;
+class LoopInfo;
+class Module;
+class raw_ostream;
+template <class BlockT> class BlockFrequencyInfoImpl;
+
+enum PGOViewCountsType { PGOVCT_None, PGOVCT_Graph, PGOVCT_Text };
+
+/// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to
+/// estimate IR basic block frequencies.
+class BlockFrequencyInfo {
+  using ImplType = BlockFrequencyInfoImpl<BasicBlock>;
+
+  std::unique_ptr<ImplType> BFI;
+
+public:
+  BlockFrequencyInfo();
+  BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI,
+                     const LoopInfo &LI);
+  BlockFrequencyInfo(const BlockFrequencyInfo &) = delete;
+  BlockFrequencyInfo &operator=(const BlockFrequencyInfo &) = delete;
+  BlockFrequencyInfo(BlockFrequencyInfo &&Arg);
+  BlockFrequencyInfo &operator=(BlockFrequencyInfo &&RHS);
+  ~BlockFrequencyInfo();
+
+  /// Handle invalidation explicitly.
+  bool invalidate(Function &F, const PreservedAnalyses &PA,
+                  FunctionAnalysisManager::Invalidator &);
+
+  const Function *getFunction() const;
+  const BranchProbabilityInfo *getBPI() const;
+  void view(StringRef = "BlockFrequencyDAGs") const;
+
+  /// getBlockFreq - Return block frequency. Return 0 if we don't have the
+  /// information. Please note that the initial frequency is equal to
+  /// ENTRY_FREQ. This means that we should not rely on the value itself, but
+  /// only on the comparison to other block frequencies. We do this to avoid
+  /// using floating point.
+  BlockFrequency getBlockFreq(const BasicBlock *BB) const;
+
+  /// Returns the estimated profile count of \p BB.
+  /// This computes the relative block frequency of \p BB and multiplies it by
+  /// the enclosing function's count (if available) and returns the value.
+  Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB) const;
+
+  /// Returns the estimated profile count of \p Freq.
+  /// This uses the frequency \p Freq and multiplies it by
+  /// the enclosing function's count (if available) and returns the value.
+  Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;
+
+  /// Returns true if \p BB is an irreducible loop header
+  /// block. Otherwise false.
+  bool isIrrLoopHeader(const BasicBlock *BB);
+
+  // Set the frequency of the given basic block.
+  void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
+
+  /// Set the frequency of \p ReferenceBB to \p Freq and scale the frequencies
+  /// of the blocks in \p BlocksToScale such that their frequencies relative
+  /// to \p ReferenceBB remain unchanged.
+  void setBlockFreqAndScale(const BasicBlock *ReferenceBB, uint64_t Freq,
+                            SmallPtrSetImpl<BasicBlock *> &BlocksToScale);
+
+  /// calculate - compute block frequency info for the given function.
+  void calculate(const Function &F, const BranchProbabilityInfo &BPI,
+                 const LoopInfo &LI);
+
+  // Print the block frequency \p Freq to \p OS using the current function's
+  // entry frequency to convert \p Freq into a relative decimal form.
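+  //
+  // For illustration only (a hypothetical caller; markHot is not part of this
+  // interface), frequencies are meaningful only relative to getEntryFreq():
+  //   if (BFI.getBlockFreq(BB) >= BlockFrequency(BFI.getEntryFreq() / 2))
+  //     markHot(BB);
+  // printBlockFreq renders a frequency in this same relative form.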
+ raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; + + // Convenience method that attempts to look up the frequency associated with + // BB and print it to OS. + raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const; + + uint64_t getEntryFreq() const; + void releaseMemory(); + void print(raw_ostream &OS) const; +}; + +/// Analysis pass which computes \c BlockFrequencyInfo. +class BlockFrequencyAnalysis + : public AnalysisInfoMixin<BlockFrequencyAnalysis> { + friend AnalysisInfoMixin<BlockFrequencyAnalysis>; + + static AnalysisKey Key; + +public: + /// Provide the result type for this analysis pass. + using Result = BlockFrequencyInfo; + + /// Run the analysis pass over a function and produce BFI. + Result run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c BlockFrequencyInfo results. +class BlockFrequencyPrinterPass + : public PassInfoMixin<BlockFrequencyPrinterPass> { + raw_ostream &OS; + +public: + explicit BlockFrequencyPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy analysis pass which computes \c BlockFrequencyInfo. +class BlockFrequencyInfoWrapperPass : public FunctionPass { + BlockFrequencyInfo BFI; + +public: + static char ID; + + BlockFrequencyInfoWrapperPass(); + ~BlockFrequencyInfoWrapperPass() override; + + BlockFrequencyInfo &getBFI() { return BFI; } + const BlockFrequencyInfo &getBFI() const { return BFI; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M) const override; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFO_H diff --git a/clang-r353983e/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/clang-r353983e/include/llvm/Analysis/BlockFrequencyInfoImpl.h new file mode 100644 index 00000000..813bad49 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -0,0 +1,1471 @@ +//==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Shared implementation of BlockFrequency for IR and Machine Instructions. +// See the documentation below for BlockFrequencyInfoImpl for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H +#define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ScaledNumber.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <deque> +#include <iterator> +#include <limits> +#include <list> +#include <string> +#include <utility> +#include <vector> + +#define DEBUG_TYPE "block-freq" + +namespace llvm { + +class BranchProbabilityInfo; +class Function; +class Loop; +class LoopInfo; +class MachineBasicBlock; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineLoop; +class MachineLoopInfo; + +namespace bfi_detail { + +struct IrreducibleGraph; + +// This is part of a workaround for a GCC 4.7 crash on lambdas. +template <class BT> struct BlockEdgesAdder; + +/// Mass of a block. +/// +/// This class implements a sort of fixed-point fraction always between 0.0 and +/// 1.0. getMass() == std::numeric_limits<uint64_t>::max() indicates a value of +/// 1.0. +/// +/// Masses can be added and subtracted. Simple saturation arithmetic is used, +/// so arithmetic operations never overflow or underflow. +/// +/// Masses can be multiplied. Multiplication treats full mass as 1.0 and uses +/// an inexpensive floating-point algorithm that's off-by-one (almost, but not +/// quite, maximum precision). +/// +/// Masses can be scaled by \a BranchProbability at maximum precision. +class BlockMass { + uint64_t Mass = 0; + +public: + BlockMass() = default; + explicit BlockMass(uint64_t Mass) : Mass(Mass) {} + + static BlockMass getEmpty() { return BlockMass(); } + + static BlockMass getFull() { + return BlockMass(std::numeric_limits<uint64_t>::max()); + } + + uint64_t getMass() const { return Mass; } + + bool isFull() const { return Mass == std::numeric_limits<uint64_t>::max(); } + bool isEmpty() const { return !Mass; } + + bool operator!() const { return isEmpty(); } + + /// Add another mass. + /// + /// Adds another mass, saturating at \a isFull() rather than overflowing. + BlockMass &operator+=(BlockMass X) { + uint64_t Sum = Mass + X.Mass; + Mass = Sum < Mass ? std::numeric_limits<uint64_t>::max() : Sum; + return *this; + } + + /// Subtract another mass. + /// + /// Subtracts another mass, saturating at \a isEmpty() rather than + /// undeflowing. + BlockMass &operator-=(BlockMass X) { + uint64_t Diff = Mass - X.Mass; + Mass = Diff > Mass ? 
0 : Diff;
+    return *this;
+  }
+
+  BlockMass &operator*=(BranchProbability P) {
+    Mass = P.scale(Mass);
+    return *this;
+  }
+
+  bool operator==(BlockMass X) const { return Mass == X.Mass; }
+  bool operator!=(BlockMass X) const { return Mass != X.Mass; }
+  bool operator<=(BlockMass X) const { return Mass <= X.Mass; }
+  bool operator>=(BlockMass X) const { return Mass >= X.Mass; }
+  bool operator<(BlockMass X) const { return Mass < X.Mass; }
+  bool operator>(BlockMass X) const { return Mass > X.Mass; }
+
+  /// Convert to scaled number.
+  ///
+  /// Convert to \a ScaledNumber. \a isFull() gives 1.0, while \a isEmpty()
+  /// gives slightly above 0.0.
+  ScaledNumber<uint64_t> toScaled() const;
+
+  void dump() const;
+  raw_ostream &print(raw_ostream &OS) const;
+};
+
+inline BlockMass operator+(BlockMass L, BlockMass R) {
+  return BlockMass(L) += R;
+}
+inline BlockMass operator-(BlockMass L, BlockMass R) {
+  return BlockMass(L) -= R;
+}
+inline BlockMass operator*(BlockMass L, BranchProbability R) {
+  return BlockMass(L) *= R;
+}
+inline BlockMass operator*(BranchProbability L, BlockMass R) {
+  return BlockMass(R) *= L;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) {
+  return X.print(OS);
+}
+
+} // end namespace bfi_detail
+
+/// Base class for BlockFrequencyInfoImpl.
+///
+/// BlockFrequencyInfoImplBase has supporting data structures and some
+/// algorithms for BlockFrequencyInfoImpl. Only algorithms that depend on
+/// the block type (or that call such algorithms) are skipped here.
+///
+/// Nevertheless, the majority of the overall algorithm documentation lives
+/// with BlockFrequencyInfoImpl. See there for details.
+class BlockFrequencyInfoImplBase {
+public:
+  using Scaled64 = ScaledNumber<uint64_t>;
+  using BlockMass = bfi_detail::BlockMass;
+
+  /// Representative of a block.
+  ///
+  /// This is a simple wrapper around an index into the reverse-post-order
+  /// traversal of the blocks.
+  ///
+  /// Unlike a block pointer, its order has meaning (location in the
+  /// topological sort) and its class is the same regardless of block type.
+  struct BlockNode {
+    using IndexType = uint32_t;
+
+    IndexType Index;
+
+    BlockNode() : Index(std::numeric_limits<uint32_t>::max()) {}
+    BlockNode(IndexType Index) : Index(Index) {}
+
+    bool operator==(const BlockNode &X) const { return Index == X.Index; }
+    bool operator!=(const BlockNode &X) const { return Index != X.Index; }
+    bool operator<=(const BlockNode &X) const { return Index <= X.Index; }
+    bool operator>=(const BlockNode &X) const { return Index >= X.Index; }
+    bool operator<(const BlockNode &X) const { return Index < X.Index; }
+    bool operator>(const BlockNode &X) const { return Index > X.Index; }
+
+    bool isValid() const { return Index <= getMaxIndex(); }
+
+    static size_t getMaxIndex() {
+      return std::numeric_limits<uint32_t>::max() - 1;
+    }
+  };
+
+  /// Stats about a block itself.
+  struct FrequencyData {
+    Scaled64 Scaled;
+    uint64_t Integer;
+  };
+
+  /// Data about a loop.
+  ///
+  /// Contains the data necessary to represent a loop as a pseudo-node once
+  /// it's packaged.
+  struct LoopData {
+    using ExitMap = SmallVector<std::pair<BlockNode, BlockMass>, 4>;
+    using NodeList = SmallVector<BlockNode, 4>;
+    using HeaderMassList = SmallVector<BlockMass, 1>;
+
+    LoopData *Parent;        ///< The parent loop.
+    bool IsPackaged = false; ///< Whether this has been packaged.
+    uint32_t NumHeaders = 1; ///< Number of headers.
+    ExitMap Exits;           ///< Successor edges (and weights).
+ NodeList Nodes; ///< Header and the members of the loop. + HeaderMassList BackedgeMass; ///< Mass returned to each loop header. + BlockMass Mass; + Scaled64 Scale; + + LoopData(LoopData *Parent, const BlockNode &Header) + : Parent(Parent), Nodes(1, Header), BackedgeMass(1) {} + + template <class It1, class It2> + LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther, + It2 LastOther) + : Parent(Parent), Nodes(FirstHeader, LastHeader) { + NumHeaders = Nodes.size(); + Nodes.insert(Nodes.end(), FirstOther, LastOther); + BackedgeMass.resize(NumHeaders); + } + + bool isHeader(const BlockNode &Node) const { + if (isIrreducible()) + return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders, + Node); + return Node == Nodes[0]; + } + + BlockNode getHeader() const { return Nodes[0]; } + bool isIrreducible() const { return NumHeaders > 1; } + + HeaderMassList::difference_type getHeaderIndex(const BlockNode &B) { + assert(isHeader(B) && "this is only valid on loop header blocks"); + if (isIrreducible()) + return std::lower_bound(Nodes.begin(), Nodes.begin() + NumHeaders, B) - + Nodes.begin(); + return 0; + } + + NodeList::const_iterator members_begin() const { + return Nodes.begin() + NumHeaders; + } + + NodeList::const_iterator members_end() const { return Nodes.end(); } + iterator_range<NodeList::const_iterator> members() const { + return make_range(members_begin(), members_end()); + } + }; + + /// Index of loop information. + struct WorkingData { + BlockNode Node; ///< This node. + LoopData *Loop = nullptr; ///< The loop this block is inside. + BlockMass Mass; ///< Mass distribution from the entry block. + + WorkingData(const BlockNode &Node) : Node(Node) {} + + bool isLoopHeader() const { return Loop && Loop->isHeader(Node); } + + bool isDoubleLoopHeader() const { + return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() && + Loop->Parent->isHeader(Node); + } + + LoopData *getContainingLoop() const { + if (!isLoopHeader()) + return Loop; + if (!isDoubleLoopHeader()) + return Loop->Parent; + return Loop->Parent->Parent; + } + + /// Resolve a node to its representative. + /// + /// Get the node currently representing Node, which could be a containing + /// loop. + /// + /// This function should only be called when distributing mass. As long as + /// there are no irreducible edges to Node, then it will have complexity + /// O(1) in this context. + /// + /// In general, the complexity is O(L), where L is the number of loop + /// headers Node has been packaged into. Since this method is called in + /// the context of distributing mass, L will be the number of loop headers + /// an early exit edge jumps out of. + BlockNode getResolvedNode() const { + auto L = getPackagedLoop(); + return L ? L->getHeader() : Node; + } + + LoopData *getPackagedLoop() const { + if (!Loop || !Loop->IsPackaged) + return nullptr; + auto L = Loop; + while (L->Parent && L->Parent->IsPackaged) + L = L->Parent; + return L; + } + + /// Get the appropriate mass for a node. + /// + /// Get appropriate mass for Node. If Node is a loop-header (whose loop + /// has been packaged), returns the mass of its pseudo-node. If it's a + /// node inside a packaged loop, it returns the loop's mass. + BlockMass &getMass() { + if (!isAPackage()) + return Mass; + if (!isADoublePackage()) + return Loop->Mass; + return Loop->Parent->Mass; + } + + /// Has ContainingLoop been packaged up? + bool isPackaged() const { return getResolvedNode() != Node; } + + /// Has Loop been packaged up? 
+ bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; } + + /// Has Loop been packaged up twice? + bool isADoublePackage() const { + return isDoubleLoopHeader() && Loop->Parent->IsPackaged; + } + }; + + /// Unscaled probability weight. + /// + /// Probability weight for an edge in the graph (including the + /// successor/target node). + /// + /// All edges in the original function are 32-bit. However, exit edges from + /// loop packages are taken from 64-bit exit masses, so we need 64-bits of + /// space in general. + /// + /// In addition to the raw weight amount, Weight stores the type of the edge + /// in the current context (i.e., the context of the loop being processed). + /// Is this a local edge within the loop, an exit from the loop, or a + /// backedge to the loop header? + struct Weight { + enum DistType { Local, Exit, Backedge }; + DistType Type = Local; + BlockNode TargetNode; + uint64_t Amount = 0; + + Weight() = default; + Weight(DistType Type, BlockNode TargetNode, uint64_t Amount) + : Type(Type), TargetNode(TargetNode), Amount(Amount) {} + }; + + /// Distribution of unscaled probability weight. + /// + /// Distribution of unscaled probability weight to a set of successors. + /// + /// This class collates the successor edge weights for later processing. + /// + /// \a DidOverflow indicates whether \a Total did overflow while adding to + /// the distribution. It should never overflow twice. + struct Distribution { + using WeightList = SmallVector<Weight, 4>; + + WeightList Weights; ///< Individual successor weights. + uint64_t Total = 0; ///< Sum of all weights. + bool DidOverflow = false; ///< Whether \a Total did overflow. + + Distribution() = default; + + void addLocal(const BlockNode &Node, uint64_t Amount) { + add(Node, Amount, Weight::Local); + } + + void addExit(const BlockNode &Node, uint64_t Amount) { + add(Node, Amount, Weight::Exit); + } + + void addBackedge(const BlockNode &Node, uint64_t Amount) { + add(Node, Amount, Weight::Backedge); + } + + /// Normalize the distribution. + /// + /// Combines multiple edges to the same \a Weight::TargetNode and scales + /// down so that \a Total fits into 32-bits. + /// + /// This is linear in the size of \a Weights. For the vast majority of + /// cases, adjacent edge weights are combined by sorting WeightList and + /// combining adjacent weights. However, for very large edge lists an + /// auxiliary hash table is used. + void normalize(); + + private: + void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type); + }; + + /// Data about each block. This is used downstream. + std::vector<FrequencyData> Freqs; + + /// Whether each block is an irreducible loop header. + /// This is used downstream. + SparseBitVector<> IsIrrLoopHeader; + + /// Loop data: see initializeLoops(). + std::vector<WorkingData> Working; + + /// Indexed information about loops. + std::list<LoopData> Loops; + + /// Virtual destructor. + /// + /// Need a virtual destructor to mask the compiler warning about + /// getBlockName(). + virtual ~BlockFrequencyInfoImplBase() = default; + + /// Add all edges out of a packaged loop to the distribution. + /// + /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each + /// successor edge. + /// + /// \return \c true unless there's an irreducible backedge. + bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, + Distribution &Dist); + + /// Add an edge to the distribution. + /// + /// Adds an edge to Succ to Dist. 
If \c LoopHead.isValid(), then whether the
+  /// edge is local/exit/backedge is in the context of LoopHead. Otherwise,
+  /// every edge should be a local edge (since all the loops are packaged up).
+  ///
+  /// \return \c true unless aborted due to an irreducible backedge.
+  bool addToDist(Distribution &Dist, const LoopData *OuterLoop,
+                 const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
+
+  LoopData &getLoopPackage(const BlockNode &Head) {
+    assert(Head.Index < Working.size());
+    assert(Working[Head.Index].isLoopHeader());
+    return *Working[Head.Index].Loop;
+  }
+
+  /// Analyze irreducible SCCs.
+  ///
+  /// Separate irreducible SCCs from \c G, which is an explicit graph of \c
+  /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr).
+  /// Insert them into \a Loops before \c Insert.
+  ///
+  /// \return the \c LoopData nodes representing the irreducible SCCs.
+  iterator_range<std::list<LoopData>::iterator>
+  analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop,
+                     std::list<LoopData>::iterator Insert);
+
+  /// Update a loop after packaging irreducible SCCs inside of it.
+  ///
+  /// Update \c OuterLoop. Before finding irreducible control flow, it was
+  /// partway through \a computeMassInLoop(), so \a LoopData::Exits and \a
+  /// LoopData::BackedgeMass need to be reset. Also, nodes that were packaged
+  /// up need to be removed from \a OuterLoop::Nodes.
+  void updateLoopWithIrreducible(LoopData &OuterLoop);
+
+  /// Distribute mass according to a distribution.
+  ///
+  /// Distributes the mass in Source according to Dist. If LoopHead.isValid(),
+  /// backedges and exits are stored in its entry in Loops.
+  ///
+  /// Mass is distributed in parallel from two copies of the source mass.
+  void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
+                      Distribution &Dist);
+
+  /// Compute the loop scale for a loop.
+  void computeLoopScale(LoopData &Loop);
+
+  /// Adjust the mass of all headers in an irreducible loop.
+  ///
+  /// Initially, an irreducible loop is assumed to distribute its mass
+  /// equally among its headers. This can lead to wrong frequency estimates
+  /// since some headers may be executed more frequently than others.
+  ///
+  /// This adjusts header mass distribution so it matches the weights of
+  /// the backedges going into each of the loop headers.
+  void adjustLoopHeaderMass(LoopData &Loop);
+
+  void distributeIrrLoopHeaderMass(Distribution &Dist);
+
+  /// Package up a loop.
+  void packageLoop(LoopData &Loop);
+
+  /// Unwrap loops.
+  void unwrapLoops();
+
+  /// Finalize frequency metrics.
+  ///
+  /// Calculates final frequencies and cleans up no-longer-needed data
+  /// structures.
+  void finalizeMetrics();
+
+  /// Clear all memory.
+ void clear(); + + virtual std::string getBlockName(const BlockNode &Node) const; + std::string getLoopName(const LoopData &Loop) const; + + virtual raw_ostream &print(raw_ostream &OS) const { return OS; } + void dump() const { print(dbgs()); } + + Scaled64 getFloatingBlockFreq(const BlockNode &Node) const; + + BlockFrequency getBlockFreq(const BlockNode &Node) const; + Optional<uint64_t> getBlockProfileCount(const Function &F, + const BlockNode &Node) const; + Optional<uint64_t> getProfileCountFromFreq(const Function &F, + uint64_t Freq) const; + bool isIrrLoopHeader(const BlockNode &Node); + + void setBlockFreq(const BlockNode &Node, uint64_t Freq); + + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; + raw_ostream &printBlockFreq(raw_ostream &OS, + const BlockFrequency &Freq) const; + + uint64_t getEntryFreq() const { + assert(!Freqs.empty()); + return Freqs[0].Integer; + } +}; + +namespace bfi_detail { + +template <class BlockT> struct TypeMap {}; +template <> struct TypeMap<BasicBlock> { + using BlockT = BasicBlock; + using FunctionT = Function; + using BranchProbabilityInfoT = BranchProbabilityInfo; + using LoopT = Loop; + using LoopInfoT = LoopInfo; +}; +template <> struct TypeMap<MachineBasicBlock> { + using BlockT = MachineBasicBlock; + using FunctionT = MachineFunction; + using BranchProbabilityInfoT = MachineBranchProbabilityInfo; + using LoopT = MachineLoop; + using LoopInfoT = MachineLoopInfo; +}; + +/// Get the name of a MachineBasicBlock. +/// +/// Get the name of a MachineBasicBlock. It's templated so that including from +/// CodeGen is unnecessary (that would be a layering issue). +/// +/// This is used mainly for debug output. The name is similar to +/// MachineBasicBlock::getFullName(), but skips the name of the function. +template <class BlockT> std::string getBlockName(const BlockT *BB) { + assert(BB && "Unexpected nullptr"); + auto MachineName = "BB" + Twine(BB->getNumber()); + if (BB->getBasicBlock()) + return (MachineName + "[" + BB->getName() + "]").str(); + return MachineName.str(); +} +/// Get the name of a BasicBlock. +template <> inline std::string getBlockName(const BasicBlock *BB) { + assert(BB && "Unexpected nullptr"); + return BB->getName().str(); +} + +/// Graph of irreducible control flow. +/// +/// This graph is used for determining the SCCs in a loop (or top-level +/// function) that has irreducible control flow. +/// +/// During the block frequency algorithm, the local graphs are defined in a +/// light-weight way, deferring to the \a BasicBlock or \a MachineBasicBlock +/// graphs for most edges, but getting others from \a LoopData::ExitMap. The +/// latter only has successor information. +/// +/// \a IrreducibleGraph makes this graph explicit. It's in a form that can use +/// \a GraphTraits (so that \a analyzeIrreducible() can use \a scc_iterator), +/// and it explicitly lists predecessors and successors. The initialization +/// that relies on \c MachineBasicBlock is defined in the header. 
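+///
+/// A sketch of the expected construction pattern, from inside
+/// BlockFrequencyInfoImpl (the exact BlockEdgesAdder signature is an
+/// assumption; it is only forward-declared above):
+/// \code
+///   bfi_detail::BlockEdgesAdder<BT> addBlockEdges(*this);
+///   IrreducibleGraph G(*this, OuterLoop, addBlockEdges);
+///   for (auto &L : analyzeIrreducible(G, OuterLoop, Insert))
+///     computeMassInLoop(L);
+/// \endcode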
+struct IrreducibleGraph { + using BFIBase = BlockFrequencyInfoImplBase; + + BFIBase &BFI; + + using BlockNode = BFIBase::BlockNode; + struct IrrNode { + BlockNode Node; + unsigned NumIn = 0; + std::deque<const IrrNode *> Edges; + + IrrNode(const BlockNode &Node) : Node(Node) {} + + using iterator = std::deque<const IrrNode *>::const_iterator; + + iterator pred_begin() const { return Edges.begin(); } + iterator succ_begin() const { return Edges.begin() + NumIn; } + iterator pred_end() const { return succ_begin(); } + iterator succ_end() const { return Edges.end(); } + }; + BlockNode Start; + const IrrNode *StartIrr = nullptr; + std::vector<IrrNode> Nodes; + SmallDenseMap<uint32_t, IrrNode *, 4> Lookup; + + /// Construct an explicit graph containing irreducible control flow. + /// + /// Construct an explicit graph of the control flow in \c OuterLoop (or the + /// top-level function, if \c OuterLoop is \c nullptr). Uses \c + /// addBlockEdges to add block successors that have not been packaged into + /// loops. + /// + /// \a BlockFrequencyInfoImpl::computeIrreducibleMass() is the only expected + /// user of this. + template <class BlockEdgesAdder> + IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges) : BFI(BFI) { + initialize(OuterLoop, addBlockEdges); + } + + template <class BlockEdgesAdder> + void initialize(const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges); + void addNodesInLoop(const BFIBase::LoopData &OuterLoop); + void addNodesInFunction(); + + void addNode(const BlockNode &Node) { + Nodes.emplace_back(Node); + BFI.Working[Node.Index].getMass() = BlockMass::getEmpty(); + } + + void indexNodes(); + template <class BlockEdgesAdder> + void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges); + void addEdge(IrrNode &Irr, const BlockNode &Succ, + const BFIBase::LoopData *OuterLoop); +}; + +template <class BlockEdgesAdder> +void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges) { + if (OuterLoop) { + addNodesInLoop(*OuterLoop); + for (auto N : OuterLoop->Nodes) + addEdges(N, OuterLoop, addBlockEdges); + } else { + addNodesInFunction(); + for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) + addEdges(Index, OuterLoop, addBlockEdges); + } + StartIrr = Lookup[Start.Index]; +} + +template <class BlockEdgesAdder> +void IrreducibleGraph::addEdges(const BlockNode &Node, + const BFIBase::LoopData *OuterLoop, + BlockEdgesAdder addBlockEdges) { + auto L = Lookup.find(Node.Index); + if (L == Lookup.end()) + return; + IrrNode &Irr = *L->second; + const auto &Working = BFI.Working[Node.Index]; + + if (Working.isAPackage()) + for (const auto &I : Working.Loop->Exits) + addEdge(Irr, I.first, OuterLoop); + else + addBlockEdges(*this, Irr, OuterLoop); +} + +} // end namespace bfi_detail + +/// Shared implementation for block frequency analysis. +/// +/// This is a shared implementation of BlockFrequencyInfo and +/// MachineBlockFrequencyInfo, and calculates the relative frequencies of +/// blocks. +/// +/// LoopInfo defines a loop as a "non-trivial" SCC dominated by a single block, +/// which is called the header. A given loop, L, can have sub-loops, which are +/// loops within the subgraph of L that exclude its header. (A "trivial" SCC +/// consists of a single block that does not have a self-edge.) +/// +/// In addition to loops, this algorithm has limited support for irreducible +/// SCCs, which are SCCs with multiple entry blocks. 
Irreducible SCCs are
+/// discovered on the fly, and modelled as loops with multiple headers.
+///
+/// The headers of an irreducible sub-SCC consist of its entry blocks and all
+/// nodes that are targets of a backedge within it (excluding backedges within
+/// true sub-loops). Block frequency calculations act as if a block is
+/// inserted that intercepts all the edges to the headers. All backedges and
+/// entries point to this block. Its successors are the headers, which split
+/// the frequency evenly.
+///
+/// This algorithm leverages BlockMass and ScaledNumber to maintain precision,
+/// separates mass distribution from loop scaling, and dithers to eliminate
+/// probability mass loss.
+///
+/// The implementation is split between BlockFrequencyInfoImpl, which knows the
+/// type of graph being modelled (BasicBlock vs. MachineBasicBlock), and
+/// BlockFrequencyInfoImplBase, which doesn't. The base class uses \a
+/// BlockNode, a wrapper around a uint32_t. BlockNode is numbered from 0 in
+/// reverse-post order. This gives two advantages: it's easy to compare the
+/// relative ordering of two nodes, and maps keyed on BlockT can be represented
+/// by vectors.
+///
+/// This algorithm is O(V+E), unless there is irreducible control flow, in
+/// which case it's O(V*E) in the worst case.
+///
+/// These are the main stages:
+///
+///  0. Reverse post-order traversal (\a initializeRPOT()).
+///
+///     Run a single post-order traversal and save it (in reverse) in RPOT.
+///     All other stages make use of this ordering. Save a lookup from BlockT
+///     to BlockNode (the index into RPOT) in Nodes.
+///
+///  1. Loop initialization (\a initializeLoops()).
+///
+///     Translate LoopInfo/MachineLoopInfo into a form suitable for the rest of
+///     the algorithm. In particular, store the immediate members of each loop
+///     in reverse post-order.
+///
+///  2. Calculate mass and scale in loops (\a computeMassInLoops()).
+///
+///     For each loop (bottom-up), distribute mass through the DAG resulting
+///     from ignoring backedges and treating sub-loops as a single pseudo-node.
+///     Track the backedge mass distributed to the loop header, and use it to
+///     calculate the loop scale (number of loop iterations). Immediate
+///     members that represent sub-loops will already have been visited and
+///     packaged into a pseudo-node.
+///
+///     Distributing mass in a loop is a reverse-post-order traversal through
+///     the loop. Start by assigning full mass to the Loop header. For each
+///     node in the loop:
+///
+///      - Fetch and categorize the weight distribution for its successors.
+///        If this is a packaged-subloop, the weight distribution is stored
+///        in \a LoopData::Exits. Otherwise, fetch it from
+///        BranchProbabilityInfo.
+///
+///      - Each successor is categorized as \a Weight::Local, a local edge
+///        within the current loop, \a Weight::Backedge, a backedge to the
+///        loop header, or \a Weight::Exit, any successor outside the loop.
+///        The weight, the successor, and its category are stored in \a
+///        Distribution. There can be multiple edges to each successor.
+///
+///      - If there's a backedge to a non-header, there's an irreducible SCC.
+///        The usual flow is temporarily aborted. \a
+///        computeIrreducibleMass() finds the irreducible SCCs within the
+///        loop, packages them up, and restarts the flow.
+///
+///      - Normalize the distribution: scale weights down so that their sum
+///        is 32-bits, and coalesce multiple edges to the same node.
+/// +/// - Distribute the mass accordingly, dithering to minimize mass loss, +/// as described in \a distributeMass(). +/// +/// In the case of irreducible loops, instead of a single loop header, +/// there will be several. The computation of backedge masses is similar +/// but instead of having a single backedge mass, there will be one +/// backedge per loop header. In these cases, each backedge will carry +/// a mass proportional to the edge weights along the corresponding +/// path. +/// +/// At the end of propagation, the full mass assigned to the loop will be +/// distributed among the loop headers proportionally according to the +/// mass flowing through their backedges. +/// +/// Finally, calculate the loop scale from the accumulated backedge mass. +/// +/// 3. Distribute mass in the function (\a computeMassInFunction()). +/// +/// Finally, distribute mass through the DAG resulting from packaging all +/// loops in the function. This uses the same algorithm as distributing +/// mass in a loop, except that there are no exit or backedge edges. +/// +/// 4. Unpackage loops (\a unwrapLoops()). +/// +/// Initialize each block's frequency to a floating point representation of +/// its mass. +/// +/// Visit loops top-down, scaling the frequencies of its immediate members +/// by the loop's pseudo-node's frequency. +/// +/// 5. Convert frequencies to a 64-bit range (\a finalizeMetrics()). +/// +/// Using the min and max frequencies as a guide, translate floating point +/// frequencies to an appropriate range in uint64_t. +/// +/// It has some known flaws. +/// +/// - The model of irreducible control flow is a rough approximation. +/// +/// Modelling irreducible control flow exactly involves setting up and +/// solving a group of infinite geometric series. Such precision is +/// unlikely to be worthwhile, since most of our algorithms give up on +/// irreducible control flow anyway. +/// +/// Nevertheless, we might find that we need to get closer. Here's a sort +/// of TODO list for the model with diminishing returns, to be completed as +/// necessary. +/// +/// - The headers for the \a LoopData representing an irreducible SCC +/// include non-entry blocks. When these extra blocks exist, they +/// indicate a self-contained irreducible sub-SCC. We could treat them +/// as sub-loops, rather than arbitrarily shoving the problematic +/// blocks into the headers of the main irreducible SCC. +/// +/// - Entry frequencies are assumed to be evenly split between the +/// headers of a given irreducible SCC, which is the only option if we +/// need to compute mass in the SCC before its parent loop. Instead, +/// we could partially compute mass in the parent loop, and stop when +/// we get to the SCC. Here, we have the correct ratio of entry +/// masses, which we can use to adjust their relative frequencies. +/// Compute mass in the SCC, and then continue propagation in the +/// parent. +/// +/// - We can propagate mass iteratively through the SCC, for some fixed +/// number of iterations. Each iteration starts by assigning the entry +/// blocks their backedge mass from the prior iteration. The final +/// mass for each block (and each exit, and the total backedge mass +/// used for computing loop scale) is the sum of all iterations. +/// (Running this until fixed point would "solve" the geometric +/// series by simulation.) +template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { + // This is part of a workaround for a GCC 4.7 crash on lambdas. 
+ friend struct bfi_detail::BlockEdgesAdder<BT>; + + using BlockT = typename bfi_detail::TypeMap<BT>::BlockT; + using FunctionT = typename bfi_detail::TypeMap<BT>::FunctionT; + using BranchProbabilityInfoT = + typename bfi_detail::TypeMap<BT>::BranchProbabilityInfoT; + using LoopT = typename bfi_detail::TypeMap<BT>::LoopT; + using LoopInfoT = typename bfi_detail::TypeMap<BT>::LoopInfoT; + using Successor = GraphTraits<const BlockT *>; + using Predecessor = GraphTraits<Inverse<const BlockT *>>; + + const BranchProbabilityInfoT *BPI = nullptr; + const LoopInfoT *LI = nullptr; + const FunctionT *F = nullptr; + + // All blocks in reverse postorder. + std::vector<const BlockT *> RPOT; + DenseMap<const BlockT *, BlockNode> Nodes; + + using rpot_iterator = typename std::vector<const BlockT *>::const_iterator; + + rpot_iterator rpot_begin() const { return RPOT.begin(); } + rpot_iterator rpot_end() const { return RPOT.end(); } + + size_t getIndex(const rpot_iterator &I) const { return I - rpot_begin(); } + + BlockNode getNode(const rpot_iterator &I) const { + return BlockNode(getIndex(I)); + } + BlockNode getNode(const BlockT *BB) const { return Nodes.lookup(BB); } + + const BlockT *getBlock(const BlockNode &Node) const { + assert(Node.Index < RPOT.size()); + return RPOT[Node.Index]; + } + + /// Run (and save) a post-order traversal. + /// + /// Saves a reverse post-order traversal of all the nodes in \a F. + void initializeRPOT(); + + /// Initialize loop data. + /// + /// Build up \a Loops using \a LoopInfo. \a LoopInfo gives us a mapping from + /// each block to the deepest loop it's in, but we need the inverse. For each + /// loop, we store in reverse post-order its "immediate" members, defined as + /// the header, the headers of immediate sub-loops, and all other blocks in + /// the loop that are not in sub-loops. + void initializeLoops(); + + /// Propagate to a block's successors. + /// + /// In the context of distributing mass through \c OuterLoop, divide the mass + /// currently assigned to \c Node between its successors. + /// + /// \return \c true unless there's an irreducible backedge. + bool propagateMassToSuccessors(LoopData *OuterLoop, const BlockNode &Node); + + /// Compute mass in a particular loop. + /// + /// Assign mass to \c Loop's header, and then for each block in \c Loop in + /// reverse post-order, distribute mass to its successors. Only visits nodes + /// that have not been packaged into sub-loops. + /// + /// \pre \a computeMassInLoop() has been called for each subloop of \c Loop. + /// \return \c true unless there's an irreducible backedge. + bool computeMassInLoop(LoopData &Loop); + + /// Try to compute mass in the top-level function. + /// + /// Assign mass to the entry block, and then for each block in reverse + /// post-order, distribute mass to its successors. Skips nodes that have + /// been packaged into loops. + /// + /// \pre \a computeMassInLoops() has been called. + /// \return \c true unless there's an irreducible backedge. + bool tryToComputeMassInFunction(); + + /// Compute mass in (and package up) irreducible SCCs. + /// + /// Find the irreducible SCCs in \c OuterLoop, add them to \a Loops (in front + /// of \c Insert), and call \a computeMassInLoop() on each of them. + /// + /// If \c OuterLoop is \c nullptr, it refers to the top-level function. + /// + /// \pre \a computeMassInLoop() has been called for each subloop of \c + /// OuterLoop. + /// \pre \c Insert points at the last loop successfully processed by \a + /// computeMassInLoop(). 
+ /// \pre \c OuterLoop has irreducible SCCs. + void computeIrreducibleMass(LoopData *OuterLoop, + std::list<LoopData>::iterator Insert); + + /// Compute mass in all loops. + /// + /// For each loop bottom-up, call \a computeMassInLoop(). + /// + /// \a computeMassInLoop() aborts (and returns \c false) on loops that + /// contain irreducible sub-SCCs. Use \a computeIrreducibleMass() and then + /// re-enter \a computeMassInLoop(). + /// + /// \post \a computeMassInLoop() has returned \c true for every loop. + void computeMassInLoops(); + + /// Compute mass in the top-level function. + /// + /// Uses \a tryToComputeMassInFunction() and \a computeIrreducibleMass() to + /// compute mass in the top-level function. + /// + /// \post \a tryToComputeMassInFunction() has returned \c true. + void computeMassInFunction(); + + std::string getBlockName(const BlockNode &Node) const override { + return bfi_detail::getBlockName(getBlock(Node)); + } + +public: + BlockFrequencyInfoImpl() = default; + + const FunctionT *getFunction() const { return F; } + + void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI); + + using BlockFrequencyInfoImplBase::getEntryFreq; + + BlockFrequency getBlockFreq(const BlockT *BB) const { + return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); + } + + Optional<uint64_t> getBlockProfileCount(const Function &F, + const BlockT *BB) const { + return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB)); + } + + Optional<uint64_t> getProfileCountFromFreq(const Function &F, + uint64_t Freq) const { + return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq); + } + + bool isIrrLoopHeader(const BlockT *BB) { + return BlockFrequencyInfoImplBase::isIrrLoopHeader(getNode(BB)); + } + + void setBlockFreq(const BlockT *BB, uint64_t Freq); + + Scaled64 getFloatingBlockFreq(const BlockT *BB) const { + return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); + } + + const BranchProbabilityInfoT &getBPI() const { return *BPI; } + + /// Print the frequencies for the current function. + /// + /// Prints the frequencies for the blocks in the current function. + /// + /// Blocks are printed in the natural iteration order of the function, rather + /// than reverse post-order. This provides two advantages: writing -analyze + /// tests is easier (since blocks come out in source order), and even + /// unreachable blocks are printed. + /// + /// \a BlockFrequencyInfoImplBase::print() only knows reverse post-order, so + /// we need to override it here. + raw_ostream &print(raw_ostream &OS) const override; + + using BlockFrequencyInfoImplBase::dump; + using BlockFrequencyInfoImplBase::printBlockFreq; + + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const { + return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB)); + } +}; + +template <class BT> +void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F, + const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI) { + // Save the parameters. + this->BPI = &BPI; + this->LI = &LI; + this->F = &F; + + // Clean up left-over data structures. + BlockFrequencyInfoImplBase::clear(); + RPOT.clear(); + Nodes.clear(); + + // Initialize. + LLVM_DEBUG(dbgs() << "\nblock-frequency: " << F.getName() + << "\n=================" + << std::string(F.getName().size(), '=') << "\n"); + initializeRPOT(); + initializeLoops(); + + // Visit loops in post-order to find the local mass distribution, and then do + // the full function.
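+ // (These four calls are stages 2-5 from the class comment above: mass within
+ // each loop, mass across the whole function, unwrapping of the packaged
+ // loops, and conversion of the scaled frequencies to 64-bit integers.)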
+ computeMassInLoops(); + computeMassInFunction(); + unwrapLoops(); + finalizeMetrics(); +} + +template <class BT> +void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) { + if (Nodes.count(BB)) + BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq); + else { + // If BB is a newly added block after BFI is done, we need to create a new + // BlockNode for it assigned with a new index. The index can be determined + // by the size of Freqs. + BlockNode NewNode(Freqs.size()); + Nodes[BB] = NewNode; + Freqs.emplace_back(); + BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq); + } +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() { + const BlockT *Entry = &F->front(); + RPOT.reserve(F->size()); + std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); + std::reverse(RPOT.begin(), RPOT.end()); + + assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() && + "More nodes in function than Block Frequency Info supports"); + + LLVM_DEBUG(dbgs() << "reverse-post-order-traversal\n"); + for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) { + BlockNode Node = getNode(I); + LLVM_DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(Node) + << "\n"); + Nodes[*I] = Node; + } + + Working.reserve(RPOT.size()); + for (size_t Index = 0; Index < RPOT.size(); ++Index) + Working.emplace_back(Index); + Freqs.resize(RPOT.size()); +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::initializeLoops() { + LLVM_DEBUG(dbgs() << "loop-detection\n"); + if (LI->empty()) + return; + + // Visit loops top down and assign them an index. + std::deque<std::pair<const LoopT *, LoopData *>> Q; + for (const LoopT *L : *LI) + Q.emplace_back(L, nullptr); + while (!Q.empty()) { + const LoopT *Loop = Q.front().first; + LoopData *Parent = Q.front().second; + Q.pop_front(); + + BlockNode Header = getNode(Loop->getHeader()); + assert(Header.isValid()); + + Loops.emplace_back(Parent, Header); + Working[Header.Index].Loop = &Loops.back(); + LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header) << "\n"); + + for (const LoopT *L : *Loop) + Q.emplace_back(L, &Loops.back()); + } + + // Visit nodes in reverse post-order and add them to their deepest containing + // loop. + for (size_t Index = 0; Index < RPOT.size(); ++Index) { + // Loop headers have already been mostly mapped. + if (Working[Index].isLoopHeader()) { + LoopData *ContainingLoop = Working[Index].getContainingLoop(); + if (ContainingLoop) + ContainingLoop->Nodes.push_back(Index); + continue; + } + + const LoopT *Loop = LI->getLoopFor(RPOT[Index]); + if (!Loop) + continue; + + // Add this node to its containing loop's member list. + BlockNode Header = getNode(Loop->getHeader()); + assert(Header.isValid()); + const auto &HeaderData = Working[Header.Index]; + assert(HeaderData.isLoopHeader()); + + Working[Index].Loop = HeaderData.Loop; + HeaderData.Loop->Nodes.push_back(Index); + LLVM_DEBUG(dbgs() << " - loop = " << getBlockName(Header) + << ": member = " << getBlockName(Index) << "\n"); + } +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInLoops() { + // Visit loops with the deepest first, and the top-level loops last. 
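+ // Loops was filled top-down in initializeLoops(), so iterating it in reverse
+ // visits sub-loops before their parents. If computeMassInLoop() bails out on
+ // an irreducible backedge, computeIrreducibleMass() packages the offending
+ // sub-SCCs as extra LoopData entries and the loop is retried; a second
+ // failure is unexpected.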
+ for (auto L = Loops.rbegin(), E = Loops.rend(); L != E; ++L) { + if (computeMassInLoop(*L)) + continue; + auto Next = std::next(L); + computeIrreducibleMass(&*L, L.base()); + L = std::prev(Next); + if (computeMassInLoop(*L)) + continue; + llvm_unreachable("unhandled irreducible control flow"); + } +} + +template <class BT> +bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { + // Compute mass in loop. + LLVM_DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n"); + + if (Loop.isIrreducible()) { + LLVM_DEBUG(dbgs() << "isIrreducible = true\n"); + Distribution Dist; + unsigned NumHeadersWithWeight = 0; + Optional<uint64_t> MinHeaderWeight; + DenseSet<uint32_t> HeadersWithoutWeight; + HeadersWithoutWeight.reserve(Loop.NumHeaders); + for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { + auto &HeaderNode = Loop.Nodes[H]; + const BlockT *Block = getBlock(HeaderNode); + IsIrrLoopHeader.set(Loop.Nodes[H].Index); + Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight(); + if (!HeaderWeight) { + LLVM_DEBUG(dbgs() << "Missing irr loop header metadata on " + << getBlockName(HeaderNode) << "\n"); + HeadersWithoutWeight.insert(H); + continue; + } + LLVM_DEBUG(dbgs() << getBlockName(HeaderNode) + << " has irr loop header weight " + << HeaderWeight.getValue() << "\n"); + NumHeadersWithWeight++; + uint64_t HeaderWeightValue = HeaderWeight.getValue(); + if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight) + MinHeaderWeight = HeaderWeightValue; + if (HeaderWeightValue) { + Dist.addLocal(HeaderNode, HeaderWeightValue); + } + } + // As a heuristic, if some headers don't have a weight, give them the + // minimum weight seen (so as not to disrupt the existing trends too much, + // use a weight in the general range of the other headers' weights; the + // minimum seems to perform better than the average). + // FIXME: better update in the passes that drop the header weight. + // If no headers have a weight, give them even weight (use weight 1). + if (!MinHeaderWeight) + MinHeaderWeight = 1; + for (uint32_t H : HeadersWithoutWeight) { + auto &HeaderNode = Loop.Nodes[H]; + assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() && + "Shouldn't have a weight metadata"); + uint64_t MinWeight = MinHeaderWeight.getValue(); + LLVM_DEBUG(dbgs() << "Giving weight " << MinWeight << " to " + << getBlockName(HeaderNode) << "\n"); + if (MinWeight) + Dist.addLocal(HeaderNode, MinWeight); + } + distributeIrrLoopHeaderMass(Dist); + for (const BlockNode &M : Loop.Nodes) + if (!propagateMassToSuccessors(&Loop, M)) + llvm_unreachable("unhandled irreducible control flow"); + if (NumHeadersWithWeight == 0) + // No headers have metadata. Adjust header mass. + adjustLoopHeaderMass(Loop); + } else { + Working[Loop.getHeader().Index].getMass() = BlockMass::getFull(); + if (!propagateMassToSuccessors(&Loop, Loop.getHeader())) + llvm_unreachable("irreducible control flow to loop header!?"); + for (const BlockNode &M : Loop.members()) + if (!propagateMassToSuccessors(&Loop, M)) + // Irreducible backedge. + return false; + } + + computeLoopScale(Loop); + packageLoop(Loop); + return true; +} + +template <class BT> +bool BlockFrequencyInfoImpl<BT>::tryToComputeMassInFunction() { + // Compute mass in function.
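+ // BlockNodes are numbered by reverse post-order position, so index 0 is the
+ // function's entry block; it is seeded with full mass below.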
+ LLVM_DEBUG(dbgs() << "compute-mass-in-function\n"); + assert(!Working.empty() && "no blocks in function"); + assert(!Working[0].isLoopHeader() && "entry block is a loop header"); + + Working[0].getMass() = BlockMass::getFull(); + for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) { + // Check for nodes that have been packaged. + BlockNode Node = getNode(I); + if (Working[Node.Index].isPackaged()) + continue; + + if (!propagateMassToSuccessors(nullptr, Node)) + return false; + } + return true; +} + +template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() { + if (tryToComputeMassInFunction()) + return; + computeIrreducibleMass(nullptr, Loops.begin()); + if (tryToComputeMassInFunction()) + return; + llvm_unreachable("unhandled irreducible control flow"); +} + +/// \note This should be a lambda, but that crashes GCC 4.7. +namespace bfi_detail { + +template <class BT> struct BlockEdgesAdder { + using BlockT = BT; + using LoopData = BlockFrequencyInfoImplBase::LoopData; + using Successor = GraphTraits<const BlockT *>; + + const BlockFrequencyInfoImpl<BT> &BFI; + + explicit BlockEdgesAdder(const BlockFrequencyInfoImpl<BT> &BFI) + : BFI(BFI) {} + + void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, + const LoopData *OuterLoop) { + const BlockT *BB = BFI.RPOT[Irr.Node.Index]; + for (const auto Succ : children<const BlockT *>(BB)) + G.addEdge(Irr, BFI.getNode(Succ), OuterLoop); + } +}; + +} // end namespace bfi_detail + +template <class BT> +void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass( + LoopData *OuterLoop, std::list<LoopData>::iterator Insert) { + LLVM_DEBUG(dbgs() << "analyze-irreducible-in-"; + if (OuterLoop) dbgs() + << "loop: " << getLoopName(*OuterLoop) << "\n"; + else dbgs() << "function\n"); + + using namespace bfi_detail; + + // Ideally, addBlockEdges() would be declared here as a lambda, but that + // crashes GCC 4.7. + BlockEdgesAdder<BT> addBlockEdges(*this); + IrreducibleGraph G(*this, OuterLoop, addBlockEdges); + + for (auto &L : analyzeIrreducible(G, OuterLoop, Insert)) + computeMassInLoop(L); + + if (!OuterLoop) + return; + updateLoopWithIrreducible(*OuterLoop); +} + +// A helper function that converts a branch probability into weight. +inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) { + return Prob.getNumerator(); +} + +template <class BT> +bool +BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, + const BlockNode &Node) { + LLVM_DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); + // Calculate probability for successors. + Distribution Dist; + if (auto *Loop = Working[Node.Index].getPackagedLoop()) { + assert(Loop != OuterLoop && "Cannot propagate mass in a packaged loop"); + if (!addLoopSuccessorsToDist(OuterLoop, *Loop, Dist)) + // Irreducible backedge. + return false; + } else { + const BlockT *BB = getBlock(Node); + for (auto SI = GraphTraits<const BlockT *>::child_begin(BB), + SE = GraphTraits<const BlockT *>::child_end(BB); + SI != SE; ++SI) + if (!addToDist( + Dist, OuterLoop, Node, getNode(*SI), + getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI)))) + // Irreducible backedge. + return false; + } + + // Distribute mass to successors, saving exit and backedge data in the + // loop header. 
+ distributeMass(Node, OuterLoop, Dist); + return true; +} + +template <class BT> +raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { + if (!F) + return OS; + OS << "block-frequency-info: " << F->getName() << "\n"; + for (const BlockT &BB : *F) { + OS << " - " << bfi_detail::getBlockName(&BB) << ": float = "; + getFloatingBlockFreq(&BB).print(OS, 5) + << ", int = " << getBlockFreq(&BB).getFrequency(); + if (Optional<uint64_t> ProfileCount = + BlockFrequencyInfoImplBase::getBlockProfileCount( + F->getFunction(), getNode(&BB))) + OS << ", count = " << ProfileCount.getValue(); + if (Optional<uint64_t> IrrLoopHeaderWeight = + BB.getIrrLoopHeaderWeight()) + OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue(); + OS << "\n"; + } + + // Add an extra newline for readability. + OS << "\n"; + return OS; +} + +// Graph trait base class for block frequency information graph +// viewer. + +enum GVDAGType { GVDT_None, GVDT_Fraction, GVDT_Integer, GVDT_Count }; + +template <class BlockFrequencyInfoT, class BranchProbabilityInfoT> +struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { + using GTraits = GraphTraits<BlockFrequencyInfoT *>; + using NodeRef = typename GTraits::NodeRef; + using EdgeIter = typename GTraits::ChildIteratorType; + using NodeIter = typename GTraits::nodes_iterator; + + uint64_t MaxFrequency = 0; + + explicit BFIDOTGraphTraitsBase(bool isSimple = false) + : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const BlockFrequencyInfoT *G) { + return G->getFunction()->getName(); + } + + std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph, + unsigned HotPercentThreshold = 0) { + std::string Result; + if (!HotPercentThreshold) + return Result; + + // Compute MaxFrequency on the fly: + if (!MaxFrequency) { + for (NodeIter I = GTraits::nodes_begin(Graph), + E = GTraits::nodes_end(Graph); + I != E; ++I) { + NodeRef N = *I; + MaxFrequency = + std::max(MaxFrequency, Graph->getBlockFreq(N).getFrequency()); + } + } + BlockFrequency Freq = Graph->getBlockFreq(Node); + BlockFrequency HotFreq = + (BlockFrequency(MaxFrequency) * + BranchProbability::getBranchProbability(HotPercentThreshold, 100)); + + if (Freq < HotFreq) + return Result; + + raw_string_ostream OS(Result); + OS << "color=\"red\""; + OS.flush(); + return Result; + } + + std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph, + GVDAGType GType, int layout_order = -1) { + std::string Result; + raw_string_ostream OS(Result); + + if (layout_order != -1) + OS << Node->getName() << "[" << layout_order << "] : "; + else + OS << Node->getName() << " : "; + switch (GType) { + case GVDT_Fraction: + Graph->printBlockFreq(OS, Node); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_Count: { + auto Count = Graph->getBlockProfileCount(Node); + if (Count) + OS << Count.getValue(); + else + OS << "Unknown"; + break; + } + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + return Result; + } + + std::string getEdgeAttributes(NodeRef Node, EdgeIter EI, + const BlockFrequencyInfoT *BFI, + const BranchProbabilityInfoT *BPI, + unsigned HotPercentThreshold = 0) { + std::string Str; + if (!BPI) + return Str; + + BranchProbability BP = BPI->getEdgeProbability(Node, EI); + uint32_t N = BP.getNumerator(); + uint32_t D = BP.getDenominator(); + double Percent = 100.0 * N / D; + raw_string_ostream OS(Str); + OS << 
format("label=\"%.1f%%\"", Percent); + + if (HotPercentThreshold) { + BlockFrequency EFreq = BFI->getBlockFreq(Node) * BP; + BlockFrequency HotFreq = BlockFrequency(MaxFrequency) * + BranchProbability(HotPercentThreshold, 100); + + if (EFreq >= HotFreq) { + OS << ",color=\"red\""; + } + } + + OS.flush(); + return Str; + } +}; + +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H diff --git a/clang-r353983e/include/llvm/Analysis/BranchProbabilityInfo.h b/clang-r353983e/include/llvm/Analysis/BranchProbabilityInfo.h new file mode 100644 index 00000000..97cb730d --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/BranchProbabilityInfo.h @@ -0,0 +1,252 @@ +//===- BranchProbabilityInfo.h - Branch Probability Analysis ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass is used to evaluate branch probabilties. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H +#define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> + +namespace llvm { + +class Function; +class LoopInfo; +class raw_ostream; +class TargetLibraryInfo; +class Value; + +/// Analysis providing branch probability information. +/// +/// This is a function analysis which provides information on the relative +/// probabilities of each "edge" in the function's CFG where such an edge is +/// defined by a pair (PredBlock and an index in the successors). The +/// probability of an edge from one block is always relative to the +/// probabilities of other edges from the block. The probabilites of all edges +/// from a block sum to exactly one (100%). +/// We use a pair (PredBlock and an index in the successors) to uniquely +/// identify an edge, since we can have multiple edges from Src to Dst. +/// As an example, we can have a switch which jumps to Dst with value 0 and +/// value 10. 
+class BranchProbabilityInfo { +public: + BranchProbabilityInfo() = default; + + BranchProbabilityInfo(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr) { + calculate(F, LI, TLI); + } + + BranchProbabilityInfo(BranchProbabilityInfo &&Arg) + : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), + PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)), + PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {} + + BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; + BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete; + + BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { + releaseMemory(); + Probs = std::move(RHS.Probs); + PostDominatedByColdCall = std::move(RHS.PostDominatedByColdCall); + PostDominatedByUnreachable = std::move(RHS.PostDominatedByUnreachable); + return *this; + } + + void releaseMemory(); + + void print(raw_ostream &OS) const; + + /// Get an edge's probability, relative to other out-edges of the Src. + /// + /// This routine provides access to the fractional probability between zero + /// (0%) and one (100%) of this edge executing, relative to other edges + /// leaving the 'Src' block. The returned probability is never zero, and can + /// only be one if the source block has only one successor. + BranchProbability getEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors) const; + + /// Get the probability of going from Src to Dst. + /// + /// It returns the sum of all probabilities for edges from Src to Dst. + BranchProbability getEdgeProbability(const BasicBlock *Src, + const BasicBlock *Dst) const; + + BranchProbability getEdgeProbability(const BasicBlock *Src, + succ_const_iterator Dst) const; + + /// Test if an edge is hot relative to other out-edges of the Src. + /// + /// Check whether this edge out of the source block is 'hot'. We define hot + /// as having a relative probability >= 80%. + bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const; + + /// Retrieve the hot successor of a block if one exists. + /// + /// Given a basic block, look through its successors and if one exists for + /// which \see isEdgeHot would return true, return that successor block. + const BasicBlock *getHotSucc(const BasicBlock *BB) const; + + /// Print an edge's probability. + /// + /// Retrieves an edge's probability similarly to \see getEdgeProbability, but + /// then prints that probability to the provided stream. That stream is then + /// returned. + raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, + const BasicBlock *Dst) const; + + /// Set the raw edge probability for the given edge. + /// + /// This allows a pass to explicitly set the edge probability for an edge. It + /// can be used when updating the CFG to update and preserve the branch + /// probability information. Read the implementation of how these edge + /// probabilities are calculated carefully before using! + void setEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors, + BranchProbability Prob); + + static BranchProbability getBranchProbStackProtector(bool IsLikely) { + static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20); + return IsLikely ? LikelyProb : LikelyProb.getCompl(); + } + + void calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr); + + /// Forget analysis results for the given basic block. + void eraseBlock(const BasicBlock *BB); + + // Use to track SCCs for handling irreducible loops. 
+ using SccMap = DenseMap<const BasicBlock *, int>; + using SccHeaderMap = DenseMap<const BasicBlock *, bool>; + using SccHeaderMaps = std::vector<SccHeaderMap>; + struct SccInfo { + SccMap SccNums; + SccHeaderMaps SccHeaders; + }; + +private: + // We need to store CallbackVH's in order to correctly handle basic block + // removal. + class BasicBlockCallbackVH final : public CallbackVH { + BranchProbabilityInfo *BPI; + + void deleted() override { + assert(BPI != nullptr); + BPI->eraseBlock(cast<BasicBlock>(getValPtr())); + BPI->Handles.erase(*this); + } + + public: + BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI = nullptr) + : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {} + }; + + DenseSet<BasicBlockCallbackVH, DenseMapInfo<Value*>> Handles; + + // Since we allow duplicate edges from one basic block to another, we use + // a pair (PredBlock and an index in the successors) to specify an edge. + using Edge = std::pair<const BasicBlock *, unsigned>; + + // Default weight value. Used when we don't have information about the edge. + // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of + // the successors have a weight yet. But it doesn't make sense when providing + // weight to an edge that may have siblings with non-zero weights. This can + // be handled various ways, but it's probably fine for an edge with unknown + // weight to just "inherit" the non-zero weight of an adjacent successor. + static const uint32_t DEFAULT_WEIGHT = 16; + + DenseMap<Edge, BranchProbability> Probs; + + /// Track the last function we run over for printing. + const Function *LastF; + + /// Track the set of blocks directly succeeded by a returning block. + SmallPtrSet<const BasicBlock *, 16> PostDominatedByUnreachable; + + /// Track the set of blocks that always lead to a cold call. + SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; + + void updatePostDominatedByUnreachable(const BasicBlock *BB); + void updatePostDominatedByColdCall(const BasicBlock *BB); + bool calcUnreachableHeuristics(const BasicBlock *BB); + bool calcMetadataWeights(const BasicBlock *BB); + bool calcColdCallHeuristics(const BasicBlock *BB); + bool calcPointerHeuristics(const BasicBlock *BB); + bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI, + SccInfo &SccI); + bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); + bool calcFloatingPointHeuristics(const BasicBlock *BB); + bool calcInvokeHeuristics(const BasicBlock *BB); +}; + +/// Analysis pass which computes \c BranchProbabilityInfo. +class BranchProbabilityAnalysis + : public AnalysisInfoMixin<BranchProbabilityAnalysis> { + friend AnalysisInfoMixin<BranchProbabilityAnalysis>; + + static AnalysisKey Key; + +public: + /// Provide the result type for this analysis pass. + using Result = BranchProbabilityInfo; + + /// Run the analysis pass over a function and produce BPI. + BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c BranchProbabilityAnalysis results. +class BranchProbabilityPrinterPass + : public PassInfoMixin<BranchProbabilityPrinterPass> { + raw_ostream &OS; + +public: + explicit BranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy analysis pass which computes \c BranchProbabilityInfo. 
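+/// In the legacy pass manager, clients typically declare a dependency on this
+/// pass in getAnalysisUsage() and then read the result with
+/// getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI().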
+class BranchProbabilityInfoWrapperPass : public FunctionPass { + BranchProbabilityInfo BPI; + +public: + static char ID; + + BranchProbabilityInfoWrapperPass() : FunctionPass(ID) { + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + + BranchProbabilityInfo &getBPI() { return BPI; } + const BranchProbabilityInfo &getBPI() const { return BPI; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H diff --git a/clang-r353983e/include/llvm/Analysis/CFG.h b/clang-r353983e/include/llvm/Analysis/CFG.h new file mode 100644 index 00000000..bcff4fb8 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CFG.h @@ -0,0 +1,159 @@ +//===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions performs analyses on basic blocks, and instructions +// contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFG_H +#define LLVM_ANALYSIS_CFG_H + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" + +namespace llvm { + +class BasicBlock; +class DominatorTree; +class Function; +class Instruction; +class LoopInfo; + +/// Analyze the specified function to find all of the loop backedges in the +/// function and return them. This is a relatively cheap (compared to +/// computing dominators and loop info) analysis. +/// +/// The output is added to Result, as pairs of <from,to> edge info. +void FindFunctionBackedges( + const Function &F, + SmallVectorImpl<std::pair<const BasicBlock *, const BasicBlock *> > & + Result); + +/// Search for the specified successor of basic block BB and return its position +/// in the terminator instruction's list of successors. It is an error to call +/// this with a block that is not a successor. +unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); + +/// Return true if the specified edge is a critical edge. Critical edges are +/// edges from a block with multiple successors to a block with multiple +/// predecessors. +/// +bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, + bool AllowIdenticalEdges = false); + +/// Determine whether instruction 'To' is reachable from 'From', +/// returning true if uncertain. +/// +/// Determine whether there is a path from From to To within a single function. +/// Returns false only if we can prove that once 'From' has been executed then +/// 'To' can not be executed. Conservatively returns true. +/// +/// This function is linear with respect to the number of blocks in the CFG, +/// walking down successors from From to reach To, with a fixed threshold. +/// Using DT or LI allows us to answer more quickly. LI reduces the cost of +/// an entire loop of any number of blocks to be the same as the cost of a +/// single block. DT reduces the cost by allowing the search to terminate when +/// we find a block that dominates the block containing 'To'. 
DT is most useful +/// on branchy code but not loops, and LI is most useful on code with loops but +/// does not help on branchy code outside loops. +bool isPotentiallyReachable(const Instruction *From, const Instruction *To, + const DominatorTree *DT = nullptr, + const LoopInfo *LI = nullptr); + +/// Determine whether block 'To' is reachable from 'From', returning +/// true if uncertain. +/// +/// Determine whether there is a path from From to To within a single function. +/// Returns false only if we can prove that once 'From' has been reached then +/// 'To' can not be executed. Conservatively returns true. +bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To, + const DominatorTree *DT = nullptr, + const LoopInfo *LI = nullptr); + +/// Determine whether there is at least one path from a block in +/// 'Worklist' to 'StopBB', returning true if uncertain. +/// +/// Determine whether there is a path from at least one block in Worklist to +/// StopBB within a single function. Returns false only if we can prove that +/// once any block in 'Worklist' has been reached then 'StopBB' can not be +/// executed. Conservatively returns true. +bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist, + BasicBlock *StopBB, + const DominatorTree *DT = nullptr, + const LoopInfo *LI = nullptr); + +/// Return true if the control flow in \p RPOTraversal is irreducible. +/// +/// This is a generic implementation to detect CFG irreducibility based on loop +/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop, +/// Function, MachineFunction, etc.) by providing an RPO traversal (\p +/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility +/// function is only recommended when loop info analysis is available. If loop +/// info analysis isn't available, please, don't compute it explicitly for this +/// purpose. There are more efficient ways to detect CFG irreducibility that +/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's +/// algorithm). +/// +/// Requirements: +/// 1) GraphTraits must be implemented for NodeT type. It is used to access +/// NodeT successors. +// 2) \p RPOTraversal must be a valid reverse post-order traversal of the +/// target CFG with begin()/end() iterator interfaces. +/// 3) \p LI must be a valid LoopInfoBase that contains up-to-date loop +/// analysis information of the CFG. +/// +/// This algorithm uses the information about reducible loop back-edges already +/// computed in \p LI. When a back-edge is found during the RPO traversal, the +/// algorithm checks whether the back-edge is one of the reducible back-edges in +/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG +/// below (canonical irreducible graph) loop info won't contain any loop, so the +/// algorithm will return that the CFG is irreducible when checking the B <- +/// -> C back-edge. +/// +/// (A->B, A->C, B->C, C->B, C->D) +/// A +/// / \ +/// B<- ->C +/// | +/// D +/// +template <class NodeT, class RPOTraversalT, class LoopInfoT, + class GT = GraphTraits<NodeT>> +bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) { + /// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge + /// according to LI. I.e., check if there exists a loop that contains Src and + /// where Dst is the loop header. 
+ auto isProperBackedge = [&](NodeT Src, NodeT Dst) { + for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) { + if (Lp->getHeader() == Dst) + return true; + } + return false; + }; + + SmallPtrSet<NodeT, 32> Visited; + for (NodeT Node : RPOTraversal) { + Visited.insert(Node); + for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) { + // Succ hasn't been visited yet + if (!Visited.count(Succ)) + continue; + // We already visited Succ, thus Node->Succ must be a backedge. Check that + // the head matches what we have in the loop information. Otherwise, we + // have an irreducible graph. + if (!isProperBackedge(Node, Succ)) + return true; + } + } + + return false; +} +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/CFGPrinter.h b/clang-r353983e/include/llvm/Analysis/CFGPrinter.h new file mode 100644 index 00000000..aaefc116 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CFGPrinter.h @@ -0,0 +1,185 @@ +//===-- CFGPrinter.h - CFG printer external interface -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a 'dot-cfg' analysis pass, which emits the +// cfg.<fnname>.dot file for each function in the program, with a graph of the +// CFG for that function. +// +// This file defines external functions that can be called to explicitly +// instantiate the CFG printer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFGPRINTER_H +#define LLVM_ANALYSIS_CFGPRINTER_H + +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/GraphWriter.h" + +namespace llvm { +class CFGViewerPass + : public PassInfoMixin<CFGViewerPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class CFGOnlyViewerPass + : public PassInfoMixin<CFGOnlyViewerPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class CFGPrinterPass + : public PassInfoMixin<CFGPrinterPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class CFGOnlyPrinterPass + : public PassInfoMixin<CFGOnlyPrinterPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +template<> +struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const Function *F) { + return "CFG for '" + F->getName().str() + "' function"; + } + + static std::string getSimpleNodeLabel(const BasicBlock *Node, + const Function *) { + if (!Node->getName().empty()) + return Node->getName().str(); + + std::string Str; + raw_string_ostream OS(Str); + + Node->printAsOperand(OS, false); + return OS.str(); + } + + static std::string getCompleteNodeLabel(const BasicBlock *Node, + const Function *) { + enum { MaxColumns = 80 }; + std::string Str; + raw_string_ostream OS(Str); + + if (Node->getName().empty()) { + Node->printAsOperand(OS, false); + OS << ":"; + } + + OS << *Node; + std::string OutStr = OS.str(); + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // 
Process string output to make it nicer... + unsigned ColNum = 0; + unsigned LastSpace = 0; + for (unsigned i = 0; i != OutStr.length(); ++i) { + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + ColNum = 0; + LastSpace = 0; + } else if (OutStr[i] == ';') { // Delete comments! + unsigned Idx = OutStr.find('\n', i+1); // Find end of line + OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx); + --i; + } else if (ColNum == MaxColumns) { // Wrap lines. + // Wrap very long names even though we can't find a space. + if (!LastSpace) + LastSpace = i; + OutStr.insert(LastSpace, "\\l..."); + ColNum = i - LastSpace; + LastSpace = 0; + i += 3; // The loop will advance 'i' again. + } + else + ++ColNum; + if (OutStr[i] == ' ') + LastSpace = i; + } + return OutStr; + } + + std::string getNodeLabel(const BasicBlock *Node, + const Function *Graph) { + if (isSimple()) + return getSimpleNodeLabel(Node, Graph); + else + return getCompleteNodeLabel(Node, Graph); + } + + static std::string getEdgeSourceLabel(const BasicBlock *Node, + succ_const_iterator I) { + // Label source of conditional branches with "T" or "F" + if (const BranchInst *BI = dyn_cast<BranchInst>(Node->getTerminator())) + if (BI->isConditional()) + return (I == succ_begin(Node)) ? "T" : "F"; + + // Label source of switch edges with the associated value. + if (const SwitchInst *SI = dyn_cast<SwitchInst>(Node->getTerminator())) { + unsigned SuccNo = I.getSuccessorIndex(); + + if (SuccNo == 0) return "def"; + + std::string Str; + raw_string_ostream OS(Str); + auto Case = *SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); + OS << Case.getCaseValue()->getValue(); + return OS.str(); + } + return ""; + } + + /// Display the raw branch weights from PGO. + std::string getEdgeAttributes(const BasicBlock *Node, succ_const_iterator I, + const Function *F) { + const Instruction *TI = Node->getTerminator(); + if (TI->getNumSuccessors() == 1) + return ""; + + MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); + if (!WeightsNode) + return ""; + + MDString *MDName = cast<MDString>(WeightsNode->getOperand(0)); + if (MDName->getString() != "branch_weights") + return ""; + + unsigned OpNo = I.getSuccessorIndex() + 1; + if (OpNo >= WeightsNode->getNumOperands()) + return ""; + ConstantInt *Weight = + mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(OpNo)); + if (!Weight) + return ""; + + // Prepend a 'W' to indicate that this is a weight rather than the actual + // profile count (due to scaling). + return ("label=\"W:" + Twine(Weight->getZExtValue()) + "\"").str(); + } +}; +} // End llvm namespace + +namespace llvm { + class FunctionPass; + FunctionPass *createCFGPrinterLegacyPassPass (); + FunctionPass *createCFGOnlyPrinterLegacyPassPass (); +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/clang-r353983e/include/llvm/Analysis/CFLAliasAnalysisUtils.h new file mode 100644 index 00000000..02f999a5 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CFLAliasAnalysisUtils.h @@ -0,0 +1,57 @@ +//=- CFLAliasAnalysisUtils.h - Utilities for CFL Alias Analysis ----*- C++-*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +// These are the utilities/helpers used by the CFL Alias Analyses available in +// tree, i.e. Steensgaard's and Andersen's. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H +#define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/ValueHandle.h" + +namespace llvm { +namespace cflaa { + +template <typename AAResult> struct FunctionHandle final : public CallbackVH { + FunctionHandle(Function *Fn, AAResult *Result) + : CallbackVH(Fn), Result(Result) { + assert(Fn != nullptr); + assert(Result != nullptr); + } + + void deleted() override { removeSelfFromCache(); } + void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + +private: + AAResult *Result; + + void removeSelfFromCache() { + assert(Result != nullptr); + auto *Val = getValPtr(); + Result->evict(cast<Function>(Val)); + setValPtr(nullptr); + } +}; + +static inline const Function *parentFunctionOfValue(const Value *Val) { + if (auto *Inst = dyn_cast<Instruction>(Val)) { + auto *Bb = Inst->getParent(); + return Bb->getParent(); + } + + if (auto *Arg = dyn_cast<Argument>(Val)) + return Arg->getParent(); + return nullptr; +} + +} // namespace cflaa +} // namespace llvm + +#endif // LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H diff --git a/clang-r353983e/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/CFLAndersAliasAnalysis.h new file mode 100644 index 00000000..696aaeb6 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -0,0 +1,125 @@ +//==- CFLAndersAliasAnalysis.h - Inclusion-based Alias Analysis -*- C++-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's inclusion-based alias analysis +/// implemented with CFL graph reachability. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H +#define LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysisUtils.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <forward_list> +#include <memory> + +namespace llvm { + +class Function; +class MemoryLocation; +class TargetLibraryInfo; + +namespace cflaa { + +struct AliasSummary; + +} // end namespace cflaa + +class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> { + friend AAResultBase<CFLAndersAAResult>; + + class FunctionInfo; + +public: + explicit CFLAndersAAResult(const TargetLibraryInfo &TLI); + CFLAndersAAResult(CFLAndersAAResult &&RHS); + ~CFLAndersAAResult(); + + /// Handle invalidation events from the new pass manager. + /// By definition, this result is stateless and so remains valid.
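+ /// Returning \c false tells the analysis manager that this result never
+ /// needs to be recomputed in response to IR changes.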
+ bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + + /// Evict the given function from cache + void evict(const Function *Fn); + + /// Get the alias summary for the given function + /// Return nullptr if the summary is not found or not available + const cflaa::AliasSummary *getAliasSummary(const Function &); + + AliasResult query(const MemoryLocation &, const MemoryLocation &); + AliasResult alias(const MemoryLocation &, const MemoryLocation &); + +private: + /// Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional<FunctionInfo> &ensureCached(const Function &); + + /// Inserts the given Function into the cache. + void scan(const Function &); + + /// Build summary for a given function + FunctionInfo buildInfoFrom(const Function &); + + const TargetLibraryInfo &TLI; + + /// Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap<const Function *, Optional<FunctionInfo>> Cache; + + std::forward_list<cflaa::FunctionHandle<CFLAndersAAResult>> Handles; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +/// +/// FIXME: We really should refactor CFL to use the analysis more heavily, and +/// in particular to leverage invalidation to trigger re-computation. +class CFLAndersAA : public AnalysisInfoMixin<CFLAndersAA> { + friend AnalysisInfoMixin<CFLAndersAA>; + + static AnalysisKey Key; + +public: + using Result = CFLAndersAAResult; + + CFLAndersAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the CFLAndersAAResult object. +class CFLAndersAAWrapperPass : public ImmutablePass { + std::unique_ptr<CFLAndersAAResult> Result; + +public: + static char ID; + + CFLAndersAAWrapperPass(); + + CFLAndersAAResult &getResult() { return *Result; } + const CFLAndersAAResult &getResult() const { return *Result; } + + void initializePass() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +// createCFLAndersAAWrapperPass - This pass implements a set-based approach to +// alias analysis. +ImmutablePass *createCFLAndersAAWrapperPass(); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/CFLSteensAliasAnalysis.h new file mode 100644 index 00000000..2d3b43c6 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -0,0 +1,142 @@ +//==- CFLSteensAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's unification-based alias analysis +/// implemented with CFL graph reachability. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H +#define LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysisUtils.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include <forward_list> +#include <memory> + +namespace llvm { + +class Function; +class TargetLibraryInfo; + +namespace cflaa { + +struct AliasSummary; + +} // end namespace cflaa + +class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> { + friend AAResultBase<CFLSteensAAResult>; + + class FunctionInfo; + +public: + explicit CFLSteensAAResult(const TargetLibraryInfo &TLI); + CFLSteensAAResult(CFLSteensAAResult &&Arg); + ~CFLSteensAAResult(); + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + + /// Inserts the given Function into the cache. + void scan(Function *Fn); + + void evict(Function *Fn); + + /// Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional<FunctionInfo> &ensureCached(Function *Fn); + + /// Get the alias summary for the given function + /// Return nullptr if the summary is not found or not available + const cflaa::AliasSummary *getAliasSummary(Function &Fn); + + AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + if (LocA.Ptr == LocB.Ptr) + return MustAlias; + + // Comparisons between global variables and other constants should be + // handled by BasicAA. + // CFLSteensAA may report NoAlias when comparing a GlobalValue and + // ConstantExpr, but every query needs to have at least one Value tied to a + // Function, and neither GlobalValues nor ConstantExprs are. + if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) + return AAResultBase::alias(LocA, LocB); + + AliasResult QueryResult = query(LocA, LocB); + if (QueryResult == MayAlias) + return AAResultBase::alias(LocA, LocB); + + return QueryResult; + } + +private: + const TargetLibraryInfo &TLI; + + /// Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap<Function *, Optional<FunctionInfo>> Cache; + std::forward_list<cflaa::FunctionHandle<CFLSteensAAResult>> Handles; + + FunctionInfo buildSetsFrom(Function *F); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +/// +/// FIXME: We really should refactor CFL to use the analysis more heavily, and +/// in particular to leverage invalidation to trigger re-computation of sets. +class CFLSteensAA : public AnalysisInfoMixin<CFLSteensAA> { + friend AnalysisInfoMixin<CFLSteensAA>; + + static AnalysisKey Key; + +public: + using Result = CFLSteensAAResult; + + CFLSteensAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the CFLSteensAAResult object. 
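+/// Legacy clients typically require this pass in getAnalysisUsage() and read
+/// the result with getAnalysis<CFLSteensAAWrapperPass>().getResult().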
+class CFLSteensAAWrapperPass : public ImmutablePass { + std::unique_ptr<CFLSteensAAResult> Result; + +public: + static char ID; + + CFLSteensAAWrapperPass(); + + CFLSteensAAResult &getResult() { return *Result; } + const CFLSteensAAResult &getResult() const { return *Result; } + + void initializePass() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +// createCFLSteensAAWrapperPass - This pass implements a set-based approach to +// alias analysis. +ImmutablePass *createCFLSteensAAWrapperPass(); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/CGSCCPassManager.h b/clang-r353983e/include/llvm/Analysis/CGSCCPassManager.h new file mode 100644 index 00000000..6d269546 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CGSCCPassManager.h @@ -0,0 +1,880 @@ +//===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This header provides classes for managing passes over SCCs of the call +/// graph. These passes form an important component of LLVM's interprocedural +/// optimizations. Because they operate on the SCCs of the call graph, and they +/// traverse the graph in post-order, they can effectively do pair-wise +/// interprocedural optimizations for all call edges in the program while +/// incrementally refining it and improving the context of these pair-wise +/// optimizations. At each call site edge, the callee has already been +/// optimized as much as is possible. This in turn allows very accurate +/// analysis of it for IPO. +/// +/// A secondary, more general goal is to be able to isolate optimization on +/// unrelated parts of the IR module. This is useful to ensure our +/// optimizations are principled and don't miss opportunities where refinement +/// of one part of the module influences transformations in another part of the +/// module. But this is also useful if we want to parallelize the optimizations +/// across common large module graph shapes which tend to be very wide and have +/// large regions of unrelated cliques. +/// +/// To satisfy these goals, we use the LazyCallGraph which provides two graphs +/// nested inside each other (and built lazily from the bottom-up): the call +/// graph proper, and a reference graph. The reference graph is a superset of +/// the call graph and is a conservative approximation of what could through +/// scalar or CGSCC transforms *become* the call graph. Using this allows us to +/// ensure we optimize functions prior to them being introduced into the call +/// graph by devirtualization or other techniques, and thus ensures that +/// subsequent pair-wise interprocedural optimizations observe the optimized +/// form of these functions. The (potentially transitive) reference +/// reachability used by the reference graph is a conservative approximation +/// that still allows us to have independent regions of the graph. +/// +/// FIXME: There is one major drawback of the reference graph: in its naive +/// form it is quadratic because it contains a distinct edge for each +/// (potentially indirect) reference, even if they are all through some common +/// global table of function pointers.
This can be fixed in a number of ways +/// that essentially preserve enough of the normalization. While it isn't +/// expected to completely preclude the usability of this, it will need to be +/// addressed. +/// +/// +/// All of these issues are made substantially more complex in the face of +/// mutations to the call graph while optimization passes are being run. When +/// mutations to the call graph occur we want to achieve two different things: +/// +/// - We need to update the call graph in-flight and invalidate analyses +/// cached on entities in the graph. Because of the cache-based analysis +/// design of the pass manager, it is essential to have stable identities for +/// the elements of the IR that passes traverse, and to invalidate any +/// analyses cached on these elements as the mutations take place. +/// +/// - We want to preserve the incremental and post-order traversal of the +/// graph even as it is refined and mutated. This means we want optimization +/// to observe the most refined form of the call graph and to do so in +/// post-order. +/// +/// To address this, the CGSCC manager uses both worklists that can be expanded +/// by passes which transform the IR, and provides invalidation tests to skip +/// entries that become dead. This extra data is provided to every SCC pass so +/// that it can carefully update the manager's traversal as the call graph +/// mutates. +/// +/// We also provide support for running function passes within the CGSCC walk, +/// and there we provide automatic update of the call graph including of the +/// pass manager to reflect call graph changes that fall out naturally as part +/// of scalar transformations. +/// +/// The patterns used to ensure the goals of post-order visitation of the fully +/// refined graph: +/// +/// 1) Sink toward the "bottom" as the graph is refined. This means that any +/// iteration continues in some valid post-order sequence after the mutation +/// has altered the structure. +/// +/// 2) Enqueue in post-order, including the current entity. If the current +/// entity's shape changes, it and everything after it in post-order needs +/// to be visited to observe that shape. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H +#define LLVM_ANALYSIS_CGSCCPASSMANAGER_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PriorityWorklist.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <utility> + +namespace llvm { + +struct CGSCCUpdateResult; +class Module; + +// Allow debug logging in this inline function. +#define DEBUG_TYPE "cgscc" + +/// Extern template declaration for the analysis set for this IR unit. +extern template class AllAnalysesOn<LazyCallGraph::SCC>; + +extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; + +/// The CGSCC analysis manager. +/// +/// See the documentation for the AnalysisManager template for detail +/// documentation. This type serves as a convenient way to refer to this +/// construct in the adaptors and proxies used to integrate this into the larger +/// pass manager infrastructure. 
+using CGSCCAnalysisManager = + AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; + +// Explicit specialization and instantiation declarations for the pass manager. +// See the comments on the definition of the specialization for details on how +// it differs from the primary template. +template <> +PreservedAnalyses +PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, + CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC, + CGSCCAnalysisManager &AM, + LazyCallGraph &G, CGSCCUpdateResult &UR); +extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &>; + +/// The CGSCC pass manager. +/// +/// See the documentation for the PassManager template for details. It runs +/// a sequence of SCC passes over each SCC that the manager is run over. This +/// type serves as a convenient way to refer to this construct. +using CGSCCPassManager = + PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, + CGSCCUpdateResult &>; + +/// An explicit specialization of the require analysis template pass. +template <typename AnalysisT> +struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &> + : PassInfoMixin<RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, + CGSCCAnalysisManager, LazyCallGraph &, + CGSCCUpdateResult &>> { + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &) { + (void)AM.template getResult<AnalysisT>(C, CG); + return PreservedAnalyses::all(); + } +}; + +/// A proxy from a \c CGSCCAnalysisManager to a \c Module. +using CGSCCAnalysisManagerModuleProxy = + InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; + +/// We need a specialized result for the \c CGSCCAnalysisManagerModuleProxy so +/// it can have access to the call graph in order to walk all the SCCs when +/// invalidating things. +template <> class CGSCCAnalysisManagerModuleProxy::Result { +public: + explicit Result(CGSCCAnalysisManager &InnerAM, LazyCallGraph &G) + : InnerAM(&InnerAM), G(&G) {} + + /// Accessor for the analysis manager. + CGSCCAnalysisManager &getManager() { return *InnerAM; } + + /// Handler for invalidation of the Module. + /// + /// If the proxy analysis itself is preserved, then we assume that the set of + /// SCCs in the Module hasn't changed. Thus any pointers to SCCs in the + /// CGSCCAnalysisManager are still valid, and we don't need to call \c clear + /// on the CGSCCAnalysisManager. + /// + /// Regardless of whether this analysis is marked as preserved, all of the + /// analyses in the \c CGSCCAnalysisManager are potentially invalidated based + /// on the set of preserved analyses. + bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv); + +private: + CGSCCAnalysisManager *InnerAM; + LazyCallGraph *G; +}; + +/// Provide a specialized run method for the \c CGSCCAnalysisManagerModuleProxy +/// so it can pass the lazy call graph to the result. +template <> +CGSCCAnalysisManagerModuleProxy::Result +CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM); + +// Ensure the \c CGSCCAnalysisManagerModuleProxy is provided as an extern +// template. +extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; + +extern template class OuterAnalysisManagerProxy< + ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>; + +/// A proxy from a \c ModuleAnalysisManager to an \c SCC. 
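The RequireAnalysisPass specialization above also spells out the run signature every CGSCC pass provides: the SCC itself, the CGSCCAnalysisManager, the LazyCallGraph, and a CGSCCUpdateResult for reporting graph mutations. A minimal sketch of a conforming pass (the name NoOpSCCPass is hypothetical):

// Minimal sketch of the CGSCC pass shape consumed by CGSCCPassManager;
// assumes this header is included. The pass deliberately changes nothing.
struct NoOpSCCPass : llvm::PassInfoMixin<NoOpSCCPass> {
  llvm::PreservedAnalyses run(llvm::LazyCallGraph::SCC &C,
                              llvm::CGSCCAnalysisManager &AM,
                              llvm::LazyCallGraph &CG,
                              llvm::CGSCCUpdateResult &UR) {
    // A pass that mutates nothing may preserve everything; a pass that
    // restructures the call graph must record its changes in UR instead.
    return llvm::PreservedAnalyses::all();
  }
};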
+using ModuleAnalysisManagerCGSCCProxy = + OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC, + LazyCallGraph &>; + +/// Support structure for SCC passes to communicate updates the call graph back +/// to the CGSCC pass manager infrsatructure. +/// +/// The CGSCC pass manager runs SCC passes which are allowed to update the call +/// graph and SCC structures. This means the structure the pass manager works +/// on is mutating underneath it. In order to support that, there needs to be +/// careful communication about the precise nature and ramifications of these +/// updates to the pass management infrastructure. +/// +/// All SCC passes will have to accept a reference to the management layer's +/// update result struct and use it to reflect the results of any CG updates +/// performed. +/// +/// Passes which do not change the call graph structure in any way can just +/// ignore this argument to their run method. +struct CGSCCUpdateResult { + /// Worklist of the RefSCCs queued for processing. + /// + /// When a pass refines the graph and creates new RefSCCs or causes them to + /// have a different shape or set of component SCCs it should add the RefSCCs + /// to this worklist so that we visit them in the refined form. + /// + /// This worklist is in reverse post-order, as we pop off the back in order + /// to observe RefSCCs in post-order. When adding RefSCCs, clients should add + /// them in reverse post-order. + SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> &RCWorklist; + + /// Worklist of the SCCs queued for processing. + /// + /// When a pass refines the graph and creates new SCCs or causes them to have + /// a different shape or set of component functions it should add the SCCs to + /// this worklist so that we visit them in the refined form. + /// + /// Note that if the SCCs are part of a RefSCC that is added to the \c + /// RCWorklist, they don't need to be added here as visiting the RefSCC will + /// be sufficient to re-visit the SCCs within it. + /// + /// This worklist is in reverse post-order, as we pop off the back in order + /// to observe SCCs in post-order. When adding SCCs, clients should add them + /// in reverse post-order. + SmallPriorityWorklist<LazyCallGraph::SCC *, 1> &CWorklist; + + /// The set of invalidated RefSCCs which should be skipped if they are found + /// in \c RCWorklist. + /// + /// This is used to quickly prune out RefSCCs when they get deleted and + /// happen to already be on the worklist. We use this primarily to avoid + /// scanning the list and removing entries from it. + SmallPtrSetImpl<LazyCallGraph::RefSCC *> &InvalidatedRefSCCs; + + /// The set of invalidated SCCs which should be skipped if they are found + /// in \c CWorklist. + /// + /// This is used to quickly prune out SCCs when they get deleted and happen + /// to already be on the worklist. We use this primarily to avoid scanning + /// the list and removing entries from it. + SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs; + + /// If non-null, the updated current \c RefSCC being processed. + /// + /// This is set when a graph refinement takes place an the "current" point in + /// the graph moves "down" or earlier in the post-order walk. This will often + /// cause the "current" RefSCC to be a newly created RefSCC object and the + /// old one to be added to the above worklist. When that happens, this + /// pointer is non-null and can be used to continue processing the "top" of + /// the post-order walk. 
+ LazyCallGraph::RefSCC *UpdatedRC; + + /// If non-null, the updated current \c SCC being processed. + /// + /// This is set when a graph refinement takes place an the "current" point in + /// the graph moves "down" or earlier in the post-order walk. This will often + /// cause the "current" SCC to be a newly created SCC object and the old one + /// to be added to the above worklist. When that happens, this pointer is + /// non-null and can be used to continue processing the "top" of the + /// post-order walk. + LazyCallGraph::SCC *UpdatedC; + + /// A hacky area where the inliner can retain history about inlining + /// decisions that mutated the call graph's SCC structure in order to avoid + /// infinite inlining. See the comments in the inliner's CG update logic. + /// + /// FIXME: Keeping this here seems like a big layering issue, we should look + /// for a better technique. + SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> + &InlinedInternalEdges; +}; + +/// The core module pass which does a post-order walk of the SCCs and +/// runs a CGSCC pass over each one. +/// +/// Designed to allow composition of a CGSCCPass(Manager) and +/// a ModulePassManager. Note that this pass must be run with a module analysis +/// manager as it uses the LazyCallGraph analysis. It will also run the +/// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC +/// pass over the module to enable a \c FunctionAnalysisManager to be used +/// within this run safely. +template <typename CGSCCPassT> +class ModuleToPostOrderCGSCCPassAdaptor + : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> { +public: + explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) + : Pass(std::move(Pass)) {} + + // We have to explicitly define all the special member functions because MSVC + // refuses to generate them. + ModuleToPostOrderCGSCCPassAdaptor( + const ModuleToPostOrderCGSCCPassAdaptor &Arg) + : Pass(Arg.Pass) {} + + ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg) + : Pass(std::move(Arg.Pass)) {} + + friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS, + ModuleToPostOrderCGSCCPassAdaptor &RHS) { + std::swap(LHS.Pass, RHS.Pass); + } + + ModuleToPostOrderCGSCCPassAdaptor & + operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) { + swap(*this, RHS); + return *this; + } + + /// Runs the CGSCC pass across every SCC in the module. + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { + // Setup the CGSCC analysis manager from its proxy. + CGSCCAnalysisManager &CGAM = + AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); + + // Get the call graph for this module. + LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); + + // We keep worklists to allow us to push more work onto the pass manager as + // the passes are run. + SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; + SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; + + // Keep sets for invalidated SCCs and RefSCCs that should be skipped when + // iterating off the worklists. + SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; + SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; + + SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> + InlinedInternalEdges; + + CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet, + InvalidSCCSet, nullptr, nullptr, + InlinedInternalEdges}; + + // Request PassInstrumentation from analysis manager, will use it to run + // instrumenting callbacks for the passes later. 
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); + + PreservedAnalyses PA = PreservedAnalyses::all(); + CG.buildRefSCCs(); + for (auto RCI = CG.postorder_ref_scc_begin(), + RCE = CG.postorder_ref_scc_end(); + RCI != RCE;) { + assert(RCWorklist.empty() && + "Should always start with an empty RefSCC worklist"); + // The postorder_ref_sccs range we are walking is lazily constructed, so + // we only push the first one onto the worklist. The worklist allows us + // to capture *new* RefSCCs created during transformations. + // + // We really want to form RefSCCs lazily because that makes them cheaper + // to update as the program is simplified and allows us to have greater + // cache locality as forming a RefSCC touches all the parts of all the + // functions within that RefSCC. + // + // We also eagerly increment the iterator to the next position because + // the CGSCC passes below may delete the current RefSCC. + RCWorklist.insert(&*RCI++); + + do { + LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); + if (InvalidRefSCCSet.count(RC)) { + LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); + continue; + } + + assert(CWorklist.empty() && + "Should always start with an empty SCC worklist"); + + LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC + << "\n"); + + // Push the initial SCCs in reverse post-order as we'll pop off the + // back and so see this in post-order. + for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) + CWorklist.insert(&C); + + do { + LazyCallGraph::SCC *C = CWorklist.pop_back_val(); + // Due to call graph mutations, we may have invalid SCCs or SCCs from + // other RefSCCs in the worklist. The invalid ones are dead and the + // other RefSCCs should be queued above, so we just need to skip both + // scenarios here. + if (InvalidSCCSet.count(C)) { + LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n"); + continue; + } + if (&C->getOuterRefSCC() != RC) { + LLVM_DEBUG(dbgs() + << "Skipping an SCC that is now part of some other " + "RefSCC...\n"); + continue; + } + + do { + // Check that we didn't miss any update scenario. + assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + assert(&C->getOuterRefSCC() == RC && + "Processing an SCC in a different RefSCC!"); + + UR.UpdatedRC = nullptr; + UR.UpdatedC = nullptr; + + // Check the PassInstrumentation's BeforePass callbacks before + // running the pass, skip its execution completely if asked to + // (callback returns false). + if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) + continue; + + PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR); + + if (UR.InvalidatedSCCs.count(C)) + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); + else + PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); + + // Update the SCC and RefSCC if necessary. + C = UR.UpdatedC ? UR.UpdatedC : C; + RC = UR.UpdatedRC ? UR.UpdatedRC : RC; + + // If the CGSCC pass wasn't able to provide a valid updated SCC, + // the current SCC may simply need to be skipped if invalid. + if (UR.InvalidatedSCCs.count(C)) { + LLVM_DEBUG(dbgs() + << "Skipping invalidated root or island SCC!\n"); + break; + } + // Check that we didn't miss any update scenario. + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // We handle invalidating the CGSCC analysis manager's information + // for the (potentially updated) SCC here. 
Note that any other SCCs + // whose structure has changed should have been invalidated by + // whatever was updating the call graph. This SCC gets invalidated + // late as it contains the nodes that were actively being + // processed. + CGAM.invalidate(*C, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + PA.intersect(std::move(PassPA)); + + // The pass may have restructured the call graph and refined the + // current SCC and/or RefSCC. We need to update our current SCC and + // RefSCC pointers to follow these. Also, when the current SCC is + // refined, re-run the SCC pass over the newly refined SCC in order + // to observe the most precise SCC model available. This inherently + // cannot cycle excessively as it only happens when we split SCCs + // apart, at most converging on a DAG of single nodes. + // FIXME: If we ever start having RefSCC passes, we'll want to + // iterate there too. + if (UR.UpdatedC) + LLVM_DEBUG(dbgs() + << "Re-running SCC passes after a refinement of the " + "current SCC: " + << *UR.UpdatedC << "\n"); + + // Note that both `C` and `RC` may at this point refer to deleted, + // invalid SCC and RefSCCs respectively. But we will short circuit + // the processing when we check them in the loop above. + } while (UR.UpdatedC); + } while (!CWorklist.empty()); + + // We only need to keep internal inlined edge information within + // a RefSCC, clear it to save on space and let the next time we visit + // any of these functions have a fresh start. + InlinedInternalEdges.clear(); + } while (!RCWorklist.empty()); + } + + // By definition we preserve the call garph, all SCC analyses, and the + // analysis proxies by handling them above and in any nested pass managers. + PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); + PA.preserve<LazyCallGraphAnalysis>(); + PA.preserve<CGSCCAnalysisManagerModuleProxy>(); + PA.preserve<FunctionAnalysisManagerModuleProxy>(); + return PA; + } + +private: + CGSCCPassT Pass; +}; + +/// A function to deduce a function pass type and wrap it in the +/// templated adaptor. +template <typename CGSCCPassT> +ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT> +createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { + return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)); +} + +/// A proxy from a \c FunctionAnalysisManager to an \c SCC. +/// +/// When a module pass runs and triggers invalidation, both the CGSCC and +/// Function analysis manager proxies on the module get an invalidation event. +/// We don't want to fully duplicate responsibility for most of the +/// invalidation logic. Instead, this layer is only responsible for SCC-local +/// invalidation events. We work with the module's FunctionAnalysisManager to +/// invalidate function analyses. +class FunctionAnalysisManagerCGSCCProxy + : public AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy> { +public: + class Result { + public: + explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {} + + /// Accessor for the analysis manager. + FunctionAnalysisManager &getManager() { return *FAM; } + + bool invalidate(LazyCallGraph::SCC &C, const PreservedAnalyses &PA, + CGSCCAnalysisManager::Invalidator &Inv); + + private: + FunctionAnalysisManager *FAM; + }; + + /// Computes the \c FunctionAnalysisManager and stores it in the result proxy. 
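Within a CGSCC pass, per-function analyses are reached through this proxy rather than through a module-level manager. A hedged sketch of the lookup pattern (the pass name is hypothetical; the same pattern appears in the function-pass adaptor below):

// Sketch: fetch the FunctionAnalysisManager via the proxy, then query a
// function analysis for each node of the SCC.
struct PerFunctionQuerySCCPass : llvm::PassInfoMixin<PerFunctionQuerySCCPass> {
  llvm::PreservedAnalyses run(llvm::LazyCallGraph::SCC &C,
                              llvm::CGSCCAnalysisManager &AM,
                              llvm::LazyCallGraph &CG,
                              llvm::CGSCCUpdateResult &) {
    llvm::FunctionAnalysisManager &FAM =
        AM.getResult<llvm::FunctionAnalysisManagerCGSCCProxy>(C, CG)
            .getManager();
    for (llvm::LazyCallGraph::Node &N : C) {
      llvm::Function &F = N.getFunction();
      // Any function analysis works here; PassInstrumentationAnalysis is used
      // only to show the getResult pattern.
      (void)FAM.getResult<llvm::PassInstrumentationAnalysis>(F);
    }
    return llvm::PreservedAnalyses::all();
  }
};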
+ Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &); + +private: + friend AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy>; + + static AnalysisKey Key; +}; + +extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; + +/// A proxy from a \c CGSCCAnalysisManager to a \c Function. +using CGSCCAnalysisManagerFunctionProxy = + OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; + +/// Helper to update the call graph after running a function pass. +/// +/// Function passes can only mutate the call graph in specific ways. This +/// routine provides a helper that updates the call graph in those ways +/// including returning whether any changes were made and populating a CG +/// update result struct for the overall CGSCC walk. +LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass( + LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR); + +/// Adaptor that maps from a SCC to its functions. +/// +/// Designed to allow composition of a FunctionPass(Manager) and +/// a CGSCCPassManager. Note that if this pass is constructed with a pointer +/// to a \c CGSCCAnalysisManager it will run the +/// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function +/// pass over the SCC to enable a \c FunctionAnalysisManager to be used +/// within this run safely. +template <typename FunctionPassT> +class CGSCCToFunctionPassAdaptor + : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> { +public: + explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass) + : Pass(std::move(Pass)) {} + + // We have to explicitly define all the special member functions because MSVC + // refuses to generate them. + CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg) + : Pass(Arg.Pass) {} + + CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) + : Pass(std::move(Arg.Pass)) {} + + friend void swap(CGSCCToFunctionPassAdaptor &LHS, + CGSCCToFunctionPassAdaptor &RHS) { + std::swap(LHS.Pass, RHS.Pass); + } + + CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) { + swap(*this, RHS); + return *this; + } + + /// Runs the function pass across every function in the module. + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR) { + // Setup the function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + + SmallVector<LazyCallGraph::Node *, 4> Nodes; + for (LazyCallGraph::Node &N : C) + Nodes.push_back(&N); + + // The SCC may get split while we are optimizing functions due to deleting + // edges. If this happens, the current SCC can shift, so keep track of + // a pointer we can overwrite. + LazyCallGraph::SCC *CurrentC = &C; + + LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C + << "\n"); + + PreservedAnalyses PA = PreservedAnalyses::all(); + for (LazyCallGraph::Node *N : Nodes) { + // Skip nodes from other SCCs. These may have been split out during + // processing. We'll eventually visit those SCCs and pick up the nodes + // there. 
+ if (CG.lookupSCC(*N) != CurrentC) + continue; + + Function &F = N->getFunction(); + + PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); + if (!PI.runBeforePass<Function>(Pass, F)) + continue; + + PreservedAnalyses PassPA = Pass.run(F, FAM); + + PI.runAfterPass<Function>(Pass, F); + + // We know that the function pass couldn't have invalidated any other + // function's analyses (that's the contract of a function pass), so + // directly handle the function analysis manager's invalidation here. + FAM.invalidate(F, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + PA.intersect(std::move(PassPA)); + + // If the call graph hasn't been preserved, update it based on this + // function pass. This may also update the current SCC to point to + // a smaller, more refined SCC. + auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { + CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, + AM, UR); + assert( + CG.lookupSCC(*N) == CurrentC && + "Current SCC not updated to the SCC containing the current node!"); + } + } + + // By definition we preserve the proxy. And we preserve all analyses on + // Functions. This precludes *any* invalidation of function analyses by the + // proxy, but that's OK because we've taken care to invalidate analyses in + // the function analysis manager incrementally above. + PA.preserveSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + + // We've also ensured that we updated the call graph along the way. + PA.preserve<LazyCallGraphAnalysis>(); + + return PA; + } + +private: + FunctionPassT Pass; +}; + +/// A function to deduce a function pass type and wrap it in the +/// templated adaptor. +template <typename FunctionPassT> +CGSCCToFunctionPassAdaptor<FunctionPassT> +createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { + return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); +} + +/// A helper that repeats an SCC pass each time an indirect call is refined to +/// a direct call by that pass. +/// +/// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they +/// change shape, we may also want to repeat an SCC pass if it simply refines +/// an indirect call to a direct call, even if doing so does not alter the +/// shape of the graph. Note that this only pertains to direct calls to +/// functions where IPO across the SCC may be able to compute more precise +/// results. For intrinsics, we assume scalar optimizations already can fully +/// reason about them. +/// +/// This repetition has the potential to be very large however, as each one +/// might refine a single call site. As a consequence, in practice we use an +/// upper bound on the number of repetitions to limit things. +template <typename PassT> +class DevirtSCCRepeatedPass + : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> { +public: + explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations) + : Pass(std::move(Pass)), MaxIterations(MaxIterations) {} + + /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating + /// whenever an indirect call is refined. 
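These adaptors compose: a function pass can be wrapped into a CGSCC pass, that CGSCC pass into a module pass, and the whole SCC pipeline can additionally be wrapped in DevirtSCCRepeatedPass so it reruns when an indirect call becomes direct. A hedged sketch of wiring this into a ModulePassManager; MyFunctionPass is a hypothetical placeholder, the iteration bound of 4 is arbitrary, and createDevirtSCCRepeatedPass is declared near the end of this header.

// Sketch of composing the adaptors declared in this header.
void buildExamplePipeline(llvm::ModulePassManager &MPM) {
  MPM.addPass(llvm::createModuleToPostOrderCGSCCPassAdaptor(
      llvm::createDevirtSCCRepeatedPass(
          llvm::createCGSCCToFunctionPassAdaptor(MyFunctionPass()),
          /*MaxIterations=*/4)));
}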
+ PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR) { + PreservedAnalyses PA = PreservedAnalyses::all(); + PassInstrumentation PI = + AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); + + // The SCC may be refined while we are running passes over it, so set up + // a pointer that we can update. + LazyCallGraph::SCC *C = &InitialC; + + // Collect value handles for all of the indirect call sites. + SmallVector<WeakTrackingVH, 8> CallHandles; + + // Struct to track the counts of direct and indirect calls in each function + // of the SCC. + struct CallCount { + int Direct; + int Indirect; + }; + + // Put value handles on all of the indirect calls and return the number of + // direct calls for each function in the SCC. + auto ScanSCC = [](LazyCallGraph::SCC &C, + SmallVectorImpl<WeakTrackingVH> &CallHandles) { + assert(CallHandles.empty() && "Must start with a clear set of handles."); + + SmallVector<CallCount, 4> CallCounts; + for (LazyCallGraph::Node &N : C) { + CallCounts.push_back({0, 0}); + CallCount &Count = CallCounts.back(); + for (Instruction &I : instructions(N.getFunction())) + if (auto CS = CallSite(&I)) { + if (CS.getCalledFunction()) { + ++Count.Direct; + } else { + ++Count.Indirect; + CallHandles.push_back(WeakTrackingVH(&I)); + } + } + } + + return CallCounts; + }; + + // Populate the initial call handles and get the initial call counts. + auto CallCounts = ScanSCC(*C, CallHandles); + + for (int Iteration = 0;; ++Iteration) { + + if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) + continue; + + PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR); + + if (UR.InvalidatedSCCs.count(C)) + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); + else + PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); + + // If the SCC structure has changed, bail immediately and let the outer + // CGSCC layer handle any iteration to reflect the refined structure. + if (UR.UpdatedC && UR.UpdatedC != C) { + PA.intersect(std::move(PassPA)); + break; + } + + // Check that we didn't miss any update scenario. + assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + assert((int)CallCounts.size() == C->size() && + "Cannot have changed the size of the SCC!"); + + // Check whether any of the handles were devirtualized. + auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { + if (!CallH) + return false; + auto CS = CallSite(CallH); + if (!CS) + return false; + + // If the call is still indirect, leave it alone. + Function *F = CS.getCalledFunction(); + if (!F) + return false; + + LLVM_DEBUG(dbgs() << "Found devirutalized call from " + << CS.getParent()->getParent()->getName() << " to " + << F->getName() << "\n"); + + // We now have a direct call where previously we had an indirect call, + // so iterate to process this devirtualization site. + return true; + }; + bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); + + // Rescan to build up a new set of handles and count how many direct + // calls remain. If we decide to iterate, this also sets up the input to + // the next iteration. + CallHandles.clear(); + auto NewCallCounts = ScanSCC(*C, CallHandles); + + // If we haven't found an explicit devirtualization already see if we + // have decreased the number of indirect calls and increased the number + // of direct calls for any function in the SCC. 
This can be fooled by all + // manner of transformations such as DCE and other things, but seems to + // work well in practice. + if (!Devirt) + for (int i = 0, Size = C->size(); i < Size; ++i) + if (CallCounts[i].Indirect > NewCallCounts[i].Indirect && + CallCounts[i].Direct < NewCallCounts[i].Direct) { + Devirt = true; + break; + } + + if (!Devirt) { + PA.intersect(std::move(PassPA)); + break; + } + + // Otherwise, if we've already hit our max, we're done. + if (Iteration >= MaxIterations) { + LLVM_DEBUG( + dbgs() << "Found another devirtualization after hitting the max " + "number of repetitions (" + << MaxIterations << ") on SCC: " << *C << "\n"); + PA.intersect(std::move(PassPA)); + break; + } + + LLVM_DEBUG( + dbgs() + << "Repeating an SCC pass after finding a devirtualization in: " << *C + << "\n"); + + // Move over the new call counts in preparation for iterating. + CallCounts = std::move(NewCallCounts); + + // Update the analysis manager with each run and intersect the total set + // of preserved analyses so we're ready to iterate. + AM.invalidate(*C, PassPA); + PA.intersect(std::move(PassPA)); + } + + // Note that we don't add any preserved entries here unlike a more normal + // "pass manager" because we only handle invalidation *between* iterations, + // not after the last iteration. + return PA; + } + +private: + PassT Pass; + int MaxIterations; +}; + +/// A function to deduce a function pass type and wrap it in the +/// templated adaptor. +template <typename PassT> +DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass, + int MaxIterations) { + return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations); +} + +// Clear out the debug logging macro. +#undef DEBUG_TYPE + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CGSCCPASSMANAGER_H diff --git a/clang-r353983e/include/llvm/Analysis/CallGraph.h b/clang-r353983e/include/llvm/Analysis/CallGraph.h new file mode 100644 index 00000000..a743cbcc --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CallGraph.h @@ -0,0 +1,508 @@ +//===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides interfaces used to build and manipulate a call graph, +/// which is a very useful tool for interprocedural optimization. +/// +/// Every function in a module is represented as a node in the call graph. The +/// callgraph node keeps track of which functions are called by the function +/// corresponding to the node. +/// +/// A call graph may contain nodes where the function that they correspond to +/// is null. These 'external' nodes are used to represent control flow that is +/// not represented (or analyzable) in the module. In particular, this +/// analysis builds one external node such that: +/// 1. All functions in the module without internal linkage will have edges +/// from this external node, indicating that they could be called by +/// functions outside of the module. +/// 2. All functions whose address is used for something more than a direct +/// call, for example being stored into a memory location will also have +/// an edge from this external node. Since they may be called by an +/// unknown caller later, they must be tracked as such. 
+/// +/// There is a second external node added for calls that leave this module. +/// Functions have a call edge to the external node iff: +/// 1. The function is external, reflecting the fact that they could call +/// anything without internal linkage or that has its address taken. +/// 2. The function contains an indirect function call. +/// +/// As an extension in the future, there may be multiple nodes with a null +/// function. These will be used when we can prove (through pointer analysis) +/// that an indirect call site can call only a specific set of functions. +/// +/// Because of these properties, the CallGraph captures a conservative superset +/// of all of the caller-callee relationships, which is useful for +/// transformations. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CALLGRAPH_H +#define LLVM_ANALYSIS_CALLGRAPH_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <cassert> +#include <map> +#include <memory> +#include <utility> +#include <vector> + +namespace llvm { + +class CallGraphNode; +class Module; +class raw_ostream; + +/// The basic data container for the call graph of a \c Module of IR. +/// +/// This class exposes both the interface to the call graph for a module of IR. +/// +/// The core call graph itself can also be updated to reflect changes to the IR. +class CallGraph { + Module &M; + + using FunctionMapTy = + std::map<const Function *, std::unique_ptr<CallGraphNode>>; + + /// A map from \c Function* to \c CallGraphNode*. + FunctionMapTy FunctionMap; + + /// This node has edges to all external functions and those internal + /// functions that have their address taken. + CallGraphNode *ExternalCallingNode; + + /// This node has edges to it from all functions making indirect calls + /// or calling an external function. + std::unique_ptr<CallGraphNode> CallsExternalNode; + + /// Replace the function represented by this node by another. + /// + /// This does not rescan the body of the function, so it is suitable when + /// splicing the body of one function to another while also updating all + /// callers from the old function to the new. + void spliceFunction(const Function *From, const Function *To); + + /// Add a function to the call graph, and link the node to all of the + /// functions that it calls. + void addToCallGraph(Function *F); + +public: + explicit CallGraph(Module &M); + CallGraph(CallGraph &&Arg); + ~CallGraph(); + + void print(raw_ostream &OS) const; + void dump() const; + + using iterator = FunctionMapTy::iterator; + using const_iterator = FunctionMapTy::const_iterator; + + /// Returns the module the call graph corresponds to. + Module &getModule() const { return M; } + + inline iterator begin() { return FunctionMap.begin(); } + inline iterator end() { return FunctionMap.end(); } + inline const_iterator begin() const { return FunctionMap.begin(); } + inline const_iterator end() const { return FunctionMap.end(); } + + /// Returns the call graph node for the provided function. + inline const CallGraphNode *operator[](const Function *F) const { + const_iterator I = FunctionMap.find(F); + assert(I != FunctionMap.end() && "Function not in callgraph!"); + return I->second.get(); + } + + /// Returns the call graph node for the provided function. 
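A short sketch of building the eager CallGraph for a module and looking up a single node with operator[]; the function name "main" is only an example.

// Sketch: construct the call graph and inspect one node.
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

void inspectCallGraph(llvm::Module &M) {
  llvm::CallGraph CG(M);                          // scans every function in M
  if (llvm::Function *F = M.getFunction("main")) {
    const llvm::CallGraphNode *N = CG[F];
    llvm::errs() << "main has " << N->size() << " call edges\n";
  }
}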
+ inline CallGraphNode *operator[](const Function *F) { + const_iterator I = FunctionMap.find(F); + assert(I != FunctionMap.end() && "Function not in callgraph!"); + return I->second.get(); + } + + /// Returns the \c CallGraphNode which is used to represent + /// undetermined calls into the callgraph. + CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } + + CallGraphNode *getCallsExternalNode() const { + return CallsExternalNode.get(); + } + + //===--------------------------------------------------------------------- + // Functions to keep a call graph up to date with a function that has been + // modified. + // + + /// Unlink the function from this module, returning it. + /// + /// Because this removes the function from the module, the call graph node is + /// destroyed. This is only valid if the function does not call any other + /// functions (ie, there are no edges in it's CGN). The easiest way to do + /// this is to dropAllReferences before calling this. + Function *removeFunctionFromModule(CallGraphNode *CGN); + + /// Similar to operator[], but this will insert a new CallGraphNode for + /// \c F if one does not already exist. + CallGraphNode *getOrInsertFunction(const Function *F); +}; + +/// A node in the call graph for a module. +/// +/// Typically represents a function in the call graph. There are also special +/// "null" nodes used to represent theoretical entries in the call graph. +class CallGraphNode { +public: + /// A pair of the calling instruction (a call or invoke) + /// and the call graph node being called. + using CallRecord = std::pair<WeakTrackingVH, CallGraphNode *>; + +public: + using CalledFunctionsVector = std::vector<CallRecord>; + + /// Creates a node for the specified function. + inline CallGraphNode(Function *F) : F(F) {} + + CallGraphNode(const CallGraphNode &) = delete; + CallGraphNode &operator=(const CallGraphNode &) = delete; + + ~CallGraphNode() { + assert(NumReferences == 0 && "Node deleted while references remain"); + } + + using iterator = std::vector<CallRecord>::iterator; + using const_iterator = std::vector<CallRecord>::const_iterator; + + /// Returns the function that this call graph node represents. + Function *getFunction() const { return F; } + + inline iterator begin() { return CalledFunctions.begin(); } + inline iterator end() { return CalledFunctions.end(); } + inline const_iterator begin() const { return CalledFunctions.begin(); } + inline const_iterator end() const { return CalledFunctions.end(); } + inline bool empty() const { return CalledFunctions.empty(); } + inline unsigned size() const { return (unsigned)CalledFunctions.size(); } + + /// Returns the number of other CallGraphNodes in this CallGraph that + /// reference this node in their callee list. + unsigned getNumReferences() const { return NumReferences; } + + /// Returns the i'th called function. + CallGraphNode *operator[](unsigned i) const { + assert(i < CalledFunctions.size() && "Invalid index"); + return CalledFunctions[i].second; + } + + /// Print out this call graph node. + void dump() const; + void print(raw_ostream &OS) const; + + //===--------------------------------------------------------------------- + // Methods to keep a call graph up to date with a function that has been + // modified + // + + /// Removes all edges from this CallGraphNode to any functions it + /// calls. 
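The iterators above make it straightforward to walk every recorded call edge. A hedged sketch that prints caller/callee pairs, treating nodes with a null function as the external nodes described in the file comment:

// Sketch: enumerate all edges of a CallGraph.
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/raw_ostream.h"

static llvm::StringRef nodeName(const llvm::CallGraphNode *N) {
  if (const llvm::Function *F = N->getFunction())
    return F->getName();
  return "<external>";
}

void printEdges(const llvm::CallGraph &CG, llvm::raw_ostream &OS) {
  for (const auto &Entry : CG) {          // (Function *, unique_ptr<node>) pairs
    const llvm::CallGraphNode *Caller = Entry.second.get();
    for (const auto &Record : *Caller)    // CallRecord: (call instruction, callee)
      OS << nodeName(Caller) << " -> " << nodeName(Record.second) << "\n";
  }
}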
+ void removeAllCalledFunctions() { + while (!CalledFunctions.empty()) { + CalledFunctions.back().second->DropRef(); + CalledFunctions.pop_back(); + } + } + + /// Moves all the callee information from N to this node. + void stealCalledFunctionsFrom(CallGraphNode *N) { + assert(CalledFunctions.empty() && + "Cannot steal callsite information if I already have some"); + std::swap(CalledFunctions, N->CalledFunctions); + } + + /// Adds a function to the list of functions called by this one. + void addCalledFunction(CallSite CS, CallGraphNode *M) { + assert(!CS.getInstruction() || !CS.getCalledFunction() || + !CS.getCalledFunction()->isIntrinsic() || + !Intrinsic::isLeaf(CS.getCalledFunction()->getIntrinsicID())); + CalledFunctions.emplace_back(CS.getInstruction(), M); + M->AddRef(); + } + + void removeCallEdge(iterator I) { + I->second->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); + } + + /// Removes the edge in the node for the specified call site. + /// + /// Note that this method takes linear time, so it should be used sparingly. + void removeCallEdgeFor(CallSite CS); + + /// Removes all call edges from this node to the specified callee + /// function. + /// + /// This takes more time to execute than removeCallEdgeTo, so it should not + /// be used unless necessary. + void removeAnyCallEdgeTo(CallGraphNode *Callee); + + /// Removes one edge associated with a null callsite from this node to + /// the specified callee function. + void removeOneAbstractEdgeTo(CallGraphNode *Callee); + + /// Replaces the edge in the node for the specified call site with a + /// new one. + /// + /// Note that this method takes linear time, so it should be used sparingly. + void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode); + +private: + friend class CallGraph; + + Function *F; + + std::vector<CallRecord> CalledFunctions; + + /// The number of times that this CallGraphNode occurs in the + /// CalledFunctions array of this or other CallGraphNodes. + unsigned NumReferences = 0; + + void DropRef() { --NumReferences; } + void AddRef() { ++NumReferences; } + + /// A special function that should only be used by the CallGraph class. + void allReferencesDropped() { NumReferences = 0; } +}; + +/// An analysis pass to compute the \c CallGraph for a \c Module. +/// +/// This class implements the concept of an analysis pass used by the \c +/// ModuleAnalysisManager to run an analysis over a module and cache the +/// resulting data. +class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { + friend AnalysisInfoMixin<CallGraphAnalysis>; + + static AnalysisKey Key; + +public: + /// A formulaic type to inform clients of the result type. + using Result = CallGraph; + + /// Compute the \c CallGraph for the module \c M. + /// + /// The real work here is done in the \c CallGraph constructor. + CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } +}; + +/// Printer pass for the \c CallGraphAnalysis results. +class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { + raw_ostream &OS; + +public: + explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +/// The \c ModulePass which wraps up a \c CallGraph and the logic to +/// build it. +/// +/// This class exposes both the interface to the call graph container and the +/// module pass which runs over a module of IR and produces the call graph. 
The +/// call graph interface is entirelly a wrapper around a \c CallGraph object +/// which is stored internally for each module. +class CallGraphWrapperPass : public ModulePass { + std::unique_ptr<CallGraph> G; + +public: + static char ID; // Class identification, replacement for typeinfo + + CallGraphWrapperPass(); + ~CallGraphWrapperPass() override; + + /// The internal \c CallGraph around which the rest of this interface + /// is wrapped. + const CallGraph &getCallGraph() const { return *G; } + CallGraph &getCallGraph() { return *G; } + + using iterator = CallGraph::iterator; + using const_iterator = CallGraph::const_iterator; + + /// Returns the module the call graph corresponds to. + Module &getModule() const { return G->getModule(); } + + inline iterator begin() { return G->begin(); } + inline iterator end() { return G->end(); } + inline const_iterator begin() const { return G->begin(); } + inline const_iterator end() const { return G->end(); } + + /// Returns the call graph node for the provided function. + inline const CallGraphNode *operator[](const Function *F) const { + return (*G)[F]; + } + + /// Returns the call graph node for the provided function. + inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } + + /// Returns the \c CallGraphNode which is used to represent + /// undetermined calls into the callgraph. + CallGraphNode *getExternalCallingNode() const { + return G->getExternalCallingNode(); + } + + CallGraphNode *getCallsExternalNode() const { + return G->getCallsExternalNode(); + } + + //===--------------------------------------------------------------------- + // Functions to keep a call graph up to date with a function that has been + // modified. + // + + /// Unlink the function from this module, returning it. + /// + /// Because this removes the function from the module, the call graph node is + /// destroyed. This is only valid if the function does not call any other + /// functions (ie, there are no edges in it's CGN). The easiest way to do + /// this is to dropAllReferences before calling this. + Function *removeFunctionFromModule(CallGraphNode *CGN) { + return G->removeFunctionFromModule(CGN); + } + + /// Similar to operator[], but this will insert a new CallGraphNode for + /// \c F if one does not already exist. + CallGraphNode *getOrInsertFunction(const Function *F) { + return G->getOrInsertFunction(F); + } + + //===--------------------------------------------------------------------- + // Implementation of the ModulePass interface needed here. + // + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnModule(Module &M) override; + void releaseMemory() override; + + void print(raw_ostream &o, const Module *) const override; + void dump() const; +}; + +//===----------------------------------------------------------------------===// +// GraphTraits specializations for call graphs so that they can be treated as +// graphs by the generic graph algorithms. +// + +// Provide graph traits for tranversing call graphs using standard graph +// traversals. 
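With the specializations that follow, the generic graph algorithms in llvm/ADT can walk a CallGraph directly. A hedged sketch using scc_iterator, which visits strongly connected components bottom-up (essentially the order the legacy CGSCC pass manager uses):

// Sketch: iterate the SCCs of a call graph with the generic SCC iterator.
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/raw_ostream.h"

void visitSCCsBottomUp(llvm::CallGraph &CG) {
  for (auto I = llvm::scc_begin(&CG); !I.isAtEnd(); ++I) {
    const std::vector<llvm::CallGraphNode *> &SCC = *I;
    llvm::errs() << "SCC with " << SCC.size() << " node(s)\n";
  }
}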
+template <> struct GraphTraits<CallGraphNode *> { + using NodeRef = CallGraphNode *; + using CGNPairTy = CallGraphNode::CallRecord; + + static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; } + static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } + + using ChildIteratorType = + mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>; + + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &CGNGetValue); + } + + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &CGNGetValue); + } +}; + +template <> struct GraphTraits<const CallGraphNode *> { + using NodeRef = const CallGraphNode *; + using CGNPairTy = CallGraphNode::CallRecord; + using EdgeRef = const CallGraphNode::CallRecord &; + + static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; } + static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } + + using ChildIteratorType = + mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>; + using ChildEdgeIteratorType = CallGraphNode::const_iterator; + + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &CGNGetValue); + } + + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &CGNGetValue); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } + + static NodeRef edge_dest(EdgeRef E) { return E.second; } +}; + +template <> +struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { + using PairTy = + std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; + + static NodeRef getEntryNode(CallGraph *CGN) { + return CGN->getExternalCallingNode(); // Start at the external node! + } + + static CallGraphNode *CGGetValuePtr(const PairTy &P) { + return P.second.get(); + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + using nodes_iterator = + mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>; + + static nodes_iterator nodes_begin(CallGraph *CG) { + return nodes_iterator(CG->begin(), &CGGetValuePtr); + } + + static nodes_iterator nodes_end(CallGraph *CG) { + return nodes_iterator(CG->end(), &CGGetValuePtr); + } +}; + +template <> +struct GraphTraits<const CallGraph *> : public GraphTraits< + const CallGraphNode *> { + using PairTy = + std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; + + static NodeRef getEntryNode(const CallGraph *CGN) { + return CGN->getExternalCallingNode(); // Start at the external node! 
+ } + + static const CallGraphNode *CGGetValuePtr(const PairTy &P) { + return P.second.get(); + } + + // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + using nodes_iterator = + mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>; + + static nodes_iterator nodes_begin(const CallGraph *CG) { + return nodes_iterator(CG->begin(), &CGGetValuePtr); + } + + static nodes_iterator nodes_end(const CallGraph *CG) { + return nodes_iterator(CG->end(), &CGGetValuePtr); + } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CALLGRAPH_H diff --git a/clang-r353983e/include/llvm/Analysis/CallGraphSCCPass.h b/clang-r353983e/include/llvm/Analysis/CallGraphSCCPass.h new file mode 100644 index 00000000..1b5b7e2f --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CallGraphSCCPass.h @@ -0,0 +1,136 @@ +//===- CallGraphSCCPass.h - Pass that operates BU on call graph -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the CallGraphSCCPass class, which is used for passes which +// are implemented as bottom-up traversals on the call graph. Because there may +// be cycles in the call graph, passes of this type operate on the call-graph in +// SCC order: that is, they process function bottom-up, except for recursive +// functions, which they process all at once. +// +// These passes are inherently interprocedural, and are required to keep the +// call graph up-to-date if they do anything which could modify it. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CALLGRAPHSCCPASS_H +#define LLVM_ANALYSIS_CALLGRAPHSCCPASS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Pass.h" +#include <vector> + +namespace llvm { + +class CallGraph; +class CallGraphNode; +class CallGraphSCC; +class PMStack; + +class CallGraphSCCPass : public Pass { +public: + explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {} + + /// createPrinterPass - Get a pass that prints the Module + /// corresponding to a CallGraph. + Pass *createPrinterPass(raw_ostream &OS, + const std::string &Banner) const override; + + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + + /// doInitialization - This method is called before the SCC's of the program + /// has been processed, allowing the pass to do initialization as necessary. + virtual bool doInitialization(CallGraph &CG) { + return false; + } + + /// runOnSCC - This method should be implemented by the subclass to perform + /// whatever action is necessary for the specified SCC. Note that + /// non-recursive (or only self-recursive) functions will have an SCC size of + /// 1, where recursive portions of the call graph will have SCC size > 1. + /// + /// SCC passes that add or delete functions to the SCC are required to update + /// the SCC list, otherwise stale pointers may be dereferenced. + virtual bool runOnSCC(CallGraphSCC &SCC) = 0; + + /// doFinalization - This method is called after the SCC's of the program has + /// been processed, allowing the pass to do final cleanup as necessary. 
+ virtual bool doFinalization(CallGraph &CG) { + return false; + } + + /// Assign pass manager to manager this pass + void assignPassManager(PMStack &PMS, PassManagerType PMT) override; + + /// Return what kind of Pass Manager can manage this pass. + PassManagerType getPotentialPassManagerType() const override { + return PMT_CallGraphPassManager; + } + + /// getAnalysisUsage - For this class, we declare that we require and preserve + /// the call graph. If the derived class implements this method, it should + /// always explicitly call the implementation here. + void getAnalysisUsage(AnalysisUsage &Info) const override; + +protected: + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when optimization bisect is over the limit. + bool skipSCC(CallGraphSCC &SCC) const; +}; + +/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on. +class CallGraphSCC { + const CallGraph &CG; // The call graph for this SCC. + void *Context; // The CGPassManager object that is vending this. + std::vector<CallGraphNode *> Nodes; + +public: + CallGraphSCC(CallGraph &cg, void *context) : CG(cg), Context(context) {} + + void initialize(ArrayRef<CallGraphNode *> NewNodes) { + Nodes.assign(NewNodes.begin(), NewNodes.end()); + } + + bool isSingular() const { return Nodes.size() == 1; } + unsigned size() const { return Nodes.size(); } + + /// ReplaceNode - This informs the SCC and the pass manager that the specified + /// Old node has been deleted, and New is to be used in its place. + void ReplaceNode(CallGraphNode *Old, CallGraphNode *New); + + using iterator = std::vector<CallGraphNode *>::const_iterator; + + iterator begin() const { return Nodes.begin(); } + iterator end() const { return Nodes.end(); } + + const CallGraph &getCallGraph() { return CG; } +}; + +void initializeDummyCGSCCPassPass(PassRegistry &); + +/// This pass is required by interprocedural register allocation. It forces +/// codegen to follow bottom up order on call graph. +class DummyCGSCCPass : public CallGraphSCCPass { +public: + static char ID; + + DummyCGSCCPass() : CallGraphSCCPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeDummyCGSCCPassPass(Registry); + } + + bool runOnSCC(CallGraphSCC &SCC) override { return false; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CALLGRAPHSCCPASS_H diff --git a/clang-r353983e/include/llvm/Analysis/CallPrinter.h b/clang-r353983e/include/llvm/Analysis/CallPrinter.h new file mode 100644 index 00000000..8d4159f3 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CallPrinter.h @@ -0,0 +1,26 @@ +//===-- CallPrinter.h - Call graph printer external interface ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines external functions that can be called to explicitly +// instantiate the call graph printer. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CALLPRINTER_H +#define LLVM_ANALYSIS_CALLPRINTER_H + +namespace llvm { + +class ModulePass; + +ModulePass *createCallGraphViewerPass(); +ModulePass *createCallGraphDOTPrinterPass(); + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/CaptureTracking.h b/clang-r353983e/include/llvm/Analysis/CaptureTracking.h new file mode 100644 index 00000000..ca7abd34 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CaptureTracking.h @@ -0,0 +1,97 @@ +//===----- llvm/Analysis/CaptureTracking.h - Pointer capture ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help determine which pointers are captured. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CAPTURETRACKING_H +#define LLVM_ANALYSIS_CAPTURETRACKING_H + +namespace llvm { + + class Value; + class Use; + class Instruction; + class DominatorTree; + class OrderedBasicBlock; + + /// The default value for MaxUsesToExplore argument. It's relatively small to + /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis, + /// where the results can't be cached. + /// TODO: we should probably introduce a caching CaptureTracking analysis and + /// use it where possible. The caching version can use much higher limit or + /// don't have this cap at all. + unsigned constexpr DefaultMaxUsesToExplore = 20; + + /// PointerMayBeCaptured - Return true if this pointer value may be captured + /// by the enclosing function (which is required to exist). This routine can + /// be expensive, so consider caching the results. The boolean ReturnCaptures + /// specifies whether returning the value (or part of it) from the function + /// counts as capturing it or not. The boolean StoreCaptures specified + /// whether storing the value (or part of it) into memory anywhere + /// automatically counts as capturing it or not. + /// MaxUsesToExplore specifies how many uses should the analysis explore for + /// one value before giving up due too "too many uses". + bool PointerMayBeCaptured(const Value *V, + bool ReturnCaptures, + bool StoreCaptures, + unsigned MaxUsesToExplore = DefaultMaxUsesToExplore); + + /// PointerMayBeCapturedBefore - Return true if this pointer value may be + /// captured by the enclosing function (which is required to exist). If a + /// DominatorTree is provided, only captures which happen before the given + /// instruction are considered. This routine can be expensive, so consider + /// caching the results. The boolean ReturnCaptures specifies whether + /// returning the value (or part of it) from the function counts as capturing + /// it or not. The boolean StoreCaptures specified whether storing the value + /// (or part of it) into memory anywhere automatically counts as capturing it + /// or not. Captures by the provided instruction are considered if the + /// final parameter is true. An ordered basic block in \p OBB could be used + /// to speed up capture-tracker queries. + /// MaxUsesToExplore specifies how many uses should the analysis explore for + /// one value before giving up due too "too many uses". 
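As a rough usage sketch (not part of the header itself), the three-argument PointerMayBeCaptured declared above can be used to ask whether a stack allocation escapes; the helper name mayEscape is made up.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Returns true if the alloca's address may be captured anywhere in its
// function, treating both returns and stores of the pointer as captures.
static bool mayEscape(const AllocaInst *AI) {
  return PointerMayBeCaptured(AI, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true);
}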
+ bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, + bool StoreCaptures, const Instruction *I, + const DominatorTree *DT, bool IncludeI = false, + OrderedBasicBlock *OBB = nullptr, + unsigned MaxUsesToExplore = DefaultMaxUsesToExplore); + + /// This callback is used in conjunction with PointerMayBeCaptured. In + /// addition to the interface here, you'll need to provide your own getters + /// to see whether anything was captured. + struct CaptureTracker { + virtual ~CaptureTracker(); + + /// tooManyUses - The depth of traversal has breached a limit. There may be + /// capturing instructions that will not be passed into captured(). + virtual void tooManyUses() = 0; + + /// shouldExplore - This is the use of a value derived from the pointer. + /// To prune the search (ie., assume that none of its users could possibly + /// capture) return false. To search it, return true. + /// + /// U->getUser() is always an Instruction. + virtual bool shouldExplore(const Use *U); + + /// captured - Information about the pointer was captured by the user of + /// use U. Return true to stop the traversal or false to continue looking + /// for more capturing instructions. + virtual bool captured(const Use *U) = 0; + }; + + /// PointerMayBeCaptured - Visit the value and the values derived from it and + /// find values which appear to be capturing the pointer value. This feeds + /// results into and is controlled by the CaptureTracker object. + /// MaxUsesToExplore specifies how many uses should the analysis explore for + /// one value before giving up due too "too many uses". + void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, + unsigned MaxUsesToExplore = DefaultMaxUsesToExplore); +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/CmpInstAnalysis.h b/clang-r353983e/include/llvm/Analysis/CmpInstAnalysis.h new file mode 100644 index 00000000..3d34cd12 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CmpInstAnalysis.h @@ -0,0 +1,70 @@ +//===-- CmpInstAnalysis.h - Utils to help fold compare insts ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CMPINSTANALYSIS_H +#define LLVM_ANALYSIS_CMPINSTANALYSIS_H + +#include "llvm/IR/InstrTypes.h" + +namespace llvm { + class ICmpInst; + class Value; + + /// Encode a icmp predicate into a three bit mask. These bits are carefully + /// arranged to allow folding of expressions such as: + /// + /// (A < B) | (A > B) --> (A != B) + /// + /// Note that this is only valid if the first and second predicates have the + /// same sign. It is illegal to do: (A u< B) | (A s> B) + /// + /// Three bits are used to represent the condition, as follows: + /// 0 A > B + /// 1 A == B + /// 2 A < B + /// + /// <=> Value Definition + /// 000 0 Always false + /// 001 1 A > B + /// 010 2 A == B + /// 011 3 A >= B + /// 100 4 A < B + /// 101 5 A != B + /// 110 6 A <= B + /// 111 7 Always true + /// + unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false); + + /// This is the complement of getICmpCode. 
It turns a predicate code into + /// either a constant true or false or the predicate for a new ICmp. + /// The sign is passed in to determine which kind of predicate to use in the + /// new ICmp instruction. + /// Non-NULL return value will be a true or false constant. + /// NULL return means a new ICmp is needed. The predicate is output in Pred. + Constant *getPredForICmpCode(unsigned Code, bool Sign, Type *OpTy, + CmpInst::Predicate &Pred); + + /// Return true if both predicates match sign or if at least one of them is an + /// equality comparison (which is signless). + bool predicatesFoldable(CmpInst::Predicate P1, CmpInst::Predicate P2); + + /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The + /// returned predicate is either == or !=. Returns false if decomposition + /// fails. + bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, + Value *&X, APInt &Mask, + bool LookThroughTrunc = true); + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/CodeMetrics.h b/clang-r353983e/include/llvm/Analysis/CodeMetrics.h new file mode 100644 index 00000000..1482b66a --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/CodeMetrics.h @@ -0,0 +1,95 @@ +//===- CodeMetrics.h - Code cost measurements -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements various weight measurements for code, helping +// the Inliner and other passes decide whether to duplicate its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CODEMETRICS_H +#define LLVM_ANALYSIS_CODEMETRICS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { +class AssumptionCache; +class BasicBlock; +class Loop; +class Function; +class Instruction; +class DataLayout; +class TargetTransformInfo; +class Value; + +/// Utility to calculate the size and a few similar metrics for a set +/// of basic blocks. +struct CodeMetrics { + /// True if this function contains a call to setjmp or other functions + /// with attribute "returns twice" without having the attribute itself. + bool exposesReturnsTwice = false; + + /// True if this function calls itself. + bool isRecursive = false; + + /// True if this function cannot be duplicated. + /// + /// True if this function contains one or more indirect branches, or it contains + /// one or more 'noduplicate' instructions. + bool notDuplicatable = false; + + /// True if this function contains a call to a convergent function. + bool convergent = false; + + /// True if this function calls alloca (in the C sense). + bool usesDynamicAlloca = false; + + /// Number of instructions in the analyzed blocks. + unsigned NumInsts = false; + + /// Number of analyzed blocks. + unsigned NumBlocks = false; + + /// Keeps track of basic block code size estimates. + DenseMap<const BasicBlock *, unsigned> NumBBInsts; + + /// Keep track of the number of calls to 'big' functions. + unsigned NumCalls = false; + + /// The number of calls to internal functions with a single caller. + /// + /// These are likely targets for future inlining, likely exposed by + /// interleaved devirtualization. 
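To make the three-bit predicate encoding in CmpInstAnalysis.h above concrete, here is a sketch of the fold it is designed for, assuming both compares share the same operands A and B; foldOrOfICmps is a hypothetical helper, not an LLVM API.

#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// (A pred1 B) | (A pred2 B): OR-ing the predicate codes unions the <, ==, >
// regions, so e.g. (A < B) | (A > B) becomes (A != B).
static Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, IRBuilder<> &B) {
  if (!predicatesFoldable(LHS->getPredicate(), RHS->getPredicate()))
    return nullptr; // Mixed signedness; the codes are not comparable.
  unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
  CmpInst::Predicate NewPred;
  if (Constant *C = getPredForICmpCode(Code, LHS->isSigned(),
                                       LHS->getOperand(0)->getType(), NewPred))
    return C; // Folded to an always-true or always-false constant.
  return B.CreateICmp(NewPred, LHS->getOperand(0), LHS->getOperand(1));
}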
+ unsigned NumInlineCandidates = 0; + + /// How many instructions produce vector values. + /// + /// The inliner is more aggressive with inlining vector kernels. + unsigned NumVectorInsts = 0; + + /// How many 'ret' instructions the blocks contain. + unsigned NumRets = 0; + + /// Add information about a block to the current state. + void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, + const SmallPtrSetImpl<const Value*> &EphValues); + + /// Collect a loop's ephemeral values (those used only by an assume + /// or similar intrinsics in the loop). + static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, + SmallPtrSetImpl<const Value *> &EphValues); + + /// Collect a functions's ephemeral values (those used only by an + /// assume or similar intrinsics in the function). + static void collectEphemeralValues(const Function *L, AssumptionCache *AC, + SmallPtrSetImpl<const Value *> &EphValues); +}; + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ConstantFolding.h b/clang-r353983e/include/llvm/Analysis/ConstantFolding.h new file mode 100644 index 00000000..43a2df0d --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ConstantFolding.h @@ -0,0 +1,159 @@ +//===-- ConstantFolding.h - Fold instructions into constants ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares routines for folding instructions into constants when all +// operands are constants, for example "sub i32 1, 0" -> "1". +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file declares some additional folding routines that can make use of +// DataLayout information. These functions cannot go in VMCore due to library +// dependency issues. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CONSTANTFOLDING_H +#define LLVM_ANALYSIS_CONSTANTFOLDING_H + +namespace llvm { +class APInt; +template <typename T> class ArrayRef; +class CallBase; +class Constant; +class ConstantExpr; +class ConstantVector; +class DataLayout; +class Function; +class GlobalValue; +class Instruction; +class TargetLibraryInfo; +class Type; + +/// If this constant is a constant offset from a global, return the global and +/// the constant. Because of constantexprs, this function is recursive. +bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, + const DataLayout &DL); + +/// ConstantFoldInstruction - Try to constant fold the specified instruction. +/// If successful, the constant result is returned, if not, null is returned. +/// Note that this fails if not all of the operands are constant. Otherwise, +/// this function can only fail when attempting to fold instructions like loads +/// and stores, which have no constant expression form. +Constant *ConstantFoldInstruction(Instruction *I, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); + +/// ConstantFoldConstant - Attempt to fold the constant using the +/// specified DataLayout. +/// If successful, the constant result is returned, if not, null is returned. 
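A sketch of how a client might drive the CodeMetrics struct above over a whole function; the helper name estimateFunctionSize is made up, and the inliner does something similar block by block.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static unsigned estimateFunctionSize(const Function &F,
                                     const TargetTransformInfo &TTI,
                                     AssumptionCache &AC) {
  // Values used only by llvm.assume and similar intrinsics should not count.
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);

  CodeMetrics Metrics;
  for (const BasicBlock &BB : F)
    Metrics.analyzeBasicBlock(&BB, TTI, EphValues);
  // Metrics.notDuplicatable / Metrics.convergent can veto duplication here.
  return Metrics.NumInsts;
}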
+Constant *ConstantFoldConstant(const Constant *C, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); + +/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the +/// specified operands. If successful, the constant result is returned, if not, +/// null is returned. Note that this function can fail when attempting to +/// fold instructions like loads and stores, which have no constant expression +/// form. +/// +Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops, + const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); + +/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare +/// instruction (icmp/fcmp) with the specified operands. If it fails, it +/// returns a constant expression of the specified operands. +/// +Constant * +ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, + Constant *RHS, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr); + +/// Attempt to constant fold a binary operation with the specified +/// operands. If it fails, it returns a constant expression of the specified +/// operands. +Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, + Constant *RHS, const DataLayout &DL); + +/// Attempt to constant fold a select instruction with the specified +/// operands. The constant result is returned if successful; if not, null is +/// returned. +Constant *ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, + Constant *V2); + +/// Attempt to constant fold a cast with the specified operand. If it +/// fails, it returns a constant expression of the specified operand. +Constant *ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, + const DataLayout &DL); + +/// ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue +/// instruction with the specified operands and indices. The constant result is +/// returned if successful; if not, null is returned. +Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, + ArrayRef<unsigned> Idxs); + +/// Attempt to constant fold an extractvalue instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. +Constant *ConstantFoldExtractValueInstruction(Constant *Agg, + ArrayRef<unsigned> Idxs); + +/// Attempt to constant fold an insertelement instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. +Constant *ConstantFoldInsertElementInstruction(Constant *Val, + Constant *Elt, + Constant *Idx); + +/// Attempt to constant fold an extractelement instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. +Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); + +/// Attempt to constant fold a shufflevector instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. +Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, + Constant *Mask); + +/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would +/// produce if it is constant and determinable. If this is not determinable, +/// return null. 
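A small sketch of the operand-level entry points above (illustrative only); per the comments, a failed binary-op fold yields a constant expression of the operands rather than null.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Folds the i32 addition 2 + 3; expected to produce the ConstantInt 5.
static Constant *foldAddExample(LLVMContext &Ctx, const DataLayout &DL) {
  Constant *LHS = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
  Constant *RHS = ConstantInt::get(Type::getInt32Ty(Ctx), 3);
  return ConstantFoldBinaryOpOperands(Instruction::Add, LHS, RHS, DL);
}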
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL);
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
+Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE);
+
+/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
+/// indices (with an *implied* zero pointer index that is not in the list),
+/// return the constant value being addressed by a virtual load, or null if
+/// something is funny and we can't decide.
+Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
+                                            ArrayRef<Constant *> Indices);
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *ConstantFoldCall(const CallBase *Call, Function *F,
+                           ArrayRef<Constant *> Operands,
+                           const TargetLibraryInfo *TLI = nullptr);
+
+/// ConstantFoldLoadThroughBitcast - Try to cast a constant to the destination
+/// type, returning null if unsuccessful. Can cast pointer to pointer or
+/// pointer to integer and vice versa if their sizes are equal.
+Constant *ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
+                                         const DataLayout &DL);
+
+/// Check whether the given call has no side-effects.
+/// Specifically checks for math routines which sometimes set errno.
+bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI);
+}
+
+#endif
diff --git a/clang-r353983e/include/llvm/Analysis/DOTGraphTraitsPass.h b/clang-r353983e/include/llvm/Analysis/DOTGraphTraitsPass.h
new file mode 100644
index 00000000..0410a331
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -0,0 +1,188 @@
+//===-- DOTGraphTraitsPass.h - Print/View dotty graphs-----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Templates to create dotty viewer and printer passes for GraphTraits graphs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H
+#define LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H
+
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
+
+namespace llvm {
+
+/// Default traits class for extracting a graph from an analysis pass.
+///
+/// This assumes that 'GraphT' is 'AnalysisT *' and so just passes it through.
+template <typename AnalysisT, typename GraphT = AnalysisT *>
+struct DefaultAnalysisGraphTraits {
+  static GraphT getGraph(AnalysisT *A) { return A; }
+};
+
+template <
+    typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
+    typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> >
+class DOTGraphTraitsViewer : public FunctionPass {
+public:
+  DOTGraphTraitsViewer(StringRef GraphName, char &ID)
+      : FunctionPass(ID), Name(GraphName) {}
+
+  /// Return true if this function should be processed.
+  ///
+  /// An implementation of this class may override this function to indicate
+  /// that only certain functions should be viewed.
+  ///
+  /// @param Analysis The current analysis result for this function.
+  virtual bool processFunction(Function &F, AnalysisT &Analysis) {
+    return true;
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto &Analysis = getAnalysis<AnalysisT>();
+
+    if (!processFunction(F, Analysis))
+      return false;
+
+    GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis);
+    std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
+    std::string Title = GraphName + " for '" + F.getName().str() + "' function";
+
+    ViewGraph(Graph, Name, IsSimple, Title);
+
+    return false;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    AU.addRequired<AnalysisT>();
+  }
+
+private:
+  std::string Name;
+};
+
+template <
+    typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
+    typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> >
+class DOTGraphTraitsPrinter : public FunctionPass {
+public:
+  DOTGraphTraitsPrinter(StringRef GraphName, char &ID)
+      : FunctionPass(ID), Name(GraphName) {}
+
+  /// Return true if this function should be processed.
+  ///
+  /// An implementation of this class may override this function to indicate
+  /// that only certain functions should be printed.
+  ///
+  /// @param Analysis The current analysis result for this function.
+  virtual bool processFunction(Function &F, AnalysisT &Analysis) {
+    return true;
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto &Analysis = getAnalysis<AnalysisT>();
+
+    if (!processFunction(F, Analysis))
+      return false;
+
+    GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis);
+    std::string Filename = Name + "."
+ F.getName().str() + ".dot"; + std::error_code EC; + + errs() << "Writing '" << Filename << "'..."; + + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph); + std::string Title = GraphName + " for '" + F.getName().str() + "' function"; + + if (!EC) + WriteGraph(File, Graph, IsSimple, Title); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<AnalysisT>(); + } + +private: + std::string Name; +}; + +template < + typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> > +class DOTGraphTraitsModuleViewer : public ModulePass { +public: + DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID) + : ModulePass(ID), Name(GraphName) {} + + bool runOnModule(Module &M) override { + GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph); + + ViewGraph(Graph, Name, IsSimple, Title); + + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<AnalysisT>(); + } + +private: + std::string Name; +}; + +template < + typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> > +class DOTGraphTraitsModulePrinter : public ModulePass { +public: + DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID) + : ModulePass(ID), Name(GraphName) {} + + bool runOnModule(Module &M) override { + GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + std::string Filename = Name + ".dot"; + std::error_code EC; + + errs() << "Writing '" << Filename << "'..."; + + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph); + + if (!EC) + WriteGraph(File, Graph, IsSimple, Title); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<AnalysisT>(); + } + +private: + std::string Name; +}; + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/DemandedBits.h b/clang-r353983e/include/llvm/Analysis/DemandedBits.h new file mode 100644 index 00000000..04db3eb5 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/DemandedBits.h @@ -0,0 +1,135 @@ +//===- llvm/Analysis/DemandedBits.h - Determine demanded bits ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements a demanded bits analysis. A demanded bit is one that +// contributes to a result; bits that are not demanded can be either zero or +// one without affecting control or data flow. For example in this sequence: +// +// %1 = add i32 %x, %y +// %2 = trunc i32 %1 to i16 +// +// Only the lowest 16 bits of %1 are demanded; the rest are removed by the +// trunc. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H +#define LLVM_ANALYSIS_DEMANDED_BITS_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +class AssumptionCache; +class DominatorTree; +class Function; +class Instruction; +struct KnownBits; +class raw_ostream; + +class DemandedBits { +public: + DemandedBits(Function &F, AssumptionCache &AC, DominatorTree &DT) : + F(F), AC(AC), DT(DT) {} + + /// Return the bits demanded from instruction I. + /// + /// For vector instructions individual vector elements are not distinguished: + /// A bit is demanded if it is demanded for any of the vector elements. The + /// size of the return value corresponds to the type size in bits of the + /// scalar type. + /// + /// Instructions that do not have integer or vector of integer type are + /// accepted, but will always produce a mask with all bits set. + APInt getDemandedBits(Instruction *I); + + /// Return true if, during analysis, I could not be reached. + bool isInstructionDead(Instruction *I); + + /// Return whether this use is dead by means of not having any demanded bits. + bool isUseDead(Use *U); + + void print(raw_ostream &OS); + +private: + void performAnalysis(); + void determineLiveOperandBits(const Instruction *UserI, + const Value *Val, unsigned OperandNo, + const APInt &AOut, APInt &AB, + KnownBits &Known, KnownBits &Known2, bool &KnownBitsComputed); + + Function &F; + AssumptionCache &AC; + DominatorTree &DT; + + bool Analyzed = false; + + // The set of visited instructions (non-integer-typed only). + SmallPtrSet<Instruction*, 32> Visited; + DenseMap<Instruction *, APInt> AliveBits; + // Uses with no demanded bits. If the user also has no demanded bits, the use + // might not be stored explicitly in this map, to save memory during analysis. + SmallPtrSet<Use *, 16> DeadUses; +}; + +class DemandedBitsWrapperPass : public FunctionPass { +private: + mutable Optional<DemandedBits> DB; + +public: + static char ID; // Pass identification, replacement for typeid + + DemandedBitsWrapperPass(); + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Clean up memory in between runs + void releaseMemory() override; + + DemandedBits &getDemandedBits() { return *DB; } + + void print(raw_ostream &OS, const Module *M) const override; +}; + +/// An analysis that produces \c DemandedBits for a function. +class DemandedBitsAnalysis : public AnalysisInfoMixin<DemandedBitsAnalysis> { + friend AnalysisInfoMixin<DemandedBitsAnalysis>; + + static AnalysisKey Key; + +public: + /// Provide the result type for this analysis pass. + using Result = DemandedBits; + + /// Run the analysis pass over a function and produce demanded bits + /// information. + DemandedBits run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for DemandedBits +class DemandedBitsPrinterPass : public PassInfoMixin<DemandedBitsPrinterPass> { + raw_ostream &OS; + +public: + explicit DemandedBitsPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Create a demanded bits analysis pass. 
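A sketch of consuming the analysis above from a new-pass-manager pass; MyDemandedBitsUser is a hypothetical pass name. It matches the add/trunc example in the file comment, where only the low 16 bits of the add are demanded.

#include "llvm/Analysis/DemandedBits.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct MyDemandedBitsUser : PassInfoMixin<MyDemandedBitsUser> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    DemandedBits &DB = AM.getResult<DemandedBitsAnalysis>(F);
    for (Instruction &I : instructions(F))
      if (I.getType()->isIntegerTy()) {
        // For an add whose only use is a trunc to i16, this is 0x0000FFFF.
        APInt Demanded = DB.getDemandedBits(&I);
        (void)Demanded;
      }
    return PreservedAnalyses::all();
  }
};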
+FunctionPass *createDemandedBitsWrapperPass();
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_DEMANDED_BITS_H
diff --git a/clang-r353983e/include/llvm/Analysis/DependenceAnalysis.h b/clang-r353983e/include/llvm/Analysis/DependenceAnalysis.h
new file mode 100644
index 00000000..997013a5
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/DependenceAnalysis.h
@@ -0,0 +1,978 @@
+//===-- llvm/Analysis/DependenceAnalysis.h -------------------- -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// DependenceAnalysis is an LLVM pass that analyses dependences between memory
+// accesses. Currently, it is an implementation of the approach described in
+//
+//            Practical Dependence Testing
+//            Goff, Kennedy, Tseng
+//            PLDI 1991
+//
+// There's a single entry point that analyzes the dependence between a pair
+// of memory references in a function, returning either NULL, for no dependence,
+// or a more-or-less detailed description of the dependence between them.
+//
+// This pass exists to support the DependenceGraph pass. There are two separate
+// passes because there's a useful separation of concerns. A dependence exists
+// if two conditions are met:
+//
+//    1) Two instructions reference the same memory location, and
+//    2) There is a flow of control leading from one instruction to the other.
+//
+// DependenceAnalysis attacks the first condition; DependenceGraph will attack
+// the second (it's not yet ready).
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// Plausible changes:
+//    Return a set of more precise dependences instead of just one dependence
+//    summarizing all.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
+#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
+
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+template <typename T> class ArrayRef;
+  class Loop;
+  class LoopInfo;
+  class ScalarEvolution;
+  class SCEV;
+  class SCEVConstant;
+  class raw_ostream;
+
+  /// Dependence - This class represents a dependence between two memory
+  /// references in a function. It contains minimal information and
+  /// is used in the very common situation where the compiler is unable to
+  /// determine anything beyond the existence of a dependence; that is, it
+  /// represents a confused dependence (see also FullDependence). In most
+  /// cases (for output, flow, and anti dependences), the dependence implies
+  /// an ordering, where the source must precede the destination; in contrast,
+  /// input dependences are unordered.
+  ///
+  /// When a dependence graph is built, each Dependence will be a member of
+  /// the set of predecessor edges for its destination instruction and a set
+  /// of successor edges for its source instruction. These sets are represented
+  /// as singly-linked lists, with the "next" fields stored in the dependence
+  /// itself.
+ class Dependence { + protected: + Dependence(Dependence &&) = default; + Dependence &operator=(Dependence &&) = default; + + public: + Dependence(Instruction *Source, + Instruction *Destination) : + Src(Source), + Dst(Destination), + NextPredecessor(nullptr), + NextSuccessor(nullptr) {} + virtual ~Dependence() {} + + /// Dependence::DVEntry - Each level in the distance/direction vector + /// has a direction (or perhaps a union of several directions), and + /// perhaps a distance. + struct DVEntry { + enum { NONE = 0, + LT = 1, + EQ = 2, + LE = 3, + GT = 4, + NE = 5, + GE = 6, + ALL = 7 }; + unsigned char Direction : 3; // Init to ALL, then refine. + bool Scalar : 1; // Init to true. + bool PeelFirst : 1; // Peeling the first iteration will break dependence. + bool PeelLast : 1; // Peeling the last iteration will break the dependence. + bool Splitable : 1; // Splitting the loop will break dependence. + const SCEV *Distance; // NULL implies no distance available. + DVEntry() : Direction(ALL), Scalar(true), PeelFirst(false), + PeelLast(false), Splitable(false), Distance(nullptr) { } + }; + + /// getSrc - Returns the source instruction for this dependence. + /// + Instruction *getSrc() const { return Src; } + + /// getDst - Returns the destination instruction for this dependence. + /// + Instruction *getDst() const { return Dst; } + + /// isInput - Returns true if this is an input dependence. + /// + bool isInput() const; + + /// isOutput - Returns true if this is an output dependence. + /// + bool isOutput() const; + + /// isFlow - Returns true if this is a flow (aka true) dependence. + /// + bool isFlow() const; + + /// isAnti - Returns true if this is an anti dependence. + /// + bool isAnti() const; + + /// isOrdered - Returns true if dependence is Output, Flow, or Anti + /// + bool isOrdered() const { return isOutput() || isFlow() || isAnti(); } + + /// isUnordered - Returns true if dependence is Input + /// + bool isUnordered() const { return isInput(); } + + /// isLoopIndependent - Returns true if this is a loop-independent + /// dependence. + virtual bool isLoopIndependent() const { return true; } + + /// isConfused - Returns true if this dependence is confused + /// (the compiler understands nothing and makes worst-case + /// assumptions). + virtual bool isConfused() const { return true; } + + /// isConsistent - Returns true if this dependence is consistent + /// (occurs every time the source and destination are executed). + virtual bool isConsistent() const { return false; } + + /// getLevels - Returns the number of common loops surrounding the + /// source and destination of the dependence. + virtual unsigned getLevels() const { return 0; } + + /// getDirection - Returns the direction associated with a particular + /// level. + virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; } + + /// getDistance - Returns the distance (or NULL) associated with a + /// particular level. + virtual const SCEV *getDistance(unsigned Level) const { return nullptr; } + + /// isPeelFirst - Returns true if peeling the first iteration from + /// this loop will break this dependence. + virtual bool isPeelFirst(unsigned Level) const { return false; } + + /// isPeelLast - Returns true if peeling the last iteration from + /// this loop will break this dependence. + virtual bool isPeelLast(unsigned Level) const { return false; } + + /// isSplitable - Returns true if splitting this loop will break + /// the dependence. 
+ virtual bool isSplitable(unsigned Level) const { return false; } + + /// isScalar - Returns true if a particular level is scalar; that is, + /// if no subscript in the source or destination mention the induction + /// variable associated with the loop at this level. + virtual bool isScalar(unsigned Level) const; + + /// getNextPredecessor - Returns the value of the NextPredecessor + /// field. + const Dependence *getNextPredecessor() const { return NextPredecessor; } + + /// getNextSuccessor - Returns the value of the NextSuccessor + /// field. + const Dependence *getNextSuccessor() const { return NextSuccessor; } + + /// setNextPredecessor - Sets the value of the NextPredecessor + /// field. + void setNextPredecessor(const Dependence *pred) { NextPredecessor = pred; } + + /// setNextSuccessor - Sets the value of the NextSuccessor + /// field. + void setNextSuccessor(const Dependence *succ) { NextSuccessor = succ; } + + /// dump - For debugging purposes, dumps a dependence to OS. + /// + void dump(raw_ostream &OS) const; + + private: + Instruction *Src, *Dst; + const Dependence *NextPredecessor, *NextSuccessor; + friend class DependenceInfo; + }; + + /// FullDependence - This class represents a dependence between two memory + /// references in a function. It contains detailed information about the + /// dependence (direction vectors, etc.) and is used when the compiler is + /// able to accurately analyze the interaction of the references; that is, + /// it is not a confused dependence (see Dependence). In most cases + /// (for output, flow, and anti dependences), the dependence implies an + /// ordering, where the source must precede the destination; in contrast, + /// input dependences are unordered. + class FullDependence final : public Dependence { + public: + FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent, + unsigned Levels); + + /// isLoopIndependent - Returns true if this is a loop-independent + /// dependence. + bool isLoopIndependent() const override { return LoopIndependent; } + + /// isConfused - Returns true if this dependence is confused + /// (the compiler understands nothing and makes worst-case + /// assumptions). + bool isConfused() const override { return false; } + + /// isConsistent - Returns true if this dependence is consistent + /// (occurs every time the source and destination are executed). + bool isConsistent() const override { return Consistent; } + + /// getLevels - Returns the number of common loops surrounding the + /// source and destination of the dependence. + unsigned getLevels() const override { return Levels; } + + /// getDirection - Returns the direction associated with a particular + /// level. + unsigned getDirection(unsigned Level) const override; + + /// getDistance - Returns the distance (or NULL) associated with a + /// particular level. + const SCEV *getDistance(unsigned Level) const override; + + /// isPeelFirst - Returns true if peeling the first iteration from + /// this loop will break this dependence. + bool isPeelFirst(unsigned Level) const override; + + /// isPeelLast - Returns true if peeling the last iteration from + /// this loop will break this dependence. + bool isPeelLast(unsigned Level) const override; + + /// isSplitable - Returns true if splitting the loop will break + /// the dependence. 
+ bool isSplitable(unsigned Level) const override; + + /// isScalar - Returns true if a particular level is scalar; that is, + /// if no subscript in the source or destination mention the induction + /// variable associated with the loop at this level. + bool isScalar(unsigned Level) const override; + + private: + unsigned short Levels; + bool LoopIndependent; + bool Consistent; // Init to true, then refine. + std::unique_ptr<DVEntry[]> DV; + friend class DependenceInfo; + }; + + /// DependenceInfo - This class is the main dependence-analysis driver. + /// + class DependenceInfo { + public: + DependenceInfo(Function *F, AliasAnalysis *AA, ScalarEvolution *SE, + LoopInfo *LI) + : AA(AA), SE(SE), LI(LI), F(F) {} + + /// Handle transitive invalidation when the cached analysis results go away. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + /// depends - Tests for a dependence between the Src and Dst instructions. + /// Returns NULL if no dependence; otherwise, returns a Dependence (or a + /// FullDependence) with as much information as can be gleaned. + /// The flag PossiblyLoopIndependent should be set by the caller + /// if it appears that control flow can reach from Src to Dst + /// without traversing a loop back edge. + std::unique_ptr<Dependence> depends(Instruction *Src, + Instruction *Dst, + bool PossiblyLoopIndependent); + + /// getSplitIteration - Give a dependence that's splittable at some + /// particular level, return the iteration that should be used to split + /// the loop. + /// + /// Generally, the dependence analyzer will be used to build + /// a dependence graph for a function (basically a map from instructions + /// to dependences). Looking for cycles in the graph shows us loops + /// that cannot be trivially vectorized/parallelized. + /// + /// We can try to improve the situation by examining all the dependences + /// that make up the cycle, looking for ones we can break. + /// Sometimes, peeling the first or last iteration of a loop will break + /// dependences, and there are flags for those possibilities. + /// Sometimes, splitting a loop at some other iteration will do the trick, + /// and we've got a flag for that case. Rather than waste the space to + /// record the exact iteration (since we rarely know), we provide + /// a method that calculates the iteration. It's a drag that it must work + /// from scratch, but wonderful in that it's possible. + /// + /// Here's an example: + /// + /// for (i = 0; i < 10; i++) + /// A[i] = ... + /// ... = A[11 - i] + /// + /// There's a loop-carried flow dependence from the store to the load, + /// found by the weak-crossing SIV test. The dependence will have a flag, + /// indicating that the dependence can be broken by splitting the loop. + /// Calling getSplitIteration will return 5. + /// Splitting the loop breaks the dependence, like so: + /// + /// for (i = 0; i <= 5; i++) + /// A[i] = ... + /// ... = A[11 - i] + /// for (i = 6; i < 10; i++) + /// A[i] = ... + /// ... = A[11 - i] + /// + /// breaks the dependence and allows us to vectorize/parallelize + /// both loops. + const SCEV *getSplitIteration(const Dependence &Dep, unsigned Level); + + Function *getFunction() const { return F; } + + private: + AliasAnalysis *AA; + ScalarEvolution *SE; + LoopInfo *LI; + Function *F; + + /// Subscript - This private struct represents a pair of subscripts from + /// a pair of potentially multi-dimensional array references. 
We use a + /// vector of them to guide subscript partitioning. + struct Subscript { + const SCEV *Src; + const SCEV *Dst; + enum ClassificationKind { ZIV, SIV, RDIV, MIV, NonLinear } Classification; + SmallBitVector Loops; + SmallBitVector GroupLoops; + SmallBitVector Group; + }; + + struct CoefficientInfo { + const SCEV *Coeff; + const SCEV *PosPart; + const SCEV *NegPart; + const SCEV *Iterations; + }; + + struct BoundInfo { + const SCEV *Iterations; + const SCEV *Upper[8]; + const SCEV *Lower[8]; + unsigned char Direction; + unsigned char DirSet; + }; + + /// Constraint - This private class represents a constraint, as defined + /// in the paper + /// + /// Practical Dependence Testing + /// Goff, Kennedy, Tseng + /// PLDI 1991 + /// + /// There are 5 kinds of constraint, in a hierarchy. + /// 1) Any - indicates no constraint, any dependence is possible. + /// 2) Line - A line ax + by = c, where a, b, and c are parameters, + /// representing the dependence equation. + /// 3) Distance - The value d of the dependence distance; + /// 4) Point - A point <x, y> representing the dependence from + /// iteration x to iteration y. + /// 5) Empty - No dependence is possible. + class Constraint { + private: + enum ConstraintKind { Empty, Point, Distance, Line, Any } Kind; + ScalarEvolution *SE; + const SCEV *A; + const SCEV *B; + const SCEV *C; + const Loop *AssociatedLoop; + + public: + /// isEmpty - Return true if the constraint is of kind Empty. + bool isEmpty() const { return Kind == Empty; } + + /// isPoint - Return true if the constraint is of kind Point. + bool isPoint() const { return Kind == Point; } + + /// isDistance - Return true if the constraint is of kind Distance. + bool isDistance() const { return Kind == Distance; } + + /// isLine - Return true if the constraint is of kind Line. + /// Since Distance's can also be represented as Lines, we also return + /// true if the constraint is of kind Distance. + bool isLine() const { return Kind == Line || Kind == Distance; } + + /// isAny - Return true if the constraint is of kind Any; + bool isAny() const { return Kind == Any; } + + /// getX - If constraint is a point <X, Y>, returns X. + /// Otherwise assert. + const SCEV *getX() const; + + /// getY - If constraint is a point <X, Y>, returns Y. + /// Otherwise assert. + const SCEV *getY() const; + + /// getA - If constraint is a line AX + BY = C, returns A. + /// Otherwise assert. + const SCEV *getA() const; + + /// getB - If constraint is a line AX + BY = C, returns B. + /// Otherwise assert. + const SCEV *getB() const; + + /// getC - If constraint is a line AX + BY = C, returns C. + /// Otherwise assert. + const SCEV *getC() const; + + /// getD - If constraint is a distance, returns D. + /// Otherwise assert. + const SCEV *getD() const; + + /// getAssociatedLoop - Returns the loop associated with this constraint. + const Loop *getAssociatedLoop() const; + + /// setPoint - Change a constraint to Point. + void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentLoop); + + /// setLine - Change a constraint to Line. + void setLine(const SCEV *A, const SCEV *B, + const SCEV *C, const Loop *CurrentLoop); + + /// setDistance - Change a constraint to Distance. + void setDistance(const SCEV *D, const Loop *CurrentLoop); + + /// setEmpty - Change a constraint to Empty. + void setEmpty(); + + /// setAny - Change a constraint to Any. + void setAny(ScalarEvolution *SE); + + /// dump - For debugging purposes. Dumps the constraint + /// out to OS. 
+ void dump(raw_ostream &OS) const; + }; + + /// establishNestingLevels - Examines the loop nesting of the Src and Dst + /// instructions and establishes their shared loops. Sets the variables + /// CommonLevels, SrcLevels, and MaxLevels. + /// The source and destination instructions needn't be contained in the same + /// loop. The routine establishNestingLevels finds the level of most deeply + /// nested loop that contains them both, CommonLevels. An instruction that's + /// not contained in a loop is at level = 0. MaxLevels is equal to the level + /// of the source plus the level of the destination, minus CommonLevels. + /// This lets us allocate vectors MaxLevels in length, with room for every + /// distinct loop referenced in both the source and destination subscripts. + /// The variable SrcLevels is the nesting depth of the source instruction. + /// It's used to help calculate distinct loops referenced by the destination. + /// Here's the map from loops to levels: + /// 0 - unused + /// 1 - outermost common loop + /// ... - other common loops + /// CommonLevels - innermost common loop + /// ... - loops containing Src but not Dst + /// SrcLevels - innermost loop containing Src but not Dst + /// ... - loops containing Dst but not Src + /// MaxLevels - innermost loop containing Dst but not Src + /// Consider the follow code fragment: + /// for (a = ...) { + /// for (b = ...) { + /// for (c = ...) { + /// for (d = ...) { + /// A[] = ...; + /// } + /// } + /// for (e = ...) { + /// for (f = ...) { + /// for (g = ...) { + /// ... = A[]; + /// } + /// } + /// } + /// } + /// } + /// If we're looking at the possibility of a dependence between the store + /// to A (the Src) and the load from A (the Dst), we'll note that they + /// have 2 loops in common, so CommonLevels will equal 2 and the direction + /// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. + /// A map from loop names to level indices would look like + /// a - 1 + /// b - 2 = CommonLevels + /// c - 3 + /// d - 4 = SrcLevels + /// e - 5 + /// f - 6 + /// g - 7 = MaxLevels + void establishNestingLevels(const Instruction *Src, + const Instruction *Dst); + + unsigned CommonLevels, SrcLevels, MaxLevels; + + /// mapSrcLoop - Given one of the loops containing the source, return + /// its level index in our numbering scheme. + unsigned mapSrcLoop(const Loop *SrcLoop) const; + + /// mapDstLoop - Given one of the loops containing the destination, + /// return its level index in our numbering scheme. + unsigned mapDstLoop(const Loop *DstLoop) const; + + /// isLoopInvariant - Returns true if Expression is loop invariant + /// in LoopNest. + bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const; + + /// Makes sure all subscript pairs share the same integer type by + /// sign-extending as necessary. + /// Sign-extending a subscript is safe because getelementptr assumes the + /// array subscripts are signed. + void unifySubscriptType(ArrayRef<Subscript *> Pairs); + + /// removeMatchingExtensions - Examines a subscript pair. + /// If the source and destination are identically sign (or zero) + /// extended, it strips off the extension in an effort to + /// simplify the actual analysis. + void removeMatchingExtensions(Subscript *Pair); + + /// collectCommonLoops - Finds the set of loops from the LoopNest that + /// have a level <= CommonLevels and are referred to by the SCEV Expression. 
+ void collectCommonLoops(const SCEV *Expression, + const Loop *LoopNest, + SmallBitVector &Loops) const; + + /// checkSrcSubscript - Examines the SCEV Src, returning true iff it's + /// linear. Collect the set of loops mentioned by Src. + bool checkSrcSubscript(const SCEV *Src, + const Loop *LoopNest, + SmallBitVector &Loops); + + /// checkDstSubscript - Examines the SCEV Dst, returning true iff it's + /// linear. Collect the set of loops mentioned by Dst. + bool checkDstSubscript(const SCEV *Dst, + const Loop *LoopNest, + SmallBitVector &Loops); + + /// isKnownPredicate - Compare X and Y using the predicate Pred. + /// Basically a wrapper for SCEV::isKnownPredicate, + /// but tries harder, especially in the presence of sign and zero + /// extensions and symbolics. + bool isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *X, + const SCEV *Y) const; + + /// isKnownLessThan - Compare to see if S is less than Size + /// Another wrapper for isKnownNegative(S - max(Size, 1)) with some extra + /// checking if S is an AddRec and we can prove lessthan using the loop + /// bounds. + bool isKnownLessThan(const SCEV *S, const SCEV *Size) const; + + /// isKnownNonNegative - Compare to see if S is known not to be negative + /// Uses the fact that S comes from Ptr, which may be an inbound GEP, + /// Proving there is no wrapping going on. + bool isKnownNonNegative(const SCEV *S, const Value *Ptr) const; + + /// collectUpperBound - All subscripts are the same type (on my machine, + /// an i64). The loop bound may be a smaller type. collectUpperBound + /// find the bound, if available, and zero extends it to the Type T. + /// (I zero extend since the bound should always be >= 0.) + /// If no upper bound is available, return NULL. + const SCEV *collectUpperBound(const Loop *l, Type *T) const; + + /// collectConstantUpperBound - Calls collectUpperBound(), then + /// attempts to cast it to SCEVConstant. If the cast fails, + /// returns NULL. + const SCEVConstant *collectConstantUpperBound(const Loop *l, Type *T) const; + + /// classifyPair - Examines the subscript pair (the Src and Dst SCEVs) + /// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. + /// Collects the associated loops in a set. + Subscript::ClassificationKind classifyPair(const SCEV *Src, + const Loop *SrcLoopNest, + const SCEV *Dst, + const Loop *DstLoopNest, + SmallBitVector &Loops); + + /// testZIV - Tests the ZIV subscript pair (Src and Dst) for dependence. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// If the dependence isn't proven to exist, + /// marks the Result as inconsistent. + bool testZIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const; + + /// testSIV - Tests the SIV subscript pair (Src and Dst) for dependence. + /// Things of the form [c1 + a1*i] and [c2 + a2*j], where + /// i and j are induction variables, c1 and c2 are loop invariant, + /// and a1 and a2 are constant. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction vector entry and, when possible, + /// the distance vector entry. + /// If the dependence isn't proven to exist, + /// marks the Result as inconsistent. + bool testSIV(const SCEV *Src, + const SCEV *Dst, + unsigned &Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const; + + /// testRDIV - Tests the RDIV subscript pair (Src and Dst) for dependence. 
+ /// Things of the form [c1 + a1*i] and [c2 + a2*j] + /// where i and j are induction variables, c1 and c2 are loop invariant, + /// and a1 and a2 are constant. + /// With minor algebra, this test can also be used for things like + /// [c1 + a1*i + a2*j][c2]. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Marks the Result as inconsistent. + bool testRDIV(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const; + + /// testMIV - Tests the MIV subscript pair (Src and Dst) for dependence. + /// Returns true if dependence disproved. + /// Can sometimes refine direction vectors. + bool testMIV(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const; + + /// strongSIVtest - Tests the strong SIV subscript pair (Src and Dst) + /// for dependence. + /// Things of the form [c1 + a*i] and [c2 + a*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction and distance. + bool strongSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// weakCrossingSIVtest - Tests the weak-crossing SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1 + a*i] and [c2 - a*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + /// Marks the dependence as splitable. + bool weakCrossingSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint, + const SCEV *&SplitIter) const; + + /// ExactSIVtest - Tests the SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1 + a1*i] and [c2 + a2*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a1 and a2 are constant. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + bool exactSIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// weakZeroSrcSIVtest - Tests the weak-zero SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1] and [c2 + a*i], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant. See also weakZeroDstSIVtest. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + /// If loop peeling will break the dependence, mark appropriately. 
+ bool weakZeroSrcSIVtest(const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// weakZeroDstSIVtest - Tests the weak-zero SIV subscript pair + /// (Src and Dst) for dependence. + /// Things of the form [c1 + a*i] and [c2], + /// where i is an induction variable, c1 and c2 are loop invariant, + /// and a is a constant. See also weakZeroSrcSIVtest. + /// Returns true if any possible dependence is disproved. + /// If there might be a dependence, returns false. + /// Sets appropriate direction entry. + /// Set consistent to false. + /// If loop peeling will break the dependence, mark appropriately. + bool weakZeroDstSIVtest(const SCEV *SrcCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurrentLoop, + unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const; + + /// exactRDIVtest - Tests the RDIV subscript pair for dependence. + /// Things of the form [c1 + a*i] and [c2 + b*j], + /// where i and j are induction variable, c1 and c2 are loop invariant, + /// and a and b are constants. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistent. + /// Works in some cases that symbolicRDIVtest doesn't, + /// and vice versa. + bool exactRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop, + FullDependence &Result) const; + + /// symbolicRDIVtest - Tests the RDIV subscript pair for dependence. + /// Things of the form [c1 + a*i] and [c2 + b*j], + /// where i and j are induction variable, c1 and c2 are loop invariant, + /// and a and b are constants. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistent. + /// Works in some cases that exactRDIVtest doesn't, + /// and vice versa. Can also be used as a backup for + /// ordinary SIV tests. + bool symbolicRDIVtest(const SCEV *SrcCoeff, + const SCEV *DstCoeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *SrcLoop, + const Loop *DstLoop) const; + + /// gcdMIVtest - Tests an MIV subscript pair for dependence. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistent. + /// Can sometimes disprove the equal direction for 1 or more loops. + // Can handle some symbolics that even the SIV tests don't get, + /// so we use it as a backup for everything. + bool gcdMIVtest(const SCEV *Src, + const SCEV *Dst, + FullDependence &Result) const; + + /// banerjeeMIVtest - Tests an MIV subscript pair for dependence. + /// Returns true if any possible dependence is disproved. + /// Marks the result as inconsistent. + /// Computes directions. + bool banerjeeMIVtest(const SCEV *Src, + const SCEV *Dst, + const SmallBitVector &Loops, + FullDependence &Result) const; + + /// collectCoefficientInfo - Walks through the subscript, + /// collecting each coefficient, the associated loop bounds, + /// and recording its positive and negative parts for later use. + CoefficientInfo *collectCoeffInfo(const SCEV *Subscript, + bool SrcFlag, + const SCEV *&Constant) const; + + /// getPositivePart - X^+ = max(X, 0). + /// + const SCEV *getPositivePart(const SCEV *X) const; + + /// getNegativePart - X^- = min(X, 0). 
+ /// + const SCEV *getNegativePart(const SCEV *X) const; + + /// getLowerBound - Looks through all the bounds info and + /// computes the lower bound given the current direction settings + /// at each level. + const SCEV *getLowerBound(BoundInfo *Bound) const; + + /// getUpperBound - Looks through all the bounds info and + /// computes the upper bound given the current direction settings + /// at each level. + const SCEV *getUpperBound(BoundInfo *Bound) const; + + /// exploreDirections - Hierarchically expands the direction vector + /// search space, combining the directions of discovered dependences + /// in the DirSet field of Bound. Returns the number of distinct + /// dependences discovered. If the dependence is disproved, + /// it will return 0. + unsigned exploreDirections(unsigned Level, + CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + const SmallBitVector &Loops, + unsigned &DepthExpanded, + const SCEV *Delta) const; + + /// testBounds - Returns true iff the current bounds are plausible. + bool testBounds(unsigned char DirKind, + unsigned Level, + BoundInfo *Bound, + const SCEV *Delta) const; + + /// findBoundsALL - Computes the upper and lower bounds for level K + /// using the * direction. Records them in Bound. + void findBoundsALL(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// findBoundsLT - Computes the upper and lower bounds for level K + /// using the < direction. Records them in Bound. + void findBoundsLT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// findBoundsGT - Computes the upper and lower bounds for level K + /// using the > direction. Records them in Bound. + void findBoundsGT(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// findBoundsEQ - Computes the upper and lower bounds for level K + /// using the = direction. Records them in Bound. + void findBoundsEQ(CoefficientInfo *A, + CoefficientInfo *B, + BoundInfo *Bound, + unsigned K) const; + + /// intersectConstraints - Updates X with the intersection + /// of the Constraints X and Y. Returns true if X has changed. + bool intersectConstraints(Constraint *X, + const Constraint *Y); + + /// propagate - Review the constraints, looking for opportunities + /// to simplify a subscript pair (Src and Dst). + /// Return true if some simplification occurs. + /// If the simplification isn't exact (that is, if it is conservative + /// in terms of dependence), set consistent to false. + bool propagate(const SCEV *&Src, + const SCEV *&Dst, + SmallBitVector &Loops, + SmallVectorImpl<Constraint> &Constraints, + bool &Consistent); + + /// propagateDistance - Attempt to propagate a distance + /// constraint into a subscript pair (Src and Dst). + /// Return true if some simplification occurs. + /// If the simplification isn't exact (that is, if it is conservative + /// in terms of dependence), set consistent to false. + bool propagateDistance(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent); + + /// propagatePoint - Attempt to propagate a point + /// constraint into a subscript pair (Src and Dst). + /// Return true if some simplification occurs. + bool propagatePoint(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint); + + /// propagateLine - Attempt to propagate a line + /// constraint into a subscript pair (Src and Dst). + /// Return true if some simplification occurs. 
+ /// If the simplification isn't exact (that is, if it is conservative + /// in terms of dependence), set consistent to false. + bool propagateLine(const SCEV *&Src, + const SCEV *&Dst, + Constraint &CurConstraint, + bool &Consistent); + + /// findCoefficient - Given a linear SCEV, + /// return the coefficient corresponding to specified loop. + /// If there isn't one, return the SCEV constant 0. + /// For example, given a*i + b*j + c*k, returning the coefficient + /// corresponding to the j loop would yield b. + const SCEV *findCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const; + + /// zeroCoefficient - Given a linear SCEV, + /// return the SCEV given by zeroing out the coefficient + /// corresponding to the specified loop. + /// For example, given a*i + b*j + c*k, zeroing the coefficient + /// corresponding to the j loop would yield a*i + c*k. + const SCEV *zeroCoefficient(const SCEV *Expr, + const Loop *TargetLoop) const; + + /// addToCoefficient - Given a linear SCEV Expr, + /// return the SCEV given by adding some Value to the + /// coefficient corresponding to the specified TargetLoop. + /// For example, given a*i + b*j + c*k, adding 1 to the coefficient + /// corresponding to the j loop would yield a*i + (b+1)*j + c*k. + const SCEV *addToCoefficient(const SCEV *Expr, + const Loop *TargetLoop, + const SCEV *Value) const; + + /// updateDirection - Update direction vector entry + /// based on the current constraint. + void updateDirection(Dependence::DVEntry &Level, + const Constraint &CurConstraint) const; + + bool tryDelinearize(Instruction *Src, Instruction *Dst, + SmallVectorImpl<Subscript> &Pair); + }; // class DependenceInfo + + /// AnalysisPass to compute dependence information in a function + class DependenceAnalysis : public AnalysisInfoMixin<DependenceAnalysis> { + public: + typedef DependenceInfo Result; + Result run(Function &F, FunctionAnalysisManager &FAM); + + private: + static AnalysisKey Key; + friend struct AnalysisInfoMixin<DependenceAnalysis>; + }; // class DependenceAnalysis + + /// Printer pass to dump DA results. + struct DependenceAnalysisPrinterPass + : public PassInfoMixin<DependenceAnalysisPrinterPass> { + DependenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + + private: + raw_ostream &OS; + }; // class DependenceAnalysisPrinterPass + + /// Legacy pass manager pass to access dependence information + class DependenceAnalysisWrapperPass : public FunctionPass { + public: + static char ID; // Class identification, replacement for typeinfo + DependenceAnalysisWrapperPass() : FunctionPass(ID) { + initializeDependenceAnalysisWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &) const override; + void print(raw_ostream &, const Module * = nullptr) const override; + DependenceInfo &getDI() const; + + private: + std::unique_ptr<DependenceInfo> info; + }; // class DependenceAnalysisWrapperPass + + /// createDependenceAnalysisPass - This creates an instance of the + /// DependenceAnalysis wrapper pass. 
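The subscript tests above are internal helpers; clients normally go through DependenceInfo::depends. A minimal usage sketch under the legacy pass manager follows; the pass name DepQueryExample and the quadratic walk over memory instructions are illustrative only and are not part of this header.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Hypothetical example pass that queries dependence information.
struct DepQueryExample : FunctionPass {
  static char ID;
  DepQueryExample() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DependenceAnalysisWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    DependenceInfo &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
    // Collect memory-accessing instructions (quadratic pairing is fine for a demo).
    SmallVector<Instruction *, 16> Mem;
    for (Instruction &I : instructions(F))
      if (I.mayReadOrWriteMemory())
        Mem.push_back(&I);
    for (Instruction *Src : Mem)
      for (Instruction *Dst : Mem)
        if (auto D = DI.depends(Src, Dst, /*PossiblyLoopIndependent=*/true))
          if (!D->isConfused())
            for (unsigned L = 1, E = D->getLevels(); L <= E; ++L)
              (void)D->getDirection(L); // per-level direction (<, =, >, *)
    return false;
  }
};
} // end anonymous namespace
char DepQueryExample::ID = 0;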
+ FunctionPass *createDependenceAnalysisWrapperPass(); + +} // namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/DivergenceAnalysis.h b/clang-r353983e/include/llvm/Analysis/DivergenceAnalysis.h new file mode 100644 index 00000000..3cfb9d13 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/DivergenceAnalysis.h @@ -0,0 +1,204 @@ +//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// The divergence analysis determines which instructions and branches are +// divergent given a set of divergent source instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H +#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/SyncDependenceAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +#include <vector> + +namespace llvm { +class Module; +class Value; +class Instruction; +class Loop; +class raw_ostream; +class TargetTransformInfo; + +/// \brief Generic divergence analysis for reducible CFGs. +/// +/// This analysis propagates divergence in a data-parallel context from sources +/// of divergence to all users. It requires reducible CFGs. All assignments +/// should be in SSA form. +class DivergenceAnalysis { +public: + /// \brief This instance will analyze the whole function \p F or the loop \p + /// RegionLoop. + /// + /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop. + /// Otherwise the whole function is analyzed. + /// \param IsLCSSAForm whether the analysis may assume that the IR in the + /// region in in LCSSA form. + DivergenceAnalysis(const Function &F, const Loop *RegionLoop, + const DominatorTree &DT, const LoopInfo &LI, + SyncDependenceAnalysis &SDA, bool IsLCSSAForm); + + /// \brief The loop that defines the analyzed region (if any). + const Loop *getRegionLoop() const { return RegionLoop; } + const Function &getFunction() const { return F; } + + /// \brief Whether \p BB is part of the region. + bool inRegion(const BasicBlock &BB) const; + /// \brief Whether \p I is part of the region. + bool inRegion(const Instruction &I) const; + + /// \brief Mark \p UniVal as a value that is always uniform. + void addUniformOverride(const Value &UniVal); + + /// \brief Mark \p DivVal as a value that is always divergent. + void markDivergent(const Value &DivVal); + + /// \brief Propagate divergence to all instructions in the region. + /// Divergence is seeded by calls to \p markDivergent. + void compute(); + + /// \brief Whether any value was marked or analyzed to be divergent. + bool hasDetectedDivergence() const { return !DivergentValues.empty(); } + + /// \brief Whether \p Val will always return a uniform value regardless of its + /// operands + bool isAlwaysUniform(const Value &Val) const; + + /// \brief Whether \p Val is a divergent value + bool isDivergent(const Value &Val) const; + + void print(raw_ostream &OS, const Module *) const; + +private: + bool updateTerminator(const Instruction &Term) const; + bool updatePHINode(const PHINode &Phi) const; + + /// \brief Computes whether \p Inst is divergent based on the + /// divergence of its operands. 
+  ///
+  /// \returns Whether \p Inst is divergent.
+  ///
+  /// This should only be called for non-phi, non-terminator instructions.
+  bool updateNormalInstruction(const Instruction &Inst) const;
+
+  /// \brief Mark users of live-out values as divergent.
+  ///
+  /// \param LoopHeader the header of the divergent loop.
+  ///
+  /// Marks all users of live-out values of the loop headed by \p LoopHeader
+  /// as divergent and puts them on the worklist.
+  void taintLoopLiveOuts(const BasicBlock &LoopHeader);
+
+  /// \brief Push all users of \p I (in the region) to the worklist.
+  void pushUsers(const Value &I);
+
+  /// \brief Push all phi nodes in \p Block to the worklist.
+  void pushPHINodes(const BasicBlock &Block);
+
+  /// \brief Mark \p Block as join divergent.
+  ///
+  /// A block is join divergent if two threads may reach it from different
+  /// incoming blocks at the same time.
+  void markBlockJoinDivergent(const BasicBlock &Block) {
+    DivergentJoinBlocks.insert(&Block);
+  }
+
+  /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
+  bool isTemporalDivergent(const BasicBlock &ObservingBlock,
+                           const Value &Val) const;
+
+  /// \brief Whether \p Block is join divergent
+  ///
+  /// (see markBlockJoinDivergent).
+  bool isJoinDivergent(const BasicBlock &Block) const {
+    return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end();
+  }
+
+  /// \brief Propagate control-induced divergence to users (phi nodes and
+  /// instructions).
+  ///
+  /// \param JoinBlock is a divergent loop exit or join point of two disjoint
+  /// paths.
+  /// \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop.
+  bool propagateJoinDivergence(const BasicBlock &JoinBlock,
+                               const Loop *TermLoop);
+
+  /// \brief Propagate induced value divergence due to control divergence in \p
+  /// Term.
+  void propagateBranchDivergence(const Instruction &Term);
+
+  /// \brief Propagate divergence caused by a divergent loop exit.
+  ///
+  /// \param ExitingLoop is a divergent loop.
+  void propagateLoopDivergence(const Loop &ExitingLoop);
+
+private:
+  const Function &F;
+  // If RegionLoop != nullptr, analysis is only performed within \p RegionLoop.
+  // Otherwise, analyze the whole function.
+  const Loop *RegionLoop;
+
+  const DominatorTree &DT;
+  const LoopInfo &LI;
+
+  // Recognized divergent loops
+  DenseSet<const Loop *> DivergentLoops;
+
+  // The SDA links divergent branches to divergent control-flow joins.
+  SyncDependenceAnalysis &SDA;
+
+  // Use simplified code path for LCSSA form.
+  bool IsLCSSAForm;
+
+  // Set of known-uniform values.
+  DenseSet<const Value *> UniformOverrides;
+
+  // Blocks with joining divergent control from different predecessors.
+  DenseSet<const BasicBlock *> DivergentJoinBlocks;
+
+  // Detected/marked divergent values.
+  DenseSet<const Value *> DivergentValues;
+
+  // Internal worklist for divergence propagation.
+  std::vector<const Instruction *> Worklist;
+};
+
+/// \brief Divergence analysis frontend for GPU kernels.
+class GPUDivergenceAnalysis {
+  SyncDependenceAnalysis SDA;
+  DivergenceAnalysis DA;
+
+public:
+  /// Runs the divergence analysis on \p F, a GPU kernel.
+  GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
+                        const PostDominatorTree &PDT, const LoopInfo &LI,
+                        const TargetTransformInfo &TTI);
+
+  /// Whether any divergence was detected.
+  bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+
+  /// The GPU kernel this analysis result is for.
+  const Function &getFunction() const { return DA.getFunction(); }
+
+  /// Whether \p V is divergent.
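A minimal usage sketch for the GPU frontend described above, assuming the caller already has the required analyses computed; the helper name reportDivergentValues is illustrative and not part of this header.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Illustrative helper: collect the divergent instructions of a GPU kernel.
static void reportDivergentValues(Function &Kernel, const DominatorTree &DT,
                                  const PostDominatorTree &PDT,
                                  const LoopInfo &LI,
                                  const TargetTransformInfo &TTI,
                                  SmallVectorImpl<const Instruction *> &Out) {
  // Construction performs the analysis over the whole kernel.
  GPUDivergenceAnalysis DA(Kernel, DT, PDT, LI, TTI);
  if (!DA.hasDivergence())
    return;
  for (const BasicBlock &BB : Kernel)
    for (const Instruction &I : BB)
      if (DA.isDivergent(I))
        Out.push_back(&I);
}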
+ bool isDivergent(const Value &V) const; + + /// Whether \p V is uniform/non-divergent + bool isUniform(const Value &V) const { return !isDivergent(V); } + + /// Print all divergent values in the kernel. + void print(raw_ostream &OS, const Module *) const; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/DomPrinter.h b/clang-r353983e/include/llvm/Analysis/DomPrinter.h new file mode 100644 index 00000000..a177f877 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/DomPrinter.h @@ -0,0 +1,29 @@ +//===-- DomPrinter.h - Dom printer external interface ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines external functions that can be called to explicitly +// instantiate the dominance tree printer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DOMPRINTER_H +#define LLVM_ANALYSIS_DOMPRINTER_H + +namespace llvm { + class FunctionPass; + FunctionPass *createDomPrinterPass(); + FunctionPass *createDomOnlyPrinterPass(); + FunctionPass *createDomViewerPass(); + FunctionPass *createDomOnlyViewerPass(); + FunctionPass *createPostDomPrinterPass(); + FunctionPass *createPostDomOnlyPrinterPass(); + FunctionPass *createPostDomViewerPass(); + FunctionPass *createPostDomOnlyViewerPass(); +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/DomTreeUpdater.h b/clang-r353983e/include/llvm/Analysis/DomTreeUpdater.h new file mode 100644 index 00000000..fcfd3c12 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/DomTreeUpdater.h @@ -0,0 +1,256 @@ +//===- DomTreeUpdater.h - DomTree/Post DomTree Updater ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the DomTreeUpdater class, which provides a uniform way to +// update dominator tree related data structures. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DOMTREEUPDATER_H +#define LLVM_ANALYSIS_DOMTREEUPDATER_H + +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/GenericDomTree.h" +#include <functional> +#include <vector> + +namespace llvm { +class DomTreeUpdater { +public: + enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 }; + + explicit DomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree &DT_, UpdateStrategy Strategy_) + : DT(&DT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree *DT_, UpdateStrategy Strategy_) + : DT(DT_), Strategy(Strategy_) {} + DomTreeUpdater(PostDominatorTree &PDT_, UpdateStrategy Strategy_) + : PDT(&PDT_), Strategy(Strategy_) {} + DomTreeUpdater(PostDominatorTree *PDT_, UpdateStrategy Strategy_) + : PDT(PDT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree &DT_, PostDominatorTree &PDT_, + UpdateStrategy Strategy_) + : DT(&DT_), PDT(&PDT_), Strategy(Strategy_) {} + DomTreeUpdater(DominatorTree *DT_, PostDominatorTree *PDT_, + UpdateStrategy Strategy_) + : DT(DT_), PDT(PDT_), Strategy(Strategy_) {} + + ~DomTreeUpdater() { flush(); } + + /// Returns true if the current strategy is Lazy. + bool isLazy() const { return Strategy == UpdateStrategy::Lazy; }; + + /// Returns true if the current strategy is Eager. + bool isEager() const { return Strategy == UpdateStrategy::Eager; }; + + /// Returns true if it holds a DominatorTree. + bool hasDomTree() const { return DT != nullptr; } + + /// Returns true if it holds a PostDominatorTree. + bool hasPostDomTree() const { return PDT != nullptr; } + + /// Returns true if there is BasicBlock awaiting deletion. + /// The deletion will only happen until a flush event and + /// all available trees are up-to-date. + /// Returns false under Eager UpdateStrategy. + bool hasPendingDeletedBB() const { return !DeletedBBs.empty(); } + + /// Returns true if DelBB is awaiting deletion. + /// Returns false under Eager UpdateStrategy. + bool isBBPendingDeletion(BasicBlock *DelBB) const; + + /// Returns true if either of DT or PDT is valid and the tree has at + /// least one update pending. If DT or PDT is nullptr it is treated + /// as having no pending updates. This function does not check + /// whether there is BasicBlock awaiting deletion. + /// Returns false under Eager UpdateStrategy. + bool hasPendingUpdates() const; + + /// Returns true if there are DominatorTree updates queued. + /// Returns false under Eager UpdateStrategy or DT is nullptr. + bool hasPendingDomTreeUpdates() const; + + /// Returns true if there are PostDominatorTree updates queued. + /// Returns false under Eager UpdateStrategy or PDT is nullptr. + bool hasPendingPostDomTreeUpdates() const; + + /// Apply updates on all available trees. Under Eager UpdateStrategy with + /// ForceRemoveDuplicates enabled or under Lazy UpdateStrategy, it will + /// discard duplicated updates and self-dominance updates. If both DT and PDT + /// are nullptrs, this function discards all updates. The Eager Strategy + /// applies the updates immediately while the Lazy Strategy queues the + /// updates. 
+  /// It is required for the state of the LLVM IR to be updated
+  /// *before* applying the Updates because the internal update routine will
+  /// analyze the current state of the relationship between a pair of (From, To)
+  /// BasicBlocks to determine whether a single update needs to be discarded.
+  void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
+                    bool ForceRemoveDuplicates = false);
+
+  /// Notify all available trees on an edge insertion. If both DT and PDT are
+  /// nullptrs, this function discards the update. Under either Strategy,
+  /// self-dominance updates will be removed. The Eager Strategy applies
+  /// the update immediately while the Lazy Strategy queues the update.
+  /// It is recommended to only use this method when you have exactly one
+  /// insertion (and no deletions). It is recommended to use applyUpdates() in
+  /// all other cases. This function has to be called *after* making the update
+  /// on the actual CFG. An internal function checks if the edge exists in the
+  /// CFG in DEBUG mode.
+  void insertEdge(BasicBlock *From, BasicBlock *To);
+
+  /// Notify all available trees on an edge insertion.
+  /// Under either Strategy, the following updates will be discarded silently:
+  /// 1. Invalid - Inserting an edge that does not exist in the CFG.
+  /// 2. Self-dominance update.
+  /// 3. Both DT and PDT are nullptrs.
+  /// The Eager Strategy applies the update immediately while the Lazy Strategy
+  /// queues the update. It is recommended to only use this method when you have
+  /// exactly one insertion (and no deletions) and want to discard an invalid
+  /// update.
+  void insertEdgeRelaxed(BasicBlock *From, BasicBlock *To);
+
+  /// Notify all available trees on an edge deletion. If both DT and PDT are
+  /// nullptrs, this function discards the update. Under either Strategy,
+  /// self-dominance updates will be removed. The Eager Strategy applies
+  /// the update immediately while the Lazy Strategy queues the update.
+  /// It is recommended to only use this method when you have exactly one
+  /// deletion (and no insertions). It is recommended to use applyUpdates() in
+  /// all other cases. This function has to be called *after* making the update
+  /// on the actual CFG. An internal function checks if the edge doesn't exist
+  /// in the CFG in DEBUG mode.
+  void deleteEdge(BasicBlock *From, BasicBlock *To);
+
+  /// Notify all available trees on an edge deletion.
+  /// Under either Strategy, the following updates will be discarded silently:
+  /// 1. Invalid - Deleting an edge that still exists in the CFG.
+  /// 2. Self-dominance update.
+  /// 3. Both DT and PDT are nullptrs.
+  /// The Eager Strategy applies the update immediately while the Lazy Strategy
+  /// queues the update. It is recommended to only use this method when you have
+  /// exactly one deletion (and no insertions) and want to discard an invalid
+  /// update.
+  void deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To);
+
+  /// Delete DelBB. DelBB will be removed from its Parent,
+  /// erased from available trees if it exists, and finally deleted.
+  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
+  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event occurs
+  /// and all available trees are up-to-date. Asserts if any instruction of DelBB
+  /// is modified while awaiting deletion. When both DT and PDT are nullptrs,
+  /// DelBB will be queued until flush() is called.
+  void deleteBB(BasicBlock *DelBB);
+
+  /// Delete DelBB.
+  /// DelBB will be removed from its Parent and
+  /// erased from available trees if it exists. Then the callback will
+  /// be called. Finally, DelBB will be deleted.
+  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
+  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event occurs
+  /// and all available trees are up-to-date. Asserts if any instruction of DelBB
+  /// is modified while awaiting deletion. Multiple callbacks can be queued for
+  /// one DelBB under Lazy UpdateStrategy.
+  void callbackDeleteBB(BasicBlock *DelBB,
+                        std::function<void(BasicBlock *)> Callback);
+
+  /// Recalculate all available trees and flush all BasicBlocks
+  /// awaiting deletion immediately.
+  void recalculate(Function &F);
+
+  /// Flush DomTree updates and return DomTree.
+  /// It also flushes out-of-date updates applied by all available trees
+  /// and flushes deleted BBs if both trees are up-to-date.
+  /// It must only be called when it has a DomTree.
+  DominatorTree &getDomTree();
+
+  /// Flush PostDomTree updates and return PostDomTree.
+  /// It also flushes out-of-date updates applied by all available trees
+  /// and flushes deleted BBs if both trees are up-to-date.
+  /// It must only be called when it has a PostDomTree.
+  PostDominatorTree &getPostDomTree();
+
+  /// Apply all pending updates to available trees and flush all BasicBlocks
+  /// awaiting deletion.
+  /// Does nothing under Eager UpdateStrategy.
+  void flush();
+
+  /// Debug method to help view the internal state of this class.
+  LLVM_DUMP_METHOD void dump() const;
+
+private:
+  class CallBackOnDeletion final : public CallbackVH {
+  public:
+    CallBackOnDeletion(BasicBlock *V,
+                       std::function<void(BasicBlock *)> Callback)
+        : CallbackVH(V), DelBB(V), Callback_(Callback) {}
+
+  private:
+    BasicBlock *DelBB = nullptr;
+    std::function<void(BasicBlock *)> Callback_;
+
+    void deleted() override {
+      Callback_(DelBB);
+      CallbackVH::deleted();
+    }
+  };
+
+  SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
+  size_t PendDTUpdateIndex = 0;
+  size_t PendPDTUpdateIndex = 0;
+  DominatorTree *DT = nullptr;
+  PostDominatorTree *PDT = nullptr;
+  const UpdateStrategy Strategy;
+  SmallPtrSet<BasicBlock *, 8> DeletedBBs;
+  std::vector<CallBackOnDeletion> Callbacks;
+  bool IsRecalculatingDomTree = false;
+  bool IsRecalculatingPostDomTree = false;
+
+  /// First removes all the instructions of DelBB, then makes sure DelBB has a
+  /// valid terminator instruction; this is necessary while DelBB still has to
+  /// stay inside its parent Function awaiting deletion under Lazy
+  /// UpdateStrategy, so that other routines do not assert that the state of
+  /// the IR is inconsistent. Asserts if DelBB is nullptr or has predecessors.
+  void validateDeleteBB(BasicBlock *DelBB);
+
+  /// Returns true if at least one BasicBlock is deleted.
+  bool forceFlushDeletedBB();
+
+  /// Deduplicate and remove unnecessary updates (no-ops) when using Lazy
+  /// UpdateStrategy. Returns true if the update was queued.
+  bool applyLazyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
+                       BasicBlock *To);
+
+  /// Helper function to apply all pending DomTree updates.
+  void applyDomTreeUpdates();
+
+  /// Helper function to apply all pending PostDomTree updates.
+  void applyPostDomTreeUpdates();
+
+  /// Helper function to flush deleted BasicBlocks if all available
+  /// trees are up-to-date.
+  void tryFlushDeletedBB();
+
+  /// Drop all updates applied by all available trees and delete BasicBlocks if
+  /// all available trees are up-to-date.
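For the public interface above, the typical client pattern is to batch CFG changes and hand them to a lazily updated DomTreeUpdater after the IR has been modified. A minimal sketch follows; the surrounding transform and the block names Pred/NewBB/Succ are assumed for illustration and are not part of this header.

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

// Illustrative: NewBB has just been spliced in between Pred and Succ in the
// IR, and the old Pred->Succ edge no longer exists in the CFG.
static void updateTreesAfterSplit(DominatorTree &DT, PostDominatorTree &PDT,
                                  BasicBlock *Pred, BasicBlock *NewBB,
                                  BasicBlock *Succ) {
  DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
  // Updates are queued under the Lazy strategy; duplicates and self-dominance
  // updates are discarded.
  DTU.applyUpdates({{DominatorTree::Insert, Pred, NewBB},
                    {DominatorTree::Insert, NewBB, Succ},
                    {DominatorTree::Delete, Pred, Succ}});
  DTU.flush(); // or let the destructor flush the pending updates
}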
+ void dropOutOfDateUpdates(); + + /// Erase Basic Block node that has been unlinked from Function + /// in the DomTree and PostDomTree. + void eraseDelBBNode(BasicBlock *DelBB); + + /// Returns true if the update appears in the LLVM IR. + /// It is used to check whether an update is valid in + /// insertEdge/deleteEdge or is unnecessary in the batch update. + bool isUpdateValid(DominatorTree::UpdateType Update) const; + + /// Returns true if the update is self dominance. + bool isSelfDominance(DominatorTree::UpdateType Update) const; +}; +} // namespace llvm + +#endif // LLVM_ANALYSIS_DOMTREEUPDATER_H diff --git a/clang-r353983e/include/llvm/Analysis/DominanceFrontier.h b/clang-r353983e/include/llvm/Analysis/DominanceFrontier.h new file mode 100644 index 00000000..c0bf30e1 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/DominanceFrontier.h @@ -0,0 +1,210 @@ +//===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the DominanceFrontier class, which calculate and holds the +// dominance frontier for a function. +// +// This should be considered deprecated, don't add any more uses of this data +// structure. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DOMINANCEFRONTIER_H +#define LLVM_ANALYSIS_DOMINANCEFRONTIER_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/GenericDomTree.h" +#include <cassert> +#include <map> +#include <set> +#include <utility> +#include <vector> + +namespace llvm { + +class Function; +class raw_ostream; + +//===----------------------------------------------------------------------===// +/// DominanceFrontierBase - Common base class for computing forward and inverse +/// dominance frontiers for a function. +/// +template <class BlockT, bool IsPostDom> +class DominanceFrontierBase { +public: + using DomSetType = std::set<BlockT *>; // Dom set for a bb + using DomSetMapType = std::map<BlockT *, DomSetType>; // Dom set map + +protected: + using BlockTraits = GraphTraits<BlockT *>; + + DomSetMapType Frontiers; + // Postdominators can have multiple roots. + SmallVector<BlockT *, IsPostDom ? 4 : 1> Roots; + static constexpr bool IsPostDominators = IsPostDom; + +public: + DominanceFrontierBase() = default; + + /// getRoots - Return the root blocks of the current CFG. This may include + /// multiple blocks if we are computing post dominators. For forward + /// dominators, this will always be a single block (the entry node). 
+  const SmallVectorImpl<BlockT *> &getRoots() const { return Roots; }
+
+  BlockT *getRoot() const {
+    assert(Roots.size() == 1 && "Should always have entry node!");
+    return Roots[0];
+  }
+
+  /// isPostDominator - Returns true if this analysis is based on postdominators.
+  bool isPostDominator() const {
+    return IsPostDominators;
+  }
+
+  void releaseMemory() {
+    Frontiers.clear();
+  }
+
+  // Accessor interface:
+  using iterator = typename DomSetMapType::iterator;
+  using const_iterator = typename DomSetMapType::const_iterator;
+
+  iterator begin() { return Frontiers.begin(); }
+  const_iterator begin() const { return Frontiers.begin(); }
+  iterator end() { return Frontiers.end(); }
+  const_iterator end() const { return Frontiers.end(); }
+  iterator find(BlockT *B) { return Frontiers.find(B); }
+  const_iterator find(BlockT *B) const { return Frontiers.find(B); }
+
+  iterator addBasicBlock(BlockT *BB, const DomSetType &frontier) {
+    assert(find(BB) == end() && "Block already in DominanceFrontier!");
+    return Frontiers.insert(std::make_pair(BB, frontier)).first;
+  }
+
+  /// removeBlock - Remove basic block BB's frontier.
+  void removeBlock(BlockT *BB);
+
+  void addToFrontier(iterator I, BlockT *Node);
+
+  void removeFromFrontier(iterator I, BlockT *Node);
+
+  /// compareDomSet - Return false if two domsets match. Otherwise
+  /// return true.
+  bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const;
+
+  /// compare - Return true if the other dominance frontier base matches
+  /// this dominance frontier base. Otherwise return false.
+  bool compare(DominanceFrontierBase &Other) const;
+
+  /// print - Convert to human-readable form.
+  ///
+  void print(raw_ostream &OS) const;
+
+  /// dump - Dump the dominance frontier to dbgs().
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void dump() const;
+#endif
+};
+
+//===-------------------------------------
+/// DominanceFrontier Class - Concrete subclass of DominanceFrontierBase that is
+/// used to compute forward dominance frontiers.
+///
+template <class BlockT>
+class ForwardDominanceFrontierBase
+    : public DominanceFrontierBase<BlockT, false> {
+private:
+  using BlockTraits = GraphTraits<BlockT *>;
+
+public:
+  using DomTreeT = DomTreeBase<BlockT>;
+  using DomTreeNodeT = DomTreeNodeBase<BlockT>;
+  using DomSetType = typename DominanceFrontierBase<BlockT, false>::DomSetType;
+
+  void analyze(DomTreeT &DT) {
+    assert(DT.getRoots().size() == 1 &&
+           "Only one entry block for forward domfronts!");
+    this->Roots = {DT.getRoot()};
+    calculate(DT, DT[this->Roots[0]]);
+  }
+
+  const DomSetType &calculate(const DomTreeT &DT, const DomTreeNodeT *Node);
+};
+
+class DominanceFrontier : public ForwardDominanceFrontierBase<BasicBlock> {
+public:
+  using DomTreeT = DomTreeBase<BasicBlock>;
+  using DomTreeNodeT = DomTreeNodeBase<BasicBlock>;
+  using DomSetType = DominanceFrontierBase<BasicBlock, false>::DomSetType;
+  using iterator = DominanceFrontierBase<BasicBlock, false>::iterator;
+  using const_iterator =
+      DominanceFrontierBase<BasicBlock, false>::const_iterator;
+
+  /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); +}; + +class DominanceFrontierWrapperPass : public FunctionPass { + DominanceFrontier DF; + +public: + static char ID; // Pass ID, replacement for typeid + + DominanceFrontierWrapperPass(); + + DominanceFrontier &getDominanceFrontier() { return DF; } + const DominanceFrontier &getDominanceFrontier() const { return DF; } + + void releaseMemory() override; + + bool runOnFunction(Function &) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + void print(raw_ostream &OS, const Module * = nullptr) const override; + + void dump() const; +}; + +extern template class DominanceFrontierBase<BasicBlock, false>; +extern template class DominanceFrontierBase<BasicBlock, true>; +extern template class ForwardDominanceFrontierBase<BasicBlock>; + +/// Analysis pass which computes a \c DominanceFrontier. +class DominanceFrontierAnalysis + : public AnalysisInfoMixin<DominanceFrontierAnalysis> { + friend AnalysisInfoMixin<DominanceFrontierAnalysis>; + + static AnalysisKey Key; + +public: + /// Provide the result type for this analysis pass. + using Result = DominanceFrontier; + + /// Run the analysis pass over a function and produce a dominator tree. + DominanceFrontier run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c DominanceFrontier. +class DominanceFrontierPrinterPass + : public PassInfoMixin<DominanceFrontierPrinterPass> { + raw_ostream &OS; + +public: + explicit DominanceFrontierPrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_DOMINANCEFRONTIER_H diff --git a/clang-r353983e/include/llvm/Analysis/DominanceFrontierImpl.h b/clang-r353983e/include/llvm/Analysis/DominanceFrontierImpl.h new file mode 100644 index 00000000..aa764be9 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/DominanceFrontierImpl.h @@ -0,0 +1,231 @@ +//===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the generic implementation of the DominanceFrontier class, which +// calculate and holds the dominance frontier for a function for. +// +// This should be considered deprecated, don't add any more uses of this data +// structure. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H +#define LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <set> +#include <utility> +#include <vector> + +namespace llvm { + +template <class BlockT> +class DFCalculateWorkObject { +public: + using DomTreeNodeT = DomTreeNodeBase<BlockT>; + + DFCalculateWorkObject(BlockT *B, BlockT *P, const DomTreeNodeT *N, + const DomTreeNodeT *PN) + : currentBB(B), parentBB(P), Node(N), parentNode(PN) {} + + BlockT *currentBB; + BlockT *parentBB; + const DomTreeNodeT *Node; + const DomTreeNodeT *parentNode; +}; + +template <class BlockT, bool IsPostDom> +void DominanceFrontierBase<BlockT, IsPostDom>::removeBlock(BlockT *BB) { + assert(find(BB) != end() && "Block is not in DominanceFrontier!"); + for (iterator I = begin(), E = end(); I != E; ++I) + I->second.erase(BB); + Frontiers.erase(BB); +} + +template <class BlockT, bool IsPostDom> +void DominanceFrontierBase<BlockT, IsPostDom>::addToFrontier(iterator I, + BlockT *Node) { + assert(I != end() && "BB is not in DominanceFrontier!"); + assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB"); + I->second.erase(Node); +} + +template <class BlockT, bool IsPostDom> +void DominanceFrontierBase<BlockT, IsPostDom>::removeFromFrontier( + iterator I, BlockT *Node) { + assert(I != end() && "BB is not in DominanceFrontier!"); + assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB"); + I->second.erase(Node); +} + +template <class BlockT, bool IsPostDom> +bool DominanceFrontierBase<BlockT, IsPostDom>::compareDomSet( + DomSetType &DS1, const DomSetType &DS2) const { + std::set<BlockT *> tmpSet; + for (BlockT *BB : DS2) + tmpSet.insert(BB); + + for (typename DomSetType::const_iterator I = DS1.begin(), E = DS1.end(); + I != E;) { + BlockT *Node = *I++; + + if (tmpSet.erase(Node) == 0) + // Node is in DS1 but tnot in DS2. + return true; + } + + if (!tmpSet.empty()) { + // There are nodes that are in DS2 but not in DS1. + return true; + } + + // DS1 and DS2 matches. 
+ return false; +} + +template <class BlockT, bool IsPostDom> +bool DominanceFrontierBase<BlockT, IsPostDom>::compare( + DominanceFrontierBase<BlockT, IsPostDom> &Other) const { + DomSetMapType tmpFrontiers; + for (typename DomSetMapType::const_iterator I = Other.begin(), + E = Other.end(); + I != E; ++I) + tmpFrontiers.insert(std::make_pair(I->first, I->second)); + + for (typename DomSetMapType::iterator I = tmpFrontiers.begin(), + E = tmpFrontiers.end(); + I != E;) { + BlockT *Node = I->first; + const_iterator DFI = find(Node); + if (DFI == end()) + return true; + + if (compareDomSet(I->second, DFI->second)) + return true; + + ++I; + tmpFrontiers.erase(Node); + } + + if (!tmpFrontiers.empty()) + return true; + + return false; +} + +template <class BlockT, bool IsPostDom> +void DominanceFrontierBase<BlockT, IsPostDom>::print(raw_ostream &OS) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) { + OS << " DomFrontier for BB "; + if (I->first) + I->first->printAsOperand(OS, false); + else + OS << " <<exit node>>"; + OS << " is:\t"; + + const std::set<BlockT *> &BBs = I->second; + + for (const BlockT *BB : BBs) { + OS << ' '; + if (BB) + BB->printAsOperand(OS, false); + else + OS << "<<exit node>>"; + } + OS << '\n'; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +template <class BlockT, bool IsPostDom> +void DominanceFrontierBase<BlockT, IsPostDom>::dump() const { + print(dbgs()); +} +#endif + +template <class BlockT> +const typename ForwardDominanceFrontierBase<BlockT>::DomSetType & +ForwardDominanceFrontierBase<BlockT>::calculate(const DomTreeT &DT, + const DomTreeNodeT *Node) { + BlockT *BB = Node->getBlock(); + DomSetType *Result = nullptr; + + std::vector<DFCalculateWorkObject<BlockT>> workList; + SmallPtrSet<BlockT *, 32> visited; + + workList.push_back(DFCalculateWorkObject<BlockT>(BB, nullptr, Node, nullptr)); + do { + DFCalculateWorkObject<BlockT> *currentW = &workList.back(); + assert(currentW && "Missing work object."); + + BlockT *currentBB = currentW->currentBB; + BlockT *parentBB = currentW->parentBB; + const DomTreeNodeT *currentNode = currentW->Node; + const DomTreeNodeT *parentNode = currentW->parentNode; + assert(currentBB && "Invalid work object. Missing current Basic Block"); + assert(currentNode && "Invalid work object. Missing current Node"); + DomSetType &S = this->Frontiers[currentBB]; + + // Visit each block only once. + if (visited.insert(currentBB).second) { + // Loop over CFG successors to calculate DFlocal[currentNode] + for (const auto Succ : children<BlockT *>(currentBB)) { + // Does Node immediately dominate this successor? + if (DT[Succ]->getIDom() != currentNode) + S.insert(Succ); + } + } + + // At this point, S is DFlocal. Now we union in DFup's of our children... + // Loop through and visit the nodes that Node immediately dominates (Node's + // children in the IDomTree) + bool visitChild = false; + for (typename DomTreeNodeT::const_iterator NI = currentNode->begin(), + NE = currentNode->end(); + NI != NE; ++NI) { + DomTreeNodeT *IDominee = *NI; + BlockT *childBB = IDominee->getBlock(); + if (visited.count(childBB) == 0) { + workList.push_back(DFCalculateWorkObject<BlockT>( + childBB, currentBB, IDominee, currentNode)); + visitChild = true; + } + } + + // If all children are visited or there is any child then pop this block + // from the workList. 
+ if (!visitChild) { + if (!parentBB) { + Result = &S; + break; + } + + typename DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end(); + DomSetType &parentSet = this->Frontiers[parentBB]; + for (; CDFI != CDFE; ++CDFI) { + if (!DT.properlyDominates(parentNode, DT[*CDFI])) + parentSet.insert(*CDFI); + } + workList.pop_back(); + } + + } while (!workList.empty()); + + return *Result; +} + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H diff --git a/clang-r353983e/include/llvm/Analysis/EHPersonalities.h b/clang-r353983e/include/llvm/Analysis/EHPersonalities.h new file mode 100644 index 00000000..d89aa116 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/EHPersonalities.h @@ -0,0 +1,118 @@ +//===- EHPersonalities.h - Compute EH-related information -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_EHPERSONALITIES_H +#define LLVM_ANALYSIS_EHPERSONALITIES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class BasicBlock; +class Function; +class Value; + +enum class EHPersonality { + Unknown, + GNU_Ada, + GNU_C, + GNU_C_SjLj, + GNU_CXX, + GNU_CXX_SjLj, + GNU_ObjC, + MSVC_X86SEH, + MSVC_Win64SEH, + MSVC_CXX, + CoreCLR, + Rust, + Wasm_CXX +}; + +/// See if the given exception handling personality function is one +/// that we understand. If so, return a description of it; otherwise return +/// Unknown. +EHPersonality classifyEHPersonality(const Value *Pers); + +StringRef getEHPersonalityName(EHPersonality Pers); + +EHPersonality getDefaultEHPersonality(const Triple &T); + +/// Returns true if this personality function catches asynchronous +/// exceptions. +inline bool isAsynchronousEHPersonality(EHPersonality Pers) { + // The two SEH personality functions can catch asynch exceptions. We assume + // unknown personalities don't catch asynch exceptions. + switch (Pers) { + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// Returns true if this is a personality function that invokes +/// handler funclets (which must return to it). +inline bool isFuncletEHPersonality(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::MSVC_CXX: + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + case EHPersonality::CoreCLR: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// Returns true if this personality uses scope-style EH IR instructions: +/// catchswitch, catchpad/ret, and cleanuppad/ret. +inline bool isScopedEHPersonality(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::MSVC_CXX: + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + case EHPersonality::CoreCLR: + case EHPersonality::Wasm_CXX: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// Return true if this personality may be safely removed if there +/// are no invoke instructions remaining in the current function. 
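As a small example of how the classification helpers above combine in practice, a sketch follows; the helper name usesFuncletEH is illustrative and not part of this header.

#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Illustrative: does F's exception handling use funclet-style IR
// (catchswitch/catchpad/cleanuppad), which many transforms must respect?
static bool usesFuncletEH(const Function &F) {
  if (!F.hasPersonalityFn())
    return false;
  EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
  return isFuncletEHPersonality(Pers);
}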
+inline bool isNoOpWithoutInvoke(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::Unknown: + return false; + // All known personalities currently have this behavior + default: + return true; + } + llvm_unreachable("invalid enum"); +} + +bool canSimplifyInvokeNoUnwind(const Function *F); + +typedef TinyPtrVector<BasicBlock *> ColorVector; + +/// If an EH funclet personality is in use (see isFuncletEHPersonality), +/// this will recompute which blocks are in which funclet. It is possible that +/// some blocks are in multiple funclets. Consider this analysis to be +/// expensive. +DenseMap<BasicBlock *, ColorVector> colorEHFunclets(Function &F); + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/GlobalsModRef.h b/clang-r353983e/include/llvm/Analysis/GlobalsModRef.h new file mode 100644 index 00000000..14b20971 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/GlobalsModRef.h @@ -0,0 +1,155 @@ +//===- GlobalsModRef.h - Simple Mod/Ref AA for Globals ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a simple mod/ref and alias analysis over globals. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_GLOBALSMODREF_H +#define LLVM_ANALYSIS_GLOBALSMODREF_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <list> + +namespace llvm { + +/// An alias analysis result set for globals. +/// +/// This focuses on handling aliasing properties of globals and interprocedural +/// function call mod/ref information. +class GlobalsAAResult : public AAResultBase<GlobalsAAResult> { + friend AAResultBase<GlobalsAAResult>; + + class FunctionInfo; + + const DataLayout &DL; + const TargetLibraryInfo &TLI; + + /// The globals that do not have their addresses taken. + SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals; + + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + SmallPtrSet<const GlobalValue *, 8> IndirectGlobals; + + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + DenseMap<const Value *, const GlobalValue *> AllocsForIndirectGlobals; + + /// For each function, keep track of what globals are modified or read. + DenseMap<const Function *, FunctionInfo> FunctionInfos; + + /// A map of functions to SCC. The SCCs are described by a simple integer + /// ID that is only useful for comparing for equality (are two functions + /// in the same SCC or not?) + DenseMap<const Function *, unsigned> FunctionToSCCMap; + + /// Handle to clear this analysis on deletion of values. + struct DeletionCallbackHandle final : CallbackVH { + GlobalsAAResult *GAR; + std::list<DeletionCallbackHandle>::iterator I; + + DeletionCallbackHandle(GlobalsAAResult &GAR, Value *V) + : CallbackVH(V), GAR(&GAR) {} + + void deleted() override; + }; + + /// List of callbacks for globals being tracked by this analysis. 
Note that + /// these objects are quite large, but we only anticipate having one per + /// global tracked by this analysis. There are numerous optimizations we + /// could perform to the memory utilization here if this becomes a problem. + std::list<DeletionCallbackHandle> Handles; + + explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI); + +public: + GlobalsAAResult(GlobalsAAResult &&Arg); + ~GlobalsAAResult(); + + static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, + CallGraph &CG); + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + using AAResultBase::getModRefInfo; + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + FunctionModRefBehavior getModRefBehavior(const Function *F); + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + FunctionModRefBehavior getModRefBehavior(const CallBase *Call); + +private: + FunctionInfo *getFunctionInfo(const Function *F); + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, + SmallPtrSetImpl<Function *> *Readers = nullptr, + SmallPtrSetImpl<Function *> *Writers = nullptr, + GlobalValue *OkayStoreDest = nullptr); + bool AnalyzeIndirectGlobalMemory(GlobalVariable *GV); + void CollectSCCMembership(CallGraph &CG); + + bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V); + ModRefInfo getModRefInfoForArgument(const CallBase *Call, + const GlobalValue *GV); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class GlobalsAA : public AnalysisInfoMixin<GlobalsAA> { + friend AnalysisInfoMixin<GlobalsAA>; + static AnalysisKey Key; + +public: + typedef GlobalsAAResult Result; + + GlobalsAAResult run(Module &M, ModuleAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the GlobalsAAResult object. +class GlobalsAAWrapperPass : public ModulePass { + std::unique_ptr<GlobalsAAResult> Result; + +public: + static char ID; + + GlobalsAAWrapperPass(); + + GlobalsAAResult &getResult() { return *Result; } + const GlobalsAAResult &getResult() const { return *Result; } + + bool runOnModule(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createGlobalsAAWrapperPass - This pass provides alias and mod/ref info for +// global values that do not have their addresses taken. +// +ModulePass *createGlobalsAAWrapperPass(); +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/GuardUtils.h b/clang-r353983e/include/llvm/Analysis/GuardUtils.h new file mode 100644 index 00000000..41e7b7c0 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/GuardUtils.h @@ -0,0 +1,44 @@ +//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform analyses related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_GUARDUTILS_H
+#define LLVM_ANALYSIS_GUARDUTILS_H
+
+namespace llvm {
+
+class BasicBlock;
+class User;
+class Value;
+
+/// Returns true iff \p U has the semantics of a guard expressed in the form of
+/// a call to the llvm.experimental.guard intrinsic.
+bool isGuard(const User *U);
+
+/// Returns true iff \p U has the semantics of a guard expressed in the form of
+/// a widenable conditional branch to a deopt block.
+bool isGuardAsWidenableBranch(const User *U);
+
+/// If \p U is a widenable branch looking like:
+///   %cond = ...
+///   %wc = call i1 @llvm.experimental.widenable.condition()
+///   %branch_cond = and i1 %cond, %wc
+///   br i1 %branch_cond, label %if_true_bb, label %if_false_bb ; <--- U
+/// the function returns true, and the values %cond and %wc and the blocks
+/// %if_true_bb and %if_false_bb are returned in the parameters (Condition,
+/// WidenableCondition, IfTrueBB and IfFalseBB) respectively. If \p U does not
+/// match this pattern, return false.
+bool parseWidenableBranch(const User *U, Value *&Condition,
+                          Value *&WidenableCondition, BasicBlock *&IfTrueBB,
+                          BasicBlock *&IfFalseBB);
+
+} // llvm
+
+#endif // LLVM_ANALYSIS_GUARDUTILS_H
diff --git a/clang-r353983e/include/llvm/Analysis/IVDescriptors.h b/clang-r353983e/include/llvm/Analysis/IVDescriptors.h
new file mode 100644
index 00000000..254cabfc
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/IVDescriptors.h
@@ -0,0 +1,356 @@
+//===- llvm/Analysis/IVDescriptors.h - IndVar Descriptors -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file "describes" induction and recurrence variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IVDESCRIPTORS_H
+#define LLVM_ANALYSIS_IVDESCRIPTORS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+
+class AliasSet;
+class AliasSetTracker;
+class BasicBlock;
+class DataLayout;
+class Loop;
+class LoopInfo;
+class OptimizationRemarkEmitter;
+class PredicatedScalarEvolution;
+class PredIteratorCache;
+class ScalarEvolution;
+class SCEV;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+
+/// The RecurrenceDescriptor is used to identify recurrence variables in a
+/// loop. Reduction is a special case of recurrence that has uses of the
+/// recurrence variable outside the loop.
The method isReductionPHI identifies +/// reductions that are basic recurrences. +/// +/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min, +/// or max of a set of terms. For example: for(i=0; i<n; i++) { total += +/// array[i]; } is a summation of array elements. Basic recurrences are a +/// special case of chains of recurrences (CR). See ScalarEvolution for CR +/// references. + +/// This struct holds information about recurrence variables. +class RecurrenceDescriptor { +public: + /// This enum represents the kinds of recurrences that we support. + enum RecurrenceKind { + RK_NoRecurrence, ///< Not a recurrence. + RK_IntegerAdd, ///< Sum of integers. + RK_IntegerMult, ///< Product of integers. + RK_IntegerOr, ///< Bitwise or logical OR of numbers. + RK_IntegerAnd, ///< Bitwise or logical AND of numbers. + RK_IntegerXor, ///< Bitwise or logical XOR of numbers. + RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). + RK_FloatAdd, ///< Sum of floats. + RK_FloatMult, ///< Product of floats. + RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). + }; + + // This enum represents the kind of minmax recurrence. + enum MinMaxRecurrenceKind { + MRK_Invalid, + MRK_UIntMin, + MRK_UIntMax, + MRK_SIntMin, + MRK_SIntMax, + MRK_FloatMin, + MRK_FloatMax + }; + + RecurrenceDescriptor() = default; + + RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, + MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT, + bool Signed, SmallPtrSetImpl<Instruction *> &CI) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), + UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { + CastInsts.insert(CI.begin(), CI.end()); + } + + /// This POD struct holds information about a potential recurrence operation. + class InstDesc { + public: + InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), + UnsafeAlgebraInst(UAI) {} + + InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) + : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), + UnsafeAlgebraInst(UAI) {} + + bool isRecurrence() { return IsRecurrence; } + + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + + MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } + + Instruction *getPatternInst() { return PatternLastInst; } + + private: + // Is this instruction a recurrence candidate. + bool IsRecurrence; + // The last instruction in a min/max pattern (select of the select(icmp()) + // pattern), or the current recurrence instruction otherwise. + Instruction *PatternLastInst; + // If this is a min/max pattern the comparison predicate. + MinMaxRecurrenceKind MinMaxKind; + // Recurrence has unsafe algebra. + Instruction *UnsafeAlgebraInst; + }; + + /// Returns a struct describing if the instruction 'I' can be a recurrence + /// variable of type 'Kind'. If the recurrence is a min/max pattern of + /// select(icmp()) this function advances the instruction pointer 'I' from the + /// compare instruction to the select instruction and stores this pointer in + /// 'PatternLastInst' member of the returned struct. 
+  static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+                                    InstDesc &Prev, bool HasFunNoNaNAttr);
+
+  /// Returns true if instruction I has multiple uses in Insts.
+  static bool hasMultipleUsesOf(Instruction *I,
+                                SmallPtrSetImpl<Instruction *> &Insts,
+                                unsigned MaxNumUses);
+
+  /// Returns true if all uses of the instruction I are within the Set.
+  static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
+
+  /// Returns a struct describing if the instruction is a
+  /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
+  /// or max(X, Y).
+  static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
+
+  /// Returns a struct describing if the instruction is a
+  /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
+  static InstDesc isConditionalRdxPattern(RecurrenceKind Kind, Instruction *I);
+
+  /// Returns the identity constant corresponding to the RecurrenceKind.
+  static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
+
+  /// Returns the opcode of the binary operation corresponding to the
+  /// RecurrenceKind.
+  static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
+
+  /// Returns true if Phi is a reduction of type Kind and adds it to the
+  /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
+  /// non-null, the minimal bit width needed to compute the reduction will be
+  /// computed.
+  static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
+                              bool HasFunNoNaNAttr,
+                              RecurrenceDescriptor &RedDes,
+                              DemandedBits *DB = nullptr,
+                              AssumptionCache *AC = nullptr,
+                              DominatorTree *DT = nullptr);
+
+  /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
+  /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
+  /// non-null, the minimal bit width needed to compute the reduction will be
+  /// computed.
+  static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
+                             RecurrenceDescriptor &RedDes,
+                             DemandedBits *DB = nullptr,
+                             AssumptionCache *AC = nullptr,
+                             DominatorTree *DT = nullptr);
+
+  /// Returns true if Phi is a first-order recurrence. A first-order recurrence
+  /// is a non-reduction recurrence relation in which the value of the
+  /// recurrence in the current loop iteration equals a value defined in the
+  /// previous iteration. \p SinkAfter includes pairs of instructions where the
+  /// first will be rescheduled to appear after the second if/when the loop is
+  /// vectorized. It may be augmented with additional pairs if needed in order
+  /// to handle Phi as a first-order recurrence.
+  static bool
+  isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+                         DenseMap<Instruction *, Instruction *> &SinkAfter,
+                         DominatorTree *DT);
+
+  RecurrenceKind getRecurrenceKind() { return Kind; }
+
+  MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
+
+  TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
+
+  Instruction *getLoopExitInstr() { return LoopExitInstr; }
+
+  /// Returns true if the recurrence has unsafe algebra which requires a
+  /// relaxed floating-point model.
+  bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+  /// Returns the first unsafe algebra instruction in the PHI node's use-chain.
+  Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
+  /// Returns true if the recurrence kind is an integer kind.
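+  /// As an illustrative sketch only (not a declaration in this header;
+  /// 'Phi' and 'TheLoop' are assumed to come from the calling transform),
+  /// this predicate is typically combined with isReductionPHI() above:
+  /// \code
+  ///   RecurrenceDescriptor RedDes;
+  ///   if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes) &&
+  ///       RecurrenceDescriptor::isIntegerRecurrenceKind(
+  ///           RedDes.getRecurrenceKind()))
+  ///     ; // an integer reduction, e.g. a summation of array elements
+  /// \endcode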
+  static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
+
+  /// Returns true if the recurrence kind is a floating point kind.
+  static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
+
+  /// Returns true if the recurrence kind is an arithmetic kind.
+  static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
+
+  /// Returns the type of the recurrence. This type can be narrower than the
+  /// actual type of the Phi if the recurrence has been type-promoted.
+  Type *getRecurrenceType() { return RecurrenceType; }
+
+  /// Returns a reference to the instructions used for type-promoting the
+  /// recurrence.
+  SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
+
+  /// Returns true if all source operands of the recurrence are SExtInsts.
+  bool isSigned() { return IsSigned; }
+
+private:
+  // The starting value of the recurrence.
+  // It does not have to be zero!
+  TrackingVH<Value> StartValue;
+  // The instruction whose value is used outside the loop.
+  Instruction *LoopExitInstr = nullptr;
+  // The kind of the recurrence.
+  RecurrenceKind Kind = RK_NoRecurrence;
+  // If this is a min/max recurrence, the kind of min/max recurrence.
+  MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
+  // First occurrence of unsafe algebra in the PHI's use-chain.
+  Instruction *UnsafeAlgebraInst = nullptr;
+  // The type of the recurrence.
+  Type *RecurrenceType = nullptr;
+  // True if all source operands of the recurrence are SExtInsts.
+  bool IsSigned = false;
+  // Instructions used for type-promoting the recurrence.
+  SmallPtrSet<Instruction *, 8> CastInsts;
+};
+
+/// A struct for saving information about induction variables.
+class InductionDescriptor {
+public:
+  /// This enum represents the kinds of inductions that we support.
+  enum InductionKind {
+    IK_NoInduction,  ///< Not an induction variable.
+    IK_IntInduction, ///< Integer induction variable. Step = C.
+    IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+    IK_FpInduction   ///< Floating point induction variable.
+  };
+
+public:
+  /// Default constructor - creates an invalid induction.
+  InductionDescriptor() = default;
+
+  /// Get the consecutive direction. Returns:
+  ///   0 - unknown or non-consecutive.
+  ///   1 - consecutive and increasing.
+  ///  -1 - consecutive and decreasing.
+  int getConsecutiveDirection() const;
+
+  Value *getStartValue() const { return StartValue; }
+  InductionKind getKind() const { return IK; }
+  const SCEV *getStep() const { return Step; }
+  BinaryOperator *getInductionBinOp() const { return InductionBinOp; }
+  ConstantInt *getConstIntStepValue() const;
+
+  /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
+  /// induction, the induction descriptor \p D will contain the data describing
+  /// this induction. If by some other means the caller has a better SCEV
+  /// expression for \p Phi than the one returned by the ScalarEvolution
+  /// analysis, it can be passed through \p Expr. If the def-use chain
+  /// associated with the phi includes casts (that we know we can ignore
+  /// under proper runtime checks), they are passed through \p CastsToIgnore.
+  static bool
+  isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE,
+                 InductionDescriptor &D, const SCEV *Expr = nullptr,
+                 SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
+
+  /// Returns true if \p Phi is a floating point induction in the loop \p L.
+  /// If \p Phi is an induction, the induction descriptor \p D will contain
+  /// the data describing this induction.
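+  /// As a rough usage sketch (illustrative only; 'Phi', 'L' and 'SE', a
+  /// ScalarEvolution*, are assumed to be supplied by the caller), the integer
+  /// variant above is queried as:
+  /// \code
+  ///   InductionDescriptor ID;
+  ///   if (InductionDescriptor::isInductionPHI(Phi, L, SE, ID)) {
+  ///     const SCEV *Step = ID.getStep(); // e.g. a constant stride
+  ///     (void)Step;
+  ///   }
+  /// \endcode
+  /// The floating-point variant below is used the same way.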
+ static bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, + InductionDescriptor &D); + + /// Returns true if \p Phi is a loop \p L induction, in the context associated + /// with the run-time predicate of PSE. If \p Assume is true, this can add + /// further SCEV predicates to \p PSE in order to prove that \p Phi is an + /// induction. + /// If \p Phi is an induction, \p D will contain the data describing this + /// induction. + static bool isInductionPHI(PHINode *Phi, const Loop *L, + PredicatedScalarEvolution &PSE, + InductionDescriptor &D, bool Assume = false); + + /// Returns true if the induction type is FP and the binary operator does + /// not have the "fast-math" property. Such operation requires a relaxed FP + /// mode. + bool hasUnsafeAlgebra() { + return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast(); + } + + /// Returns induction operator that does not have "fast-math" property + /// and requires FP unsafe mode. + Instruction *getUnsafeAlgebraInst() { + if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast()) + return nullptr; + return InductionBinOp; + } + + /// Returns binary opcode of the induction operator. + Instruction::BinaryOps getInductionOpcode() const { + return InductionBinOp ? InductionBinOp->getOpcode() + : Instruction::BinaryOpsEnd; + } + + /// Returns a reference to the type cast instructions in the induction + /// update chain, that are redundant when guarded with a runtime + /// SCEV overflow check. + const SmallVectorImpl<Instruction *> &getCastInsts() const { + return RedundantCasts; + } + +private: + /// Private constructor - used by \c isInductionPHI. + InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, + BinaryOperator *InductionBinOp = nullptr, + SmallVectorImpl<Instruction *> *Casts = nullptr); + + /// Start value. + TrackingVH<Value> StartValue; + /// Induction kind. + InductionKind IK = IK_NoInduction; + /// Step value. + const SCEV *Step = nullptr; + // Instruction that advances induction variable. + BinaryOperator *InductionBinOp = nullptr; + // Instructions used for type-casts of the induction variable, + // that are redundant when guarded with a runtime SCEV overflow check. + SmallVector<Instruction *, 2> RedundantCasts; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_IVDESCRIPTORS_H diff --git a/clang-r353983e/include/llvm/Analysis/IVUsers.h b/clang-r353983e/include/llvm/Analysis/IVUsers.h new file mode 100644 index 00000000..f8ea3bcc --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/IVUsers.h @@ -0,0 +1,201 @@ +//===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. 
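+//
+// As a rough usage sketch (illustrative only; the Loop* 'L' and the analyses
+// AC, LI, DT and SE are assumed to be supplied by the surrounding pass), a
+// client can populate and walk the tracked uses roughly like this:
+//
+//   IVUsers IU(L, &AC, &LI, &DT, &SE);
+//   for (PHINode &PN : L->getHeader()->phis())
+//     IU.AddUsersIfInteresting(&PN);
+//   for (IVStrideUse &U : IU)
+//     if (const SCEV *Expr = IU.getExpr(U))
+//       (void)Expr; // expression computed from the induction variable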
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IVUSERS_H +#define LLVM_ANALYSIS_IVUSERS_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +#include "llvm/IR/ValueHandle.h" + +namespace llvm { + +class AssumptionCache; +class DominatorTree; +class Instruction; +class Value; +class ScalarEvolution; +class SCEV; +class IVUsers; +class DataLayout; + +/// IVStrideUse - Keep track of one use of a strided induction variable. +/// The Expr member keeps track of the expression, User is the actual user +/// instruction of the operand, and 'OperandValToReplace' is the operand of +/// the User that is the use. +class IVStrideUse final : public CallbackVH, public ilist_node<IVStrideUse> { + friend class IVUsers; +public: + IVStrideUse(IVUsers *P, Instruction* U, Value *O) + : CallbackVH(U), Parent(P), OperandValToReplace(O) { + } + + /// getUser - Return the user instruction for this use. + Instruction *getUser() const { + return cast<Instruction>(getValPtr()); + } + + /// setUser - Assign a new user instruction for this use. + void setUser(Instruction *NewUser) { + setValPtr(NewUser); + } + + /// getOperandValToReplace - Return the Value of the operand in the user + /// instruction that this IVStrideUse is representing. + Value *getOperandValToReplace() const { + return OperandValToReplace; + } + + /// setOperandValToReplace - Assign a new Value as the operand value + /// to replace. + void setOperandValToReplace(Value *Op) { + OperandValToReplace = Op; + } + + /// getPostIncLoops - Return the set of loops for which the expression has + /// been adjusted to use post-inc mode. + const PostIncLoopSet &getPostIncLoops() const { + return PostIncLoops; + } + + /// transformToPostInc - Transform the expression to post-inc form for the + /// given loop. + void transformToPostInc(const Loop *L); + +private: + /// Parent - a pointer to the IVUsers that owns this IVStrideUse. + IVUsers *Parent; + + /// OperandValToReplace - The Value of the operand in the user instruction + /// that this IVStrideUse is representing. + WeakTrackingVH OperandValToReplace; + + /// PostIncLoops - The set of loops for which Expr has been adjusted to + /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept. + PostIncLoopSet PostIncLoops; + + /// Deleted - Implementation of CallbackVH virtual function to + /// receive notification when the User is deleted. + void deleted() override; +}; + +class IVUsers { + friend class IVStrideUse; + Loop *L; + AssumptionCache *AC; + LoopInfo *LI; + DominatorTree *DT; + ScalarEvolution *SE; + SmallPtrSet<Instruction*, 16> Processed; + + /// IVUses - A list of all tracked IV uses of induction variable expressions + /// we are interested in. + ilist<IVStrideUse> IVUses; + + // Ephemeral values used by @llvm.assume in this function. 
+ SmallPtrSet<const Value *, 32> EphValues; + +public: + IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE); + + IVUsers(IVUsers &&X) + : L(std::move(X.L)), AC(std::move(X.AC)), DT(std::move(X.DT)), + SE(std::move(X.SE)), Processed(std::move(X.Processed)), + IVUses(std::move(X.IVUses)), EphValues(std::move(X.EphValues)) { + for (IVStrideUse &U : IVUses) + U.Parent = this; + } + IVUsers(const IVUsers &) = delete; + IVUsers &operator=(IVUsers &&) = delete; + IVUsers &operator=(const IVUsers &) = delete; + + Loop *getLoop() const { return L; } + + /// AddUsersIfInteresting - Inspect the specified Instruction. If it is a + /// reducible SCEV, recursively add its users to the IVUsesByStride set and + /// return true. Otherwise, return false. + bool AddUsersIfInteresting(Instruction *I); + + IVStrideUse &AddUser(Instruction *User, Value *Operand); + + /// getReplacementExpr - Return a SCEV expression which computes the + /// value of the OperandValToReplace of the given IVStrideUse. + const SCEV *getReplacementExpr(const IVStrideUse &IU) const; + + /// getExpr - Return the expression for the use. + const SCEV *getExpr(const IVStrideUse &IU) const; + + const SCEV *getStride(const IVStrideUse &IU, const Loop *L) const; + + typedef ilist<IVStrideUse>::iterator iterator; + typedef ilist<IVStrideUse>::const_iterator const_iterator; + iterator begin() { return IVUses.begin(); } + iterator end() { return IVUses.end(); } + const_iterator begin() const { return IVUses.begin(); } + const_iterator end() const { return IVUses.end(); } + bool empty() const { return IVUses.empty(); } + + bool isIVUserOrOperand(Instruction *Inst) const { + return Processed.count(Inst); + } + + void releaseMemory(); + + void print(raw_ostream &OS, const Module * = nullptr) const; + + /// dump - This method is used for debugging. + void dump() const; + +protected: + bool AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests); +}; + +Pass *createIVUsersPass(); + +class IVUsersWrapperPass : public LoopPass { + std::unique_ptr<IVUsers> IU; + +public: + static char ID; + + IVUsersWrapperPass(); + + IVUsers &getIU() { return *IU; } + const IVUsers &getIU() const { return *IU; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void releaseMemory() override; + + void print(raw_ostream &OS, const Module * = nullptr) const override; +}; + +/// Analysis pass that exposes the \c IVUsers for a loop. +class IVUsersAnalysis : public AnalysisInfoMixin<IVUsersAnalysis> { + friend AnalysisInfoMixin<IVUsersAnalysis>; + static AnalysisKey Key; + +public: + typedef IVUsers Result; + + IVUsers run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR); +}; + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/clang-r353983e/include/llvm/Analysis/IndirectCallPromotionAnalysis.h new file mode 100644 index 00000000..8a05e913 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -0,0 +1,68 @@ +//===- IndirectCallPromotionAnalysis.h - Indirect call analysis -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Interface to identify indirect call promotion candidates.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INDIRECTCALLPROMOTIONANALYSIS_H
+#define LLVM_ANALYSIS_INDIRECTCALLPROMOTIONANALYSIS_H
+
+#include "llvm/ProfileData/InstrProf.h"
+
+namespace llvm {
+
+class Instruction;
+
+// Class for identifying profitable indirect call promotion candidates when
+// the indirect-call value profile metadata is available.
+class ICallPromotionAnalysis {
+private:
+  // Allocate space to read the profile annotation.
+  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+
+  // Count is the call count for the direct-call target.
+  // TotalCount is the total call count for the indirect-call callsite.
+  // RemainingCount is the TotalCount minus promoted-direct-call count.
+  // Return true if we should promote this indirect-call target.
+  bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount,
+                             uint64_t RemainingCount);
+
+  // Returns the number of profitable candidates to promote for the
+  // current ValueDataArray and the given \p Inst.
+  uint32_t getProfitablePromotionCandidates(const Instruction *Inst,
+                                            uint32_t NumVals,
+                                            uint64_t TotalCount);
+
+  // Noncopyable
+  ICallPromotionAnalysis(const ICallPromotionAnalysis &other) = delete;
+  ICallPromotionAnalysis &
+  operator=(const ICallPromotionAnalysis &other) = delete;
+
+public:
+  ICallPromotionAnalysis();
+
+  /// Returns a reference to the array of InstrProfValueData for the given
+  /// instruction \p I.
+  ///
+  /// The \p NumVals, \p TotalCount and \p NumCandidates
+  /// are set to the number of values in the array, the total profile count
+  /// of the indirect call \p I, and the number of profitable candidates
+  /// in the given array (which is sorted in reverse order of profitability).
+  ///
+  /// The returned array space is owned by this class, and overwritten on
+  /// subsequent calls.
+  ArrayRef<InstrProfValueData>
+  getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals,
+                                       uint64_t &TotalCount,
+                                       uint32_t &NumCandidates);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/clang-r353983e/include/llvm/Analysis/IndirectCallVisitor.h b/clang-r353983e/include/llvm/Analysis/IndirectCallVisitor.h
new file mode 100644
index 00000000..1d1f3f4c
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/IndirectCallVisitor.h
@@ -0,0 +1,38 @@
+//===-- IndirectCallVisitor.h - indirect call visitor ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a visitor class and a helper function that find
+// all indirect call-sites in a function.
+
+#ifndef LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+#define LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+
+#include "llvm/IR/InstVisitor.h"
+#include <vector>
+
+namespace llvm {
+// Visitor class that finds all indirect calls.
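+// It is normally reached through the findIndirectCalls() helper declared
+// below; an illustrative call site (assuming a Function &F from the caller):
+//
+//   std::vector<Instruction *> IndirectCalls = findIndirectCalls(F);
+//   for (Instruction *I : IndirectCalls)
+//     (void)I; // e.g. look up value-profile data for this call site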
+struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> { + std::vector<Instruction *> IndirectCalls; + PGOIndirectCallVisitor() {} + + void visitCallBase(CallBase &Call) { + if (Call.isIndirectCall()) + IndirectCalls.push_back(&Call); + } +}; + +// Helper function that finds all indirect call sites. +inline std::vector<Instruction *> findIndirectCalls(Function &F) { + PGOIndirectCallVisitor ICV; + ICV.visit(F); + return ICV.IndirectCalls; +} +} // namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/InlineCost.h b/clang-r353983e/include/llvm/Analysis/InlineCost.h new file mode 100644 index 00000000..e3860933 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/InlineCost.h @@ -0,0 +1,237 @@ +//===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements heuristics for inlining decisions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INLINECOST_H +#define LLVM_ANALYSIS_INLINECOST_H + +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include <cassert> +#include <climits> + +namespace llvm { +class AssumptionCacheTracker; +class BlockFrequencyInfo; +class CallSite; +class DataLayout; +class Function; +class ProfileSummaryInfo; +class TargetTransformInfo; + +namespace InlineConstants { +// Various thresholds used by inline cost analysis. +/// Use when optsize (-Os) is specified. +const int OptSizeThreshold = 50; + +/// Use when minsize (-Oz) is specified. +const int OptMinSizeThreshold = 5; + +/// Use when -O3 is specified. +const int OptAggressiveThreshold = 250; + +// Various magic constants used to adjust heuristics. +const int InstrCost = 5; +const int IndirectCallThreshold = 100; +const int CallPenalty = 25; +const int LastCallToStaticBonus = 15000; +const int ColdccPenalty = 2000; +/// Do not inline functions which allocate this many bytes on the stack +/// when the caller is recursive. +const unsigned TotalAllocaSizeRecursiveCaller = 1024; +} + +/// Represents the cost of inlining a function. +/// +/// This supports special values for functions which should "always" or +/// "never" be inlined. Otherwise, the cost represents a unitless amount; +/// smaller values increase the likelihood of the function being inlined. +/// +/// Objects of this type also provide the adjusted threshold for inlining +/// based on the information available for a particular callsite. They can be +/// directly tested to determine if inlining should occur given the cost and +/// threshold for this cost metric. +class InlineCost { + enum SentinelValues { + AlwaysInlineCost = INT_MIN, + NeverInlineCost = INT_MAX + }; + + /// The estimated cost of inlining this callsite. + const int Cost; + + /// The adjusted threshold against which this cost was computed. + const int Threshold; + + /// Must be set for Always and Never instances. + const char *Reason = nullptr; + + // Trivial constructor, interesting logic in the factory functions below. 
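+  // As an illustrative sketch (not part of this class; the callee analyses,
+  // the callbacks and 'ORE' are assumed to be available in the caller), the
+  // inliner typically obtains and tests a cost via getInlineCost(), declared
+  // later in this file:
+  //   InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
+  //                                 GetBFI, PSI, &ORE);
+  //   if (IC)                 // cost is below the adjusted threshold
+  //     tryToInline(CS);      // hypothetical caller-side helper
+  //   else if (IC.isNever())
+  //     Reason = IC.getReason();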
+ InlineCost(int Cost, int Threshold, const char *Reason = nullptr) + : Cost(Cost), Threshold(Threshold), Reason(Reason) { + assert((isVariable() || Reason) && + "Reason must be provided for Never or Always"); + } + +public: + static InlineCost get(int Cost, int Threshold) { + assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value"); + assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); + return InlineCost(Cost, Threshold); + } + static InlineCost getAlways(const char *Reason) { + return InlineCost(AlwaysInlineCost, 0, Reason); + } + static InlineCost getNever(const char *Reason) { + return InlineCost(NeverInlineCost, 0, Reason); + } + + /// Test whether the inline cost is low enough for inlining. + explicit operator bool() const { + return Cost < Threshold; + } + + bool isAlways() const { return Cost == AlwaysInlineCost; } + bool isNever() const { return Cost == NeverInlineCost; } + bool isVariable() const { return !isAlways() && !isNever(); } + + /// Get the inline cost estimate. + /// It is an error to call this on an "always" or "never" InlineCost. + int getCost() const { + assert(isVariable() && "Invalid access of InlineCost"); + return Cost; + } + + /// Get the threshold against which the cost was computed + int getThreshold() const { + assert(isVariable() && "Invalid access of InlineCost"); + return Threshold; + } + + /// Get the reason of Always or Never. + const char *getReason() const { + assert((Reason || isVariable()) && + "InlineCost reason must be set for Always or Never"); + return Reason; + } + + /// Get the cost delta from the threshold for inlining. + /// Only valid if the cost is of the variable kind. Returns a negative + /// value if the cost is too high to inline. + int getCostDelta() const { return Threshold - getCost(); } +}; + +/// InlineResult is basically true or false. For false results the message +/// describes a reason why it is decided not to inline. +struct InlineResult { + const char *message = nullptr; + InlineResult(bool result, const char *message = nullptr) + : message(result ? nullptr : (message ? message : "cost > threshold")) {} + InlineResult(const char *message = nullptr) : message(message) {} + operator bool() const { return !message; } + operator const char *() const { return message; } +}; + +/// Thresholds to tune inline cost analysis. The inline cost analysis decides +/// the condition to apply a threshold and applies it. Otherwise, +/// DefaultThreshold is used. If a threshold is Optional, it is applied only +/// when it has a valid value. Typically, users of inline cost analysis +/// obtain an InlineParams object through one of the \c getInlineParams methods +/// and pass it to \c getInlineCost. Some specialized versions of inliner +/// (such as the pre-inliner) might have custom logic to compute \c InlineParams +/// object. + +struct InlineParams { + /// The default threshold to start with for a callee. + int DefaultThreshold; + + /// Threshold to use for callees with inline hint. + Optional<int> HintThreshold; + + /// Threshold to use for cold callees. + Optional<int> ColdThreshold; + + /// Threshold to use when the caller is optimized for size. + Optional<int> OptSizeThreshold; + + /// Threshold to use when the caller is optimized for minsize. + Optional<int> OptMinSizeThreshold; + + /// Threshold to use when the callsite is considered hot. + Optional<int> HotCallSiteThreshold; + + /// Threshold to use when the callsite is considered hot relative to function + /// entry. 
+ Optional<int> LocallyHotCallSiteThreshold; + + /// Threshold to use when the callsite is considered cold. + Optional<int> ColdCallSiteThreshold; + + /// Compute inline cost even when the cost has exceeded the threshold. + Optional<bool> ComputeFullInlineCost; +}; + +/// Generate the parameters to tune the inline cost analysis based only on the +/// commandline options. +InlineParams getInlineParams(); + +/// Generate the parameters to tune the inline cost analysis based on command +/// line options. If -inline-threshold option is not explicitly passed, +/// \p Threshold is used as the default threshold. +InlineParams getInlineParams(int Threshold); + +/// Generate the parameters to tune the inline cost analysis based on command +/// line options. If -inline-threshold option is not explicitly passed, +/// the default threshold is computed from \p OptLevel and \p SizeOptLevel. +/// An \p OptLevel value above 3 is considered an aggressive optimization mode. +/// \p SizeOptLevel of 1 corresponds to the -Os flag and 2 corresponds to +/// the -Oz flag. +InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel); + +/// Return the cost associated with a callsite, including parameter passing +/// and the call/return instruction. +int getCallsiteCost(CallSite CS, const DataLayout &DL); + +/// Get an InlineCost object representing the cost of inlining this +/// callsite. +/// +/// Note that a default threshold is passed into this function. This threshold +/// could be modified based on callsite's properties and only costs below this +/// new threshold are computed with any accuracy. The new threshold can be +/// used to bound the computation necessary to determine whether the cost is +/// sufficiently low to warrant inlining. +/// +/// Also note that calling this function *dynamically* computes the cost of +/// inlining the callsite. It is an expensive, heavyweight call. +InlineCost getInlineCost( + CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr); + +/// Get an InlineCost with the callee explicitly specified. +/// This allows you to calculate the cost of inlining a function via a +/// pointer. This behaves exactly as the version with no explicit callee +/// parameter in all other respects. +// +InlineCost +getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params, + TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); + +/// Minimal filter to detect invalid constructs for inlining. +InlineResult isInlineViable(Function &Callee); +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/InstructionPrecedenceTracking.h b/clang-r353983e/include/llvm/Analysis/InstructionPrecedenceTracking.h new file mode 100644 index 00000000..eb72eac2 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/InstructionPrecedenceTracking.h @@ -0,0 +1,149 @@ +//===-- InstructionPrecedenceTracking.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implements a class that is able to define some instructions as "special" +// (e.g. as having implicit control flow, or writing memory, or having another +// interesting property) and then efficiently answers queries of the types: +// 1. Are there any special instructions in the block of interest? +// 2. Return first of the special instructions in the given block; +// 3. Check if the given instruction is preceeded by the first special +// instruction in the same block. +// The class provides caching that allows to answer these queries quickly. The +// user must make sure that the cached data is invalidated properly whenever +// a content of some tracked block is changed. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H +#define LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H + +#include "llvm/IR/Dominators.h" +#include "llvm/Analysis/OrderedInstructions.h" + +namespace llvm { + +class InstructionPrecedenceTracking { + // Maps a block to the topmost special instruction in it. If the value is + // nullptr, it means that it is known that this block does not contain any + // special instructions. + DenseMap<const BasicBlock *, const Instruction *> FirstSpecialInsts; + // Allows to answer queries about precedence of instructions within one block. + OrderedInstructions OI; + + // Fills information about the given block's special instructions. + void fill(const BasicBlock *BB); + +#ifndef NDEBUG + /// Asserts that the cached info for \p BB is up-to-date. This helps to catch + /// the usage error of accessing a block without properly invalidating after a + /// previous transform. + void validate(const BasicBlock *BB) const; + + /// Asserts whether or not the contents of this tracking is up-to-date. This + /// helps to catch the usage error of accessing a block without properly + /// invalidating after a previous transform. + void validateAll() const; +#endif + +protected: + InstructionPrecedenceTracking(DominatorTree *DT) + : OI(OrderedInstructions(DT)) {} + + /// Returns the topmost special instruction from the block \p BB. Returns + /// nullptr if there is no special instructions in the block. + const Instruction *getFirstSpecialInstruction(const BasicBlock *BB); + + /// Returns true iff at least one instruction from the basic block \p BB is + /// special. + bool hasSpecialInstructions(const BasicBlock *BB); + + /// Returns true iff the first special instruction of \p Insn's block exists + /// and dominates \p Insn. + bool isPreceededBySpecialInstruction(const Instruction *Insn); + + /// A predicate that defines whether or not the instruction \p Insn is + /// considered special and needs to be tracked. Implementing this method in + /// children classes allows to implement tracking of implicit control flow, + /// memory writing instructions or any other kinds of instructions we might + /// be interested in. + virtual bool isSpecialInstruction(const Instruction *Insn) const = 0; + + virtual ~InstructionPrecedenceTracking() = default; + +public: + /// Notifies this tracking that we are going to insert a new instruction \p + /// Inst to the basic block \p BB. It makes all necessary updates to internal + /// caches to keep them consistent. 
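+  /// An illustrative calling sequence (the names are placeholders for
+  /// whatever the transform is manipulating):
+  /// \code
+  ///   IPT.insertInstructionTo(NewI, BB); // notify the cache first ...
+  ///   NewI->insertBefore(InsertPt);      // ... then do the actual IR update
+  /// \endcode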
+ void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB); + + /// Notifies this tracking that we are going to remove the instruction \p Inst + /// It makes all necessary updates to internal caches to keep them consistent. + void removeInstruction(const Instruction *Inst); + + /// Invalidates all information from this tracking. + void clear(); +}; + +/// This class allows to keep track on instructions with implicit control flow. +/// These are instructions that may not pass execution to their successors. For +/// example, throwing calls and guards do not always do this. If we need to know +/// for sure that some instruction is guaranteed to execute if the given block +/// is reached, then we need to make sure that there is no implicit control flow +/// instruction (ICFI) preceeding it. For example, this check is required if we +/// perform PRE moving non-speculable instruction to other place. +class ImplicitControlFlowTracking : public InstructionPrecedenceTracking { +public: + ImplicitControlFlowTracking(DominatorTree *DT) + : InstructionPrecedenceTracking(DT) {} + + /// Returns the topmost instruction with implicit control flow from the given + /// basic block. Returns nullptr if there is no such instructions in the block. + const Instruction *getFirstICFI(const BasicBlock *BB) { + return getFirstSpecialInstruction(BB); + } + + /// Returns true if at least one instruction from the given basic block has + /// implicit control flow. + bool hasICF(const BasicBlock *BB) { + return hasSpecialInstructions(BB); + } + + /// Returns true if the first ICFI of Insn's block exists and dominates Insn. + bool isDominatedByICFIFromSameBlock(const Instruction *Insn) { + return isPreceededBySpecialInstruction(Insn); + } + + virtual bool isSpecialInstruction(const Instruction *Insn) const; +}; + +class MemoryWriteTracking : public InstructionPrecedenceTracking { +public: + MemoryWriteTracking(DominatorTree *DT) : InstructionPrecedenceTracking(DT) {} + + /// Returns the topmost instruction that may write memory from the given + /// basic block. Returns nullptr if there is no such instructions in the block. + const Instruction *getFirstMemoryWrite(const BasicBlock *BB) { + return getFirstSpecialInstruction(BB); + } + + /// Returns true if at least one instruction from the given basic block may + /// write memory. + bool mayWriteToMemory(const BasicBlock *BB) { + return hasSpecialInstructions(BB); + } + + /// Returns true if the first memory writing instruction of Insn's block + /// exists and dominates Insn. + bool isDominatedByMemoryWriteFromSameBlock(const Instruction *Insn) { + return isPreceededBySpecialInstruction(Insn); + } + + virtual bool isSpecialInstruction(const Instruction *Insn) const; +}; + +} // llvm + +#endif // LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H diff --git a/clang-r353983e/include/llvm/Analysis/InstructionSimplify.h b/clang-r353983e/include/llvm/Analysis/InstructionSimplify.h new file mode 100644 index 00000000..a9040439 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/InstructionSimplify.h @@ -0,0 +1,292 @@ +//===-- InstructionSimplify.h - Fold instrs into simpler forms --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares routines for folding instructions into simpler forms +// that do not require creating new instructions. This does constant folding +// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either +// returning a constant ("and i32 %x, 0" -> "0") or an already existing value +// ("and i32 %x, %x" -> "%x"). If the simplification is also an instruction +// then it dominates the original instruction. +// +// These routines implicitly resolve undef uses. The easiest way to be safe when +// using these routines to obtain simplified values for existing instructions is +// to always replace all uses of the instructions with the resulting simplified +// values. This will prevent other code from seeing the same undef uses and +// resolving them to different values. +// +// These routines are designed to tolerate moderately incomplete IR, such as +// instructions that are not connected to basic blocks yet. However, they do +// require that all the IR that they encounter be valid. In particular, they +// require that all non-constant values be defined in the same function, and the +// same call context of that function (and not split between caller and callee +// contexts of a directly recursive call, for example). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H +#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H + +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/User.h" + +namespace llvm { +class Function; +template <typename T, typename... TArgs> class AnalysisManager; +template <class T> class ArrayRef; +class AssumptionCache; +class CallBase; +class DominatorTree; +class DataLayout; +class FastMathFlags; +struct LoopStandardAnalysisResults; +class OptimizationRemarkEmitter; +class Pass; +class TargetLibraryInfo; +class Type; +class Value; +class MDNode; +class BinaryOperator; + +/// InstrInfoQuery provides an interface to query additional information for +/// instructions like metadata or keywords like nsw, which provides conservative +/// results if the users specified it is safe to use. +struct InstrInfoQuery { + InstrInfoQuery(bool UMD) : UseInstrInfo(UMD) {} + InstrInfoQuery() : UseInstrInfo(true) {} + bool UseInstrInfo = true; + + MDNode *getMetadata(const Instruction *I, unsigned KindID) const { + if (UseInstrInfo) + return I->getMetadata(KindID); + return nullptr; + } + + template <class InstT> bool hasNoUnsignedWrap(const InstT *Op) const { + if (UseInstrInfo) + return Op->hasNoUnsignedWrap(); + return false; + } + + template <class InstT> bool hasNoSignedWrap(const InstT *Op) const { + if (UseInstrInfo) + return Op->hasNoSignedWrap(); + return false; + } + + bool isExact(const BinaryOperator *Op) const { + if (UseInstrInfo && isa<PossiblyExactOperator>(Op)) + return cast<PossiblyExactOperator>(Op)->isExact(); + return false; + } +}; + +struct SimplifyQuery { + const DataLayout &DL; + const TargetLibraryInfo *TLI = nullptr; + const DominatorTree *DT = nullptr; + AssumptionCache *AC = nullptr; + const Instruction *CxtI = nullptr; + + // Wrapper to query additional information for instructions like metadata or + // keywords like nsw, which provides conservative results if those cannot + // be safely used. 
+  const InstrInfoQuery IIQ;
+
+  SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr)
+      : DL(DL), CxtI(CXTI) {}
+
+  SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI,
+                const DominatorTree *DT = nullptr,
+                AssumptionCache *AC = nullptr,
+                const Instruction *CXTI = nullptr, bool UseInstrInfo = true)
+      : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo) {}
+  SimplifyQuery getWithInstruction(Instruction *I) const {
+    SimplifyQuery Copy(*this);
+    Copy.CxtI = I;
+    return Copy;
+  }
+};
+
+// NOTE: the explicit multiple argument versions of these functions are
+// deprecated.
+// Please use the SimplifyQuery versions in new code.
+
+/// Given operands for an Add, fold the result or return null.
+Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+                       const SimplifyQuery &Q);
+
+/// Given operands for a Sub, fold the result or return null.
+Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+                       const SimplifyQuery &Q);
+
+/// Given operands for an FAdd, fold the result or return null.
+Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
+/// Given operands for an FSub, fold the result or return null.
+Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
+/// Given operands for an FMul, fold the result or return null.
+Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
+/// Given operands for a Mul, fold the result or return null.
+Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for an SDiv, fold the result or return null.
+Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for a UDiv, fold the result or return null.
+Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for an FDiv, fold the result or return null.
+Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
+/// Given operands for an SRem, fold the result or return null.
+Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for a URem, fold the result or return null.
+Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for an FRem, fold the result or return null.
+Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
+/// Given operands for a Shl, fold the result or return null.
+Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                       const SimplifyQuery &Q);
+
+/// Given operands for an LShr, fold the result or return null.
+Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                        const SimplifyQuery &Q);
+
+/// Given operands for an AShr, fold the result or return null.
+Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                        const SimplifyQuery &Q);
+
+/// Given operands for an And, fold the result or return null.
+Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for an Or, fold the result or return null.
+Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for an Xor, fold the result or return null.
+Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+
+/// Given operands for an ICmpInst, fold the result or return null.
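+/// For instance (illustrative), SimplifyICmpInst(ICmpInst::ICMP_EQ, X, X, Q)
+/// folds a compare of a value with itself to the i1 constant true without
+/// creating any new instructions.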
+Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for an FCmpInst, fold the result or return null. +Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); + +/// Given operands for a SelectInst, fold the result or return null. +Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q); + +/// Given operands for a GetElementPtrInst, fold the result or return null. +Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, + const SimplifyQuery &Q); + +/// Given operands for an InsertValueInst, fold the result or return null. +Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, + const SimplifyQuery &Q); + +/// Given operands for an InsertElement, fold the result or return null. +Value *SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, + const SimplifyQuery &Q); + +/// Given operands for an ExtractValueInst, fold the result or return null. +Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const SimplifyQuery &Q); + +/// Given operands for an ExtractElementInst, fold the result or return null. +Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q); + +/// Given operands for a CastInst, fold the result or return null. +Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q); + +/// Given operands for a ShuffleVectorInst, fold the result or return null. +Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q); + +//=== Helper functions for higher up the class hierarchy. + +/// Given operands for a CmpInst, fold the result or return null. +Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for a BinaryOperator, fold the result or return null. +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for an FP BinaryOperator, fold the result or return null. +/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the +/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. +Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); + +/// Given a callsite, fold the result or return null. +Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); + +/// Given a function and iterators over arguments, fold the result or return +/// null. +Value *SimplifyCall(CallBase *Call, Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q); + +/// Given a function and set of arguments, fold the result or return null. +Value *SimplifyCall(CallBase *Call, Value *V, ArrayRef<Value *> Args, + const SimplifyQuery &Q); + +/// See if we can compute a simplified version of this instruction. If not, +/// return null. +Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); + +/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. +/// +/// This first performs a normal RAUW of I with SimpleV. It then recursively +/// attempts to simplify those users updated by the operation. The 'I' +/// instruction must not be equal to the simplified value 'SimpleV'. +/// +/// The function returns true if any simplifications were performed. 
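+///
+/// A minimal end-to-end sketch (illustrative; 'DL', 'TLI', 'DT', 'AC' and the
+/// instruction 'I' are assumed to come from the calling pass):
+/// \code
+///   SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
+///   if (Value *V = SimplifyInstruction(I, Q))
+///     replaceAndRecursivelySimplify(I, V, Q.TLI, Q.DT, Q.AC);
+/// \endcode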
+bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +/// Recursively attempt to simplify an instruction. +/// +/// This routine uses SimplifyInstruction to simplify 'I', and if successful +/// replaces uses of 'I' with the simplified value. It then recurses on each +/// of the users impacted. It returns true if any simplifications were +/// performed. +bool recursivelySimplifyInstruction(Instruction *I, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +// These helper functions return a SimplifyQuery structure that contains as +// many of the optional analysis we use as are currently valid. This is the +// strongly preferred way of constructing SimplifyQuery in passes. +const SimplifyQuery getBestSimplifyQuery(Pass &, Function &); +template <class T, class... TArgs> +const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &, + Function &); +const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, + const DataLayout &); +} // end namespace llvm + +#endif + diff --git a/clang-r353983e/include/llvm/Analysis/Interval.h b/clang-r353983e/include/llvm/Analysis/Interval.h new file mode 100644 index 00000000..5c9a4535 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/Interval.h @@ -0,0 +1,141 @@ +//===- llvm/Analysis/Interval.h - Interval Class Declaration ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Interval class, which +// represents a set of CFG nodes and is a portion of an interval partition. +// +// Intervals have some interesting and useful properties, including the +// following: +// 1. The header node of an interval dominates all of the elements of the +// interval +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INTERVAL_H +#define LLVM_ANALYSIS_INTERVAL_H + +#include "llvm/ADT/GraphTraits.h" +#include <vector> + +namespace llvm { + +class BasicBlock; +class raw_ostream; + +//===----------------------------------------------------------------------===// +// +/// Interval Class - An Interval is a set of nodes defined such that every node +/// in the interval has all of its predecessors in the interval (except for the +/// header) +/// +class Interval { + /// HeaderNode - The header BasicBlock, which dominates all BasicBlocks in this + /// interval. Also, any loops in this interval must go through the HeaderNode. + /// + BasicBlock *HeaderNode; + +public: + using succ_iterator = std::vector<BasicBlock*>::iterator; + using pred_iterator = std::vector<BasicBlock*>::iterator; + using node_iterator = std::vector<BasicBlock*>::iterator; + + inline Interval(BasicBlock *Header) : HeaderNode(Header) { + Nodes.push_back(Header); + } + + inline BasicBlock *getHeaderNode() const { return HeaderNode; } + + /// Nodes - The basic blocks in this interval. + std::vector<BasicBlock*> Nodes; + + /// Successors - List of BasicBlocks that are reachable directly from nodes in + /// this interval, but are not in the interval themselves. + /// These nodes necessarily must be header nodes for other intervals. 
+ std::vector<BasicBlock*> Successors; + + /// Predecessors - List of BasicBlocks that have this Interval's header block + /// as one of their successors. + std::vector<BasicBlock*> Predecessors; + + /// contains - Find out if a basic block is in this interval + inline bool contains(BasicBlock *BB) const { + for (BasicBlock *Node : Nodes) + if (Node == BB) + return true; + return false; + // I don't want the dependency on <algorithm> + //return find(Nodes.begin(), Nodes.end(), BB) != Nodes.end(); + } + + /// isSuccessor - find out if a basic block is a successor of this Interval + inline bool isSuccessor(BasicBlock *BB) const { + for (BasicBlock *Successor : Successors) + if (Successor == BB) + return true; + return false; + // I don't want the dependency on <algorithm> + //return find(Successors.begin(), Successors.end(), BB) != Successors.end(); + } + + /// Equality operator. It is only valid to compare two intervals from the + /// same partition, because of this, all we have to check is the header node + /// for equality. + inline bool operator==(const Interval &I) const { + return HeaderNode == I.HeaderNode; + } + + /// isLoop - Find out if there is a back edge in this interval... + bool isLoop() const; + + /// print - Show contents in human readable format... + void print(raw_ostream &O) const; +}; + +/// succ_begin/succ_end - define methods so that Intervals may be used +/// just like BasicBlocks can with the succ_* functions, and *::succ_iterator. +/// +inline Interval::succ_iterator succ_begin(Interval *I) { + return I->Successors.begin(); +} +inline Interval::succ_iterator succ_end(Interval *I) { + return I->Successors.end(); +} + +/// pred_begin/pred_end - define methods so that Intervals may be used +/// just like BasicBlocks can with the pred_* functions, and *::pred_iterator. +/// +inline Interval::pred_iterator pred_begin(Interval *I) { + return I->Predecessors.begin(); +} +inline Interval::pred_iterator pred_end(Interval *I) { + return I->Predecessors.end(); +} + +template <> struct GraphTraits<Interval*> { + using NodeRef = Interval *; + using ChildIteratorType = Interval::succ_iterator; + + static NodeRef getEntryNode(Interval *I) { return I; } + + /// nodes_iterator/begin/end - Allow iteration over all nodes in the graph + static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } + static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } +}; + +template <> struct GraphTraits<Inverse<Interval*>> { + using NodeRef = Interval *; + using ChildIteratorType = Interval::pred_iterator; + + static NodeRef getEntryNode(Inverse<Interval *> G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } + static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_INTERVAL_H diff --git a/clang-r353983e/include/llvm/Analysis/IntervalIterator.h b/clang-r353983e/include/llvm/Analysis/IntervalIterator.h new file mode 100644 index 00000000..efaaf971 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/IntervalIterator.h @@ -0,0 +1,267 @@ +//===- IntervalIterator.h - Interval Iterator Declaration -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an iterator that enumerates the intervals in a control flow +// graph of some sort. This iterator is parametric, allowing iterator over the +// following types of graphs: +// +// 1. A Function* object, composed of BasicBlock nodes. +// 2. An IntervalPartition& object, composed of Interval nodes. +// +// This iterator is defined to walk the control flow graph, returning intervals +// in depth first order. These intervals are completely filled in except for +// the predecessor fields (the successor information is filled in however). +// +// By default, the intervals created by this iterator are deleted after they +// are no longer any use to the iterator. This behavior can be changed by +// passing a false value into the intervals_begin() function. This causes the +// IOwnMem member to be set, and the intervals to not be deleted. +// +// It is only safe to use this if all of the intervals are deleted by the caller +// and all of the intervals are processed. However, the user of the iterator is +// not allowed to modify or delete the intervals until after the iterator has +// been used completely. The IntervalPartition class uses this functionality. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INTERVALITERATOR_H +#define LLVM_ANALYSIS_INTERVALITERATOR_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Analysis/Interval.h" +#include "llvm/Analysis/IntervalPartition.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <set> +#include <utility> +#include <vector> + +namespace llvm { + +class BasicBlock; + +// getNodeHeader - Given a source graph node and the source graph, return the +// BasicBlock that is the header node. This is the opposite of +// getSourceGraphNode. +inline BasicBlock *getNodeHeader(BasicBlock *BB) { return BB; } +inline BasicBlock *getNodeHeader(Interval *I) { return I->getHeaderNode(); } + +// getSourceGraphNode - Given a BasicBlock and the source graph, return the +// source graph node that corresponds to the BasicBlock. This is the opposite +// of getNodeHeader. +inline BasicBlock *getSourceGraphNode(Function *, BasicBlock *BB) { + return BB; +} +inline Interval *getSourceGraphNode(IntervalPartition *IP, BasicBlock *BB) { + return IP->getBlockInterval(BB); +} + +// addNodeToInterval - This method exists to assist the generic ProcessNode +// with the task of adding a node to the new interval, depending on the +// type of the source node. In the case of a CFG source graph (BasicBlock +// case), the BasicBlock itself is added to the interval. +inline void addNodeToInterval(Interval *Int, BasicBlock *BB) { + Int->Nodes.push_back(BB); +} + +// addNodeToInterval - This method exists to assist the generic ProcessNode +// with the task of adding a node to the new interval, depending on the +// type of the source node. In the case of a CFG source graph (BasicBlock +// case), the BasicBlock itself is added to the interval. In the case of +// an IntervalPartition source graph (Interval case), all of the member +// BasicBlocks are added to the interval. +inline void addNodeToInterval(Interval *Int, Interval *I) { + // Add all of the nodes in I as new nodes in Int. 
+ Int->Nodes.insert(Int->Nodes.end(), I->Nodes.begin(), I->Nodes.end()); +} + +template<class NodeTy, class OrigContainer_t, class GT = GraphTraits<NodeTy *>, + class IGT = GraphTraits<Inverse<NodeTy *>>> +class IntervalIterator { + std::vector<std::pair<Interval *, typename Interval::succ_iterator>> IntStack; + std::set<BasicBlock *> Visited; + OrigContainer_t *OrigContainer; + bool IOwnMem; // If True, delete intervals when done with them + // See file header for conditions of use + +public: + using iterator_category = std::forward_iterator_tag; + + IntervalIterator() = default; // End iterator, empty stack + + IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) { + OrigContainer = M; + if (!ProcessInterval(&M->front())) { + llvm_unreachable("ProcessInterval should never fail for first interval!"); + } + } + + IntervalIterator(IntervalIterator &&x) + : IntStack(std::move(x.IntStack)), Visited(std::move(x.Visited)), + OrigContainer(x.OrigContainer), IOwnMem(x.IOwnMem) { + x.IOwnMem = false; + } + + IntervalIterator(IntervalPartition &IP, bool OwnMemory) : IOwnMem(OwnMemory) { + OrigContainer = &IP; + if (!ProcessInterval(IP.getRootInterval())) { + llvm_unreachable("ProcessInterval should never fail for first interval!"); + } + } + + ~IntervalIterator() { + if (IOwnMem) + while (!IntStack.empty()) { + delete operator*(); + IntStack.pop_back(); + } + } + + bool operator==(const IntervalIterator &x) const { + return IntStack == x.IntStack; + } + bool operator!=(const IntervalIterator &x) const { return !(*this == x); } + + const Interval *operator*() const { return IntStack.back().first; } + Interval *operator*() { return IntStack.back().first; } + const Interval *operator->() const { return operator*(); } + Interval *operator->() { return operator*(); } + + IntervalIterator &operator++() { // Preincrement + assert(!IntStack.empty() && "Attempting to use interval iterator at end!"); + do { + // All of the intervals on the stack have been visited. Try visiting + // their successors now. + Interval::succ_iterator &SuccIt = IntStack.back().second, + EndIt = succ_end(IntStack.back().first); + while (SuccIt != EndIt) { // Loop over all interval succs + bool Done = ProcessInterval(getSourceGraphNode(OrigContainer, *SuccIt)); + ++SuccIt; // Increment iterator + if (Done) return *this; // Found a new interval! Use it! + } + + // Free interval memory... if necessary + if (IOwnMem) delete IntStack.back().first; + + // We ran out of successors for this interval... pop off the stack + IntStack.pop_back(); + } while (!IntStack.empty()); + + return *this; + } + + IntervalIterator operator++(int) { // Postincrement + IntervalIterator tmp = *this; + ++*this; + return tmp; + } + +private: + // ProcessInterval - This method is used during the construction of the + // interval graph. It walks through the source graph, recursively creating + // an interval per invocation until the entire graph is covered. This uses + // the ProcessNode method to add all of the nodes to the interval. + // + // This method is templated because it may operate on two different source + // graphs: a basic block graph, or a preexisting interval graph. + bool ProcessInterval(NodeTy *Node) { + BasicBlock *Header = getNodeHeader(Node); + if (!Visited.insert(Header).second) + return false; + + Interval *Int = new Interval(Header); + + // Check all of our successors to see if they are in the interval... 
+ for (typename GT::ChildIteratorType I = GT::child_begin(Node), + E = GT::child_end(Node); I != E; ++I) + ProcessNode(Int, getSourceGraphNode(OrigContainer, *I)); + + IntStack.push_back(std::make_pair(Int, succ_begin(Int))); + return true; + } + + // ProcessNode - This method is called by ProcessInterval to add nodes to the + // interval being constructed, and it is also called recursively as it walks + // the source graph. A node is added to the current interval only if all of + // its predecessors are already in the graph. This also takes care of keeping + // the successor set of an interval up to date. + // + // This method is templated because it may operate on two different source + // graphs: a basic block graph, or a preexisting interval graph. + void ProcessNode(Interval *Int, NodeTy *Node) { + assert(Int && "Null interval == bad!"); + assert(Node && "Null Node == bad!"); + + BasicBlock *NodeHeader = getNodeHeader(Node); + + if (Visited.count(NodeHeader)) { // Node already been visited? + if (Int->contains(NodeHeader)) { // Already in this interval... + return; + } else { // In other interval, add as successor + if (!Int->isSuccessor(NodeHeader)) // Add only if not already in set + Int->Successors.push_back(NodeHeader); + } + } else { // Otherwise, not in interval yet + for (typename IGT::ChildIteratorType I = IGT::child_begin(Node), + E = IGT::child_end(Node); I != E; ++I) { + if (!Int->contains(*I)) { // If pred not in interval, we can't be + if (!Int->isSuccessor(NodeHeader)) // Add only if not already in set + Int->Successors.push_back(NodeHeader); + return; // See you later + } + } + + // If we get here, then all of the predecessors of BB are in the interval + // already. In this case, we must add BB to the interval! + addNodeToInterval(Int, Node); + Visited.insert(NodeHeader); // The node has now been visited! + + if (Int->isSuccessor(NodeHeader)) { + // If we were in the successor list from before... remove from succ list + Int->Successors.erase(std::remove(Int->Successors.begin(), + Int->Successors.end(), NodeHeader), + Int->Successors.end()); + } + + // Now that we have discovered that Node is in the interval, perhaps some + // of its successors are as well? 
+ for (typename GT::ChildIteratorType It = GT::child_begin(Node), + End = GT::child_end(Node); It != End; ++It) + ProcessNode(Int, getSourceGraphNode(OrigContainer, *It)); + } + } +}; + +using function_interval_iterator = IntervalIterator<BasicBlock, Function>; +using interval_part_interval_iterator = + IntervalIterator<Interval, IntervalPartition>; + +inline function_interval_iterator intervals_begin(Function *F, + bool DeleteInts = true) { + return function_interval_iterator(F, DeleteInts); +} +inline function_interval_iterator intervals_end(Function *) { + return function_interval_iterator(); +} + +inline interval_part_interval_iterator + intervals_begin(IntervalPartition &IP, bool DeleteIntervals = true) { + return interval_part_interval_iterator(IP, DeleteIntervals); +} + +inline interval_part_interval_iterator intervals_end(IntervalPartition &IP) { + return interval_part_interval_iterator(); +} + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_INTERVALITERATOR_H diff --git a/clang-r353983e/include/llvm/Analysis/IntervalPartition.h b/clang-r353983e/include/llvm/Analysis/IntervalPartition.h new file mode 100644 index 00000000..5b127c25 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/IntervalPartition.h @@ -0,0 +1,110 @@ +//===- IntervalPartition.h - Interval partition Calculation -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the IntervalPartition class, which +// calculates and represents the interval partition of a function, or a +// preexisting interval partition. +// +// In this way, the interval partition may be used to reduce a flow graph down +// to its degenerate single node interval partition (unless it is irreducible). +// +// TODO: The IntervalPartition class should take a bool parameter that tells +// whether it should add the "tails" of an interval to an interval itself or if +// they should be represented as distinct intervals. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INTERVALPARTITION_H +#define LLVM_ANALYSIS_INTERVALPARTITION_H + +#include "llvm/Pass.h" +#include <map> +#include <vector> + +namespace llvm { + +class BasicBlock; +class Interval; + +//===----------------------------------------------------------------------===// +// +// IntervalPartition - This class builds and holds an "interval partition" for +// a function. This partition divides the control flow graph into a set of +// maximal intervals, as defined with the properties above. Intuitively, an +// interval is a (possibly nonexistent) loop with a "tail" of non-looping +// nodes following it. 
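As a concrete illustration of how these intervals are consumed (this sketch is editorial, not part of the header; it only relies on the intervals_begin/intervals_end helpers declared in IntervalIterator.h above and assumes the caller already holds a Function F):

#include "llvm/Analysis/IntervalIterator.h"
#include "llvm/Support/raw_ostream.h"

void printIntervals(llvm::Function &F) {
  using namespace llvm;
  // The default second argument (true) tells the iterator it owns each
  // Interval and may delete it once iteration has moved past it.
  for (function_interval_iterator I = intervals_begin(&F),
                                  E = intervals_end(&F);
       I != E; ++I) {
    errs() << "interval headed by " << I->getHeaderNode()->getName() << "\n";
    I->print(errs());
  }
}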
+// +class IntervalPartition : public FunctionPass { + using IntervalMapTy = std::map<BasicBlock *, Interval *>; + IntervalMapTy IntervalMap; + + using IntervalListTy = std::vector<Interval *>; + Interval *RootInterval = nullptr; + std::vector<Interval *> Intervals; + +public: + static char ID; // Pass identification, replacement for typeid + + IntervalPartition() : FunctionPass(ID) { + initializeIntervalPartitionPass(*PassRegistry::getPassRegistry()); + } + + // run - Calculate the interval partition for this function + bool runOnFunction(Function &F) override; + + // IntervalPartition ctor - Build a reduced interval partition from an + // existing interval graph. This takes an additional boolean parameter to + // distinguish it from a copy constructor. Always pass in false for now. + IntervalPartition(IntervalPartition &I, bool); + + // print - Show contents in human readable format... + void print(raw_ostream &O, const Module* = nullptr) const override; + + // getRootInterval() - Return the root interval that contains the starting + // block of the function. + inline Interval *getRootInterval() { return RootInterval; } + + // isDegeneratePartition() - Returns true if the interval partition contains + // a single interval, and thus cannot be simplified anymore. + bool isDegeneratePartition() { return Intervals.size() == 1; } + + // TODO: isIrreducible - look for triangle graph. + + // getBlockInterval - Return the interval that a basic block exists in. + inline Interval *getBlockInterval(BasicBlock *BB) { + IntervalMapTy::iterator I = IntervalMap.find(BB); + return I != IntervalMap.end() ? I->second : nullptr; + } + + // getAnalysisUsage - Implement the Pass API + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + // Interface to Intervals vector... + const std::vector<Interval*> &getIntervals() const { return Intervals; } + + // releaseMemory - Reset state back to before function was analyzed + void releaseMemory() override; + +private: + // addIntervalToPartition - Add an interval to the internal list of intervals, + // and then add mappings from all of the basic blocks in the interval to the + // interval itself (in the IntervalMap). + void addIntervalToPartition(Interval *I); + + // updatePredecessors - Interval generation only sets the successor fields of + // the interval data structures. After interval generation is complete, + // run through all of the intervals and propagate successor info as + // predecessor info. + void updatePredecessors(Interval *Int); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_INTERVALPARTITION_H diff --git a/clang-r353983e/include/llvm/Analysis/IteratedDominanceFrontier.h b/clang-r353983e/include/llvm/Analysis/IteratedDominanceFrontier.h new file mode 100644 index 00000000..e7d19d1a --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -0,0 +1,100 @@ +//===- IteratedDominanceFrontier.h - Calculate IDF --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Compute iterated dominance frontiers using a linear time algorithm. +/// +/// The algorithm used here is based on: +/// +/// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. 
+/// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +/// Programming Languages +/// POPL '95. ACM, New York, NY, 62-73. +/// +/// It has been modified to not explicitly use the DJ graph data structure and +/// to directly compute pruned SSA using per-variable liveness information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IDF_H +#define LLVM_ANALYSIS_IDF_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFGDiff.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Determine the iterated dominance frontier, given a set of defining +/// blocks, and optionally, a set of live-in blocks. +/// +/// In turn, the results can be used to place phi nodes. +/// +/// This algorithm is a linear time computation of Iterated Dominance Frontiers, +/// pruned using the live-in set. +/// By default, liveness is not used to prune the IDF computation. +/// The template parameters should be either BasicBlock* or Inverse<BasicBlock +/// *>, depending on if you want the forward or reverse IDF. +template <class NodeTy, bool IsPostDom> +class IDFCalculator { + public: + IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT) + : DT(DT), GD(nullptr), useLiveIn(false) {} + + IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT, + const GraphDiff<BasicBlock *, IsPostDom> *GD) + : DT(DT), GD(GD), useLiveIn(false) {} + + /// Give the IDF calculator the set of blocks in which the value is + /// defined. This is equivalent to the set of starting blocks it should be + /// calculating the IDF for (though later gets pruned based on liveness). + /// + /// Note: This set *must* live for the entire lifetime of the IDF calculator. + void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) { + DefBlocks = &Blocks; + } + + /// Give the IDF calculator the set of blocks in which the value is + /// live on entry to the block. This is used to prune the IDF calculation to + /// not include blocks where any phi insertion would be dead. + /// + /// Note: This set *must* live for the entire lifetime of the IDF calculator. + + void setLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) { + LiveInBlocks = &Blocks; + useLiveIn = true; + } + + /// Reset the live-in block set to be empty, and tell the IDF + /// calculator to not use liveness anymore. + void resetLiveInBlocks() { + LiveInBlocks = nullptr; + useLiveIn = false; + } + + /// Calculate iterated dominance frontiers + /// + /// This uses the linear-time phi algorithm based on DJ-graphs mentioned in + /// the file-level comment. It performs DF->IDF pruning using the live-in + /// set, to avoid computing the IDF for blocks where an inserted PHI node + /// would be dead. 
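A minimal sketch of the intended use (editorial, not part of this header): given a computed DominatorTree DT and the set Defs of blocks that define a variable, the calculator yields the blocks where PHI nodes would have to be placed. ForwardIDFCalculator is the typedef declared at the end of this file.

#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/Dominators.h"

void findPHIBlocks(llvm::DominatorTree &DT,
                   const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &Defs,
                   llvm::SmallVectorImpl<llvm::BasicBlock *> &PHIBlocks) {
  llvm::ForwardIDFCalculator IDF(DT);
  IDF.setDefiningBlocks(Defs); // Defs must outlive the calculator.
  IDF.calculate(PHIBlocks);    // Iterated dominance frontier of Defs.
}

Liveness pruning is opt-in: additionally calling setLiveInBlocks with the blocks where the variable is live on entry drops PHI candidates that could never be used.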
+ void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks); + +private: + DominatorTreeBase<BasicBlock, IsPostDom> &DT; + const GraphDiff<BasicBlock *, IsPostDom> *GD; + bool useLiveIn; + const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks; + const SmallPtrSetImpl<BasicBlock *> *DefBlocks; +}; +typedef IDFCalculator<BasicBlock *, false> ForwardIDFCalculator; +typedef IDFCalculator<Inverse<BasicBlock *>, true> ReverseIDFCalculator; +} +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/clang-r353983e/include/llvm/Analysis/LazyBlockFrequencyInfo.h new file mode 100644 index 00000000..0e7dc943 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LazyBlockFrequencyInfo.h @@ -0,0 +1,131 @@ +//===- LazyBlockFrequencyInfo.h - Lazy Block Frequency Analysis -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The +// difference is that with this pass the block frequencies are not computed when +// the analysis pass is executed but rather when the BFI result is explicitly +// requested by the analysis client. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LAZYBLOCKFREQUENCYINFO_H +#define LLVM_ANALYSIS_LAZYBLOCKFREQUENCYINFO_H + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/LazyBranchProbabilityInfo.h" +#include "llvm/Pass.h" + +namespace llvm { +class AnalysisUsage; +class BranchProbabilityInfo; +class Function; +class LoopInfo; + +/// Wraps a BFI to allow lazy computation of the block frequencies. +/// +/// A pass that only conditionally uses BFI can uncondtionally require the +/// analysis without paying for the overhead if BFI doesn't end up being used. +template <typename FunctionT, typename BranchProbabilityInfoPassT, + typename LoopInfoT, typename BlockFrequencyInfoT> +class LazyBlockFrequencyInfo { +public: + LazyBlockFrequencyInfo() + : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {} + + /// Set up the per-function input. + void setAnalysis(const FunctionT *F, BranchProbabilityInfoPassT *BPIPass, + const LoopInfoT *LI) { + this->F = F; + this->BPIPass = BPIPass; + this->LI = LI; + } + + /// Retrieve the BFI with the block frequencies computed. + BlockFrequencyInfoT &getCalculated() { + if (!Calculated) { + assert(F && BPIPass && LI && "call setAnalysis"); + BFI.calculate( + *F, BPIPassTrait<BranchProbabilityInfoPassT>::getBPI(BPIPass), *LI); + Calculated = true; + } + return BFI; + } + + const BlockFrequencyInfoT &getCalculated() const { + return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated(); + } + + void releaseMemory() { + BFI.releaseMemory(); + Calculated = false; + setAnalysis(nullptr, nullptr, nullptr); + } + +private: + BlockFrequencyInfoT BFI; + bool Calculated; + const FunctionT *F; + BranchProbabilityInfoPassT *BPIPass; + const LoopInfoT *LI; +}; + +/// This is an alternative analysis pass to +/// BlockFrequencyInfoWrapperPass. The difference is that with this pass the +/// block frequencies are not computed when the analysis pass is executed but +/// rather when the BFI result is explicitly requested by the analysis client. 
+/// +/// There are some additional requirements for any client pass that wants to use +/// the analysis: +/// +/// 1. The pass needs to initialize dependent passes with: +/// +/// INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) +/// +/// 2. Similarly, getAnalysisUsage should call: +/// +/// LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU) +/// +/// 3. The computed BFI should be requested with +/// getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() before either LoopInfo +/// or BPI could be invalidated for example by changing the CFG. +/// +/// Note that it is expected that we wouldn't need this functionality for the +/// new PM since with the new PM, analyses are executed on demand. + +class LazyBlockFrequencyInfoPass : public FunctionPass { +private: + LazyBlockFrequencyInfo<Function, LazyBranchProbabilityInfoPass, LoopInfo, + BlockFrequencyInfo> + LBFI; + +public: + static char ID; + + LazyBlockFrequencyInfoPass(); + + /// Compute and return the block frequencies. + BlockFrequencyInfo &getBFI() { return LBFI.getCalculated(); } + + /// Compute and return the block frequencies. + const BlockFrequencyInfo &getBFI() const { return LBFI.getCalculated(); } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Helper for client passes to set up the analysis usage on behalf of this + /// pass. + static void getLazyBFIAnalysisUsage(AnalysisUsage &AU); + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M) const override; +}; + +/// Helper for client passes to initialize dependent passes for LBFI. +void initializeLazyBFIPassPass(PassRegistry &Registry); +} +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/clang-r353983e/include/llvm/Analysis/LazyBranchProbabilityInfo.h new file mode 100644 index 00000000..cae0778c --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -0,0 +1,123 @@ +//===- LazyBranchProbabilityInfo.h - Lazy Branch Probability ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is an alternative analysis pass to BranchProbabilityInfoWrapperPass. +// The difference is that with this pass the branch probabilities are not +// computed when the analysis pass is executed but rather when the BPI results +// is explicitly requested by the analysis client. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LAZYBRANCHPROBABILITYINFO_H +#define LLVM_ANALYSIS_LAZYBRANCHPROBABILITYINFO_H + +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Pass.h" + +namespace llvm { +class AnalysisUsage; +class Function; +class LoopInfo; +class TargetLibraryInfo; + +/// This is an alternative analysis pass to +/// BranchProbabilityInfoWrapperPass. The difference is that with this pass the +/// branch probabilities are not computed when the analysis pass is executed but +/// rather when the BPI results is explicitly requested by the analysis client. +/// +/// There are some additional requirements for any client pass that wants to use +/// the analysis: +/// +/// 1. The pass needs to initialize dependent passes with: +/// +/// INITIALIZE_PASS_DEPENDENCY(LazyBPIPass) +/// +/// 2. 
Similarly, getAnalysisUsage should call: +/// +/// LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU) +/// +/// 3. The computed BPI should be requested with +/// getAnalysis<LazyBranchProbabilityInfoPass>().getBPI() before LoopInfo +/// could be invalidated for example by changing the CFG. +/// +/// Note that it is expected that we wouldn't need this functionality for the +/// new PM since with the new PM, analyses are executed on demand. +class LazyBranchProbabilityInfoPass : public FunctionPass { + + /// Wraps a BPI to allow lazy computation of the branch probabilities. + /// + /// A pass that only conditionally uses BPI can uncondtionally require the + /// analysis without paying for the overhead if BPI doesn't end up being used. + class LazyBranchProbabilityInfo { + public: + LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI, + const TargetLibraryInfo *TLI) + : Calculated(false), F(F), LI(LI), TLI(TLI) {} + + /// Retrieve the BPI with the branch probabilities computed. + BranchProbabilityInfo &getCalculated() { + if (!Calculated) { + assert(F && LI && "call setAnalysis"); + BPI.calculate(*F, *LI, TLI); + Calculated = true; + } + return BPI; + } + + const BranchProbabilityInfo &getCalculated() const { + return const_cast<LazyBranchProbabilityInfo *>(this)->getCalculated(); + } + + private: + BranchProbabilityInfo BPI; + bool Calculated; + const Function *F; + const LoopInfo *LI; + const TargetLibraryInfo *TLI; + }; + + std::unique_ptr<LazyBranchProbabilityInfo> LBPI; + +public: + static char ID; + + LazyBranchProbabilityInfoPass(); + + /// Compute and return the branch probabilities. + BranchProbabilityInfo &getBPI() { return LBPI->getCalculated(); } + + /// Compute and return the branch probabilities. + const BranchProbabilityInfo &getBPI() const { return LBPI->getCalculated(); } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Helper for client passes to set up the analysis usage on behalf of this + /// pass. + static void getLazyBPIAnalysisUsage(AnalysisUsage &AU); + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M) const override; +}; + +/// Helper for client passes to initialize dependent passes for LBPI. +void initializeLazyBPIPassPass(PassRegistry &Registry); + +/// Simple trait class that provides a mapping between BPI passes and the +/// corresponding BPInfo. +template <typename PassT> struct BPIPassTrait { + static PassT &getBPI(PassT *P) { return *P; } +}; + +template <> struct BPIPassTrait<LazyBranchProbabilityInfoPass> { + static BranchProbabilityInfo &getBPI(LazyBranchProbabilityInfoPass *P) { + return P->getBPI(); + } +}; +} +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LazyCallGraph.h b/clang-r353983e/include/llvm/Analysis/LazyCallGraph.h new file mode 100644 index 00000000..32865476 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LazyCallGraph.h @@ -0,0 +1,1287 @@ +//===- LazyCallGraph.h - Analysis of a Module's call graph ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Implements a lazy call graph analysis and related passes for the new pass +/// manager. +/// +/// NB: This is *not* a traditional call graph! 
It is a graph which models both +/// the current calls and potential calls. As a consequence there are many +/// edges in this call graph that do not correspond to a 'call' or 'invoke' +/// instruction. +/// +/// The primary use cases of this graph analysis is to facilitate iterating +/// across the functions of a module in ways that ensure all callees are +/// visited prior to a caller (given any SCC constraints), or vice versa. As +/// such is it particularly well suited to organizing CGSCC optimizations such +/// as inlining, outlining, argument promotion, etc. That is its primary use +/// case and motivates the design. It may not be appropriate for other +/// purposes. The use graph of functions or some other conservative analysis of +/// call instructions may be interesting for optimizations and subsequent +/// analyses which don't work in the context of an overly specified +/// potential-call-edge graph. +/// +/// To understand the specific rules and nature of this call graph analysis, +/// see the documentation of the \c LazyCallGraph below. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LAZYCALLGRAPH_H +#define LLVM_ANALYSIS_LAZYCALLGRAPH_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <iterator> +#include <string> +#include <utility> + +namespace llvm { + +class Module; +class Value; + +/// A lazily constructed view of the call graph of a module. +/// +/// With the edges of this graph, the motivating constraint that we are +/// attempting to maintain is that function-local optimization, CGSCC-local +/// optimizations, and optimizations transforming a pair of functions connected +/// by an edge in the graph, do not invalidate a bottom-up traversal of the SCC +/// DAG. That is, no optimizations will delete, remove, or add an edge such +/// that functions already visited in a bottom-up order of the SCC DAG are no +/// longer valid to have visited, or such that functions not yet visited in +/// a bottom-up order of the SCC DAG are not required to have already been +/// visited. +/// +/// Within this constraint, the desire is to minimize the merge points of the +/// SCC DAG. The greater the fanout of the SCC DAG and the fewer merge points +/// in the SCC DAG, the more independence there is in optimizing within it. +/// There is a strong desire to enable parallelization of optimizations over +/// the call graph, and both limited fanout and merge points will (artificially +/// in some cases) limit the scaling of such an effort. +/// +/// To this end, graph represents both direct and any potential resolution to +/// an indirect call edge. Another way to think about it is that it represents +/// both the direct call edges and any direct call edges that might be formed +/// through static optimizations. 
Specifically, it considers taking the address +/// of a function to be an edge in the call graph because this might be +/// forwarded to become a direct call by some subsequent function-local +/// optimization. The result is that the graph closely follows the use-def +/// edges for functions. Walking "up" the graph can be done by looking at all +/// of the uses of a function. +/// +/// The roots of the call graph are the external functions and functions +/// escaped into global variables. Those functions can be called from outside +/// of the module or via unknowable means in the IR -- we may not be able to +/// form even a potential call edge from a function body which may dynamically +/// load the function and call it. +/// +/// This analysis still requires updates to remain valid after optimizations +/// which could potentially change the set of potential callees. The +/// constraints it operates under only make the traversal order remain valid. +/// +/// The entire analysis must be re-computed if full interprocedural +/// optimizations run at any point. For example, globalopt completely +/// invalidates the information in this analysis. +/// +/// FIXME: This class is named LazyCallGraph in a lame attempt to distinguish +/// it from the existing CallGraph. At some point, it is expected that this +/// will be the only call graph and it will be renamed accordingly. +class LazyCallGraph { +public: + class Node; + class EdgeSequence; + class SCC; + class RefSCC; + class edge_iterator; + class call_edge_iterator; + + /// A class used to represent edges in the call graph. + /// + /// The lazy call graph models both *call* edges and *reference* edges. Call + /// edges are much what you would expect, and exist when there is a 'call' or + /// 'invoke' instruction of some function. Reference edges are also tracked + /// along side these, and exist whenever any instruction (transitively + /// through its operands) references a function. All call edges are + /// inherently reference edges, and so the reference graph forms a superset + /// of the formal call graph. + /// + /// All of these forms of edges are fundamentally represented as outgoing + /// edges. The edges are stored in the source node and point at the target + /// node. This allows the edge structure itself to be a very compact data + /// structure: essentially a tagged pointer. + class Edge { + public: + /// The kind of edge in the graph. + enum Kind : bool { Ref = false, Call = true }; + + Edge(); + explicit Edge(Node &N, Kind K); + + /// Test whether the edge is null. + /// + /// This happens when an edge has been deleted. We leave the edge objects + /// around but clear them. + explicit operator bool() const; + + /// Returnss the \c Kind of the edge. + Kind getKind() const; + + /// Test whether the edge represents a direct call to a function. + /// + /// This requires that the edge is not null. + bool isCall() const; + + /// Get the call graph node referenced by this edge. + /// + /// This requires that the edge is not null. + Node &getNode() const; + + /// Get the function referenced by this edge. + /// + /// This requires that the edge is not null. + Function &getFunction() const; + + private: + friend class LazyCallGraph::EdgeSequence; + friend class LazyCallGraph::RefSCC; + + PointerIntPair<Node *, 1, Kind> Value; + + void setKind(Kind K) { Value.setInt(K); } + }; + + /// The edge sequence object. 
+ /// + /// This typically exists entirely within the node but is exposed as + /// a separate type because a node doesn't initially have edges. An explicit + /// population step is required to produce this sequence at first and it is + /// then cached in the node. It is also used to represent edges entering the + /// graph from outside the module to model the graph's roots. + /// + /// The sequence itself both iterable and indexable. The indexes remain + /// stable even as the sequence mutates (including removal). + class EdgeSequence { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + friend class LazyCallGraph::RefSCC; + + using VectorT = SmallVector<Edge, 4>; + using VectorImplT = SmallVectorImpl<Edge>; + + public: + /// An iterator used for the edges to both entry nodes and child nodes. + class iterator + : public iterator_adaptor_base<iterator, VectorImplT::iterator, + std::forward_iterator_tag> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + VectorImplT::iterator E; + + // Build the iterator for a specific position in the edge list. + iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E) + : iterator_adaptor_base(BaseI), E(E) { + while (I != E && !*I) + ++I; + } + + public: + iterator() = default; + + using iterator_adaptor_base::operator++; + iterator &operator++() { + do { + ++I; + } while (I != E && !*I); + return *this; + } + }; + + /// An iterator over specifically call edges. + /// + /// This has the same iteration properties as the \c iterator, but + /// restricts itself to edges which represent actual calls. + class call_iterator + : public iterator_adaptor_base<call_iterator, VectorImplT::iterator, + std::forward_iterator_tag> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + VectorImplT::iterator E; + + /// Advance the iterator to the next valid, call edge. + void advanceToNextEdge() { + while (I != E && (!*I || !I->isCall())) + ++I; + } + + // Build the iterator for a specific position in the edge list. + call_iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E) + : iterator_adaptor_base(BaseI), E(E) { + advanceToNextEdge(); + } + + public: + call_iterator() = default; + + using iterator_adaptor_base::operator++; + call_iterator &operator++() { + ++I; + advanceToNextEdge(); + return *this; + } + }; + + iterator begin() { return iterator(Edges.begin(), Edges.end()); } + iterator end() { return iterator(Edges.end(), Edges.end()); } + + Edge &operator[](int i) { return Edges[i]; } + Edge &operator[](Node &N) { + assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!"); + auto &E = Edges[EdgeIndexMap.find(&N)->second]; + assert(E && "Dead or null edge!"); + return E; + } + + Edge *lookup(Node &N) { + auto EI = EdgeIndexMap.find(&N); + if (EI == EdgeIndexMap.end()) + return nullptr; + auto &E = Edges[EI->second]; + return E ? &E : nullptr; + } + + call_iterator call_begin() { + return call_iterator(Edges.begin(), Edges.end()); + } + call_iterator call_end() { return call_iterator(Edges.end(), Edges.end()); } + + iterator_range<call_iterator> calls() { + return make_range(call_begin(), call_end()); + } + + bool empty() { + for (auto &E : Edges) + if (E) + return false; + + return true; + } + + private: + VectorT Edges; + DenseMap<Node *, int> EdgeIndexMap; + + EdgeSequence() = default; + + /// Internal helper to insert an edge to a node. + void insertEdgeInternal(Node &ChildN, Edge::Kind EK); + + /// Internal helper to change an edge kind. 
+ void setEdgeKind(Node &ChildN, Edge::Kind EK); + + /// Internal helper to remove the edge to the given function. + bool removeEdgeInternal(Node &ChildN); + + /// Internal helper to replace an edge key with a new one. + /// + /// This should be used when the function for a particular node in the + /// graph gets replaced and we are updating all of the edges to that node + /// to use the new function as the key. + void replaceEdgeKey(Function &OldTarget, Function &NewTarget); + }; + + /// A node in the call graph. + /// + /// This represents a single node. It's primary roles are to cache the list of + /// callees, de-duplicate and provide fast testing of whether a function is + /// a callee, and facilitate iteration of child nodes in the graph. + /// + /// The node works much like an optional in order to lazily populate the + /// edges of each node. Until populated, there are no edges. Once populated, + /// you can access the edges by dereferencing the node or using the `->` + /// operator as if the node was an `Optional<EdgeSequence>`. + class Node { + friend class LazyCallGraph; + friend class LazyCallGraph::RefSCC; + + public: + LazyCallGraph &getGraph() const { return *G; } + + Function &getFunction() const { return *F; } + + StringRef getName() const { return F->getName(); } + + /// Equality is defined as address equality. + bool operator==(const Node &N) const { return this == &N; } + bool operator!=(const Node &N) const { return !operator==(N); } + + /// Tests whether the node has been populated with edges. + bool isPopulated() const { return Edges.hasValue(); } + + /// Tests whether this is actually a dead node and no longer valid. + /// + /// Users rarely interact with nodes in this state and other methods are + /// invalid. This is used to model a node in an edge list where the + /// function has been completely removed. + bool isDead() const { + assert(!G == !F && + "Both graph and function pointers should be null or non-null."); + return !G; + } + + // We allow accessing the edges by dereferencing or using the arrow + // operator, essentially wrapping the internal optional. + EdgeSequence &operator*() const { + // Rip const off because the node itself isn't changing here. + return const_cast<EdgeSequence &>(*Edges); + } + EdgeSequence *operator->() const { return &**this; } + + /// Populate the edges of this node if necessary. + /// + /// The first time this is called it will populate the edges for this node + /// in the graph. It does this by scanning the underlying function, so once + /// this is done, any changes to that function must be explicitly reflected + /// in updates to the graph. + /// + /// \returns the populated \c EdgeSequence to simplify walking it. + /// + /// This will not update or re-scan anything if called repeatedly. Instead, + /// the edge sequence is cached and returned immediately on subsequent + /// calls. + EdgeSequence &populate() { + if (Edges) + return *Edges; + + return populateSlow(); + } + + private: + LazyCallGraph *G; + Function *F; + + // We provide for the DFS numbering and Tarjan walk lowlink numbers to be + // stored directly within the node. These are both '-1' when nodes are part + // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk. + int DFSNumber = 0; + int LowLink = 0; + + Optional<EdgeSequence> Edges; + + /// Basic constructor implements the scanning of F into Edges and + /// EdgeIndexMap. + Node(LazyCallGraph &G, Function &F) : G(&G), F(&F) {} + + /// Implementation of the scan when populating. 
+ EdgeSequence &populateSlow(); + + /// Internal helper to directly replace the function with a new one. + /// + /// This is used to facilitate tranfsormations which need to replace the + /// formal Function object but directly move the body and users from one to + /// the other. + void replaceFunction(Function &NewF); + + void clear() { Edges.reset(); } + + /// Print the name of this node's function. + friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) { + return OS << N.F->getName(); + } + + /// Dump the name of this node's function to stderr. + void dump() const; + }; + + /// An SCC of the call graph. + /// + /// This represents a Strongly Connected Component of the direct call graph + /// -- ignoring indirect calls and function references. It stores this as + /// a collection of call graph nodes. While the order of nodes in the SCC is + /// stable, it is not any particular order. + /// + /// The SCCs are nested within a \c RefSCC, see below for details about that + /// outer structure. SCCs do not support mutation of the call graph, that + /// must be done through the containing \c RefSCC in order to fully reason + /// about the ordering and connections of the graph. + class SCC { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + RefSCC *OuterRefSCC; + SmallVector<Node *, 1> Nodes; + + template <typename NodeRangeT> + SCC(RefSCC &OuterRefSCC, NodeRangeT &&Nodes) + : OuterRefSCC(&OuterRefSCC), Nodes(std::forward<NodeRangeT>(Nodes)) {} + + void clear() { + OuterRefSCC = nullptr; + Nodes.clear(); + } + + /// Print a short descrtiption useful for debugging or logging. + /// + /// We print the function names in the SCC wrapped in '()'s and skipping + /// the middle functions if there are a large number. + // + // Note: this is defined inline to dodge issues with GCC's interpretation + // of enclosing namespaces for friend function declarations. + friend raw_ostream &operator<<(raw_ostream &OS, const SCC &C) { + OS << '('; + int i = 0; + for (LazyCallGraph::Node &N : C) { + if (i > 0) + OS << ", "; + // Elide the inner elements if there are too many. + if (i > 8) { + OS << "..., " << *C.Nodes.back(); + break; + } + OS << N; + ++i; + } + OS << ')'; + return OS; + } + + /// Dump a short description of this SCC to stderr. + void dump() const; + +#ifndef NDEBUG + /// Verify invariants about the SCC. + /// + /// This will attempt to validate all of the basic invariants within an + /// SCC, but not that it is a strongly connected componet per-se. Primarily + /// useful while building and updating the graph to check that basic + /// properties are in place rather than having inexplicable crashes later. + void verify(); +#endif + + public: + using iterator = pointee_iterator<SmallVectorImpl<Node *>::const_iterator>; + + iterator begin() const { return Nodes.begin(); } + iterator end() const { return Nodes.end(); } + + int size() const { return Nodes.size(); } + + RefSCC &getOuterRefSCC() const { return *OuterRefSCC; } + + /// Test if this SCC is a parent of \a C. + /// + /// Note that this is linear in the number of edges departing the current + /// SCC. + bool isParentOf(const SCC &C) const; + + /// Test if this SCC is an ancestor of \a C. + /// + /// Note that in the worst case this is linear in the number of edges + /// departing the current SCC and every SCC in the entire graph reachable + /// from this SCC. Thus this very well may walk every edge in the entire + /// call graph! Do not call this in a tight loop! 
+ bool isAncestorOf(const SCC &C) const; + + /// Test if this SCC is a child of \a C. + /// + /// See the comments for \c isParentOf for detailed notes about the + /// complexity of this routine. + bool isChildOf(const SCC &C) const { return C.isParentOf(*this); } + + /// Test if this SCC is a descendant of \a C. + /// + /// See the comments for \c isParentOf for detailed notes about the + /// complexity of this routine. + bool isDescendantOf(const SCC &C) const { return C.isAncestorOf(*this); } + + /// Provide a short name by printing this SCC to a std::string. + /// + /// This copes with the fact that we don't have a name per-se for an SCC + /// while still making the use of this in debugging and logging useful. + std::string getName() const { + std::string Name; + raw_string_ostream OS(Name); + OS << *this; + OS.flush(); + return Name; + } + }; + + /// A RefSCC of the call graph. + /// + /// This models a Strongly Connected Component of function reference edges in + /// the call graph. As opposed to actual SCCs, these can be used to scope + /// subgraphs of the module which are independent from other subgraphs of the + /// module because they do not reference it in any way. This is also the unit + /// where we do mutation of the graph in order to restrict mutations to those + /// which don't violate this independence. + /// + /// A RefSCC contains a DAG of actual SCCs. All the nodes within the RefSCC + /// are necessarily within some actual SCC that nests within it. Since + /// a direct call *is* a reference, there will always be at least one RefSCC + /// around any SCC. + class RefSCC { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + LazyCallGraph *G; + + /// A postorder list of the inner SCCs. + SmallVector<SCC *, 4> SCCs; + + /// A map from SCC to index in the postorder list. + SmallDenseMap<SCC *, int, 4> SCCIndices; + + /// Fast-path constructor. RefSCCs should instead be constructed by calling + /// formRefSCCFast on the graph itself. + RefSCC(LazyCallGraph &G); + + void clear() { + SCCs.clear(); + SCCIndices.clear(); + } + + /// Print a short description useful for debugging or logging. + /// + /// We print the SCCs wrapped in '[]'s and skipping the middle SCCs if + /// there are a large number. + // + // Note: this is defined inline to dodge issues with GCC's interpretation + // of enclosing namespaces for friend function declarations. + friend raw_ostream &operator<<(raw_ostream &OS, const RefSCC &RC) { + OS << '['; + int i = 0; + for (LazyCallGraph::SCC &C : RC) { + if (i > 0) + OS << ", "; + // Elide the inner elements if there are too many. + if (i > 4) { + OS << "..., " << *RC.SCCs.back(); + break; + } + OS << C; + ++i; + } + OS << ']'; + return OS; + } + + /// Dump a short description of this RefSCC to stderr. + void dump() const; + +#ifndef NDEBUG + /// Verify invariants about the RefSCC and all its SCCs. + /// + /// This will attempt to validate all of the invariants *within* the + /// RefSCC, but not that it is a strongly connected component of the larger + /// graph. This makes it useful even when partially through an update. + /// + /// Invariants checked: + /// - SCCs and their indices match. + /// - The SCCs list is in fact in post-order. + void verify(); +#endif + + /// Handle any necessary parent set updates after inserting a trivial ref + /// or call edge. 
+ void handleTrivialEdgeInsertion(Node &SourceN, Node &TargetN); + + public: + using iterator = pointee_iterator<SmallVectorImpl<SCC *>::const_iterator>; + using range = iterator_range<iterator>; + using parent_iterator = + pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator>; + + iterator begin() const { return SCCs.begin(); } + iterator end() const { return SCCs.end(); } + + ssize_t size() const { return SCCs.size(); } + + SCC &operator[](int Idx) { return *SCCs[Idx]; } + + iterator find(SCC &C) const { + return SCCs.begin() + SCCIndices.find(&C)->second; + } + + /// Test if this RefSCC is a parent of \a RC. + /// + /// CAUTION: This method walks every edge in the \c RefSCC, it can be very + /// expensive. + bool isParentOf(const RefSCC &RC) const; + + /// Test if this RefSCC is an ancestor of \a RC. + /// + /// CAUTION: This method walks the directed graph of edges as far as + /// necessary to find a possible path to the argument. In the worst case + /// this may walk the entire graph and can be extremely expensive. + bool isAncestorOf(const RefSCC &RC) const; + + /// Test if this RefSCC is a child of \a RC. + /// + /// CAUTION: This method walks every edge in the argument \c RefSCC, it can + /// be very expensive. + bool isChildOf(const RefSCC &RC) const { return RC.isParentOf(*this); } + + /// Test if this RefSCC is a descendant of \a RC. + /// + /// CAUTION: This method walks the directed graph of edges as far as + /// necessary to find a possible path from the argument. In the worst case + /// this may walk the entire graph and can be extremely expensive. + bool isDescendantOf(const RefSCC &RC) const { + return RC.isAncestorOf(*this); + } + + /// Provide a short name by printing this RefSCC to a std::string. + /// + /// This copes with the fact that we don't have a name per-se for an RefSCC + /// while still making the use of this in debugging and logging useful. + std::string getName() const { + std::string Name; + raw_string_ostream OS(Name); + OS << *this; + OS.flush(); + return Name; + } + + ///@{ + /// \name Mutation API + /// + /// These methods provide the core API for updating the call graph in the + /// presence of (potentially still in-flight) DFS-found RefSCCs and SCCs. + /// + /// Note that these methods sometimes have complex runtimes, so be careful + /// how you call them. + + /// Make an existing internal ref edge into a call edge. + /// + /// This may form a larger cycle and thus collapse SCCs into TargetN's SCC. + /// If that happens, the optional callback \p MergedCB will be invoked (if + /// provided) on the SCCs being merged away prior to actually performing + /// the merge. Note that this will never include the target SCC as that + /// will be the SCC functions are merged into to resolve the cycle. Once + /// this function returns, these merged SCCs are not in a valid state but + /// the pointers will remain valid until destruction of the parent graph + /// instance for the purpose of clearing cached information. This function + /// also returns 'true' if a cycle was formed and some SCCs merged away as + /// a convenience. + /// + /// After this operation, both SourceN's SCC and TargetN's SCC may move + /// position within this RefSCC's postorder list. Any SCCs merged are + /// merged into the TargetN's SCC in order to preserve reachability analyses + /// which took place on that SCC. 
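A hedged sketch of how a caller might consume the merge callback (the wrapper function and the InvalidatedSCCs set below are hypothetical and only for illustration; switchInternalEdgeToCall itself is the routine declared next):

void makeInternalCallEdge(
    llvm::LazyCallGraph::RefSCC &RC, llvm::LazyCallGraph::Node &SourceN,
    llvm::LazyCallGraph::Node &TargetN,
    llvm::SmallPtrSetImpl<llvm::LazyCallGraph::SCC *> &InvalidatedSCCs) {
  // If turning the ref edge into a call edge forms a cycle, every SCC merged
  // away is reported here so per-SCC cached results can be discarded; the
  // merged SCCs are left invalid, though their pointers remain usable as map
  // keys until the graph itself is destroyed.
  RC.switchInternalEdgeToCall(
      SourceN, TargetN,
      [&](llvm::ArrayRef<llvm::LazyCallGraph::SCC *> MergedSCCs) {
        InvalidatedSCCs.insert(MergedSCCs.begin(), MergedSCCs.end());
      });
}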
+ bool switchInternalEdgeToCall( + Node &SourceN, Node &TargetN, + function_ref<void(ArrayRef<SCC *> MergedSCCs)> MergeCB = {}); + + /// Make an existing internal call edge between separate SCCs into a ref + /// edge. + /// + /// If SourceN and TargetN in separate SCCs within this RefSCC, changing + /// the call edge between them to a ref edge is a trivial operation that + /// does not require any structural changes to the call graph. + void switchTrivialInternalEdgeToRef(Node &SourceN, Node &TargetN); + + /// Make an existing internal call edge within a single SCC into a ref + /// edge. + /// + /// Since SourceN and TargetN are part of a single SCC, this SCC may be + /// split up due to breaking a cycle in the call edges that formed it. If + /// that happens, then this routine will insert new SCCs into the postorder + /// list *before* the SCC of TargetN (previously the SCC of both). This + /// preserves postorder as the TargetN can reach all of the other nodes by + /// definition of previously being in a single SCC formed by the cycle from + /// SourceN to TargetN. + /// + /// The newly added SCCs are added *immediately* and contiguously + /// prior to the TargetN SCC and return the range covering the new SCCs in + /// the RefSCC's postorder sequence. You can directly iterate the returned + /// range to observe all of the new SCCs in postorder. + /// + /// Note that if SourceN and TargetN are in separate SCCs, the simpler + /// routine `switchTrivialInternalEdgeToRef` should be used instead. + iterator_range<iterator> switchInternalEdgeToRef(Node &SourceN, + Node &TargetN); + + /// Make an existing outgoing ref edge into a call edge. + /// + /// Note that this is trivial as there are no cyclic impacts and there + /// remains a reference edge. + void switchOutgoingEdgeToCall(Node &SourceN, Node &TargetN); + + /// Make an existing outgoing call edge into a ref edge. + /// + /// This is trivial as there are no cyclic impacts and there remains + /// a reference edge. + void switchOutgoingEdgeToRef(Node &SourceN, Node &TargetN); + + /// Insert a ref edge from one node in this RefSCC to another in this + /// RefSCC. + /// + /// This is always a trivial operation as it doesn't change any part of the + /// graph structure besides connecting the two nodes. + /// + /// Note that we don't support directly inserting internal *call* edges + /// because that could change the graph structure and requires returning + /// information about what became invalid. As a consequence, the pattern + /// should be to first insert the necessary ref edge, and then to switch it + /// to a call edge if needed and handle any invalidation that results. See + /// the \c switchInternalEdgeToCall routine for details. + void insertInternalRefEdge(Node &SourceN, Node &TargetN); + + /// Insert an edge whose parent is in this RefSCC and child is in some + /// child RefSCC. + /// + /// There must be an existing path from the \p SourceN to the \p TargetN. + /// This operation is inexpensive and does not change the set of SCCs and + /// RefSCCs in the graph. + void insertOutgoingEdge(Node &SourceN, Node &TargetN, Edge::Kind EK); + + /// Insert an edge whose source is in a descendant RefSCC and target is in + /// this RefSCC. + /// + /// There must be an existing path from the target to the source in this + /// case. + /// + /// NB! This is has the potential to be a very expensive function. It + /// inherently forms a cycle in the prior RefSCC DAG and we have to merge + /// RefSCCs to resolve that cycle. 
But finding all of the RefSCCs which + /// participate in the cycle can in the worst case require traversing every + /// RefSCC in the graph. Every attempt is made to avoid that, but passes + /// must still exercise caution calling this routine repeatedly. + /// + /// Also note that this can only insert ref edges. In order to insert + /// a call edge, first insert a ref edge and then switch it to a call edge. + /// These are intentionally kept as separate interfaces because each step + /// of the operation invalidates a different set of data structures. + /// + /// This returns all the RefSCCs which were merged into the this RefSCC + /// (the target's). This allows callers to invalidate any cached + /// information. + /// + /// FIXME: We could possibly optimize this quite a bit for cases where the + /// caller and callee are very nearby in the graph. See comments in the + /// implementation for details, but that use case might impact users. + SmallVector<RefSCC *, 1> insertIncomingRefEdge(Node &SourceN, + Node &TargetN); + + /// Remove an edge whose source is in this RefSCC and target is *not*. + /// + /// This removes an inter-RefSCC edge. All inter-RefSCC edges originating + /// from this SCC have been fully explored by any in-flight DFS graph + /// formation, so this is always safe to call once you have the source + /// RefSCC. + /// + /// This operation does not change the cyclic structure of the graph and so + /// is very inexpensive. It may change the connectivity graph of the SCCs + /// though, so be careful calling this while iterating over them. + void removeOutgoingEdge(Node &SourceN, Node &TargetN); + + /// Remove a list of ref edges which are entirely within this RefSCC. + /// + /// Both the \a SourceN and all of the \a TargetNs must be within this + /// RefSCC. Removing these edges may break cycles that form this RefSCC and + /// thus this operation may change the RefSCC graph significantly. In + /// particular, this operation will re-form new RefSCCs based on the + /// remaining connectivity of the graph. The following invariants are + /// guaranteed to hold after calling this method: + /// + /// 1) If a ref-cycle remains after removal, it leaves this RefSCC intact + /// and in the graph. No new RefSCCs are built. + /// 2) Otherwise, this RefSCC will be dead after this call and no longer in + /// the graph or the postorder traversal of the call graph. Any iterator + /// pointing at this RefSCC will become invalid. + /// 3) All newly formed RefSCCs will be returned and the order of the + /// RefSCCs returned will be a valid postorder traversal of the new + /// RefSCCs. + /// 4) No RefSCC other than this RefSCC has its member set changed (this is + /// inherent in the definition of removing such an edge). + /// + /// These invariants are very important to ensure that we can build + /// optimization pipelines on top of the CGSCC pass manager which + /// intelligently update the RefSCC graph without invalidating other parts + /// of the RefSCC graph. + /// + /// Note that we provide no routine to remove a *call* edge. Instead, you + /// must first switch it to a ref edge using \c switchInternalEdgeToRef. + /// This split API is intentional as each of these two steps can invalidate + /// a different aspect of the graph structure and needs to have the + /// invalidation handled independently. 
+ /// + /// The runtime complexity of this method is, in the worst case, O(V+E) + /// where V is the number of nodes in this RefSCC and E is the number of + /// edges leaving the nodes in this RefSCC. Note that E includes both edges + /// within this RefSCC and edges from this RefSCC to child RefSCCs. Some + /// effort has been made to minimize the overhead of common cases such as + /// self-edges and edge removals which result in a spanning tree with no + /// more cycles. + SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN, + ArrayRef<Node *> TargetNs); + + /// A convenience wrapper around the above to handle trivial cases of + /// inserting a new call edge. + /// + /// This is trivial whenever the target is in the same SCC as the source or + /// the edge is an outgoing edge to some descendant SCC. In these cases + /// there is no change to the cyclic structure of SCCs or RefSCCs. + /// + /// To further make calling this convenient, it also handles inserting + /// already existing edges. + void insertTrivialCallEdge(Node &SourceN, Node &TargetN); + + /// A convenience wrapper around the above to handle trivial cases of + /// inserting a new ref edge. + /// + /// This is trivial whenever the target is in the same RefSCC as the source + /// or the edge is an outgoing edge to some descendant RefSCC. In these + /// cases there is no change to the cyclic structure of the RefSCCs. + /// + /// To further make calling this convenient, it also handles inserting + /// already existing edges. + void insertTrivialRefEdge(Node &SourceN, Node &TargetN); + + /// Directly replace a node's function with a new function. + /// + /// This should be used when moving the body and users of a function to + /// a new formal function object but not otherwise changing the call graph + /// structure in any way. + /// + /// It requires that the old function in the provided node have zero uses + /// and the new function must have calls and references to it establishing + /// an equivalent graph. + void replaceNodeFunction(Node &N, Function &NewF); + + ///@} + }; + + /// A post-order depth-first RefSCC iterator over the call graph. + /// + /// This iterator walks the cached post-order sequence of RefSCCs. However, + /// it trades stability for flexibility. It is restricted to a forward + /// iterator but will survive mutations which insert new RefSCCs and continue + /// to point to the same RefSCC even if it moves in the post-order sequence. + class postorder_ref_scc_iterator + : public iterator_facade_base<postorder_ref_scc_iterator, + std::forward_iterator_tag, RefSCC> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + /// Nonce type to select the constructor for the end iterator. + struct IsAtEndT {}; + + LazyCallGraph *G; + RefSCC *RC = nullptr; + + /// Build the begin iterator for a node. + postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G), RC(getRC(G, 0)) {} + + /// Build the end iterator for a node. This is selected purely by overload. + postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) : G(&G) {} + + /// Get the post-order RefSCC at the given index of the postorder walk, + /// populating it if necessary. + static RefSCC *getRC(LazyCallGraph &G, int Index) { + if (Index == (int)G.PostOrderRefSCCs.size()) + // We're at the end. 
+ return nullptr; + + return G.PostOrderRefSCCs[Index]; + } + + public: + bool operator==(const postorder_ref_scc_iterator &Arg) const { + return G == Arg.G && RC == Arg.RC; + } + + reference operator*() const { return *RC; } + + using iterator_facade_base::operator++; + postorder_ref_scc_iterator &operator++() { + assert(RC && "Cannot increment the end iterator!"); + RC = getRC(*G, G->RefSCCIndices.find(RC)->second + 1); + return *this; + } + }; + + /// Construct a graph for the given module. + /// + /// This sets up the graph and computes all of the entry points of the graph. + /// No function definitions are scanned until their nodes in the graph are + /// requested during traversal. + LazyCallGraph(Module &M, TargetLibraryInfo &TLI); + + LazyCallGraph(LazyCallGraph &&G); + LazyCallGraph &operator=(LazyCallGraph &&RHS); + + EdgeSequence::iterator begin() { return EntryEdges.begin(); } + EdgeSequence::iterator end() { return EntryEdges.end(); } + + void buildRefSCCs(); + + postorder_ref_scc_iterator postorder_ref_scc_begin() { + if (!EntryEdges.empty()) + assert(!PostOrderRefSCCs.empty() && + "Must form RefSCCs before iterating them!"); + return postorder_ref_scc_iterator(*this); + } + postorder_ref_scc_iterator postorder_ref_scc_end() { + if (!EntryEdges.empty()) + assert(!PostOrderRefSCCs.empty() && + "Must form RefSCCs before iterating them!"); + return postorder_ref_scc_iterator(*this, + postorder_ref_scc_iterator::IsAtEndT()); + } + + iterator_range<postorder_ref_scc_iterator> postorder_ref_sccs() { + return make_range(postorder_ref_scc_begin(), postorder_ref_scc_end()); + } + + /// Lookup a function in the graph which has already been scanned and added. + Node *lookup(const Function &F) const { return NodeMap.lookup(&F); } + + /// Lookup a function's SCC in the graph. + /// + /// \returns null if the function hasn't been assigned an SCC via the RefSCC + /// iterator walk. + SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); } + + /// Lookup a function's RefSCC in the graph. + /// + /// \returns null if the function hasn't been assigned a RefSCC via the + /// RefSCC iterator walk. + RefSCC *lookupRefSCC(Node &N) const { + if (SCC *C = lookupSCC(N)) + return &C->getOuterRefSCC(); + + return nullptr; + } + + /// Get a graph node for a given function, scanning it to populate the graph + /// data as necessary. + Node &get(Function &F) { + Node *&N = NodeMap[&F]; + if (N) + return *N; + + return insertInto(F, N); + } + + /// Get the sequence of known and defined library functions. + /// + /// These functions, because they are known to LLVM, can have calls + /// introduced out of thin air from arbitrary IR. + ArrayRef<Function *> getLibFunctions() const { + return LibFunctions.getArrayRef(); + } + + /// Test whether a function is a known and defined library function tracked by + /// the call graph. + /// + /// Because these functions are known to LLVM they are specially modeled in + /// the call graph and even when all IR-level references have been removed + /// remain active and reachable. + bool isLibFunction(Function &F) const { return LibFunctions.count(&F); } + + ///@{ + /// \name Pre-SCC Mutation API + /// + /// These methods are only valid to call prior to forming any SCCs for this + /// call graph. They can be used to update the core node-graph during + /// a node-based inorder traversal that precedes any SCC-based traversal. + /// + /// Once you begin manipulating a call graph's SCCs, most mutation of the + /// graph must be performed via a RefSCC method. 
There are some exceptions + /// below. + + /// Update the call graph after inserting a new edge. + void insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK); + + /// Update the call graph after inserting a new edge. + void insertEdge(Function &Source, Function &Target, Edge::Kind EK) { + return insertEdge(get(Source), get(Target), EK); + } + + /// Update the call graph after deleting an edge. + void removeEdge(Node &SourceN, Node &TargetN); + + /// Update the call graph after deleting an edge. + void removeEdge(Function &Source, Function &Target) { + return removeEdge(get(Source), get(Target)); + } + + ///@} + + ///@{ + /// \name General Mutation API + /// + /// There are a very limited set of mutations allowed on the graph as a whole + /// once SCCs have started to be formed. These routines have strict contracts + /// but may be called at any point. + + /// Remove a dead function from the call graph (typically to delete it). + /// + /// Note that the function must have an empty use list, and the call graph + /// must be up-to-date prior to calling this. That means it is by itself in + /// a maximal SCC which is by itself in a maximal RefSCC, etc. No structural + /// changes result from calling this routine other than potentially removing + /// entry points into the call graph. + /// + /// If SCC formation has begun, this function must not be part of the current + /// DFS in order to call this safely. Typically, the function will have been + /// fully visited by the DFS prior to calling this routine. + void removeDeadFunction(Function &F); + + ///@} + + ///@{ + /// \name Static helpers for code doing updates to the call graph. + /// + /// These helpers are used to implement parts of the call graph but are also + /// useful to code doing updates or otherwise wanting to walk the IR in the + /// same patterns as when we build the call graph. + + /// Recursively visits the defined functions whose address is reachable from + /// every constant in the \p Worklist. + /// + /// Doesn't recurse through any constants already in the \p Visited set, and + /// updates that set with every constant visited. + /// + /// For each defined function, calls \p Callback with that function. + template <typename CallbackT> + static void visitReferences(SmallVectorImpl<Constant *> &Worklist, + SmallPtrSetImpl<Constant *> &Visited, + CallbackT Callback) { + while (!Worklist.empty()) { + Constant *C = Worklist.pop_back_val(); + + if (Function *F = dyn_cast<Function>(C)) { + if (!F->isDeclaration()) + Callback(*F); + continue; + } + + if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { + // The blockaddress constant expression is a weird special case, we + // can't generically walk its operands the way we do for all other + // constants. + if (Visited.insert(BA->getFunction()).second) + Worklist.push_back(BA->getFunction()); + continue; + } + + for (Value *Op : C->operand_values()) + if (Visited.insert(cast<Constant>(Op)).second) + Worklist.push_back(cast<Constant>(Op)); + } + } + + ///@} + +private: + using node_stack_iterator = SmallVectorImpl<Node *>::reverse_iterator; + using node_stack_range = iterator_range<node_stack_iterator>; + + /// Allocator that holds all the call graph nodes. + SpecificBumpPtrAllocator<Node> BPA; + + /// Maps function->node for fast lookup. + DenseMap<const Function *, Node *> NodeMap; + + /// The entry edges into the graph. + /// + /// These edges are from "external" sources. Put another way, they + /// escape at the module scope. 
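+
+  // For illustration only, a minimal sketch of driving the \c visitReferences
+  // helper above (assuming \c F is a Function whose body should be scanned
+  // for referenced, defined functions, and using the instruction-iterator
+  // helpers from llvm/IR/InstIterator.h):
+  //
+  // \code
+  //   SmallVector<Constant *, 16> Worklist;
+  //   SmallPtrSet<Constant *, 16> Visited;
+  //   for (Instruction &I : instructions(F))
+  //     for (Value *Op : I.operand_values())
+  //       if (auto *C = dyn_cast<Constant>(Op))
+  //         if (Visited.insert(C).second)
+  //           Worklist.push_back(C);
+  //   LazyCallGraph::visitReferences(Worklist, Visited, [](Function &Callee) {
+  //     // Each defined function reachable through the constants is visited
+  //     // exactly once here.
+  //   });
+  // \endcode
+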
+ EdgeSequence EntryEdges; + + /// Allocator that holds all the call graph SCCs. + SpecificBumpPtrAllocator<SCC> SCCBPA; + + /// Maps Function -> SCC for fast lookup. + DenseMap<Node *, SCC *> SCCMap; + + /// Allocator that holds all the call graph RefSCCs. + SpecificBumpPtrAllocator<RefSCC> RefSCCBPA; + + /// The post-order sequence of RefSCCs. + /// + /// This list is lazily formed the first time we walk the graph. + SmallVector<RefSCC *, 16> PostOrderRefSCCs; + + /// A map from RefSCC to the index for it in the postorder sequence of + /// RefSCCs. + DenseMap<RefSCC *, int> RefSCCIndices; + + /// Defined functions that are also known library functions which the + /// optimizer can reason about and therefore might introduce calls to out of + /// thin air. + SmallSetVector<Function *, 4> LibFunctions; + + /// Helper to insert a new function, with an already looked-up entry in + /// the NodeMap. + Node &insertInto(Function &F, Node *&MappedN); + + /// Helper to update pointers back to the graph object during moves. + void updateGraphPtrs(); + + /// Allocates an SCC and constructs it using the graph allocator. + /// + /// The arguments are forwarded to the constructor. + template <typename... Ts> SCC *createSCC(Ts &&... Args) { + return new (SCCBPA.Allocate()) SCC(std::forward<Ts>(Args)...); + } + + /// Allocates a RefSCC and constructs it using the graph allocator. + /// + /// The arguments are forwarded to the constructor. + template <typename... Ts> RefSCC *createRefSCC(Ts &&... Args) { + return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...); + } + + /// Common logic for building SCCs from a sequence of roots. + /// + /// This is a very generic implementation of the depth-first walk and SCC + /// formation algorithm. It uses a generic sequence of roots and generic + /// callbacks for each step. This is designed to be used to implement both + /// the RefSCC formation and SCC formation with shared logic. + /// + /// Currently this is a relatively naive implementation of Tarjan's DFS + /// algorithm to form the SCCs. + /// + /// FIXME: We should consider newer variants such as Nuutila. + template <typename RootsT, typename GetBeginT, typename GetEndT, + typename GetNodeT, typename FormSCCCallbackT> + static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin, + GetEndT &&GetEnd, GetNodeT &&GetNode, + FormSCCCallbackT &&FormSCC); + + /// Build the SCCs for a RefSCC out of a list of nodes. + void buildSCCs(RefSCC &RC, node_stack_range Nodes); + + /// Get the index of a RefSCC within the postorder traversal. + /// + /// Requires that this RefSCC is a valid one in the (perhaps partial) + /// postorder traversed part of the graph. 
+ int getRefSCCIndex(RefSCC &RC) { + auto IndexIt = RefSCCIndices.find(&RC); + assert(IndexIt != RefSCCIndices.end() && "RefSCC doesn't have an index!"); + assert(PostOrderRefSCCs[IndexIt->second] == &RC && + "Index does not point back at RC!"); + return IndexIt->second; + } +}; + +inline LazyCallGraph::Edge::Edge() : Value() {} +inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} + +inline LazyCallGraph::Edge::operator bool() const { + return Value.getPointer() && !Value.getPointer()->isDead(); +} + +inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const { + assert(*this && "Queried a null edge!"); + return Value.getInt(); +} + +inline bool LazyCallGraph::Edge::isCall() const { + assert(*this && "Queried a null edge!"); + return getKind() == Call; +} + +inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode() const { + assert(*this && "Queried a null edge!"); + return *Value.getPointer(); +} + +inline Function &LazyCallGraph::Edge::getFunction() const { + assert(*this && "Queried a null edge!"); + return getNode().getFunction(); +} + +// Provide GraphTraits specializations for call graphs. +template <> struct GraphTraits<LazyCallGraph::Node *> { + using NodeRef = LazyCallGraph::Node *; + using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); } + static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); } +}; +template <> struct GraphTraits<LazyCallGraph *> { + using NodeRef = LazyCallGraph::Node *; + using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); } + static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); } +}; + +/// An analysis pass which computes the call graph for a module. +class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> { + friend AnalysisInfoMixin<LazyCallGraphAnalysis>; + + static AnalysisKey Key; + +public: + /// Inform generic clients of the result type. + using Result = LazyCallGraph; + + /// Compute the \c LazyCallGraph for the module \c M. + /// + /// This just builds the set of entry points to the call graph. The rest is + /// built lazily as it is walked. + LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) { + return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M)); + } +}; + +/// A pass which prints the call graph to a \c raw_ostream. +/// +/// This is primarily useful for testing the analysis. +class LazyCallGraphPrinterPass + : public PassInfoMixin<LazyCallGraphPrinterPass> { + raw_ostream &OS; + +public: + explicit LazyCallGraphPrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +/// A pass which prints the call graph as a DOT file to a \c raw_ostream. +/// +/// This is primarily useful for visualization purposes. 
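+///
+/// For illustration, a minimal sketch of scheduling it (assuming \c MPM is a
+/// ModulePassManager and \c OS is the stream that should receive the DOT
+/// output):
+///
+/// \code
+///   MPM.addPass(LazyCallGraphDOTPrinterPass(OS));
+///   // Running MPM over a module now emits the call graph in DOT form to OS.
+/// \endcode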
+class LazyCallGraphDOTPrinterPass + : public PassInfoMixin<LazyCallGraphDOTPrinterPass> { + raw_ostream &OS; + +public: + explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_LAZYCALLGRAPH_H diff --git a/clang-r353983e/include/llvm/Analysis/LazyValueInfo.h b/clang-r353983e/include/llvm/Analysis/LazyValueInfo.h new file mode 100644 index 00000000..570a5044 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LazyValueInfo.h @@ -0,0 +1,166 @@ +//===- LazyValueInfo.h - Value constraint analysis --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for lazy computation of value constraint +// information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LAZYVALUEINFO_H +#define LLVM_ANALYSIS_LAZYVALUEINFO_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + class AssumptionCache; + class Constant; + class ConstantRange; + class DataLayout; + class DominatorTree; + class Instruction; + class TargetLibraryInfo; + class Value; + +/// This pass computes, caches, and vends lazy value constraint information. +class LazyValueInfo { + friend class LazyValueInfoWrapperPass; + AssumptionCache *AC = nullptr; + const DataLayout *DL = nullptr; + class TargetLibraryInfo *TLI = nullptr; + DominatorTree *DT = nullptr; + void *PImpl = nullptr; + LazyValueInfo(const LazyValueInfo&) = delete; + void operator=(const LazyValueInfo&) = delete; +public: + ~LazyValueInfo(); + LazyValueInfo() {} + LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_, TargetLibraryInfo *TLI_, + DominatorTree *DT_) + : AC(AC_), DL(DL_), TLI(TLI_), DT(DT_) {} + LazyValueInfo(LazyValueInfo &&Arg) + : AC(Arg.AC), DL(Arg.DL), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) { + Arg.PImpl = nullptr; + } + LazyValueInfo &operator=(LazyValueInfo &&Arg) { + releaseMemory(); + AC = Arg.AC; + DL = Arg.DL; + TLI = Arg.TLI; + DT = Arg.DT; + PImpl = Arg.PImpl; + Arg.PImpl = nullptr; + return *this; + } + + /// This is used to return true/false/dunno results. + enum Tristate { + Unknown = -1, False = 0, True = 1 + }; + + // Public query interface. + + /// Determine whether the specified value comparison with a constant is known + /// to be true or false on the specified CFG edge. + /// Pred is a CmpInst predicate. + Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, + BasicBlock *FromBB, BasicBlock *ToBB, + Instruction *CxtI = nullptr); + + /// Determine whether the specified value comparison with a constant is known + /// to be true or false at the specified instruction + /// (from an assume intrinsic). Pred is a CmpInst predicate. + Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, + Instruction *CxtI); + + /// Determine whether the specified value is known to be a + /// constant at the end of the specified block. Return null if not. + Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); + + /// Return the ConstantRange constraint that is known to hold for the + /// specified value at the end of the specified block. This may only be called + /// on integer-typed Values. 
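+  ///
+  /// For illustration, a minimal sketch (assuming \c LVI is a LazyValueInfo
+  /// instance, \c V is an integer-typed Value, and \c BB is the block of
+  /// interest):
+  ///
+  /// \code
+  ///   ConstantRange CR = LVI.getConstantRange(V, BB);
+  ///   if (const APInt *Single = CR.getSingleElement())
+  ///     ; // V is known to have exactly the value *Single at the end of BB.
+  /// \endcode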
+ ConstantRange getConstantRange(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); + + /// Determine whether the specified value is known to be a + /// constant on the specified edge. Return null if not. + Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, + Instruction *CxtI = nullptr); + + /// Return the ConstantRage constraint that is known to hold for the + /// specified value on the specified edge. This may be only be called + /// on integer-typed Values. + ConstantRange getConstantRangeOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB, + Instruction *CxtI = nullptr); + + /// Inform the analysis cache that we have threaded an edge from + /// PredBB to OldSucc to be from PredBB to NewSucc instead. + void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); + + /// Inform the analysis cache that we have erased a block. + void eraseBlock(BasicBlock *BB); + + /// Print the \LazyValueInfo Analysis. + /// We pass in the DTree that is required for identifying which basic blocks + /// we can solve/print for, in the LVIPrinter. The DT is optional + /// in LVI, so we need to pass it here as an argument. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS); + + /// Disables use of the DominatorTree within LVI. + void disableDT(); + + /// Enables use of the DominatorTree within LVI. Does nothing if the class + /// instance was initialized without a DT pointer. + void enableDT(); + + // For old PM pass. Delete once LazyValueInfoWrapperPass is gone. + void releaseMemory(); + + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); +}; + +/// Analysis to compute lazy value information. +class LazyValueAnalysis : public AnalysisInfoMixin<LazyValueAnalysis> { +public: + typedef LazyValueInfo Result; + Result run(Function &F, FunctionAnalysisManager &FAM); + +private: + static AnalysisKey Key; + friend struct AnalysisInfoMixin<LazyValueAnalysis>; +}; + +/// Wrapper around LazyValueInfo. +class LazyValueInfoWrapperPass : public FunctionPass { + LazyValueInfoWrapperPass(const LazyValueInfoWrapperPass&) = delete; + void operator=(const LazyValueInfoWrapperPass&) = delete; +public: + static char ID; + LazyValueInfoWrapperPass() : FunctionPass(ID) { + initializeLazyValueInfoWrapperPassPass(*PassRegistry::getPassRegistry()); + } + ~LazyValueInfoWrapperPass() override { + assert(!Info.PImpl && "releaseMemory not called"); + } + + LazyValueInfo &getLVI(); + + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + bool runOnFunction(Function &F) override; +private: + LazyValueInfo Info; +}; + +} // end namespace llvm + +#endif + diff --git a/clang-r353983e/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/clang-r353983e/include/llvm/Analysis/LegacyDivergenceAnalysis.h new file mode 100644 index 00000000..0a338b81 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -0,0 +1,68 @@ +//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The kernel divergence analysis is an LLVM pass which can be used to find out +// if a branch instruction in a GPU program (kernel) is divergent or not. It can help +// branch optimizations such as jump threading and loop unswitching to make +// better decisions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H +#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/DivergenceAnalysis.h" + +namespace llvm { +class Value; +class GPUDivergenceAnalysis; +class LegacyDivergenceAnalysis : public FunctionPass { +public: + static char ID; + + LegacyDivergenceAnalysis() : FunctionPass(ID) { + initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent at its definition. + // + // Even if this function returns false, V may still be divergent when used + // in a different basic block. + bool isDivergent(const Value *V) const; + + // Returns true if V is uniform/non-divergent. + // + // Even if this function returns true, V may still be divergent when used + // in a different basic block. + bool isUniform(const Value *V) const { return !isDivergent(V); } + + // Keep the analysis results uptodate by removing an erased value. + void removeValue(const Value *V) { DivergentValues.erase(V); } + +private: + // Whether analysis should be performed by GPUDivergenceAnalysis. + bool shouldUseGPUDivergenceAnalysis(const Function &F) const; + + // (optional) handle to new DivergenceAnalysis + std::unique_ptr<GPUDivergenceAnalysis> gpuDA; + + // Stores all divergent values. + DenseSet<const Value *> DivergentValues; +}; +} // End llvm namespace + +#endif //LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/Lint.h b/clang-r353983e/include/llvm/Analysis/Lint.h new file mode 100644 index 00000000..0fea81e2 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/Lint.h @@ -0,0 +1,48 @@ +//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines lint interfaces that can be used for some sanity checking +// of input to the system, and for checking that transformations +// haven't done something bad. In contrast to the Verifier, the Lint checker +// checks for undefined behavior or constructions with likely unintended +// behavior. +// +// To see what specifically is checked, look at Lint.cpp +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LINT_H +#define LLVM_ANALYSIS_LINT_H + +namespace llvm { + +class FunctionPass; +class Module; +class Function; + +/// Create a lint pass. +/// +/// Check a module or function. 
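+///
+/// For illustration, a minimal sketch of running the lint checks over every
+/// function in a module with the legacy pass manager (assuming \c M is the
+/// Module to check):
+///
+/// \code
+///   legacy::PassManager PM;
+///   PM.add(createLintPass());
+///   PM.run(M);
+/// \endcode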
+FunctionPass *createLintPass(); + +/// Check a module. +/// +/// This should only be used for debugging, because it plays games with +/// PassManagers and stuff. +void lintModule( + const Module &M ///< The module to be checked +); + +// lintFunction - Check a function. +void lintFunction( + const Function &F ///< The function to be checked +); + +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/Loads.h b/clang-r353983e/include/llvm/Analysis/Loads.h new file mode 100644 index 00000000..04401162 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/Loads.h @@ -0,0 +1,129 @@ +//===- Loads.h - Local load analysis --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares simple local analyses for load instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOADS_H +#define LLVM_ANALYSIS_LOADS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + +class DataLayout; +class MDNode; + +/// Return true if this is always a dereferenceable pointer. If the context +/// instruction is specified perform context-sensitive analysis and return true +/// if the pointer is dereferenceable at the specified instruction. +bool isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + +/// Returns true if V is always a dereferenceable pointer with alignment +/// greater or equal than requested. If the context instruction is specified +/// performs context-sensitive analysis and returns true if the pointer is +/// dereferenceable at the specified instruction. +bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + +/// Returns true if V is always dereferenceable for Size byte with alignment +/// greater or equal than requested. If the context instruction is specified +/// performs context-sensitive analysis and returns true if the pointer is +/// dereferenceable at the specified instruction. +bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const APInt &Size, const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + +/// Return true if we know that executing a load from this value cannot trap. +/// +/// If DT and ScanFrom are specified this method performs context-sensitive +/// analysis and returns true if it is safe to load immediately before ScanFrom. +/// +/// If it is not obviously safe to load from the specified pointer, we do a +/// quick local scan of the basic block containing ScanFrom, to determine if +/// the address is already accessed. +bool isSafeToLoadUnconditionally(Value *V, unsigned Align, + const DataLayout &DL, + Instruction *ScanFrom = nullptr, + const DominatorTree *DT = nullptr); + +/// The default number of maximum instructions to scan in the block, used by +/// FindAvailableLoadedValue(). 
+extern cl::opt<unsigned> DefMaxInstsToScan; + +/// Scan backwards to see if we have the value of the given load available +/// locally within a small number of instructions. +/// +/// You can use this function to scan across multiple blocks: after you call +/// this function, if ScanFrom points at the beginning of the block, it's safe +/// to continue scanning the predecessors. +/// +/// Note that performing load CSE requires special care to make sure the +/// metadata is set appropriately. In particular, aliasing metadata needs +/// to be merged. (This doesn't matter for store-to-load forwarding because +/// the only relevant load gets deleted.) +/// +/// \param Load The load we want to replace. +/// \param ScanBB The basic block to scan. +/// \param [in,out] ScanFrom The location to start scanning from. When this +/// function returns, it points at the last instruction scanned. +/// \param MaxInstsToScan The maximum number of instructions to scan. If this +/// is zero, the whole block will be scanned. +/// \param AA Optional pointer to alias analysis, to make the scan more +/// precise. +/// \param [out] IsLoadCSE Whether the returned value is a load from the same +/// location in memory, as opposed to the value operand of a store. +/// +/// \returns The found value, or nullptr if no value is found. +Value *FindAvailableLoadedValue(LoadInst *Load, + BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan = DefMaxInstsToScan, + AliasAnalysis *AA = nullptr, + bool *IsLoadCSE = nullptr, + unsigned *NumScanedInst = nullptr); + +/// Scan backwards to see if we have the value of the given pointer available +/// locally within a small number of instructions. +/// +/// You can use this function to scan across multiple blocks: after you call +/// this function, if ScanFrom points at the beginning of the block, it's safe +/// to continue scanning the predecessors. +/// +/// \param Ptr The pointer we want the load and store to originate from. +/// \param AccessTy The access type of the pointer. +/// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In +/// case it is false, we can return an atomic or non-atomic load or store. In +/// case it is true, we need to return an atomic load or store. +/// \param ScanBB The basic block to scan. +/// \param [in,out] ScanFrom The location to start scanning from. When this +/// function returns, it points at the last instruction scanned. +/// \param MaxInstsToScan The maximum number of instructions to scan. If this +/// is zero, the whole block will be scanned. +/// \param AA Optional pointer to alias analysis, to make the scan more +/// precise. +/// \param [out] IsLoad Whether the returned value is a load from the same +/// location in memory, as opposed to the value operand of a store. +/// +/// \returns The found value, or nullptr if no value is found. 
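+///
+/// For illustration, a minimal sketch using the related
+/// FindAvailableLoadedValue above (assuming \c Load is the LoadInst being
+/// optimized and no alias analysis is available):
+///
+/// \code
+///   BasicBlock::iterator ScanFrom = Load->getIterator();
+///   bool IsLoadCSE = false;
+///   if (Value *V = FindAvailableLoadedValue(Load, Load->getParent(), ScanFrom,
+///                                           DefMaxInstsToScan, /*AA=*/nullptr,
+///                                           &IsLoadCSE))
+///     Load->replaceAllUsesWith(V); // Remember to merge metadata if IsLoadCSE.
+/// \endcode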
+Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, + BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, AliasAnalysis *AA, + bool *IsLoad, unsigned *NumScanedInst); +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LoopAccessAnalysis.h b/clang-r353983e/include/llvm/Analysis/LoopAccessAnalysis.h new file mode 100644 index 00000000..fa34afaf --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LoopAccessAnalysis.h @@ -0,0 +1,784 @@ +//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the loop memory dependence framework that +// was originally developed for the Loop Vectorizer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H +#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H + +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class Value; +class DataLayout; +class ScalarEvolution; +class Loop; +class SCEV; +class SCEVUnionPredicate; +class LoopAccessInfo; +class OptimizationRemarkEmitter; + +/// Collection of parameters shared beetween the Loop Vectorizer and the +/// Loop Access Analysis. +struct VectorizerParams { + /// Maximum SIMD width. + static const unsigned MaxVectorWidth; + + /// VF as overridden by the user. + static unsigned VectorizationFactor; + /// Interleave factor as overridden by the user. + static unsigned VectorizationInterleave; + /// True if force-vector-interleave was specified by the user. + static bool isInterleaveForced(); + + /// \When performing memory disambiguation checks at runtime do not + /// make more than this number of comparisons. + static unsigned RuntimeMemoryCheckThreshold; +}; + +/// Checks memory dependences among accesses to the same underlying +/// object to determine whether there vectorization is legal or not (and at +/// which vectorization factor). +/// +/// Note: This class will compute a conservative dependence for access to +/// different underlying pointers. Clients, such as the loop vectorizer, will +/// sometimes deal these potential dependencies by emitting runtime checks. +/// +/// We use the ScalarEvolution framework to symbolically evalutate access +/// functions pairs. Since we currently don't restructure the loop we can rely +/// on the program order of memory accesses to determine their safety. +/// At the moment we will only deem accesses as safe for: +/// * A negative constant distance assuming program order. 
+/// +/// Safe: tmp = a[i + 1]; OR a[i + 1] = x; +/// a[i] = tmp; y = a[i]; +/// +/// The latter case is safe because later checks guarantuee that there can't +/// be a cycle through a phi node (that is, we check that "x" and "y" is not +/// the same variable: a header phi can only be an induction or a reduction, a +/// reduction can't have a memory sink, an induction can't have a memory +/// source). This is important and must not be violated (or we have to +/// resort to checking for cycles through memory). +/// +/// * A positive constant distance assuming program order that is bigger +/// than the biggest memory access. +/// +/// tmp = a[i] OR b[i] = x +/// a[i+2] = tmp y = b[i+2]; +/// +/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively. +/// +/// * Zero distances and all accesses have the same size. +/// +class MemoryDepChecker { +public: + typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; + typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; + /// Set of potential dependent memory accesses. + typedef EquivalenceClasses<MemAccessInfo> DepCandidates; + + /// Type to keep track of the status of the dependence check. The order of + /// the elements is important and has to be from most permissive to least + /// permissive. + enum class VectorizationSafetyStatus { + // Can vectorize safely without RT checks. All dependences are known to be + // safe. + Safe, + // Can possibly vectorize with RT checks to overcome unknown dependencies. + PossiblySafeWithRtChecks, + // Cannot vectorize due to known unsafe dependencies. + Unsafe, + }; + + /// Dependece between memory access instructions. + struct Dependence { + /// The type of the dependence. + enum DepType { + // No dependence. + NoDep, + // We couldn't determine the direction or the distance. + Unknown, + // Lexically forward. + // + // FIXME: If we only have loop-independent forward dependences (e.g. a + // read and write of A[i]), LAA will locally deem the dependence "safe" + // without querying the MemoryDepChecker. Therefore we can miss + // enumerating loop-independent forward dependences in + // getDependences. Note that as soon as there are different + // indices used to access the same array, the MemoryDepChecker *is* + // queried and the dependence list is complete. + Forward, + // Forward, but if vectorized, is likely to prevent store-to-load + // forwarding. + ForwardButPreventsForwarding, + // Lexically backward. + Backward, + // Backward, but the distance allows a vectorization factor of + // MaxSafeDepDistBytes. + BackwardVectorizable, + // Same, but may prevent store-to-load forwarding. + BackwardVectorizableButPreventsForwarding + }; + + /// String version of the types. + static const char *DepName[]; + + /// Index of the source of the dependence in the InstMap vector. + unsigned Source; + /// Index of the destination of the dependence in the InstMap vector. + unsigned Destination; + /// The type of the dependence. + DepType Type; + + Dependence(unsigned Source, unsigned Destination, DepType Type) + : Source(Source), Destination(Destination), Type(Type) {} + + /// Return the source instruction of the dependence. + Instruction *getSource(const LoopAccessInfo &LAI) const; + /// Return the destination instruction of the dependence. + Instruction *getDestination(const LoopAccessInfo &LAI) const; + + /// Dependence types that don't prevent vectorization. + static VectorizationSafetyStatus isSafeForVectorization(DepType Type); + + /// Lexically forward dependence. 
+ bool isForward() const; + /// Lexically backward dependence. + bool isBackward() const; + + /// May be a lexically backward dependence type (includes Unknown). + bool isPossiblyBackward() const; + + /// Print the dependence. \p Instr is used to map the instruction + /// indices to instructions. + void print(raw_ostream &OS, unsigned Depth, + const SmallVectorImpl<Instruction *> &Instrs) const; + }; + + MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) + : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U), + FoundNonConstantDistanceDependence(false), + Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {} + + /// Register the location (instructions are given increasing numbers) + /// of a write access. + void addAccess(StoreInst *SI) { + Value *Ptr = SI->getPointerOperand(); + Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx); + InstMap.push_back(SI); + ++AccessIdx; + } + + /// Register the location (instructions are given increasing numbers) + /// of a write access. + void addAccess(LoadInst *LI) { + Value *Ptr = LI->getPointerOperand(); + Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx); + InstMap.push_back(LI); + ++AccessIdx; + } + + /// Check whether the dependencies between the accesses are safe. + /// + /// Only checks sets with elements in \p CheckDeps. + bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, + const ValueToValueMap &Strides); + + /// No memory dependence was encountered that would inhibit + /// vectorization. + bool isSafeForVectorization() const { + return Status == VectorizationSafetyStatus::Safe; + } + + /// The maximum number of bytes of a vector register we can vectorize + /// the accesses safely with. + uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } + + /// Return the number of elements that are safe to operate on + /// simultaneously, multiplied by the size of the element in bits. + uint64_t getMaxSafeRegisterWidth() const { return MaxSafeRegisterWidth; } + + /// In same cases when the dependency check fails we can still + /// vectorize the loop with a dynamic array access check. + bool shouldRetryWithRuntimeCheck() const { + return FoundNonConstantDistanceDependence && + Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks; + } + + /// Returns the memory dependences. If null is returned we exceeded + /// the MaxDependences threshold and this information is not + /// available. + const SmallVectorImpl<Dependence> *getDependences() const { + return RecordDependences ? &Dependences : nullptr; + } + + void clearDependences() { Dependences.clear(); } + + /// The vector of memory access instructions. The indices are used as + /// instruction identifiers in the Dependence class. + const SmallVectorImpl<Instruction *> &getMemoryInstructions() const { + return InstMap; + } + + /// Generate a mapping between the memory instructions and their + /// indices according to program order. + DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const { + DenseMap<Instruction *, unsigned> OrderMap; + + for (unsigned I = 0; I < InstMap.size(); ++I) + OrderMap[InstMap[I]] = I; + + return OrderMap; + } + + /// Find the set of instructions that read or write via \p Ptr. + SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, + bool isWrite) const; + +private: + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and + /// applies dynamic knowledge to simplify SCEV expressions and convert them + /// to a more usable form. 
We need this in case assumptions about SCEV + /// expressions need to be made in order to avoid unknown dependences. For + /// example we might assume a unit stride for a pointer in order to prove + /// that a memory access is strided and doesn't wrap. + PredicatedScalarEvolution &PSE; + const Loop *InnermostLoop; + + /// Maps access locations (ptr, read/write) to program order. + DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses; + + /// Memory access instructions in program order. + SmallVector<Instruction *, 16> InstMap; + + /// The program order index to be used for the next instruction. + unsigned AccessIdx; + + // We can access this many bytes in parallel safely. + uint64_t MaxSafeDepDistBytes; + + /// Number of elements (from consecutive iterations) that are safe to + /// operate on simultaneously, multiplied by the size of the element in bits. + /// The size of the element is taken from the memory access that is most + /// restrictive. + uint64_t MaxSafeRegisterWidth; + + /// If we see a non-constant dependence distance we can still try to + /// vectorize this loop with runtime checks. + bool FoundNonConstantDistanceDependence; + + /// Result of the dependence checks, indicating whether the checked + /// dependences are safe for vectorization, require RT checks or are known to + /// be unsafe. + VectorizationSafetyStatus Status; + + //// True if Dependences reflects the dependences in the + //// loop. If false we exceeded MaxDependences and + //// Dependences is invalid. + bool RecordDependences; + + /// Memory dependences collected during the analysis. Only valid if + /// RecordDependences is true. + SmallVector<Dependence, 8> Dependences; + + /// Check whether there is a plausible dependence between the two + /// accesses. + /// + /// Access \p A must happen before \p B in program order. The two indices + /// identify the index into the program order map. + /// + /// This function checks whether there is a plausible dependence (or the + /// absence of such can't be proved) between the two accesses. If there is a + /// plausible dependence but the dependence distance is bigger than one + /// element access it records this distance in \p MaxSafeDepDistBytes (if this + /// distance is smaller than any other distance encountered so far). + /// Otherwise, this function returns true signaling a possible dependence. + Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx, + const MemAccessInfo &B, unsigned BIdx, + const ValueToValueMap &Strides); + + /// Check whether the data dependence could prevent store-load + /// forwarding. + /// + /// \return false if we shouldn't vectorize at all or avoid larger + /// vectorization factors by limiting MaxSafeDepDistBytes. + bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); + + /// Updates the current safety status with \p S. We can go from Safe to + /// either PossiblySafeWithRtChecks or Unsafe and from + /// PossiblySafeWithRtChecks to Unsafe. + void mergeInStatus(VectorizationSafetyStatus S); +}; + +/// Holds information about the memory runtime legality checks to verify +/// that a group of pointers do not overlap. +class RuntimePointerChecking { +public: + struct PointerInfo { + /// Holds the pointer value that we need to check. + TrackingVH<Value> PointerValue; + /// Holds the smallest byte address accessed by the pointer throughout all + /// iterations of the loop. 
+ const SCEV *Start; + /// Holds the largest byte address accessed by the pointer throughout all + /// iterations of the loop, plus 1. + const SCEV *End; + /// Holds the information if this pointer is used for writing to memory. + bool IsWritePtr; + /// Holds the id of the set of pointers that could be dependent because of a + /// shared underlying object. + unsigned DependencySetId; + /// Holds the id of the disjoint alias set to which this pointer belongs. + unsigned AliasSetId; + /// SCEV for the access. + const SCEV *Expr; + + PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End, + bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId, + const SCEV *Expr) + : PointerValue(PointerValue), Start(Start), End(End), + IsWritePtr(IsWritePtr), DependencySetId(DependencySetId), + AliasSetId(AliasSetId), Expr(Expr) {} + }; + + RuntimePointerChecking(ScalarEvolution *SE) : Need(false), SE(SE) {} + + /// Reset the state of the pointer runtime information. + void reset() { + Need = false; + Pointers.clear(); + Checks.clear(); + } + + /// Insert a pointer and calculate the start and end SCEVs. + /// We need \p PSE in order to compute the SCEV expression of the pointer + /// according to the assumptions that we've made during the analysis. + /// The method might also version the pointer stride according to \p Strides, + /// and add new predicates to \p PSE. + void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, + unsigned ASId, const ValueToValueMap &Strides, + PredicatedScalarEvolution &PSE); + + /// No run-time memory checking is necessary. + bool empty() const { return Pointers.empty(); } + + /// A grouping of pointers. A single memcheck is required between + /// two groups. + struct CheckingPtrGroup { + /// Create a new pointer checking group containing a single + /// pointer, with index \p Index in RtCheck. + CheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck) + : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End), + Low(RtCheck.Pointers[Index].Start) { + Members.push_back(Index); + } + + /// Tries to add the pointer recorded in RtCheck at index + /// \p Index to this pointer checking group. We can only add a pointer + /// to a checking group if we will still be able to get + /// the upper and lower bounds of the check. Returns true in case + /// of success, false otherwise. + bool addPointer(unsigned Index); + + /// Constitutes the context of this pointer checking group. For each + /// pointer that is a member of this group we will retain the index + /// at which it appears in RtCheck. + RuntimePointerChecking &RtCheck; + /// The SCEV expression which represents the upper bound of all the + /// pointers in this group. + const SCEV *High; + /// The SCEV expression which represents the lower bound of all the + /// pointers in this group. + const SCEV *Low; + /// Indices of all the pointers that constitute this grouping. + SmallVector<unsigned, 2> Members; + }; + + /// A memcheck which made up of a pair of grouped pointers. + /// + /// These *have* to be const for now, since checks are generated from + /// CheckingPtrGroups in LAI::addRuntimeChecks which is a const member + /// function. FIXME: once check-generation is moved inside this class (after + /// the PtrPartition hack is removed), we could drop const. + typedef std::pair<const CheckingPtrGroup *, const CheckingPtrGroup *> + PointerCheck; + + /// Generate the checks and store it. This also performs the grouping + /// of pointers to reduce the number of memchecks necessary. 
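+  ///
+  /// For illustration, a minimal sketch (assuming \c RtCheck already had its
+  /// pointers inserted and \c DepCands comes from the MemoryDepChecker):
+  ///
+  /// \code
+  ///   RtCheck.generateChecks(DepCands, /*UseDependencies=*/true);
+  ///   for (const auto &Check : RtCheck.getChecks())
+  ///     (void)Check; // Each Check is a pair of pointer groups that must be
+  ///                  // compared against each other at run time.
+  /// \endcode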
+ void generateChecks(MemoryDepChecker::DepCandidates &DepCands, + bool UseDependencies); + + /// Returns the checks that generateChecks created. + const SmallVector<PointerCheck, 4> &getChecks() const { return Checks; } + + /// Decide if we need to add a check between two groups of pointers, + /// according to needsChecking. + bool needsChecking(const CheckingPtrGroup &M, + const CheckingPtrGroup &N) const; + + /// Returns the number of run-time checks required according to + /// needsChecking. + unsigned getNumberOfChecks() const { return Checks.size(); } + + /// Print the list run-time memory checks necessary. + void print(raw_ostream &OS, unsigned Depth = 0) const; + + /// Print \p Checks. + void printChecks(raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, + unsigned Depth = 0) const; + + /// This flag indicates if we need to add the runtime check. + bool Need; + + /// Information about the pointers that may require checking. + SmallVector<PointerInfo, 2> Pointers; + + /// Holds a partitioning of pointers into "check groups". + SmallVector<CheckingPtrGroup, 2> CheckingGroups; + + /// Check if pointers are in the same partition + /// + /// \p PtrToPartition contains the partition number for pointers (-1 if the + /// pointer belongs to multiple partitions). + static bool + arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition, + unsigned PtrIdx1, unsigned PtrIdx2); + + /// Decide whether we need to issue a run-time check for pointer at + /// index \p I and \p J to prove their independence. + bool needsChecking(unsigned I, unsigned J) const; + + /// Return PointerInfo for pointer at index \p PtrIdx. + const PointerInfo &getPointerInfo(unsigned PtrIdx) const { + return Pointers[PtrIdx]; + } + +private: + /// Groups pointers such that a single memcheck is required + /// between two different groups. This will clear the CheckingGroups vector + /// and re-compute it. We will only group dependecies if \p UseDependencies + /// is true, otherwise we will create a separate group for each pointer. + void groupChecks(MemoryDepChecker::DepCandidates &DepCands, + bool UseDependencies); + + /// Generate the checks and return them. + SmallVector<PointerCheck, 4> + generateChecks() const; + + /// Holds a pointer to the ScalarEvolution analysis. + ScalarEvolution *SE; + + /// Set of run-time checks required to establish independence of + /// otherwise may-aliasing pointers in the loop. + SmallVector<PointerCheck, 4> Checks; +}; + +/// Drive the analysis of memory accesses in the loop +/// +/// This class is responsible for analyzing the memory accesses of a loop. It +/// collects the accesses and then its main helper the AccessAnalysis class +/// finds and categorizes the dependences in buildDependenceSets. +/// +/// For memory dependences that can be analyzed at compile time, it determines +/// whether the dependence is part of cycle inhibiting vectorization. This work +/// is delegated to the MemoryDepChecker class. +/// +/// For memory dependences that cannot be determined at compile time, it +/// generates run-time checks to prove independence. This is done by +/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the +/// RuntimePointerCheck class. +/// +/// If pointers can wrap or can't be expressed as affine AddRec expressions by +/// ScalarEvolution, we will generate run-time checks by emitting a +/// SCEVUnionPredicate. 
+/// +/// Checks for both memory dependences and the SCEV predicates contained in the +/// PSE must be emitted in order for the results of this analysis to be valid. +class LoopAccessInfo { +public: + LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI); + + /// Return true we can analyze the memory accesses in the loop and there are + /// no memory dependence cycles. + bool canVectorizeMemory() const { return CanVecMem; } + + const RuntimePointerChecking *getRuntimePointerChecking() const { + return PtrRtChecking.get(); + } + + /// Number of memchecks required to prove independence of otherwise + /// may-alias pointers. + unsigned getNumRuntimePointerChecks() const { + return PtrRtChecking->getNumberOfChecks(); + } + + /// Return true if the block BB needs to be predicated in order for the loop + /// to be vectorized. + static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, + DominatorTree *DT); + + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V) const; + + uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } + unsigned getNumStores() const { return NumStores; } + unsigned getNumLoads() const { return NumLoads;} + + /// Add code that checks at runtime if the accessed arrays overlap. + /// + /// Returns a pair of instructions where the first element is the first + /// instruction generated in possibly a sequence of instructions and the + /// second value is the final comparator value or NULL if no check is needed. + std::pair<Instruction *, Instruction *> + addRuntimeChecks(Instruction *Loc) const; + + /// Generete the instructions for the checks in \p PointerChecks. + /// + /// Returns a pair of instructions where the first element is the first + /// instruction generated in possibly a sequence of instructions and the + /// second value is the final comparator value or NULL if no check is needed. + std::pair<Instruction *, Instruction *> + addRuntimeChecks(Instruction *Loc, + const SmallVectorImpl<RuntimePointerChecking::PointerCheck> + &PointerChecks) const; + + /// The diagnostics report generated for the analysis. E.g. why we + /// couldn't analyze the loop. + const OptimizationRemarkAnalysis *getReport() const { return Report.get(); } + + /// the Memory Dependence Checker which can determine the + /// loop-independent and loop-carried dependences between memory accesses. + const MemoryDepChecker &getDepChecker() const { return *DepChecker; } + + /// Return the list of instructions that use \p Ptr to read or write + /// memory. + SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, + bool isWrite) const { + return DepChecker->getInstructionsForAccess(Ptr, isWrite); + } + + /// If an access has a symbolic strides, this maps the pointer value to + /// the stride symbol. + const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; } + + /// Pointer has a symbolic stride. + bool hasStride(Value *V) const { return StrideSet.count(V); } + + /// Print the information about the memory accesses in the loop. + void print(raw_ostream &OS, unsigned Depth = 0) const; + + /// If the loop has memory dependence involving an invariant address, i.e. two + /// stores or a store and a load, then return true, else return false. + bool hasDependenceInvolvingLoopInvariantAddress() const { + return HasDependenceInvolvingLoopInvariantAddress; + } + + /// Used to add runtime SCEV checks. 
Simplifies SCEV expressions and converts + /// them to a more usable form. All SCEV expressions during the analysis + /// should be re-written (and therefore simplified) according to PSE. + /// A user of LoopAccessAnalysis will need to emit the runtime checks + /// associated with this predicate. + const PredicatedScalarEvolution &getPSE() const { return *PSE; } + +private: + /// Analyze the loop. + void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, + const TargetLibraryInfo *TLI, DominatorTree *DT); + + /// Check if the structure of the loop allows it to be analyzed by this + /// pass. + bool canAnalyzeLoop(); + + /// Save the analysis remark. + /// + /// LAA does not directly emits the remarks. Instead it stores it which the + /// client can retrieve and presents as its own analysis + /// (e.g. -Rpass-analysis=loop-vectorize). + OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName, + Instruction *Instr = nullptr); + + /// Collect memory access with loop invariant strides. + /// + /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop + /// invariant. + void collectStridedAccess(Value *LoadOrStoreInst); + + std::unique_ptr<PredicatedScalarEvolution> PSE; + + /// We need to check that all of the pointers in this list are disjoint + /// at runtime. Using std::unique_ptr to make using move ctor simpler. + std::unique_ptr<RuntimePointerChecking> PtrRtChecking; + + /// the Memory Dependence Checker which can determine the + /// loop-independent and loop-carried dependences between memory accesses. + std::unique_ptr<MemoryDepChecker> DepChecker; + + Loop *TheLoop; + + unsigned NumLoads; + unsigned NumStores; + + uint64_t MaxSafeDepDistBytes; + + /// Cache the result of analyzeLoop. + bool CanVecMem; + + /// Indicator that there are non vectorizable stores to a uniform address. + bool HasDependenceInvolvingLoopInvariantAddress; + + /// The diagnostics report generated for the analysis. E.g. why we + /// couldn't analyze the loop. + std::unique_ptr<OptimizationRemarkAnalysis> Report; + + /// If an access has a symbolic strides, this maps the pointer value to + /// the stride symbol. + ValueToValueMap SymbolicStrides; + + /// Set of symbolic strides values. + SmallPtrSet<Value *, 8> StrideSet; +}; + +Value *stripIntegerCast(Value *V); + +/// Return the SCEV corresponding to a pointer with the symbolic stride +/// replaced with constant one, assuming the SCEV predicate associated with +/// \p PSE is true. +/// +/// If necessary this method will version the stride of the pointer according +/// to \p PtrToStride and therefore add further predicates to \p PSE. +/// +/// If \p OrigPtr is not null, use it to look up the stride value instead of \p +/// Ptr. \p PtrToStride provides the mapping between the pointer value and its +/// stride as collected by LoopVectorizationLegality::collectStridedAccess. +const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, + const ValueToValueMap &PtrToStride, + Value *Ptr, Value *OrigPtr = nullptr); + +/// If the pointer has a constant stride return it in units of its +/// element size. Otherwise return zero. +/// +/// Ensure that it does not wrap in the address space, assuming the predicate +/// associated with \p PSE is true. +/// +/// If necessary this method will version the stride of the pointer according +/// to \p PtrToStride and therefore add further predicates to \p PSE. +/// The \p Assume parameter indicates if we are allowed to make additional +/// run-time assumptions. 
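+///
+/// For illustration, a minimal sketch (assuming \c PSE, \c Ptr, and
+/// \c TheLoop come from an ongoing loop analysis):
+///
+/// \code
+///   int64_t Stride = getPtrStride(PSE, Ptr, TheLoop);
+///   bool IsConsecutive = Stride == 1;   // unit positive stride
+///   bool IsReverse = Stride == -1;      // unit negative stride
+///   bool NoConstStride = Stride == 0;   // no constant stride could be proven
+/// \endcode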
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+                     const ValueToValueMap &StridesMap = ValueToValueMap(),
+                     bool Assume = false, bool ShouldCheckWrap = true);
+
+/// Attempt to sort the pointers in \p VL and return the sorted indices
+/// in \p SortedIndices, if reordering is required.
+///
+/// Returns 'true' if sorting is legal, otherwise returns 'false'.
+///
+/// For example, for a given \p VL of memory accesses in program order, a[i+4],
+/// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
+/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
+/// save the mask for actual memory accesses in program order in
+/// \p SortedIndices as <1,2,0,3>.
+bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+                     ScalarEvolution &SE,
+                     SmallVectorImpl<unsigned> &SortedIndices);
+
+/// Returns true if the memory operations \p A and \p B are consecutive.
+/// This is a simple API that does not depend on the analysis pass.
+bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
+                         ScalarEvolution &SE, bool CheckType = true);
+
+/// This analysis provides dependence information for the memory accesses
+/// of a loop.
+///
+/// It runs the analysis for a loop on demand. This can be initiated by
+/// querying the loop access info via LAA::getInfo. getInfo returns a
+/// LoopAccessInfo object. See this class for the specifics of what information
+/// is provided.
+class LoopAccessLegacyAnalysis : public FunctionPass {
+public:
+  static char ID;
+
+  LoopAccessLegacyAnalysis() : FunctionPass(ID) {
+    initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Query the result of the loop access information for the loop \p L.
+  ///
+  /// If there is no cached result available, run the analysis.
+  const LoopAccessInfo &getInfo(Loop *L);
+
+  void releaseMemory() override {
+    // Invalidate the cache when the pass is freed.
+    LoopAccessInfoMap.clear();
+  }
+
+  /// Print the result of the analysis when invoked with -analyze.
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+private:
+  /// The cache.
+  DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
+
+  // The used analysis passes.
+  ScalarEvolution *SE;
+  const TargetLibraryInfo *TLI;
+  AliasAnalysis *AA;
+  DominatorTree *DT;
+  LoopInfo *LI;
+};
+
+/// This analysis provides dependence information for the memory
+/// accesses of a loop.
+///
+/// It runs the analysis for a loop on demand. This can be initiated by
+/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
+/// getResult returns a LoopAccessInfo object. See this class for the
+/// specifics of what information is provided.
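+///
+/// For illustration only (this example is not part of the original header):
+/// under the new pass manager a loop pass might query the result roughly like
+/// this, where \c LPMUpdater comes from the loop pass infrastructure and the
+/// remaining arguments are supplied by the loop pass adaptor:
+/// \code
+///   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+///                         LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+///     const LoopAccessInfo &LAI = AM.getResult<LoopAccessAnalysis>(L, AR);
+///     if (!LAI.canVectorizeMemory())
+///       return PreservedAnalyses::all();
+///     // ... use LAI.getRuntimePointerChecking(), LAI.getDepChecker(), ...
+///   }
+/// \endcode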
+class LoopAccessAnalysis
+    : public AnalysisInfoMixin<LoopAccessAnalysis> {
+  friend AnalysisInfoMixin<LoopAccessAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  typedef LoopAccessInfo Result;
+
+  Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+};
+
+inline Instruction *MemoryDepChecker::Dependence::getSource(
+    const LoopAccessInfo &LAI) const {
+  return LAI.getDepChecker().getMemoryInstructions()[Source];
+}
+
+inline Instruction *MemoryDepChecker::Dependence::getDestination(
+    const LoopAccessInfo &LAI) const {
+  return LAI.getDepChecker().getMemoryInstructions()[Destination];
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/clang-r353983e/include/llvm/Analysis/LoopAnalysisManager.h b/clang-r353983e/include/llvm/Analysis/LoopAnalysisManager.h
new file mode 100644
index 00000000..b87d981e
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/LoopAnalysisManager.h
@@ -0,0 +1,159 @@
+//===- LoopAnalysisManager.h - Loop analysis management ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header provides classes for managing per-loop analyses. These are
+/// typically used as part of a loop pass pipeline over the loop nests of
+/// a function.
+///
+/// Loop analyses are allowed to make some simplifying assumptions:
+/// 1) Loops are, where possible, in simplified form.
+/// 2) Loops are *always* in LCSSA form.
+/// 3) A collection of analysis results are available:
+///    - LoopInfo
+///    - DominatorTree
+///    - ScalarEvolution
+///    - AAManager
+///
+/// The primary mechanism to provide these invariants is the loop pass manager,
+/// but they can also be manually provided in order to reason about a loop from
+/// outside of a dedicated pass manager.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPANALYSISMANAGER_H
+#define LLVM_ANALYSIS_LOOPANALYSISMANAGER_H
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/PriorityWorklist.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// The adaptor from a function pass to a loop pass computes these analyses and
+/// makes them available to the loop passes "for free". Each loop pass is
+/// expected to update these analyses if necessary to ensure they're
+/// valid after it runs.
+struct LoopStandardAnalysisResults {
+  AAResults &AA;
+  AssumptionCache &AC;
+  DominatorTree &DT;
+  LoopInfo &LI;
+  ScalarEvolution &SE;
+  TargetLibraryInfo &TLI;
+  TargetTransformInfo &TTI;
+  MemorySSA *MSSA;
+};
+
+/// Enables MemorySSA as a dependency for loop passes.
+extern cl::opt<bool> EnableMSSALoopDependency;
+
+/// Extern template declaration for the analysis set for this IR unit.
+extern template class AllAnalysesOn<Loop>;
+
+extern template class AnalysisManager<Loop, LoopStandardAnalysisResults &>;
+/// The loop analysis manager.
+///
+/// See the documentation for the AnalysisManager template for detailed
+/// documentation. This typedef serves as a convenient way to refer to this
+/// construct in the adaptors and proxies used to integrate this into the
+/// larger pass manager infrastructure.
+typedef AnalysisManager<Loop, LoopStandardAnalysisResults &>
+    LoopAnalysisManager;
+
+/// A proxy from a \c LoopAnalysisManager to a \c Function.
+typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
+    LoopAnalysisManagerFunctionProxy;
+
+/// A specialized result for the \c LoopAnalysisManagerFunctionProxy which
+/// retains a \c LoopInfo reference.
+///
+/// This allows it to collect loop objects for which analysis results may be
+/// cached in the \c LoopAnalysisManager.
+template <> class LoopAnalysisManagerFunctionProxy::Result {
+public:
+  explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI)
+      : InnerAM(&InnerAM), LI(&LI) {}
+  Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) {
+    // We have to null out the analysis manager in the moved-from state
+    // because we are taking ownership of the responsibility to clear the
+    // analysis state.
+    Arg.InnerAM = nullptr;
+  }
+  Result &operator=(Result &&RHS) {
+    InnerAM = RHS.InnerAM;
+    LI = RHS.LI;
+    // We have to null out the analysis manager in the moved-from state
+    // because we are taking ownership of the responsibility to clear the
+    // analysis state.
+    RHS.InnerAM = nullptr;
+    return *this;
+  }
+  ~Result() {
+    // InnerAM is cleared in a moved-from state where there is nothing to do.
+    if (!InnerAM)
+      return;
+
+    // Clear out the analysis manager if we're being destroyed -- it means we
+    // didn't even see an invalidate call when we got invalidated.
+    InnerAM->clear();
+  }
+
+  /// Accessor for the analysis manager.
+  LoopAnalysisManager &getManager() { return *InnerAM; }
+
+  /// Handler for invalidation of the proxy for a particular function.
+  ///
+  /// If the proxy, \c LoopInfo, and associated analyses are preserved, this
+  /// will merely forward the invalidation event to any cached loop analysis
+  /// results for loops within this function.
+  ///
+  /// If the necessary loop infrastructure is not preserved, this will forcibly
+  /// clear all of the cached analysis results that are keyed on the \c
+  /// LoopInfo for this function.
+  bool invalidate(Function &F, const PreservedAnalyses &PA,
+                  FunctionAnalysisManager::Invalidator &Inv);
+
+private:
+  LoopAnalysisManager *InnerAM;
+  LoopInfo *LI;
+};
+
+/// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy
+/// so it can pass the \c LoopInfo to the result.
+template <>
+LoopAnalysisManagerFunctionProxy::Result
+LoopAnalysisManagerFunctionProxy::run(Function &F, FunctionAnalysisManager &AM);
+
+// Ensure the \c LoopAnalysisManagerFunctionProxy is provided as an extern
+// template.
+extern template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
+
+extern template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop,
+                                                LoopStandardAnalysisResults &>;
+/// A proxy from a \c FunctionAnalysisManager to a \c Loop.
+typedef OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop,
+                                  LoopStandardAnalysisResults &>
+    FunctionAnalysisManagerLoopProxy;
+
+/// Returns the minimum set of Analyses that all loop passes must preserve.
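+///
+/// For illustration only (this example is not part of the original header):
+/// a loop pass that changed the IR but preserved the loop structure would
+/// typically report this set from its run method; MyLoopPass and doTransform
+/// are hypothetical names used only for the sketch:
+/// \code
+///   PreservedAnalyses MyLoopPass::run(Loop &L, LoopAnalysisManager &AM,
+///                                     LoopStandardAnalysisResults &AR,
+///                                     LPMUpdater &U) {
+///     if (!doTransform(L, AR))
+///       return PreservedAnalyses::all();
+///     return getLoopPassPreservedAnalyses();
+///   }
+/// \endcode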
+PreservedAnalyses getLoopPassPreservedAnalyses(); +} + +#endif // LLVM_ANALYSIS_LOOPANALYSISMANAGER_H diff --git a/clang-r353983e/include/llvm/Analysis/LoopInfo.h b/clang-r353983e/include/llvm/Analysis/LoopInfo.h new file mode 100644 index 00000000..0899630f --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LoopInfo.h @@ -0,0 +1,1039 @@ +//===- llvm/Analysis/LoopInfo.h - Natural Loop Calculator -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoopInfo class that is used to identify natural loops +// and determine the loop depth of various nodes of the CFG. A natural loop +// has exactly one entry-point, which is called the header. Note that natural +// loops may actually be several loops that share the same header node. +// +// This analysis calculates the nesting structure of loops in a function. For +// each natural loop identified, this analysis identifies natural loops +// contained entirely within the loop and the basic blocks the make up the loop. +// +// It can calculate on the fly various bits of information, for example: +// +// * whether there is a preheader for the loop +// * the number of back edges to the header +// * whether or not a particular block branches out of the loop +// * the successor blocks of the loop +// * the loop depth +// * etc... +// +// Note that this analysis specifically identifies *Loops* not cycles or SCCs +// in the CFG. There can be strongly connected components in the CFG which +// this analysis will not recognize and that will not be represented by a Loop +// instance. In particular, a Loop might be inside such a non-loop SCC, or a +// non-loop SCC might contain a sub-SCC which is a Loop. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPINFO_H +#define LLVM_ANALYSIS_LOOPINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include <algorithm> +#include <utility> + +namespace llvm { + +class DominatorTree; +class LoopInfo; +class Loop; +class MDNode; +class PHINode; +class raw_ostream; +template <class N, bool IsPostDom> class DominatorTreeBase; +template <class N, class M> class LoopInfoBase; +template <class N, class M> class LoopBase; + +//===----------------------------------------------------------------------===// +/// Instances of this class are used to represent loops that are detected in the +/// flow graph. +/// +template <class BlockT, class LoopT> class LoopBase { + LoopT *ParentLoop; + // Loops contained entirely within this one. + std::vector<LoopT *> SubLoops; + + // The list of blocks in this loop. First entry is the header node. + std::vector<BlockT *> Blocks; + + SmallPtrSet<const BlockT *, 8> DenseBlockSet; + +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + /// Indicator that this loop is no longer a valid loop. 
+ bool IsInvalid = false; +#endif + + LoopBase(const LoopBase<BlockT, LoopT> &) = delete; + const LoopBase<BlockT, LoopT> & + operator=(const LoopBase<BlockT, LoopT> &) = delete; + +public: + /// Return the nesting level of this loop. An outer-most loop has depth 1, + /// for consistency with loop depth values used for basic blocks, where depth + /// 0 is used for blocks not inside any loops. + unsigned getLoopDepth() const { + assert(!isInvalid() && "Loop not in a valid state!"); + unsigned D = 1; + for (const LoopT *CurLoop = ParentLoop; CurLoop; + CurLoop = CurLoop->ParentLoop) + ++D; + return D; + } + BlockT *getHeader() const { return getBlocks().front(); } + LoopT *getParentLoop() const { return ParentLoop; } + + /// This is a raw interface for bypassing addChildLoop. + void setParentLoop(LoopT *L) { + assert(!isInvalid() && "Loop not in a valid state!"); + ParentLoop = L; + } + + /// Return true if the specified loop is contained within in this loop. + bool contains(const LoopT *L) const { + assert(!isInvalid() && "Loop not in a valid state!"); + if (L == this) + return true; + if (!L) + return false; + return contains(L->getParentLoop()); + } + + /// Return true if the specified basic block is in this loop. + bool contains(const BlockT *BB) const { + assert(!isInvalid() && "Loop not in a valid state!"); + return DenseBlockSet.count(BB); + } + + /// Return true if the specified instruction is in this loop. + template <class InstT> bool contains(const InstT *Inst) const { + return contains(Inst->getParent()); + } + + /// Return the loops contained entirely within this loop. + const std::vector<LoopT *> &getSubLoops() const { + assert(!isInvalid() && "Loop not in a valid state!"); + return SubLoops; + } + std::vector<LoopT *> &getSubLoopsVector() { + assert(!isInvalid() && "Loop not in a valid state!"); + return SubLoops; + } + typedef typename std::vector<LoopT *>::const_iterator iterator; + typedef + typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator; + iterator begin() const { return getSubLoops().begin(); } + iterator end() const { return getSubLoops().end(); } + reverse_iterator rbegin() const { return getSubLoops().rbegin(); } + reverse_iterator rend() const { return getSubLoops().rend(); } + bool empty() const { return getSubLoops().empty(); } + + /// Get a list of the basic blocks which make up this loop. + ArrayRef<BlockT *> getBlocks() const { + assert(!isInvalid() && "Loop not in a valid state!"); + return Blocks; + } + typedef typename ArrayRef<BlockT *>::const_iterator block_iterator; + block_iterator block_begin() const { return getBlocks().begin(); } + block_iterator block_end() const { return getBlocks().end(); } + inline iterator_range<block_iterator> blocks() const { + assert(!isInvalid() && "Loop not in a valid state!"); + return make_range(block_begin(), block_end()); + } + + /// Get the number of blocks in this loop in constant time. + /// Invalidate the loop, indicating that it is no longer a loop. + unsigned getNumBlocks() const { + assert(!isInvalid() && "Loop not in a valid state!"); + return Blocks.size(); + } + + /// Return a direct, mutable handle to the blocks vector so that we can + /// mutate it efficiently with techniques like `std::remove`. + std::vector<BlockT *> &getBlocksVector() { + assert(!isInvalid() && "Loop not in a valid state!"); + return Blocks; + } + /// Return a direct, mutable handle to the blocks set so that we can + /// mutate it efficiently. 
+ SmallPtrSetImpl<const BlockT *> &getBlocksSet() { + assert(!isInvalid() && "Loop not in a valid state!"); + return DenseBlockSet; + } + + /// Return a direct, immutable handle to the blocks set. + const SmallPtrSetImpl<const BlockT *> &getBlocksSet() const { + assert(!isInvalid() && "Loop not in a valid state!"); + return DenseBlockSet; + } + + /// Return true if this loop is no longer valid. The only valid use of this + /// helper is "assert(L.isInvalid())" or equivalent, since IsInvalid is set to + /// true by the destructor. In other words, if this accessor returns true, + /// the caller has already triggered UB by calling this accessor; and so it + /// can only be called in a context where a return value of true indicates a + /// programmer error. + bool isInvalid() const { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + return IsInvalid; +#else + return false; +#endif + } + + /// True if terminator in the block can branch to another block that is + /// outside of the current loop. + bool isLoopExiting(const BlockT *BB) const { + assert(!isInvalid() && "Loop not in a valid state!"); + for (const auto &Succ : children<const BlockT *>(BB)) { + if (!contains(Succ)) + return true; + } + return false; + } + + /// Returns true if \p BB is a loop-latch. + /// A latch block is a block that contains a branch back to the header. + /// This function is useful when there are multiple latches in a loop + /// because \fn getLoopLatch will return nullptr in that case. + bool isLoopLatch(const BlockT *BB) const { + assert(!isInvalid() && "Loop not in a valid state!"); + assert(contains(BB) && "block does not belong to the loop"); + + BlockT *Header = getHeader(); + auto PredBegin = GraphTraits<Inverse<BlockT *>>::child_begin(Header); + auto PredEnd = GraphTraits<Inverse<BlockT *>>::child_end(Header); + return std::find(PredBegin, PredEnd, BB) != PredEnd; + } + + /// Calculate the number of back edges to the loop header. + unsigned getNumBackEdges() const { + assert(!isInvalid() && "Loop not in a valid state!"); + unsigned NumBackEdges = 0; + BlockT *H = getHeader(); + + for (const auto Pred : children<Inverse<BlockT *>>(H)) + if (contains(Pred)) + ++NumBackEdges; + + return NumBackEdges; + } + + //===--------------------------------------------------------------------===// + // APIs for simple analysis of the loop. + // + // Note that all of these methods can fail on general loops (ie, there may not + // be a preheader, etc). For best success, the loop simplification and + // induction variable canonicalization pass should be used to normalize loops + // for easy analysis. These methods assume canonical loops. + + /// Return all blocks inside the loop that have successors outside of the + /// loop. These are the blocks _inside of the current loop_ which branch out. + /// The returned list is always unique. + void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const; + + /// If getExitingBlocks would return exactly one block, return that block. + /// Otherwise return null. + BlockT *getExitingBlock() const; + + /// Return all of the successor blocks of this loop. These are the blocks + /// _outside of the current loop_ which are branched to. + void getExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const; + + /// If getExitBlocks would return exactly one block, return that block. + /// Otherwise return null. + BlockT *getExitBlock() const; + + /// Return true if no exit block for the loop has a predecessor that is + /// outside the loop. 
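+  ///
+  /// For illustration only (this example is not part of the original header):
+  /// since getUniqueExitBlocks below requires dedicated exits, a cautious
+  /// caller might guard it roughly like this (assuming \c L is a LoopT *):
+  /// \code
+  ///   SmallVector<BlockT *, 4> Exits;
+  ///   if (L->hasDedicatedExits())
+  ///     L->getUniqueExitBlocks(Exits);
+  /// \endcode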
+ bool hasDedicatedExits() const; + + /// Return all unique successor blocks of this loop. + /// These are the blocks _outside of the current loop_ which are branched to. + /// This assumes that loop exits are in canonical form, i.e. all exits are + /// dedicated exits. + void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const; + + /// If getUniqueExitBlocks would return exactly one block, return that block. + /// Otherwise return null. + BlockT *getUniqueExitBlock() const; + + /// Edge type. + typedef std::pair<const BlockT *, const BlockT *> Edge; + + /// Return all pairs of (_inside_block_,_outside_block_). + void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const; + + /// If there is a preheader for this loop, return it. A loop has a preheader + /// if there is only one edge to the header of the loop from outside of the + /// loop. If this is the case, the block branching to the header of the loop + /// is the preheader node. + /// + /// This method returns null if there is no preheader for the loop. + BlockT *getLoopPreheader() const; + + /// If the given loop's header has exactly one unique predecessor outside the + /// loop, return it. Otherwise return null. + /// This is less strict that the loop "preheader" concept, which requires + /// the predecessor to have exactly one successor. + BlockT *getLoopPredecessor() const; + + /// If there is a single latch block for this loop, return it. + /// A latch block is a block that contains a branch back to the header. + BlockT *getLoopLatch() const; + + /// Return all loop latch blocks of this loop. A latch block is a block that + /// contains a branch back to the header. + void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const { + assert(!isInvalid() && "Loop not in a valid state!"); + BlockT *H = getHeader(); + for (const auto Pred : children<Inverse<BlockT *>>(H)) + if (contains(Pred)) + LoopLatches.push_back(Pred); + } + + //===--------------------------------------------------------------------===// + // APIs for updating loop information after changing the CFG + // + + /// This method is used by other analyses to update loop information. + /// NewBB is set to be a new member of the current loop. + /// Because of this, it is added as a member of all parent loops, and is added + /// to the specified LoopInfo object as being in the current basic block. It + /// is not valid to replace the loop header with this method. + void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI); + + /// This is used when splitting loops up. It replaces the OldChild entry in + /// our children list with NewChild, and updates the parent pointer of + /// OldChild to be null and the NewChild to be this loop. + /// This updates the loop depth of the new child. + void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild); + + /// Add the specified loop to be a child of this loop. + /// This updates the loop depth of the new child. + void addChildLoop(LoopT *NewChild) { + assert(!isInvalid() && "Loop not in a valid state!"); + assert(!NewChild->ParentLoop && "NewChild already has a parent!"); + NewChild->ParentLoop = static_cast<LoopT *>(this); + SubLoops.push_back(NewChild); + } + + /// This removes the specified child from being a subloop of this loop. The + /// loop is not deleted, as it will presumably be inserted into another loop. 
+ LoopT *removeChildLoop(iterator I) { + assert(!isInvalid() && "Loop not in a valid state!"); + assert(I != SubLoops.end() && "Cannot remove end iterator!"); + LoopT *Child = *I; + assert(Child->ParentLoop == this && "Child is not a child of this loop!"); + SubLoops.erase(SubLoops.begin() + (I - begin())); + Child->ParentLoop = nullptr; + return Child; + } + + /// This removes the specified child from being a subloop of this loop. The + /// loop is not deleted, as it will presumably be inserted into another loop. + LoopT *removeChildLoop(LoopT *Child) { + return removeChildLoop(llvm::find(*this, Child)); + } + + /// This adds a basic block directly to the basic block list. + /// This should only be used by transformations that create new loops. Other + /// transformations should use addBasicBlockToLoop. + void addBlockEntry(BlockT *BB) { + assert(!isInvalid() && "Loop not in a valid state!"); + Blocks.push_back(BB); + DenseBlockSet.insert(BB); + } + + /// interface to reverse Blocks[from, end of loop] in this loop + void reverseBlock(unsigned from) { + assert(!isInvalid() && "Loop not in a valid state!"); + std::reverse(Blocks.begin() + from, Blocks.end()); + } + + /// interface to do reserve() for Blocks + void reserveBlocks(unsigned size) { + assert(!isInvalid() && "Loop not in a valid state!"); + Blocks.reserve(size); + } + + /// This method is used to move BB (which must be part of this loop) to be the + /// loop header of the loop (the block that dominates all others). + void moveToHeader(BlockT *BB) { + assert(!isInvalid() && "Loop not in a valid state!"); + if (Blocks[0] == BB) + return; + for (unsigned i = 0;; ++i) { + assert(i != Blocks.size() && "Loop does not contain BB!"); + if (Blocks[i] == BB) { + Blocks[i] = Blocks[0]; + Blocks[0] = BB; + return; + } + } + } + + /// This removes the specified basic block from the current loop, updating the + /// Blocks as appropriate. This does not update the mapping in the LoopInfo + /// class. + void removeBlockFromLoop(BlockT *BB) { + assert(!isInvalid() && "Loop not in a valid state!"); + auto I = find(Blocks, BB); + assert(I != Blocks.end() && "N is not in this list!"); + Blocks.erase(I); + + DenseBlockSet.erase(BB); + } + + /// Verify loop structure + void verifyLoop() const; + + /// Verify loop structure of this loop and all nested loops. + void verifyLoopNest(DenseSet<const LoopT *> *Loops) const; + + /// Returns true if the loop is annotated parallel. + /// + /// Derived classes can override this method using static template + /// polymorphism. + bool isAnnotatedParallel() const { return false; } + + /// Print loop with all the BBs inside it. + void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const; + +protected: + friend class LoopInfoBase<BlockT, LoopT>; + + /// This creates an empty loop. + LoopBase() : ParentLoop(nullptr) {} + + explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) { + Blocks.push_back(BB); + DenseBlockSet.insert(BB); + } + + // Since loop passes like SCEV are allowed to key analysis results off of + // `Loop` pointers, we cannot re-use pointers within a loop pass manager. + // This means loop passes should not be `delete` ing `Loop` objects directly + // (and risk a later `Loop` allocation re-using the address of a previous one) + // but should be using LoopInfo::markAsRemoved, which keeps around the `Loop` + // pointer till the end of the lifetime of the `LoopInfo` object. + // + // To make it easier to follow this rule, we mark the destructor as + // non-public. 
+ ~LoopBase() { + for (auto *SubLoop : SubLoops) + SubLoop->~LoopT(); + +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + IsInvalid = true; +#endif + SubLoops.clear(); + Blocks.clear(); + DenseBlockSet.clear(); + ParentLoop = nullptr; + } +}; + +template <class BlockT, class LoopT> +raw_ostream &operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) { + Loop.print(OS); + return OS; +} + +// Implementation in LoopInfoImpl.h +extern template class LoopBase<BasicBlock, Loop>; + +/// Represents a single loop in the control flow graph. Note that not all SCCs +/// in the CFG are necessarily loops. +class Loop : public LoopBase<BasicBlock, Loop> { +public: + /// A range representing the start and end location of a loop. + class LocRange { + DebugLoc Start; + DebugLoc End; + + public: + LocRange() {} + LocRange(DebugLoc Start) : Start(std::move(Start)), End(std::move(Start)) {} + LocRange(DebugLoc Start, DebugLoc End) + : Start(std::move(Start)), End(std::move(End)) {} + + const DebugLoc &getStart() const { return Start; } + const DebugLoc &getEnd() const { return End; } + + /// Check for null. + /// + explicit operator bool() const { return Start && End; } + }; + + /// Return true if the specified value is loop invariant. + bool isLoopInvariant(const Value *V) const; + + /// Return true if all the operands of the specified instruction are loop + /// invariant. + bool hasLoopInvariantOperands(const Instruction *I) const; + + /// If the given value is an instruction inside of the loop and it can be + /// hoisted, do so to make it trivially loop-invariant. + /// Return true if the value after any hoisting is loop invariant. This + /// function can be used as a slightly more aggressive replacement for + /// isLoopInvariant. + /// + /// If InsertPt is specified, it is the point to hoist instructions to. + /// If null, the terminator of the loop preheader is used. + bool makeLoopInvariant(Value *V, bool &Changed, + Instruction *InsertPt = nullptr) const; + + /// If the given instruction is inside of the loop and it can be hoisted, do + /// so to make it trivially loop-invariant. + /// Return true if the instruction after any hoisting is loop invariant. This + /// function can be used as a slightly more aggressive replacement for + /// isLoopInvariant. + /// + /// If InsertPt is specified, it is the point to hoist instructions to. + /// If null, the terminator of the loop preheader is used. + /// + bool makeLoopInvariant(Instruction *I, bool &Changed, + Instruction *InsertPt = nullptr) const; + + /// Check to see if the loop has a canonical induction variable: an integer + /// recurrence that starts at 0 and increments by one each time through the + /// loop. If so, return the phi node that corresponds to it. + /// + /// The IndVarSimplify pass transforms loops to have a canonical induction + /// variable. + /// + PHINode *getCanonicalInductionVariable() const; + + /// Return true if the Loop is in LCSSA form. + bool isLCSSAForm(DominatorTree &DT) const; + + /// Return true if this Loop and all inner subloops are in LCSSA form. + bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const; + + /// Return true if the Loop is in the form that the LoopSimplify form + /// transforms loops to, which is sometimes called normal form. + bool isLoopSimplifyForm() const; + + /// Return true if the loop body is safe to clone in practice. + bool isSafeToClone() const; + + /// Returns true if the loop is annotated parallel. 
+ /// + /// A parallel loop can be assumed to not contain any dependencies between + /// iterations by the compiler. That is, any loop-carried dependency checking + /// can be skipped completely when parallelizing the loop on the target + /// machine. Thus, if the parallel loop information originates from the + /// programmer, e.g. via the OpenMP parallel for pragma, it is the + /// programmer's responsibility to ensure there are no loop-carried + /// dependencies. The final execution order of the instructions across + /// iterations is not guaranteed, thus, the end result might or might not + /// implement actual concurrent execution of instructions across multiple + /// iterations. + bool isAnnotatedParallel() const; + + /// Return the llvm.loop loop id metadata node for this loop if it is present. + /// + /// If this loop contains the same llvm.loop metadata on each branch to the + /// header then the node is returned. If any latch instruction does not + /// contain llvm.loop or if multiple latches contain different nodes then + /// 0 is returned. + MDNode *getLoopID() const; + /// Set the llvm.loop loop id metadata for this loop. + /// + /// The LoopID metadata node will be added to each terminator instruction in + /// the loop that branches to the loop header. + /// + /// The LoopID metadata node should have one or more operands and the first + /// operand should be the node itself. + void setLoopID(MDNode *LoopID) const; + + /// Add llvm.loop.unroll.disable to this loop's loop id metadata. + /// + /// Remove existing unroll metadata and add unroll disable metadata to + /// indicate the loop has already been unrolled. This prevents a loop + /// from being unrolled more than is directed by a pragma if the loop + /// unrolling pass is run more than once (which it generally is). + void setLoopAlreadyUnrolled(); + + void dump() const; + void dumpVerbose() const; + + /// Return the debug location of the start of this loop. + /// This looks for a BB terminating instruction with a known debug + /// location by looking at the preheader and header blocks. If it + /// cannot find a terminating instruction with location information, + /// it returns an unknown location. + DebugLoc getStartLoc() const; + + /// Return the source code span of the loop. + LocRange getLocRange() const; + + StringRef getName() const { + if (BasicBlock *Header = getHeader()) + if (Header->hasName()) + return Header->getName(); + return "<unnamed loop>"; + } + +private: + Loop() = default; + + friend class LoopInfoBase<BasicBlock, Loop>; + friend class LoopBase<BasicBlock, Loop>; + explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {} + ~Loop() = default; +}; + +//===----------------------------------------------------------------------===// +/// This class builds and contains all of the top-level loop +/// structures in the specified function. 
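+/// For illustration only (this example is not part of the original header):
+/// typical queries against the concrete LoopInfo specialization look roughly
+/// like this, assuming \c LI is a computed LoopInfo and \c BB a basic block:
+/// \code
+///   if (Loop *L = LI.getLoopFor(BB))             // innermost loop holding BB
+///     (void)L->getLoopDepth();                   // depth 1 == outermost loop
+///   for (Loop *TopLevel : LI)                    // top-level loops only
+///     for (Loop *Sub : TopLevel->getSubLoops())  // direct subloops
+///       (void)Sub;
+/// \endcode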
+/// + +template <class BlockT, class LoopT> class LoopInfoBase { + // BBMap - Mapping of basic blocks to the inner most loop they occur in + DenseMap<const BlockT *, LoopT *> BBMap; + std::vector<LoopT *> TopLevelLoops; + BumpPtrAllocator LoopAllocator; + + friend class LoopBase<BlockT, LoopT>; + friend class LoopInfo; + + void operator=(const LoopInfoBase &) = delete; + LoopInfoBase(const LoopInfoBase &) = delete; + +public: + LoopInfoBase() {} + ~LoopInfoBase() { releaseMemory(); } + + LoopInfoBase(LoopInfoBase &&Arg) + : BBMap(std::move(Arg.BBMap)), + TopLevelLoops(std::move(Arg.TopLevelLoops)), + LoopAllocator(std::move(Arg.LoopAllocator)) { + // We have to clear the arguments top level loops as we've taken ownership. + Arg.TopLevelLoops.clear(); + } + LoopInfoBase &operator=(LoopInfoBase &&RHS) { + BBMap = std::move(RHS.BBMap); + + for (auto *L : TopLevelLoops) + L->~LoopT(); + + TopLevelLoops = std::move(RHS.TopLevelLoops); + LoopAllocator = std::move(RHS.LoopAllocator); + RHS.TopLevelLoops.clear(); + return *this; + } + + void releaseMemory() { + BBMap.clear(); + + for (auto *L : TopLevelLoops) + L->~LoopT(); + TopLevelLoops.clear(); + LoopAllocator.Reset(); + } + + template <typename... ArgsTy> LoopT *AllocateLoop(ArgsTy &&... Args) { + LoopT *Storage = LoopAllocator.Allocate<LoopT>(); + return new (Storage) LoopT(std::forward<ArgsTy>(Args)...); + } + + /// iterator/begin/end - The interface to the top-level loops in the current + /// function. + /// + typedef typename std::vector<LoopT *>::const_iterator iterator; + typedef + typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator; + iterator begin() const { return TopLevelLoops.begin(); } + iterator end() const { return TopLevelLoops.end(); } + reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); } + reverse_iterator rend() const { return TopLevelLoops.rend(); } + bool empty() const { return TopLevelLoops.empty(); } + + /// Return all of the loops in the function in preorder across the loop + /// nests, with siblings in forward program order. + /// + /// Note that because loops form a forest of trees, preorder is equivalent to + /// reverse postorder. + SmallVector<LoopT *, 4> getLoopsInPreorder(); + + /// Return all of the loops in the function in preorder across the loop + /// nests, with siblings in *reverse* program order. + /// + /// Note that because loops form a forest of trees, preorder is equivalent to + /// reverse postorder. + /// + /// Also note that this is *not* a reverse preorder. Only the siblings are in + /// reverse program order. + SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder(); + + /// Return the inner most loop that BB lives in. If a basic block is in no + /// loop (for example the entry node), null is returned. + LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); } + + /// Same as getLoopFor. + const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); } + + /// Return the loop nesting level of the specified block. A depth of 0 means + /// the block is not inside any loop. + unsigned getLoopDepth(const BlockT *BB) const { + const LoopT *L = getLoopFor(BB); + return L ? L->getLoopDepth() : 0; + } + + // True if the block is a loop header node + bool isLoopHeader(const BlockT *BB) const { + const LoopT *L = getLoopFor(BB); + return L && L->getHeader() == BB; + } + + /// This removes the specified top-level loop from this loop info object. + /// The loop is not deleted, as it will presumably be inserted into + /// another loop. 
+ LoopT *removeLoop(iterator I) { + assert(I != end() && "Cannot remove end iterator!"); + LoopT *L = *I; + assert(!L->getParentLoop() && "Not a top-level loop!"); + TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin())); + return L; + } + + /// Change the top-level loop that contains BB to the specified loop. + /// This should be used by transformations that restructure the loop hierarchy + /// tree. + void changeLoopFor(BlockT *BB, LoopT *L) { + if (!L) { + BBMap.erase(BB); + return; + } + BBMap[BB] = L; + } + + /// Replace the specified loop in the top-level loops list with the indicated + /// loop. + void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) { + auto I = find(TopLevelLoops, OldLoop); + assert(I != TopLevelLoops.end() && "Old loop not at top level!"); + *I = NewLoop; + assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop && + "Loops already embedded into a subloop!"); + } + + /// This adds the specified loop to the collection of top-level loops. + void addTopLevelLoop(LoopT *New) { + assert(!New->getParentLoop() && "Loop already in subloop!"); + TopLevelLoops.push_back(New); + } + + /// This method completely removes BB from all data structures, + /// including all of the Loop objects it is nested in and our mapping from + /// BasicBlocks to loops. + void removeBlock(BlockT *BB) { + auto I = BBMap.find(BB); + if (I != BBMap.end()) { + for (LoopT *L = I->second; L; L = L->getParentLoop()) + L->removeBlockFromLoop(BB); + + BBMap.erase(I); + } + } + + // Internals + + static bool isNotAlreadyContainedIn(const LoopT *SubLoop, + const LoopT *ParentLoop) { + if (!SubLoop) + return true; + if (SubLoop == ParentLoop) + return false; + return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop); + } + + /// Create the loop forest using a stable algorithm. + void analyze(const DominatorTreeBase<BlockT, false> &DomTree); + + // Debugging + void print(raw_ostream &OS) const; + + void verify(const DominatorTreeBase<BlockT, false> &DomTree) const; + + /// Destroy a loop that has been removed from the `LoopInfo` nest. + /// + /// This runs the destructor of the loop object making it invalid to + /// reference afterward. The memory is retained so that the *pointer* to the + /// loop remains valid. + /// + /// The caller is responsible for removing this loop from the loop nest and + /// otherwise disconnecting it from the broader `LoopInfo` data structures. + /// Callers that don't naturally handle this themselves should probably call + /// `erase' instead. + void destroy(LoopT *L) { + L->~LoopT(); + + // Since LoopAllocator is a BumpPtrAllocator, this Deallocate only poisons + // \c L, but the pointer remains valid for non-dereferencing uses. + LoopAllocator.Deallocate(L); + } +}; + +// Implementation in LoopInfoImpl.h +extern template class LoopInfoBase<BasicBlock, Loop>; + +class LoopInfo : public LoopInfoBase<BasicBlock, Loop> { + typedef LoopInfoBase<BasicBlock, Loop> BaseT; + + friend class LoopBase<BasicBlock, Loop>; + + void operator=(const LoopInfo &) = delete; + LoopInfo(const LoopInfo &) = delete; + +public: + LoopInfo() {} + explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree); + + LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {} + LoopInfo &operator=(LoopInfo &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + return *this; + } + + /// Handle invalidation explicitly. 
+ bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + + // Most of the public interface is provided via LoopInfoBase. + + /// Update LoopInfo after removing the last backedge from a loop. This updates + /// the loop forest and parent loops for each block so that \c L is no longer + /// referenced, but does not actually delete \c L immediately. The pointer + /// will remain valid until this LoopInfo's memory is released. + void erase(Loop *L); + + /// Returns true if replacing From with To everywhere is guaranteed to + /// preserve LCSSA form. + bool replacementPreservesLCSSAForm(Instruction *From, Value *To) { + // Preserving LCSSA form is only problematic if the replacing value is an + // instruction. + Instruction *I = dyn_cast<Instruction>(To); + if (!I) + return true; + // If both instructions are defined in the same basic block then replacement + // cannot break LCSSA form. + if (I->getParent() == From->getParent()) + return true; + // If the instruction is not defined in a loop then it can safely replace + // anything. + Loop *ToLoop = getLoopFor(I->getParent()); + if (!ToLoop) + return true; + // If the replacing instruction is defined in the same loop as the original + // instruction, or in a loop that contains it as an inner loop, then using + // it as a replacement will not break LCSSA form. + return ToLoop->contains(getLoopFor(From->getParent())); + } + + /// Checks if moving a specific instruction can break LCSSA in any loop. + /// + /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, + /// assuming that the function containing \p Inst and \p NewLoc is currently + /// in LCSSA form. + bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { + assert(Inst->getFunction() == NewLoc->getFunction() && + "Can't reason about IPO!"); + + auto *OldBB = Inst->getParent(); + auto *NewBB = NewLoc->getParent(); + + // Movement within the same loop does not break LCSSA (the equality check is + // to avoid doing a hashtable lookup in case of intra-block movement). + if (OldBB == NewBB) + return true; + + auto *OldLoop = getLoopFor(OldBB); + auto *NewLoop = getLoopFor(NewBB); + + if (OldLoop == NewLoop) + return true; + + // Check if Outer contains Inner; with the null loop counting as the + // "outermost" loop. + auto Contains = [](const Loop *Outer, const Loop *Inner) { + return !Outer || Outer->contains(Inner); + }; + + // To check that the movement of Inst to before NewLoc does not break LCSSA, + // we need to check two sets of uses for possible LCSSA violations at + // NewLoc: the users of NewInst, and the operands of NewInst. + + // If we know we're hoisting Inst out of an inner loop to an outer loop, + // then the uses *of* Inst don't need to be checked. + + if (!Contains(NewLoop, OldLoop)) { + for (Use &U : Inst->uses()) { + auto *UI = cast<Instruction>(U.getUser()); + auto *UBB = isa<PHINode>(UI) ? cast<PHINode>(UI)->getIncomingBlock(U) + : UI->getParent(); + if (UBB != NewBB && getLoopFor(UBB) != NewLoop) + return false; + } + } + + // If we know we're sinking Inst from an outer loop into an inner loop, then + // the *operands* of Inst don't need to be checked. + + if (!Contains(OldLoop, NewLoop)) { + // See below on why we can't handle phi nodes here. 
+ if (isa<PHINode>(Inst)) + return false; + + for (Use &U : Inst->operands()) { + auto *DefI = dyn_cast<Instruction>(U.get()); + if (!DefI) + return false; + + // This would need adjustment if we allow Inst to be a phi node -- the + // new use block won't simply be NewBB. + + auto *DefBlock = DefI->getParent(); + if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) + return false; + } + } + + return true; + } +}; + +// Allow clients to walk the list of nested loops... +template <> struct GraphTraits<const Loop *> { + typedef const Loop *NodeRef; + typedef LoopInfo::iterator ChildIteratorType; + + static NodeRef getEntryNode(const Loop *L) { return L; } + static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->end(); } +}; + +template <> struct GraphTraits<Loop *> { + typedef Loop *NodeRef; + typedef LoopInfo::iterator ChildIteratorType; + + static NodeRef getEntryNode(Loop *L) { return L; } + static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->end(); } +}; + +/// Analysis pass that exposes the \c LoopInfo for a function. +class LoopAnalysis : public AnalysisInfoMixin<LoopAnalysis> { + friend AnalysisInfoMixin<LoopAnalysis>; + static AnalysisKey Key; + +public: + typedef LoopInfo Result; + + LoopInfo run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c LoopAnalysis results. +class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> { + raw_ostream &OS; + +public: + explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Verifier pass for the \c LoopAnalysis results. +struct LoopVerifierPass : public PassInfoMixin<LoopVerifierPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// The legacy pass manager's analysis pass to compute loop information. +class LoopInfoWrapperPass : public FunctionPass { + LoopInfo LI; + +public: + static char ID; // Pass identification, replacement for typeid + + LoopInfoWrapperPass() : FunctionPass(ID) { + initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + LoopInfo &getLoopInfo() { return LI; } + const LoopInfo &getLoopInfo() const { return LI; } + + /// Calculate the natural loop information for a given function. + bool runOnFunction(Function &F) override; + + void verifyAnalysis() const override; + + void releaseMemory() override { LI.releaseMemory(); } + + void print(raw_ostream &O, const Module *M = nullptr) const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// Function to print a loop's contents as LLVM's text IR assembly. +void printLoop(Loop &L, raw_ostream &OS, const std::string &Banner = ""); + +/// Find and return the loop attribute node for the attribute @p Name in +/// @p LoopID. Return nullptr if there is no such attribute. +MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name); + +/// Find string metadata for a loop. +/// +/// Returns the MDNode where the first operand is the metadata's name. The +/// following operands are the metadata's values. If no metadata with @p Name is +/// found, return nullptr. +MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name); + +/// Return whether an MDNode might represent an access group. +/// +/// Access group metadata nodes have to be distinct and empty. 
Being
+/// always-empty ensures that it never needs to be changed (which -- because
+/// MDNodes are designed immutable -- would require creating a new MDNode).
+/// Note that this is not a sufficient condition: not every distinct and empty
+/// MDNode represents an access group.
+bool isValidAsAccessGroup(MDNode *AccGroup);
+
+/// Create a new LoopID after the loop has been transformed.
+///
+/// This can be used when no follow-up loop attributes are defined
+/// (llvm::makeFollowupLoopID returning None) to stop transformations from
+/// being applied again.
+///
+/// @param Context        The LLVMContext in which to create the new LoopID.
+/// @param OrigLoopID     The original LoopID; can be nullptr if the original
+///                       loop has no LoopID.
+/// @param RemovePrefixes Remove all loop attributes that have these prefixes.
+///                       Use to remove metadata of the transformation that has
+///                       been applied.
+/// @param AddAttrs       Add these loop attributes to the new LoopID.
+///
+/// @return A new LoopID that can be applied using Loop::setLoopID().
+llvm::MDNode *
+makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
+                               llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
+                               llvm::ArrayRef<llvm::MDNode *> AddAttrs);
+
+} // End llvm namespace
+
+#endif
diff --git a/clang-r353983e/include/llvm/Analysis/LoopInfoImpl.h b/clang-r353983e/include/llvm/Analysis/LoopInfoImpl.h
new file mode 100644
index 00000000..ad425083
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/LoopInfoImpl.h
@@ -0,0 +1,759 @@
+//===- llvm/Analysis/LoopInfoImpl.h - Natural Loop Calculator ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the generic implementation of LoopInfo used for both Loops and
+// MachineLoops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H
+#define LLVM_ANALYSIS_LOOPINFOIMPL_H
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// APIs for simple analysis of the loop. See header notes.
+
+/// getExitingBlocks - Return all blocks inside the loop that have successors
+/// outside of the loop. These are the blocks _inside of the current loop_
+/// which branch out. The returned list is always unique.
+///
+template <class BlockT, class LoopT>
+void LoopBase<BlockT, LoopT>::getExitingBlocks(
+    SmallVectorImpl<BlockT *> &ExitingBlocks) const {
+  assert(!isInvalid() && "Loop not in a valid state!");
+  for (const auto BB : blocks())
+    for (const auto &Succ : children<BlockT *>(BB))
+      if (!contains(Succ)) {
+        // Not in current loop? It must be an exit block.
+        ExitingBlocks.push_back(BB);
+        break;
+      }
+}
+
+/// getExitingBlock - If getExitingBlocks would return exactly one block,
+/// return that block. Otherwise return null.
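+///
+/// For illustration only (this example is not part of the original header):
+/// a transform that can only handle a single exiting block typically bails
+/// out on the null result, e.g. (assuming \c L is a Loop *):
+/// \code
+///   BasicBlock *Exiting = L->getExitingBlock();
+///   if (!Exiting)
+///     return false; // multiple exiting blocks; give up
+/// \endcode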
+template <class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getExitingBlock() const { + assert(!isInvalid() && "Loop not in a valid state!"); + SmallVector<BlockT *, 8> ExitingBlocks; + getExitingBlocks(ExitingBlocks); + if (ExitingBlocks.size() == 1) + return ExitingBlocks[0]; + return nullptr; +} + +/// getExitBlocks - Return all of the successor blocks of this loop. These +/// are the blocks _outside of the current loop_ which are branched to. +/// +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::getExitBlocks( + SmallVectorImpl<BlockT *> &ExitBlocks) const { + assert(!isInvalid() && "Loop not in a valid state!"); + for (const auto BB : blocks()) + for (const auto &Succ : children<BlockT *>(BB)) + if (!contains(Succ)) + // Not in current loop? It must be an exit block. + ExitBlocks.push_back(Succ); +} + +/// getExitBlock - If getExitBlocks would return exactly one block, +/// return that block. Otherwise return null. +template <class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getExitBlock() const { + assert(!isInvalid() && "Loop not in a valid state!"); + SmallVector<BlockT *, 8> ExitBlocks; + getExitBlocks(ExitBlocks); + if (ExitBlocks.size() == 1) + return ExitBlocks[0]; + return nullptr; +} + +template <class BlockT, class LoopT> +bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const { + // Each predecessor of each exit block of a normal loop is contained + // within the loop. + SmallVector<BlockT *, 4> ExitBlocks; + getExitBlocks(ExitBlocks); + for (BlockT *EB : ExitBlocks) + for (BlockT *Predecessor : children<Inverse<BlockT *>>(EB)) + if (!contains(Predecessor)) + return false; + // All the requirements are met. + return true; +} + +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::getUniqueExitBlocks( + SmallVectorImpl<BlockT *> &ExitBlocks) const { + typedef GraphTraits<BlockT *> BlockTraits; + typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits; + + assert(hasDedicatedExits() && + "getUniqueExitBlocks assumes the loop has canonical form exits!"); + + SmallVector<BlockT *, 32> SwitchExitBlocks; + for (BlockT *Block : this->blocks()) { + SwitchExitBlocks.clear(); + for (BlockT *Successor : children<BlockT *>(Block)) { + // If block is inside the loop then it is not an exit block. + if (contains(Successor)) + continue; + + BlockT *FirstPred = *InvBlockTraits::child_begin(Successor); + + // If current basic block is this exit block's first predecessor then only + // insert exit block in to the output ExitBlocks vector. This ensures that + // same exit block is not inserted twice into ExitBlocks vector. + if (Block != FirstPred) + continue; + + // If a terminator has more then two successors, for example SwitchInst, + // then it is possible that there are multiple edges from current block to + // one exit block. + if (std::distance(BlockTraits::child_begin(Block), + BlockTraits::child_end(Block)) <= 2) { + ExitBlocks.push_back(Successor); + continue; + } + + // In case of multiple edges from current block to exit block, collect + // only one edge in ExitBlocks. Use switchExitBlocks to keep track of + // duplicate edges. 
+ if (!is_contained(SwitchExitBlocks, Successor)) { + SwitchExitBlocks.push_back(Successor); + ExitBlocks.push_back(Successor); + } + } + } +} + +template <class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getUniqueExitBlock() const { + SmallVector<BlockT *, 8> UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + if (UniqueExitBlocks.size() == 1) + return UniqueExitBlocks[0]; + return nullptr; +} + +/// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::getExitEdges( + SmallVectorImpl<Edge> &ExitEdges) const { + assert(!isInvalid() && "Loop not in a valid state!"); + for (const auto BB : blocks()) + for (const auto &Succ : children<BlockT *>(BB)) + if (!contains(Succ)) + // Not in current loop? It must be an exit block. + ExitEdges.emplace_back(BB, Succ); +} + +/// getLoopPreheader - If there is a preheader for this loop, return it. A +/// loop has a preheader if there is only one edge to the header of the loop +/// from outside of the loop and it is legal to hoist instructions into the +/// predecessor. If this is the case, the block branching to the header of the +/// loop is the preheader node. +/// +/// This method returns null if there is no preheader for the loop. +/// +template <class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const { + assert(!isInvalid() && "Loop not in a valid state!"); + // Keep track of nodes outside the loop branching to the header... + BlockT *Out = getLoopPredecessor(); + if (!Out) + return nullptr; + + // Make sure we are allowed to hoist instructions into the predecessor. + if (!Out->isLegalToHoistInto()) + return nullptr; + + // Make sure there is only one exit out of the preheader. + typedef GraphTraits<BlockT *> BlockTraits; + typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); + ++SI; + if (SI != BlockTraits::child_end(Out)) + return nullptr; // Multiple exits from the block, must not be a preheader. + + // The predecessor has exactly one successor, so it is a preheader. + return Out; +} + +/// getLoopPredecessor - If the given loop's header has exactly one unique +/// predecessor outside the loop, return it. Otherwise return null. +/// This is less strict that the loop "preheader" concept, which requires +/// the predecessor to have exactly one successor. +/// +template <class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const { + assert(!isInvalid() && "Loop not in a valid state!"); + // Keep track of nodes outside the loop branching to the header... + BlockT *Out = nullptr; + + // Loop over the predecessors of the header node... + BlockT *Header = getHeader(); + for (const auto Pred : children<Inverse<BlockT *>>(Header)) { + if (!contains(Pred)) { // If the block is not in the loop... + if (Out && Out != Pred) + return nullptr; // Multiple predecessors outside the loop + Out = Pred; + } + } + + // Make sure there is only one exit out of the preheader. + assert(Out && "Header of loop has no predecessors from outside loop?"); + return Out; +} + +/// getLoopLatch - If there is a single latch block for this loop, return it. +/// A latch block is a block that contains a branch back to the header. 
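+///
+/// For illustration only (this example is not part of the original header):
+/// rotated loops are commonly recognized by checking that the single latch is
+/// also an exiting block, e.g. (assuming \c L is a Loop *):
+/// \code
+///   BasicBlock *Latch = L->getLoopLatch();
+///   bool IsRotated = Latch && L->isLoopExiting(Latch);
+/// \endcode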
+template <class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const { + assert(!isInvalid() && "Loop not in a valid state!"); + BlockT *Header = getHeader(); + BlockT *Latch = nullptr; + for (const auto Pred : children<Inverse<BlockT *>>(Header)) { + if (contains(Pred)) { + if (Latch) + return nullptr; + Latch = Pred; + } + } + + return Latch; +} + +//===----------------------------------------------------------------------===// +// APIs for updating loop information after changing the CFG +// + +/// addBasicBlockToLoop - This method is used by other analyses to update loop +/// information. NewBB is set to be a new member of the current loop. +/// Because of this, it is added as a member of all parent loops, and is added +/// to the specified LoopInfo object as being in the current basic block. It +/// is not valid to replace the loop header with this method. +/// +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::addBasicBlockToLoop( + BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) { + assert(!isInvalid() && "Loop not in a valid state!"); +#ifndef NDEBUG + if (!Blocks.empty()) { + auto SameHeader = LIB[getHeader()]; + assert(contains(SameHeader) && getHeader() == SameHeader->getHeader() && + "Incorrect LI specified for this loop!"); + } +#endif + assert(NewBB && "Cannot add a null basic block to the loop!"); + assert(!LIB[NewBB] && "BasicBlock already in the loop!"); + + LoopT *L = static_cast<LoopT *>(this); + + // Add the loop mapping to the LoopInfo object... + LIB.BBMap[NewBB] = L; + + // Add the basic block to this loop and all parent loops... + while (L) { + L->addBlockEntry(NewBB); + L = L->getParentLoop(); + } +} + +/// replaceChildLoopWith - This is used when splitting loops up. It replaces +/// the OldChild entry in our children list with NewChild, and updates the +/// parent pointer of OldChild to be null and the NewChild to be this loop. +/// This updates the loop depth of the new child. +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::replaceChildLoopWith(LoopT *OldChild, + LoopT *NewChild) { + assert(!isInvalid() && "Loop not in a valid state!"); + assert(OldChild->ParentLoop == this && "This loop is already broken!"); + assert(!NewChild->ParentLoop && "NewChild already has a parent!"); + typename std::vector<LoopT *>::iterator I = find(SubLoops, OldChild); + assert(I != SubLoops.end() && "OldChild not in loop!"); + *I = NewChild; + OldChild->ParentLoop = nullptr; + NewChild->ParentLoop = static_cast<LoopT *>(this); +} + +/// verifyLoop - Verify loop structure +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::verifyLoop() const { + assert(!isInvalid() && "Loop not in a valid state!"); +#ifndef NDEBUG + assert(!Blocks.empty() && "Loop header is missing"); + + // Setup for using a depth-first iterator to visit every block in the loop. + SmallVector<BlockT *, 8> ExitBBs; + getExitBlocks(ExitBBs); + df_iterator_default_set<BlockT *> VisitSet; + VisitSet.insert(ExitBBs.begin(), ExitBBs.end()); + df_ext_iterator<BlockT *, df_iterator_default_set<BlockT *>> + BI = df_ext_begin(getHeader(), VisitSet), + BE = df_ext_end(getHeader(), VisitSet); + + // Keep track of the BBs visited. + SmallPtrSet<BlockT *, 8> VisitedBBs; + + // Check the individual blocks. 
+ for (; BI != BE; ++BI) { + BlockT *BB = *BI; + + assert(std::any_of(GraphTraits<BlockT *>::child_begin(BB), + GraphTraits<BlockT *>::child_end(BB), + [&](BlockT *B) { return contains(B); }) && + "Loop block has no in-loop successors!"); + + assert(std::any_of(GraphTraits<Inverse<BlockT *>>::child_begin(BB), + GraphTraits<Inverse<BlockT *>>::child_end(BB), + [&](BlockT *B) { return contains(B); }) && + "Loop block has no in-loop predecessors!"); + + SmallVector<BlockT *, 2> OutsideLoopPreds; + std::for_each(GraphTraits<Inverse<BlockT *>>::child_begin(BB), + GraphTraits<Inverse<BlockT *>>::child_end(BB), + [&](BlockT *B) { + if (!contains(B)) + OutsideLoopPreds.push_back(B); + }); + + if (BB == getHeader()) { + assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); + } else if (!OutsideLoopPreds.empty()) { + // A non-header loop shouldn't be reachable from outside the loop, + // though it is permitted if the predecessor is not itself actually + // reachable. + BlockT *EntryBB = &BB->getParent()->front(); + for (BlockT *CB : depth_first(EntryBB)) + for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) + assert(CB != OutsideLoopPreds[i] && + "Loop has multiple entry points!"); + } + assert(BB != &getHeader()->getParent()->front() && + "Loop contains function entry block!"); + + VisitedBBs.insert(BB); + } + + if (VisitedBBs.size() != getNumBlocks()) { + dbgs() << "The following blocks are unreachable in the loop: "; + for (auto BB : Blocks) { + if (!VisitedBBs.count(BB)) { + dbgs() << *BB << "\n"; + } + } + assert(false && "Unreachable block in loop"); + } + + // Check the subloops. + for (iterator I = begin(), E = end(); I != E; ++I) + // Each block in each subloop should be contained within this loop. + for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end(); + BI != BE; ++BI) { + assert(contains(*BI) && + "Loop does not contain all the blocks of a subloop!"); + } + + // Check the parent loop pointer. + if (ParentLoop) { + assert(is_contained(*ParentLoop, this) && + "Loop is not a subloop of its parent!"); + } +#endif +} + +/// verifyLoop - Verify loop structure of this loop and all nested loops. +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::verifyLoopNest( + DenseSet<const LoopT *> *Loops) const { + assert(!isInvalid() && "Loop not in a valid state!"); + Loops->insert(static_cast<const LoopT *>(this)); + // Verify this loop. + verifyLoop(); + // Verify the subloops. 
+ for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->verifyLoopNest(Loops); +} + +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, + bool Verbose) const { + OS.indent(Depth * 2); + if (static_cast<const LoopT *>(this)->isAnnotatedParallel()) + OS << "Parallel "; + OS << "Loop at depth " << getLoopDepth() << " containing: "; + + BlockT *H = getHeader(); + for (unsigned i = 0; i < getBlocks().size(); ++i) { + BlockT *BB = getBlocks()[i]; + if (!Verbose) { + if (i) + OS << ","; + BB->printAsOperand(OS, false); + } else + OS << "\n"; + + if (BB == H) + OS << "<header>"; + if (isLoopLatch(BB)) + OS << "<latch>"; + if (isLoopExiting(BB)) + OS << "<exiting>"; + if (Verbose) + BB->print(OS); + } + OS << "\n"; + + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->print(OS, Depth + 2); +} + +//===----------------------------------------------------------------------===// +/// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the +/// result does / not depend on use list (block predecessor) order. +/// + +/// Discover a subloop with the specified backedges such that: All blocks within +/// this loop are mapped to this loop or a subloop. And all subloops within this +/// loop have their parent loop set to this loop or a subloop. +template <class BlockT, class LoopT> +static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT *> Backedges, + LoopInfoBase<BlockT, LoopT> *LI, + const DomTreeBase<BlockT> &DomTree) { + typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits; + + unsigned NumBlocks = 0; + unsigned NumSubloops = 0; + + // Perform a backward CFG traversal using a worklist. + std::vector<BlockT *> ReverseCFGWorklist(Backedges.begin(), Backedges.end()); + while (!ReverseCFGWorklist.empty()) { + BlockT *PredBB = ReverseCFGWorklist.back(); + ReverseCFGWorklist.pop_back(); + + LoopT *Subloop = LI->getLoopFor(PredBB); + if (!Subloop) { + if (!DomTree.isReachableFromEntry(PredBB)) + continue; + + // This is an undiscovered block. Map it to the current loop. + LI->changeLoopFor(PredBB, L); + ++NumBlocks; + if (PredBB == L->getHeader()) + continue; + // Push all block predecessors on the worklist. + ReverseCFGWorklist.insert(ReverseCFGWorklist.end(), + InvBlockTraits::child_begin(PredBB), + InvBlockTraits::child_end(PredBB)); + } else { + // This is a discovered block. Find its outermost discovered loop. + while (LoopT *Parent = Subloop->getParentLoop()) + Subloop = Parent; + + // If it is already discovered to be a subloop of this loop, continue. + if (Subloop == L) + continue; + + // Discover a subloop of this loop. + Subloop->setParentLoop(L); + ++NumSubloops; + NumBlocks += Subloop->getBlocksVector().capacity(); + PredBB = Subloop->getHeader(); + // Continue traversal along predecessors that are not loop-back edges from + // within this subloop tree itself. Note that a predecessor may directly + // reach another subloop that is not yet discovered to be a subloop of + // this loop, which we must traverse. + for (const auto Pred : children<Inverse<BlockT *>>(PredBB)) { + if (LI->getLoopFor(Pred) != Subloop) + ReverseCFGWorklist.push_back(Pred); + } + } + } + L->getSubLoopsVector().reserve(NumSubloops); + L->reserveBlocks(NumBlocks); +} + +/// Populate all loop data in a stable order during a single forward DFS. 
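
// --- Editor's illustrative sketch (not part of the original header) ---
// How the analysis below is typically driven for LLVM IR: build a dominator
// tree for a function and let LoopInfo discover the loop forest from it. The
// function name is hypothetical, and llvm/IR/Dominators.h is assumed to be
// available; PopulateLoopsDFS and analyze(), defined next, do the real work.
inline void buildAndWalkLoopForest(Function &F) {
  DominatorTree DT(F); // Dominator tree over F's CFG.
  LoopInfo LI(DT);     // Internally runs LoopInfoBase::analyze(DT).
  for (Loop *L : LI.getLoopsInPreorder())
    (void)L;           // Visit every loop of the forest in preorder.
}
// -----------------------------------------------------------------------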
+template <class BlockT, class LoopT> class PopulateLoopsDFS { + typedef GraphTraits<BlockT *> BlockTraits; + typedef typename BlockTraits::ChildIteratorType SuccIterTy; + + LoopInfoBase<BlockT, LoopT> *LI; + +public: + PopulateLoopsDFS(LoopInfoBase<BlockT, LoopT> *li) : LI(li) {} + + void traverse(BlockT *EntryBlock); + +protected: + void insertIntoLoop(BlockT *Block); +}; + +/// Top-level driver for the forward DFS within the loop. +template <class BlockT, class LoopT> +void PopulateLoopsDFS<BlockT, LoopT>::traverse(BlockT *EntryBlock) { + for (BlockT *BB : post_order(EntryBlock)) + insertIntoLoop(BB); +} + +/// Add a single Block to its ancestor loops in PostOrder. If the block is a +/// subloop header, add the subloop to its parent in PostOrder, then reverse the +/// Block and Subloop vectors of the now complete subloop to achieve RPO. +template <class BlockT, class LoopT> +void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) { + LoopT *Subloop = LI->getLoopFor(Block); + if (Subloop && Block == Subloop->getHeader()) { + // We reach this point once per subloop after processing all the blocks in + // the subloop. + if (Subloop->getParentLoop()) + Subloop->getParentLoop()->getSubLoopsVector().push_back(Subloop); + else + LI->addTopLevelLoop(Subloop); + + // For convenience, Blocks and Subloops are inserted in postorder. Reverse + // the lists, except for the loop header, which is always at the beginning. + Subloop->reverseBlock(1); + std::reverse(Subloop->getSubLoopsVector().begin(), + Subloop->getSubLoopsVector().end()); + + Subloop = Subloop->getParentLoop(); + } + for (; Subloop; Subloop = Subloop->getParentLoop()) + Subloop->addBlockEntry(Block); +} + +/// Analyze LoopInfo discovers loops during a postorder DominatorTree traversal +/// interleaved with backward CFG traversals within each subloop +/// (discoverAndMapSubloop). The backward traversal skips inner subloops, so +/// this part of the algorithm is linear in the number of CFG edges. Subloop and +/// Block vectors are then populated during a single forward CFG traversal +/// (PopulateLoopDFS). +/// +/// During the two CFG traversals each block is seen three times: +/// 1) Discovered and mapped by a reverse CFG traversal. +/// 2) Visited during a forward DFS CFG traversal. +/// 3) Reverse-inserted in the loop in postorder following forward DFS. +/// +/// The Block vectors are inclusive, so step 3 requires loop-depth number of +/// insertions per block. +template <class BlockT, class LoopT> +void LoopInfoBase<BlockT, LoopT>::analyze(const DomTreeBase<BlockT> &DomTree) { + // Postorder traversal of the dominator tree. + const DomTreeNodeBase<BlockT> *DomRoot = DomTree.getRootNode(); + for (auto DomNode : post_order(DomRoot)) { + + BlockT *Header = DomNode->getBlock(); + SmallVector<BlockT *, 4> Backedges; + + // Check each predecessor of the potential loop header. + for (const auto Backedge : children<Inverse<BlockT *>>(Header)) { + // If Header dominates predBB, this is a new loop. Collect the backedges. + if (DomTree.dominates(Header, Backedge) && + DomTree.isReachableFromEntry(Backedge)) { + Backedges.push_back(Backedge); + } + } + // Perform a backward CFG traversal to discover and map blocks in this loop. + if (!Backedges.empty()) { + LoopT *L = AllocateLoop(Header); + discoverAndMapSubloop(L, ArrayRef<BlockT *>(Backedges), this, DomTree); + } + } + // Perform a single forward CFG traversal to populate block and subloop + // vectors for all loops. 
+ PopulateLoopsDFS<BlockT, LoopT> DFS(this); + DFS.traverse(DomRoot->getBlock()); +} + +template <class BlockT, class LoopT> +SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() { + SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist; + // The outer-most loop actually goes into the result in the same relative + // order as we walk it. But LoopInfo stores the top level loops in reverse + // program order so for here we reverse it to get forward program order. + // FIXME: If we change the order of LoopInfo we will want to remove the + // reverse here. + for (LoopT *RootL : reverse(*this)) { + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + LoopT *L = PreOrderWorklist.pop_back_val(); + // Sub-loops are stored in forward program order, but will process the + // worklist backwards so append them in reverse order. + PreOrderWorklist.append(L->rbegin(), L->rend()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + } + + return PreOrderLoops; +} + +template <class BlockT, class LoopT> +SmallVector<LoopT *, 4> +LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() { + SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist; + // The outer-most loop actually goes into the result in the same relative + // order as we walk it. LoopInfo stores the top level loops in reverse + // program order so we walk in order here. + // FIXME: If we change the order of LoopInfo we will want to add a reverse + // here. + for (LoopT *RootL : *this) { + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + LoopT *L = PreOrderWorklist.pop_back_val(); + // Sub-loops are stored in forward program order, but will process the + // worklist backwards so we can just append them in order. 
+ PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + } + + return PreOrderLoops; +} + +// Debugging +template <class BlockT, class LoopT> +void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const { + for (unsigned i = 0; i < TopLevelLoops.size(); ++i) + TopLevelLoops[i]->print(OS); +#if 0 + for (DenseMap<BasicBlock*, LoopT*>::const_iterator I = BBMap.begin(), + E = BBMap.end(); I != E; ++I) + OS << "BB '" << I->first->getName() << "' level = " + << I->second->getLoopDepth() << "\n"; +#endif +} + +template <typename T> +bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) { + llvm::sort(BB1); + llvm::sort(BB2); + return BB1 == BB2; +} + +template <class BlockT, class LoopT> +void addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders, + const LoopInfoBase<BlockT, LoopT> &LI, + const LoopT &L) { + LoopHeaders[L.getHeader()] = &L; + for (LoopT *SL : L) + addInnerLoopsToHeadersMap(LoopHeaders, LI, *SL); +} + +#ifndef NDEBUG +template <class BlockT, class LoopT> +static void compareLoops(const LoopT *L, const LoopT *OtherL, + DenseMap<BlockT *, const LoopT *> &OtherLoopHeaders) { + BlockT *H = L->getHeader(); + BlockT *OtherH = OtherL->getHeader(); + assert(H == OtherH && + "Mismatched headers even though found in the same map entry!"); + + assert(L->getLoopDepth() == OtherL->getLoopDepth() && + "Mismatched loop depth!"); + const LoopT *ParentL = L, *OtherParentL = OtherL; + do { + assert(ParentL->getHeader() == OtherParentL->getHeader() && + "Mismatched parent loop headers!"); + ParentL = ParentL->getParentLoop(); + OtherParentL = OtherParentL->getParentLoop(); + } while (ParentL); + + for (const LoopT *SubL : *L) { + BlockT *SubH = SubL->getHeader(); + const LoopT *OtherSubL = OtherLoopHeaders.lookup(SubH); + assert(OtherSubL && "Inner loop is missing in computed loop info!"); + OtherLoopHeaders.erase(SubH); + compareLoops(SubL, OtherSubL, OtherLoopHeaders); + } + + std::vector<BlockT *> BBs = L->getBlocks(); + std::vector<BlockT *> OtherBBs = OtherL->getBlocks(); + assert(compareVectors(BBs, OtherBBs) && + "Mismatched basic blocks in the loops!"); + + const SmallPtrSetImpl<const BlockT *> &BlocksSet = L->getBlocksSet(); + const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet = L->getBlocksSet(); + assert(BlocksSet.size() == OtherBlocksSet.size() && + std::all_of(BlocksSet.begin(), BlocksSet.end(), + [&OtherBlocksSet](const BlockT *BB) { + return OtherBlocksSet.count(BB); + }) && + "Mismatched basic blocks in BlocksSets!"); +} +#endif + +template <class BlockT, class LoopT> +void LoopInfoBase<BlockT, LoopT>::verify( + const DomTreeBase<BlockT> &DomTree) const { + DenseSet<const LoopT *> Loops; + for (iterator I = begin(), E = end(); I != E; ++I) { + assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + (*I)->verifyLoopNest(&Loops); + } + +// Verify that blocks are mapped to valid loops. +#ifndef NDEBUG + for (auto &Entry : BBMap) { + const BlockT *BB = Entry.first; + LoopT *L = Entry.second; + assert(Loops.count(L) && "orphaned loop"); + assert(L->contains(BB) && "orphaned block"); + for (LoopT *ChildLoop : *L) + assert(!ChildLoop->contains(BB) && + "BBMap should point to the innermost loop containing BB"); + } + + // Recompute LoopInfo to verify loops structure. + LoopInfoBase<BlockT, LoopT> OtherLI; + OtherLI.analyze(DomTree); + + // Build a map we can use to move from our LI to the computed one. 
This + // allows us to ignore the particular order in any layer of the loop forest + // while still comparing the structure. + DenseMap<BlockT *, const LoopT *> OtherLoopHeaders; + for (LoopT *L : OtherLI) + addInnerLoopsToHeadersMap(OtherLoopHeaders, OtherLI, *L); + + // Walk the top level loops and ensure there is a corresponding top-level + // loop in the computed version and then recursively compare those loop + // nests. + for (LoopT *L : *this) { + BlockT *Header = L->getHeader(); + const LoopT *OtherL = OtherLoopHeaders.lookup(Header); + assert(OtherL && "Top level loop is missing in computed loop info!"); + // Now that we've matched this loop, erase its header from the map. + OtherLoopHeaders.erase(Header); + // And recursively compare these loops. + compareLoops(L, OtherL, OtherLoopHeaders); + } + + // Any remaining entries in the map are loops which were found when computing + // a fresh LoopInfo but not present in the current one. + if (!OtherLoopHeaders.empty()) { + for (const auto &HeaderAndLoop : OtherLoopHeaders) + dbgs() << "Found new loop: " << *HeaderAndLoop.second << "\n"; + llvm_unreachable("Found new loops when recomputing LoopInfo!"); + } +#endif +} + +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LoopIterator.h b/clang-r353983e/include/llvm/Analysis/LoopIterator.h new file mode 100644 index 00000000..fa4da428 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LoopIterator.h @@ -0,0 +1,259 @@ +//===--------- LoopIterator.h - Iterate over loop blocks --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines iterators to visit the basic blocks within a loop. +// +// These iterators currently visit blocks within subloops as well. +// Unfortunately we have no efficient way of summarizing loop exits which would +// allow skipping subloops during traversal. +// +// If you want to visit all blocks in a loop and don't need an ordered traveral, +// use Loop::block_begin() instead. +// +// This is intentionally designed to work with ill-formed loops in which the +// backedge has been deleted. The only prerequisite is that all blocks +// contained within the loop according to the most recent LoopInfo analysis are +// reachable from the loop header. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPITERATOR_H +#define LLVM_ANALYSIS_LOOPITERATOR_H + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/LoopInfo.h" + +namespace llvm { + +class LoopBlocksTraversal; + +// A traits type that is intended to be used in graph algorithms. The graph +// traits starts at the loop header, and traverses the BasicBlocks that are in +// the loop body, but not the loop header. Since the loop header is skipped, +// the back edges are excluded. +// +// TODO: Explore the possibility to implement LoopBlocksTraversal in terms of +// LoopBodyTraits, so that insertEdge doesn't have to be specialized. +struct LoopBodyTraits { + using NodeRef = std::pair<const Loop *, BasicBlock *>; + + // This wraps a const Loop * into the iterator, so we know which edges to + // filter out. 
+ class WrappedSuccIterator + : public iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits<succ_iterator>::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { + using BaseT = iterator_adaptor_base< + WrappedSuccIterator, succ_iterator, + typename std::iterator_traits<succ_iterator>::iterator_category, + NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>; + + const Loop *L; + + public: + WrappedSuccIterator(succ_iterator Begin, const Loop *L) + : BaseT(Begin), L(L) {} + + NodeRef operator*() const { return {L, *I}; } + }; + + struct LoopBodyFilter { + bool operator()(NodeRef N) const { + const Loop *L = N.first; + return N.second != L->getHeader() && L->contains(N.second); + } + }; + + using ChildIteratorType = + filter_iterator<WrappedSuccIterator, LoopBodyFilter>; + + static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; } + + static ChildIteratorType child_begin(NodeRef Node) { + return make_filter_range(make_range<WrappedSuccIterator>( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .begin(); + } + + static ChildIteratorType child_end(NodeRef Node) { + return make_filter_range(make_range<WrappedSuccIterator>( + {succ_begin(Node.second), Node.first}, + {succ_end(Node.second), Node.first}), + LoopBodyFilter{}) + .end(); + } +}; + +/// Store the result of a depth first search within basic blocks contained by a +/// single loop. +/// +/// TODO: This could be generalized for any CFG region, or the entire CFG. +class LoopBlocksDFS { +public: + /// Postorder list iterators. + typedef std::vector<BasicBlock*>::const_iterator POIterator; + typedef std::vector<BasicBlock*>::const_reverse_iterator RPOIterator; + + friend class LoopBlocksTraversal; + +private: + Loop *L; + + /// Map each block to its postorder number. A block is only mapped after it is + /// preorder visited by DFS. It's postorder number is initially zero and set + /// to nonzero after it is finished by postorder traversal. + DenseMap<BasicBlock*, unsigned> PostNumbers; + std::vector<BasicBlock*> PostBlocks; + +public: + LoopBlocksDFS(Loop *Container) : + L(Container), PostNumbers(NextPowerOf2(Container->getNumBlocks())) { + PostBlocks.reserve(Container->getNumBlocks()); + } + + Loop *getLoop() const { return L; } + + /// Traverse the loop blocks and store the DFS result. + void perform(LoopInfo *LI); + + /// Return true if postorder numbers are assigned to all loop blocks. + bool isComplete() const { return PostBlocks.size() == L->getNumBlocks(); } + + /// Iterate over the cached postorder blocks. + POIterator beginPostorder() const { + assert(isComplete() && "bad loop DFS"); + return PostBlocks.begin(); + } + POIterator endPostorder() const { return PostBlocks.end(); } + + /// Reverse iterate over the cached postorder blocks. + RPOIterator beginRPO() const { + assert(isComplete() && "bad loop DFS"); + return PostBlocks.rbegin(); + } + RPOIterator endRPO() const { return PostBlocks.rend(); } + + /// Return true if this block has been preorder visited. + bool hasPreorder(BasicBlock *BB) const { return PostNumbers.count(BB); } + + /// Return true if this block has a postorder number. + bool hasPostorder(BasicBlock *BB) const { + DenseMap<BasicBlock*, unsigned>::const_iterator I = PostNumbers.find(BB); + return I != PostNumbers.end() && I->second; + } + + /// Get a block's postorder number. 
+ unsigned getPostorder(BasicBlock *BB) const { + DenseMap<BasicBlock*, unsigned>::const_iterator I = PostNumbers.find(BB); + assert(I != PostNumbers.end() && "block not visited by DFS"); + assert(I->second && "block not finished by DFS"); + return I->second; + } + + /// Get a block's reverse postorder number. + unsigned getRPO(BasicBlock *BB) const { + return 1 + PostBlocks.size() - getPostorder(BB); + } + + void clear() { + PostNumbers.clear(); + PostBlocks.clear(); + } +}; + +/// Wrapper class to LoopBlocksDFS that provides a standard begin()/end() +/// interface for the DFS reverse post-order traversal of blocks in a loop body. +class LoopBlocksRPO { +private: + LoopBlocksDFS DFS; + +public: + LoopBlocksRPO(Loop *Container) : DFS(Container) {} + + /// Traverse the loop blocks and store the DFS result. + void perform(LoopInfo *LI) { + DFS.perform(LI); + } + + /// Reverse iterate over the cached postorder blocks. + LoopBlocksDFS::RPOIterator begin() const { return DFS.beginRPO(); } + LoopBlocksDFS::RPOIterator end() const { return DFS.endRPO(); } +}; + +/// Specialize po_iterator_storage to record postorder numbers. +template<> class po_iterator_storage<LoopBlocksTraversal, true> { + LoopBlocksTraversal &LBT; +public: + po_iterator_storage(LoopBlocksTraversal &lbs) : LBT(lbs) {} + // These functions are defined below. + bool insertEdge(Optional<BasicBlock *> From, BasicBlock *To); + void finishPostorder(BasicBlock *BB); +}; + +/// Traverse the blocks in a loop using a depth-first search. +class LoopBlocksTraversal { +public: + /// Graph traversal iterator. + typedef po_iterator<BasicBlock*, LoopBlocksTraversal, true> POTIterator; + +private: + LoopBlocksDFS &DFS; + LoopInfo *LI; + +public: + LoopBlocksTraversal(LoopBlocksDFS &Storage, LoopInfo *LInfo) : + DFS(Storage), LI(LInfo) {} + + /// Postorder traversal over the graph. This only needs to be done once. + /// po_iterator "automatically" calls back to visitPreorder and + /// finishPostorder to record the DFS result. + POTIterator begin() { + assert(DFS.PostBlocks.empty() && "Need clear DFS result before traversing"); + assert(DFS.L->getNumBlocks() && "po_iterator cannot handle an empty graph"); + return po_ext_begin(DFS.L->getHeader(), *this); + } + POTIterator end() { + // po_ext_end interface requires a basic block, but ignores its value. + return po_ext_end(DFS.L->getHeader(), *this); + } + + /// Called by po_iterator upon reaching a block via a CFG edge. If this block + /// is contained in the loop and has not been visited, then mark it preorder + /// visited and return true. + /// + /// TODO: If anyone is interested, we could record preorder numbers here. + bool visitPreorder(BasicBlock *BB) { + if (!DFS.L->contains(LI->getLoopFor(BB))) + return false; + + return DFS.PostNumbers.insert(std::make_pair(BB, 0)).second; + } + + /// Called by po_iterator each time it advances, indicating a block's + /// postorder. 
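
  // --- Editor's illustrative sketch (not part of the original header) ---
  // Typical client-side use of the LoopBlocksRPO wrapper declared above, kept
  // in comment form; 'L', 'LI' and 'processBlock' are hypothetical names.
  //
  //   LoopBlocksRPO RPOT(L);      // L is a Loop*.
  //   RPOT.perform(LI);           // One DFS over the loop body (LI: LoopInfo*).
  //   for (BasicBlock *BB : RPOT) // Blocks in reverse post-order.
  //     processBlock(BB);
  // -----------------------------------------------------------------------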
+ void finishPostorder(BasicBlock *BB) { + assert(DFS.PostNumbers.count(BB) && "Loop DFS skipped preorder"); + DFS.PostBlocks.push_back(BB); + DFS.PostNumbers[BB] = DFS.PostBlocks.size(); + } +}; + +inline bool po_iterator_storage<LoopBlocksTraversal, true>::insertEdge( + Optional<BasicBlock *> From, BasicBlock *To) { + return LBT.visitPreorder(To); +} + +inline void po_iterator_storage<LoopBlocksTraversal, true>:: +finishPostorder(BasicBlock *BB) { + LBT.finishPostorder(BB); +} + +} // End namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LoopPass.h b/clang-r353983e/include/llvm/Analysis/LoopPass.h new file mode 100644 index 00000000..9215ab34 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LoopPass.h @@ -0,0 +1,178 @@ +//===- LoopPass.h - LoopPass class ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines LoopPass class. All loop optimization +// and transformation passes are derived from LoopPass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPPASS_H +#define LLVM_ANALYSIS_LOOPPASS_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/LegacyPassManagers.h" +#include "llvm/Pass.h" +#include <deque> + +namespace llvm { + +class LPPassManager; +class Function; +class PMStack; + +class LoopPass : public Pass { +public: + explicit LoopPass(char &pid) : Pass(PT_Loop, pid) {} + + /// getPrinterPass - Get a pass to print the function corresponding + /// to a Loop. + Pass *createPrinterPass(raw_ostream &O, + const std::string &Banner) const override; + + // runOnLoop - This method should be implemented by the subclass to perform + // whatever action is necessary for the specified Loop. + virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0; + + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + + // Initialization and finalization hooks. + virtual bool doInitialization(Loop *L, LPPassManager &LPM) { + return false; + } + + // Finalization hook does not supply Loop because at this time + // loop nest is completely different. + virtual bool doFinalization() { return false; } + + // Check if this pass is suitable for the current LPPassManager, if + // available. This pass P is not suitable for a LPPassManager if P + // is not preserving higher level analysis info used by other + // LPPassManager passes. In such case, pop LPPassManager from the + // stack. This will force assignPassManager() to create new + // LPPassManger as expected. + void preparePassManager(PMStack &PMS) override; + + /// Assign pass manager to manage this pass + void assignPassManager(PMStack &PMS, PassManagerType PMT) override; + + /// Return what kind of Pass Manager can manage this pass. + PassManagerType getPotentialPassManagerType() const override { + return PMT_LoopPassManager; + } + + //===--------------------------------------------------------------------===// + /// SimpleAnalysis - Provides simple interface to update analysis info + /// maintained by various passes. Note, if required this interface can + /// be extracted into a separate abstract class but it would require + /// additional use of multiple inheritance in Pass class hierarchy, something + /// we are trying to avoid. 
+ + /// Each loop pass can override these simple analysis hooks to update + /// desired analysis information. + /// cloneBasicBlockAnalysis - Clone analysis info associated with basic block. + virtual void cloneBasicBlockAnalysis(BasicBlock *F, BasicBlock *T, Loop *L) {} + + /// deleteAnalysisValue - Delete analysis info associated with value V. + virtual void deleteAnalysisValue(Value *V, Loop *L) {} + + /// Delete analysis info associated with Loop L. + /// Called to notify a Pass that a loop has been deleted and any + /// associated analysis values can be deleted. + virtual void deleteAnalysisLoop(Loop *L) {} + +protected: + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when Attribute::OptimizeNone is set or when + /// optimization bisect is over the limit. + bool skipLoop(const Loop *L) const; +}; + +class LPPassManager : public FunctionPass, public PMDataManager { +public: + static char ID; + explicit LPPassManager(); + + /// run - Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the module, and if so, return true. + bool runOnFunction(Function &F) override; + + /// Pass Manager itself does not invalidate any analysis info. + // LPPassManager needs LoopInfo. + void getAnalysisUsage(AnalysisUsage &Info) const override; + + StringRef getPassName() const override { return "Loop Pass Manager"; } + + PMDataManager *getAsPMDataManager() override { return this; } + Pass *getAsPass() override { return this; } + + /// Print passes managed by this manager + void dumpPassStructure(unsigned Offset) override; + + LoopPass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + LoopPass *LP = static_cast<LoopPass *>(PassVector[N]); + return LP; + } + + PassManagerType getPassManagerType() const override { + return PMT_LoopPassManager; + } + +public: + // Add a new loop into the loop queue. + void addLoop(Loop &L); + + // Mark \p L as deleted. + void markLoopAsDeleted(Loop &L); + + //===--------------------------------------------------------------------===// + /// SimpleAnalysis - Provides simple interface to update analysis info + /// maintained by various passes. Note, if required this interface can + /// be extracted into a separate abstract class but it would require + /// additional use of multiple inheritance in Pass class hierarchy, something + /// we are trying to avoid. + + /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for + /// all passes that implement simple analysis interface. + void cloneBasicBlockSimpleAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); + + /// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes + /// that implement simple analysis interface. + void deleteSimpleAnalysisValue(Value *V, Loop *L); + + /// Invoke deleteAnalysisLoop hook for all passes that implement simple + /// analysis interface. + void deleteSimpleAnalysisLoop(Loop *L); + +private: + std::deque<Loop *> LQ; + LoopInfo *LI; + Loop *CurrentLoop; + bool CurrentLoopDeleted; +}; + +// This pass is required by the LCSSA transformation. It is used inside +// LPPassManager to check if current pass preserves LCSSA form, and if it does +// pass manager calls lcssa verification for the current loop. 
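
// --- Editor's illustrative sketch (not part of the original header) ---
// A minimal legacy-PM loop pass built on the LoopPass interface above. The
// pass name and ID handling are simplified assumptions; real passes also go
// through the INITIALIZE_PASS/RegisterPass registration machinery.
namespace {
struct CountLoopBlocksPass : public LoopPass {
  static char ID;
  CountLoopBlocksPass() : LoopPass(ID) {}

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))         // Honor optnone and opt-bisect limits.
      return false;
    (void)L->getNumBlocks(); // Inspect the loop; make no IR changes.
    return false;            // Report that nothing was modified.
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();    // Analysis-style pass: preserves everything.
  }
};
char CountLoopBlocksPass::ID = 0;
} // end anonymous namespace
// -----------------------------------------------------------------------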
+struct LCSSAVerificationPass : public FunctionPass { + static char ID; + LCSSAVerificationPass() : FunctionPass(ID) { + initializeLCSSAVerificationPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { return false; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/LoopUnrollAnalyzer.h b/clang-r353983e/include/llvm/Analysis/LoopUnrollAnalyzer.h new file mode 100644 index 00000000..5f332e3c --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -0,0 +1,94 @@ +//===- llvm/Analysis/LoopUnrollAnalyzer.h - Loop Unroll Analyzer-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements UnrolledInstAnalyzer class. It's used for predicting +// potential effects that loop unrolling might have, such as enabling constant +// propagation and other optimizations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPUNROLLANALYZER_H +#define LLVM_ANALYSIS_LOOPUNROLLANALYZER_H + +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/InstVisitor.h" + +// This class is used to get an estimate of the optimization effects that we +// could get from complete loop unrolling. It comes from the fact that some +// loads might be replaced with concrete constant values and that could trigger +// a chain of instruction simplifications. +// +// E.g. we might have: +// int a[] = {0, 1, 0}; +// v = 0; +// for (i = 0; i < 3; i ++) +// v += b[i]*a[i]; +// If we completely unroll the loop, we would get: +// v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2] +// Which then will be simplified to: +// v = b[0]* 0 + b[1]* 1 + b[2]* 0 +// And finally: +// v = b[1] +namespace llvm { +class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { + typedef InstVisitor<UnrolledInstAnalyzer, bool> Base; + friend class InstVisitor<UnrolledInstAnalyzer, bool>; + struct SimplifiedAddress { + Value *Base = nullptr; + ConstantInt *Offset = nullptr; + }; + +public: + UnrolledInstAnalyzer(unsigned Iteration, + DenseMap<Value *, Constant *> &SimplifiedValues, + ScalarEvolution &SE, const Loop *L) + : SimplifiedValues(SimplifiedValues), SE(SE), L(L) { + IterationNumber = SE.getConstant(APInt(64, Iteration)); + } + + // Allow access to the initial visit method. + using Base::visit; + +private: + /// A cache of pointer bases and constant-folded offsets corresponding + /// to GEP (or derived from GEP) instructions. + /// + /// In order to find the base pointer one needs to perform non-trivial + /// traversal of the corresponding SCEV expression, so it's good to have the + /// results saved. + DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses; + + /// SCEV expression corresponding to number of currently simulated + /// iteration. + const SCEV *IterationNumber; + + /// A Value->Constant map for keeping values that we managed to + /// constant-fold on the given iteration. + /// + /// While we walk the loop instructions, we build up and maintain a mapping + /// of simplified values specific to this iteration. 
The idea is to propagate + /// any special information we have about loads that can be replaced with + /// constants after complete unrolling, and account for likely simplifications + /// post-unrolling. + DenseMap<Value *, Constant *> &SimplifiedValues; + + ScalarEvolution &SE; + const Loop *L; + + bool simplifyInstWithSCEV(Instruction *I); + + bool visitInstruction(Instruction &I) { return simplifyInstWithSCEV(&I); } + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitCastInst(CastInst &I); + bool visitCmpInst(CmpInst &I); + bool visitPHINode(PHINode &PN); +}; +} +#endif diff --git a/clang-r353983e/include/llvm/Analysis/MemoryBuiltins.h b/clang-r353983e/include/llvm/Analysis/MemoryBuiltins.h new file mode 100644 index 00000000..49b87a43 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/MemoryBuiltins.h @@ -0,0 +1,311 @@ +//==- llvm/Analysis/MemoryBuiltins.h - Calls to memory builtins --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions identifies calls to builtin functions that allocate +// or free memory. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H +#define LLVM_ANALYSIS_MEMORYBUILTINS_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/TargetFolder.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/ValueHandle.h" +#include <cstdint> +#include <utility> + +namespace llvm { + +class AllocaInst; +class Argument; +class CallInst; +class ConstantInt; +class ConstantPointerNull; +class DataLayout; +class ExtractElementInst; +class ExtractValueInst; +class GEPOperator; +class GlobalAlias; +class GlobalVariable; +class Instruction; +class IntegerType; +class IntrinsicInst; +class IntToPtrInst; +class LLVMContext; +class LoadInst; +class PHINode; +class PointerType; +class SelectInst; +class TargetLibraryInfo; +class Type; +class UndefValue; +class Value; + +/// Tests if a value is a call or invoke to a library function that +/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup +/// like). +bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a function that returns a +/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). +bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory (such as malloc). +bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates zero-filled memory (such as calloc). +bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory similar to malloc or calloc. 
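
// --- Editor's illustrative sketch (not part of the original header) ---
// How clients typically combine the predicates declared above to classify an
// allocation site; the enum and function name are assumptions for
// illustration, and TLI comes from TargetLibraryInfo for the module.
enum class ExampleAllocKind { None, Uninitialized, ZeroFilled, Other };

inline ExampleAllocKind classifyAllocation(const Value *V,
                                           const TargetLibraryInfo *TLI) {
  if (isMallocLikeFn(V, TLI)) // malloc-like: memory is uninitialized.
    return ExampleAllocKind::Uninitialized;
  if (isCallocLikeFn(V, TLI)) // calloc-like: memory is zero-filled.
    return ExampleAllocKind::ZeroFilled;
  if (isAllocationFn(V, TLI)) // realloc/strdup-like and other allocators.
    return ExampleAllocKind::Other;
  return ExampleAllocKind::None;
}
// -----------------------------------------------------------------------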
+bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (either malloc, calloc, or strdup like). +bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +//===----------------------------------------------------------------------===// +// malloc Call Utility Functions. +// + +/// extractMallocCall - Returns the corresponding CallInst if the instruction +/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we +/// ignore InvokeInst here. +const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); +inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) { + return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI)); +} + +/// getMallocType - Returns the PointerType resulting from the malloc call. +/// The PointerType depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +PointerType *getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI); + +/// getMallocAllocatedType - Returns the Type allocated by malloc call. +/// The Type depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +Type *getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI); + +/// getMallocArraySize - Returns the array size of a malloc call. If the +/// argument passed to malloc is a multiple of the size of the malloced type, +/// then return that multiple. For non-array mallocs, the multiple is +/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be +/// determined. +Value *getMallocArraySize(CallInst *CI, const DataLayout &DL, + const TargetLibraryInfo *TLI, + bool LookThroughSExt = false); + +//===----------------------------------------------------------------------===// +// calloc Call Utility Functions. +// + +/// extractCallocCall - Returns the corresponding CallInst if the instruction +/// is a calloc call. +const CallInst *extractCallocCall(const Value *I, const TargetLibraryInfo *TLI); +inline CallInst *extractCallocCall(Value *I, const TargetLibraryInfo *TLI) { + return const_cast<CallInst*>(extractCallocCall((const Value*)I, TLI)); +} + + +//===----------------------------------------------------------------------===// +// free Call Utility Functions. +// + +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI); + +inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { + return const_cast<CallInst*>(isFreeCall((const Value*)I, TLI)); +} + +//===----------------------------------------------------------------------===// +// Utility functions to compute size of objects. +// + +/// Various options to control the behavior of getObjectSize. +struct ObjectSizeOpts { + /// Controls how we handle conditional statements with unknown conditions. + enum class Mode : uint8_t { + /// Fail to evaluate an unknown condition. + Exact, + /// Evaluate all branches of an unknown condition. If all evaluations + /// succeed, pick the minimum size. 
+ Min, + /// Same as Min, except we pick the maximum size of all of the branches. + Max + }; + + /// How we want to evaluate this object's size. + Mode EvalMode = Mode::Exact; + /// Whether to round the result up to the alignment of allocas, byval + /// arguments, and global variables. + bool RoundToAlign = false; + /// If this is true, null pointers in address space 0 will be treated as + /// though they can't be evaluated. Otherwise, null is always considered to + /// point to a 0 byte region of memory. + bool NullIsUnknownSize = false; +}; + +/// Compute the size of the object pointed by Ptr. Returns true and the +/// object size in Size if successful, and false otherwise. In this context, by +/// object we mean the region of memory starting at Ptr to the end of the +/// underlying object pointed to by Ptr. +bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, + const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {}); + +/// Try to turn a call to \@llvm.objectsize into an integer value of the given +/// Type. Returns null on failure. If MustSucceed is true, this function will +/// not return null, and may return conservative values governed by the second +/// argument of the call to objectsize. +Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, + const TargetLibraryInfo *TLI, bool MustSucceed); + + + +using SizeOffsetType = std::pair<APInt, APInt>; + +/// Evaluate the size and offset of an object pointed to by a Value* +/// statically. Fails if size or offset are not known at compile time. +class ObjectSizeOffsetVisitor + : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> { + const DataLayout &DL; + const TargetLibraryInfo *TLI; + ObjectSizeOpts Options; + unsigned IntTyBits; + APInt Zero; + SmallPtrSet<Instruction *, 8> SeenInsts; + + APInt align(APInt Size, uint64_t Align); + + SizeOffsetType unknown() { + return std::make_pair(APInt(), APInt()); + } + +public: + ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI, + LLVMContext &Context, ObjectSizeOpts Options = {}); + + SizeOffsetType compute(Value *V); + + static bool knownSize(const SizeOffsetType &SizeOffset) { + return SizeOffset.first.getBitWidth() > 1; + } + + static bool knownOffset(const SizeOffsetType &SizeOffset) { + return SizeOffset.second.getBitWidth() > 1; + } + + static bool bothKnown(const SizeOffsetType &SizeOffset) { + return knownSize(SizeOffset) && knownOffset(SizeOffset); + } + + // These are "private", except they can't actually be made private. Only + // compute() should be used by external users. 
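
  // --- Editor's illustrative sketch (not part of the original header) ---
  // Typical use of getObjectSize() and ObjectSizeOpts declared above, kept in
  // comment form; 'Ptr', 'DL' and 'TLI' come from the calling pass.
  //
  //   ObjectSizeOpts Opts;
  //   Opts.EvalMode = ObjectSizeOpts::Mode::Min; // Pick the minimum over branches.
  //   Opts.NullIsUnknownSize = true;             // Do not treat null as 0 bytes.
  //   uint64_t Size;
  //   if (getObjectSize(Ptr, Size, DL, TLI, Opts))
  //     /* Size is the byte size of the object Ptr points into */;
  // -----------------------------------------------------------------------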
+ SizeOffsetType visitAllocaInst(AllocaInst &I); + SizeOffsetType visitArgument(Argument &A); + SizeOffsetType visitCallSite(CallSite CS); + SizeOffsetType visitConstantPointerNull(ConstantPointerNull&); + SizeOffsetType visitExtractElementInst(ExtractElementInst &I); + SizeOffsetType visitExtractValueInst(ExtractValueInst &I); + SizeOffsetType visitGEPOperator(GEPOperator &GEP); + SizeOffsetType visitGlobalAlias(GlobalAlias &GA); + SizeOffsetType visitGlobalVariable(GlobalVariable &GV); + SizeOffsetType visitIntToPtrInst(IntToPtrInst&); + SizeOffsetType visitLoadInst(LoadInst &I); + SizeOffsetType visitPHINode(PHINode&); + SizeOffsetType visitSelectInst(SelectInst &I); + SizeOffsetType visitUndefValue(UndefValue&); + SizeOffsetType visitInstruction(Instruction &I); + +private: + bool CheckedZextOrTrunc(APInt &I); +}; + +using SizeOffsetEvalType = std::pair<Value *, Value *>; + +/// Evaluate the size and offset of an object pointed to by a Value*. +/// May create code to compute the result at run-time. +class ObjectSizeOffsetEvaluator + : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> { + using BuilderTy = IRBuilder<TargetFolder>; + using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>; + using CacheMapTy = DenseMap<const Value *, WeakEvalType>; + using PtrSetTy = SmallPtrSet<const Value *, 8>; + + const DataLayout &DL; + const TargetLibraryInfo *TLI; + LLVMContext &Context; + BuilderTy Builder; + IntegerType *IntTy; + Value *Zero; + CacheMapTy CacheMap; + PtrSetTy SeenVals; + ObjectSizeOpts EvalOpts; + + SizeOffsetEvalType compute_(Value *V); + +public: + static SizeOffsetEvalType unknown() { + return std::make_pair(nullptr, nullptr); + } + + ObjectSizeOffsetEvaluator(const DataLayout &DL, const TargetLibraryInfo *TLI, + LLVMContext &Context, ObjectSizeOpts EvalOpts = {}); + + SizeOffsetEvalType compute(Value *V); + + bool knownSize(SizeOffsetEvalType SizeOffset) { + return SizeOffset.first; + } + + bool knownOffset(SizeOffsetEvalType SizeOffset) { + return SizeOffset.second; + } + + bool anyKnown(SizeOffsetEvalType SizeOffset) { + return knownSize(SizeOffset) || knownOffset(SizeOffset); + } + + bool bothKnown(SizeOffsetEvalType SizeOffset) { + return knownSize(SizeOffset) && knownOffset(SizeOffset); + } + + // The individual instruction visitors should be treated as private. + SizeOffsetEvalType visitAllocaInst(AllocaInst &I); + SizeOffsetEvalType visitCallSite(CallSite CS); + SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I); + SizeOffsetEvalType visitExtractValueInst(ExtractValueInst &I); + SizeOffsetEvalType visitGEPOperator(GEPOperator &GEP); + SizeOffsetEvalType visitIntToPtrInst(IntToPtrInst&); + SizeOffsetEvalType visitLoadInst(LoadInst &I); + SizeOffsetEvalType visitPHINode(PHINode &PHI); + SizeOffsetEvalType visitSelectInst(SelectInst &I); + SizeOffsetEvalType visitInstruction(Instruction &I); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYBUILTINS_H diff --git a/clang-r353983e/include/llvm/Analysis/MemoryDependenceAnalysis.h b/clang-r353983e/include/llvm/Analysis/MemoryDependenceAnalysis.h new file mode 100644 index 00000000..de574990 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -0,0 +1,544 @@ +//===- llvm/Analysis/MemoryDependenceAnalysis.h - Memory Deps ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the MemoryDependenceAnalysis analysis pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H +#define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PointerSumType.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> +#include <utility> +#include <vector> + +namespace llvm { + +class AssumptionCache; +class DominatorTree; +class Function; +class Instruction; +class LoadInst; +class PHITransAddr; +class TargetLibraryInfo; +class PhiValues; +class Value; + +/// A memory dependence query can return one of three different answers. +class MemDepResult { + enum DepType { + /// Clients of MemDep never see this. + /// + /// Entries with this marker occur in a LocalDeps map or NonLocalDeps map + /// when the instruction they previously referenced was removed from + /// MemDep. In either case, the entry may include an instruction pointer. + /// If so, the pointer is an instruction in the block where scanning can + /// start from, saving some work. + /// + /// In a default-constructed MemDepResult object, the type will be Invalid + /// and the instruction pointer will be null. + Invalid = 0, + + /// This is a dependence on the specified instruction which clobbers the + /// desired value. The pointer member of the MemDepResult pair holds the + /// instruction that clobbers the memory. For example, this occurs when we + /// see a may-aliased store to the memory location we care about. + /// + /// There are several cases that may be interesting here: + /// 1. Loads are clobbered by may-alias stores. + /// 2. Loads are considered clobbered by partially-aliased loads. The + /// client may choose to analyze deeper into these cases. + Clobber, + + /// This is a dependence on the specified instruction which defines or + /// produces the desired memory location. The pointer member of the + /// MemDepResult pair holds the instruction that defines the memory. + /// + /// Cases of interest: + /// 1. This could be a load or store for dependence queries on + /// load/store. The value loaded or stored is the produced value. + /// Note that the pointer operand may be different than that of the + /// queried pointer due to must aliases and phi translation. Note + /// that the def may not be the same type as the query, the pointers + /// may just be must aliases. + /// 2. For loads and stores, this could be an allocation instruction. In + /// this case, the load is loading an undef value or a store is the + /// first store to (that part of) the allocation. + /// 3. Dependence queries on calls return Def only when they are readonly + /// calls or memory use intrinsics with identical callees and no + /// intervening clobbers. No validation is done that the operands to + /// the calls are the same. 
+ Def, + + /// This marker indicates that the query has no known dependency in the + /// specified block. + /// + /// More detailed state info is encoded in the upper part of the pair (i.e. + /// the Instruction*) + Other + }; + + /// If DepType is "Other", the upper part of the sum type is an encoding of + /// the following more detailed type information. + enum OtherType { + /// This marker indicates that the query has no dependency in the specified + /// block. + /// + /// To find out more, the client should query other predecessor blocks. + NonLocal = 1, + /// This marker indicates that the query has no dependency in the specified + /// function. + NonFuncLocal, + /// This marker indicates that the query dependency is unknown. + Unknown + }; + + using ValueTy = PointerSumType< + DepType, PointerSumTypeMember<Invalid, Instruction *>, + PointerSumTypeMember<Clobber, Instruction *>, + PointerSumTypeMember<Def, Instruction *>, + PointerSumTypeMember<Other, PointerEmbeddedInt<OtherType, 3>>>; + ValueTy Value; + + explicit MemDepResult(ValueTy V) : Value(V) {} + +public: + MemDepResult() = default; + + /// get methods: These are static ctor methods for creating various + /// MemDepResult kinds. + static MemDepResult getDef(Instruction *Inst) { + assert(Inst && "Def requires inst"); + return MemDepResult(ValueTy::create<Def>(Inst)); + } + static MemDepResult getClobber(Instruction *Inst) { + assert(Inst && "Clobber requires inst"); + return MemDepResult(ValueTy::create<Clobber>(Inst)); + } + static MemDepResult getNonLocal() { + return MemDepResult(ValueTy::create<Other>(NonLocal)); + } + static MemDepResult getNonFuncLocal() { + return MemDepResult(ValueTy::create<Other>(NonFuncLocal)); + } + static MemDepResult getUnknown() { + return MemDepResult(ValueTy::create<Other>(Unknown)); + } + + /// Tests if this MemDepResult represents a query that is an instruction + /// clobber dependency. + bool isClobber() const { return Value.is<Clobber>(); } + + /// Tests if this MemDepResult represents a query that is an instruction + /// definition dependency. + bool isDef() const { return Value.is<Def>(); } + + /// Tests if this MemDepResult represents a query that is transparent to the + /// start of the block, but where a non-local hasn't been done. + bool isNonLocal() const { + return Value.is<Other>() && Value.cast<Other>() == NonLocal; + } + + /// Tests if this MemDepResult represents a query that is transparent to the + /// start of the function. + bool isNonFuncLocal() const { + return Value.is<Other>() && Value.cast<Other>() == NonFuncLocal; + } + + /// Tests if this MemDepResult represents a query which cannot and/or will + /// not be computed. + bool isUnknown() const { + return Value.is<Other>() && Value.cast<Other>() == Unknown; + } + + /// If this is a normal dependency, returns the instruction that is depended + /// on. Otherwise, returns null. 
+ Instruction *getInst() const { + switch (Value.getTag()) { + case Invalid: + return Value.cast<Invalid>(); + case Clobber: + return Value.cast<Clobber>(); + case Def: + return Value.cast<Def>(); + case Other: + return nullptr; + } + llvm_unreachable("Unknown discriminant!"); + } + + bool operator==(const MemDepResult &M) const { return Value == M.Value; } + bool operator!=(const MemDepResult &M) const { return Value != M.Value; } + bool operator<(const MemDepResult &M) const { return Value < M.Value; } + bool operator>(const MemDepResult &M) const { return Value > M.Value; } + +private: + friend class MemoryDependenceResults; + + /// Tests if this is a MemDepResult in its dirty/invalid. state. + bool isDirty() const { return Value.is<Invalid>(); } + + static MemDepResult getDirty(Instruction *Inst) { + return MemDepResult(ValueTy::create<Invalid>(Inst)); + } +}; + +/// This is an entry in the NonLocalDepInfo cache. +/// +/// For each BasicBlock (the BB entry) it keeps a MemDepResult. +class NonLocalDepEntry { + BasicBlock *BB; + MemDepResult Result; + +public: + NonLocalDepEntry(BasicBlock *bb, MemDepResult result) + : BB(bb), Result(result) {} + + // This is used for searches. + NonLocalDepEntry(BasicBlock *bb) : BB(bb) {} + + // BB is the sort key, it can't be changed. + BasicBlock *getBB() const { return BB; } + + void setResult(const MemDepResult &R) { Result = R; } + + const MemDepResult &getResult() const { return Result; } + + bool operator<(const NonLocalDepEntry &RHS) const { return BB < RHS.BB; } +}; + +/// This is a result from a NonLocal dependence query. +/// +/// For each BasicBlock (the BB entry) it keeps a MemDepResult and the +/// (potentially phi translated) address that was live in the block. +class NonLocalDepResult { + NonLocalDepEntry Entry; + Value *Address; + +public: + NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address) + : Entry(bb, result), Address(address) {} + + // BB is the sort key, it can't be changed. + BasicBlock *getBB() const { return Entry.getBB(); } + + void setResult(const MemDepResult &R, Value *Addr) { + Entry.setResult(R); + Address = Addr; + } + + const MemDepResult &getResult() const { return Entry.getResult(); } + + /// Returns the address of this pointer in this block. + /// + /// This can be different than the address queried for the non-local result + /// because of phi translation. This returns null if the address was not + /// available in a block (i.e. because phi translation failed) or if this is + /// a cached result and that address was deleted. + /// + /// The address is always null for a non-local 'call' dependence. + Value *getAddress() const { return Address; } +}; + +/// Provides a lazy, caching interface for making common memory aliasing +/// information queries, backed by LLVM's alias analysis passes. +/// +/// The dependency information returned is somewhat unusual, but is pragmatic. +/// If queried about a store or call that might modify memory, the analysis +/// will return the instruction[s] that may either load from that memory or +/// store to it. If queried with a load or call that can never modify memory, +/// the analysis will return calls and stores that might modify the pointer, +/// but generally does not return loads unless a) they are volatile, or +/// b) they load from *must-aliased* pointers. Returning a dependence on +/// must-alias'd pointers instead of all pointers interacts well with the +/// internal caching mechanism. 
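
// --- Editor's illustrative sketch (not part of the original header) ---
// Interpreting a MemDepResult produced by the analysis defined below (for
// example by MemoryDependenceResults::getDependency on a load). The helper
// name is hypothetical.
inline Instruction *getLocalDefiningInstruction(const MemDepResult &Dep) {
  if (Dep.isDef())     // An instruction that produces the queried location.
    return Dep.getInst();
  if (Dep.isClobber()) // A may-alias write; callers may analyze it further.
    return nullptr;
  // isNonLocal()/isNonFuncLocal()/isUnknown(): no local answer in this block.
  return nullptr;
}
// -----------------------------------------------------------------------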
+class MemoryDependenceResults { + // A map from instructions to their dependency. + using LocalDepMapType = DenseMap<Instruction *, MemDepResult>; + LocalDepMapType LocalDeps; + +public: + using NonLocalDepInfo = std::vector<NonLocalDepEntry>; + +private: + /// A pair<Value*, bool> where the bool is true if the dependence is a read + /// only dependence, false if read/write. + using ValueIsLoadPair = PointerIntPair<const Value *, 1, bool>; + + /// This pair is used when caching information for a block. + /// + /// If the pointer is null, the cache value is not a full query that starts + /// at the specified block. If non-null, the bool indicates whether or not + /// the contents of the block was skipped. + using BBSkipFirstBlockPair = PointerIntPair<BasicBlock *, 1, bool>; + + /// This record is the information kept for each (value, is load) pair. + struct NonLocalPointerInfo { + /// The pair of the block and the skip-first-block flag. + BBSkipFirstBlockPair Pair; + /// The results of the query for each relevant block. + NonLocalDepInfo NonLocalDeps; + /// The maximum size of the dereferences of the pointer. + /// + /// May be UnknownSize if the sizes are unknown. + LocationSize Size = LocationSize::unknown(); + /// The AA tags associated with dereferences of the pointer. + /// + /// The members may be null if there are no tags or conflicting tags. + AAMDNodes AATags; + + NonLocalPointerInfo() = default; + }; + + /// Cache storing single nonlocal def for the instruction. + /// It is set when nonlocal def would be found in function returning only + /// local dependencies. + DenseMap<AssertingVH<const Value>, NonLocalDepResult> NonLocalDefsCache; + using ReverseNonLocalDefsCacheTy = + DenseMap<Instruction *, SmallPtrSet<const Value*, 4>>; + ReverseNonLocalDefsCacheTy ReverseNonLocalDefsCache; + + /// This map stores the cached results of doing a pointer lookup at the + /// bottom of a block. + /// + /// The key of this map is the pointer+isload bit, the value is a list of + /// <bb->result> mappings. + using CachedNonLocalPointerInfo = + DenseMap<ValueIsLoadPair, NonLocalPointerInfo>; + CachedNonLocalPointerInfo NonLocalPointerDeps; + + // A map from instructions to their non-local pointer dependencies. + using ReverseNonLocalPtrDepTy = + DenseMap<Instruction *, SmallPtrSet<ValueIsLoadPair, 4>>; + ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; + + /// This is the instruction we keep for each cached access that we have for + /// an instruction. + /// + /// The pointer is an owning pointer and the bool indicates whether we have + /// any dirty bits in the set. + using PerInstNLInfo = std::pair<NonLocalDepInfo, bool>; + + // A map from instructions to their non-local dependencies. + using NonLocalDepMapType = DenseMap<Instruction *, PerInstNLInfo>; + + NonLocalDepMapType NonLocalDeps; + + // A reverse mapping from dependencies to the dependees. This is + // used when removing instructions to keep the cache coherent. + using ReverseDepMapType = + DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>>; + ReverseDepMapType ReverseLocalDeps; + + // A reverse mapping from dependencies to the non-local dependees. + ReverseDepMapType ReverseNonLocalDeps; + + /// Current AA implementation, just a cache. 
+ AliasAnalysis &AA; + AssumptionCache &AC; + const TargetLibraryInfo &TLI; + DominatorTree &DT; + PhiValues &PV; + PredIteratorCache PredCache; + +public: + MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, + const TargetLibraryInfo &TLI, + DominatorTree &DT, PhiValues &PV) + : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {} + + /// Handle invalidation in the new PM. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + /// Some methods limit the number of instructions they will examine. + /// The return value of this method is the default limit that will be + /// used if no limit is explicitly passed in. + unsigned getDefaultBlockScanLimit() const; + + /// Returns the instruction on which a memory operation depends. + /// + /// See the class comment for more details. It is illegal to call this on + /// non-memory instructions. + MemDepResult getDependency(Instruction *QueryInst); + + /// Perform a full dependency query for the specified call, returning the set + /// of blocks that the value is potentially live across. + /// + /// The returned set of results will include a "NonLocal" result for all + /// blocks where the value is live across. + /// + /// This method assumes the instruction returns a "NonLocal" dependency + /// within its own block. + /// + /// This returns a reference to an internal data structure that may be + /// invalidated on the next non-local query or when an instruction is + /// removed. Clients must copy this data if they want it around longer than + /// that. + const NonLocalDepInfo &getNonLocalCallDependency(CallBase *QueryCall); + + /// Perform a full dependency query for an access to the QueryInst's + /// specified memory location, returning the set of instructions that either + /// define or clobber the value. + /// + /// Warning: For a volatile query instruction, the dependencies will be + /// accurate, and thus usable for reordering, but it is never legal to + /// remove the query instruction. + /// + /// This method assumes the pointer has a "NonLocal" dependency within + /// QueryInst's parent basic block. + void getNonLocalPointerDependency(Instruction *QueryInst, + SmallVectorImpl<NonLocalDepResult> &Result); + + /// Removes an instruction from the dependence analysis, updating the + /// dependence of instructions that previously depended on it. + void removeInstruction(Instruction *InstToRemove); + + /// Invalidates cached information about the specified pointer, because it + /// may be too conservative in memdep. + /// + /// This is an optional call that can be used when the client detects an + /// equivalence between the pointer and some other value and replaces the + /// other value with ptr. This can make Ptr available in more places that + /// cached info does not necessarily keep. + void invalidateCachedPointerInfo(Value *Ptr); + + /// Clears the PredIteratorCache info. + /// + /// This needs to be done when the CFG changes, e.g., due to splitting + /// critical edges. + void invalidateCachedPredecessors(); + + /// Returns the instruction on which a memory location depends. + /// + /// If isLoad is true, this routine ignores may-aliases with read-only + /// operations. If isLoad is false, this routine ignores may-aliases + /// with reads from read-only locations. If possible, pass the query + /// instruction as well; this function may take advantage of the metadata + /// annotated to the query instruction to refine the result. 
\p Limit + /// can be used to set the maximum number of instructions that will be + /// examined to find the pointer dependency. On return, it will be set to + /// the number of instructions left to examine. If a null pointer is passed + /// in, the limit will default to the value of -memdep-block-scan-limit. + /// + /// Note that this is an uncached query, and thus may be inefficient. + MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst = nullptr, + unsigned *Limit = nullptr); + + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, + bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst, + unsigned *Limit = nullptr); + + /// This analysis looks for other loads and stores with invariant.group + /// metadata and the same pointer operand. Returns Unknown if it does not + /// find anything, and Def if it can be assumed that 2 instructions load or + /// store the same value and NonLocal which indicate that non-local Def was + /// found, which can be retrieved by calling getNonLocalPointerDependency + /// with the same queried instruction. + MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB); + + /// Looks at a memory location for a load (specified by MemLocBase, Offs, and + /// Size) and compares it against a load. + /// + /// If the specified load could be safely widened to a larger integer load + /// that is 1) still efficient, 2) safe for the target, and 3) would provide + /// the specified memory location value, then this function returns the size + /// in bytes of the load width to use. If not, this returns zero. + static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase, + int64_t MemLocOffs, + unsigned MemLocSize, + const LoadInst *LI); + + /// Release memory in caches. + void releaseMemory(); + +private: + MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall, + BasicBlock::iterator ScanIt, + BasicBlock *BB); + bool getNonLocalPointerDepFromBB(Instruction *QueryInst, + const PHITransAddr &Pointer, + const MemoryLocation &Loc, bool isLoad, + BasicBlock *BB, + SmallVectorImpl<NonLocalDepResult> &Result, + DenseMap<BasicBlock *, Value *> &Visited, + bool SkipFirstBlock = false); + MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, + const MemoryLocation &Loc, bool isLoad, + BasicBlock *BB, NonLocalDepInfo *Cache, + unsigned NumSortedEntries); + + void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); + + void verifyRemoved(Instruction *Inst) const; +}; + +/// An analysis that produces \c MemoryDependenceResults for a function. +/// +/// This is essentially a no-op because the results are computed entirely +/// lazily. +class MemoryDependenceAnalysis + : public AnalysisInfoMixin<MemoryDependenceAnalysis> { + friend AnalysisInfoMixin<MemoryDependenceAnalysis>; + + static AnalysisKey Key; + +public: + using Result = MemoryDependenceResults; + + MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM); +}; + +/// A wrapper analysis pass for the legacy pass manager that exposes a \c +/// MemoryDepnedenceResults instance. +class MemoryDependenceWrapperPass : public FunctionPass { + Optional<MemoryDependenceResults> MemDep; + +public: + static char ID; + + MemoryDependenceWrapperPass(); + ~MemoryDependenceWrapperPass() override; + + /// Pass Implementation stuff. This doesn't do any analysis eagerly. 
+ bool runOnFunction(Function &) override; + + /// Clean up memory in between runs + void releaseMemory() override; + + /// Does not modify anything. It uses Value Numbering and Alias Analysis. + void getAnalysisUsage(AnalysisUsage &AU) const override; + + MemoryDependenceResults &getMemDep() { return *MemDep; } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/MemoryLocation.h b/clang-r353983e/include/llvm/Analysis/MemoryLocation.h new file mode 100644 index 00000000..7c26353e --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/MemoryLocation.h @@ -0,0 +1,307 @@ +//===- MemoryLocation.h - Memory location descriptions ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides utility analysis objects describing memory locations. +/// These are used both by the Alias Analysis infrastructure and more +/// specialized memory analysis layers. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYLOCATION_H +#define LLVM_ANALYSIS_MEMORYLOCATION_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Optional.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" + +namespace llvm { + +class LoadInst; +class StoreInst; +class MemTransferInst; +class MemIntrinsic; +class AtomicMemTransferInst; +class AtomicMemIntrinsic; +class AnyMemTransferInst; +class AnyMemIntrinsic; +class TargetLibraryInfo; + +// Represents the size of a MemoryLocation. Logically, it's an +// Optional<uint63_t> that also carries a bit to represent whether the integer +// it contains, N, is 'precise'. Precise, in this context, means that we know +// that the area of storage referenced by the given MemoryLocation must be +// precisely N bytes. An imprecise value is formed as the union of two or more +// precise values, and can conservatively represent all of the values unioned +// into it. Importantly, imprecise values are an *upper-bound* on the size of a +// MemoryLocation. +// +// Concretely, a precise MemoryLocation is (%p, 4) in +// store i32 0, i32* %p +// +// Since we know that %p must be at least 4 bytes large at this point. +// Otherwise, we have UB. An example of an imprecise MemoryLocation is (%p, 4) +// at the memcpy in +// +// %n = select i1 %foo, i64 1, i64 4 +// call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %baz, i64 %n, i32 1, +// i1 false) +// +// ...Since we'll copy *up to* 4 bytes into %p, but we can't guarantee that +// we'll ever actually do so. +// +// If asked to represent a pathologically large value, this will degrade to +// None. +class LocationSize { + enum : uint64_t { + Unknown = ~uint64_t(0), + ImpreciseBit = uint64_t(1) << 63, + MapEmpty = Unknown - 1, + MapTombstone = Unknown - 2, + + // The maximum value we can represent without falling back to 'unknown'. + MaxValue = (MapTombstone - 1) & ~ImpreciseBit, + }; + + uint64_t Value; + + // Hack to support implicit construction. This should disappear when the + // public LocationSize ctor goes away. 
+ enum DirectConstruction { Direct }; + + constexpr LocationSize(uint64_t Raw, DirectConstruction): Value(Raw) {} + + static_assert(Unknown & ImpreciseBit, "Unknown is imprecise by definition."); +public: + // FIXME: Migrate all users to construct via either `precise` or `upperBound`, + // to make it more obvious at the callsite the kind of size that they're + // providing. + // + // Since the overwhelming majority of users of this provide precise values, + // this assumes the provided value is precise. + constexpr LocationSize(uint64_t Raw) + : Value(Raw > MaxValue ? Unknown : Raw) {} + + static LocationSize precise(uint64_t Value) { return LocationSize(Value); } + + static LocationSize upperBound(uint64_t Value) { + // You can't go lower than 0, so give a precise result. + if (LLVM_UNLIKELY(Value == 0)) + return precise(0); + if (LLVM_UNLIKELY(Value > MaxValue)) + return unknown(); + return LocationSize(Value | ImpreciseBit, Direct); + } + + constexpr static LocationSize unknown() { + return LocationSize(Unknown, Direct); + } + + // Sentinel values, generally used for maps. + constexpr static LocationSize mapTombstone() { + return LocationSize(MapTombstone, Direct); + } + constexpr static LocationSize mapEmpty() { + return LocationSize(MapEmpty, Direct); + } + + // Returns a LocationSize that can correctly represent either `*this` or + // `Other`. + LocationSize unionWith(LocationSize Other) const { + if (Other == *this) + return *this; + + if (!hasValue() || !Other.hasValue()) + return unknown(); + + return upperBound(std::max(getValue(), Other.getValue())); + } + + bool hasValue() const { return Value != Unknown; } + uint64_t getValue() const { + assert(hasValue() && "Getting value from an unknown LocationSize!"); + return Value & ~ImpreciseBit; + } + + // Returns whether or not this value is precise. Note that if a value is + // precise, it's guaranteed to not be `unknown()`. + bool isPrecise() const { + return (Value & ImpreciseBit) == 0; + } + + // Convenience method to check if this LocationSize's value is 0. + bool isZero() const { return hasValue() && getValue() == 0; } + + bool operator==(const LocationSize &Other) const { + return Value == Other.Value; + } + + bool operator!=(const LocationSize &Other) const { + return !(*this == Other); + } + + // Ordering operators are not provided, since it's unclear if there's only one + // reasonable way to compare: + // - values that don't exist against values that do, and + // - precise values to imprecise values + + void print(raw_ostream &OS) const; + + // Returns an opaque value that represents this LocationSize. Cannot be + // reliably converted back into a LocationSize. + uint64_t toRaw() const { return Value; } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, LocationSize Size) { + Size.print(OS); + return OS; +} + +/// Representation for a specific memory location. +/// +/// This abstraction can be used to represent a specific location in memory. +/// The goal of the location is to represent enough information to describe +/// abstract aliasing, modification, and reference behaviors of whatever +/// value(s) are stored in memory at the particular location. +/// +/// The primary user of this interface is LLVM's Alias Analysis, but other +/// memory analyses such as MemoryDependence can use it as well. 
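+///
+/// An illustrative sketch (editorial example, not part of the upstream
+/// header). It assumes a LoadInst *LI, a StoreInst *SI, and an
+/// AAResults &AA available in the caller:
+///
+///   MemoryLocation LoadLoc = MemoryLocation::get(LI);
+///   MemoryLocation StoreLoc = MemoryLocation::get(SI);
+///   AliasResult AR = AA.alias(LoadLoc, StoreLoc);
+///   // Widen the queried region to an 8-byte upper bound if needed.
+///   MemoryLocation Wide =
+///       LoadLoc.getWithNewSize(LocationSize::upperBound(8));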
+class MemoryLocation { +public: + /// UnknownSize - This is a special value which can be used with the + /// size arguments in alias queries to indicate that the caller does not + /// know the sizes of the potential memory references. + enum : uint64_t { UnknownSize = ~UINT64_C(0) }; + + /// The address of the start of the location. + const Value *Ptr; + + /// The maximum size of the location, in address-units, or + /// UnknownSize if the size is not known. + /// + /// Note that an unknown size does not mean the pointer aliases the entire + /// virtual address space, because there are restrictions on stepping out of + /// one object and into another. See + /// http://llvm.org/docs/LangRef.html#pointeraliasing + LocationSize Size; + + /// The metadata nodes which describes the aliasing of the location (each + /// member is null if that kind of information is unavailable). + AAMDNodes AATags; + + /// Return a location with information about the memory reference by the given + /// instruction. + static MemoryLocation get(const LoadInst *LI); + static MemoryLocation get(const StoreInst *SI); + static MemoryLocation get(const VAArgInst *VI); + static MemoryLocation get(const AtomicCmpXchgInst *CXI); + static MemoryLocation get(const AtomicRMWInst *RMWI); + static MemoryLocation get(const Instruction *Inst) { + return *MemoryLocation::getOrNone(Inst); + } + static Optional<MemoryLocation> getOrNone(const Instruction *Inst) { + switch (Inst->getOpcode()) { + case Instruction::Load: + return get(cast<LoadInst>(Inst)); + case Instruction::Store: + return get(cast<StoreInst>(Inst)); + case Instruction::VAArg: + return get(cast<VAArgInst>(Inst)); + case Instruction::AtomicCmpXchg: + return get(cast<AtomicCmpXchgInst>(Inst)); + case Instruction::AtomicRMW: + return get(cast<AtomicRMWInst>(Inst)); + default: + return None; + } + } + + /// Return a location representing the source of a memory transfer. + static MemoryLocation getForSource(const MemTransferInst *MTI); + static MemoryLocation getForSource(const AtomicMemTransferInst *MTI); + static MemoryLocation getForSource(const AnyMemTransferInst *MTI); + + /// Return a location representing the destination of a memory set or + /// transfer. + static MemoryLocation getForDest(const MemIntrinsic *MI); + static MemoryLocation getForDest(const AtomicMemIntrinsic *MI); + static MemoryLocation getForDest(const AnyMemIntrinsic *MI); + + /// Return a location representing a particular argument of a call. + static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, + const TargetLibraryInfo *TLI); + static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, + const TargetLibraryInfo &TLI) { + return getForArgument(Call, ArgIdx, &TLI); + } + + explicit MemoryLocation(const Value *Ptr = nullptr, + LocationSize Size = LocationSize::unknown(), + const AAMDNodes &AATags = AAMDNodes()) + : Ptr(Ptr), Size(Size), AATags(AATags) {} + + MemoryLocation getWithNewPtr(const Value *NewPtr) const { + MemoryLocation Copy(*this); + Copy.Ptr = NewPtr; + return Copy; + } + + MemoryLocation getWithNewSize(LocationSize NewSize) const { + MemoryLocation Copy(*this); + Copy.Size = NewSize; + return Copy; + } + + MemoryLocation getWithoutAATags() const { + MemoryLocation Copy(*this); + Copy.AATags = AAMDNodes(); + return Copy; + } + + bool operator==(const MemoryLocation &Other) const { + return Ptr == Other.Ptr && Size == Other.Size && AATags == Other.AATags; + } +}; + +// Specialize DenseMapInfo. 
+template <> struct DenseMapInfo<LocationSize> { + static inline LocationSize getEmptyKey() { + return LocationSize::mapEmpty(); + } + static inline LocationSize getTombstoneKey() { + return LocationSize::mapTombstone(); + } + static unsigned getHashValue(const LocationSize &Val) { + return DenseMapInfo<uint64_t>::getHashValue(Val.toRaw()); + } + static bool isEqual(const LocationSize &LHS, const LocationSize &RHS) { + return LHS == RHS; + } +}; + +template <> struct DenseMapInfo<MemoryLocation> { + static inline MemoryLocation getEmptyKey() { + return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(), + DenseMapInfo<LocationSize>::getEmptyKey()); + } + static inline MemoryLocation getTombstoneKey() { + return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(), + DenseMapInfo<LocationSize>::getTombstoneKey()); + } + static unsigned getHashValue(const MemoryLocation &Val) { + return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^ + DenseMapInfo<LocationSize>::getHashValue(Val.Size) ^ + DenseMapInfo<AAMDNodes>::getHashValue(Val.AATags); + } + static bool isEqual(const MemoryLocation &LHS, const MemoryLocation &RHS) { + return LHS == RHS; + } +}; +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/MemorySSA.h b/clang-r353983e/include/llvm/Analysis/MemorySSA.h new file mode 100644 index 00000000..fa92fd34 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/MemorySSA.h @@ -0,0 +1,1303 @@ +//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file exposes an interface to building/using memory SSA to +/// walk memory instructions using a use/def graph. +/// +/// Memory SSA class builds an SSA form that links together memory access +/// instructions such as loads, stores, atomics, and calls. Additionally, it +/// does a trivial form of "heap versioning" Every time the memory state changes +/// in the program, we generate a new heap version. It generates +/// MemoryDef/Uses/Phis that are overlayed on top of the existing instructions. +/// +/// As a trivial example, +/// define i32 @main() #0 { +/// entry: +/// %call = call noalias i8* @_Znwm(i64 4) #2 +/// %0 = bitcast i8* %call to i32* +/// %call1 = call noalias i8* @_Znwm(i64 4) #2 +/// %1 = bitcast i8* %call1 to i32* +/// store i32 5, i32* %0, align 4 +/// store i32 7, i32* %1, align 4 +/// %2 = load i32* %0, align 4 +/// %3 = load i32* %1, align 4 +/// %add = add nsw i32 %2, %3 +/// ret i32 %add +/// } +/// +/// Will become +/// define i32 @main() #0 { +/// entry: +/// ; 1 = MemoryDef(0) +/// %call = call noalias i8* @_Znwm(i64 4) #3 +/// %2 = bitcast i8* %call to i32* +/// ; 2 = MemoryDef(1) +/// %call1 = call noalias i8* @_Znwm(i64 4) #3 +/// %4 = bitcast i8* %call1 to i32* +/// ; 3 = MemoryDef(2) +/// store i32 5, i32* %2, align 4 +/// ; 4 = MemoryDef(3) +/// store i32 7, i32* %4, align 4 +/// ; MemoryUse(3) +/// %7 = load i32* %2, align 4 +/// ; MemoryUse(4) +/// %8 = load i32* %4, align 4 +/// %add = add nsw i32 %7, %8 +/// ret i32 %add +/// } +/// +/// Given this form, all the stores that could ever effect the load at %8 can be +/// gotten by using the MemoryUse associated with it, and walking from use to +/// def until you hit the top of the function. 
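+///
+/// As an editorial sketch (not part of the upstream header), that walk can
+/// be started from the load above roughly as follows, assuming a built
+/// MemorySSA &MSSA and the load's Instruction *Load:
+///
+///   MemoryUseOrDef *Use = MSSA.getMemoryAccess(Load);
+///   MemoryAccess *Clobber =
+///       MSSA.getWalker()->getClobberingMemoryAccess(Use);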
+/// +/// Each def also has a list of users associated with it, so you can walk from +/// both def to users, and users to defs. Note that we disambiguate MemoryUses, +/// but not the RHS of MemoryDefs. You can see this above at %7, which would +/// otherwise be a MemoryUse(4). Being disambiguated means that for a given +/// store, all the MemoryUses on its use lists are may-aliases of that store +/// (but the MemoryDefs on its use list may not be). +/// +/// MemoryDefs are not disambiguated because it would require multiple reaching +/// definitions, which would require multiple phis, and multiple memoryaccesses +/// per instruction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYSSA_H +#define LLVM_ANALYSIS_MEMORYSSA_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/simple_ilist.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedUser.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <iterator> +#include <memory> +#include <utility> + +namespace llvm { + +class Function; +class Instruction; +class MemoryAccess; +class MemorySSAWalker; +class LLVMContext; +class raw_ostream; + +namespace MSSAHelpers { + +struct AllAccessTag {}; +struct DefsOnlyTag {}; + +} // end namespace MSSAHelpers + +enum : unsigned { + // Used to signify what the default invalid ID is for MemoryAccess's + // getID() + INVALID_MEMORYACCESS_ID = -1U +}; + +template <class T> class memoryaccess_def_iterator_base; +using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>; +using const_memoryaccess_def_iterator = + memoryaccess_def_iterator_base<const MemoryAccess>; + +// The base for all memory accesses. All memory accesses in a block are +// linked together using an intrusive list. +class MemoryAccess + : public DerivedUser, + public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>, + public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>> { +public: + using AllAccessType = + ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>; + using DefsOnlyType = + ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>; + + MemoryAccess(const MemoryAccess &) = delete; + MemoryAccess &operator=(const MemoryAccess &) = delete; + + void *operator new(size_t) = delete; + + // Methods for support type inquiry through isa, cast, and + // dyn_cast + static bool classof(const Value *V) { + unsigned ID = V->getValueID(); + return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal; + } + + BasicBlock *getBlock() const { return Block; } + + void print(raw_ostream &OS) const; + void dump() const; + + /// The user iterators for a memory access + using iterator = user_iterator; + using const_iterator = const_user_iterator; + + /// This iterator walks over all of the defs in a given + /// MemoryAccess. For MemoryPhi nodes, this walks arguments. 
For + /// MemoryUse/MemoryDef, this walks the defining access. + memoryaccess_def_iterator defs_begin(); + const_memoryaccess_def_iterator defs_begin() const; + memoryaccess_def_iterator defs_end(); + const_memoryaccess_def_iterator defs_end() const; + + /// Get the iterators for the all access list and the defs only list + /// We default to the all access list. + AllAccessType::self_iterator getIterator() { + return this->AllAccessType::getIterator(); + } + AllAccessType::const_self_iterator getIterator() const { + return this->AllAccessType::getIterator(); + } + AllAccessType::reverse_self_iterator getReverseIterator() { + return this->AllAccessType::getReverseIterator(); + } + AllAccessType::const_reverse_self_iterator getReverseIterator() const { + return this->AllAccessType::getReverseIterator(); + } + DefsOnlyType::self_iterator getDefsIterator() { + return this->DefsOnlyType::getIterator(); + } + DefsOnlyType::const_self_iterator getDefsIterator() const { + return this->DefsOnlyType::getIterator(); + } + DefsOnlyType::reverse_self_iterator getReverseDefsIterator() { + return this->DefsOnlyType::getReverseIterator(); + } + DefsOnlyType::const_reverse_self_iterator getReverseDefsIterator() const { + return this->DefsOnlyType::getReverseIterator(); + } + +protected: + friend class MemoryDef; + friend class MemoryPhi; + friend class MemorySSA; + friend class MemoryUse; + friend class MemoryUseOrDef; + + /// Used by MemorySSA to change the block of a MemoryAccess when it is + /// moved. + void setBlock(BasicBlock *BB) { Block = BB; } + + /// Used for debugging and tracking things about MemoryAccesses. + /// Guaranteed unique among MemoryAccesses, no guarantees otherwise. + inline unsigned getID() const; + + MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue, + BasicBlock *BB, unsigned NumOperands) + : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue), + Block(BB) {} + + // Use deleteValue() to delete a generic MemoryAccess. + ~MemoryAccess() = default; + +private: + BasicBlock *Block; +}; + +template <> +struct ilist_alloc_traits<MemoryAccess> { + static void deleteNode(MemoryAccess *MA) { MA->deleteValue(); } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) { + MA.print(OS); + return OS; +} + +/// Class that has the common methods + fields of memory uses/defs. It's +/// a little awkward to have, but there are many cases where we want either a +/// use or def, and there are many cases where uses are needed (defs aren't +/// acceptable), and vice-versa. +/// +/// This class should never be instantiated directly; make a MemoryUse or +/// MemoryDef instead. +class MemoryUseOrDef : public MemoryAccess { +public: + void *operator new(size_t) = delete; + + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + /// Get the instruction that this MemoryUse represents. + Instruction *getMemoryInst() const { return MemoryInstruction; } + + /// Get the access that produces the memory state used by this Use. + MemoryAccess *getDefiningAccess() const { return getOperand(0); } + + static bool classof(const Value *MA) { + return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal; + } + + // Sadly, these have to be public because they are needed in some of the + // iterators. + inline bool isOptimized() const; + inline MemoryAccess *getOptimized() const; + inline void setOptimized(MemoryAccess *); + + // Retrieve AliasResult type of the optimized access. 
Ideally this would be + // returned by the caching walker and may go away in the future. + Optional<AliasResult> getOptimizedAccessType() const { + return OptimizedAccessAlias; + } + + /// Reset the ID of what this MemoryUse was optimized to, causing it to + /// be rewalked by the walker if necessary. + /// This really should only be called by tests. + inline void resetOptimized(); + +protected: + friend class MemorySSA; + friend class MemorySSAUpdater; + + MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, + DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB, + unsigned NumOperands) + : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands), + MemoryInstruction(MI), OptimizedAccessAlias(MayAlias) { + setDefiningAccess(DMA); + } + + // Use deleteValue() to delete a generic MemoryUseOrDef. + ~MemoryUseOrDef() = default; + + void setOptimizedAccessType(Optional<AliasResult> AR) { + OptimizedAccessAlias = AR; + } + + void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false, + Optional<AliasResult> AR = MayAlias) { + if (!Optimized) { + setOperand(0, DMA); + return; + } + setOptimized(DMA); + setOptimizedAccessType(AR); + } + +private: + Instruction *MemoryInstruction; + Optional<AliasResult> OptimizedAccessAlias; +}; + +/// Represents read-only accesses to memory +/// +/// In particular, the set of Instructions that will be represented by +/// MemoryUse's is exactly the set of Instructions for which +/// AliasAnalysis::getModRefInfo returns "Ref". +class MemoryUse final : public MemoryUseOrDef { +public: + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) + : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB, + /*NumOperands=*/1) {} + + // allocate space for exactly one operand + void *operator new(size_t s) { return User::operator new(s, 1); } + + static bool classof(const Value *MA) { + return MA->getValueID() == MemoryUseVal; + } + + void print(raw_ostream &OS) const; + + void setOptimized(MemoryAccess *DMA) { + OptimizedID = DMA->getID(); + setOperand(0, DMA); + } + + bool isOptimized() const { + return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); + } + + MemoryAccess *getOptimized() const { + return getDefiningAccess(); + } + + void resetOptimized() { + OptimizedID = INVALID_MEMORYACCESS_ID; + } + +protected: + friend class MemorySSA; + +private: + static void deleteMe(DerivedUser *Self); + + unsigned OptimizedID = INVALID_MEMORYACCESS_ID; +}; + +template <> +struct OperandTraits<MemoryUse> : public FixedNumOperandTraits<MemoryUse, 1> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess) + +/// Represents a read-write access to memory, whether it is a must-alias, +/// or a may-alias. +/// +/// In particular, the set of Instructions that will be represented by +/// MemoryDef's is exactly the set of Instructions for which +/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef". +/// Note that, in order to provide def-def chains, all defs also have a use +/// associated with them. This use points to the nearest reaching +/// MemoryDef/MemoryPhi. 
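+///
+/// Editorial sketch (not part of the upstream header): given the MemoryDef
+/// *MD created for a store, the def-def chain can be followed upwards via
+///
+///   MemoryAccess *Reaching = MD->getDefiningAccess();
+///
+/// which yields the nearest reaching MemoryDef or MemoryPhi.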
+class MemoryDef final : public MemoryUseOrDef { +public: + friend class MemorySSA; + + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, + unsigned Ver) + : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB, + /*NumOperands=*/2), + ID(Ver) {} + + // allocate space for exactly two operands + void *operator new(size_t s) { return User::operator new(s, 2); } + + static bool classof(const Value *MA) { + return MA->getValueID() == MemoryDefVal; + } + + void setOptimized(MemoryAccess *MA) { + setOperand(1, MA); + OptimizedID = MA->getID(); + } + + MemoryAccess *getOptimized() const { + return cast_or_null<MemoryAccess>(getOperand(1)); + } + + bool isOptimized() const { + return getOptimized() && OptimizedID == getOptimized()->getID(); + } + + void resetOptimized() { + OptimizedID = INVALID_MEMORYACCESS_ID; + setOperand(1, nullptr); + } + + void print(raw_ostream &OS) const; + + unsigned getID() const { return ID; } + +private: + static void deleteMe(DerivedUser *Self); + + const unsigned ID; + unsigned OptimizedID = INVALID_MEMORYACCESS_ID; +}; + +template <> +struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 2> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess) + +template <> +struct OperandTraits<MemoryUseOrDef> { + static Use *op_begin(MemoryUseOrDef *MUD) { + if (auto *MU = dyn_cast<MemoryUse>(MUD)) + return OperandTraits<MemoryUse>::op_begin(MU); + return OperandTraits<MemoryDef>::op_begin(cast<MemoryDef>(MUD)); + } + + static Use *op_end(MemoryUseOrDef *MUD) { + if (auto *MU = dyn_cast<MemoryUse>(MUD)) + return OperandTraits<MemoryUse>::op_end(MU); + return OperandTraits<MemoryDef>::op_end(cast<MemoryDef>(MUD)); + } + + static unsigned operands(const MemoryUseOrDef *MUD) { + if (const auto *MU = dyn_cast<MemoryUse>(MUD)) + return OperandTraits<MemoryUse>::operands(MU); + return OperandTraits<MemoryDef>::operands(cast<MemoryDef>(MUD)); + } +}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess) + +/// Represents phi nodes for memory accesses. +/// +/// These have the same semantic as regular phi nodes, with the exception that +/// only one phi will ever exist in a given basic block. +/// Guaranteeing one phi per block means guaranteeing there is only ever one +/// valid reaching MemoryDef/MemoryPHI along each path to the phi node. +/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or +/// a MemoryPhi's operands. +/// That is, given +/// if (a) { +/// store %a +/// store %b +/// } +/// it *must* be transformed into +/// if (a) { +/// 1 = MemoryDef(liveOnEntry) +/// store %a +/// 2 = MemoryDef(1) +/// store %b +/// } +/// and *not* +/// if (a) { +/// 1 = MemoryDef(liveOnEntry) +/// store %a +/// 2 = MemoryDef(liveOnEntry) +/// store %b +/// } +/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the +/// end of the branch, and if there are not two phi nodes, one will be +/// disconnected completely from the SSA graph below that point. +/// Because MemoryUse's do not generate new definitions, they do not have this +/// issue. 
+class MemoryPhi final : public MemoryAccess { + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s); } + +public: + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0) + : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, 0), ID(Ver), + ReservedSpace(NumPreds) { + allocHungoffUses(ReservedSpace); + } + + // Block iterator interface. This provides access to the list of incoming + // basic blocks, which parallels the list of incoming values. + using block_iterator = BasicBlock **; + using const_block_iterator = BasicBlock *const *; + + block_iterator block_begin() { + auto *Ref = reinterpret_cast<Use::UserRef *>(op_begin() + ReservedSpace); + return reinterpret_cast<block_iterator>(Ref + 1); + } + + const_block_iterator block_begin() const { + const auto *Ref = + reinterpret_cast<const Use::UserRef *>(op_begin() + ReservedSpace); + return reinterpret_cast<const_block_iterator>(Ref + 1); + } + + block_iterator block_end() { return block_begin() + getNumOperands(); } + + const_block_iterator block_end() const { + return block_begin() + getNumOperands(); + } + + iterator_range<block_iterator> blocks() { + return make_range(block_begin(), block_end()); + } + + iterator_range<const_block_iterator> blocks() const { + return make_range(block_begin(), block_end()); + } + + op_range incoming_values() { return operands(); } + + const_op_range incoming_values() const { return operands(); } + + /// Return the number of incoming edges + unsigned getNumIncomingValues() const { return getNumOperands(); } + + /// Return incoming value number x + MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); } + void setIncomingValue(unsigned I, MemoryAccess *V) { + assert(V && "PHI node got a null value!"); + setOperand(I, V); + } + + static unsigned getOperandNumForIncomingValue(unsigned I) { return I; } + static unsigned getIncomingValueNumForOperand(unsigned I) { return I; } + + /// Return incoming basic block number @p i. + BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; } + + /// Return incoming basic block corresponding + /// to an operand of the PHI. + BasicBlock *getIncomingBlock(const Use &U) const { + assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?"); + return getIncomingBlock(unsigned(&U - op_begin())); + } + + /// Return incoming basic block corresponding + /// to value use iterator. + BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const { + return getIncomingBlock(I.getUse()); + } + + void setIncomingBlock(unsigned I, BasicBlock *BB) { + assert(BB && "PHI node got a null basic block!"); + block_begin()[I] = BB; + } + + /// Add an incoming value to the end of the PHI list + void addIncoming(MemoryAccess *V, BasicBlock *BB) { + if (getNumOperands() == ReservedSpace) + growOperands(); // Get more space! + // Initialize some new operands. + setNumHungOffUseOperands(getNumOperands() + 1); + setIncomingValue(getNumOperands() - 1, V); + setIncomingBlock(getNumOperands() - 1, BB); + } + + /// Return the first index of the specified basic + /// block in the value list for this PHI. Returns -1 if no instance. 
+ int getBasicBlockIndex(const BasicBlock *BB) const { + for (unsigned I = 0, E = getNumOperands(); I != E; ++I) + if (block_begin()[I] == BB) + return I; + return -1; + } + + MemoryAccess *getIncomingValueForBlock(const BasicBlock *BB) const { + int Idx = getBasicBlockIndex(BB); + assert(Idx >= 0 && "Invalid basic block argument!"); + return getIncomingValue(Idx); + } + + // After deleting incoming position I, the order of incoming may be changed. + void unorderedDeleteIncoming(unsigned I) { + unsigned E = getNumOperands(); + assert(I < E && "Cannot remove out of bounds Phi entry."); + // MemoryPhi must have at least two incoming values, otherwise the MemoryPhi + // itself should be deleted. + assert(E >= 2 && "Cannot only remove incoming values in MemoryPhis with " + "at least 2 values."); + setIncomingValue(I, getIncomingValue(E - 1)); + setIncomingBlock(I, block_begin()[E - 1]); + setOperand(E - 1, nullptr); + block_begin()[E - 1] = nullptr; + setNumHungOffUseOperands(getNumOperands() - 1); + } + + // After deleting entries that satisfy Pred, remaining entries may have + // changed order. + template <typename Fn> void unorderedDeleteIncomingIf(Fn &&Pred) { + for (unsigned I = 0, E = getNumOperands(); I != E; ++I) + if (Pred(getIncomingValue(I), getIncomingBlock(I))) { + unorderedDeleteIncoming(I); + E = getNumOperands(); + --I; + } + assert(getNumOperands() >= 1 && + "Cannot remove all incoming blocks in a MemoryPhi."); + } + + // After deleting incoming block BB, the incoming blocks order may be changed. + void unorderedDeleteIncomingBlock(const BasicBlock *BB) { + unorderedDeleteIncomingIf( + [&](const MemoryAccess *, const BasicBlock *B) { return BB == B; }); + } + + // After deleting incoming memory access MA, the incoming accesses order may + // be changed. + void unorderedDeleteIncomingValue(const MemoryAccess *MA) { + unorderedDeleteIncomingIf( + [&](const MemoryAccess *M, const BasicBlock *) { return MA == M; }); + } + + static bool classof(const Value *V) { + return V->getValueID() == MemoryPhiVal; + } + + void print(raw_ostream &OS) const; + + unsigned getID() const { return ID; } + +protected: + friend class MemorySSA; + + /// this is more complicated than the generic + /// User::allocHungoffUses, because we have to allocate Uses for the incoming + /// values and pointers to the incoming blocks, all in one allocation. + void allocHungoffUses(unsigned N) { + User::allocHungoffUses(N, /* IsPhi */ true); + } + +private: + // For debugging only + const unsigned ID; + unsigned ReservedSpace; + + /// This grows the operand list in response to a push_back style of + /// operation. This grows the number of ops by 1.5 times. + void growOperands() { + unsigned E = getNumOperands(); + // 2 op PHI nodes are VERY common, so reserve at least enough for that. 
+ ReservedSpace = std::max(E + E / 2, 2u); + growHungoffUses(ReservedSpace, /* IsPhi */ true); + } + + static void deleteMe(DerivedUser *Self); +}; + +inline unsigned MemoryAccess::getID() const { + assert((isa<MemoryDef>(this) || isa<MemoryPhi>(this)) && + "only memory defs and phis have ids"); + if (const auto *MD = dyn_cast<MemoryDef>(this)) + return MD->getID(); + return cast<MemoryPhi>(this)->getID(); +} + +inline bool MemoryUseOrDef::isOptimized() const { + if (const auto *MD = dyn_cast<MemoryDef>(this)) + return MD->isOptimized(); + return cast<MemoryUse>(this)->isOptimized(); +} + +inline MemoryAccess *MemoryUseOrDef::getOptimized() const { + if (const auto *MD = dyn_cast<MemoryDef>(this)) + return MD->getOptimized(); + return cast<MemoryUse>(this)->getOptimized(); +} + +inline void MemoryUseOrDef::setOptimized(MemoryAccess *MA) { + if (auto *MD = dyn_cast<MemoryDef>(this)) + MD->setOptimized(MA); + else + cast<MemoryUse>(this)->setOptimized(MA); +} + +inline void MemoryUseOrDef::resetOptimized() { + if (auto *MD = dyn_cast<MemoryDef>(this)) + MD->resetOptimized(); + else + cast<MemoryUse>(this)->resetOptimized(); +} + +template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) + +/// Encapsulates MemorySSA, including all data associated with memory +/// accesses. +class MemorySSA { +public: + MemorySSA(Function &, AliasAnalysis *, DominatorTree *); + ~MemorySSA(); + + MemorySSAWalker *getWalker(); + MemorySSAWalker *getSkipSelfWalker(); + + /// Given a memory Mod/Ref'ing instruction, get the MemorySSA + /// access associated with it. If passed a basic block gets the memory phi + /// node that exists for that block, if there is one. Otherwise, this will get + /// a MemoryUseOrDef. + MemoryUseOrDef *getMemoryAccess(const Instruction *I) const { + return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I)); + } + + MemoryPhi *getMemoryAccess(const BasicBlock *BB) const { + return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB))); + } + + void dump() const; + void print(raw_ostream &) const; + + /// Return true if \p MA represents the live on entry value + /// + /// Loads and stores from pointer arguments and other global values may be + /// defined by memory operations that do not occur in the current function, so + /// they may be live on entry to the function. MemorySSA represents such + /// memory state by the live on entry definition, which is guaranteed to occur + /// before any other memory access in the function. + inline bool isLiveOnEntryDef(const MemoryAccess *MA) const { + return MA == LiveOnEntryDef.get(); + } + + inline MemoryAccess *getLiveOnEntryDef() const { + return LiveOnEntryDef.get(); + } + + // Sadly, iplists, by default, owns and deletes pointers added to the + // list. It's not currently possible to have two iplists for the same type, + // where one owns the pointers, and one does not. This is because the traits + // are per-type, not per-tag. If this ever changes, we should make the + // DefList an iplist. + using AccessList = iplist<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>; + using DefsList = + simple_ilist<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>; + + /// Return the list of MemoryAccess's for a given basic block. + /// + /// This list is not modifiable by the user. 
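+  ///
+  /// Editorial sketch (not part of the upstream header): iterating the
+  /// accesses of a block, assuming MemorySSA &MSSA and BasicBlock *BB:
+  ///
+  ///   if (const MemorySSA::AccessList *Accesses = MSSA.getBlockAccesses(BB))
+  ///     for (const MemoryAccess &MA : *Accesses)
+  ///       MA.print(llvm::errs());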
+  const AccessList *getBlockAccesses(const BasicBlock *BB) const {
+    return getWritableBlockAccesses(BB);
+  }
+
+  /// Return the list of MemoryDef's and MemoryPhi's for a given basic
+  /// block.
+  ///
+  /// This list is not modifiable by the user.
+  const DefsList *getBlockDefs(const BasicBlock *BB) const {
+    return getWritableBlockDefs(BB);
+  }
+
+  /// Given two memory accesses in the same basic block, determine
+  /// whether MemoryAccess \p A dominates MemoryAccess \p B.
+  bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+  /// Given two memory accesses in potentially different blocks,
+  /// determine whether MemoryAccess \p A dominates MemoryAccess \p B.
+  bool dominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+  /// Given a MemoryAccess and a Use, determine whether MemoryAccess \p A
+  /// dominates Use \p B.
+  bool dominates(const MemoryAccess *A, const Use &B) const;
+
+  /// Verify that MemorySSA is self-consistent (i.e., definitions dominate
+  /// all uses, uses appear in the right places). This is used by unit tests.
+  void verifyMemorySSA() const;
+
+  /// Used in various insertion functions to specify whether we are talking
+  /// about the beginning or end of a block.
+  enum InsertionPlace { Beginning, End };
+
+protected:
+  // Used by the Memory SSA annotator, dumpers, and wrapper pass
+  friend class MemorySSAAnnotatedWriter;
+  friend class MemorySSAPrinterLegacyPass;
+  friend class MemorySSAUpdater;
+
+  void verifyDefUses(Function &F) const;
+  void verifyDomination(Function &F) const;
+  void verifyOrdering(Function &F) const;
+  void verifyDominationNumbers(const Function &F) const;
+
+  // This is used by the use optimizer and updater.
+  AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
+    auto It = PerBlockAccesses.find(BB);
+    return It == PerBlockAccesses.end() ? nullptr : It->second.get();
+  }
+
+  // This is used by the use optimizer and updater.
+  DefsList *getWritableBlockDefs(const BasicBlock *BB) const {
+    auto It = PerBlockDefs.find(BB);
+    return It == PerBlockDefs.end() ? nullptr : It->second.get();
+  }
+
+  // These are used by the updater to perform various internal MemorySSA
+  // machinations. They do not always leave the IR in a correct state, and
+  // rely on the updater to fix up what they break, so they are not public.
+
+  void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where);
+  void moveTo(MemoryAccess *What, BasicBlock *BB, InsertionPlace Point);
+
+  // Rename the dominator tree branch rooted at BB.
+ void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal, + SmallPtrSetImpl<BasicBlock *> &Visited) { + renamePass(DT->getNode(BB), IncomingVal, Visited, true, true); + } + + void removeFromLookups(MemoryAccess *); + void removeFromLists(MemoryAccess *, bool ShouldDelete = true); + void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *, + InsertionPlace); + void insertIntoListsBefore(MemoryAccess *, const BasicBlock *, + AccessList::iterator); + MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *, + const MemoryUseOrDef *Template = nullptr); + +private: + class ClobberWalkerBase; + class CachingWalker; + class SkipSelfWalker; + class OptimizeUses; + + CachingWalker *getWalkerImpl(); + void buildMemorySSA(); + void optimizeUses(); + + void prepareForMoveTo(MemoryAccess *, BasicBlock *); + void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const; + + using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>; + using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>; + + void + determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks); + void markUnreachableAsLiveOnEntry(BasicBlock *BB); + bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const; + MemoryPhi *createMemoryPhi(BasicBlock *BB); + MemoryUseOrDef *createNewAccess(Instruction *, + const MemoryUseOrDef *Template = nullptr); + MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace); + void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &); + MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool); + void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool); + void renamePass(DomTreeNode *, MemoryAccess *IncomingVal, + SmallPtrSetImpl<BasicBlock *> &Visited, + bool SkipVisited = false, bool RenameAllUses = false); + AccessList *getOrCreateAccessList(const BasicBlock *); + DefsList *getOrCreateDefsList(const BasicBlock *); + void renumberBlock(const BasicBlock *) const; + AliasAnalysis *AA; + DominatorTree *DT; + Function &F; + + // Memory SSA mappings + DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess; + + // These two mappings contain the main block to access/def mappings for + // MemorySSA. The list contained in PerBlockAccesses really owns all the + // MemoryAccesses. + // Both maps maintain the invariant that if a block is found in them, the + // corresponding list is not empty, and if a block is not found in them, the + // corresponding list is empty. + AccessMap PerBlockAccesses; + DefsMap PerBlockDefs; + std::unique_ptr<MemoryAccess, ValueDeleter> LiveOnEntryDef; + + // Domination mappings + // Note that the numbering is local to a block, even though the map is + // global. + mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid; + mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering; + + // Memory SSA building info + std::unique_ptr<ClobberWalkerBase> WalkerBase; + std::unique_ptr<CachingWalker> Walker; + std::unique_ptr<SkipSelfWalker> SkipWalker; + unsigned NextID; +}; + +// Internal MemorySSA utils, for use by MemorySSA classes and walkers +class MemorySSAUtil { +protected: + friend class GVNHoist; + friend class MemorySSAWalker; + + // This function should not be used by new passes. + static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, + AliasAnalysis &AA); +}; + +// This pass does eager building and then printing of MemorySSA. It is used by +// the tests to be able to build, dump, and verify Memory SSA. 
+class MemorySSAPrinterLegacyPass : public FunctionPass {
+public:
+  MemorySSAPrinterLegacyPass();
+
+  bool runOnFunction(Function &) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  static char ID;
+};
+
+/// An analysis that produces \c MemorySSA for a function.
+///
+class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
+  friend AnalysisInfoMixin<MemorySSAAnalysis>;
+
+  static AnalysisKey Key;
+
+public:
+  // Wrap MemorySSA result to ensure address stability of internal MemorySSA
+  // pointers after construction. Use a wrapper class instead of plain
+  // unique_ptr<MemorySSA> to avoid build breakage on MSVC.
+  struct Result {
+    Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {}
+
+    MemorySSA &getMSSA() { return *MSSA.get(); }
+
+    std::unique_ptr<MemorySSA> MSSA;
+  };
+
+  Result run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Printer pass for \c MemorySSA.
+class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
+  raw_ostream &OS;
+
+public:
+  explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Verifier pass for \c MemorySSA.
+struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Legacy analysis pass which computes \c MemorySSA.
+class MemorySSAWrapperPass : public FunctionPass {
+public:
+  MemorySSAWrapperPass();
+
+  static char ID;
+
+  bool runOnFunction(Function &) override;
+  void releaseMemory() override;
+  MemorySSA &getMSSA() { return *MSSA; }
+  const MemorySSA &getMSSA() const { return *MSSA; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  void verifyAnalysis() const override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+private:
+  std::unique_ptr<MemorySSA> MSSA;
+};
+
+/// This is the generic walker interface for walkers of MemorySSA.
+/// Walkers are used to be able to further disambiguate the def-use chains
+/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives
+/// you.
+/// In particular, while the def-use chains provide basic information, and are
+/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a
+/// MemoryUse as AliasAnalysis considers it, a user may want better or other
+/// information. For example, they may want to use SCEV info to further
+/// disambiguate memory accesses, or they may want the nearest dominating
+/// may-aliasing MemoryDef for a call or a store. This API enables a
+/// standardized interface to getting and using that info.
+class MemorySSAWalker {
+public:
+  MemorySSAWalker(MemorySSA *);
+  virtual ~MemorySSAWalker() = default;
+
+  using MemoryAccessSet = SmallVector<MemoryAccess *, 8>;
+
+  /// Given a memory Mod/Ref/ModRef'ing instruction, calling this
+  /// will give you the nearest dominating MemoryAccess that Mod's the location
+  /// the instruction accesses (by skipping any def which AA can prove does not
+  /// alias the location(s) accessed by the instruction given).
+  ///
+  /// Note that this will return a single access, and it must dominate the
+  /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction,
+  /// this will return the MemoryPhi, not the operand.
This means that + /// given: + /// if (a) { + /// 1 = MemoryDef(liveOnEntry) + /// store %a + /// } else { + /// 2 = MemoryDef(liveOnEntry) + /// store %b + /// } + /// 3 = MemoryPhi(2, 1) + /// MemoryUse(3) + /// load %a + /// + /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef + /// in the if (a) branch. + MemoryAccess *getClobberingMemoryAccess(const Instruction *I) { + MemoryAccess *MA = MSSA->getMemoryAccess(I); + assert(MA && "Handed an instruction that MemorySSA doesn't recognize?"); + return getClobberingMemoryAccess(MA); + } + + /// Does the same thing as getClobberingMemoryAccess(const Instruction *I), + /// but takes a MemoryAccess instead of an Instruction. + virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0; + + /// Given a potentially clobbering memory access and a new location, + /// calling this will give you the nearest dominating clobbering MemoryAccess + /// (by skipping non-aliasing def links). + /// + /// This version of the function is mainly used to disambiguate phi translated + /// pointers, where the value of a pointer may have changed from the initial + /// memory access. Note that this expects to be handed either a MemoryUse, + /// or an already potentially clobbering access. Unlike the above API, if + /// given a MemoryDef that clobbers the pointer as the starting access, it + /// will return that MemoryDef, whereas the above would return the clobber + /// starting from the use side of the memory def. + virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + const MemoryLocation &) = 0; + + /// Given a memory access, invalidate anything this walker knows about + /// that access. + /// This API is used by walkers that store information to perform basic cache + /// invalidation. This will be called by MemorySSA at appropriate times for + /// the walker it uses or returns. + virtual void invalidateInfo(MemoryAccess *) {} + + virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); } + +protected: + friend class MemorySSA; // For updating MSSA pointer in MemorySSA move + // constructor. + MemorySSA *MSSA; +}; + +/// A MemorySSAWalker that does no alias queries, or anything else. It +/// simply returns the links as they were constructed by the builder. +class DoNothingMemorySSAWalker final : public MemorySSAWalker { +public: + // Keep the overrides below from hiding the Instruction overload of + // getClobberingMemoryAccess. + using MemorySSAWalker::getClobberingMemoryAccess; + + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + const MemoryLocation &) override; +}; + +using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>; +using ConstMemoryAccessPair = std::pair<const MemoryAccess *, MemoryLocation>; + +/// Iterator base class used to implement const and non-const iterators +/// over the defining accesses of a MemoryAccess. 
+template <class T> +class memoryaccess_def_iterator_base + : public iterator_facade_base<memoryaccess_def_iterator_base<T>, + std::forward_iterator_tag, T, ptrdiff_t, T *, + T *> { + using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base; + +public: + memoryaccess_def_iterator_base(T *Start) : Access(Start) {} + memoryaccess_def_iterator_base() = default; + + bool operator==(const memoryaccess_def_iterator_base &Other) const { + return Access == Other.Access && (!Access || ArgNo == Other.ArgNo); + } + + // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the + // block from the operand in constant time (In a PHINode, the uselist has + // both, so it's just subtraction). We provide it as part of the + // iterator to avoid callers having to linear walk to get the block. + // If the operation becomes constant time on MemoryPHI's, this bit of + // abstraction breaking should be removed. + BasicBlock *getPhiArgBlock() const { + MemoryPhi *MP = dyn_cast<MemoryPhi>(Access); + assert(MP && "Tried to get phi arg block when not iterating over a PHI"); + return MP->getIncomingBlock(ArgNo); + } + + typename BaseT::iterator::pointer operator*() const { + assert(Access && "Tried to access past the end of our iterator"); + // Go to the first argument for phis, and the defining access for everything + // else. + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) + return MP->getIncomingValue(ArgNo); + return cast<MemoryUseOrDef>(Access)->getDefiningAccess(); + } + + using BaseT::operator++; + memoryaccess_def_iterator &operator++() { + assert(Access && "Hit end of iterator"); + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) { + if (++ArgNo >= MP->getNumIncomingValues()) { + ArgNo = 0; + Access = nullptr; + } + } else { + Access = nullptr; + } + return *this; + } + +private: + T *Access = nullptr; + unsigned ArgNo = 0; +}; + +inline memoryaccess_def_iterator MemoryAccess::defs_begin() { + return memoryaccess_def_iterator(this); +} + +inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const { + return const_memoryaccess_def_iterator(this); +} + +inline memoryaccess_def_iterator MemoryAccess::defs_end() { + return memoryaccess_def_iterator(); +} + +inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const { + return const_memoryaccess_def_iterator(); +} + +/// GraphTraits for a MemoryAccess, which walks defs in the normal case, +/// and uses in the inverse case. +template <> struct GraphTraits<MemoryAccess *> { + using NodeRef = MemoryAccess *; + using ChildIteratorType = memoryaccess_def_iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); } +}; + +template <> struct GraphTraits<Inverse<MemoryAccess *>> { + using NodeRef = MemoryAccess *; + using ChildIteratorType = MemoryAccess::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->user_end(); } +}; + +/// Provide an iterator that walks defs, giving both the memory access, +/// and the current pointer location, updating the pointer location as it +/// changes due to phi node translation. +/// +/// This iterator, while somewhat specialized, is what most clients actually +/// want when walking upwards through MemorySSA def chains. 
It takes a pair of +/// <MemoryAccess,MemoryLocation>, and walks defs, properly translating the +/// memory location through phi nodes for the user. +class upward_defs_iterator + : public iterator_facade_base<upward_defs_iterator, + std::forward_iterator_tag, + const MemoryAccessPair> { + using BaseT = upward_defs_iterator::iterator_facade_base; + +public: + upward_defs_iterator(const MemoryAccessPair &Info) + : DefIterator(Info.first), Location(Info.second), + OriginalAccess(Info.first) { + CurrentPair.first = nullptr; + + WalkingPhi = Info.first && isa<MemoryPhi>(Info.first); + fillInCurrentPair(); + } + + upward_defs_iterator() { CurrentPair.first = nullptr; } + + bool operator==(const upward_defs_iterator &Other) const { + return DefIterator == Other.DefIterator; + } + + BaseT::iterator::reference operator*() const { + assert(DefIterator != OriginalAccess->defs_end() && + "Tried to access past the end of our iterator"); + return CurrentPair; + } + + using BaseT::operator++; + upward_defs_iterator &operator++() { + assert(DefIterator != OriginalAccess->defs_end() && + "Tried to access past the end of the iterator"); + ++DefIterator; + if (DefIterator != OriginalAccess->defs_end()) + fillInCurrentPair(); + return *this; + } + + BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); } + +private: + void fillInCurrentPair() { + CurrentPair.first = *DefIterator; + if (WalkingPhi && Location.Ptr) { + PHITransAddr Translator( + const_cast<Value *>(Location.Ptr), + OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); + if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), + DefIterator.getPhiArgBlock(), nullptr, + false)) + if (Translator.getAddr() != Location.Ptr) { + CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); + return; + } + } + CurrentPair.second = Location; + } + + MemoryAccessPair CurrentPair; + memoryaccess_def_iterator DefIterator; + MemoryLocation Location; + MemoryAccess *OriginalAccess = nullptr; + bool WalkingPhi = false; +}; + +inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) { + return upward_defs_iterator(Pair); +} + +inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); } + +inline iterator_range<upward_defs_iterator> +upward_defs(const MemoryAccessPair &Pair) { + return make_range(upward_defs_begin(Pair), upward_defs_end()); +} + +/// Walks the defining accesses of MemoryDefs. Stops after we hit something that +/// has no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when +/// comparing against a null def_chain_iterator, this will compare equal only +/// after walking said Phi/liveOnEntry. +/// +/// The UseOptimizedChain flag specifies whether to walk the clobbering +/// access chain, or all the accesses. +/// +/// Normally, MemoryDef are all just def/use linked together, so a def_chain on +/// a MemoryDef will walk all MemoryDefs above it in the program until it hits +/// a phi node. The optimized chain walks the clobbering access of a store. +/// So if you are just trying to find, given a store, what the next +/// thing that would clobber the same memory is, you want the optimized chain. +template <class T, bool UseOptimizedChain = false> +struct def_chain_iterator + : public iterator_facade_base<def_chain_iterator<T, UseOptimizedChain>, + std::forward_iterator_tag, MemoryAccess *> { + def_chain_iterator() : MA(nullptr) {} + def_chain_iterator(T MA) : MA(MA) {} + + T operator*() const { return MA; } + + def_chain_iterator &operator++() { + // N.B. 
liveOnEntry has a null defining access. + if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) { + if (UseOptimizedChain && MUD->isOptimized()) + MA = MUD->getOptimized(); + else + MA = MUD->getDefiningAccess(); + } else { + MA = nullptr; + } + + return *this; + } + + bool operator==(const def_chain_iterator &O) const { return MA == O.MA; } + +private: + T MA; +}; + +template <class T> +inline iterator_range<def_chain_iterator<T>> +def_chain(T MA, MemoryAccess *UpTo = nullptr) { +#ifdef EXPENSIVE_CHECKS + assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator<T>()) && + "UpTo isn't in the def chain!"); +#endif + return make_range(def_chain_iterator<T>(MA), def_chain_iterator<T>(UpTo)); +} + +template <class T> +inline iterator_range<def_chain_iterator<T, true>> optimized_def_chain(T MA) { + return make_range(def_chain_iterator<T, true>(MA), + def_chain_iterator<T, true>(nullptr)); +} + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYSSA_H diff --git a/clang-r353983e/include/llvm/Analysis/MemorySSAUpdater.h b/clang-r353983e/include/llvm/Analysis/MemorySSAUpdater.h new file mode 100644 index 00000000..58cf1cc6 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/MemorySSAUpdater.h @@ -0,0 +1,285 @@ +//===- MemorySSAUpdater.h - Memory SSA Updater-------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// An automatic updater for MemorySSA that handles arbitrary insertion, +// deletion, and moves. It performs phi insertion where necessary, and +// automatically updates the MemorySSA IR to be correct. +// While updating loads or removing instructions is often easy enough to not +// need this, updating stores should generally not be attemped outside this +// API. +// +// Basic API usage: +// Create the memory access you want for the instruction (this is mainly so +// we know where it is, without having to duplicate the entire set of create +// functions MemorySSA supports). +// Call insertDef or insertUse depending on whether it's a MemoryUse or a +// MemoryDef. +// That's it. +// +// For moving, first, move the instruction itself using the normal SSA +// instruction moving API, then just call moveBefore, moveAfter,or moveTo with +// the right arguments. 
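As a concrete illustration of the basic API described above, a minimal sketch follows. It assumes MSSA is the function's MemorySSA, NewStore is a store instruction that was just inserted into the IR, InsertPt is the MemoryAccess of the instruction it was inserted after, and Clobber is the defining access the new store should be wired to; all four names are placeholders supplied by the caller:

MemorySSAUpdater Updater(&MSSA);

MemoryUseOrDef *NewAccess = Updater.createMemoryAccessAfter(
    NewStore, /*Definition=*/Clobber, /*InsertPt=*/InsertPt);
// A store becomes a MemoryDef; plain insertion of new code (as opposed to
// code movement) may introduce new clobbers for loads below it, so ask
// insertDef to rename uses (see the insertDef comment later in this header).
Updater.insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);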
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H +#define LLVM_ANALYSIS_MEMORYSSAUPDATER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFGDiff.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +class Function; +class Instruction; +class MemoryAccess; +class LLVMContext; +class raw_ostream; + +using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>; +using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>; +using CFGUpdate = cfg::Update<BasicBlock *>; +using GraphDiffInvBBPair = + std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>; + +class MemorySSAUpdater { +private: + MemorySSA *MSSA; + + /// We use WeakVH rather than a costly deletion to deal with dangling pointers. + /// MemoryPhis are created eagerly and sometimes get zapped shortly afterwards. + SmallVector<WeakVH, 16> InsertedPHIs; + + SmallPtrSet<BasicBlock *, 8> VisitedBlocks; + SmallSet<AssertingVH<MemoryPhi>, 8> NonOptPhis; + +public: + MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {} + + /// Insert a definition into the MemorySSA IR. RenameUses will rename any use + /// below the new def block (and any inserted phis). RenameUses should be set + /// to true if the definition may cause new aliases for loads below it. This + /// is not the case for hoisting or sinking or other forms of code *movement*. + /// It *is* the case for straight code insertion. + /// For example: + /// store a + /// if (foo) { } + /// load a + /// + /// Moving the store into the if block, and calling insertDef, does not + /// require RenameUses. + /// However, changing it to: + /// store a + /// if (foo) { store b } + /// load a + /// Where a mayalias b, *does* require RenameUses be set to true. + void insertDef(MemoryDef *Def, bool RenameUses = false); + void insertUse(MemoryUse *Use); + /// Update the MemoryPhi in `To` following an edge deletion between `From` and + /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made. + void removeEdge(BasicBlock *From, BasicBlock *To); + /// Update the MemoryPhi in `To` to have a single incoming edge from `From`, + /// following a CFG change that replaced multiple edges (switch) with a direct + /// branch. + void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To); + /// Update MemorySSA after a loop was cloned, given the blocks in RPO order, + /// the exit blocks and a 1:1 mapping of all blocks and instructions + /// cloned. This involves duplicating all defs and uses in the cloned blocks + /// Updating phi nodes in exit block successors is done separately. + void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, + ArrayRef<BasicBlock *> ExitBlocks, + const ValueToValueMapTy &VM, + bool IgnoreIncomingWithNoClones = false); + // Block BB was fully or partially cloned into its predecessor P1. Map + // contains the 1:1 mapping of instructions cloned and VM[BB]=P1. 
+ void updateForClonedBlockIntoPred(BasicBlock *BB, BasicBlock *P1, + const ValueToValueMapTy &VM); + /// Update phi nodes in exit block successors following cloning. Exit blocks + /// that were not cloned don't have additional predecessors added. + void updateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks, + const ValueToValueMapTy &VMap, + DominatorTree &DT); + void updateExitBlocksForClonedLoop( + ArrayRef<BasicBlock *> ExitBlocks, + ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT); + + /// Apply CFG updates, analogous with the DT edge updates. + void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT); + /// Apply CFG insert updates, analogous with the DT edge updates. + void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT); + + void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where); + void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where); + void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, + MemorySSA::InsertionPlace Where); + /// `From` block was spliced into `From` and `To`. There is a CFG edge from + /// `From` to `To`. Move all accesses from `From` to `To` starting at + /// instruction `Start`. `To` is newly created BB, so empty of + /// MemorySSA::MemoryAccesses. Edges are already updated, so successors of + /// `To` with MPhi nodes need to update incoming block. + /// |------| |------| + /// | From | | From | + /// | | |------| + /// | | || + /// | | => \/ + /// | | |------| <- Start + /// | | | To | + /// |------| |------| + void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, + Instruction *Start); + /// `From` block was merged into `To`. There is a CFG edge from `To` to + /// `From`.`To` still branches to `From`, but all instructions were moved and + /// `From` is now an empty block; `From` is about to be deleted. Move all + /// accesses from `From` to `To` starting at instruction `Start`. `To` may + /// have multiple successors, `From` has a single predecessor. `From` may have + /// successors with MPhi nodes, replace their incoming block with `To`. + /// |------| |------| + /// | To | | To | + /// |------| | | + /// || => | | + /// \/ | | + /// |------| | | <- Start + /// | From | | | + /// |------| |------| + void moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To, + Instruction *Start); + /// A new empty BasicBlock (New) now branches directly to Old. Some of + /// Old's predecessors (Preds) are now branching to New instead of Old. + /// If New is the only predecessor, move Old's Phi, if present, to New. + /// Otherwise, add a new Phi in New with appropriate incoming values, and + /// update the incoming values in Old's Phi node too, if present. + void wireOldPredecessorsToNewImmediatePredecessor( + BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds, + bool IdenticalEdgesWereMerged = true); + // The below are utility functions. Other than creation of accesses to pass + // to insertDef, and removeAccess to remove accesses, you should generally + // not attempt to update memoryssa yourself. It is very non-trivial to get + // the edge cases right, and the above calls already operate in near-optimal + // time bounds. + + /// Create a MemoryAccess in MemorySSA at a specified point in a block, + /// with a specified clobbering definition. + /// + /// Returns the new MemoryAccess. + /// This should be called when a memory instruction is created that is being + /// used to replace an existing memory instruction. 
It will *not* create PHI + /// nodes, or verify the clobbering definition. The insertion place is used + /// solely to determine where in the memoryssa access lists the instruction + /// will be placed. The caller is expected to keep ordering the same as + /// instructions. + /// It will return the new MemoryAccess. + /// Note: If a MemoryAccess already exists for I, this function will make it + /// inaccessible and it *must* have removeMemoryAccess called on it. + MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, + const BasicBlock *BB, + MemorySSA::InsertionPlace Point); + + /// Create a MemoryAccess in MemorySSA before or after an existing + /// MemoryAccess. + /// + /// Returns the new MemoryAccess. + /// This should be called when a memory instruction is created that is being + /// used to replace an existing memory instruction. It will *not* create PHI + /// nodes, or verify the clobbering definition. + /// + /// Note: If a MemoryAccess already exists for I, this function will make it + /// inaccessible and it *must* have removeMemoryAccess called on it. + MemoryUseOrDef *createMemoryAccessBefore(Instruction *I, + MemoryAccess *Definition, + MemoryUseOrDef *InsertPt); + MemoryUseOrDef *createMemoryAccessAfter(Instruction *I, + MemoryAccess *Definition, + MemoryAccess *InsertPt); + + /// Remove a MemoryAccess from MemorySSA, including updating all + /// definitions and uses. + /// This should be called when a memory instruction that has a MemoryAccess + /// associated with it is erased from the program. For example, if a store or + /// load is simply erased (not replaced), removeMemoryAccess should be called + /// on the MemoryAccess for that store/load. + void removeMemoryAccess(MemoryAccess *, bool OptimizePhis = false); + + /// Remove MemoryAccess for a given instruction, if a MemoryAccess exists. + /// This should be called when an instruction (load/store) is deleted from + /// the program. + void removeMemoryAccess(const Instruction *I, bool OptimizePhis = false) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + removeMemoryAccess(MA, OptimizePhis); + } + + /// Remove all MemoryAcceses in a set of BasicBlocks about to be deleted. + /// Assumption we make here: all uses of deleted defs and phi must either + /// occur in blocks about to be deleted (thus will be deleted as well), or + /// they occur in phis that will simply lose an incoming value. + /// Deleted blocks still have successor info, but their predecessor edges and + /// Phi nodes may already be updated. Instructions in DeadBlocks should be + /// deleted after this call. + void removeBlocks(const SmallPtrSetImpl<BasicBlock *> &DeadBlocks); + + /// Get handle on MemorySSA. + MemorySSA* getMemorySSA() const { return MSSA; } + +private: + // Move What before Where in the MemorySSA IR. + template <class WhereType> + void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where); + // Move all memory accesses from `From` to `To` starting at `Start`. + // Restrictions apply, see public wrappers of this method. 
+ void moveAllAccesses(BasicBlock *From, BasicBlock *To, Instruction *Start); + MemoryAccess *getPreviousDef(MemoryAccess *); + MemoryAccess *getPreviousDefInBlock(MemoryAccess *); + MemoryAccess * + getPreviousDefFromEnd(BasicBlock *, + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &); + MemoryAccess * + getPreviousDefRecursive(BasicBlock *, + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &); + MemoryAccess *recursePhi(MemoryAccess *Phi); + template <class RangeType> + MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); + void fixupDefs(const SmallVectorImpl<WeakVH> &); + // Clone all uses and defs from BB to NewBB given a 1:1 map of all + // instructions and blocks cloned, and a map of MemoryPhi : Definition + // (MemoryAccess Phi or Def). VMap maps old instructions to cloned + // instructions and old blocks to cloned blocks. MPhiMap, is created in the + // caller of this private method, and maps existing MemoryPhis to new + // definitions that new MemoryAccesses must point to. These definitions may + // not necessarily be MemoryPhis themselves, they may be MemoryDefs. As such, + // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses + // may be MemoryPhis or MemoryDefs and not MemoryUses. + void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, + const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap); + template <typename Iter> + void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks, + Iter ValuesBegin, Iter ValuesEnd, + DominatorTree &DT); + void applyInsertUpdates(ArrayRef<CFGUpdate>, DominatorTree &DT, + const GraphDiff<BasicBlock *> *GD); +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYSSAUPDATER_H diff --git a/clang-r353983e/include/llvm/Analysis/ModuleSummaryAnalysis.h b/clang-r353983e/include/llvm/Analysis/ModuleSummaryAnalysis.h new file mode 100644 index 00000000..1572a49e --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ModuleSummaryAnalysis.h @@ -0,0 +1,80 @@ +//===- ModuleSummaryAnalysis.h - Module summary index builder ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface to build a ModuleSummaryIndex for a module. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H +#define LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <functional> + +namespace llvm { + +class BlockFrequencyInfo; +class Function; +class Module; +class ProfileSummaryInfo; + +/// Direct function to compute a \c ModuleSummaryIndex from a given module. +/// +/// If operating within a pass manager which has defined ways to compute the \c +/// BlockFrequencyInfo for a given function, that can be provided via +/// a std::function callback. Otherwise, this routine will manually construct +/// that information. +ModuleSummaryIndex buildModuleSummaryIndex( + const Module &M, + std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, + ProfileSummaryInfo *PSI); + +/// Analysis pass to provide the ModuleSummaryIndex object. 
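One plausible way to drive buildModuleSummaryIndex from the new pass manager is to pull per-function BlockFrequencyInfo out of the inner FunctionAnalysisManager, roughly mirroring what the analysis pass declared below will typically do. MAM and M are assumed to come from the caller:

auto &FAM =
    MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
ProfileSummaryInfo &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);

ModuleSummaryIndex Index = buildModuleSummaryIndex(
    M,
    [&FAM](const Function &F) {
      return &FAM.getResult<BlockFrequencyAnalysis>(
          *const_cast<Function *>(&F));
    },
    &PSI);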
+class ModuleSummaryIndexAnalysis + : public AnalysisInfoMixin<ModuleSummaryIndexAnalysis> { + friend AnalysisInfoMixin<ModuleSummaryIndexAnalysis>; + + static AnalysisKey Key; + +public: + using Result = ModuleSummaryIndex; + + Result run(Module &M, ModuleAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the ModuleSummaryIndex object. +class ModuleSummaryIndexWrapperPass : public ModulePass { + Optional<ModuleSummaryIndex> Index; + +public: + static char ID; + + ModuleSummaryIndexWrapperPass(); + + /// Get the index built by pass + ModuleSummaryIndex &getIndex() { return *Index; } + const ModuleSummaryIndex &getIndex() const { return *Index; } + + bool runOnModule(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createModuleSummaryIndexWrapperPass - This pass builds a ModuleSummaryIndex +// object for the module, to be written to bitcode or LLVM assembly. +// +ModulePass *createModuleSummaryIndexWrapperPass(); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/MustExecute.h b/clang-r353983e/include/llvm/Analysis/MustExecute.h new file mode 100644 index 00000000..3ef539c8 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/MustExecute.h @@ -0,0 +1,169 @@ +//===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Contains a collection of routines for determining if a given instruction is +/// guaranteed to execute if a given point in control flow is reached. The most +/// common example is an instruction within a loop being provably executed if we +/// branch to the header of it's containing loop. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H +#define LLVM_ANALYSIS_MUSTEXECUTE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/InstructionPrecedenceTracking.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" + +namespace llvm { + +class Instruction; +class DominatorTree; +class Loop; + +/// Captures loop safety information. +/// It keep information for loop blocks may throw exception or otherwise +/// exit abnormaly on any iteration of the loop which might actually execute +/// at runtime. The primary way to consume this infromation is via +/// isGuaranteedToExecute below, but some callers bailout or fallback to +/// alternate reasoning if a loop contains any implicit control flow. +/// NOTE: LoopSafetyInfo contains cached information regarding loops and their +/// particular blocks. This information is only dropped on invocation of +/// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if +/// any thrower instructions have been added or removed from them, or if the +/// control flow has changed, or in case of other meaningful modifications, the +/// LoopSafetyInfo needs to be recomputed. 
If a meaningful modifications to the +/// loop were made and the info wasn't recomputed properly, the behavior of all +/// methods except for computeLoopSafetyInfo is undefined. +class LoopSafetyInfo { + // Used to update funclet bundle operands. + DenseMap<BasicBlock *, ColorVector> BlockColors; + +protected: + /// Computes block colors. + void computeBlockColors(const Loop *CurLoop); + +public: + /// Returns block colors map that is used to update funclet operand bundles. + const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const; + + /// Copy colors of block \p Old into the block \p New. + void copyColors(BasicBlock *New, BasicBlock *Old); + + /// Returns true iff the block \p BB potentially may throw exception. It can + /// be false-positive in cases when we want to avoid complex analysis. + virtual bool blockMayThrow(const BasicBlock *BB) const = 0; + + /// Returns true iff any block of the loop for which this info is contains an + /// instruction that may throw or otherwise exit abnormally. + virtual bool anyBlockMayThrow() const = 0; + + /// Return true if we must reach the block \p BB under assumption that the + /// loop \p CurLoop is entered. + bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB, + const DominatorTree *DT) const; + + /// Computes safety information for a loop checks loop body & header for + /// the possibility of may throw exception, it takes LoopSafetyInfo and loop + /// as argument. Updates safety information in LoopSafetyInfo argument. + /// Note: This is defined to clear and reinitialize an already initialized + /// LoopSafetyInfo. Some callers rely on this fact. + virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0; + + /// Returns true if the instruction in a loop is guaranteed to execute at + /// least once (under the assumption that the loop is entered). + virtual bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const = 0; + + LoopSafetyInfo() = default; + + virtual ~LoopSafetyInfo() = default; +}; + + +/// Simple and conservative implementation of LoopSafetyInfo that can give +/// false-positive answers to its queries in order to avoid complicated +/// analysis. +class SimpleLoopSafetyInfo: public LoopSafetyInfo { + bool MayThrow = false; // The current loop contains an instruction which + // may throw. + bool HeaderMayThrow = false; // Same as previous, but specific to loop header + +public: + virtual bool blockMayThrow(const BasicBlock *BB) const; + + virtual bool anyBlockMayThrow() const; + + virtual void computeLoopSafetyInfo(const Loop *CurLoop); + + virtual bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const; + + SimpleLoopSafetyInfo() : LoopSafetyInfo() {}; + + virtual ~SimpleLoopSafetyInfo() {}; +}; + +/// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to +/// give precise answers on "may throw" queries. This implementation uses cache +/// that should be invalidated by calling the methods insertInstructionTo and +/// removeInstruction whenever we modify a basic block's contents by adding or +/// removing instructions. +class ICFLoopSafetyInfo: public LoopSafetyInfo { + bool MayThrow = false; // The current loop contains an instruction which + // may throw. + // Contains information about implicit control flow in this loop's blocks. + mutable ImplicitControlFlowTracking ICF; + // Contains information about instruction that may possibly write memory. 
+ mutable MemoryWriteTracking MW; + +public: + virtual bool blockMayThrow(const BasicBlock *BB) const; + + virtual bool anyBlockMayThrow() const; + + virtual void computeLoopSafetyInfo(const Loop *CurLoop); + + virtual bool isGuaranteedToExecute(const Instruction &Inst, + const DominatorTree *DT, + const Loop *CurLoop) const; + + /// Returns true if we could not execute a memory-modifying instruction before + /// we enter \p BB under assumption that \p CurLoop is entered. + bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) + const; + + /// Returns true if we could not execute a memory-modifying instruction before + /// we execute \p I under assumption that \p CurLoop is entered. + bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop) + const; + + /// Inform the safety info that we are planning to insert a new instruction + /// \p Inst into the basic block \p BB. It will make all cache updates to keep + /// it correct after this insertion. + void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB); + + /// Inform safety info that we are planning to remove the instruction \p Inst + /// from its block. It will make all cache updates to keep it correct after + /// this removal. + void removeInstruction(const Instruction *Inst); + + ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT), MW(DT) {}; + + virtual ~ICFLoopSafetyInfo() {}; +}; + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/ObjCARCAliasAnalysis.h new file mode 100644 index 00000000..ed15472a --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ObjCARCAliasAnalysis.h @@ -0,0 +1,96 @@ +//===- ObjCARCAliasAnalysis.h - ObjC ARC Alias Analysis ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H +#define LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" + +namespace llvm { +namespace objcarc { + +/// This is a simple alias analysis implementation that uses knowledge +/// of ARC constructs to answer queries. +/// +/// TODO: This class could be generalized to know about other ObjC-specific +/// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing +/// even though their offsets are dynamic. 
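Stepping back to the loop-safety interface declared in MustExecute.h above, a minimal sketch of the usual query pattern, assuming the caller already has a Loop *L, a DominatorTree *DT, and an Instruction &I inside the loop:

SimpleLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(L);  // clears and (re)populates the cache

if (SafetyInfo.isGuaranteedToExecute(I, DT, L)) {
  // I runs on every iteration once L is entered, so (for example) hoisting it
  // to the preheader cannot introduce a fault the original program lacked.
}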
+class ObjCARCAAResult : public AAResultBase<ObjCARCAAResult> { + friend AAResultBase<ObjCARCAAResult>; + + const DataLayout &DL; + +public: + explicit ObjCARCAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} + ObjCARCAAResult(ObjCARCAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + + using AAResultBase::getModRefBehavior; + FunctionModRefBehavior getModRefBehavior(const Function *F); + + using AAResultBase::getModRefInfo; + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class ObjCARCAA : public AnalysisInfoMixin<ObjCARCAA> { + friend AnalysisInfoMixin<ObjCARCAA>; + static AnalysisKey Key; + +public: + typedef ObjCARCAAResult Result; + + ObjCARCAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the ObjCARCAAResult object. +class ObjCARCAAWrapperPass : public ImmutablePass { + std::unique_ptr<ObjCARCAAResult> Result; + +public: + static char ID; + + ObjCARCAAWrapperPass(); + + ObjCARCAAResult &getResult() { return *Result; } + const ObjCARCAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace objcarc +} // namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/clang-r353983e/include/llvm/Analysis/ObjCARCAnalysisUtils.h new file mode 100644 index 00000000..522abd75 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -0,0 +1,300 @@ +//===- ObjCARCAnalysisUtils.h - ObjC ARC Analysis Utilities -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines common analysis utilities used by the ObjC ARC Optimizer. +/// ARC stands for Automatic Reference Counting and is a system for managing +/// reference counts for objects in Objective C. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. 
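A sketch (not the canonical PassBuilder wiring) of how the ObjCARCAA analysis above might be folded into a new-pass-manager alias-analysis stack; FAM is a FunctionAnalysisManager assumed to be set up by the caller, and a real pipeline would register BasicAA and the other AA providers alongside it:

FAM.registerPass([] { return objcarc::ObjCARCAA(); });

AAManager AA;
AA.registerFunctionAnalysis<objcarc::ObjCARCAA>(); // consult ObjCARCAAResult
// ... register BasicAA, TBAA, etc. in the same way ...
FAM.registerPass([AA] { return AA; });  // the AAManager is itself an analysis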
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H +#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" + +namespace llvm { +class raw_ostream; +} + +namespace llvm { +namespace objcarc { + +/// A handy option to enable/disable all ARC Optimizations. +extern bool EnableARCOpts; + +/// Test if the given module looks interesting to run ARC optimization +/// on. +inline bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("llvm.objc.retain") || + M.getNamedValue("llvm.objc.release") || + M.getNamedValue("llvm.objc.autorelease") || + M.getNamedValue("llvm.objc.retainAutoreleasedReturnValue") || + M.getNamedValue("llvm.objc.unsafeClaimAutoreleasedReturnValue") || + M.getNamedValue("llvm.objc.retainBlock") || + M.getNamedValue("llvm.objc.autoreleaseReturnValue") || + M.getNamedValue("llvm.objc.autoreleasePoolPush") || + M.getNamedValue("llvm.objc.loadWeakRetained") || + M.getNamedValue("llvm.objc.loadWeak") || + M.getNamedValue("llvm.objc.destroyWeak") || + M.getNamedValue("llvm.objc.storeWeak") || + M.getNamedValue("llvm.objc.initWeak") || + M.getNamedValue("llvm.objc.moveWeak") || + M.getNamedValue("llvm.objc.copyWeak") || + M.getNamedValue("llvm.objc.retainedObject") || + M.getNamedValue("llvm.objc.unretainedObject") || + M.getNamedValue("llvm.objc.unretainedPointer") || + M.getNamedValue("llvm.objc.clang.arc.use"); +} + +/// This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. +inline const Value *GetUnderlyingObjCPtr(const Value *V, + const DataLayout &DL) { + for (;;) { + V = GetUnderlyingObject(V, DL); + if (!IsForwarding(GetBasicARCInstKind(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + + return V; +} + +/// A wrapper for GetUnderlyingObjCPtr used for results memoization. +inline const Value * +GetUnderlyingObjCPtrCached(const Value *V, const DataLayout &DL, + DenseMap<const Value *, WeakTrackingVH> &Cache) { + if (auto InCache = Cache.lookup(V)) + return InCache; + + const Value *Computed = GetUnderlyingObjCPtr(V, DL); + Cache[V] = const_cast<Value *>(Computed); + return Computed; +} + +/// The RCIdentity root of a value \p V is a dominating value U for which +/// retaining or releasing U is equivalent to retaining or releasing V. In other +/// words, ARC operations on \p V are equivalent to ARC operations on \p U. +/// +/// We use this in the ARC optimizer to make it easier to match up ARC +/// operations by always mapping ARC operations to RCIdentityRoots instead of +/// pointers themselves. +/// +/// The two ways that we see RCIdentical values in ObjC are via: +/// +/// 1. PointerCasts +/// 2. Forwarding Calls that return their argument verbatim. +/// +/// Thus this function strips off pointer casts and forwarding calls. *NOTE* +/// This implies that two RCIdentical values must alias. 
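A hypothetical illustration of the stripping performed by GetRCIdentityRoot (defined just below). Suppose RetainCall holds "%r = call i8* @llvm.objc.retain(i8* %obj)" and CastVal holds a bitcast of %r to some other pointer type; both variable names are placeholders:

const Value *RootFromCast = objcarc::GetRCIdentityRoot(CastVal);
const Value *RootFromCall = objcarc::GetRCIdentityRoot(RetainCall);
// Pointer casts are stripped and objc_retain forwards its argument, so both
// queries resolve to the same root (%obj): the two values are RCIdentical.
assert(RootFromCast == RootFromCall);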
+inline const Value *GetRCIdentityRoot(const Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicARCInstKind(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + return V; +} + +/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just +/// casts away the const of the result. For documentation about what an +/// RCIdentityRoot (and by extension GetRCIdentityRoot is) look at that +/// function. +inline Value *GetRCIdentityRoot(Value *V) { + return const_cast<Value *>(GetRCIdentityRoot((const Value *)V)); +} + +/// Assuming the given instruction is one of the special calls such as +/// objc_retain or objc_release, return the RCIdentity root of the argument of +/// the call. +inline Value *GetArgRCIdentityRoot(Value *Inst) { + return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0)); +} + +inline bool IsNullOrUndef(const Value *V) { + return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); +} + +inline bool IsNoopInstruction(const Instruction *I) { + return isa<BitCastInst>(I) || + (isa<GetElementPtrInst>(I) && + cast<GetElementPtrInst>(I)->hasAllZeroIndices()); +} + +/// Test whether the given value is possible a retainable object pointer. +inline bool IsPotentialRetainableObjPtr(const Value *Op) { + // Pointers to static or stack storage are not valid retainable object + // pointers. + if (isa<Constant>(Op) || isa<AllocaInst>(Op)) + return false; + // Special arguments can not be a valid retainable object pointer. + if (const Argument *Arg = dyn_cast<Argument>(Op)) + if (Arg->hasByValAttr() || + Arg->hasInAllocaAttr() || + Arg->hasNestAttr() || + Arg->hasStructRetAttr()) + return false; + // Only consider values with pointer types. + // + // It seemes intuitive to exclude function pointer types as well, since + // functions are never retainable object pointers, however clang occasionally + // bitcasts retainable object pointers to function-pointer type temporarily. + PointerType *Ty = dyn_cast<PointerType>(Op->getType()); + if (!Ty) + return false; + // Conservatively assume anything else is a potential retainable object + // pointer. + return true; +} + +inline bool IsPotentialRetainableObjPtr(const Value *Op, + AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialRetainableObjPtr(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + +/// Helper for GetARCInstKind. Determines what kind of construct CS +/// is. +inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) + if (IsPotentialRetainableObjPtr(*I)) + return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser; + + return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call; +} + +/// Return true if this value refers to a distinct and identifiable +/// object. +/// +/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses +/// special knowledge of ObjC conventions. +inline bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". 
+ // Constants (including GlobalVariables) and Allocas are never + // reference-counted. + if (isa<CallInst>(V) || isa<InvokeInst>(V) || + isa<Argument>(V) || isa<Constant>(V) || + isa<AllocaInst>(V)) + return true; + + if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { + const Value *Pointer = + GetRCIdentityRoot(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + + StringRef Section = GV->getSection(); + if (Section.find("__message_refs") != StringRef::npos || + Section.find("__objc_classrefs") != StringRef::npos || + Section.find("__objc_superrefs") != StringRef::npos || + Section.find("__objc_methname") != StringRef::npos || + Section.find("__cstring") != StringRef::npos) + return true; + } + } + + return false; +} + +enum class ARCMDKindID { + ImpreciseRelease, + CopyOnEscape, + NoObjCARCExceptions, +}; + +/// A cache of MDKinds used by various ARC optimizations. +class ARCMDKindCache { + Module *M; + + /// The Metadata Kind for clang.imprecise_release metadata. + llvm::Optional<unsigned> ImpreciseReleaseMDKind; + + /// The Metadata Kind for clang.arc.copy_on_escape metadata. + llvm::Optional<unsigned> CopyOnEscapeMDKind; + + /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. + llvm::Optional<unsigned> NoObjCARCExceptionsMDKind; + +public: + void init(Module *Mod) { + M = Mod; + ImpreciseReleaseMDKind = NoneType::None; + CopyOnEscapeMDKind = NoneType::None; + NoObjCARCExceptionsMDKind = NoneType::None; + } + + unsigned get(ARCMDKindID ID) { + switch (ID) { + case ARCMDKindID::ImpreciseRelease: + if (!ImpreciseReleaseMDKind) + ImpreciseReleaseMDKind = + M->getContext().getMDKindID("clang.imprecise_release"); + return *ImpreciseReleaseMDKind; + case ARCMDKindID::CopyOnEscape: + if (!CopyOnEscapeMDKind) + CopyOnEscapeMDKind = + M->getContext().getMDKindID("clang.arc.copy_on_escape"); + return *CopyOnEscapeMDKind; + case ARCMDKindID::NoObjCARCExceptions: + if (!NoObjCARCExceptionsMDKind) + NoObjCARCExceptionsMDKind = + M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); + return *NoObjCARCExceptionsMDKind; + } + llvm_unreachable("Covered switch isn't covered?!"); + } +}; + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ObjCARCInstKind.h b/clang-r353983e/include/llvm/Analysis/ObjCARCInstKind.h new file mode 100644 index 00000000..9890d5f7 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ObjCARCInstKind.h @@ -0,0 +1,124 @@ +//===- ObjCARCInstKind.h - ARC instruction equivalence classes --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H +#define LLVM_ANALYSIS_OBJCARCINSTKIND_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Instructions.h" + +namespace llvm { +namespace objcarc { + +/// \enum ARCInstKind +/// +/// Equivalence classes of instructions in the ARC Model. +/// +/// Since we do not have "instructions" to represent ARC concepts in LLVM IR, +/// we instead operate on equivalence classes of instructions. +/// +/// TODO: This should be split into two enums: a runtime entry point enum +/// (possibly united with the ARCRuntimeEntrypoint class) and an enum that deals +/// with effects of instructions in the ARC model (which would handle the notion +/// of a User or CallOrUser). +enum class ARCInstKind { + Retain, ///< objc_retain + RetainRV, ///< objc_retainAutoreleasedReturnValue + ClaimRV, ///< objc_unsafeClaimAutoreleasedReturnValue + RetainBlock, ///< objc_retainBlock + Release, ///< objc_release + Autorelease, ///< objc_autorelease + AutoreleaseRV, ///< objc_autoreleaseReturnValue + AutoreleasepoolPush, ///< objc_autoreleasePoolPush + AutoreleasepoolPop, ///< objc_autoreleasePoolPop + NoopCast, ///< objc_retainedObject, etc. + FusedRetainAutorelease, ///< objc_retainAutorelease + FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue + LoadWeakRetained, ///< objc_loadWeakRetained (primitive) + StoreWeak, ///< objc_storeWeak (primitive) + InitWeak, ///< objc_initWeak (derived) + LoadWeak, ///< objc_loadWeak (derived) + MoveWeak, ///< objc_moveWeak (derived) + CopyWeak, ///< objc_copyWeak (derived) + DestroyWeak, ///< objc_destroyWeak (derived) + StoreStrong, ///< objc_storeStrong (derived) + IntrinsicUser, ///< llvm.objc.clang.arc.use + CallOrUser, ///< could call objc_release and/or "use" pointers + Call, ///< could call objc_release + User, ///< could "use" a pointer + None ///< anything that is inert from an ARC perspective. +}; + +raw_ostream &operator<<(raw_ostream &OS, const ARCInstKind Class); + +/// Test if the given class is a kind of user. +bool IsUser(ARCInstKind Class); + +/// Test if the given class is objc_retain or equivalent. +bool IsRetain(ARCInstKind Class); + +/// Test if the given class is objc_autorelease or equivalent. +bool IsAutorelease(ARCInstKind Class); + +/// Test if the given class represents instructions which return their +/// argument verbatim. +bool IsForwarding(ARCInstKind Class); + +/// Test if the given class represents instructions which do nothing if +/// passed a null pointer. +bool IsNoopOnNull(ARCInstKind Class); + +/// Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. +bool IsAlwaysTail(ARCInstKind Class); + +/// Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. +bool IsNeverTail(ARCInstKind Class); + +/// Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. +bool IsNoThrow(ARCInstKind Class); + +/// Test whether the given instruction can autorelease any pointer or cause an +/// autoreleasepool pop. +bool CanInterruptRV(ARCInstKind Class); + +/// Determine if F is one of the special known Functions. If it isn't, +/// return ARCInstKind::CallOrUser. +ARCInstKind GetFunctionClass(const Function *F); + +/// Determine which objc runtime call instruction class V belongs to. 
+/// +/// This is similar to GetARCInstKind except that it only detects objc +/// runtime calls. This allows it to be faster. +/// +inline ARCInstKind GetBasicARCInstKind(const Value *V) { + if (const CallInst *CI = dyn_cast<CallInst>(V)) { + if (const Function *F = CI->getCalledFunction()) + return GetFunctionClass(F); + // Otherwise, be conservative. + return ARCInstKind::CallOrUser; + } + + // Otherwise, be conservative. + return isa<InvokeInst>(V) ? ARCInstKind::CallOrUser : ARCInstKind::User; +} + +/// Map V to its ARCInstKind equivalence class. +ARCInstKind GetARCInstKind(const Value *V); + +/// Returns false if conservatively we can prove that any instruction mapped to +/// this kind can not decrement ref counts. Returns true otherwise. +bool CanDecrementRefCount(ARCInstKind Kind); + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/OptimizationRemarkEmitter.h b/clang-r353983e/include/llvm/Analysis/OptimizationRemarkEmitter.h new file mode 100644 index 00000000..a2b29555 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -0,0 +1,167 @@ +//===- OptimizationRemarkEmitter.h - Optimization Diagnostic ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Optimization diagnostic interfaces. It's packaged as an analysis pass so +// that by using this service passes become dependent on BFI as well. BFI is +// used to compute the "hotness" of the diagnostic message. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H +#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H + +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { +class DebugLoc; +class Loop; +class Pass; +class Twine; +class Value; + +/// The optimization diagnostic interface. +/// +/// It allows reporting when optimizations are performed and when they are not +/// along with the reasons for it. Hotness information of the corresponding +/// code region can be included in the remark if DiagnosticsHotnessRequested is +/// enabled in the LLVM context. +class OptimizationRemarkEmitter { +public: + OptimizationRemarkEmitter(const Function *F, BlockFrequencyInfo *BFI) + : F(F), BFI(BFI) {} + + /// This variant can be used to generate ORE on demand (without the + /// analysis pass). + /// + /// Note that this ctor has a very different cost depending on whether + /// F->getContext().getDiagnosticsHotnessRequested() is on or not. If it's off + /// the operation is free. + /// + /// Whereas if DiagnosticsHotnessRequested is on, it is fairly expensive + /// operation since BFI and all its required analyses are computed. This is + /// for example useful for CGSCC passes that can't use function analyses + /// passes in the old PM. 
+ OptimizationRemarkEmitter(const Function *F); + + OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg) + : F(Arg.F), BFI(Arg.BFI) {} + + OptimizationRemarkEmitter &operator=(OptimizationRemarkEmitter &&RHS) { + F = RHS.F; + BFI = RHS.BFI; + return *this; + } + + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + /// Output the remark via the diagnostic handler and to the + /// optimization record file. + void emit(DiagnosticInfoOptimizationBase &OptDiag); + + /// Take a lambda that returns a remark which will be emitted. Second + /// argument is only used to restrict this to functions. + template <typename T> + void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { + // Avoid building the remark unless we know there are at least *some* + // remarks enabled. We can't currently check whether remarks are requested + // for the calling pass since that requires actually building the remark. + + if (F->getContext().getDiagnosticsOutputFile() || + F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + auto R = RemarkBuilder(); + emit((DiagnosticInfoOptimizationBase &)R); + } + } + + /// Whether we allow for extra compile-time budget to perform more + /// analysis to produce fewer false positives. + /// + /// This is useful when reporting missed optimizations. In this case we can + /// use the extra analysis (1) to filter trivial false positives or (2) to + /// provide more context so that non-trivial false positives can be quickly + /// detected by the user. + bool allowExtraAnalysis(StringRef PassName) const { + return (F->getContext().getDiagnosticsOutputFile() || + F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); + } + +private: + const Function *F; + + BlockFrequencyInfo *BFI; + + /// If we generate BFI on demand, we need to free it when ORE is freed. + std::unique_ptr<BlockFrequencyInfo> OwnedBFI; + + /// Compute hotness from IR value (currently assumed to be a block) if PGO is + /// available. + Optional<uint64_t> computeHotness(const Value *V); + + /// Similar but use value from \p OptDiag and update hotness there. + void computeHotness(DiagnosticInfoIROptimization &OptDiag); + + /// Only allow verbose messages if we know we're filtering by hotness + /// (BFI is only set in this case). + bool shouldEmitVerbose() { return BFI != nullptr; } + + OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete; + void operator=(const OptimizationRemarkEmitter &) = delete; +}; + +/// Add a small namespace to avoid name clashes with the classes used in +/// the streaming interface. We want these to be short for better +/// write/readability. +namespace ore { +using NV = DiagnosticInfoOptimizationBase::Argument; +using setIsVerbose = DiagnosticInfoOptimizationBase::setIsVerbose; +using setExtraArgs = DiagnosticInfoOptimizationBase::setExtraArgs; +} + +/// OptimizationRemarkEmitter legacy analysis pass +/// +/// Note that this pass shouldn't generally be marked as preserved by other +/// passes. It's holding onto BFI, so if the pass does not preserve BFI, BFI +/// could be freed. 
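A minimal sketch of the usual streaming emission pattern with the lambda-based emit() above; F is the current function, while DEBUG_TYPE, Inst, and TripCount are placeholders supplied by the emitting pass, and the remark classes come from llvm/IR/DiagnosticInfo.h:

OptimizationRemarkEmitter ORE(&F);
ORE.emit([&]() {
  return OptimizationRemark(DEBUG_TYPE, "Unrolled", Inst)
         << "unrolled loop by a factor of "
         << ore::NV("UnrollCount", TripCount);
});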
+class OptimizationRemarkEmitterWrapperPass : public FunctionPass { + std::unique_ptr<OptimizationRemarkEmitter> ORE; + +public: + OptimizationRemarkEmitterWrapperPass(); + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + OptimizationRemarkEmitter &getORE() { + assert(ORE && "pass not run yet"); + return *ORE; + } + + static char ID; +}; + +class OptimizationRemarkEmitterAnalysis + : public AnalysisInfoMixin<OptimizationRemarkEmitterAnalysis> { + friend AnalysisInfoMixin<OptimizationRemarkEmitterAnalysis>; + static AnalysisKey Key; + +public: + /// Provide the result typedef for this analysis pass. + typedef OptimizationRemarkEmitter Result; + + /// Run the analysis pass over a function and produce BFI. + Result run(Function &F, FunctionAnalysisManager &AM); +}; +} +#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H diff --git a/clang-r353983e/include/llvm/Analysis/OrderedBasicBlock.h b/clang-r353983e/include/llvm/Analysis/OrderedBasicBlock.h new file mode 100644 index 00000000..6823f686 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/OrderedBasicBlock.h @@ -0,0 +1,66 @@ +//===- llvm/Analysis/OrderedBasicBlock.h --------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrderedBasicBlock class. OrderedBasicBlock maintains +// an interface where clients can query if one instruction comes before another +// in a BasicBlock. Since BasicBlock currently lacks a reliable way to query +// relative position between instructions one can use OrderedBasicBlock to do +// such queries. OrderedBasicBlock is lazily built on a source BasicBlock and +// maintains an internal Instruction -> Position map. A OrderedBasicBlock +// instance should be discarded whenever the source BasicBlock changes. +// +// It's currently used by the CaptureTracker in order to find relative +// positions of a pair of instructions inside a BasicBlock. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ORDEREDBASICBLOCK_H +#define LLVM_ANALYSIS_ORDEREDBASICBLOCK_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/BasicBlock.h" + +namespace llvm { + +class Instruction; +class BasicBlock; + +class OrderedBasicBlock { +private: + /// Map a instruction to its position in a BasicBlock. + SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts; + + /// Keep track of last instruction inserted into \p NumberedInsts. + /// It speeds up queries for uncached instructions by providing a start point + /// for new queries in OrderedBasicBlock::comesBefore. + BasicBlock::const_iterator LastInstFound; + + /// The position/number to tag the next instruction to be found. + unsigned NextInstPos; + + /// The source BasicBlock to map. + const BasicBlock *BB; + + /// Given no cached results, find if \p A comes before \p B in \p BB. + /// Cache and number out instruction while walking \p BB. + bool comesBefore(const Instruction *A, const Instruction *B); + +public: + OrderedBasicBlock(const BasicBlock *BasicB); + + /// Find out whether \p A dominates \p B, meaning whether \p A + /// comes before \p B in \p BB. 
+  /// cached instruction positions and ignores other basic blocks, being
+  /// only relevant to compare relative instruction positions inside \p BB.
+  /// Returns false for A == B.
+  bool dominates(const Instruction *A, const Instruction *B);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/clang-r353983e/include/llvm/Analysis/OrderedInstructions.h b/clang-r353983e/include/llvm/Analysis/OrderedInstructions.h
new file mode 100644
index 00000000..967b146b
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/OrderedInstructions.h
@@ -0,0 +1,64 @@
+//===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an efficient way to check for the dominance relation
+// between two instructions.
+//
+// This interface dispatches to the appropriate dominance check for two given
+// instructions: if the instructions are in the same basic block,
+// OrderedBasicBlock (with instruction numbering and caching) is used.
+// Otherwise, the dominator tree is used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
+#define LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Operator.h"
+
+namespace llvm {
+
+class OrderedInstructions {
+  /// Used to check dominance for instructions in the same basic block.
+  mutable DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>>
+      OBBMap;
+
+  /// The dominator tree of the parent function.
+  DominatorTree *DT;
+
+  /// Return true if the first instruction comes before the second in the
+  /// same basic block. It will create an ordered basic block, if it does
+  /// not yet exist in OBBMap.
+  bool localDominates(const Instruction *, const Instruction *) const;
+
+public:
+  /// Constructor.
+  OrderedInstructions(DominatorTree *DT) : DT(DT) {}
+
+  /// Return true if the first instruction dominates the second.
+  bool dominates(const Instruction *, const Instruction *) const;
+
+  /// Return true if the first instruction comes before the second in the
+  /// dominator tree DFS traversal if they are in different basic blocks,
+  /// or if the first instruction comes before the second in the same basic
+  /// block.
+  bool dfsBefore(const Instruction *, const Instruction *) const;
+
+  /// Invalidate the OrderedBasicBlock cache when its basic block changes,
+  /// i.e. if an instruction is deleted or added to the basic block, the user
+  /// should call this function to invalidate the OrderedBasicBlock cache for
+  /// this basic block.
+  void invalidateBlock(const BasicBlock *BB) { OBBMap.erase(BB); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
diff --git a/clang-r353983e/include/llvm/Analysis/PHITransAddr.h b/clang-r353983e/include/llvm/Analysis/PHITransAddr.h
new file mode 100644
index 00000000..54a07f05
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/PHITransAddr.h
@@ -0,0 +1,126 @@
+//===- PHITransAddr.h - PHI Translation for Addresses -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the PHITransAddr class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PHITRANSADDR_H +#define LLVM_ANALYSIS_PHITRANSADDR_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instruction.h" + +namespace llvm { + class AssumptionCache; + class DominatorTree; + class DataLayout; + class TargetLibraryInfo; + +/// PHITransAddr - An address value which tracks and handles phi translation. +/// As we walk "up" the CFG through predecessors, we need to ensure that the +/// address we're tracking is kept up to date. For example, if we're analyzing +/// an address of "&A[i]" and walk through the definition of 'i' which is a PHI +/// node, we *must* phi translate i to get "&A[j]" or else we will analyze an +/// incorrect pointer in the predecessor block. +/// +/// This is designed to be a relatively small object that lives on the stack and +/// is copyable. +/// +class PHITransAddr { + /// Addr - The actual address we're analyzing. + Value *Addr; + + /// The DataLayout we are playing with. + const DataLayout &DL; + + /// TLI - The target library info if known, otherwise null. + const TargetLibraryInfo *TLI; + + /// A cache of \@llvm.assume calls used by SimplifyInstruction. + AssumptionCache *AC; + + /// InstInputs - The inputs for our symbolic address. + SmallVector<Instruction*, 4> InstInputs; + +public: + PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC) + : Addr(addr), DL(DL), TLI(nullptr), AC(AC) { + // If the address is an instruction, the whole thing is considered an input. + if (Instruction *I = dyn_cast<Instruction>(Addr)) + InstInputs.push_back(I); + } + + Value *getAddr() const { return Addr; } + + /// NeedsPHITranslationFromBlock - Return true if moving from the specified + /// BasicBlock to its predecessors requires PHI translation. + bool NeedsPHITranslationFromBlock(BasicBlock *BB) const { + // We do need translation if one of our input instructions is defined in + // this block. + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + if (InstInputs[i]->getParent() == BB) + return true; + return false; + } + + /// IsPotentiallyPHITranslatable - If this needs PHI translation, return true + /// if we have some hope of doing it. This should be used as a filter to + /// avoid calling PHITranslateValue in hopeless situations. + bool IsPotentiallyPHITranslatable() const; + + /// PHITranslateValue - PHI translate the current address up the CFG from + /// CurBB to Pred, updating our state to reflect any needed changes. If + /// 'MustDominate' is true, the translated value must dominate + /// PredBB. This returns true on failure and sets Addr to null. + bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT, bool MustDominate); + + /// PHITranslateWithInsertion - PHI translate this value into the specified + /// predecessor block, inserting a computation of the value if it is + /// unavailable. + /// + /// All newly created instructions are added to the NewInsts list. This + /// returns null on failure. + /// + Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl<Instruction *> &NewInsts); + + void dump() const; + + /// Verify - Check internal consistency of this data structure. 
If the + /// structure is valid, it returns true. If invalid, it prints errors and + /// returns false. + bool Verify() const; + +private: + Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT); + + /// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated + /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB + /// block. All newly created instructions are added to the NewInsts list. + /// This returns null on failure. + /// + Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl<Instruction *> &NewInsts); + + /// AddAsInput - If the specified value is an instruction, add it as an input. + Value *AddAsInput(Value *V) { + // If V is an instruction, it is now an input. + if (Instruction *VI = dyn_cast<Instruction>(V)) + InstInputs.push_back(VI); + return V; + } +}; + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/Passes.h b/clang-r353983e/include/llvm/Analysis/Passes.h new file mode 100644 index 00000000..d9c97dff --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/Passes.h @@ -0,0 +1,108 @@ +//===-- llvm/Analysis/Passes.h - Constructors for analyses ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines prototypes for accessor functions that expose passes +// in the analysis libraries. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PASSES_H +#define LLVM_ANALYSIS_PASSES_H + +namespace llvm { + class FunctionPass; + class ImmutablePass; + class LoopPass; + class ModulePass; + class Pass; + class PassInfo; + + //===--------------------------------------------------------------------===// + // + // createObjCARCAAWrapperPass - This pass implements ObjC-ARC-based + // alias analysis. + // + ImmutablePass *createObjCARCAAWrapperPass(); + + FunctionPass *createPAEvalPass(); + + //===--------------------------------------------------------------------===// + // + /// createLazyValueInfoPass - This creates an instance of the LazyValueInfo + /// pass. + FunctionPass *createLazyValueInfoPass(); + + //===--------------------------------------------------------------------===// + // + // createDependenceAnalysisWrapperPass - This creates an instance of the + // DependenceAnalysisWrapper pass. + // + FunctionPass *createDependenceAnalysisWrapperPass(); + + //===--------------------------------------------------------------------===// + // + // createCostModelAnalysisPass - This creates an instance of the + // CostModelAnalysis pass. + // + FunctionPass *createCostModelAnalysisPass(); + + //===--------------------------------------------------------------------===// + // + // createDelinearizationPass - This pass implements attempts to restore + // multidimensional array indices from linearized expressions. + // + FunctionPass *createDelinearizationPass(); + + //===--------------------------------------------------------------------===// + // + // createLegacyDivergenceAnalysisPass - This pass determines which branches in a GPU + // program are divergent. 
+ // + FunctionPass *createLegacyDivergenceAnalysisPass(); + + //===--------------------------------------------------------------------===// + // + // Minor pass prototypes, allowing us to expose them through bugpoint and + // analyze. + FunctionPass *createInstCountPass(); + + //===--------------------------------------------------------------------===// + // + // createRegionInfoPass - This pass finds all single entry single exit regions + // in a function and builds the region hierarchy. + // + FunctionPass *createRegionInfoPass(); + + // Print module-level debug info metadata in human-readable form. + ModulePass *createModuleDebugInfoPrinterPass(); + + //===--------------------------------------------------------------------===// + // + // createMemDepPrinter - This pass exhaustively collects all memdep + // information and prints it with -analyze. + // + FunctionPass *createMemDepPrinter(); + + //===--------------------------------------------------------------------===// + // + // createMemDerefPrinter - This pass collects memory dereferenceability + // information and prints it with -analyze. + // + FunctionPass *createMemDerefPrinter(); + + //===--------------------------------------------------------------------===// + // + // createMustExecutePrinter - This pass collects information about which + // instructions within a loop are guaranteed to execute if the loop header is + // entered and prints it with -analyze. + // + FunctionPass *createMustExecutePrinter(); + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/PhiValues.h b/clang-r353983e/include/llvm/Analysis/PhiValues.h new file mode 100644 index 00000000..124fa219 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/PhiValues.h @@ -0,0 +1,158 @@ +//===- PhiValues.h - Phi Value Analysis -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PhiValues class, and associated passes, which can be +// used to find the underlying values of the phis in a function, i.e. the +// non-phi values that can be found by traversing the phi graph. +// +// This information is computed lazily and cached. If new phis are added to the +// function they are handled correctly, but if an existing phi has its operands +// modified PhiValues has to be notified by calling invalidateValue. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PHIVALUES_H +#define LLVM_ANALYSIS_PHIVALUES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" + +namespace llvm { + +class Use; +class Value; +class PHINode; +class Function; + +/// Class for calculating and caching the underlying values of phis in a +/// function. +/// +/// Initially the PhiValues is empty, and gets incrementally populated whenever +/// it is queried. +class PhiValues { +public: + using ValueSet = SmallPtrSet<Value *, 4>; + + /// Construct an empty PhiValues. + PhiValues(const Function &F) : F(F) {} + + /// Get the underlying values of a phi. 
+ /// + /// This returns the cached value if PN has previously been processed, + /// otherwise it processes it first. + const ValueSet &getValuesForPhi(const PHINode *PN); + + /// Notify PhiValues that the cached information using V is no longer valid + /// + /// Whenever a phi has its operands modified the cached values for that phi + /// (and the phis that use that phi) become invalid. A user of PhiValues has + /// to notify it of this by calling invalidateValue on either the operand or + /// the phi, which will then clear the relevant cached information. + void invalidateValue(const Value *V); + + /// Free the memory used by this class. + void releaseMemory(); + + /// Print out the values currently in the cache. + void print(raw_ostream &OS) const; + + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &); + +private: + using PhiSet = SmallPtrSet<const PHINode *, 4>; + using ConstValueSet = SmallPtrSet<const Value *, 4>; + + /// The next depth number to be used by processPhi. + unsigned int NextDepthNumber = 1; + + /// Depth numbers of phis. Phis with the same depth number are part of the + /// same strongly connected component. + DenseMap<const PHINode *, unsigned int> DepthMap; + + /// Non-phi values reachable from each component. + DenseMap<unsigned int, ValueSet> NonPhiReachableMap; + + /// All values reachable from each component. + DenseMap<unsigned int, ConstValueSet> ReachableMap; + + /// A CallbackVH to notify PhiValues when a value is deleted or replaced, so + /// that the cached information for that value can be cleared to avoid + /// dangling pointers to invalid values. + class PhiValuesCallbackVH final : public CallbackVH { + PhiValues *PV; + void deleted() override; + void allUsesReplacedWith(Value *New) override; + + public: + PhiValuesCallbackVH(Value *V, PhiValues *PV = nullptr) + : CallbackVH(V), PV(PV) {} + }; + + /// A set of callbacks to the values that processPhi has seen. + DenseSet<PhiValuesCallbackVH, DenseMapInfo<Value *>> TrackedValues; + + /// The function that the PhiValues is for. + const Function &F; + + /// Process a phi so that its entries in the depth and reachable maps are + /// fully populated. + void processPhi(const PHINode *PN, SmallVector<const PHINode *, 8> &Stack); +}; + +/// The analysis pass which yields a PhiValues +/// +/// The analysis does nothing by itself, and just returns an empty PhiValues +/// which will get filled in as it's used. +class PhiValuesAnalysis : public AnalysisInfoMixin<PhiValuesAnalysis> { + friend AnalysisInfoMixin<PhiValuesAnalysis>; + static AnalysisKey Key; + +public: + using Result = PhiValues; + PhiValues run(Function &F, FunctionAnalysisManager &); +}; + +/// A pass for printing the PhiValues for a function. +/// +/// This pass doesn't print whatever information the PhiValues happens to hold, +/// but instead first uses the PhiValues to analyze all the phis in the function +/// so the complete information is printed. 
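A short illustrative sketch of the query/invalidate contract of the PhiValues interface declared above; the function and variable names here are placeholders.

  void inspectPhis(Function &F, BasicBlock &BB) {
    PhiValues PV(F);
    for (PHINode &PN : BB.phis()) {
      // Lazily computes (and caches) the non-phi values reachable from PN.
      const PhiValues::ValueSet &Underlying = PV.getValuesForPhi(&PN);
      for (Value *V : Underlying)
        (void)V; // inspect each underlying non-phi value here
      // If PN's incoming values are later rewritten, the cache must be
      // notified explicitly, e.g. PV.invalidateValue(&PN).
    }
  }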
+class PhiValuesPrinterPass : public PassInfoMixin<PhiValuesPrinterPass> { + raw_ostream &OS; + +public: + explicit PhiValuesPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Wrapper pass for the legacy pass manager +class PhiValuesWrapperPass : public FunctionPass { + std::unique_ptr<PhiValues> Result; + +public: + static char ID; + PhiValuesWrapperPass(); + + PhiValues &getResult() { return *Result; } + const PhiValues &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/PostDominators.h b/clang-r353983e/include/llvm/Analysis/PostDominators.h new file mode 100644 index 00000000..87d2e031 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/PostDominators.h @@ -0,0 +1,115 @@ +//=- llvm/Analysis/PostDominators.h - Post Dominator Calculation --*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes interfaces to post dominance information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_POSTDOMINATORS_H +#define LLVM_ANALYSIS_POSTDOMINATORS_H + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +class Function; +class raw_ostream; + +/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to +/// compute the post-dominator tree. +class PostDominatorTree : public PostDomTreeBase<BasicBlock> { +public: + using Base = PostDomTreeBase<BasicBlock>; + + PostDominatorTree() = default; + explicit PostDominatorTree(Function &F) { recalculate(F); } + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); +}; + +/// Analysis pass which computes a \c PostDominatorTree. +class PostDominatorTreeAnalysis + : public AnalysisInfoMixin<PostDominatorTreeAnalysis> { + friend AnalysisInfoMixin<PostDominatorTreeAnalysis>; + + static AnalysisKey Key; + +public: + /// Provide the result type for this analysis pass. + using Result = PostDominatorTree; + + /// Run the analysis pass over a function and produce a post dominator + /// tree. + PostDominatorTree run(Function &F, FunctionAnalysisManager &); +}; + +/// Printer pass for the \c PostDominatorTree. 
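Before the printer and wrapper passes below, an illustrative sketch of the PostDominatorTree interface declared above; the free function and its block arguments are placeholders, and a real pass would normally obtain the tree from PostDominatorTreeAnalysis or PostDominatorTreeWrapperPass rather than rebuilding it.

  bool postDominates(Function &F, BasicBlock *A, BasicBlock *B) {
    PostDominatorTree PDT(F);   // recalculates the post-dominator tree for F
    return PDT.dominates(A, B); // true if A post-dominates B
  }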
+class PostDominatorTreePrinterPass + : public PassInfoMixin<PostDominatorTreePrinterPass> { + raw_ostream &OS; + +public: + explicit PostDominatorTreePrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +struct PostDominatorTreeWrapperPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + + PostDominatorTree DT; + + PostDominatorTreeWrapperPass() : FunctionPass(ID) { + initializePostDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + PostDominatorTree &getPostDomTree() { return DT; } + const PostDominatorTree &getPostDomTree() const { return DT; } + + bool runOnFunction(Function &F) override; + + void verifyAnalysis() const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + void releaseMemory() override { + DT.releaseMemory(); + } + + void print(raw_ostream &OS, const Module*) const override; +}; + +FunctionPass* createPostDomTree(); + +template <> struct GraphTraits<PostDominatorTree*> + : public GraphTraits<DomTreeNode*> { + static NodeRef getEntryNode(PostDominatorTree *DT) { + return DT->getRootNode(); + } + + static nodes_iterator nodes_begin(PostDominatorTree *N) { + if (getEntryNode(N)) + return df_begin(getEntryNode(N)); + else + return df_end(getEntryNode(N)); + } + + static nodes_iterator nodes_end(PostDominatorTree *N) { + return df_end(getEntryNode(N)); + } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_POSTDOMINATORS_H diff --git a/clang-r353983e/include/llvm/Analysis/ProfileSummaryInfo.h b/clang-r353983e/include/llvm/Analysis/ProfileSummaryInfo.h new file mode 100644 index 00000000..636b5d68 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ProfileSummaryInfo.h @@ -0,0 +1,171 @@ +//===- llvm/Analysis/ProfileSummaryInfo.h - profile summary ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that provides access to profile summary +// information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H +#define LLVM_ANALYSIS_PROFILE_SUMMARY_INFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <memory> + +namespace llvm { +class BasicBlock; +class BlockFrequencyInfo; +class CallSite; +class ProfileSummary; +/// Analysis providing profile information. +/// +/// This is an immutable analysis pass that provides ability to query global +/// (program-level) profile information. The main APIs are isHotCount and +/// isColdCount that tells whether a given profile count is considered hot/cold +/// based on the profile summary. This also provides convenience methods to +/// check whether a function is hot or cold. + +// FIXME: Provide convenience methods to determine hotness/coldness of other IR +// units. This would require making this depend on BFI. 
+class ProfileSummaryInfo {
+private:
+  Module &M;
+  std::unique_ptr<ProfileSummary> Summary;
+  bool computeSummary();
+  void computeThresholds();
+  // Count thresholds to answer isHotCount and isColdCount queries.
+  Optional<uint64_t> HotCountThreshold, ColdCountThreshold;
+  // True if the working set size of the code is considered huge,
+  // because the number of profile counts required to reach the hot
+  // percentile is above a huge threshold.
+  Optional<bool> HasHugeWorkingSetSize;
+
+public:
+  ProfileSummaryInfo(Module &M) : M(M) {}
+  ProfileSummaryInfo(ProfileSummaryInfo &&Arg)
+      : M(Arg.M), Summary(std::move(Arg.Summary)) {}
+
+  /// Returns true if a profile summary is available.
+  bool hasProfileSummary() { return computeSummary(); }
+
+  /// Returns true if module \c M has a sample profile.
+  bool hasSampleProfile() {
+    return hasProfileSummary() &&
+           Summary->getKind() == ProfileSummary::PSK_Sample;
+  }
+
+  /// Returns true if module \c M has an instrumentation profile.
+  bool hasInstrumentationProfile() {
+    return hasProfileSummary() &&
+           Summary->getKind() == ProfileSummary::PSK_Instr;
+  }
+
+  /// Handle the invalidation of this information.
+  ///
+  /// When used as a result of \c ProfileSummaryAnalysis this method will be
+  /// called when the module this was computed for changes. Since profile
+  /// summary is immutable after it is annotated on the module, we return false
+  /// here.
+  bool invalidate(Module &, const PreservedAnalyses &,
+                  ModuleAnalysisManager::Invalidator &) {
+    return false;
+  }
+
+  /// Returns the profile count for \p CallInst.
+  Optional<uint64_t> getProfileCount(const Instruction *CallInst,
+                                     BlockFrequencyInfo *BFI);
+  /// Returns true if the working set size of the code is considered huge.
+  bool hasHugeWorkingSetSize();
+  /// Returns true if \p F has hot function entry.
+  bool isFunctionEntryHot(const Function *F);
+  /// Returns true if \p F contains hot code.
+  bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
+  /// Returns true if \p F has cold function entry.
+  bool isFunctionEntryCold(const Function *F);
+  /// Returns true if \p F contains only cold code.
+  bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
+  /// Returns true if count \p C is considered hot.
+  bool isHotCount(uint64_t C);
+  /// Returns true if count \p C is considered cold.
+  bool isColdCount(uint64_t C);
+  /// Returns true if BasicBlock \p BB is considered hot.
+  bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+  /// Returns true if BasicBlock \p BB is considered cold.
+  bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+  /// Returns true if CallSite \p CS is considered hot.
+  bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
+  /// Returns true if CallSite \p CS is considered cold.
+  bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
+  /// Returns HotCountThreshold if set. Recompute HotCountThreshold
+  /// if not set.
+  uint64_t getOrCompHotCountThreshold();
+  /// Returns ColdCountThreshold if set. Recompute ColdCountThreshold
+  /// if not set.
+  uint64_t getOrCompColdCountThreshold();
+  /// Returns HotCountThreshold if set.
+  uint64_t getHotCountThreshold() {
+    return HotCountThreshold ? HotCountThreshold.getValue() : 0;
+  }
+  /// Returns ColdCountThreshold if set.
+  uint64_t getColdCountThreshold() {
+    return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
+  }
+};
+
+/// An analysis pass based on the legacy pass manager to deliver
+/// ProfileSummaryInfo.
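A minimal sketch of the hotness queries declared above; the function is hypothetical and assumes the module has a profile summary annotated on it (block- and call-site-level queries additionally need BlockFrequencyInfo).

  bool worthAggressiveOpt(Module &M, Function &F) {
    ProfileSummaryInfo PSI(M);
    if (!PSI.hasProfileSummary())
      return false; // no profile summary annotated on the module
    return PSI.isFunctionEntryHot(&F);
  }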
+class ProfileSummaryInfoWrapperPass : public ImmutablePass { + std::unique_ptr<ProfileSummaryInfo> PSI; + +public: + static char ID; + ProfileSummaryInfoWrapperPass(); + + ProfileSummaryInfo &getPSI() { return *PSI; } + const ProfileSummaryInfo &getPSI() const { return *PSI; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +/// An analysis pass based on the new PM to deliver ProfileSummaryInfo. +class ProfileSummaryAnalysis + : public AnalysisInfoMixin<ProfileSummaryAnalysis> { +public: + typedef ProfileSummaryInfo Result; + + Result run(Module &M, ModuleAnalysisManager &); + +private: + friend AnalysisInfoMixin<ProfileSummaryAnalysis>; + static AnalysisKey Key; +}; + +/// Printer pass that uses \c ProfileSummaryAnalysis. +class ProfileSummaryPrinterPass + : public PassInfoMixin<ProfileSummaryPrinterPass> { + raw_ostream &OS; + +public: + explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/PtrUseVisitor.h b/clang-r353983e/include/llvm/Analysis/PtrUseVisitor.h new file mode 100644 index 00000000..dca8718b --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/PtrUseVisitor.h @@ -0,0 +1,302 @@ +//===- PtrUseVisitor.h - InstVisitors over a pointers uses ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides a collection of visitors which walk the (instruction) +/// uses of a pointer. These visitors all provide the same essential behavior +/// as an InstVisitor with similar template-based flexibility and +/// implementation strategies. +/// +/// These can be used, for example, to quickly analyze the uses of an alloca, +/// global variable, or function argument. +/// +/// FIXME: Provide a variant which doesn't track offsets and is cheaper. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PTRUSEVISITOR_H +#define LLVM_ANALYSIS_PTRUSEVISITOR_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <type_traits> + +namespace llvm { + +namespace detail { + +/// Implementation of non-dependent functionality for \c PtrUseVisitor. +/// +/// See \c PtrUseVisitor for the public interface and detailed comments about +/// usage. This class is just a helper base class which is not templated and +/// contains all common code to be shared between different instantiations of +/// PtrUseVisitor. +class PtrUseVisitorBase { +public: + /// This class provides information about the result of a visit. 
+ /// + /// After walking all the users (recursively) of a pointer, the basic + /// infrastructure records some commonly useful information such as escape + /// analysis and whether the visit completed or aborted early. + class PtrInfo { + public: + PtrInfo() : AbortedInfo(nullptr, false), EscapedInfo(nullptr, false) {} + + /// Reset the pointer info, clearing all state. + void reset() { + AbortedInfo.setPointer(nullptr); + AbortedInfo.setInt(false); + EscapedInfo.setPointer(nullptr); + EscapedInfo.setInt(false); + } + + /// Did we abort the visit early? + bool isAborted() const { return AbortedInfo.getInt(); } + + /// Is the pointer escaped at some point? + bool isEscaped() const { return EscapedInfo.getInt(); } + + /// Get the instruction causing the visit to abort. + /// \returns a pointer to the instruction causing the abort if one is + /// available; otherwise returns null. + Instruction *getAbortingInst() const { return AbortedInfo.getPointer(); } + + /// Get the instruction causing the pointer to escape. + /// \returns a pointer to the instruction which escapes the pointer if one + /// is available; otherwise returns null. + Instruction *getEscapingInst() const { return EscapedInfo.getPointer(); } + + /// Mark the visit as aborted. Intended for use in a void return. + /// \param I The instruction which caused the visit to abort, if available. + void setAborted(Instruction *I = nullptr) { + AbortedInfo.setInt(true); + AbortedInfo.setPointer(I); + } + + /// Mark the pointer as escaped. Intended for use in a void return. + /// \param I The instruction which escapes the pointer, if available. + void setEscaped(Instruction *I = nullptr) { + EscapedInfo.setInt(true); + EscapedInfo.setPointer(I); + } + + /// Mark the pointer as escaped, and the visit as aborted. Intended + /// for use in a void return. + /// \param I The instruction which both escapes the pointer and aborts the + /// visit, if available. + void setEscapedAndAborted(Instruction *I = nullptr) { + setEscaped(I); + setAborted(I); + } + + private: + PointerIntPair<Instruction *, 1, bool> AbortedInfo, EscapedInfo; + }; + +protected: + const DataLayout &DL; + + /// \name Visitation infrastructure + /// @{ + + /// The info collected about the pointer being visited thus far. + PtrInfo PI; + + /// A struct of the data needed to visit a particular use. + /// + /// This is used to maintain a worklist fo to-visit uses. This is used to + /// make the visit be iterative rather than recursive. + struct UseToVisit { + using UseAndIsOffsetKnownPair = PointerIntPair<Use *, 1, bool>; + + UseAndIsOffsetKnownPair UseAndIsOffsetKnown; + APInt Offset; + }; + + /// The worklist of to-visit uses. + SmallVector<UseToVisit, 8> Worklist; + + /// A set of visited uses to break cycles in unreachable code. + SmallPtrSet<Use *, 8> VisitedUses; + + /// @} + + /// \name Per-visit state + /// This state is reset for each instruction visited. + /// @{ + + /// The use currently being visited. + Use *U; + + /// True if we have a known constant offset for the use currently + /// being visited. + bool IsOffsetKnown; + + /// The constant offset of the use if that is known. + APInt Offset; + + /// @} + + /// Note that the constructor is protected because this class must be a base + /// class, we can't create instances directly of this class. + PtrUseVisitorBase(const DataLayout &DL) : DL(DL) {} + + /// Enqueue the users of this instruction in the visit worklist. 
+ /// + /// This will visit the users with the same offset of the current visit + /// (including an unknown offset if that is the current state). + void enqueueUsers(Instruction &I); + + /// Walk the operands of a GEP and adjust the offset as appropriate. + /// + /// This routine does the heavy lifting of the pointer walk by computing + /// offsets and looking through GEPs. + bool adjustOffsetForGEP(GetElementPtrInst &GEPI); +}; + +} // end namespace detail + +/// A base class for visitors over the uses of a pointer value. +/// +/// Once constructed, a user can call \c visit on a pointer value, and this +/// will walk its uses and visit each instruction using an InstVisitor. It also +/// provides visit methods which will recurse through any pointer-to-pointer +/// transformations such as GEPs and bitcasts. +/// +/// During the visit, the current Use* being visited is available to the +/// subclass, as well as the current offset from the original base pointer if +/// known. +/// +/// The recursive visit of uses is accomplished with a worklist, so the only +/// ordering guarantee is that an instruction is visited before any uses of it +/// are visited. Note that this does *not* mean before any of its users are +/// visited! This is because users can be visited multiple times due to +/// multiple, different uses of pointers derived from the same base. +/// +/// A particular Use will only be visited once, but a User may be visited +/// multiple times, once per Use. This visits may notably have different +/// offsets. +/// +/// All visit methods on the underlying InstVisitor return a boolean. This +/// return short-circuits the visit, stopping it immediately. +/// +/// FIXME: Generalize this for all values rather than just instructions. +template <typename DerivedT> +class PtrUseVisitor : protected InstVisitor<DerivedT>, + public detail::PtrUseVisitorBase { + friend class InstVisitor<DerivedT>; + + using Base = InstVisitor<DerivedT>; + +public: + PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) { + static_assert(std::is_base_of<PtrUseVisitor, DerivedT>::value, + "Must pass the derived type to this template!"); + } + + /// Recursively visit the uses of the given pointer. + /// \returns An info struct about the pointer. See \c PtrInfo for details. + PtrInfo visitPtr(Instruction &I) { + // This must be a pointer type. Get an integer type suitable to hold + // offsets on this pointer. + // FIXME: Support a vector of pointers. + assert(I.getType()->isPointerTy()); + IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(I.getType())); + IsOffsetKnown = true; + Offset = APInt(IntPtrTy->getBitWidth(), 0); + PI.reset(); + + // Enqueue the uses of this pointer. + enqueueUsers(I); + + // Visit all the uses off the worklist until it is empty. 
+    while (!Worklist.empty()) {
+      UseToVisit ToVisit = Worklist.pop_back_val();
+      U = ToVisit.UseAndIsOffsetKnown.getPointer();
+      IsOffsetKnown = ToVisit.UseAndIsOffsetKnown.getInt();
+      if (IsOffsetKnown)
+        Offset = std::move(ToVisit.Offset);
+
+      Instruction *I = cast<Instruction>(U->getUser());
+      static_cast<DerivedT*>(this)->visit(I);
+      if (PI.isAborted())
+        break;
+    }
+    return PI;
+  }
+
+protected:
+  void visitStoreInst(StoreInst &SI) {
+    if (SI.getValueOperand() == U->get())
+      PI.setEscaped(&SI);
+  }
+
+  void visitBitCastInst(BitCastInst &BC) {
+    enqueueUsers(BC);
+  }
+
+  void visitPtrToIntInst(PtrToIntInst &I) {
+    PI.setEscaped(&I);
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    if (GEPI.use_empty())
+      return;
+
+    // If we can't walk the GEP, clear the offset.
+    if (!adjustOffsetForGEP(GEPI)) {
+      IsOffsetKnown = false;
+      Offset = APInt();
+    }
+
+    // Enqueue the users now that the offset has been adjusted.
+    enqueueUsers(GEPI);
+  }
+
+  // No-op intrinsics which we know don't escape the pointer to logic in
+  // some other function.
+  void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {}
+  void visitMemIntrinsic(MemIntrinsic &I) {}
+  void visitIntrinsicInst(IntrinsicInst &II) {
+    switch (II.getIntrinsicID()) {
+    default:
+      return Base::visitIntrinsicInst(II);
+
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      return; // No-op intrinsics.
+    }
+  }
+
+  // Generically, arguments to calls and invokes escape the pointer to some
+  // other function. Mark that.
+  void visitCallSite(CallSite CS) {
+    PI.setEscaped(CS.getInstruction());
+    Base::visitCallSite(CS);
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_PTRUSEVISITOR_H
diff --git a/clang-r353983e/include/llvm/Analysis/RegionInfo.h b/clang-r353983e/include/llvm/Analysis/RegionInfo.h
new file mode 100644
index 00000000..8bcc3e85
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/RegionInfo.h
@@ -0,0 +1,1032 @@
+//===- RegionInfo.h - SESE region analysis ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Calculate a program structure tree built out of single entry single exit
+// regions.
+// The basic ideas are taken from "The Program Structure Tree - Richard Johnson,
+// David Pearson, Keshav Pingali - 1994", however enriched with ideas from "The
+// Refined Process Structure Tree - Jussi Vanhatalo, Hagen Voelzer, Jana
+// Koehler - 2009".
+// The algorithm to calculate these data structures, however, is completely
+// different, as it takes advantage of existing information already available
+// in the (Post)dominance tree and dominance frontier passes. This leads to a
+// simpler and in practice hopefully better performing algorithm. The runtimes
+// of the algorithms described in the papers above are both linear in graph
+// size, O(V+E), whereas this algorithm is not, as the dominance frontier
+// information itself is not, but in practice the runtime seems to be on the
+// order of magnitude of dominance tree calculation.
+//
+// WARNING: LLVM is generally very concerned about compile time such that
+//          the use of additional analysis passes in the default
+//          optimization sequence is avoided as much as possible.
+// Specifically, if you do not need the RegionInfo, but dominance +// information could be sufficient please base your work only on +// the dominator tree. Most passes maintain it, such that using +// it has often near zero cost. In contrast RegionInfo is by +// default not available, is not maintained by existing +// transformations and there is no intention to do so. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_REGIONINFO_H +#define LLVM_ANALYSIS_REGIONINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <map> +#include <memory> +#include <set> +#include <string> +#include <type_traits> +#include <vector> + +namespace llvm { + +class DominanceFrontier; +class DominatorTree; +class Loop; +class LoopInfo; +class PostDominatorTree; +class Region; +template <class RegionTr> class RegionBase; +class RegionInfo; +template <class RegionTr> class RegionInfoBase; +class RegionNode; + +// Class to be specialized for different users of RegionInfo +// (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to +// pass around an unreasonable number of template parameters. +template <class FuncT_> +struct RegionTraits { + // FuncT + // BlockT + // RegionT + // RegionNodeT + // RegionInfoT + using BrokenT = typename FuncT_::UnknownRegionTypeError; +}; + +template <> +struct RegionTraits<Function> { + using FuncT = Function; + using BlockT = BasicBlock; + using RegionT = Region; + using RegionNodeT = RegionNode; + using RegionInfoT = RegionInfo; + using DomTreeT = DominatorTree; + using DomTreeNodeT = DomTreeNode; + using DomFrontierT = DominanceFrontier; + using PostDomTreeT = PostDominatorTree; + using InstT = Instruction; + using LoopT = Loop; + using LoopInfoT = LoopInfo; + + static unsigned getNumSuccessors(BasicBlock *BB) { + return BB->getTerminator()->getNumSuccessors(); + } +}; + +/// Marker class to iterate over the elements of a Region in flat mode. +/// +/// The class is used to either iterate in Flat mode or by not using it to not +/// iterate in Flat mode. During a Flat mode iteration all Regions are entered +/// and the iteration returns every BasicBlock. If the Flat mode is not +/// selected for SubRegions just one RegionNode containing the subregion is +/// returned. +template <class GraphType> +class FlatIt {}; + +/// A RegionNode represents a subregion or a BasicBlock that is part of a +/// Region. +template <class Tr> +class RegionNodeBase { + friend class RegionBase<Tr>; + +public: + using BlockT = typename Tr::BlockT; + using RegionT = typename Tr::RegionT; + +private: + /// This is the entry basic block that starts this region node. If this is a + /// BasicBlock RegionNode, then entry is just the basic block, that this + /// RegionNode represents. Otherwise it is the entry of this (Sub)RegionNode. + /// + /// In the BBtoRegionNode map of the parent of this node, BB will always map + /// to this node no matter which kind of node this one is. + /// + /// The node can hold either a Region or a BasicBlock. + /// Use one bit to save, if this RegionNode is a subregion or BasicBlock + /// RegionNode. 
+  PointerIntPair<BlockT *, 1, bool> entry;
+
+  /// The parent Region of this RegionNode.
+  /// @see getParent()
+  RegionT *parent;
+
+protected:
+  /// Create a RegionNode.
+  ///
+  /// @param Parent      The parent of this RegionNode.
+  /// @param Entry       The entry BasicBlock of the RegionNode. If this
+  ///                    RegionNode represents a BasicBlock, this is the
+  ///                    BasicBlock itself. If it represents a subregion, this
+  ///                    is the entry BasicBlock of the subregion.
+  /// @param isSubRegion If this RegionNode represents a SubRegion.
+  inline RegionNodeBase(RegionT *Parent, BlockT *Entry,
+                        bool isSubRegion = false)
+      : entry(Entry, isSubRegion), parent(Parent) {}
+
+public:
+  RegionNodeBase(const RegionNodeBase &) = delete;
+  RegionNodeBase &operator=(const RegionNodeBase &) = delete;
+
+  /// Get the parent Region of this RegionNode.
+  ///
+  /// The parent Region is the Region this RegionNode belongs to. If, for
+  /// example, a BasicBlock is an element of two Regions, there exist two
+  /// RegionNodes for this BasicBlock, each with its getParent() function
+  /// pointing to the Region that RegionNode belongs to.
+  ///
+  /// @return Get the parent Region of this RegionNode.
+  inline RegionT *getParent() const { return parent; }
+
+  /// Get the entry BasicBlock of this RegionNode.
+  ///
+  /// If this RegionNode represents a BasicBlock this is just the BasicBlock
+  /// itself, otherwise we return the entry BasicBlock of the Subregion.
+  ///
+  /// @return The entry BasicBlock of this RegionNode.
+  inline BlockT *getEntry() const { return entry.getPointer(); }
+
+  /// Get the content of this RegionNode.
+  ///
+  /// This can be either a BasicBlock or a subregion. Before calling
+  /// getNodeAs(), check the type of the content with the isSubRegion()
+  /// function call.
+  ///
+  /// @return The content of this RegionNode.
+  template <class T> inline T *getNodeAs() const;
+
+  /// Is this RegionNode a subregion?
+  ///
+  /// @return True if it contains a subregion. False if it contains a
+  ///         BasicBlock.
+  inline bool isSubRegion() const { return entry.getInt(); }
+};
+
+//===----------------------------------------------------------------------===//
+/// A single entry single exit Region.
+///
+/// A Region is a connected subgraph of a control flow graph that has exactly
+/// two connections to the remaining graph. It can be used to analyze or
+/// optimize parts of the control flow graph.
+///
+/// A <em> simple Region </em> is connected to the remaining graph by just two
+/// edges: one edge entering the Region and another one leaving the Region.
+///
+/// An <em> extended Region </em> (or just Region) is a subgraph that can be
+/// transformed into a simple Region. The transformation is done by adding
+/// BasicBlocks that merge several entry or exit edges so that after the merge
+/// just one entry and one exit edge exists.
+///
+/// The \e Entry of a Region is the first BasicBlock that is passed after
+/// entering the Region. It is an element of the Region. The entry BasicBlock
+/// dominates all BasicBlocks in the Region.
+///
+/// The \e Exit of a Region is the first BasicBlock that is passed after
+/// leaving the Region. It is not an element of the Region. The exit BasicBlock
+/// postdominates all BasicBlocks in the Region.
+///
+/// A <em> canonical Region </em> cannot be constructed by combining smaller
+/// Regions.
+///
+/// Region A is the \e parent of Region B, if B is completely contained in A.
+///
+/// Two canonical Regions either do not intersect at all or one is
+/// the parent of the other.
+/// +/// The <em> Program Structure Tree</em> is a graph (V, E) where V is the set of +/// Regions in the control flow graph and E is the \e parent relation of these +/// Regions. +/// +/// Example: +/// +/// \verbatim +/// A simple control flow graph, that contains two regions. +/// +/// 1 +/// / | +/// 2 | +/// / \ 3 +/// 4 5 | +/// | | | +/// 6 7 8 +/// \ | / +/// \ |/ Region A: 1 -> 9 {1,2,3,4,5,6,7,8} +/// 9 Region B: 2 -> 9 {2,4,5,6,7} +/// \endverbatim +/// +/// You can obtain more examples by either calling +/// +/// <tt> "opt -regions -analyze anyprogram.ll" </tt> +/// or +/// <tt> "opt -view-regions-only anyprogram.ll" </tt> +/// +/// on any LLVM file you are interested in. +/// +/// The first call returns a textual representation of the program structure +/// tree, the second one creates a graphical representation using graphviz. +template <class Tr> +class RegionBase : public RegionNodeBase<Tr> { + friend class RegionInfoBase<Tr>; + + using FuncT = typename Tr::FuncT; + using BlockT = typename Tr::BlockT; + using RegionInfoT = typename Tr::RegionInfoT; + using RegionT = typename Tr::RegionT; + using RegionNodeT = typename Tr::RegionNodeT; + using DomTreeT = typename Tr::DomTreeT; + using LoopT = typename Tr::LoopT; + using LoopInfoT = typename Tr::LoopInfoT; + using InstT = typename Tr::InstT; + + using BlockTraits = GraphTraits<BlockT *>; + using InvBlockTraits = GraphTraits<Inverse<BlockT *>>; + using SuccIterTy = typename BlockTraits::ChildIteratorType; + using PredIterTy = typename InvBlockTraits::ChildIteratorType; + + // Information necessary to manage this Region. + RegionInfoT *RI; + DomTreeT *DT; + + // The exit BasicBlock of this region. + // (The entry BasicBlock is part of RegionNode) + BlockT *exit; + + using RegionSet = std::vector<std::unique_ptr<RegionT>>; + + // The subregions of this region. + RegionSet children; + + using BBNodeMapT = std::map<BlockT *, std::unique_ptr<RegionNodeT>>; + + // Save the BasicBlock RegionNodes that are element of this Region. + mutable BBNodeMapT BBNodeMap; + + /// Check if a BB is in this Region. This check also works + /// if the region is incorrectly built. (EXPENSIVE!) + void verifyBBInRegion(BlockT *BB) const; + + /// Walk over all the BBs of the region starting from BB and + /// verify that all reachable basic blocks are elements of the region. + /// (EXPENSIVE!) + void verifyWalk(BlockT *BB, std::set<BlockT *> *visitedBB) const; + + /// Verify if the region and its children are valid regions (EXPENSIVE!) + void verifyRegionNest() const; + +public: + /// Create a new region. + /// + /// @param Entry The entry basic block of the region. + /// @param Exit The exit basic block of the region. + /// @param RI The region info object that is managing this region. + /// @param DT The dominator tree of the current function. + /// @param Parent The surrounding region or NULL if this is a top level + /// region. + RegionBase(BlockT *Entry, BlockT *Exit, RegionInfoT *RI, DomTreeT *DT, + RegionT *Parent = nullptr); + + RegionBase(const RegionBase &) = delete; + RegionBase &operator=(const RegionBase &) = delete; + + /// Delete the Region and all its subregions. + ~RegionBase(); + + /// Get the entry BasicBlock of the Region. + /// @return The entry BasicBlock of the region. + BlockT *getEntry() const { + return RegionNodeBase<Tr>::getEntry(); + } + + /// Replace the entry basic block of the region with the new basic + /// block. + /// + /// @param BB The new entry basic block of the region. 
+  void replaceEntry(BlockT *BB);
+
+  /// Replace the exit basic block of the region with the new basic
+  /// block.
+  ///
+  /// @param BB The new exit basic block of the region.
+  void replaceExit(BlockT *BB);
+
+  /// Recursively replace the entry basic block of the region.
+  ///
+  /// This function replaces the entry basic block with a new basic block. It
+  /// also updates all child regions that have the same entry basic block as
+  /// this region.
+  ///
+  /// @param NewEntry The new entry basic block.
+  void replaceEntryRecursive(BlockT *NewEntry);
+
+  /// Recursively replace the exit basic block of the region.
+  ///
+  /// This function replaces the exit basic block with a new basic block. It
+  /// also updates all child regions that have the same exit basic block as
+  /// this region.
+  ///
+  /// @param NewExit The new exit basic block.
+  void replaceExitRecursive(BlockT *NewExit);
+
+  /// Get the exit BasicBlock of the Region.
+  /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
+  ///         Region.
+  BlockT *getExit() const { return exit; }
+
+  /// Get the parent of the Region.
+  /// @return The parent of the Region or NULL if this is a top level
+  ///         Region.
+  RegionT *getParent() const {
+    return RegionNodeBase<Tr>::getParent();
+  }
+
+  /// Get the RegionNode representing the current Region.
+  /// @return The RegionNode representing the current Region.
+  RegionNodeT *getNode() const {
+    return const_cast<RegionNodeT *>(
+        reinterpret_cast<const RegionNodeT *>(this));
+  }
+
+  /// Get the nesting level of this Region.
+  ///
+  /// A toplevel Region has depth 0.
+  ///
+  /// @return The depth of the region.
+  unsigned getDepth() const;
+
+  /// Check if a Region is the TopLevel region.
+  ///
+  /// The toplevel region represents the whole function.
+  bool isTopLevelRegion() const { return exit == nullptr; }
+
+  /// Return a new (non-canonical) region that is obtained by joining
+  /// this region with its predecessors.
+  ///
+  /// @return A region also starting at getEntry(), but reaching to the next
+  ///         basic block that forms with getEntry() a (non-canonical) region.
+  ///         NULL if such a basic block does not exist.
+  RegionT *getExpandedRegion() const;
+
+  /// Return the first block of this region's single entry edge,
+  /// if existing.
+  ///
+  /// @return The BasicBlock starting this region's single entry edge,
+  ///         else NULL.
+  BlockT *getEnteringBlock() const;
+
+  /// Return the first block of this region's single exit edge,
+  /// if existing.
+  ///
+  /// @return The BasicBlock starting this region's single exit edge,
+  ///         else NULL.
+  BlockT *getExitingBlock() const;
+
+  /// Collect all blocks of this region's single exit edge, if existing.
+  ///
+  /// @return True if this region contains all the predecessors of the exit.
+  bool getExitingBlocks(SmallVectorImpl<BlockT *> &Exitings) const;
+
+  /// Is this a simple region?
+  ///
+  /// A region is simple if it has exactly one exit and one entry edge.
+  ///
+  /// @return True if the Region is simple.
+  bool isSimple() const;
+
+  /// Returns the name of the Region.
+  /// @return The name of the Region.
+  std::string getNameStr() const;
+
+  /// Return the RegionInfo object that belongs to this Region.
+  RegionInfoT *getRegionInfo() const { return RI; }
+
+  /// PrintStyle - Print the region in different ways.
+  enum PrintStyle { PrintNone, PrintBB, PrintRN };
+
+  /// Print the region.
+  ///
+  /// @param OS The output stream the Region is printed to.
+  /// @param printTree Print also the tree of subregions.
+ /// @param level The indentation level used for printing. + void print(raw_ostream &OS, bool printTree = true, unsigned level = 0, + PrintStyle Style = PrintNone) const; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the region to stderr. + void dump() const; +#endif + + /// Check if the region contains a BasicBlock. + /// + /// @param BB The BasicBlock that might be contained in this Region. + /// @return True if the block is contained in the region otherwise false. + bool contains(const BlockT *BB) const; + + /// Check if the region contains another region. + /// + /// @param SubRegion The region that might be contained in this Region. + /// @return True if SubRegion is contained in the region otherwise false. + bool contains(const RegionT *SubRegion) const { + // Toplevel Region. + if (!getExit()) + return true; + + return contains(SubRegion->getEntry()) && + (contains(SubRegion->getExit()) || + SubRegion->getExit() == getExit()); + } + + /// Check if the region contains an Instruction. + /// + /// @param Inst The Instruction that might be contained in this region. + /// @return True if the Instruction is contained in the region otherwise + /// false. + bool contains(const InstT *Inst) const { return contains(Inst->getParent()); } + + /// Check if the region contains a loop. + /// + /// @param L The loop that might be contained in this region. + /// @return True if the loop is contained in the region otherwise false. + /// In case a NULL pointer is passed to this function the result + /// is false, except for the region that describes the whole function. + /// In that case true is returned. + bool contains(const LoopT *L) const; + + /// Get the outermost loop in the region that contains a loop. + /// + /// Find for a Loop L the outermost loop OuterL that is a parent loop of L + /// and is itself contained in the region. + /// + /// @param L The loop the lookup is started. + /// @return The outermost loop in the region, NULL if such a loop does not + /// exist or if the region describes the whole function. + LoopT *outermostLoopInRegion(LoopT *L) const; + + /// Get the outermost loop in the region that contains a basic block. + /// + /// Find for a basic block BB the outermost loop L that contains BB and is + /// itself contained in the region. + /// + /// @param LI A pointer to a LoopInfo analysis. + /// @param BB The basic block surrounded by the loop. + /// @return The outermost loop in the region, NULL if such a loop does not + /// exist or if the region describes the whole function. + LoopT *outermostLoopInRegion(LoopInfoT *LI, BlockT *BB) const; + + /// Get the subregion that starts at a BasicBlock + /// + /// @param BB The BasicBlock the subregion should start. + /// @return The Subregion if available, otherwise NULL. + RegionT *getSubRegionNode(BlockT *BB) const; + + /// Get the RegionNode for a BasicBlock + /// + /// @param BB The BasicBlock at which the RegionNode should start. + /// @return If available, the RegionNode that represents the subregion + /// starting at BB. If no subregion starts at BB, the RegionNode + /// representing BB. + RegionNodeT *getNode(BlockT *BB) const; + + /// Get the BasicBlock RegionNode for a BasicBlock + /// + /// @param BB The BasicBlock for which the RegionNode is requested. + /// @return The RegionNode representing the BB. + RegionNodeT *getBBNode(BlockT *BB) const; + + /// Add a new subregion to this Region. + /// + /// @param SubRegion The new subregion that will be added. 
+ /// @param moveChildren Move the children of this region, that are also + /// contained in SubRegion into SubRegion. + void addSubRegion(RegionT *SubRegion, bool moveChildren = false); + + /// Remove a subregion from this Region. + /// + /// The subregion is not deleted, as it will probably be inserted into another + /// region. + /// @param SubRegion The SubRegion that will be removed. + RegionT *removeSubRegion(RegionT *SubRegion); + + /// Move all direct child nodes of this Region to another Region. + /// + /// @param To The Region the child nodes will be transferred to. + void transferChildrenTo(RegionT *To); + + /// Verify if the region is a correct region. + /// + /// Check if this is a correctly build Region. This is an expensive check, as + /// the complete CFG of the Region will be walked. + void verifyRegion() const; + + /// Clear the cache for BB RegionNodes. + /// + /// After calling this function the BasicBlock RegionNodes will be stored at + /// different memory locations. RegionNodes obtained before this function is + /// called are therefore not comparable to RegionNodes abtained afterwords. + void clearNodeCache(); + + /// @name Subregion Iterators + /// + /// These iterators iterator over all subregions of this Region. + //@{ + using iterator = typename RegionSet::iterator; + using const_iterator = typename RegionSet::const_iterator; + + iterator begin() { return children.begin(); } + iterator end() { return children.end(); } + + const_iterator begin() const { return children.begin(); } + const_iterator end() const { return children.end(); } + //@} + + /// @name BasicBlock Iterators + /// + /// These iterators iterate over all BasicBlocks that are contained in this + /// Region. The iterator also iterates over BasicBlocks that are elements of + /// a subregion of this Region. It is therefore called a flat iterator. + //@{ + template <bool IsConst> + class block_iterator_wrapper + : public df_iterator< + typename std::conditional<IsConst, const BlockT, BlockT>::type *> { + using super = + df_iterator< + typename std::conditional<IsConst, const BlockT, BlockT>::type *>; + + public: + using Self = block_iterator_wrapper<IsConst>; + using value_type = typename super::value_type; + + // Construct the begin iterator. + block_iterator_wrapper(value_type Entry, value_type Exit) + : super(df_begin(Entry)) { + // Mark the exit of the region as visited, so that the children of the + // exit and the exit itself, i.e. the block outside the region will never + // be visited. + super::Visited.insert(Exit); + } + + // Construct the end iterator. + block_iterator_wrapper() : super(df_end<value_type>((BlockT *)nullptr)) {} + + /*implicit*/ block_iterator_wrapper(super I) : super(I) {} + + // FIXME: Even a const_iterator returns a non-const BasicBlock pointer. + // This was introduced for backwards compatibility, but should + // be removed as soon as all users are fixed. 
+ BlockT *operator*() const { + return const_cast<BlockT *>(super::operator*()); + } + }; + + using block_iterator = block_iterator_wrapper<false>; + using const_block_iterator = block_iterator_wrapper<true>; + + block_iterator block_begin() { return block_iterator(getEntry(), getExit()); } + + block_iterator block_end() { return block_iterator(); } + + const_block_iterator block_begin() const { + return const_block_iterator(getEntry(), getExit()); + } + const_block_iterator block_end() const { return const_block_iterator(); } + + using block_range = iterator_range<block_iterator>; + using const_block_range = iterator_range<const_block_iterator>; + + /// Returns a range view of the basic blocks in the region. + inline block_range blocks() { + return block_range(block_begin(), block_end()); + } + + /// Returns a range view of the basic blocks in the region. + /// + /// This is the 'const' version of the range view. + inline const_block_range blocks() const { + return const_block_range(block_begin(), block_end()); + } + //@} + + /// @name Element Iterators + /// + /// These iterators iterate over all BasicBlock and subregion RegionNodes that + /// are direct children of this Region. It does not iterate over any + /// RegionNodes that are also element of a subregion of this Region. + //@{ + using element_iterator = + df_iterator<RegionNodeT *, df_iterator_default_set<RegionNodeT *>, false, + GraphTraits<RegionNodeT *>>; + + using const_element_iterator = + df_iterator<const RegionNodeT *, + df_iterator_default_set<const RegionNodeT *>, false, + GraphTraits<const RegionNodeT *>>; + + element_iterator element_begin(); + element_iterator element_end(); + iterator_range<element_iterator> elements() { + return make_range(element_begin(), element_end()); + } + + const_element_iterator element_begin() const; + const_element_iterator element_end() const; + iterator_range<const_element_iterator> elements() const { + return make_range(element_begin(), element_end()); + } + //@} +}; + +/// Print a RegionNode. +template <class Tr> +inline raw_ostream &operator<<(raw_ostream &OS, const RegionNodeBase<Tr> &Node); + +//===----------------------------------------------------------------------===// +/// Analysis that detects all canonical Regions. +/// +/// The RegionInfo pass detects all canonical regions in a function. The Regions +/// are connected using the parent relation. This builds a Program Structure +/// Tree. 
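+///
+/// Illustrative client sketch (not part of this interface): once a RegionInfo
+/// has been computed, the Program Structure Tree can be walked top-down. The
+/// helper name `walkRegionTree` is hypothetical.
+///
+///   void walkRegionTree(const Region *R) {
+///     errs().indent(2 * R->getDepth()) << R->getNameStr() << '\n';
+///     for (BasicBlock *BB : R->blocks())   // flat view: includes subregion blocks
+///       (void)BB;
+///     for (const std::unique_ptr<Region> &Child : *R)  // direct children only
+///       walkRegionTree(Child.get());
+///   }
+///   // e.g. walkRegionTree(RI.getTopLevelRegion());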
+template <class Tr> +class RegionInfoBase { + friend class RegionInfo; + friend class MachineRegionInfo; + + using BlockT = typename Tr::BlockT; + using FuncT = typename Tr::FuncT; + using RegionT = typename Tr::RegionT; + using RegionInfoT = typename Tr::RegionInfoT; + using DomTreeT = typename Tr::DomTreeT; + using DomTreeNodeT = typename Tr::DomTreeNodeT; + using PostDomTreeT = typename Tr::PostDomTreeT; + using DomFrontierT = typename Tr::DomFrontierT; + using BlockTraits = GraphTraits<BlockT *>; + using InvBlockTraits = GraphTraits<Inverse<BlockT *>>; + using SuccIterTy = typename BlockTraits::ChildIteratorType; + using PredIterTy = typename InvBlockTraits::ChildIteratorType; + + using BBtoBBMap = DenseMap<BlockT *, BlockT *>; + using BBtoRegionMap = DenseMap<BlockT *, RegionT *>; + + RegionInfoBase(); + + RegionInfoBase(RegionInfoBase &&Arg) + : DT(std::move(Arg.DT)), PDT(std::move(Arg.PDT)), DF(std::move(Arg.DF)), + TopLevelRegion(std::move(Arg.TopLevelRegion)), + BBtoRegion(std::move(Arg.BBtoRegion)) { + Arg.wipe(); + } + + RegionInfoBase &operator=(RegionInfoBase &&RHS) { + DT = std::move(RHS.DT); + PDT = std::move(RHS.PDT); + DF = std::move(RHS.DF); + TopLevelRegion = std::move(RHS.TopLevelRegion); + BBtoRegion = std::move(RHS.BBtoRegion); + RHS.wipe(); + return *this; + } + + virtual ~RegionInfoBase(); + + DomTreeT *DT; + PostDomTreeT *PDT; + DomFrontierT *DF; + + /// The top level region. + RegionT *TopLevelRegion = nullptr; + + /// Map every BB to the smallest region, that contains BB. + BBtoRegionMap BBtoRegion; + +protected: + /// Update refences to a RegionInfoT held by the RegionT managed here + /// + /// This is a post-move helper. Regions hold references to the owning + /// RegionInfo object. After a move these need to be fixed. + template<typename TheRegionT> + void updateRegionTree(RegionInfoT &RI, TheRegionT *R) { + if (!R) + return; + R->RI = &RI; + for (auto &SubR : *R) + updateRegionTree(RI, SubR.get()); + } + +private: + /// Wipe this region tree's state without releasing any resources. + /// + /// This is essentially a post-move helper only. It leaves the object in an + /// assignable and destroyable state, but otherwise invalid. + void wipe() { + DT = nullptr; + PDT = nullptr; + DF = nullptr; + TopLevelRegion = nullptr; + BBtoRegion.clear(); + } + + // Check whether the entries of BBtoRegion for the BBs of region + // SR are correct. Triggers an assertion if not. Calls itself recursively for + // subregions. + void verifyBBMap(const RegionT *SR) const; + + // Returns true if BB is in the dominance frontier of + // entry, because it was inherited from exit. In the other case there is an + // edge going from entry to BB without passing exit. + bool isCommonDomFrontier(BlockT *BB, BlockT *entry, BlockT *exit) const; + + // Check if entry and exit surround a valid region, based on + // dominance tree and dominance frontier. + bool isRegion(BlockT *entry, BlockT *exit) const; + + // Saves a shortcut pointing from entry to exit. + // This function may extend this shortcut if possible. + void insertShortCut(BlockT *entry, BlockT *exit, BBtoBBMap *ShortCut) const; + + // Returns the next BB that postdominates N, while skipping + // all post dominators that cannot finish a canonical region. + DomTreeNodeT *getNextPostDom(DomTreeNodeT *N, BBtoBBMap *ShortCut) const; + + // A region is trivial, if it contains only one BB. + bool isTrivialRegion(BlockT *entry, BlockT *exit) const; + + // Creates a single entry single exit region. 
+ RegionT *createRegion(BlockT *entry, BlockT *exit); + + // Detect all regions starting with bb 'entry'. + void findRegionsWithEntry(BlockT *entry, BBtoBBMap *ShortCut); + + // Detects regions in F. + void scanForRegions(FuncT &F, BBtoBBMap *ShortCut); + + // Get the top most parent with the same entry block. + RegionT *getTopMostParent(RegionT *region); + + // Build the region hierarchy after all region detected. + void buildRegionsTree(DomTreeNodeT *N, RegionT *region); + + // Update statistic about created regions. + virtual void updateStatistics(RegionT *R) = 0; + + // Detect all regions in function and build the region tree. + void calculate(FuncT &F); + +public: + RegionInfoBase(const RegionInfoBase &) = delete; + RegionInfoBase &operator=(const RegionInfoBase &) = delete; + + static bool VerifyRegionInfo; + static typename RegionT::PrintStyle printStyle; + + void print(raw_ostream &OS) const; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const; +#endif + + void releaseMemory(); + + /// Get the smallest region that contains a BasicBlock. + /// + /// @param BB The basic block. + /// @return The smallest region, that contains BB or NULL, if there is no + /// region containing BB. + RegionT *getRegionFor(BlockT *BB) const; + + /// Set the smallest region that surrounds a basic block. + /// + /// @param BB The basic block surrounded by a region. + /// @param R The smallest region that surrounds BB. + void setRegionFor(BlockT *BB, RegionT *R); + + /// A shortcut for getRegionFor(). + /// + /// @param BB The basic block. + /// @return The smallest region, that contains BB or NULL, if there is no + /// region containing BB. + RegionT *operator[](BlockT *BB) const; + + /// Return the exit of the maximal refined region, that starts at a + /// BasicBlock. + /// + /// @param BB The BasicBlock the refined region starts. + BlockT *getMaxRegionExit(BlockT *BB) const; + + /// Find the smallest region that contains two regions. + /// + /// @param A The first region. + /// @param B The second region. + /// @return The smallest region containing A and B. + RegionT *getCommonRegion(RegionT *A, RegionT *B) const; + + /// Find the smallest region that contains two basic blocks. + /// + /// @param A The first basic block. + /// @param B The second basic block. + /// @return The smallest region that contains A and B. + RegionT *getCommonRegion(BlockT *A, BlockT *B) const { + return getCommonRegion(getRegionFor(A), getRegionFor(B)); + } + + /// Find the smallest region that contains a set of regions. + /// + /// @param Regions A vector of regions. + /// @return The smallest region that contains all regions in Regions. + RegionT *getCommonRegion(SmallVectorImpl<RegionT *> &Regions) const; + + /// Find the smallest region that contains a set of basic blocks. + /// + /// @param BBs A vector of basic blocks. + /// @return The smallest region that contains all basic blocks in BBS. + RegionT *getCommonRegion(SmallVectorImpl<BlockT *> &BBs) const; + + RegionT *getTopLevelRegion() const { return TopLevelRegion; } + + /// Clear the Node Cache for all Regions. 
+ /// + /// @see Region::clearNodeCache() + void clearNodeCache() { + if (TopLevelRegion) + TopLevelRegion->clearNodeCache(); + } + + void verifyAnalysis() const; +}; + +class Region; + +class RegionNode : public RegionNodeBase<RegionTraits<Function>> { +public: + inline RegionNode(Region *Parent, BasicBlock *Entry, bool isSubRegion = false) + : RegionNodeBase<RegionTraits<Function>>(Parent, Entry, isSubRegion) {} + + bool operator==(const Region &RN) const { + return this == reinterpret_cast<const RegionNode *>(&RN); + } +}; + +class Region : public RegionBase<RegionTraits<Function>> { +public: + Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo *RI, DominatorTree *DT, + Region *Parent = nullptr); + ~Region(); + + bool operator==(const RegionNode &RN) const { + return &RN == reinterpret_cast<const RegionNode *>(this); + } +}; + +class RegionInfo : public RegionInfoBase<RegionTraits<Function>> { +public: + using Base = RegionInfoBase<RegionTraits<Function>>; + + explicit RegionInfo(); + + RegionInfo(RegionInfo &&Arg) : Base(std::move(static_cast<Base &>(Arg))) { + updateRegionTree(*this, TopLevelRegion); + } + + RegionInfo &operator=(RegionInfo &&RHS) { + Base::operator=(std::move(static_cast<Base &>(RHS))); + updateRegionTree(*this, TopLevelRegion); + return *this; + } + + ~RegionInfo() override; + + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + + // updateStatistics - Update statistic about created regions. + void updateStatistics(Region *R) final; + + void recalculate(Function &F, DominatorTree *DT, PostDominatorTree *PDT, + DominanceFrontier *DF); + +#ifndef NDEBUG + /// Opens a viewer to show the GraphViz visualization of the regions. + /// + /// Useful during debugging as an alternative to dump(). + void view(); + + /// Opens a viewer to show the GraphViz visualization of this region + /// without instructions in the BasicBlocks. + /// + /// Useful during debugging as an alternative to dump(). + void viewOnly(); +#endif +}; + +class RegionInfoPass : public FunctionPass { + RegionInfo RI; + +public: + static char ID; + + explicit RegionInfoPass(); + ~RegionInfoPass() override; + + RegionInfo &getRegionInfo() { return RI; } + + const RegionInfo &getRegionInfo() const { return RI; } + + /// @name FunctionPass interface + //@{ + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void verifyAnalysis() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *) const override; + void dump() const; + //@} +}; + +/// Analysis pass that exposes the \c RegionInfo for a function. +class RegionInfoAnalysis : public AnalysisInfoMixin<RegionInfoAnalysis> { + friend AnalysisInfoMixin<RegionInfoAnalysis>; + + static AnalysisKey Key; + +public: + using Result = RegionInfo; + + RegionInfo run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c RegionInfo. +class RegionInfoPrinterPass : public PassInfoMixin<RegionInfoPrinterPass> { + raw_ostream &OS; + +public: + explicit RegionInfoPrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Verifier pass for the \c RegionInfo. 
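+///
+/// Illustrative only: where this pass is registered in PassRegistry.def it can
+/// typically be requested by name from `opt`, e.g.
+///
+///   opt -passes='verify<regions>' -disable-output input.ll
+///
+/// The exact pipeline name is an assumption here; check PassRegistry.def for
+/// the spelling used in this tree.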
+struct RegionInfoVerifierPass : PassInfoMixin<RegionInfoVerifierPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +template <> +template <> +inline BasicBlock * +RegionNodeBase<RegionTraits<Function>>::getNodeAs<BasicBlock>() const { + assert(!isSubRegion() && "This is not a BasicBlock RegionNode!"); + return getEntry(); +} + +template <> +template <> +inline Region * +RegionNodeBase<RegionTraits<Function>>::getNodeAs<Region>() const { + assert(isSubRegion() && "This is not a subregion RegionNode!"); + auto Unconst = const_cast<RegionNodeBase<RegionTraits<Function>> *>(this); + return reinterpret_cast<Region *>(Unconst); +} + +template <class Tr> +inline raw_ostream &operator<<(raw_ostream &OS, + const RegionNodeBase<Tr> &Node) { + using BlockT = typename Tr::BlockT; + using RegionT = typename Tr::RegionT; + + if (Node.isSubRegion()) + return OS << Node.template getNodeAs<RegionT>()->getNameStr(); + else + return OS << Node.template getNodeAs<BlockT>()->getName(); +} + +extern template class RegionBase<RegionTraits<Function>>; +extern template class RegionNodeBase<RegionTraits<Function>>; +extern template class RegionInfoBase<RegionTraits<Function>>; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_REGIONINFO_H diff --git a/clang-r353983e/include/llvm/Analysis/RegionInfoImpl.h b/clang-r353983e/include/llvm/Analysis/RegionInfoImpl.h new file mode 100644 index 00000000..c59c09dd --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/RegionInfoImpl.h @@ -0,0 +1,931 @@ +//===- RegionInfoImpl.h - SESE region detection analysis --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Detects single entry single exit regions in the control flow graph. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_REGIONINFOIMPL_H +#define LLVM_ANALYSIS_REGIONINFOIMPL_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <memory> +#include <set> +#include <string> +#include <type_traits> +#include <vector> + +#define DEBUG_TYPE "region" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// RegionBase Implementation +template <class Tr> +RegionBase<Tr>::RegionBase(BlockT *Entry, BlockT *Exit, + typename Tr::RegionInfoT *RInfo, DomTreeT *dt, + RegionT *Parent) + : RegionNodeBase<Tr>(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} + +template <class Tr> +RegionBase<Tr>::~RegionBase() { + // Only clean the cache for this Region. Caches of child Regions will be + // cleaned when the child Regions are deleted. 
+ BBNodeMap.clear(); +} + +template <class Tr> +void RegionBase<Tr>::replaceEntry(BlockT *BB) { + this->entry.setPointer(BB); +} + +template <class Tr> +void RegionBase<Tr>::replaceExit(BlockT *BB) { + assert(exit && "No exit to replace!"); + exit = BB; +} + +template <class Tr> +void RegionBase<Tr>::replaceEntryRecursive(BlockT *NewEntry) { + std::vector<RegionT *> RegionQueue; + BlockT *OldEntry = getEntry(); + + RegionQueue.push_back(static_cast<RegionT *>(this)); + while (!RegionQueue.empty()) { + RegionT *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceEntry(NewEntry); + for (std::unique_ptr<RegionT> &Child : *R) { + if (Child->getEntry() == OldEntry) + RegionQueue.push_back(Child.get()); + } + } +} + +template <class Tr> +void RegionBase<Tr>::replaceExitRecursive(BlockT *NewExit) { + std::vector<RegionT *> RegionQueue; + BlockT *OldExit = getExit(); + + RegionQueue.push_back(static_cast<RegionT *>(this)); + while (!RegionQueue.empty()) { + RegionT *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceExit(NewExit); + for (std::unique_ptr<RegionT> &Child : *R) { + if (Child->getExit() == OldExit) + RegionQueue.push_back(Child.get()); + } + } +} + +template <class Tr> +bool RegionBase<Tr>::contains(const BlockT *B) const { + BlockT *BB = const_cast<BlockT *>(B); + + if (!DT->getNode(BB)) + return false; + + BlockT *entry = getEntry(), *exit = getExit(); + + // Toplevel region. + if (!exit) + return true; + + return (DT->dominates(entry, BB) && + !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); +} + +template <class Tr> +bool RegionBase<Tr>::contains(const LoopT *L) const { + // BBs that are not part of any loop are element of the Loop + // described by the NULL pointer. This loop is not part of any region, + // except if the region describes the whole function. 
+ if (!L) + return getExit() == nullptr; + + if (!contains(L->getHeader())) + return false; + + SmallVector<BlockT *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (BlockT *BB : ExitingBlocks) { + if (!contains(BB)) + return false; + } + + return true; +} + +template <class Tr> +typename Tr::LoopT *RegionBase<Tr>::outermostLoopInRegion(LoopT *L) const { + if (!contains(L)) + return nullptr; + + while (L && contains(L->getParentLoop())) { + L = L->getParentLoop(); + } + + return L; +} + +template <class Tr> +typename Tr::LoopT *RegionBase<Tr>::outermostLoopInRegion(LoopInfoT *LI, + BlockT *BB) const { + assert(LI && BB && "LI and BB cannot be null!"); + LoopT *L = LI->getLoopFor(BB); + return outermostLoopInRegion(L); +} + +template <class Tr> +typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getEnteringBlock() const { + BlockT *entry = getEntry(); + BlockT *enteringBlock = nullptr; + + for (BlockT *Pred : make_range(InvBlockTraits::child_begin(entry), + InvBlockTraits::child_end(entry))) { + if (DT->getNode(Pred) && !contains(Pred)) { + if (enteringBlock) + return nullptr; + + enteringBlock = Pred; + } + } + + return enteringBlock; +} + +template <class Tr> +bool RegionBase<Tr>::getExitingBlocks( + SmallVectorImpl<BlockT *> &Exitings) const { + bool CoverAll = true; + + if (!exit) + return CoverAll; + + for (PredIterTy PI = InvBlockTraits::child_begin(exit), + PE = InvBlockTraits::child_end(exit); + PI != PE; ++PI) { + BlockT *Pred = *PI; + if (contains(Pred)) { + Exitings.push_back(Pred); + continue; + } + + CoverAll = false; + } + + return CoverAll; +} + +template <class Tr> +typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getExitingBlock() const { + BlockT *exit = getExit(); + BlockT *exitingBlock = nullptr; + + if (!exit) + return nullptr; + + for (BlockT *Pred : make_range(InvBlockTraits::child_begin(exit), + InvBlockTraits::child_end(exit))) { + if (contains(Pred)) { + if (exitingBlock) + return nullptr; + + exitingBlock = Pred; + } + } + + return exitingBlock; +} + +template <class Tr> +bool RegionBase<Tr>::isSimple() const { + return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); +} + +template <class Tr> +std::string RegionBase<Tr>::getNameStr() const { + std::string exitName; + std::string entryName; + + if (getEntry()->getName().empty()) { + raw_string_ostream OS(entryName); + + getEntry()->printAsOperand(OS, false); + } else + entryName = getEntry()->getName(); + + if (getExit()) { + if (getExit()->getName().empty()) { + raw_string_ostream OS(exitName); + + getExit()->printAsOperand(OS, false); + } else + exitName = getExit()->getName(); + } else + exitName = "<Function Return>"; + + return entryName + " => " + exitName; +} + +template <class Tr> +void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const { + if (!contains(BB)) + report_fatal_error("Broken region found: enumerated BB not in region!"); + + BlockT *entry = getEntry(), *exit = getExit(); + + for (BlockT *Succ : + make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) { + if (!contains(Succ) && exit != Succ) + report_fatal_error("Broken region found: edges leaving the region must go " + "to the exit node!"); + } + + if (entry != BB) { + for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB), + InvBlockTraits::child_end(BB))) { + if (!contains(Pred)) + report_fatal_error("Broken region found: edges entering the region must " + "go to the entry node!"); + } + } +} + +template <class Tr> +void RegionBase<Tr>::verifyWalk(BlockT *BB, std::set<BlockT *> *visited) 
const { + BlockT *exit = getExit(); + + visited->insert(BB); + + verifyBBInRegion(BB); + + for (BlockT *Succ : + make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) { + if (Succ != exit && visited->find(Succ) == visited->end()) + verifyWalk(Succ, visited); + } +} + +template <class Tr> +void RegionBase<Tr>::verifyRegion() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finish. + if (!RegionInfoBase<Tr>::VerifyRegionInfo) + return; + + std::set<BlockT *> visited; + verifyWalk(getEntry(), &visited); +} + +template <class Tr> +void RegionBase<Tr>::verifyRegionNest() const { + for (const std::unique_ptr<RegionT> &R : *this) + R->verifyRegionNest(); + + verifyRegion(); +} + +template <class Tr> +typename RegionBase<Tr>::element_iterator RegionBase<Tr>::element_begin() { + return GraphTraits<RegionT *>::nodes_begin(static_cast<RegionT *>(this)); +} + +template <class Tr> +typename RegionBase<Tr>::element_iterator RegionBase<Tr>::element_end() { + return GraphTraits<RegionT *>::nodes_end(static_cast<RegionT *>(this)); +} + +template <class Tr> +typename RegionBase<Tr>::const_element_iterator +RegionBase<Tr>::element_begin() const { + return GraphTraits<const RegionT *>::nodes_begin( + static_cast<const RegionT *>(this)); +} + +template <class Tr> +typename RegionBase<Tr>::const_element_iterator +RegionBase<Tr>::element_end() const { + return GraphTraits<const RegionT *>::nodes_end( + static_cast<const RegionT *>(this)); +} + +template <class Tr> +typename Tr::RegionT *RegionBase<Tr>::getSubRegionNode(BlockT *BB) const { + using RegionT = typename Tr::RegionT; + + RegionT *R = RI->getRegionFor(BB); + + if (!R || R == this) + return nullptr; + + // If we pass the BB out of this region, that means our code is broken. 
+ assert(contains(R) && "BB not in current region!"); + + while (contains(R->getParent()) && R->getParent() != this) + R = R->getParent(); + + if (R->getEntry() != BB) + return nullptr; + + return R; +} + +template <class Tr> +typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + + typename BBNodeMapT::const_iterator at = BBNodeMap.find(BB); + + if (at == BBNodeMap.end()) { + auto Deconst = const_cast<RegionBase<Tr> *>(this); + typename BBNodeMapT::value_type V = { + BB, + llvm::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)}; + at = BBNodeMap.insert(std::move(V)).first; + } + return at->second.get(); +} + +template <class Tr> +typename Tr::RegionNodeT *RegionBase<Tr>::getNode(BlockT *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + if (RegionT *Child = getSubRegionNode(BB)) + return Child->getNode(); + + return getBBNode(BB); +} + +template <class Tr> +void RegionBase<Tr>::transferChildrenTo(RegionT *To) { + for (std::unique_ptr<RegionT> &R : *this) { + R->parent = To; + To->children.push_back(std::move(R)); + } + children.clear(); +} + +template <class Tr> +void RegionBase<Tr>::addSubRegion(RegionT *SubRegion, bool moveChildren) { + assert(!SubRegion->parent && "SubRegion already has a parent!"); + assert(llvm::find_if(*this, + [&](const std::unique_ptr<RegionT> &R) { + return R.get() == SubRegion; + }) == children.end() && + "Subregion already exists!"); + + SubRegion->parent = static_cast<RegionT *>(this); + children.push_back(std::unique_ptr<RegionT>(SubRegion)); + + if (!moveChildren) + return; + + assert(SubRegion->children.empty() && + "SubRegions that contain children are not supported"); + + for (RegionNodeT *Element : elements()) { + if (!Element->isSubRegion()) { + BlockT *BB = Element->template getNodeAs<BlockT>(); + + if (SubRegion->contains(BB)) + RI->setRegionFor(BB, SubRegion); + } + } + + std::vector<std::unique_ptr<RegionT>> Keep; + for (std::unique_ptr<RegionT> &R : *this) { + if (SubRegion->contains(R.get()) && R.get() != SubRegion) { + R->parent = SubRegion; + SubRegion->children.push_back(std::move(R)); + } else + Keep.push_back(std::move(R)); + } + + children.clear(); + children.insert( + children.begin(), + std::move_iterator<typename RegionSet::iterator>(Keep.begin()), + std::move_iterator<typename RegionSet::iterator>(Keep.end())); +} + +template <class Tr> +typename Tr::RegionT *RegionBase<Tr>::removeSubRegion(RegionT *Child) { + assert(Child->parent == this && "Child is not a child of this region!"); + Child->parent = nullptr; + typename RegionSet::iterator I = + llvm::find_if(children, [&](const std::unique_ptr<RegionT> &R) { + return R.get() == Child; + }); + assert(I != children.end() && "Region does not exit. 
Unable to remove."); + children.erase(children.begin() + (I - begin())); + return Child; +} + +template <class Tr> +unsigned RegionBase<Tr>::getDepth() const { + unsigned Depth = 0; + + for (RegionT *R = getParent(); R != nullptr; R = R->getParent()) + ++Depth; + + return Depth; +} + +template <class Tr> +typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const { + unsigned NumSuccessors = Tr::getNumSuccessors(exit); + + if (NumSuccessors == 0) + return nullptr; + + RegionT *R = RI->getRegionFor(exit); + + if (R->getEntry() != exit) { + for (BlockT *Pred : make_range(InvBlockTraits::child_begin(getExit()), + InvBlockTraits::child_end(getExit()))) + if (!contains(Pred)) + return nullptr; + if (Tr::getNumSuccessors(exit) == 1) + return new RegionT(getEntry(), *BlockTraits::child_begin(exit), RI, DT); + return nullptr; + } + + while (R->getParent() && R->getParent()->getEntry() == exit) + R = R->getParent(); + + for (BlockT *Pred : make_range(InvBlockTraits::child_begin(getExit()), + InvBlockTraits::child_end(getExit()))) { + if (!(contains(Pred) || R->contains(Pred))) + return nullptr; + } + + return new RegionT(getEntry(), R->getExit(), RI, DT); +} + +template <class Tr> +void RegionBase<Tr>::print(raw_ostream &OS, bool print_tree, unsigned level, + PrintStyle Style) const { + if (print_tree) + OS.indent(level * 2) << '[' << level << "] " << getNameStr(); + else + OS.indent(level * 2) << getNameStr(); + + OS << '\n'; + + if (Style != PrintNone) { + OS.indent(level * 2) << "{\n"; + OS.indent(level * 2 + 2); + + if (Style == PrintBB) { + for (const auto *BB : blocks()) + OS << BB->getName() << ", "; // TODO: remove the last "," + } else if (Style == PrintRN) { + for (const RegionNodeT *Element : elements()) { + OS << *Element << ", "; // TODO: remove the last ", + } + } + + OS << '\n'; + } + + if (print_tree) { + for (const std::unique_ptr<RegionT> &R : *this) + R->print(OS, print_tree, level + 1, Style); + } + + if (Style != PrintNone) + OS.indent(level * 2) << "} \n"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +template <class Tr> +void RegionBase<Tr>::dump() const { + print(dbgs(), true, getDepth(), RegionInfoBase<Tr>::printStyle); +} +#endif + +template <class Tr> +void RegionBase<Tr>::clearNodeCache() { + BBNodeMap.clear(); + for (std::unique_ptr<RegionT> &R : *this) + R->clearNodeCache(); +} + +//===----------------------------------------------------------------------===// +// RegionInfoBase implementation +// + +template <class Tr> +RegionInfoBase<Tr>::RegionInfoBase() = default; + +template <class Tr> +RegionInfoBase<Tr>::~RegionInfoBase() { + releaseMemory(); +} + +template <class Tr> +void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const { + assert(R && "Re must be non-null"); + for (const typename Tr::RegionNodeT *Element : R->elements()) { + if (Element->isSubRegion()) { + const RegionT *SR = Element->template getNodeAs<RegionT>(); + verifyBBMap(SR); + } else { + BlockT *BB = Element->template getNodeAs<BlockT>(); + if (getRegionFor(BB) != R) + report_fatal_error("BB map does not match region nesting"); + } + } +} + +template <class Tr> +bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry, + BlockT *exit) const { + for (BlockT *P : make_range(InvBlockTraits::child_begin(BB), + InvBlockTraits::child_end(BB))) { + if (DT->dominates(entry, P) && !DT->dominates(exit, P)) + return false; + } + + return true; +} + +template <class Tr> +bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const { + assert(entry && exit && "entry 
and exit must not be null!"); + + using DST = typename DomFrontierT::DomSetType; + + DST *entrySuccs = &DF->find(entry)->second; + + // Exit is the header of a loop that contains the entry. In this case, + // the dominance frontier must only contain the exit. + if (!DT->dominates(entry, exit)) { + for (typename DST::iterator SI = entrySuccs->begin(), + SE = entrySuccs->end(); + SI != SE; ++SI) { + if (*SI != exit && *SI != entry) + return false; + } + + return true; + } + + DST *exitSuccs = &DF->find(exit)->second; + + // Do not allow edges leaving the region. + for (BlockT *Succ : *entrySuccs) { + if (Succ == exit || Succ == entry) + continue; + if (exitSuccs->find(Succ) == exitSuccs->end()) + return false; + if (!isCommonDomFrontier(Succ, entry, exit)) + return false; + } + + // Do not allow edges pointing into the region. + for (BlockT *Succ : *exitSuccs) { + if (DT->properlyDominates(entry, Succ) && Succ != exit) + return false; + } + + return true; +} + +template <class Tr> +void RegionInfoBase<Tr>::insertShortCut(BlockT *entry, BlockT *exit, + BBtoBBMap *ShortCut) const { + assert(entry && exit && "entry and exit must not be null!"); + + typename BBtoBBMap::iterator e = ShortCut->find(exit); + + if (e == ShortCut->end()) + // No further region at exit available. + (*ShortCut)[entry] = exit; + else { + // We found a region e that starts at exit. Therefore (entry, e->second) + // is also a region, that is larger than (entry, exit). Insert the + // larger one. + BlockT *BB = e->second; + (*ShortCut)[entry] = BB; + } +} + +template <class Tr> +typename Tr::DomTreeNodeT * +RegionInfoBase<Tr>::getNextPostDom(DomTreeNodeT *N, BBtoBBMap *ShortCut) const { + typename BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); + + if (e == ShortCut->end()) + return N->getIDom(); + + return PDT->getNode(e->second)->getIDom(); +} + +template <class Tr> +bool RegionInfoBase<Tr>::isTrivialRegion(BlockT *entry, BlockT *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + + unsigned num_successors = + BlockTraits::child_end(entry) - BlockTraits::child_begin(entry); + + if (num_successors <= 1 && exit == *(BlockTraits::child_begin(entry))) + return true; + + return false; +} + +template <class Tr> +typename Tr::RegionT *RegionInfoBase<Tr>::createRegion(BlockT *entry, + BlockT *exit) { + assert(entry && exit && "entry and exit must not be null!"); + + if (isTrivialRegion(entry, exit)) + return nullptr; + + RegionT *region = + new RegionT(entry, exit, static_cast<RegionInfoT *>(this), DT); + BBtoRegion.insert({entry, region}); + +#ifdef EXPENSIVE_CHECKS + region->verifyRegion(); +#else + LLVM_DEBUG(region->verifyRegion()); +#endif + + updateStatistics(region); + return region; +} + +template <class Tr> +void RegionInfoBase<Tr>::findRegionsWithEntry(BlockT *entry, + BBtoBBMap *ShortCut) { + assert(entry); + + DomTreeNodeT *N = PDT->getNode(entry); + if (!N) + return; + + RegionT *lastRegion = nullptr; + BlockT *lastExit = entry; + + // As only a BasicBlock that postdominates entry can finish a region, walk the + // post dominance tree upwards. + while ((N = getNextPostDom(N, ShortCut))) { + BlockT *exit = N->getBlock(); + + if (!exit) + break; + + if (isRegion(entry, exit)) { + RegionT *newRegion = createRegion(entry, exit); + + if (lastRegion) + newRegion->addSubRegion(lastRegion); + + lastRegion = newRegion; + lastExit = exit; + } + + // This can never be a region, so stop the search. 
+ if (!DT->dominates(entry, exit)) + break; + } + + // Tried to create regions from entry to lastExit. Next time take a + // shortcut from entry to lastExit. + if (lastExit != entry) + insertShortCut(entry, lastExit, ShortCut); +} + +template <class Tr> +void RegionInfoBase<Tr>::scanForRegions(FuncT &F, BBtoBBMap *ShortCut) { + using FuncPtrT = typename std::add_pointer<FuncT>::type; + + BlockT *entry = GraphTraits<FuncPtrT>::getEntryNode(&F); + DomTreeNodeT *N = DT->getNode(entry); + + // Iterate over the dominance tree in post order to start with the small + // regions from the bottom of the dominance tree. If the small regions are + // detected first, detection of bigger regions is faster, as we can jump + // over the small regions. + for (auto DomNode : post_order(N)) + findRegionsWithEntry(DomNode->getBlock(), ShortCut); +} + +template <class Tr> +typename Tr::RegionT *RegionInfoBase<Tr>::getTopMostParent(RegionT *region) { + while (region->getParent()) + region = region->getParent(); + + return region; +} + +template <class Tr> +void RegionInfoBase<Tr>::buildRegionsTree(DomTreeNodeT *N, RegionT *region) { + BlockT *BB = N->getBlock(); + + // Passed region exit + while (BB == region->getExit()) + region = region->getParent(); + + typename BBtoRegionMap::iterator it = BBtoRegion.find(BB); + + // This basic block is a start block of a region. It is already in the + // BBtoRegion relation. Only the child basic blocks have to be updated. + if (it != BBtoRegion.end()) { + RegionT *newRegion = it->second; + region->addSubRegion(getTopMostParent(newRegion)); + region = newRegion; + } else { + BBtoRegion[BB] = region; + } + + for (DomTreeNodeBase<BlockT> *C : *N) { + buildRegionsTree(C, region); + } +} + +#ifdef EXPENSIVE_CHECKS +template <class Tr> +bool RegionInfoBase<Tr>::VerifyRegionInfo = true; +#else +template <class Tr> +bool RegionInfoBase<Tr>::VerifyRegionInfo = false; +#endif + +template <class Tr> +typename Tr::RegionT::PrintStyle RegionInfoBase<Tr>::printStyle = + RegionBase<Tr>::PrintNone; + +template <class Tr> +void RegionInfoBase<Tr>::print(raw_ostream &OS) const { + OS << "Region tree:\n"; + TopLevelRegion->print(OS, true, 0, printStyle); + OS << "End region tree\n"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +template <class Tr> +void RegionInfoBase<Tr>::dump() const { print(dbgs()); } +#endif + +template <class Tr> +void RegionInfoBase<Tr>::releaseMemory() { + BBtoRegion.clear(); + if (TopLevelRegion) + delete TopLevelRegion; + TopLevelRegion = nullptr; +} + +template <class Tr> +void RegionInfoBase<Tr>::verifyAnalysis() const { + // Do only verify regions if explicitely activated using EXPENSIVE_CHECKS or + // -verify-region-info + if (!RegionInfoBase<Tr>::VerifyRegionInfo) + return; + + TopLevelRegion->verifyRegionNest(); + + verifyBBMap(TopLevelRegion); +} + +// Region pass manager support. +template <class Tr> +typename Tr::RegionT *RegionInfoBase<Tr>::getRegionFor(BlockT *BB) const { + typename BBtoRegionMap::const_iterator I = BBtoRegion.find(BB); + return I != BBtoRegion.end() ? I->second : nullptr; +} + +template <class Tr> +void RegionInfoBase<Tr>::setRegionFor(BlockT *BB, RegionT *R) { + BBtoRegion[BB] = R; +} + +template <class Tr> +typename Tr::RegionT *RegionInfoBase<Tr>::operator[](BlockT *BB) const { + return getRegionFor(BB); +} + +template <class Tr> +typename RegionInfoBase<Tr>::BlockT * +RegionInfoBase<Tr>::getMaxRegionExit(BlockT *BB) const { + BlockT *Exit = nullptr; + + while (true) { + // Get largest region that starts at BB. 
+ RegionT *R = getRegionFor(BB); + while (R && R->getParent() && R->getParent()->getEntry() == BB) + R = R->getParent(); + + // Get the single exit of BB. + if (R && R->getEntry() == BB) + Exit = R->getExit(); + else if (++BlockTraits::child_begin(BB) == BlockTraits::child_end(BB)) + Exit = *BlockTraits::child_begin(BB); + else // No single exit exists. + return Exit; + + // Get largest region that starts at Exit. + RegionT *ExitR = getRegionFor(Exit); + while (ExitR && ExitR->getParent() && + ExitR->getParent()->getEntry() == Exit) + ExitR = ExitR->getParent(); + + for (BlockT *Pred : make_range(InvBlockTraits::child_begin(Exit), + InvBlockTraits::child_end(Exit))) { + if (!R->contains(Pred) && !ExitR->contains(Pred)) + break; + } + + // This stops infinite cycles. + if (DT->dominates(Exit, BB)) + break; + + BB = Exit; + } + + return Exit; +} + +template <class Tr> +typename Tr::RegionT *RegionInfoBase<Tr>::getCommonRegion(RegionT *A, + RegionT *B) const { + assert(A && B && "One of the Regions is NULL"); + + if (A->contains(B)) + return A; + + while (!B->contains(A)) + B = B->getParent(); + + return B; +} + +template <class Tr> +typename Tr::RegionT * +RegionInfoBase<Tr>::getCommonRegion(SmallVectorImpl<RegionT *> &Regions) const { + RegionT *ret = Regions.back(); + Regions.pop_back(); + + for (RegionT *R : Regions) + ret = getCommonRegion(ret, R); + + return ret; +} + +template <class Tr> +typename Tr::RegionT * +RegionInfoBase<Tr>::getCommonRegion(SmallVectorImpl<BlockT *> &BBs) const { + RegionT *ret = getRegionFor(BBs.back()); + BBs.pop_back(); + + for (BlockT *BB : BBs) + ret = getCommonRegion(ret, getRegionFor(BB)); + + return ret; +} + +template <class Tr> +void RegionInfoBase<Tr>::calculate(FuncT &F) { + using FuncPtrT = typename std::add_pointer<FuncT>::type; + + // ShortCut a function where for every BB the exit of the largest region + // starting with BB is stored. These regions can be threated as single BBS. + // This improves performance on linear CFGs. + BBtoBBMap ShortCut; + + scanForRegions(F, &ShortCut); + BlockT *BB = GraphTraits<FuncPtrT>::getEntryNode(&F); + buildRegionsTree(DT->getNode(BB), TopLevelRegion); +} + +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif // LLVM_ANALYSIS_REGIONINFOIMPL_H diff --git a/clang-r353983e/include/llvm/Analysis/RegionIterator.h b/clang-r353983e/include/llvm/Analysis/RegionIterator.h new file mode 100644 index 00000000..72bc5bbc --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/RegionIterator.h @@ -0,0 +1,359 @@ +//===- RegionIterator.h - Iterators to iteratate over Regions ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file defines the iterators to iterate over the elements of a Region. 
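+//
+// Illustrative use (not part of this header): together with the GraphTraits
+// specializations defined below, the generic graph iterators can enumerate the
+// RegionNodes that are direct elements of a Region R (subregions appear as
+// single nodes), e.g.
+//
+//   for (RegionNode *N : make_range(GraphTraits<Region *>::nodes_begin(R),
+//                                   GraphTraits<Region *>::nodes_end(R)))
+//     if (!N->isSubRegion())
+//       (void)N->getNodeAs<BasicBlock>();  // a block directly contained in R
+//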
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_REGIONITERATOR_H +#define LLVM_ANALYSIS_REGIONITERATOR_H + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/IR/CFG.h" +#include <cassert> +#include <iterator> +#include <type_traits> + +namespace llvm { + +class BasicBlock; + +//===----------------------------------------------------------------------===// +/// Hierarchical RegionNode successor iterator. +/// +/// This iterator iterates over all successors of a RegionNode. +/// +/// For a BasicBlock RegionNode it skips all BasicBlocks that are not part of +/// the parent Region. Furthermore for BasicBlocks that start a subregion, a +/// RegionNode representing the subregion is returned. +/// +/// For a subregion RegionNode there is just one successor. The RegionNode +/// representing the exit of the subregion. +template <class NodeRef, class BlockT, class RegionT> +class RNSuccIterator + : public std::iterator<std::forward_iterator_tag, NodeRef> { + using super = std::iterator<std::forward_iterator_tag, NodeRef>; + using BlockTraits = GraphTraits<BlockT *>; + using SuccIterTy = typename BlockTraits::ChildIteratorType; + + // The iterator works in two modes, bb mode or region mode. + enum ItMode { + // In BB mode it returns all successors of this BasicBlock as its + // successors. + ItBB, + // In region mode there is only one successor, thats the regionnode mapping + // to the exit block of the regionnode + ItRgBegin, // At the beginning of the regionnode successor. + ItRgEnd // At the end of the regionnode successor. + }; + + static_assert(std::is_pointer<NodeRef>::value, + "FIXME: Currently RNSuccIterator only supports NodeRef as " + "pointers due to the use of pointer-specific data structures " + "(e.g. PointerIntPair and SmallPtrSet) internally. Generalize " + "it to support non-pointer types"); + + // Use two bit to represent the mode iterator. + PointerIntPair<NodeRef, 2, ItMode> Node; + + // The block successor iterator. + SuccIterTy BItor; + + // advanceRegionSucc - A region node has only one successor. It reaches end + // once we advance it. + void advanceRegionSucc() { + assert(Node.getInt() == ItRgBegin && "Cannot advance region successor!"); + Node.setInt(ItRgEnd); + } + + NodeRef getNode() const { return Node.getPointer(); } + + // isRegionMode - Is the current iterator in region mode? + bool isRegionMode() const { return Node.getInt() != ItBB; } + + // Get the immediate successor. This function may return a Basic Block + // RegionNode or a subregion RegionNode. + NodeRef getISucc(BlockT *BB) const { + NodeRef succ; + succ = getNode()->getParent()->getNode(BB); + assert(succ && "BB not in Region or entered subregion!"); + return succ; + } + + // getRegionSucc - Return the successor basic block of a SubRegion RegionNode. + inline BlockT* getRegionSucc() const { + assert(Node.getInt() == ItRgBegin && "Cannot get the region successor!"); + return getNode()->template getNodeAs<RegionT>()->getExit(); + } + + // isExit - Is this the exit BB of the Region? + inline bool isExit(BlockT* BB) const { + return getNode()->getParent()->getExit() == BB; + } + +public: + using Self = RNSuccIterator<NodeRef, BlockT, RegionT>; + using value_type = typename super::value_type; + + /// Create begin iterator of a RegionNode. + inline RNSuccIterator(NodeRef node) + : Node(node, node->isSubRegion() ? 
ItRgBegin : ItBB), + BItor(BlockTraits::child_begin(node->getEntry())) { + // Skip the exit block + if (!isRegionMode()) + while (BlockTraits::child_end(node->getEntry()) != BItor && isExit(*BItor)) + ++BItor; + + if (isRegionMode() && isExit(getRegionSucc())) + advanceRegionSucc(); + } + + /// Create an end iterator. + inline RNSuccIterator(NodeRef node, bool) + : Node(node, node->isSubRegion() ? ItRgEnd : ItBB), + BItor(BlockTraits::child_end(node->getEntry())) {} + + inline bool operator==(const Self& x) const { + assert(isRegionMode() == x.isRegionMode() && "Broken iterator!"); + if (isRegionMode()) + return Node.getInt() == x.Node.getInt(); + else + return BItor == x.BItor; + } + + inline bool operator!=(const Self& x) const { return !operator==(x); } + + inline value_type operator*() const { + BlockT *BB = isRegionMode() ? getRegionSucc() : *BItor; + assert(!isExit(BB) && "Iterator out of range!"); + return getISucc(BB); + } + + inline Self& operator++() { + if(isRegionMode()) { + // The Region only has 1 successor. + advanceRegionSucc(); + } else { + // Skip the exit. + do + ++BItor; + while (BItor != BlockTraits::child_end(getNode()->getEntry()) + && isExit(*BItor)); + } + return *this; + } + + inline Self operator++(int) { + Self tmp = *this; + ++*this; + return tmp; + } +}; + +//===----------------------------------------------------------------------===// +/// Flat RegionNode iterator. +/// +/// The Flat Region iterator will iterate over all BasicBlock RegionNodes that +/// are contained in the Region and its subregions. This is close to a virtual +/// control flow graph of the Region. +template <class NodeRef, class BlockT, class RegionT> +class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> + : public std::iterator<std::forward_iterator_tag, NodeRef> { + using super = std::iterator<std::forward_iterator_tag, NodeRef>; + using BlockTraits = GraphTraits<BlockT *>; + using SuccIterTy = typename BlockTraits::ChildIteratorType; + + NodeRef Node; + SuccIterTy Itor; + +public: + using Self = RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>; + using value_type = typename super::value_type; + + /// Create the iterator from a RegionNode. + /// + /// Note that the incoming node must be a bb node, otherwise it will trigger + /// an assertion when we try to get a BasicBlock. + inline RNSuccIterator(NodeRef node) + : Node(node), Itor(BlockTraits::child_begin(node->getEntry())) { + assert(!Node->isSubRegion() && + "Subregion node not allowed in flat iterating mode!"); + assert(Node->getParent() && "A BB node must have a parent!"); + + // Skip the exit block of the iterating region. + while (BlockTraits::child_end(Node->getEntry()) != Itor && + Node->getParent()->getExit() == *Itor) + ++Itor; + } + + /// Create an end iterator + inline RNSuccIterator(NodeRef node, bool) + : Node(node), Itor(BlockTraits::child_end(node->getEntry())) { + assert(!Node->isSubRegion() && + "Subregion node not allowed in flat iterating mode!"); + } + + inline bool operator==(const Self& x) const { + assert(Node->getParent() == x.Node->getParent() + && "Cannot compare iterators of different regions!"); + + return Itor == x.Itor && Node == x.Node; + } + + inline bool operator!=(const Self& x) const { return !operator==(x); } + + inline value_type operator*() const { + BlockT *BB = *Itor; + + // Get the iterating region. + RegionT *Parent = Node->getParent(); + + // The only case that the successor reaches out of the region is it reaches + // the exit of the region. 
+ assert(Parent->getExit() != BB && "iterator out of range!"); + + return Parent->getBBNode(BB); + } + + inline Self& operator++() { + // Skip the exit block of the iterating region. + do + ++Itor; + while (Itor != succ_end(Node->getEntry()) + && Node->getParent()->getExit() == *Itor); + + return *this; + } + + inline Self operator++(int) { + Self tmp = *this; + ++*this; + return tmp; + } +}; + +template <class NodeRef, class BlockT, class RegionT> +inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_begin(NodeRef Node) { + return RNSuccIterator<NodeRef, BlockT, RegionT>(Node); +} + +template <class NodeRef, class BlockT, class RegionT> +inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_end(NodeRef Node) { + return RNSuccIterator<NodeRef, BlockT, RegionT>(Node, true); +} + +//===--------------------------------------------------------------------===// +// RegionNode GraphTraits specialization so the bbs in the region can be +// iterate by generic graph iterators. +// +// NodeT can either be region node or const region node, otherwise child_begin +// and child_end fail. + +#define RegionNodeGraphTraits(NodeT, BlockT, RegionT) \ + template <> struct GraphTraits<NodeT *> { \ + using NodeRef = NodeT *; \ + using ChildIteratorType = RNSuccIterator<NodeRef, BlockT, RegionT>; \ + static NodeRef getEntryNode(NodeRef N) { return N; } \ + static inline ChildIteratorType child_begin(NodeRef N) { \ + return RNSuccIterator<NodeRef, BlockT, RegionT>(N); \ + } \ + static inline ChildIteratorType child_end(NodeRef N) { \ + return RNSuccIterator<NodeRef, BlockT, RegionT>(N, true); \ + } \ + }; \ + template <> struct GraphTraits<FlatIt<NodeT *>> { \ + using NodeRef = NodeT *; \ + using ChildIteratorType = \ + RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>; \ + static NodeRef getEntryNode(NodeRef N) { return N; } \ + static inline ChildIteratorType child_begin(NodeRef N) { \ + return RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>(N); \ + } \ + static inline ChildIteratorType child_end(NodeRef N) { \ + return RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>(N, true); \ + } \ + } + +#define RegionGraphTraits(RegionT, NodeT) \ + template <> struct GraphTraits<RegionT *> : public GraphTraits<NodeT *> { \ + using nodes_iterator = df_iterator<NodeRef>; \ + static NodeRef getEntryNode(RegionT *R) { \ + return R->getNode(R->getEntry()); \ + } \ + static nodes_iterator nodes_begin(RegionT *R) { \ + return nodes_iterator::begin(getEntryNode(R)); \ + } \ + static nodes_iterator nodes_end(RegionT *R) { \ + return nodes_iterator::end(getEntryNode(R)); \ + } \ + }; \ + template <> \ + struct GraphTraits<FlatIt<RegionT *>> \ + : public GraphTraits<FlatIt<NodeT *>> { \ + using nodes_iterator = \ + df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false, \ + GraphTraits<FlatIt<NodeRef>>>; \ + static NodeRef getEntryNode(RegionT *R) { \ + return R->getBBNode(R->getEntry()); \ + } \ + static nodes_iterator nodes_begin(RegionT *R) { \ + return nodes_iterator::begin(getEntryNode(R)); \ + } \ + static nodes_iterator nodes_end(RegionT *R) { \ + return nodes_iterator::end(getEntryNode(R)); \ + } \ + } + +RegionNodeGraphTraits(RegionNode, BasicBlock, Region); +RegionNodeGraphTraits(const RegionNode, BasicBlock, Region); + +RegionGraphTraits(Region, RegionNode); +RegionGraphTraits(const Region, const RegionNode); + +template <> struct GraphTraits<RegionInfo*> + : public GraphTraits<FlatIt<RegionNode*>> { + using nodes_iterator = + df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false, + 
GraphTraits<FlatIt<NodeRef>>>; + + static NodeRef getEntryNode(RegionInfo *RI) { + return GraphTraits<FlatIt<Region*>>::getEntryNode(RI->getTopLevelRegion()); + } + + static nodes_iterator nodes_begin(RegionInfo* RI) { + return nodes_iterator::begin(getEntryNode(RI)); + } + + static nodes_iterator nodes_end(RegionInfo *RI) { + return nodes_iterator::end(getEntryNode(RI)); + } +}; + +template <> struct GraphTraits<RegionInfoPass*> + : public GraphTraits<RegionInfo *> { + using nodes_iterator = + df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false, + GraphTraits<FlatIt<NodeRef>>>; + + static NodeRef getEntryNode(RegionInfoPass *RI) { + return GraphTraits<RegionInfo*>::getEntryNode(&RI->getRegionInfo()); + } + + static nodes_iterator nodes_begin(RegionInfoPass* RI) { + return GraphTraits<RegionInfo*>::nodes_begin(&RI->getRegionInfo()); + } + + static nodes_iterator nodes_end(RegionInfoPass *RI) { + return GraphTraits<RegionInfo*>::nodes_end(&RI->getRegionInfo()); + } +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_REGIONITERATOR_H diff --git a/clang-r353983e/include/llvm/Analysis/RegionPass.h b/clang-r353983e/include/llvm/Analysis/RegionPass.h new file mode 100644 index 00000000..5b1864a3 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/RegionPass.h @@ -0,0 +1,130 @@ +//===- RegionPass.h - RegionPass class --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegionPass class. All region based analysis, +// optimization and transformation passes are derived from RegionPass. +// This class is implemented following the some ideas of the LoopPass.h class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_REGIONPASS_H +#define LLVM_ANALYSIS_REGIONPASS_H + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LegacyPassManagers.h" +#include "llvm/Pass.h" +#include <deque> + +namespace llvm { + +class RGPassManager; +class Function; + +//===----------------------------------------------------------------------===// +/// A pass that runs on each Region in a function. +/// +/// RegionPass is managed by RGPassManager. +class RegionPass : public Pass { +public: + explicit RegionPass(char &pid) : Pass(PT_Region, pid) {} + + //===--------------------------------------------------------------------===// + /// @name To be implemented by every RegionPass + /// + //@{ + /// Run the pass on a specific Region + /// + /// Accessing regions not contained in the current region is not allowed. + /// + /// @param R The region this pass is run on. + /// @param RGM The RegionPassManager that manages this Pass. + /// + /// @return True if the pass modifies this Region. + virtual bool runOnRegion(Region *R, RGPassManager &RGM) = 0; + + /// Get a pass to print the LLVM IR in the region. + /// + /// @param O The output stream to print the Region. + /// @param Banner The banner to separate different printed passes. + /// + /// @return The pass to print the LLVM IR in the region. 
+ Pass *createPrinterPass(raw_ostream &O, + const std::string &Banner) const override; + + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + + virtual bool doInitialization(Region *R, RGPassManager &RGM) { return false; } + virtual bool doFinalization() { return false; } + //@} + + //===--------------------------------------------------------------------===// + /// @name PassManager API + /// + //@{ + void preparePassManager(PMStack &PMS) override; + + void assignPassManager(PMStack &PMS, + PassManagerType PMT = PMT_RegionPassManager) override; + + PassManagerType getPotentialPassManagerType() const override { + return PMT_RegionPassManager; + } + //@} + +protected: + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when optimization bisect is over the limit. + bool skipRegion(Region &R) const; +}; + +/// The pass manager to schedule RegionPasses. +class RGPassManager : public FunctionPass, public PMDataManager { + std::deque<Region*> RQ; + bool skipThisRegion; + bool redoThisRegion; + RegionInfo *RI; + Region *CurrentRegion; + +public: + static char ID; + explicit RGPassManager(); + + /// Execute all of the passes scheduled for execution. + /// + /// @return True if any of the passes modifies the function. + bool runOnFunction(Function &F) override; + + /// Pass Manager itself does not invalidate any analysis info. + /// RGPassManager needs RegionInfo. + void getAnalysisUsage(AnalysisUsage &Info) const override; + + StringRef getPassName() const override { return "Region Pass Manager"; } + + PMDataManager *getAsPMDataManager() override { return this; } + Pass *getAsPass() override { return this; } + + /// Print passes managed by this manager. + void dumpPassStructure(unsigned Offset) override; + + /// Get passes contained by this manager. + Pass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + Pass *FP = static_cast<Pass *>(PassVector[N]); + return FP; + } + + PassManagerType getPassManagerType() const override { + return PMT_RegionPassManager; + } +}; + +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/RegionPrinter.h b/clang-r353983e/include/llvm/Analysis/RegionPrinter.h new file mode 100644 index 00000000..154ac35c --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/RegionPrinter.h @@ -0,0 +1,70 @@ +//===-- RegionPrinter.h - Region printer external interface -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines external functions that can be called to explicitly +// instantiate the region printer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_REGIONPRINTER_H +#define LLVM_ANALYSIS_REGIONPRINTER_H + +namespace llvm { + class FunctionPass; + class Function; + class RegionInfo; + + FunctionPass *createRegionViewerPass(); + FunctionPass *createRegionOnlyViewerPass(); + FunctionPass *createRegionPrinterPass(); + FunctionPass *createRegionOnlyPrinterPass(); + +#ifndef NDEBUG + /// Open a viewer to display the GraphViz vizualization of the analysis + /// result. + /// + /// Practical to call in the debugger. + /// Includes the instructions in each BasicBlock. 
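+  ///
+  /// For example, from a debugger session (illustrative):
+  ///   (gdb) call llvm::viewRegion(RI)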
+ /// + /// @param RI The analysis to display. + void viewRegion(llvm::RegionInfo *RI); + + /// Analyze the regions of a function and open its GraphViz + /// visualization in a viewer. + /// + /// Useful to call in the debugger. + /// Includes the instructions in each BasicBlock. + /// The result of a new analysis may differ from the RegionInfo the pass + /// manager currently holds. + /// + /// @param F Function to analyze. + void viewRegion(const llvm::Function *F); + + /// Open a viewer to display the GraphViz vizualization of the analysis + /// result. + /// + /// Useful to call in the debugger. + /// Shows only the BasicBlock names without their instructions. + /// + /// @param RI The analysis to display. + void viewRegionOnly(llvm::RegionInfo *RI); + + /// Analyze the regions of a function and open its GraphViz + /// visualization in a viewer. + /// + /// Useful to call in the debugger. + /// Shows only the BasicBlock names without their instructions. + /// The result of a new analysis may differ from the RegionInfo the pass + /// manager currently holds. + /// + /// @param F Function to analyze. + void viewRegionOnly(const llvm::Function *F); +#endif +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ScalarEvolution.h b/clang-r353983e/include/llvm/Analysis/ScalarEvolution.h new file mode 100644 index 00000000..f3a03511 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ScalarEvolution.h @@ -0,0 +1,2034 @@ +//===- llvm/Analysis/ScalarEvolution.h - Scalar Evolution -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The ScalarEvolution class is an LLVM pass which can be used to analyze and +// categorize scalar expressions in loops. It specializes in recognizing +// general induction variables, representing them with the abstract and opaque +// SCEV class. Given this analysis, trip counts of loops and other important +// properties can be obtained. +// +// This analysis is primarily useful for induction variable substitution and +// strength reduction. 
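The region viewers declared above are intended for interactive use. A minimal sketch of wrapping them follows (hypothetical helper name, not from the header); in practice they are usually invoked straight from a debugger, e.g. "call llvm::viewRegion(&F)" under gdb or lldb:

#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/IR/Function.h"

#ifndef NDEBUG
// Runs a fresh region analysis over F and opens one GraphViz viewer with full
// instructions and one showing only the BasicBlock names.
void debugShowRegions(const llvm::Function *F) {
  llvm::viewRegion(F);
  llvm::viewRegionOnly(F);
}
#endif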
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H +#define LLVM_ANALYSIS_SCALAREVOLUTION_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <utility> + +namespace llvm { + +class AssumptionCache; +class BasicBlock; +class Constant; +class ConstantInt; +class DataLayout; +class DominatorTree; +class GEPOperator; +class Instruction; +class LLVMContext; +class raw_ostream; +class ScalarEvolution; +class SCEVAddRecExpr; +class SCEVUnknown; +class StructType; +class TargetLibraryInfo; +class Type; +class Value; + +/// This class represents an analyzed expression in the program. These are +/// opaque objects that the client is not allowed to do much with directly. +/// +class SCEV : public FoldingSetNode { + friend struct FoldingSetTrait<SCEV>; + + /// A reference to an Interned FoldingSetNodeID for this node. The + /// ScalarEvolution's BumpPtrAllocator holds the data. + FoldingSetNodeIDRef FastID; + + // The SCEV baseclass this node corresponds to + const unsigned short SCEVType; + +protected: + // Estimated complexity of this node's expression tree size. + const unsigned short ExpressionSize; + + /// This field is initialized to zero and may be used in subclasses to store + /// miscellaneous information. + unsigned short SubclassData = 0; + +public: + /// NoWrapFlags are bitfield indices into SubclassData. + /// + /// Add and Mul expressions may have no-unsigned-wrap <NUW> or + /// no-signed-wrap <NSW> properties, which are derived from the IR + /// operator. NSW is a misnomer that we use to mean no signed overflow or + /// underflow. + /// + /// AddRec expressions may have a no-self-wraparound <NW> property if, in + /// the integer domain, abs(step) * max-iteration(loop) <= + /// unsigned-max(bitwidth). This means that the recurrence will never reach + /// its start value if the step is non-zero. Computing the same value on + /// each iteration is not considered wrapping, and recurrences with step = 0 + /// are trivially <NW>. <NW> is independent of the sign of step and the + /// value the add recurrence starts with. + /// + /// Note that NUW and NSW are also valid properties of a recurrence, and + /// either implies NW. For convenience, NW will be set for a recurrence + /// whenever either NUW or NSW are set. + enum NoWrapFlags { + FlagAnyWrap = 0, // No guarantee. + FlagNW = (1 << 0), // No self-wrap. + FlagNUW = (1 << 1), // No unsigned wrap. + FlagNSW = (1 << 2), // No signed wrap. 
+ NoWrapMask = (1 << 3) - 1 + }; + + explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy, + unsigned short ExpressionSize) + : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {} + SCEV(const SCEV &) = delete; + SCEV &operator=(const SCEV &) = delete; + + unsigned getSCEVType() const { return SCEVType; } + + /// Return the LLVM type of this SCEV expression. + Type *getType() const; + + /// Return true if the expression is a constant zero. + bool isZero() const; + + /// Return true if the expression is a constant one. + bool isOne() const; + + /// Return true if the expression is a constant all-ones value. + bool isAllOnesValue() const; + + /// Return true if the specified scev is negated, but not a constant. + bool isNonConstantNegative() const; + + // Returns estimated size of the mathematical expression represented by this + // SCEV. The rules of its calculation are following: + // 1) Size of a SCEV without operands (like constants and SCEVUnknown) is 1; + // 2) Size SCEV with operands Op1, Op2, ..., OpN is calculated by formula: + // (1 + Size(Op1) + ... + Size(OpN)). + // This value gives us an estimation of time we need to traverse through this + // SCEV and all its operands recursively. We may use it to avoid performing + // heavy transformations on SCEVs of excessive size for sake of saving the + // compilation time. + unsigned short getExpressionSize() const { + return ExpressionSize; + } + + /// Print out the internal representation of this scalar to the specified + /// stream. This should really only be used for debugging purposes. + void print(raw_ostream &OS) const; + + /// This method is used for debugging. + void dump() const; +}; + +// Specialize FoldingSetTrait for SCEV to avoid needing to compute +// temporary FoldingSetNodeID values. +template <> struct FoldingSetTrait<SCEV> : DefaultFoldingSetTrait<SCEV> { + static void Profile(const SCEV &X, FoldingSetNodeID &ID) { ID = X.FastID; } + + static bool Equals(const SCEV &X, const FoldingSetNodeID &ID, unsigned IDHash, + FoldingSetNodeID &TempID) { + return ID == X.FastID; + } + + static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) { + return X.FastID.ComputeHash(); + } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const SCEV &S) { + S.print(OS); + return OS; +} + +/// An object of this class is returned by queries that could not be answered. +/// For example, if you ask for the number of iterations of a linked-list +/// traversal loop, you will get one of these. None of the standard SCEV +/// operations are valid on this class, it is just a marker. +struct SCEVCouldNotCompute : public SCEV { + SCEVCouldNotCompute(); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S); +}; + +/// This class represents an assumption made using SCEV expressions which can +/// be checked at run-time. +class SCEVPredicate : public FoldingSetNode { + friend struct FoldingSetTrait<SCEVPredicate>; + + /// A reference to an Interned FoldingSetNodeID for this node. The + /// ScalarEvolution's BumpPtrAllocator holds the data. 
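As a quick worked example of the getExpressionSize rule above (an illustration, not from the source): for an expression such as %a + %b * %c, the leaves %a, %b and %c each have size 1, the multiply therefore has size 1 + 1 + 1 = 3, and the whole add has size 1 + 1 + 3 = 5.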
+ FoldingSetNodeIDRef FastID; + +public: + enum SCEVPredicateKind { P_Union, P_Equal, P_Wrap }; + +protected: + SCEVPredicateKind Kind; + ~SCEVPredicate() = default; + SCEVPredicate(const SCEVPredicate &) = default; + SCEVPredicate &operator=(const SCEVPredicate &) = default; + +public: + SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind); + + SCEVPredicateKind getKind() const { return Kind; } + + /// Returns the estimated complexity of this predicate. This is roughly + /// measured in the number of run-time checks required. + virtual unsigned getComplexity() const { return 1; } + + /// Returns true if the predicate is always true. This means that no + /// assumptions were made and nothing needs to be checked at run-time. + virtual bool isAlwaysTrue() const = 0; + + /// Returns true if this predicate implies \p N. + virtual bool implies(const SCEVPredicate *N) const = 0; + + /// Prints a textual representation of this predicate with an indentation of + /// \p Depth. + virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0; + + /// Returns the SCEV to which this predicate applies, or nullptr if this is + /// a SCEVUnionPredicate. + virtual const SCEV *getExpr() const = 0; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) { + P.print(OS); + return OS; +} + +// Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute +// temporary FoldingSetNodeID values. +template <> +struct FoldingSetTrait<SCEVPredicate> : DefaultFoldingSetTrait<SCEVPredicate> { + static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) { + ID = X.FastID; + } + + static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID) { + return ID == X.FastID; + } + + static unsigned ComputeHash(const SCEVPredicate &X, + FoldingSetNodeID &TempID) { + return X.FastID.ComputeHash(); + } +}; + +/// This class represents an assumption that two SCEV expressions are equal, +/// and this can be checked at run-time. +class SCEVEqualPredicate final : public SCEVPredicate { + /// We assume that LHS == RHS. + const SCEV *LHS; + const SCEV *RHS; + +public: + SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS, + const SCEV *RHS); + + /// Implementation of the SCEVPredicate interface + bool implies(const SCEVPredicate *N) const override; + void print(raw_ostream &OS, unsigned Depth = 0) const override; + bool isAlwaysTrue() const override; + const SCEV *getExpr() const override; + + /// Returns the left hand side of the equality. + const SCEV *getLHS() const { return LHS; } + + /// Returns the right hand side of the equality. + const SCEV *getRHS() const { return RHS; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEVPredicate *P) { + return P->getKind() == P_Equal; + } +}; + +/// This class represents an assumption made on an AddRec expression. Given an +/// affine AddRec expression {a,+,b}, we assume that it has the nssw or nusw +/// flags (defined below) in the first X iterations of the loop, where X is a +/// SCEV expression returned by getPredicatedBackedgeTakenCount). +/// +/// Note that this does not imply that X is equal to the backedge taken +/// count. This means that if we have a nusw predicate for i32 {0,+,1} with a +/// predicated backedge taken count of X, we only guarantee that {0,+,1} has +/// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we +/// have more than X iterations. 
+class SCEVWrapPredicate final : public SCEVPredicate { +public: + /// Similar to SCEV::NoWrapFlags, but with slightly different semantics + /// for FlagNUSW. The increment is considered to be signed, and a + b + /// (where b is the increment) is considered to wrap if: + /// zext(a + b) != zext(a) + sext(b) + /// + /// If Signed is a function that takes an n-bit tuple and maps to the + /// integer domain as the tuples value interpreted as twos complement, + /// and Unsigned a function that takes an n-bit tuple and maps to the + /// integer domain as as the base two value of input tuple, then a + b + /// has IncrementNUSW iff: + /// + /// 0 <= Unsigned(a) + Signed(b) < 2^n + /// + /// The IncrementNSSW flag has identical semantics with SCEV::FlagNSW. + /// + /// Note that the IncrementNUSW flag is not commutative: if base + inc + /// has IncrementNUSW, then inc + base doesn't neccessarily have this + /// property. The reason for this is that this is used for sign/zero + /// extending affine AddRec SCEV expressions when a SCEVWrapPredicate is + /// assumed. A {base,+,inc} expression is already non-commutative with + /// regards to base and inc, since it is interpreted as: + /// (((base + inc) + inc) + inc) ... + enum IncrementWrapFlags { + IncrementAnyWrap = 0, // No guarantee. + IncrementNUSW = (1 << 0), // No unsigned with signed increment wrap. + IncrementNSSW = (1 << 1), // No signed with signed increment wrap + // (equivalent with SCEV::NSW) + IncrementNoWrapMask = (1 << 2) - 1 + }; + + /// Convenient IncrementWrapFlags manipulation methods. + LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags + clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, + SCEVWrapPredicate::IncrementWrapFlags OffFlags) { + assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); + assert((OffFlags & IncrementNoWrapMask) == OffFlags && + "Invalid flags value!"); + return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & ~OffFlags); + } + + LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags + maskFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, int Mask) { + assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); + assert((Mask & IncrementNoWrapMask) == Mask && "Invalid mask value!"); + + return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & Mask); + } + + LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags + setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, + SCEVWrapPredicate::IncrementWrapFlags OnFlags) { + assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!"); + assert((OnFlags & IncrementNoWrapMask) == OnFlags && + "Invalid flags value!"); + + return (SCEVWrapPredicate::IncrementWrapFlags)(Flags | OnFlags); + } + + /// Returns the set of SCEVWrapPredicate no wrap flags implied by a + /// SCEVAddRecExpr. + LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags + getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE); + +private: + const SCEVAddRecExpr *AR; + IncrementWrapFlags Flags; + +public: + explicit SCEVWrapPredicate(const FoldingSetNodeIDRef ID, + const SCEVAddRecExpr *AR, + IncrementWrapFlags Flags); + + /// Returns the set assumed no overflow flags. 
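A minimal sketch of composing and querying the flag set with the static helpers declared above (hypothetical function, not from the header):

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
static bool hasOnlyNSSW() {
  using WF = llvm::SCEVWrapPredicate;
  WF::IncrementWrapFlags Flags = WF::IncrementAnyWrap;
  Flags = WF::setFlags(Flags, WF::IncrementNUSW);   // add <nusw>
  Flags = WF::setFlags(Flags, WF::IncrementNSSW);   // add <nssw>
  Flags = WF::clearFlags(Flags, WF::IncrementNUSW); // drop <nusw> again
  return WF::maskFlags(Flags, WF::IncrementNSSW) == WF::IncrementNSSW; // true
}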
+ IncrementWrapFlags getFlags() const { return Flags; } + + /// Implementation of the SCEVPredicate interface + const SCEV *getExpr() const override; + bool implies(const SCEVPredicate *N) const override; + void print(raw_ostream &OS, unsigned Depth = 0) const override; + bool isAlwaysTrue() const override; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEVPredicate *P) { + return P->getKind() == P_Wrap; + } +}; + +/// This class represents a composition of other SCEV predicates, and is the +/// class that most clients will interact with. This is equivalent to a +/// logical "AND" of all the predicates in the union. +/// +/// NB! Unlike other SCEVPredicate sub-classes this class does not live in the +/// ScalarEvolution::Preds folding set. This is why the \c add function is sound. +class SCEVUnionPredicate final : public SCEVPredicate { +private: + using PredicateMap = + DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>; + + /// Vector with references to all predicates in this union. + SmallVector<const SCEVPredicate *, 16> Preds; + + /// Maps SCEVs to predicates for quick look-ups. + PredicateMap SCEVToPreds; + +public: + SCEVUnionPredicate(); + + const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const { + return Preds; + } + + /// Adds a predicate to this union. + void add(const SCEVPredicate *N); + + /// Returns a reference to a vector containing all predicates which apply to + /// \p Expr. + ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr); + + /// Implementation of the SCEVPredicate interface + bool isAlwaysTrue() const override; + bool implies(const SCEVPredicate *N) const override; + void print(raw_ostream &OS, unsigned Depth) const override; + const SCEV *getExpr() const override; + + /// We estimate the complexity of a union predicate as the size number of + /// predicates in the union. + unsigned getComplexity() const override { return Preds.size(); } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEVPredicate *P) { + return P->getKind() == P_Union; + } +}; + +struct ExitLimitQuery { + ExitLimitQuery(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates) + : L(L), ExitingBlock(ExitingBlock), AllowPredicates(AllowPredicates) {} + + const Loop *L; + BasicBlock *ExitingBlock; + bool AllowPredicates; +}; + +template <> struct DenseMapInfo<ExitLimitQuery> { + static inline ExitLimitQuery getEmptyKey() { + return ExitLimitQuery(nullptr, nullptr, true); + } + + static inline ExitLimitQuery getTombstoneKey() { + return ExitLimitQuery(nullptr, nullptr, false); + } + + static unsigned getHashValue(ExitLimitQuery Val) { + return hash_combine(hash_combine(Val.L, Val.ExitingBlock), + Val.AllowPredicates); + } + + static bool isEqual(ExitLimitQuery LHS, ExitLimitQuery RHS) { + return LHS.L == RHS.L && LHS.ExitingBlock == RHS.ExitingBlock && + LHS.AllowPredicates == RHS.AllowPredicates; + } +}; + +/// The main scalar evolution driver. Because client code (intentionally) +/// can't do much with the SCEV objects directly, they must ask this class +/// for services. +class ScalarEvolution { +public: + /// An enum describing the relationship between a SCEV and a loop. + enum LoopDisposition { + LoopVariant, ///< The SCEV is loop-variant (unknown). + LoopInvariant, ///< The SCEV is loop-invariant. + LoopComputable ///< The SCEV varies predictably with the loop. 
+ }; + + /// An enum describing the relationship between a SCEV and a basic block. + enum BlockDisposition { + DoesNotDominateBlock, ///< The SCEV does not dominate the block. + DominatesBlock, ///< The SCEV dominates the block. + ProperlyDominatesBlock ///< The SCEV properly dominates the block. + }; + + /// Convenient NoWrapFlags manipulation that hides enum casts and is + /// visible in the ScalarEvolution name space. + LLVM_NODISCARD static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, + int Mask) { + return (SCEV::NoWrapFlags)(Flags & Mask); + } + LLVM_NODISCARD static SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags, + SCEV::NoWrapFlags OnFlags) { + return (SCEV::NoWrapFlags)(Flags | OnFlags); + } + LLVM_NODISCARD static SCEV::NoWrapFlags + clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) { + return (SCEV::NoWrapFlags)(Flags & ~OffFlags); + } + + ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT, LoopInfo &LI); + ScalarEvolution(ScalarEvolution &&Arg); + ~ScalarEvolution(); + + LLVMContext &getContext() const { return F.getContext(); } + + /// Test if values of the given type are analyzable within the SCEV + /// framework. This primarily includes integer types, and it can optionally + /// include pointer types if the ScalarEvolution class has access to + /// target-specific information. + bool isSCEVable(Type *Ty) const; + + /// Return the size in bits of the specified type, for which isSCEVable must + /// return true. + uint64_t getTypeSizeInBits(Type *Ty) const; + + /// Return a type with the same bitwidth as the given type and which + /// represents how SCEV will treat the given type, for which isSCEVable must + /// return true. For pointer types, this is the pointer-sized integer type. + Type *getEffectiveSCEVType(Type *Ty) const; + + // Returns a wider type among {Ty1, Ty2}. + Type *getWiderType(Type *Ty1, Type *Ty2) const; + + /// Return true if the SCEV is a scAddRecExpr or it contains + /// scAddRecExpr. The result will be cached in HasRecMap. + bool containsAddRecurrence(const SCEV *S); + + /// Erase Value from ValueExprMap and ExprValueMap. + void eraseValueFromMap(Value *V); + + /// Return a SCEV expression for the full generality of the specified + /// expression. 
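A minimal sketch of the usual guard before querying the analysis, assuming SE is a ScalarEvolution instance and V an llvm::Value (hypothetical helper, not from the header); getSCEV itself is declared just below:

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
const llvm::SCEV *getSCEVIfAnalyzable(llvm::ScalarEvolution &SE,
                                      llvm::Value *V) {
  if (!SE.isSCEVable(V->getType()))
    return nullptr;                        // e.g. floating-point values
  // Pointers are modelled as the pointer-sized integer type.
  llvm::Type *Ty = SE.getEffectiveSCEVType(V->getType());
  (void)SE.getTypeSizeInBits(Ty);          // legal, since Ty is SCEVable
  return SE.getSCEV(V);
}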
+ const SCEV *getSCEV(Value *V); + + const SCEV *getConstant(ConstantInt *V); + const SCEV *getConstant(const APInt &Val); + const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); + const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); + const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); + const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); + const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; + return getAddExpr(Ops, Flags, Depth); + } + const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2}; + return getAddExpr(Ops, Flags, Depth); + } + const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); + const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; + return getMulExpr(Ops, Flags, Depth); + } + const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2}; + return getMulExpr(Ops, Flags, Depth); + } + const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getURemExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, + SCEV::NoWrapFlags Flags); + const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, SCEV::NoWrapFlags Flags); + const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, SCEV::NoWrapFlags Flags) { + SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end()); + return getAddRecExpr(NewOp, L, Flags); + } + + /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some + /// Predicates. If successful return these <AddRecExpr, Predicates>; + /// The function is intended to be called from PSCEV (the caller will decide + /// whether to actually add the predicates and carry out the rewrites). + Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> + createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); + + /// Returns an expression for a GEP + /// + /// \p GEP The GEP. The indices contained in the GEP itself are ignored, + /// instead we use IndexExprs. + /// \p IndexExprs The expressions for the indices. 
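A minimal sketch of building expressions with the factory methods above, assuming SE is a ScalarEvolution instance, X an integer llvm::Value the analysis can model, and L a loop (hypothetical function, not from the header):

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
void buildExampleExprs(llvm::ScalarEvolution &SE, llvm::Value *X,
                       const llvm::Loop *L) {
  const llvm::SCEV *S  = SE.getSCEV(X);
  const llvm::SCEV *C3 = SE.getConstant(S->getType(), 3);
  const llvm::SCEV *C5 = SE.getConstant(S->getType(), 5);
  const llvm::SCEV *Lin = SE.getAddExpr(SE.getMulExpr(C3, S), C5); // 3*X + 5
  const llvm::SCEV *Rec = SE.getAddRecExpr(
      SE.getConstant(S->getType(), 0), SE.getConstant(S->getType(), 1), L,
      llvm::SCEV::FlagAnyWrap);                                    // {0,+,1}<L>
  (void)Lin; (void)Rec;
}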
+ const SCEV *getGEPExpr(GEPOperator *GEP, + const SmallVectorImpl<const SCEV *> &IndexExprs); + const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands); + const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands); + const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMinExpr(SmallVectorImpl<const SCEV *> &Operands); + const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMinExpr(SmallVectorImpl<const SCEV *> &Operands); + const SCEV *getUnknown(Value *V); + const SCEV *getCouldNotCompute(); + + /// Return a SCEV for the constant 0 of a specific type. + const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } + + /// Return a SCEV for the constant 1 of a specific type. + const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } + + /// Return an expression for sizeof AllocTy that is type IntTy + const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); + + /// Return an expression for offsetof on the given field with type IntTy + const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); + + /// Return the SCEV object corresponding to -V. + const SCEV *getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + + /// Return the SCEV object corresponding to ~V. + const SCEV *getNotSCEV(const SCEV *V); + + /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. + const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. + const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. + const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. The + /// conversion must not be narrowing. + const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. The + /// conversion must not be narrowing. + const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is extended with + /// unspecified bits. The conversion must not be narrowing. + const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. The conversion must not be widening. + const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty); + + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umax operation with them. + const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); + + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umin operation with them. 
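A minimal sketch of bringing two expressions to a common width with the conversion helpers above before combining them (hypothetical function, not from the header):

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
const llvm::SCEV *addMismatched(llvm::ScalarEvolution &SE, const llvm::SCEV *A,
                                const llvm::SCEV *B) {
  llvm::Type *Wide = SE.getWiderType(A->getType(), B->getType());
  // The NoopOr* helpers require a non-narrowing conversion, which the wider
  // type guarantees here.
  return SE.getAddExpr(SE.getNoopOrZeroExtend(A, Wide),
                       SE.getNoopOrZeroExtend(B, Wide));
}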
+ const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); + + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umin operation with them. N-ary function. + const SCEV *getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops); + + /// Transitively follow the chain of pointer-type operands until reaching a + /// SCEV that does not have a single pointer operand. This returns a + /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner + /// cases do exist. + const SCEV *getPointerBase(const SCEV *V); + + /// Return a SCEV expression for the specified value at the specified scope + /// in the program. The L value specifies a loop nest to evaluate the + /// expression at, where null is the top-level or a specified loop is + /// immediately inside of the loop. + /// + /// This method can be used to compute the exit value for a variable defined + /// in a loop by querying what the value will hold in the parent loop. + /// + /// In the case that a relevant loop exit value cannot be computed, the + /// original value V is returned. + const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L); + + /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L). + const SCEV *getSCEVAtScope(Value *V, const Loop *L); + + /// Test whether entry to the loop is protected by a conditional between LHS + /// and RHS. This is used to help avoid max expressions in loop trip + /// counts, and to eliminate casts. + bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Test whether the backedge of the loop is protected by a conditional + /// between LHS and RHS. This is used to eliminate casts. + bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Returns the maximum trip count of the loop if it is a single-exit + /// loop and we can compute a small maximum for that loop. + /// + /// Implemented in terms of the \c getSmallConstantTripCount overload with + /// the single exiting block passed to it. See that routine for details. + unsigned getSmallConstantTripCount(const Loop *L); + + /// Returns the maximum trip count of this loop as a normal unsigned + /// value. Returns 0 if the trip count is unknown or not constant. This + /// "trip count" assumes that control exits via ExitingBlock. More + /// precisely, it is the number of times that control may reach ExitingBlock + /// before taking the branch. For loops with multiple exits, it may not be + /// the number times that the loop header executes if the loop exits + /// prematurely via another branch. + unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock); + + /// Returns the upper bound of the loop trip count as a normal unsigned + /// value. + /// Returns 0 if the trip count is unknown or not constant. + unsigned getSmallConstantMaxTripCount(const Loop *L); + + /// Returns the largest constant divisor of the trip count of the + /// loop if it is a single-exit loop and we can compute a small maximum for + /// that loop. + /// + /// Implemented in terms of the \c getSmallConstantTripMultiple overload with + /// the single exiting block passed to it. See that routine for details. + unsigned getSmallConstantTripMultiple(const Loop *L); + + /// Returns the largest constant divisor of the trip count of this loop as a + /// normal unsigned value, if possible. 
This means that the actual trip + /// count is always a multiple of the returned value (don't forget the trip + /// count could very well be zero as well!). As explained in the comments + /// for getSmallConstantTripCount, this assumes that control exits the loop + /// via ExitingBlock. + unsigned getSmallConstantTripMultiple(const Loop *L, + BasicBlock *ExitingBlock); + + /// Get the expression for the number of loop iterations for which this loop + /// is guaranteed not to exit via ExitingBlock. Otherwise return + /// SCEVCouldNotCompute. + const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock); + + /// If the specified loop has a predictable backedge-taken count, return it, + /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count is + /// the number of times the loop header will be branched to from within the + /// loop, assuming there are no abnormal exists like exception throws. This is + /// one less than the trip count of the loop, since it doesn't count the first + /// iteration, when the header is branched to from outside the loop. + /// + /// Note that it is not valid to call this method on a loop without a + /// loop-invariant backedge-taken count (see + /// hasLoopInvariantBackedgeTakenCount). + const SCEV *getBackedgeTakenCount(const Loop *L); + + /// Similar to getBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, + SCEVUnionPredicate &Predicates); + + /// When successful, this returns a SCEVConstant that is greater than or equal + /// to (i.e. a "conservative over-approximation") of the value returend by + /// getBackedgeTakenCount. If such a value cannot be computed, it returns the + /// SCEVCouldNotCompute object. + const SCEV *getMaxBackedgeTakenCount(const Loop *L); + + /// Return true if the backedge taken count is either the value returned by + /// getMaxBackedgeTakenCount or zero. + bool isBackedgeTakenCountMaxOrZero(const Loop *L); + + /// Return true if the specified loop has an analyzable loop-invariant + /// backedge-taken count. + bool hasLoopInvariantBackedgeTakenCount(const Loop *L); + + /// This method should be called by the client when it has changed a loop in + /// a way that may effect ScalarEvolution's ability to compute a trip count, + /// or if the loop is deleted. This call is potentially expensive for large + /// loop bodies. + void forgetLoop(const Loop *L); + + // This method invokes forgetLoop for the outermost loop of the given loop + // \p L, making ScalarEvolution forget about all this subtree. This needs to + // be done whenever we make a transform that may affect the parameters of the + // outer loop, such as exit counts for branches. + void forgetTopmostLoop(const Loop *L); + + /// This method should be called by the client when it has changed a value + /// in a way that may effect its value, or which may disconnect it from a + /// def-use chain linking it to a loop. + void forgetValue(Value *V); + + /// Called when the client has changed the disposition of values in + /// this loop. + /// + /// We don't have a way to invalidate per-loop dispositions. Clear and + /// recompute is simpler. 
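A minimal sketch of the usual trip-count queries, and of invalidating the cached results once the loop has been rewritten (hypothetical function, not from the header):

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
void queryAndInvalidate(llvm::ScalarEvolution &SE, const llvm::Loop *L) {
  if (SE.hasLoopInvariantBackedgeTakenCount(L)) {
    const llvm::SCEV *BTC = SE.getBackedgeTakenCount(L); // trip count minus one
    (void)BTC;
  }
  unsigned TC   = SE.getSmallConstantTripCount(L);    // 0 when unknown
  unsigned Mult = SE.getSmallConstantTripMultiple(L); // exact count is a multiple
  (void)TC; (void)Mult;
  // ... the client transforms the loop here ...
  SE.forgetLoop(L); // drop cached trip counts that may now be stale
}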
+ void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); } + + /// Determine the minimum number of zero bits that S is guaranteed to end in + /// (at every loop iteration). It is, at the same time, the minimum number + /// of times S is divisible by 2. For example, given {4,+,8} it returns 2. + /// If S is guaranteed to be 0, it returns the bitwidth of S. + uint32_t GetMinTrailingZeros(const SCEV *S); + + /// Determine the unsigned range for a particular SCEV. + /// NOTE: This returns a copy of the reference returned by getRangeRef. + ConstantRange getUnsignedRange(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_UNSIGNED); + } + + /// Determine the min of the unsigned range for a particular SCEV. + APInt getUnsignedRangeMin(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMin(); + } + + /// Determine the max of the unsigned range for a particular SCEV. + APInt getUnsignedRangeMax(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMax(); + } + + /// Determine the signed range for a particular SCEV. + /// NOTE: This returns a copy of the reference returned by getRangeRef. + ConstantRange getSignedRange(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_SIGNED); + } + + /// Determine the min of the signed range for a particular SCEV. + APInt getSignedRangeMin(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMin(); + } + + /// Determine the max of the signed range for a particular SCEV. + APInt getSignedRangeMax(const SCEV *S) { + return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMax(); + } + + /// Test if the given expression is known to be negative. + bool isKnownNegative(const SCEV *S); + + /// Test if the given expression is known to be positive. + bool isKnownPositive(const SCEV *S); + + /// Test if the given expression is known to be non-negative. + bool isKnownNonNegative(const SCEV *S); + + /// Test if the given expression is known to be non-positive. + bool isKnownNonPositive(const SCEV *S); + + /// Test if the given expression is known to be non-zero. + bool isKnownNonZero(const SCEV *S); + + /// Splits SCEV expression \p S into two SCEVs. One of them is obtained from + /// \p S by substitution of all AddRec sub-expression related to loop \p L + /// with initial value of that SCEV. The second is obtained from \p S by + /// substitution of all AddRec sub-expressions related to loop \p L with post + /// increment of this AddRec in the loop \p L. In both cases all other AddRec + /// sub-expressions (not related to \p L) remain the same. + /// If the \p S contains non-invariant unknown SCEV the function returns + /// CouldNotCompute SCEV in both values of std::pair. + /// For example, for SCEV S={0, +, 1}<L1> + {0, +, 1}<L2> and loop L=L1 + /// the function returns pair: + /// first = {0, +, 1}<L2> + /// second = {1, +, 1}<L1> + {0, +, 1}<L2> + /// We can see that for the first AddRec sub-expression it was replaced with + /// 0 (initial value) for the first element and to {1, +, 1}<L1> (post + /// increment value) for the second one. In both cases AddRec expression + /// related to L2 remains the same. + std::pair<const SCEV *, const SCEV *> SplitIntoInitAndPostInc(const Loop *L, + const SCEV *S); + + /// We'd like to check the predicate on every iteration of the most dominated + /// loop between loops used in LHS and RHS. + /// To do this we use the following list of steps: + /// 1. Collect set S all loops on which either LHS or RHS depend. + /// 2. If S is non-empty + /// a. 
Let PD be the element of S which is dominated by all other elements. + /// b. Let E(LHS) be value of LHS on entry of PD. + /// To get E(LHS), we should just take LHS and replace all AddRecs that are + /// attached to PD on with their entry values. + /// Define E(RHS) in the same way. + /// c. Let B(LHS) be value of L on backedge of PD. + /// To get B(LHS), we should just take LHS and replace all AddRecs that are + /// attached to PD on with their backedge values. + /// Define B(RHS) in the same way. + /// d. Note that E(LHS) and E(RHS) are automatically available on entry of PD, + /// so we can assert on that. + /// e. Return true if isLoopEntryGuardedByCond(Pred, E(LHS), E(RHS)) && + /// isLoopBackedgeGuardedByCond(Pred, B(LHS), B(RHS)) + bool isKnownViaInduction(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + + /// Test if the given expression is known to satisfy the condition described + /// by Pred, LHS, and RHS. + bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + + /// Test if the condition described by Pred, LHS, RHS is known to be true on + /// every iteration of the loop of the recurrency LHS. + bool isKnownOnEveryIteration(ICmpInst::Predicate Pred, + const SCEVAddRecExpr *LHS, const SCEV *RHS); + + /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" + /// is monotonically increasing or decreasing. In the former case set + /// `Increasing` to true and in the latter case set `Increasing` to false. + /// + /// A predicate is said to be monotonically increasing if may go from being + /// false to being true as the loop iterates, but never the other way + /// around. A predicate is said to be monotonically decreasing if may go + /// from being true to being false as the loop iterates, but never the other + /// way around. + bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, + bool &Increasing); + + /// Return true if the result of the predicate LHS `Pred` RHS is loop + /// invariant with respect to L. Set InvariantPred, InvariantLHS and + /// InvariantLHS so that InvariantLHS `InvariantPred` InvariantRHS is the + /// loop invariant form of LHS `Pred` RHS. + bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const Loop *L, + ICmpInst::Predicate &InvariantPred, + const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS); + + /// Simplify LHS and RHS in a comparison with predicate Pred. Return true + /// iff any changes were made. If the operands are provably equal or + /// unequal, LHS and RHS are set to the same value and Pred is set to either + /// ICMP_EQ or ICMP_NE. + bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, + const SCEV *&RHS, unsigned Depth = 0); + + /// Return the "disposition" of the given SCEV with respect to the given + /// loop. + LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L); + + /// Return true if the value of the given SCEV is unchanging in the + /// specified loop. + bool isLoopInvariant(const SCEV *S, const Loop *L); + + /// Determine if the SCEV can be evaluated at loop's entry. It is true if it + /// doesn't depend on a SCEVUnknown of an instruction which is dominated by + /// the header of loop L. + bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L); + + /// Return true if the given SCEV changes value in a known way in the + /// specified loop. 
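A minimal sketch combining the range and known-predicate queries above (hypothetical function, not from the header):

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
bool knownNonNegativeAndLess(llvm::ScalarEvolution &SE, const llvm::SCEV *A,
                             const llvm::SCEV *B) {
  llvm::APInt UMax = SE.getUnsignedRangeMax(A); // copy of the cached bound
  (void)UMax;
  return SE.isKnownNonNegative(A) &&
         SE.isKnownPredicate(llvm::ICmpInst::ICMP_SLT, A, B);
}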
This property being true implies that the value is + /// variant in the loop AND that we can emit an expression to compute the + /// value of the expression at any particular loop iteration. + bool hasComputableLoopEvolution(const SCEV *S, const Loop *L); + + /// Return the "disposition" of the given SCEV with respect to the given + /// block. + BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB); + + /// Return true if elements that makes up the given SCEV dominate the + /// specified basic block. + bool dominates(const SCEV *S, const BasicBlock *BB); + + /// Return true if elements that makes up the given SCEV properly dominate + /// the specified basic block. + bool properlyDominates(const SCEV *S, const BasicBlock *BB); + + /// Test whether the given SCEV has Op as a direct or indirect operand. + bool hasOperand(const SCEV *S, const SCEV *Op) const; + + /// Return the size of an element read or written by Inst. + const SCEV *getElementSize(Instruction *Inst); + + /// Compute the array dimensions Sizes from the set of Terms extracted from + /// the memory access function of this SCEVAddRecExpr (second step of + /// delinearization). + void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize); + + void print(raw_ostream &OS) const; + void verify() const; + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + /// Collect parametric terms occurring in step expressions (first step of + /// delinearization). + void collectParametricTerms(const SCEV *Expr, + SmallVectorImpl<const SCEV *> &Terms); + + /// Return in Subscripts the access functions for each dimension in Sizes + /// (third step of delinearization). + void computeAccessFunctions(const SCEV *Expr, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes); + + /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the + /// subscripts and sizes of an array access. + /// + /// The delinearization is a 3 step process: the first two steps compute the + /// sizes of each subscript and the third step computes the access functions + /// for the delinearized array: + /// + /// 1. Find the terms in the step functions + /// 2. Compute the array size + /// 3. Compute the access function: divide the SCEV by the array size + /// starting with the innermost dimensions found in step 2. The Quotient + /// is the SCEV to be divided in the next step of the recursion. The + /// Remainder is the subscript of the innermost dimension. Loop over all + /// array dimensions computed in step 2. + /// + /// To compute a uniform array size for several memory accesses to the same + /// object, one can collect in step 1 all the step terms for all the memory + /// accesses, and compute in step 2 a unique array shape. This guarantees + /// that the array shape will be the same across all memory accesses. + /// + /// FIXME: We could derive the result of steps 1 and 2 from a description of + /// the array shape given in metadata. + /// + /// Example: + /// + /// A[][n][m] + /// + /// for i + /// for j + /// for k + /// A[j+k][2i][5i] = + /// + /// The initial SCEV: + /// + /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] + /// + /// 1. Find the different terms in the step functions: + /// -> [2*m, 5, n*m, n*m] + /// + /// 2. 
Compute the array size: sort and unique them + /// -> [n*m, 2*m, 5] + /// find the GCD of all the terms = 1 + /// divide by the GCD and erase constant terms + /// -> [n*m, 2*m] + /// GCD = m + /// divide by GCD -> [n, 2] + /// remove constant terms + /// -> [n] + /// size of the array is A[unknown][n][m] + /// + /// 3. Compute the access function + /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m + /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k + /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k + /// The remainder is the subscript of the innermost array dimension: [5i]. + /// + /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n + /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k + /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k + /// The Remainder is the subscript of the next array dimension: [2i]. + /// + /// The subscript of the outermost dimension is the Quotient: [j+k]. + /// + /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. + void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize); + + /// Return the DataLayout associated with the module this SCEV instance is + /// operating on. + const DataLayout &getDataLayout() const { + return F.getParent()->getDataLayout(); + } + + const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS); + + const SCEVPredicate * + getWrapPredicate(const SCEVAddRecExpr *AR, + SCEVWrapPredicate::IncrementWrapFlags AddedFlags); + + /// Re-writes the SCEV according to the Predicates in \p A. + const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L, + SCEVUnionPredicate &A); + /// Tries to convert the \p S expression to an AddRec expression, + /// adding additional predicates to \p Preds as required. + const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates( + const SCEV *S, const Loop *L, + SmallPtrSetImpl<const SCEVPredicate *> &Preds); + +private: + /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a + /// Value is deleted. + class SCEVCallbackVH final : public CallbackVH { + ScalarEvolution *SE; + + void deleted() override; + void allUsesReplacedWith(Value *New) override; + + public: + SCEVCallbackVH(Value *V, ScalarEvolution *SE = nullptr); + }; + + friend class SCEVCallbackVH; + friend class SCEVExpander; + friend class SCEVUnknown; + + /// The function we are analyzing. + Function &F; + + /// Does the module have any calls to the llvm.experimental.guard intrinsic + /// at all? If this is false, we avoid doing work that will only help if + /// thare are guards present in the IR. + bool HasGuards; + + /// The target library information for the target we are targeting. + TargetLibraryInfo &TLI; + + /// The tracker for \@llvm.assume intrinsics in this function. + AssumptionCache &AC; + + /// The dominator tree. + DominatorTree &DT; + + /// The loop information for the function we are currently analyzing. + LoopInfo &LI; + + /// This SCEV is used to represent unknown trip counts and things. + std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute; + + /// The type for HasRecMap. + using HasRecMapType = DenseMap<const SCEV *, bool>; + + /// This is a cache to record whether a SCEV contains any scAddRecExpr. + HasRecMapType HasRecMap; + + /// The type for ExprValueMap. 
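A minimal sketch of driving delinearization through the convenience entry point above, for a memory instruction MemInst whose access function is Expr (hypothetical function, not from the header):

// Assumes "llvm/Analysis/ScalarEvolution.h" has been included.
void recoverSubscripts(llvm::ScalarEvolution &SE, const llvm::SCEV *Expr,
                       llvm::Instruction *MemInst) {
  llvm::SmallVector<const llvm::SCEV *, 4> Subscripts, Sizes;
  SE.delinearize(Expr, Subscripts, Sizes, SE.getElementSize(MemInst));
  // On success, Subscripts and Sizes describe the recovered dimensions as in
  // the A[j+k][2i][5i] example above; both stay empty if nothing was found.
}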
+ using ValueOffsetPair = std::pair<Value *, ConstantInt *>; + using ExprValueMapType = DenseMap<const SCEV *, SetVector<ValueOffsetPair>>; + + /// ExprValueMap -- This map records the original values from which + /// the SCEV expr is generated from. + /// + /// We want to represent the mapping as SCEV -> ValueOffsetPair instead + /// of SCEV -> Value: + /// Suppose we know S1 expands to V1, and + /// S1 = S2 + C_a + /// S3 = S2 + C_b + /// where C_a and C_b are different SCEVConstants. Then we'd like to + /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally. + /// It is helpful when S2 is a complex SCEV expr. + /// + /// In order to do that, we represent ExprValueMap as a mapping from + /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and + /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3 + /// is expanded, it will first expand S2 to V1 - C_a because of + /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b. + /// + /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded + /// to V - Offset. + ExprValueMapType ExprValueMap; + + /// The type for ValueExprMap. + using ValueExprMapType = + DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *>>; + + /// This is a cache of the values we have analyzed so far. + ValueExprMapType ValueExprMap; + + /// Mark predicate values currently being processed by isImpliedCond. + SmallPtrSet<Value *, 6> PendingLoopPredicates; + + /// Mark SCEVUnknown Phis currently being processed by getRangeRef. + SmallPtrSet<const PHINode *, 6> PendingPhiRanges; + + // Mark SCEVUnknown Phis currently being processed by isImpliedViaMerge. + SmallPtrSet<const PHINode *, 6> PendingMerges; + + /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of + /// conditions dominating the backedge of a loop. + bool WalkingBEDominatingConds = false; + + /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a + /// predicate by splitting it into a set of independent predicates. + bool ProvingSplitPredicate = false; + + /// Memoized values for the GetMinTrailingZeros + DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache; + + /// Return the Value set from which the SCEV expr is generated. + SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S); + + /// Private helper method for the GetMinTrailingZeros method + uint32_t GetMinTrailingZerosImpl(const SCEV *S); + + /// Information about the number of loop iterations for which a loop exit's + /// branch condition evaluates to the not-taken path. This is a temporary + /// pair of exact and max expressions that are eventually summarized in + /// ExitNotTakenInfo and BackedgeTakenInfo. + struct ExitLimit { + const SCEV *ExactNotTaken; // The exit is not taken exactly this many times + const SCEV *MaxNotTaken; // The exit is not taken at most this many times + + // Not taken either exactly MaxNotTaken or zero times + bool MaxOrZero = false; + + /// A set of predicate guards for this ExitLimit. The result is only valid + /// if all of the predicates in \c Predicates evaluate to 'true' at + /// run-time. 
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates; + + void addPredicate(const SCEVPredicate *P) { + assert(!isa<SCEVUnionPredicate>(P) && "Only add leaf predicates here!"); + Predicates.insert(P); + } + + /*implicit*/ ExitLimit(const SCEV *E); + + ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList); + + ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero, + const SmallPtrSetImpl<const SCEVPredicate *> &PredSet); + + ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero); + + /// Test whether this ExitLimit contains any computed information, or + /// whether it's all SCEVCouldNotCompute values. + bool hasAnyInfo() const { + return !isa<SCEVCouldNotCompute>(ExactNotTaken) || + !isa<SCEVCouldNotCompute>(MaxNotTaken); + } + + bool hasOperand(const SCEV *S) const; + + /// Test whether this ExitLimit contains all information. + bool hasFullInfo() const { + return !isa<SCEVCouldNotCompute>(ExactNotTaken); + } + }; + + /// Information about the number of times a particular loop exit may be + /// reached before exiting the loop. + struct ExitNotTakenInfo { + PoisoningVH<BasicBlock> ExitingBlock; + const SCEV *ExactNotTaken; + std::unique_ptr<SCEVUnionPredicate> Predicate; + + explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock, + const SCEV *ExactNotTaken, + std::unique_ptr<SCEVUnionPredicate> Predicate) + : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken), + Predicate(std::move(Predicate)) {} + + bool hasAlwaysTruePredicate() const { + return !Predicate || Predicate->isAlwaysTrue(); + } + }; + + /// Information about the backedge-taken count of a loop. This currently + /// includes an exact count and a maximum count. + /// + class BackedgeTakenInfo { + /// A list of computable exits and their not-taken counts. Loops almost + /// never have more than one computable exit. + SmallVector<ExitNotTakenInfo, 1> ExitNotTaken; + + /// The pointer part of \c MaxAndComplete is an expression indicating the + /// least maximum backedge-taken count of the loop that is known, or a + /// SCEVCouldNotCompute. This expression is only valid if the predicates + /// associated with all loop exits are true. + /// + /// The integer part of \c MaxAndComplete is a boolean indicating if \c + /// ExitNotTaken has an element for every exiting block in the loop. + PointerIntPair<const SCEV *, 1> MaxAndComplete; + + /// True iff the backedge is taken either exactly Max or zero times. + bool MaxOrZero = false; + + /// \name Helper projection functions on \c MaxAndComplete. + /// @{ + bool isComplete() const { return MaxAndComplete.getInt(); } + const SCEV *getMax() const { return MaxAndComplete.getPointer(); } + /// @} + + public: + BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {} + BackedgeTakenInfo(BackedgeTakenInfo &&) = default; + BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; + + using EdgeExitInfo = std::pair<BasicBlock *, ExitLimit>; + + /// Initialize BackedgeTakenInfo from a list of exact exit counts. + BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool Complete, + const SCEV *MaxCount, bool MaxOrZero); + + /// Test whether this BackedgeTakenInfo contains any computed information, + /// or whether it's all SCEVCouldNotCompute values. + bool hasAnyInfo() const { + return !ExitNotTaken.empty() || !isa<SCEVCouldNotCompute>(getMax()); + } + + /// Test whether this BackedgeTakenInfo contains complete information. 
+ bool hasFullInfo() const { return isComplete(); } + + /// Return an expression indicating the exact *backedge-taken* + /// count of the loop if it is known or SCEVCouldNotCompute + /// otherwise. If execution makes it to the backedge on every + /// iteration (i.e. there are no abnormal exists like exception + /// throws and thread exits) then this is the number of times the + /// loop header will execute minus one. + /// + /// If the SCEV predicate associated with the answer can be different + /// from AlwaysTrue, we must add a (non null) Predicates argument. + /// The SCEV predicate associated with the answer will be added to + /// Predicates. A run-time check needs to be emitted for the SCEV + /// predicate in order for the answer to be valid. + /// + /// Note that we should always know if we need to pass a predicate + /// argument or not from the way the ExitCounts vector was computed. + /// If we allowed SCEV predicates to be generated when populating this + /// vector, this information can contain them and therefore a + /// SCEVPredicate argument should be added to getExact. + const SCEV *getExact(const Loop *L, ScalarEvolution *SE, + SCEVUnionPredicate *Predicates = nullptr) const; + + /// Return the number of times this loop exit may fall through to the back + /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via + /// this block before this number of iterations, but may exit via another + /// block. + const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; + + /// Get the max backedge taken count for the loop. + const SCEV *getMax(ScalarEvolution *SE) const; + + /// Return true if the number of times this backedge is taken is either the + /// value returned by getMax or zero. + bool isMaxOrZero(ScalarEvolution *SE) const; + + /// Return true if any backedge taken count expressions refer to the given + /// subexpression. + bool hasOperand(const SCEV *S, ScalarEvolution *SE) const; + + /// Invalidate this result and free associated memory. + void clear(); + }; + + /// Cache the backedge-taken count of the loops for this function as they + /// are computed. + DenseMap<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts; + + /// Cache the predicated backedge-taken count of the loops for this + /// function as they are computed. + DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts; + + /// This map contains entries for all of the PHI instructions that we + /// attempt to compute constant evolutions for. This allows us to avoid + /// potentially expensive recomputation of these properties. An instruction + /// maps to null if we are unable to compute its exit value. + DenseMap<PHINode *, Constant *> ConstantEvolutionLoopExitValue; + + /// This map contains entries for all the expressions that we attempt to + /// compute getSCEVAtScope information for, which can be expensive in + /// extreme cases. + DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>> + ValuesAtScopes; + + /// Memoized computeLoopDisposition results. + DenseMap<const SCEV *, + SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>> + LoopDispositions; + + struct LoopProperties { + /// Set to true if the loop contains no instruction that can have side + /// effects (i.e. via throwing an exception, volatile or atomic access). + bool HasNoAbnormalExits; + + /// Set to true if the loop contains no instruction that can abnormally exit + /// the loop (i.e. 
via throwing an exception, by terminating the thread + /// cleanly or by infinite looping in a called function). Strictly + /// speaking, the last one is not leaving the loop, but is identical to + /// leaving the loop for reasoning about undefined behavior. + bool HasNoSideEffects; + }; + + /// Cache for \c getLoopProperties. + DenseMap<const Loop *, LoopProperties> LoopPropertiesCache; + + /// Return a \c LoopProperties instance for \p L, creating one if necessary. + LoopProperties getLoopProperties(const Loop *L); + + bool loopHasNoSideEffects(const Loop *L) { + return getLoopProperties(L).HasNoSideEffects; + } + + bool loopHasNoAbnormalExits(const Loop *L) { + return getLoopProperties(L).HasNoAbnormalExits; + } + + /// Compute a LoopDisposition value. + LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); + + /// Memoized computeBlockDisposition results. + DenseMap< + const SCEV *, + SmallVector<PointerIntPair<const BasicBlock *, 2, BlockDisposition>, 2>> + BlockDispositions; + + /// Compute a BlockDisposition value. + BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB); + + /// Memoized results from getRange + DenseMap<const SCEV *, ConstantRange> UnsignedRanges; + + /// Memoized results from getRange + DenseMap<const SCEV *, ConstantRange> SignedRanges; + + /// Used to parameterize getRange + enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED }; + + /// Set the memoized range for the given SCEV. + const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint, + ConstantRange CR) { + DenseMap<const SCEV *, ConstantRange> &Cache = + Hint == HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; + + auto Pair = Cache.try_emplace(S, std::move(CR)); + if (!Pair.second) + Pair.first->second = std::move(CR); + return Pair.first->second; + } + + /// Determine the range for a particular SCEV. + /// NOTE: This returns a reference to an entry in a cache. It must be + /// copied if its needed for longer. + const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint); + + /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}. + /// Helper for \c getRange. + ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop, + const SCEV *MaxBECount, unsigned BitWidth); + + /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p + /// Stop} by "factoring out" a ternary expression from the add recurrence. + /// Helper called by \c getRange. + ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop, + const SCEV *MaxBECount, unsigned BitWidth); + + /// We know that there is no SCEV for the specified value. Analyze the + /// expression. + const SCEV *createSCEV(Value *V); + + /// Provide the special handling we need to analyze PHI SCEVs. + const SCEV *createNodeForPHI(PHINode *PN); + + /// Helper function called from createNodeForPHI. + const SCEV *createAddRecFromPHI(PHINode *PN); + + /// A helper function for createAddRecFromPHI to handle simple cases. + const SCEV *createSimpleAffineAddRec(PHINode *PN, Value *BEValueV, + Value *StartValueV); + + /// Helper function called from createNodeForPHI. + const SCEV *createNodeFromSelectLikePHI(PHINode *PN); + + /// Provide special handling for a select-like instruction (currently this + /// is either a select instruction or a phi node). \p I is the instruction + /// being processed, and it is assumed equivalent to "Cond ? TrueVal : + /// FalseVal". 
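The UnsignedRanges/SignedRanges caches above sit behind the public getUnsignedRange/getSignedRange entry points, which hand back copies. A small sketch of the kind of range reasoning a client can do with them; the helper name and the 256 bound are illustrative only:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Illustrative: can we prove the unsigned value of V is always below 256?
static bool fitsInByte(Value *V, ScalarEvolution &SE) {
  if (!SE.isSCEVable(V->getType()))
    return false;
  // getUnsignedRange returns a copy of the cached range, so it is safe to
  // keep across later SCEV queries (unlike the internal getRangeRef result).
  ConstantRange R = SE.getUnsignedRange(SE.getSCEV(V));
  return R.getUnsignedMax().ult(256);
}
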
+  const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond,
+                                        Value *TrueVal, Value *FalseVal);
+
+  /// Provide the special handling we need to analyze GEP SCEVs.
+  const SCEV *createNodeForGEP(GEPOperator *GEP);
+
+  /// Implementation code for getSCEVAtScope; called at most once for each
+  /// SCEV+Loop pair.
+  const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
+
+  /// This looks up computed SCEV values for all instructions that depend on
+  /// the given instruction and removes them from the ValueExprMap if they
+  /// reference SymName. This is used during PHI resolution.
+  void forgetSymbolicName(Instruction *I, const SCEV *SymName);
+
+  /// Return the BackedgeTakenInfo for the given loop, lazily computing new
+  /// values if the loop hasn't been analyzed yet. The returned result is
+  /// guaranteed not to be predicated.
+  const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
+
+  /// Similar to getBackedgeTakenInfo, but will add predicates as required
+  /// with the purpose of returning complete information.
+  const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L);
+
+  /// Compute the number of times the specified loop will iterate.
+  /// If AllowPredicates is set, we will create new SCEV predicates as
+  /// necessary in order to return an exact answer.
+  BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L,
+                                              bool AllowPredicates = false);
+
+  /// Compute the number of times the backedge of the specified loop will
+  /// execute if it exits via the specified block. If AllowPredicates is set,
+  /// this call will try to use a minimal set of SCEV predicates in order to
+  /// return an exact answer.
+  ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+                             bool AllowPredicates = false);
+
+  /// Compute the number of times the backedge of the specified loop will
+  /// execute if its exit condition were a conditional branch of ExitCond.
+  ///
+  /// \p ControlsExit is true if ExitCond directly controls the exit
+  /// branch. In this case, we can assume that the loop exits only if the
+  /// condition is true and can infer that failing to meet the condition prior
+  /// to integer wraparound results in undefined behavior.
+  ///
+  /// If \p AllowPredicates is set, this call will try to use a minimal set of
+  /// SCEV predicates in order to return an exact answer.
+  ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond,
+                                     bool ExitIfTrue, bool ControlsExit,
+                                     bool AllowPredicates = false);
+
+  // Helper functions for computeExitLimitFromCond to avoid exponential time
+  // complexity.
+
+  class ExitLimitCache {
+    // It may look like we need to key on the whole (L, ExitIfTrue,
+    // ControlsExit, AllowPredicates) tuple, but recursive calls to
+    // computeExitLimitFromCondCached from computeExitLimitFromCondImpl only
+    // vary in the \c ExitCond and \c ControlsExit parameters. We remember the
+    // initial values of the other parameters to assert our assumption.
+ SmallDenseMap<PointerIntPair<Value *, 1>, ExitLimit> TripCountMap; + + const Loop *L; + bool ExitIfTrue; + bool AllowPredicates; + + public: + ExitLimitCache(const Loop *L, bool ExitIfTrue, bool AllowPredicates) + : L(L), ExitIfTrue(ExitIfTrue), AllowPredicates(AllowPredicates) {} + + Optional<ExitLimit> find(const Loop *L, Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, bool AllowPredicates); + + void insert(const Loop *L, Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, bool AllowPredicates, const ExitLimit &EL); + }; + + using ExitLimitCacheTy = ExitLimitCache; + + ExitLimit computeExitLimitFromCondCached(ExitLimitCacheTy &Cache, + const Loop *L, Value *ExitCond, + bool ExitIfTrue, + bool ControlsExit, + bool AllowPredicates); + ExitLimit computeExitLimitFromCondImpl(ExitLimitCacheTy &Cache, const Loop *L, + Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, + bool AllowPredicates); + + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a conditional branch of the ICmpInst + /// ExitCond and ExitIfTrue. If AllowPredicates is set, this call will try + /// to use a minimal set of SCEV predicates in order to return an exact + /// answer. + ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, + bool ExitIfTrue, + bool IsSubExpr, + bool AllowPredicates = false); + + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a switch with a single exiting case + /// to ExitingBB. + ExitLimit computeExitLimitFromSingleExitSwitch(const Loop *L, + SwitchInst *Switch, + BasicBlock *ExitingBB, + bool IsSubExpr); + + /// Given an exit condition of 'icmp op load X, cst', try to see if we can + /// compute the backedge-taken count. + ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS, + const Loop *L, + ICmpInst::Predicate p); + + /// Compute the exit limit of a loop that is controlled by a + /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip + /// count in these cases (since SCEV has no way of expressing them), but we + /// can still sometimes compute an upper bound. + /// + /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred + /// RHS`. + ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS, const Loop *L, + ICmpInst::Predicate Pred); + + /// If the loop is known to execute a constant number of times (the + /// condition evolves only from constants), try to evaluate a few iterations + /// of the loop until we get the exit condition gets a value of ExitWhen + /// (true or false). If we cannot evaluate the exit count of the loop, + /// return CouldNotCompute. + const SCEV *computeExitCountExhaustively(const Loop *L, Value *Cond, + bool ExitWhen); + + /// Return the number of times an exit condition comparing the specified + /// value to zero will execute. If not computable, return CouldNotCompute. + /// If AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit howFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr, + bool AllowPredicates = false); + + /// Return the number of times an exit condition checking the specified + /// value for nonzero will execute. If not computable, return + /// CouldNotCompute. + ExitLimit howFarToNonZero(const SCEV *V, const Loop *L); + + /// Return the number of times an exit condition containing the specified + /// less-than comparison will execute. 
If not computable, return + /// CouldNotCompute. + /// + /// \p isSigned specifies whether the less-than is signed. + /// + /// \p ControlsExit is true when the LHS < RHS condition directly controls + /// the branch (loops exits only if condition is true). In this case, we can + /// use NoWrapFlags to skip overflow checks. + /// + /// If \p AllowPredicates is set, this call will try to use a minimal set of + /// SCEV predicates in order to return an exact answer. + ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, + bool isSigned, bool ControlsExit, + bool AllowPredicates = false); + + ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, + bool isSigned, bool IsSubExpr, + bool AllowPredicates = false); + + /// Return a predecessor of BB (which may not be an immediate predecessor) + /// which has exactly one successor from which BB is reachable, or null if + /// no such block is found. + std::pair<BasicBlock *, BasicBlock *> + getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the given FoundCondValue value evaluates to true. + bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + Value *FoundCondValue, bool Inverse); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is + /// true. + bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. Here LHS is an operation that includes FoundLHS as one of its + /// arguments. + bool isImpliedViaOperations(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, + unsigned Depth = 0); + + /// Test whether the condition described by Pred, LHS, and RHS is true. + /// Use only simple non-recursive types of checks, such as range analysis etc. + bool isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. Utility function used by isImpliedCondOperands. Tries to get + /// cases like "X `sgt` 0 => X - 1 `sgt` -1". + bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied + /// by a call to \c @llvm.experimental.guard in \p BB. 
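The isImplied* helpers in this block feed the public isKnownPredicate query. A hedged sketch of what a client-level question looks like; the helper is illustrative and A is assumed to be an integer-typed SCEV obtained elsewhere:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative: is "A + 1" provably unsigned-greater-than "A"? This holds
// only when SCEV can rule out unsigned wrap of the increment, which is the
// kind of fact the implication machinery above helps establish.
static bool incrementIsUGreater(const SCEV *A, ScalarEvolution &SE) {
  const SCEV *APlusOne = SE.getAddExpr(A, SE.getOne(A->getType()));
  return SE.isKnownPredicate(ICmpInst::ICMP_UGT, APlusOne, A);
}
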
+ bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to rule out certain kinds of integer overflow, and + /// then tries to reason about arithmetic properties of the predicates. + bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to figure out predicate for Phis which are SCEVUnknown + /// if it is true for every possible incoming value from their respective + /// basic blocks. + bool isImpliedViaMerge(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, + unsigned Depth); + + /// If we know that the specified Phi is in the header of its containing + /// loop, we know the loop executes a constant number of times, and the PHI + /// node is just a recurrence involving constants, fold it. + Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, + const Loop *L); + + /// Test if the given expression is known to satisfy the condition described + /// by Pred and the known constant ranges of LHS and RHS. + bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Try to prove the condition described by "LHS Pred RHS" by ruling out + /// integer overflow. + /// + /// For instance, this will return true for "A s< (A + C)<nsw>" if C is + /// positive. + bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + + /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to + /// prove them individually. + bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + + /// Try to match the Expr as "(L + R)<Flags>". + bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, + SCEV::NoWrapFlags &Flags); + + /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a + /// constant, and None if it isn't. + /// + /// This is intended to be a cheaper version of getMinusSCEV. We can be + /// frugal here since we just bail out of actually constructing and + /// canonicalizing an expression in the cases where the result isn't going + /// to be a constant. + Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS); + + /// Drop memoized information computed for S. + void forgetMemoizedResults(const SCEV *S); + + /// Return an existing SCEV for V if there is one, otherwise return nullptr. + const SCEV *getExistingSCEV(Value *V); + + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- + /// pointer. + bool checkValidity(const SCEV *S) const; + + /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be + /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is + /// equivalent to proving no signed (resp. unsigned) wrap in + /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr` + /// (resp. `SCEVZeroExtendExpr`). 
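Externally, the outcome of this no-wrap reasoning shows up as the flags carried on the add recurrence itself. A sketch of a client checking them; the helper name is illustrative and IndVar is assumed to be a loop induction PHI:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative: does the recurrence computed for this PHI provably avoid
// signed wrap? FlagNSW is only present when ScalarEvolution managed to
// prove it (via the no-wrap reasoning above or the IR's own nsw flags).
static bool indVarHasNSW(PHINode *IndVar, ScalarEvolution &SE) {
  const auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(IndVar));
  return AR && AR->isAffine() && AR->hasNoSignedWrap();
}
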
+ template <typename ExtendOpTy> + bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, + const Loop *L); + + /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation. + SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); + + bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, bool &Increasing); + + /// Return SCEV no-wrap flags that can be proven based on reasoning about + /// how poison produced from no-wrap flags on this value (e.g. a nuw add) + /// would trigger undefined behavior on overflow. + SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + + /// Return true if the SCEV corresponding to \p I is never poison. Proving + /// this is more complex than proving that just \p I is never poison, since + /// SCEV commons expressions across control flow, and you can have cases + /// like: + /// + /// idx0 = a + b; + /// ptr[idx0] = 100; + /// if (<condition>) { + /// idx1 = a +nsw b; + /// ptr[idx1] = 200; + /// } + /// + /// where the SCEV expression (+ a b) is guaranteed to not be poison (and + /// hence not sign-overflow) only if "<condition>" is true. Since both + /// `idx0` and `idx1` will be mapped to the same SCEV expression, (+ a b), + /// it is not okay to annotate (+ a b) with <nsw> in the above example. + bool isSCEVExprNeverPoison(const Instruction *I); + + /// This is like \c isSCEVExprNeverPoison but it specifically works for + /// instructions that will get mapped to SCEV add recurrences. Return true + /// if \p I will never generate poison under the assumption that \p I is an + /// add recurrence on the loop \p L. + bool isAddRecNeverPoison(const Instruction *I, const Loop *L); + + /// Similar to createAddRecFromPHI, but with the additional flexibility of + /// suggesting runtime overflow checks in case casts are encountered. + /// If successful, the analysis records that for this loop, \p SymbolicPHI, + /// which is the UnknownSCEV currently representing the PHI, can be rewritten + /// into an AddRec, assuming some predicates; The function then returns the + /// AddRec and the predicates as a pair, and caches this pair in + /// PredicatedSCEVRewrites. + /// If the analysis is not successful, a mapping from the \p SymbolicPHI to + /// itself (with no predicates) is recorded, and a nullptr with an empty + /// predicates vector is returned as a pair. + Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> + createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI); + + /// Compute the backedge taken count knowing the interval difference, the + /// stride and presence of the equality in the comparison. + const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, + bool Equality); + + /// Compute the maximum backedge count based on the range of values + /// permitted by Start, End, and Stride. This is for loops of the form + /// {Start, +, Stride} LT End. + /// + /// Precondition: the induction variable is known to be positive. We *don't* + /// assert these preconditions so please be careful. + const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, + const SCEV *End, unsigned BitWidth, + bool IsSigned); + + /// Verify if an linear IV with positive stride can overflow when in a + /// less-than comparison, knowing the invariant term of the comparison, + /// the stride and the knowledge of NSW/NUW flags on the recurrence. 
+ bool doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, + bool NoWrap); + + /// Verify if an linear IV with negative stride can overflow when in a + /// greater-than comparison, knowing the invariant term of the comparison, + /// the stride and the knowledge of NSW/NUW flags on the recurrence. + bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, + bool NoWrap); + + /// Get add expr already created or create a new one. + const SCEV *getOrCreateAddExpr(ArrayRef<const SCEV *> Ops, + SCEV::NoWrapFlags Flags); + + /// Get mul expr already created or create a new one. + const SCEV *getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, + SCEV::NoWrapFlags Flags); + + // Get addrec expr already created or create a new one. + const SCEV *getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, + const Loop *L, SCEV::NoWrapFlags Flags); + + /// Return x if \p Val is f(x) where f is a 1-1 function. + const SCEV *stripInjectiveFunctions(const SCEV *Val) const; + + /// Find all of the loops transitively used in \p S, and fill \p LoopsUsed. + /// A loop is considered "used" by an expression if it contains + /// an add rec on said loop. + void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed); + + /// Find all of the loops transitively used in \p S, and update \c LoopUsers + /// accordingly. + void addToLoopUseLists(const SCEV *S); + + /// Try to match the pattern generated by getURemExpr(A, B). If successful, + /// Assign A and B to LHS and RHS, respectively. + bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); + + FoldingSet<SCEV> UniqueSCEVs; + FoldingSet<SCEVPredicate> UniquePreds; + BumpPtrAllocator SCEVAllocator; + + /// This maps loops to a list of SCEV expressions that (transitively) use said + /// loop. + DenseMap<const Loop *, SmallVector<const SCEV *, 4>> LoopUsers; + + /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression + /// they can be rewritten into under certain predicates. + DenseMap<std::pair<const SCEVUnknown *, const Loop *>, + std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> + PredicatedSCEVRewrites; + + /// The head of a linked list of all SCEVUnknown values that have been + /// allocated. This is used by releaseMemory to locate them all and call + /// their destructors. + SCEVUnknown *FirstUnknown = nullptr; +}; + +/// Analysis pass that exposes the \c ScalarEvolution for a function. +class ScalarEvolutionAnalysis + : public AnalysisInfoMixin<ScalarEvolutionAnalysis> { + friend AnalysisInfoMixin<ScalarEvolutionAnalysis>; + + static AnalysisKey Key; + +public: + using Result = ScalarEvolution; + + ScalarEvolution run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c ScalarEvolutionAnalysis results. 
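For orientation, obtaining the analysis under the new pass manager looks roughly like the sketch below; the pass name and body are illustrative, not part of this patch:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Illustrative new-PM pass: pull ScalarEvolution out of the analysis manager.
struct TripCountInspectorPass : PassInfoMixin<TripCountInspectorPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    ScalarEvolution &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
    (void)SE; // ...query SE.getBackedgeTakenCount(L), SE.getSCEV(V), etc.
    return PreservedAnalyses::all();
  }
};

The ScalarEvolutionPrinterPass declared just below is the in-tree way to dump these results; from opt it is normally reachable as -passes='print<scalar-evolution>'.
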
+class ScalarEvolutionPrinterPass + : public PassInfoMixin<ScalarEvolutionPrinterPass> { + raw_ostream &OS; + +public: + explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class ScalarEvolutionWrapperPass : public FunctionPass { + std::unique_ptr<ScalarEvolution> SE; + +public: + static char ID; + + ScalarEvolutionWrapperPass(); + + ScalarEvolution &getSE() { return *SE; } + const ScalarEvolution &getSE() const { return *SE; } + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; + void verifyAnalysis() const override; +}; + +/// An interface layer with SCEV used to manage how we see SCEV expressions +/// for values in the context of existing predicates. We can add new +/// predicates, but we cannot remove them. +/// +/// This layer has multiple purposes: +/// - provides a simple interface for SCEV versioning. +/// - guarantees that the order of transformations applied on a SCEV +/// expression for a single Value is consistent across two different +/// getSCEV calls. This means that, for example, once we've obtained +/// an AddRec expression for a certain value through expression +/// rewriting, we will continue to get an AddRec expression for that +/// Value. +/// - lowers the number of expression rewrites. +class PredicatedScalarEvolution { +public: + PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L); + + const SCEVUnionPredicate &getUnionPredicate() const; + + /// Returns the SCEV expression of V, in the context of the current SCEV + /// predicate. The order of transformations applied on the expression of V + /// returned by ScalarEvolution is guaranteed to be preserved, even when + /// adding new predicates. + const SCEV *getSCEV(Value *V); + + /// Get the (predicated) backedge count for the analyzed loop. + const SCEV *getBackedgeTakenCount(); + + /// Adds a new predicate. + void addPredicate(const SCEVPredicate &Pred); + + /// Attempts to produce an AddRecExpr for V by adding additional SCEV + /// predicates. If we can't transform the expression into an AddRecExpr we + /// return nullptr and not add additional SCEV predicates to the current + /// context. + const SCEVAddRecExpr *getAsAddRec(Value *V); + + /// Proves that V doesn't overflow by adding SCEV predicate. + void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); + + /// Returns true if we've proved that V doesn't wrap by means of a SCEV + /// predicate. + bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); + + /// Returns the ScalarEvolution analysis used. + ScalarEvolution *getSE() const { return &SE; } + + /// We need to explicitly define the copy constructor because of FlagsMap. + PredicatedScalarEvolution(const PredicatedScalarEvolution &); + + /// Print the SCEV mappings done by the Predicated Scalar Evolution. + /// The printed text is indented by \p Depth. + void print(raw_ostream &OS, unsigned Depth) const; + + /// Check if \p AR1 and \p AR2 are equal, while taking into account + /// Equal predicates in Preds. + bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, + const SCEVAddRecExpr *AR2) const; + +private: + /// Increments the version number of the predicate. This needs to be called + /// every time the SCEV predicate changes. 
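A sketch of the intended usage pattern for PredicatedScalarEvolution, loosely modelled on how loop transforms consume it; the helper name and the policy of rejecting anything that would need a run-time check are illustrative:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Illustrative: accept V only if it is an affine AddRec in L *without*
// needing any run-time SCEV predicate checks.
static bool isAffineAddRecWithoutRuntimeChecks(Value *V, ScalarEvolution &SE,
                                               Loop &L) {
  PredicatedScalarEvolution PSE(SE, L);
  // getAsAddRec may add predicates (e.g. wrap predicates) to PSE's context.
  const SCEVAddRecExpr *AR = PSE.getAsAddRec(V);
  if (!AR || !AR->isAffine())
    return false;
  // Anything that was assumed must be checked at run time before the AddRec
  // form may be relied on; an always-true union predicate means "no checks".
  return PSE.getUnionPredicate().isAlwaysTrue();
}
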
+ void updateGeneration(); + + /// Holds a SCEV and the version number of the SCEV predicate used to + /// perform the rewrite of the expression. + using RewriteEntry = std::pair<unsigned, const SCEV *>; + + /// Maps a SCEV to the rewrite result of that SCEV at a certain version + /// number. If this number doesn't match the current Generation, we will + /// need to do a rewrite. To preserve the transformation order of previous + /// rewrites, we will rewrite the previous result instead of the original + /// SCEV. + DenseMap<const SCEV *, RewriteEntry> RewriteMap; + + /// Records what NoWrap flags we've added to a Value *. + ValueMap<Value *, SCEVWrapPredicate::IncrementWrapFlags> FlagsMap; + + /// The ScalarEvolution analysis. + ScalarEvolution &SE; + + /// The analyzed Loop. + const Loop &L; + + /// The SCEVPredicate that forms our context. We will rewrite all + /// expressions assuming that this predicate true. + SCEVUnionPredicate Preds; + + /// Marks the version of the SCEV predicate used. When rewriting a SCEV + /// expression we mark it with the version of the predicate. We use this to + /// figure out if the predicate has changed from the last rewrite of the + /// SCEV. If so, we need to perform a new rewrite. + unsigned Generation = 0; + + /// The backedge taken count. + const SCEV *BackedgeCount = nullptr; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_SCALAREVOLUTION_H diff --git a/clang-r353983e/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h new file mode 100644 index 00000000..d417850c --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -0,0 +1,71 @@ +//===- ScalarEvolutionAliasAnalysis.h - SCEV-based AA -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a SCEV-based alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple alias analysis implementation that uses ScalarEvolution to answer +/// queries. +class SCEVAAResult : public AAResultBase<SCEVAAResult> { + ScalarEvolution &SE; + +public: + explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {} + SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {} + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + +private: + Value *GetBaseValue(const SCEV *S); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class SCEVAA : public AnalysisInfoMixin<SCEVAA> { + friend AnalysisInfoMixin<SCEVAA>; + static AnalysisKey Key; + +public: + typedef SCEVAAResult Result; + + SCEVAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the SCEVAAResult object. 
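SCEV-based AA is not part of the default alias-analysis stack, so a client has to ask for it. A new-pass-manager sketch; it assumes SCEVAA and ScalarEvolutionAnalysis are registered in the FunctionAnalysisManager (PassBuilder::registerFunctionAnalyses normally does this), and a real pipeline would register BasicAA and friends alongside:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
using namespace llvm;

// Illustrative: build an AAManager whose aggregated AAResults also consults
// SCEVAAResult.
static AAManager buildAAWithSCEVAA() {
  AAManager AA;
  AA.registerFunctionAnalysis<SCEVAA>();
  return AA;
}

The resulting AAManager is then itself registered with the FunctionAnalysisManager (for example FAM.registerPass([&] { return std::move(AA); })), so that passes asking for AAResults see the combined stack.
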
+class SCEVAAWrapperPass : public FunctionPass { + std::unique_ptr<SCEVAAResult> Result; + +public: + static char ID; + + SCEVAAWrapperPass(); + + SCEVAAResult &getResult() { return *Result; } + const SCEVAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// Creates an instance of \c SCEVAAWrapperPass. +FunctionPass *createSCEVAAWrapperPass(); + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ScalarEvolutionExpander.h b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionExpander.h new file mode 100644 index 00000000..a2c1258d --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -0,0 +1,397 @@ +//===---- llvm/Analysis/ScalarEvolutionExpander.h - SCEV Exprs --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the classes used to generate code from scalar expressions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +#include "llvm/Analysis/TargetFolder.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/ValueHandle.h" + +namespace llvm { + class TargetTransformInfo; + + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are safe to speculate anywhere their operands are + /// defined. + bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); + + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are defined and safe to speculate at the specified + /// location and their operands are defined at this location. + bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, + ScalarEvolution &SE); + + /// This class uses information about analyze scalars to rewrite expressions + /// in canonical form. + /// + /// Clients should create an instance of this class when rewriting is needed, + /// and destroy it when finished to allow the release of the associated + /// memory. + class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> { + ScalarEvolution &SE; + const DataLayout &DL; + + // New instructions receive a name to identify them with the current pass. + const char* IVName; + + // InsertedExpressions caches Values for reuse, so must track RAUW. + DenseMap<std::pair<const SCEV *, Instruction *>, TrackingVH<Value>> + InsertedExpressions; + + // InsertedValues only flags inserted instructions so needs no RAUW. + DenseSet<AssertingVH<Value>> InsertedValues; + DenseSet<AssertingVH<Value>> InsertedPostIncValues; + + /// A memoization of the "relevant" loop for a given SCEV. + DenseMap<const SCEV *, const Loop *> RelevantLoops; + + /// Addrecs referring to any of the given loops are expanded in post-inc + /// mode. For example, expanding {1,+,1}<L> in post-inc mode returns the add + /// instruction that adds one to the phi for {0,+,1}<L>, as opposed to a new + /// phi starting at 1. 
This is only supported in non-canonical mode. + PostIncLoopSet PostIncLoops; + + /// When this is non-null, addrecs expanded in the loop it indicates should + /// be inserted with increments at IVIncInsertPos. + const Loop *IVIncInsertLoop; + + /// When expanding addrecs in the IVIncInsertLoop loop, insert the IV + /// increment at this position. + Instruction *IVIncInsertPos; + + /// Phis that complete an IV chain. Reuse + DenseSet<AssertingVH<PHINode>> ChainedPhis; + + /// When true, expressions are expanded in "canonical" form. In particular, + /// addrecs are expanded as arithmetic based on a canonical induction + /// variable. When false, expression are expanded in a more literal form. + bool CanonicalMode; + + /// When invoked from LSR, the expander is in "strength reduction" mode. The + /// only difference is that phi's are only reused if they are already in + /// "expanded" form. + bool LSRMode; + + typedef IRBuilder<TargetFolder> BuilderType; + BuilderType Builder; + + // RAII object that stores the current insertion point and restores it when + // the object is destroyed. This includes the debug location. Duplicated + // from InsertPointGuard to add SetInsertPoint() which is used to updated + // InsertPointGuards stack when insert points are moved during SCEV + // expansion. + class SCEVInsertPointGuard { + IRBuilderBase &Builder; + AssertingVH<BasicBlock> Block; + BasicBlock::iterator Point; + DebugLoc DbgLoc; + SCEVExpander *SE; + + SCEVInsertPointGuard(const SCEVInsertPointGuard &) = delete; + SCEVInsertPointGuard &operator=(const SCEVInsertPointGuard &) = delete; + + public: + SCEVInsertPointGuard(IRBuilderBase &B, SCEVExpander *SE) + : Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()), + DbgLoc(B.getCurrentDebugLocation()), SE(SE) { + SE->InsertPointGuards.push_back(this); + } + + ~SCEVInsertPointGuard() { + // These guards should always created/destroyed in FIFO order since they + // are used to guard lexically scoped blocks of code in + // ScalarEvolutionExpander. + assert(SE->InsertPointGuards.back() == this); + SE->InsertPointGuards.pop_back(); + Builder.restoreIP(IRBuilderBase::InsertPoint(Block, Point)); + Builder.SetCurrentDebugLocation(DbgLoc); + } + + BasicBlock::iterator GetInsertPoint() const { return Point; } + void SetInsertPoint(BasicBlock::iterator I) { Point = I; } + }; + + /// Stack of pointers to saved insert points, used to keep insert points + /// consistent when instructions are moved. + SmallVector<SCEVInsertPointGuard *, 8> InsertPointGuards; + +#ifndef NDEBUG + const char *DebugType; +#endif + + friend struct SCEVVisitor<SCEVExpander, Value*>; + + public: + /// Construct a SCEVExpander in "canonical" mode. + explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL, + const char *name) + : SE(se), DL(DL), IVName(name), IVIncInsertLoop(nullptr), + IVIncInsertPos(nullptr), CanonicalMode(true), LSRMode(false), + Builder(se.getContext(), TargetFolder(DL)) { +#ifndef NDEBUG + DebugType = ""; +#endif + } + + ~SCEVExpander() { + // Make sure the insert point guard stack is consistent. + assert(InsertPointGuards.empty()); + } + +#ifndef NDEBUG + void setDebugType(const char* s) { DebugType = s; } +#endif + + /// Erase the contents of the InsertedExpressions map so that users trying + /// to expand the same expression into multiple BasicBlocks or different + /// places within the same BasicBlock can do so. 
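A sketch of the basic expansion workflow this class supports; the helper name is illustrative, and SE, the module DataLayout, and the insertion point are assumed to come from the caller:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Illustrative: materialize the loop's backedge-taken count as IR right
// before InsertPt, or give up if SCEV has no answer or expansion is unsafe.
static Value *emitBackedgeTakenCount(Loop *L, Instruction *InsertPt,
                                     ScalarEvolution &SE,
                                     const DataLayout &DL) {
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BTC) || !isSafeToExpandAt(BTC, InsertPt, SE))
    return nullptr;
  SCEVExpander Expander(SE, DL, "btc");
  // expandCodeFor reuses existing values where possible rather than always
  // emitting fresh arithmetic.
  return Expander.expandCodeFor(BTC, BTC->getType(), InsertPt);
}
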
+ void clear() { + InsertedExpressions.clear(); + InsertedValues.clear(); + InsertedPostIncValues.clear(); + ChainedPhis.clear(); + } + + /// Return true for expressions that may incur non-trivial cost to evaluate + /// at runtime. + /// + /// At is an optional parameter which specifies point in code where user is + /// going to expand this expression. Sometimes this knowledge can lead to a + /// more accurate cost estimation. + bool isHighCostExpansion(const SCEV *Expr, Loop *L, + const Instruction *At = nullptr) { + SmallPtrSet<const SCEV *, 8> Processed; + return isHighCostExpansionHelper(Expr, L, At, Processed); + } + + /// This method returns the canonical induction variable of the specified + /// type for the specified loop (inserting one if there is none). A + /// canonical induction variable starts at zero and steps by one on each + /// iteration. + PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty); + + /// Return the induction variable increment's IV operand. + Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos, + bool allowScale); + + /// Utility for hoisting an IV increment. + bool hoistIVInc(Instruction *IncV, Instruction *InsertPos); + + /// replace congruent phis with their most canonical representative. Return + /// the number of phis eliminated. + unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakTrackingVH> &DeadInsts, + const TargetTransformInfo *TTI = nullptr); + + /// Insert code to directly compute the specified SCEV expression into the + /// program. The inserted code is inserted into the specified block. + Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I); + + /// Insert code to directly compute the specified SCEV expression into the + /// program. The inserted code is inserted into the SCEVExpander's current + /// insertion point. If a type is specified, the result will be expanded to + /// have that type, with a cast if necessary. + Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr); + + + /// Generates a code sequence that evaluates this predicate. The inserted + /// instructions will be at position \p Loc. The result will be of type i1 + /// and will have a value of 0 when the predicate is false and 1 otherwise. + Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc); + + /// A specialized variant of expandCodeForPredicate, handling the case when + /// we are expanding code for a SCEVEqualPredicate. + Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, + Instruction *Loc); + + /// Generates code that evaluates if the \p AR expression will overflow. + Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, + bool Signed); + + /// A specialized variant of expandCodeForPredicate, handling the case when + /// we are expanding code for a SCEVWrapPredicate. + Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc); + + /// A specialized variant of expandCodeForPredicate, handling the case when + /// we are expanding code for a SCEVUnionPredicate. + Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, + Instruction *Loc); + + /// Set the current IV increment loop and position. + void setIVIncInsertPos(const Loop *L, Instruction *Pos) { + assert(!CanonicalMode && + "IV increment positions are not supported in CanonicalMode"); + IVIncInsertLoop = L; + IVIncInsertPos = Pos; + } + + /// Enable post-inc expansion for addrecs referring to the given + /// loops. 
Post-inc expansion is only supported in non-canonical mode. + void setPostInc(const PostIncLoopSet &L) { + assert(!CanonicalMode && + "Post-inc expansion is not supported in CanonicalMode"); + PostIncLoops = L; + } + + /// Disable all post-inc expansion. + void clearPostInc() { + PostIncLoops.clear(); + + // When we change the post-inc loop set, cached expansions may no + // longer be valid. + InsertedPostIncValues.clear(); + } + + /// Disable the behavior of expanding expressions in canonical form rather + /// than in a more literal form. Non-canonical mode is useful for late + /// optimization passes. + void disableCanonicalMode() { CanonicalMode = false; } + + void enableLSRMode() { LSRMode = true; } + + /// Set the current insertion point. This is useful if multiple calls to + /// expandCodeFor() are going to be made with the same insert point and the + /// insert point may be moved during one of the expansions (e.g. if the + /// insert point is not a block terminator). + void setInsertPoint(Instruction *IP) { + assert(IP); + Builder.SetInsertPoint(IP); + } + + /// Clear the current insertion point. This is useful if the instruction + /// that had been serving as the insertion point may have been deleted. + void clearInsertPoint() { + Builder.ClearInsertionPoint(); + } + + /// Return true if the specified instruction was inserted by the code + /// rewriter. If so, the client should not modify the instruction. + bool isInsertedInstruction(Instruction *I) const { + return InsertedValues.count(I) || InsertedPostIncValues.count(I); + } + + void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } + + /// Try to find existing LLVM IR value for S available at the point At. + Value *getExactExistingExpansion(const SCEV *S, const Instruction *At, + Loop *L); + + /// Try to find the ValueOffsetPair for S. The function is mainly used to + /// check whether S can be expanded cheaply. If this returns a non-None + /// value, we know we can codegen the `ValueOffsetPair` into a suitable + /// expansion identical with S so that S can be expanded cheaply. + /// + /// L is a hint which tells in which loop to look for the suitable value. + /// On success return value which is equivalent to the expanded S at point + /// At. Return nullptr if value was not found. + /// + /// Note that this function does not perform an exhaustive search. I.e if it + /// didn't find any value it does not mean that there is no such value. + /// + Optional<ScalarEvolution::ValueOffsetPair> + getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); + + private: + LLVMContext &getContext() const { return SE.getContext(); } + + /// Recursive helper function for isHighCostExpansion. + bool isHighCostExpansionHelper(const SCEV *S, Loop *L, + const Instruction *At, + SmallPtrSetImpl<const SCEV *> &Processed); + + /// Insert the specified binary operator, doing a small amount of work to + /// avoid inserting an obviously redundant operation. + Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS); + + /// Arrange for there to be a cast of V to Ty at IP, reusing an existing + /// cast if a suitable one exists, moving an existing cast if a suitable one + /// exists but isn't in the right place, or creating a new one. + Value *ReuseOrCreateCast(Value *V, Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP); + + /// Insert a cast of V to the specified type, which must be possible with a + /// noop cast, doing what we can to share the casts. 
+ Value *InsertNoopCastOfTo(Value *V, Type *Ty); + + /// Expand a SCEVAddExpr with a pointer type into a GEP instead of using + /// ptrtoint+arithmetic+inttoptr. + Value *expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, + PointerType *PTy, Type *Ty, Value *V); + Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V); + + /// Find a previous Value in ExprValueMap for expand. + ScalarEvolution::ValueOffsetPair + FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + + Value *expand(const SCEV *S); + + /// Determine the most "relevant" loop for the given SCEV. + const Loop *getRelevantLoop(const SCEV *); + + Value *visitConstant(const SCEVConstant *S) { + return S->getValue(); + } + + Value *visitTruncateExpr(const SCEVTruncateExpr *S); + + Value *visitZeroExtendExpr(const SCEVZeroExtendExpr *S); + + Value *visitSignExtendExpr(const SCEVSignExtendExpr *S); + + Value *visitAddExpr(const SCEVAddExpr *S); + + Value *visitMulExpr(const SCEVMulExpr *S); + + Value *visitUDivExpr(const SCEVUDivExpr *S); + + Value *visitAddRecExpr(const SCEVAddRecExpr *S); + + Value *visitSMaxExpr(const SCEVSMaxExpr *S); + + Value *visitUMaxExpr(const SCEVUMaxExpr *S); + + Value *visitUnknown(const SCEVUnknown *S) { + return S->getValue(); + } + + void rememberInstruction(Value *I); + + bool isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + + bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + + Value *expandAddRecExprLiterally(const SCEVAddRecExpr *); + PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + Type *ExpandTy, + Type *IntTy, + Type *&TruncTy, + bool &InvertStep); + Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, bool useSubtract); + + void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, + Instruction *Pos, PHINode *LoopPhi); + + void fixupInsertPoints(Instruction *I); + }; +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ScalarEvolutionExpressions.h b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionExpressions.h new file mode 100644 index 00000000..e187a962 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -0,0 +1,770 @@ +//===- llvm/Analysis/ScalarEvolutionExpressions.h - SCEV Exprs --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the classes used to represent and build scalar expressions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstddef> + +namespace llvm { + +class APInt; +class Constant; +class ConstantRange; +class Loop; +class Type; + + enum SCEVTypes { + // These should be ordered in terms of increasing complexity to make the + // folders simpler. + scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, + scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, + scUnknown, scCouldNotCompute + }; + + /// This class represents a constant integer value. + class SCEVConstant : public SCEV { + friend class ScalarEvolution; + + ConstantInt *V; + + SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) : + SCEV(ID, scConstant, 1), V(v) {} + + public: + ConstantInt *getValue() const { return V; } + const APInt &getAPInt() const { return getValue()->getValue(); } + + Type *getType() const { return V->getType(); } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scConstant; + } + }; + + static unsigned short computeExpressionSize(ArrayRef<const SCEV *> Args) { + APInt Size(16, 1); + for (auto *Arg : Args) + Size = Size.uadd_sat(APInt(16, Arg->getExpressionSize())); + return (unsigned short)Size.getZExtValue(); + } + + /// This is the base class for unary cast operator classes. + class SCEVCastExpr : public SCEV { + protected: + const SCEV *Op; + Type *Ty; + + SCEVCastExpr(const FoldingSetNodeIDRef ID, + unsigned SCEVTy, const SCEV *op, Type *ty); + + public: + const SCEV *getOperand() const { return Op; } + Type *getType() const { return Ty; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scTruncate || + S->getSCEVType() == scZeroExtend || + S->getSCEVType() == scSignExtend; + } + }; + + /// This class represents a truncation of an integer value to a + /// smaller integer value. + class SCEVTruncateExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVTruncateExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scTruncate; + } + }; + + /// This class represents a zero extension of a small integer value + /// to a larger integer value. + class SCEVZeroExtendExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scZeroExtend; + } + }; + + /// This class represents a sign extension of a small integer value + /// to a larger integer value. 
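The classof hooks above are what make isa/dyn_cast work on SCEV nodes. A small sketch of the usual client-side dispatch; the helper is illustrative:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Illustrative: strip truncations and zero-extensions off a SCEV, returning
// the innermost operand.
static const SCEV *stripTruncsAndZExts(const SCEV *S) {
  while (const auto *Cast = dyn_cast<SCEVCastExpr>(S)) {
    if (!isa<SCEVTruncateExpr>(Cast) && !isa<SCEVZeroExtendExpr>(Cast))
      break;
    S = Cast->getOperand();
  }
  return S;
}
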
+ class SCEVSignExtendExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scSignExtend; + } + }; + + /// This node is a base class providing common functionality for + /// n'ary operators. + class SCEVNAryExpr : public SCEV { + protected: + // Since SCEVs are immutable, ScalarEvolution allocates operand + // arrays with its SCEVAllocator, so this class just needs a simple + // pointer rather than a more elaborate vector-like data structure. + // This also avoids the need for a non-trivial destructor. + const SCEV *const *Operands; + size_t NumOperands; + + SCEVNAryExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T, + const SCEV *const *O, size_t N) + : SCEV(ID, T, computeExpressionSize(makeArrayRef(O, N))), Operands(O), + NumOperands(N) {} + + public: + size_t getNumOperands() const { return NumOperands; } + + const SCEV *getOperand(unsigned i) const { + assert(i < NumOperands && "Operand index out of range!"); + return Operands[i]; + } + + using op_iterator = const SCEV *const *; + using op_range = iterator_range<op_iterator>; + + op_iterator op_begin() const { return Operands; } + op_iterator op_end() const { return Operands + NumOperands; } + op_range operands() const { + return make_range(op_begin(), op_end()); + } + + Type *getType() const { return getOperand(0)->getType(); } + + NoWrapFlags getNoWrapFlags(NoWrapFlags Mask = NoWrapMask) const { + return (NoWrapFlags)(SubclassData & Mask); + } + + bool hasNoUnsignedWrap() const { + return getNoWrapFlags(FlagNUW) != FlagAnyWrap; + } + + bool hasNoSignedWrap() const { + return getNoWrapFlags(FlagNSW) != FlagAnyWrap; + } + + bool hasNoSelfWrap() const { + return getNoWrapFlags(FlagNW) != FlagAnyWrap; + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scAddExpr || + S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || + S->getSCEVType() == scUMaxExpr || + S->getSCEVType() == scAddRecExpr; + } + }; + + /// This node is the base class for n'ary commutative operators. + class SCEVCommutativeExpr : public SCEVNAryExpr { + protected: + SCEVCommutativeExpr(const FoldingSetNodeIDRef ID, + enum SCEVTypes T, const SCEV *const *O, size_t N) + : SCEVNAryExpr(ID, T, O, N) {} + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scAddExpr || + S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || + S->getSCEVType() == scUMaxExpr; + } + + /// Set flags for a non-recurrence without clearing previously set flags. + void setNoWrapFlags(NoWrapFlags Flags) { + SubclassData |= Flags; + } + }; + + /// This node represents an addition of some number of SCEVs. + class SCEVAddExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVAddExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scAddExpr, O, N) {} + + public: + Type *getType() const { + // Use the type of the last operand, which is likely to be a pointer + // type, if there is one. This doesn't usually matter, but it can help + // reduce casts when the expressions are expanded. 
+ return getOperand(getNumOperands() - 1)->getType(); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scAddExpr; + } + }; + + /// This node represents multiplication of some number of SCEVs. + class SCEVMulExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVMulExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scMulExpr, O, N) {} + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scMulExpr; + } + }; + + /// This class represents a binary unsigned division operation. + class SCEVUDivExpr : public SCEV { + friend class ScalarEvolution; + + const SCEV *LHS; + const SCEV *RHS; + + SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs) + : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})), LHS(lhs), + RHS(rhs) {} + + public: + const SCEV *getLHS() const { return LHS; } + const SCEV *getRHS() const { return RHS; } + + Type *getType() const { + // In most cases the types of LHS and RHS will be the same, but in some + // crazy cases one or the other may be a pointer. ScalarEvolution doesn't + // depend on the type for correctness, but handling types carefully can + // avoid extra casts in the SCEVExpander. The LHS is more likely to be + // a pointer type than the RHS, so use the RHS' type here. + return getRHS()->getType(); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scUDivExpr; + } + }; + + /// This node represents a polynomial recurrence on the trip count + /// of the specified loop. This is the primary focus of the + /// ScalarEvolution framework; all the other SCEV subclasses are + /// mostly just supporting infrastructure to allow SCEVAddRecExpr + /// expressions to be created and analyzed. + /// + /// All operands of an AddRec are required to be loop invariant. + /// + class SCEVAddRecExpr : public SCEVNAryExpr { + friend class ScalarEvolution; + + const Loop *L; + + SCEVAddRecExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N, const Loop *l) + : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {} + + public: + const SCEV *getStart() const { return Operands[0]; } + const Loop *getLoop() const { return L; } + + /// Constructs and returns the recurrence indicating how much this + /// expression steps by. If this is a polynomial of degree N, it + /// returns a chrec of degree N-1. We cannot determine whether + /// the step recurrence has self-wraparound. + const SCEV *getStepRecurrence(ScalarEvolution &SE) const { + if (isAffine()) return getOperand(1); + return SE.getAddRecExpr(SmallVector<const SCEV *, 3>(op_begin()+1, + op_end()), + getLoop(), FlagAnyWrap); + } + + /// Return true if this represents an expression A + B*x where A + /// and B are loop invariant values. + bool isAffine() const { + // We know that the start value is invariant. This expression is thus + // affine iff the step is also invariant. + return getNumOperands() == 2; + } + + /// Return true if this represents an expression A + B*x + C*x^2 + /// where A, B and C are loop invariant values. This corresponds + /// to an addrec of the form {L,+,M,+,N} + bool isQuadratic() const { + return getNumOperands() == 3; + } + + /// Set flags for a recurrence without clearing any previously set flags. 
+ /// For AddRec, either NUW or NSW implies NW. Keep track of this fact here + /// to make it easier to propagate flags. + void setNoWrapFlags(NoWrapFlags Flags) { + if (Flags & (FlagNUW | FlagNSW)) + Flags = ScalarEvolution::setFlags(Flags, FlagNW); + SubclassData |= Flags; + } + + /// Return the value of this chain of recurrences at the specified + /// iteration number. + const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const; + + /// Return the number of iterations of this loop that produce + /// values in the specified constant range. Another way of + /// looking at this is that it returns the first iteration number + /// where the value is not in the condition, thus computing the + /// exit count. If the iteration count can't be computed, an + /// instance of SCEVCouldNotCompute is returned. + const SCEV *getNumIterationsInRange(const ConstantRange &Range, + ScalarEvolution &SE) const; + + /// Return an expression representing the value of this expression + /// one iteration of the loop ahead. + const SCEVAddRecExpr *getPostIncExpr(ScalarEvolution &SE) const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scAddRecExpr; + } + }; + + /// This class represents a signed maximum selection. + class SCEVSMaxExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVSMaxExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) { + // Max never overflows. + setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); + } + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scSMaxExpr; + } + }; + + /// This class represents an unsigned maximum selection. + class SCEVUMaxExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVUMaxExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) { + // Max never overflows. + setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); + } + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scUMaxExpr; + } + }; + + /// This means that we are dealing with an entirely unknown SCEV + /// value, and only represent it as its LLVM Value. This is the + /// "bottom" value for the analysis. + class SCEVUnknown final : public SCEV, private CallbackVH { + friend class ScalarEvolution; + + /// The parent ScalarEvolution value. This is used to update the + /// parent's maps when the value associated with a SCEVUnknown is + /// deleted or RAUW'd. + ScalarEvolution *SE; + + /// The next pointer in the linked list of all SCEVUnknown + /// instances owned by a ScalarEvolution. + SCEVUnknown *Next; + + SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, + ScalarEvolution *se, SCEVUnknown *next) : + SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {} + + // Implement CallbackVH. + void deleted() override; + void allUsesReplacedWith(Value *New) override; + + public: + Value *getValue() const { return getValPtr(); } + + /// @{ + /// Test whether this is a special constant representing a type + /// size, alignment, or field offset in a target-independent + /// manner, and hasn't happened to have been folded with other + /// operations into something unrecognizable. 
This is mainly only + /// useful for pretty-printing and other situations where it isn't + /// absolutely required for these to succeed. + bool isSizeOf(Type *&AllocTy) const; + bool isAlignOf(Type *&AllocTy) const; + bool isOffsetOf(Type *&STy, Constant *&FieldNo) const; + /// @} + + Type *getType() const { return getValPtr()->getType(); } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scUnknown; + } + }; + + /// This class defines a simple visitor class that may be used for + /// various SCEV analysis purposes. + template<typename SC, typename RetVal=void> + struct SCEVVisitor { + RetVal visit(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: + return ((SC*)this)->visitConstant((const SCEVConstant*)S); + case scTruncate: + return ((SC*)this)->visitTruncateExpr((const SCEVTruncateExpr*)S); + case scZeroExtend: + return ((SC*)this)->visitZeroExtendExpr((const SCEVZeroExtendExpr*)S); + case scSignExtend: + return ((SC*)this)->visitSignExtendExpr((const SCEVSignExtendExpr*)S); + case scAddExpr: + return ((SC*)this)->visitAddExpr((const SCEVAddExpr*)S); + case scMulExpr: + return ((SC*)this)->visitMulExpr((const SCEVMulExpr*)S); + case scUDivExpr: + return ((SC*)this)->visitUDivExpr((const SCEVUDivExpr*)S); + case scAddRecExpr: + return ((SC*)this)->visitAddRecExpr((const SCEVAddRecExpr*)S); + case scSMaxExpr: + return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); + case scUMaxExpr: + return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); + case scUnknown: + return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); + case scCouldNotCompute: + return ((SC*)this)->visitCouldNotCompute((const SCEVCouldNotCompute*)S); + default: + llvm_unreachable("Unknown SCEV type!"); + } + } + + RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) { + llvm_unreachable("Invalid use of SCEVCouldNotCompute!"); + } + }; + + /// Visit all nodes in the expression tree using worklist traversal. + /// + /// Visitor implements: + /// // return true to follow this node. + /// bool follow(const SCEV *S); + /// // return true to terminate the search. + /// bool isDone(); + template<typename SV> + class SCEVTraversal { + SV &Visitor; + SmallVector<const SCEV *, 8> Worklist; + SmallPtrSet<const SCEV *, 8> Visited; + + void push(const SCEV *S) { + if (Visited.insert(S).second && Visitor.follow(S)) + Worklist.push_back(S); + } + + public: + SCEVTraversal(SV& V): Visitor(V) {} + + void visitAll(const SCEV *Root) { + push(Root); + while (!Worklist.empty() && !Visitor.isDone()) { + const SCEV *S = Worklist.pop_back_val(); + + switch (S->getSCEVType()) { + case scConstant: + case scUnknown: + break; + case scTruncate: + case scZeroExtend: + case scSignExtend: + push(cast<SCEVCastExpr>(S)->getOperand()); + break; + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: + case scAddRecExpr: + for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) + push(Op); + break; + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + push(UDiv->getLHS()); + push(UDiv->getRHS()); + break; + } + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + default: + llvm_unreachable("Unknown SCEV kind!"); + } + } + } + }; + + /// Use SCEVTraversal to visit all nodes in the given expression tree. 
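+ ///
+ /// For example, a client visitor that collects every SCEVUnknown leaf might
+ /// look like this (a minimal sketch; 'CollectUnknowns' is a hypothetical
+ /// client type):
+ ///
+ ///   struct CollectUnknowns {
+ ///     SmallPtrSet<const SCEVUnknown *, 8> Leaves;
+ ///     bool follow(const SCEV *S) {
+ ///       if (const auto *U = dyn_cast<SCEVUnknown>(S))
+ ///         Leaves.insert(U);
+ ///       return true;                      // keep descending into operands
+ ///     }
+ ///     bool isDone() const { return false; }  // never stop early
+ ///   };
+ ///
+ ///   CollectUnknowns C;
+ ///   visitAll(Expr, C);                    // C.Leaves now holds the unknowns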
+ template<typename SV> + void visitAll(const SCEV *Root, SV& Visitor) { + SCEVTraversal<SV> T(Visitor); + T.visitAll(Root); + } + + /// Return true if any node in \p Root satisfies the predicate \p Pred. + template <typename PredTy> + bool SCEVExprContains(const SCEV *Root, PredTy Pred) { + struct FindClosure { + bool Found = false; + PredTy Pred; + + FindClosure(PredTy Pred) : Pred(Pred) {} + + bool follow(const SCEV *S) { + if (!Pred(S)) + return true; + + Found = true; + return false; + } + + bool isDone() const { return Found; } + }; + + FindClosure FC(Pred); + visitAll(Root, FC); + return FC.Found; + } + + /// This visitor recursively visits a SCEV expression and re-writes it. + /// The result from each visit is cached, so it will return the same + /// SCEV for the same input. + template<typename SC> + class SCEVRewriteVisitor : public SCEVVisitor<SC, const SCEV *> { + protected: + ScalarEvolution &SE; + // Memoize the result of each visit so that we only compute once for + // the same input SCEV. This is to avoid redundant computations when + // a SCEV is referenced by multiple SCEVs. Without memoization, this + // visit algorithm would have exponential time complexity in the worst + // case, causing the compiler to hang on certain tests. + DenseMap<const SCEV *, const SCEV *> RewriteResults; + + public: + SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {} + + const SCEV *visit(const SCEV *S) { + auto It = RewriteResults.find(S); + if (It != RewriteResults.end()) + return It->second; + auto* Visited = SCEVVisitor<SC, const SCEV *>::visit(S); + auto Result = RewriteResults.try_emplace(S, Visited); + assert(Result.second && "Should insert a new entry"); + return Result.first->second; + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return Operand == Expr->getOperand() + ? Expr + : SE.getTruncateExpr(Operand, Expr->getType()); + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return Operand == Expr->getOperand() + ? Expr + : SE.getZeroExtendExpr(Operand, Expr->getType()); + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return Operand == Expr->getOperand() + ? Expr + : SE.getSignExtendExpr(Operand, Expr->getType()); + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + SmallVector<const SCEV *, 2> Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + SmallVector<const SCEV *, 2> Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + auto *LHS = ((SC *)this)->visit(Expr->getLHS()); + auto *RHS = ((SC *)this)->visit(Expr->getRHS()); + bool Changed = LHS != Expr->getLHS() || RHS != Expr->getRHS(); + return !Changed ? 
Expr : SE.getUDivExpr(LHS, RHS);
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr
+ : SE.getAddRecExpr(Operands, Expr->getLoop(),
+ Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getSMaxExpr(Operands);
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getUMaxExpr(Operands);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ return Expr;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+ };
+
+ using ValueToValueMap = DenseMap<const Value *, Value *>;
+
+ /// The SCEVParameterRewriter takes a scalar evolution expression and updates
+ /// the SCEVUnknown components following the Map (Value -> Value).
+ class SCEVParameterRewriter : public SCEVRewriteVisitor<SCEVParameterRewriter> {
+ public:
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+ ValueToValueMap &Map,
+ bool InterpretConsts = false) {
+ SCEVParameterRewriter Rewriter(SE, Map, InterpretConsts);
+ return Rewriter.visit(Scev);
+ }
+
+ SCEVParameterRewriter(ScalarEvolution &SE, ValueToValueMap &M, bool C)
+ : SCEVRewriteVisitor(SE), Map(M), InterpretConsts(C) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ Value *V = Expr->getValue();
+ if (Map.count(V)) {
+ Value *NV = Map[V];
+ if (InterpretConsts && isa<ConstantInt>(NV))
+ return SE.getConstant(cast<ConstantInt>(NV));
+ return SE.getUnknown(NV);
+ }
+ return Expr;
+ }
+
+ private:
+ ValueToValueMap &Map;
+ bool InterpretConsts;
+ };
+
+ using LoopToScevMapT = DenseMap<const Loop *, const SCEV *>;
+
+ /// The SCEVLoopAddRecRewriter takes a scalar evolution expression and applies
+ /// the Map (Loop -> SCEV) to all AddRecExprs.
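+ ///
+ /// A minimal usage sketch (assuming ScalarEvolution &SE, Loop *L, and
+ /// const SCEV *Expr are already in scope): mapping L to a constant iteration
+ /// number rewrites each {Start,+,Step}<L> into its value at that iteration.
+ ///
+ ///   LoopToScevMapT Map;
+ ///   Map[L] = SE.getConstant(Expr->getType(), 4);
+ ///   const SCEV *AtIter4 = SCEVLoopAddRecRewriter::rewrite(Expr, Map, SE);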
+ class SCEVLoopAddRecRewriter
+ : public SCEVRewriteVisitor<SCEVLoopAddRecRewriter> {
+ public:
+ SCEVLoopAddRecRewriter(ScalarEvolution &SE, LoopToScevMapT &M)
+ : SCEVRewriteVisitor(SE), Map(M) {}
+
+ static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map,
+ ScalarEvolution &SE) {
+ SCEVLoopAddRecRewriter Rewriter(SE, Map);
+ return Rewriter.visit(Scev);
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (const SCEV *Op : Expr->operands())
+ Operands.push_back(visit(Op));
+
+ const Loop *L = Expr->getLoop();
+ const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
+
+ if (0 == Map.count(L))
+ return Res;
+
+ const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res);
+ return Rec->evaluateAtIteration(Map[L], SE);
+ }
+
+ private:
+ LoopToScevMapT &Map;
+ };
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H
diff --git a/clang-r353983e/include/llvm/Analysis/ScalarEvolutionNormalization.h b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionNormalization.h
new file mode 100644
index 00000000..1a05594a
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -0,0 +1,68 @@
+//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for working with "normalized" ScalarEvolution
+// expressions.
+//
+// The following example illustrates post-increment uses and how normalized
+// expressions help.
+//
+// for (i=0; i!=n; ++i) {
+// ...
+// }
+// use(i);
+//
+// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the
+// expression for the use of i outside the loop is {1,+,1}<%L>, since i is
+// incremented at the end of the loop body. This is inconvenient, since it
+// suggests that we need two different induction variables, one that starts
+// at 0 and one that starts at 1. We'd prefer to be able to think of these as
+// the same induction variable, with uses inside the loop using the
+// "pre-incremented" value, and uses after the loop using the
+// "post-incremented" value.
+//
+// Expressions for post-incremented uses are represented as an expression
+// paired with a set of loops for which the expression is in "post-increment"
+// mode (there may be multiple loops).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+
+namespace llvm {
+
+class Loop;
+class ScalarEvolution;
+class SCEV;
+
+typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
+
+typedef function_ref<bool(const SCEVAddRecExpr *)> NormalizePredTy;
+
+/// Normalize \p S to be post-increment for all loops present in \p
+/// Loops.
+const SCEV *normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops,
+ ScalarEvolution &SE);
+
+/// Normalize \p S for all add recurrence sub-expressions for which \p
+/// Pred returns true.
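+/// For instance, to post-increment only the recurrences of a particular loop
+/// L, a caller might pass a predicate such as (illustrative sketch):
+///
+///   auto Pred = [&](const SCEVAddRecExpr *AR) { return AR->getLoop() == L; };
+///   const SCEV *Post = normalizeForPostIncUseIf(S, Pred, SE);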
+const SCEV *normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred, + ScalarEvolution &SE); + +/// Denormalize \p S to be post-increment for all loops present in \p +/// Loops. +const SCEV *denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, + ScalarEvolution &SE); +} // namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ScopedNoAliasAA.h b/clang-r353983e/include/llvm/Analysis/ScopedNoAliasAA.h new file mode 100644 index 00000000..94c60218 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ScopedNoAliasAA.h @@ -0,0 +1,88 @@ +//===- ScopedNoAliasAA.h - Scoped No-Alias Alias Analysis -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This is the interface for a metadata-based scoped no-alias analysis. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCOPEDNOALIASAA_H +#define LLVM_ANALYSIS_SCOPEDNOALIASAA_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <memory> + +namespace llvm { + +class Function; +class MDNode; +class MemoryLocation; + +/// A simple AA result which uses scoped-noalias metadata to answer queries. +class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> { + friend AAResultBase<ScopedNoAliasAAResult>; + +public: + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2); + +private: + bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class ScopedNoAliasAA : public AnalysisInfoMixin<ScopedNoAliasAA> { + friend AnalysisInfoMixin<ScopedNoAliasAA>; + + static AnalysisKey Key; + +public: + using Result = ScopedNoAliasAAResult; + + ScopedNoAliasAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the ScopedNoAliasAAResult object. +class ScopedNoAliasAAWrapperPass : public ImmutablePass { + std::unique_ptr<ScopedNoAliasAAResult> Result; + +public: + static char ID; + + ScopedNoAliasAAWrapperPass(); + + ScopedNoAliasAAResult &getResult() { return *Result; } + const ScopedNoAliasAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createScopedNoAliasAAWrapperPass - This pass implements metadata-based +// scoped noalias analysis. 
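+//
+// A legacy pass pipeline would typically schedule it with something along the
+// lines of PM.add(createScopedNoAliasAAWrapperPass()) (illustrative sketch).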
+// +ImmutablePass *createScopedNoAliasAAWrapperPass(); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_SCOPEDNOALIASAA_H diff --git a/clang-r353983e/include/llvm/Analysis/SparsePropagation.h b/clang-r353983e/include/llvm/Analysis/SparsePropagation.h new file mode 100644 index 00000000..fac92e4a --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/SparsePropagation.h @@ -0,0 +1,526 @@ +//===- SparsePropagation.h - Sparse Conditional Property Propagation ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an abstract sparse conditional propagation algorithm, +// modeled after SCCP, but with a customizable lattice function. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H +#define LLVM_ANALYSIS_SPARSEPROPAGATION_H + +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include <set> + +#define DEBUG_TYPE "sparseprop" + +namespace llvm { + +/// A template for translating between LLVM Values and LatticeKeys. Clients must +/// provide a specialization of LatticeKeyInfo for their LatticeKey type. +template <class LatticeKey> struct LatticeKeyInfo { + // static inline Value *getValueFromLatticeKey(LatticeKey Key); + // static inline LatticeKey getLatticeKeyFromValue(Value *V); +}; + +template <class LatticeKey, class LatticeVal, + class KeyInfo = LatticeKeyInfo<LatticeKey>> +class SparseSolver; + +/// AbstractLatticeFunction - This class is implemented by the dataflow instance +/// to specify what the lattice values are and how they handle merges etc. This +/// gives the client the power to compute lattice values from instructions, +/// constants, etc. The current requirement is that lattice values must be +/// copyable. At the moment, nothing tries to avoid copying. Additionally, +/// lattice keys must be able to be used as keys of a mapping data structure. +/// Internally, the generic solver currently uses a DenseMap to map lattice keys +/// to lattice values. If the lattice key is a non-standard type, a +/// specialization of DenseMapInfo must be provided. +template <class LatticeKey, class LatticeVal> class AbstractLatticeFunction { +private: + LatticeVal UndefVal, OverdefinedVal, UntrackedVal; + +public: + AbstractLatticeFunction(LatticeVal undefVal, LatticeVal overdefinedVal, + LatticeVal untrackedVal) { + UndefVal = undefVal; + OverdefinedVal = overdefinedVal; + UntrackedVal = untrackedVal; + } + + virtual ~AbstractLatticeFunction() = default; + + LatticeVal getUndefVal() const { return UndefVal; } + LatticeVal getOverdefinedVal() const { return OverdefinedVal; } + LatticeVal getUntrackedVal() const { return UntrackedVal; } + + /// IsUntrackedValue - If the specified LatticeKey is obviously uninteresting + /// to the analysis (i.e., it would always return UntrackedVal), this + /// function can return true to avoid pointless work. + virtual bool IsUntrackedValue(LatticeKey Key) { return false; } + + /// ComputeLatticeVal - Compute and return a LatticeVal corresponding to the + /// given LatticeKey. 
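+ ///
+ /// A constant-propagation style client might override it roughly as follows
+ /// (a sketch; 'CPLatticeVal' and the Value* key type are hypothetical):
+ ///
+ ///   CPLatticeVal ComputeLatticeVal(Value *Key) override {
+ ///     if (auto *C = dyn_cast<Constant>(Key))
+ ///       return CPLatticeVal(C);     // seed known constants
+ ///     return getOverdefinedVal();   // everything else starts overdefined
+ ///   }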
+ virtual LatticeVal ComputeLatticeVal(LatticeKey Key) {
+ return getOverdefinedVal();
+ }
+
+ /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is
+ /// one that we want to handle through ComputeInstructionState.
+ virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; }
+
+ /// MergeValues - Compute and return the merge of the two specified lattice
+ /// values. Merging should only move one direction down the lattice to
+ /// guarantee convergence (toward overdefined).
+ virtual LatticeVal MergeValues(LatticeVal X, LatticeVal Y) {
+ return getOverdefinedVal(); // always safe, never useful.
+ }
+
+ /// ComputeInstructionState - Compute the LatticeKeys that change as a result
+ /// of executing instruction \p I. Their associated LatticeVals are stored in
+ /// \p ChangedValues.
+ virtual void
+ ComputeInstructionState(Instruction &I,
+ DenseMap<LatticeKey, LatticeVal> &ChangedValues,
+ SparseSolver<LatticeKey, LatticeVal> &SS) = 0;
+
+ /// PrintLatticeVal - Render the given LatticeVal to the specified stream.
+ virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS);
+
+ /// PrintLatticeKey - Render the given LatticeKey to the specified stream.
+ virtual void PrintLatticeKey(LatticeKey Key, raw_ostream &OS);
+
+ /// GetValueFromLatticeVal - If the given LatticeVal is representable as an
+ /// LLVM value, return it; otherwise, return nullptr. If a type is given, the
+ /// returned value must have the same type. This function is used by the
+ /// generic solver in attempting to resolve branch and switch conditions.
+ virtual Value *GetValueFromLatticeVal(LatticeVal LV, Type *Ty = nullptr) {
+ return nullptr;
+ }
+};
+
+/// SparseSolver - This class is a general purpose solver for Sparse Conditional
+/// Propagation with a programmable lattice function.
+template <class LatticeKey, class LatticeVal, class KeyInfo>
+class SparseSolver {
+
+ /// LatticeFunc - This is the object that knows the lattice and how to
+ /// compute transfer functions.
+ AbstractLatticeFunction<LatticeKey, LatticeVal> *LatticeFunc;
+
+ /// ValueState - Holds the LatticeVals associated with LatticeKeys.
+ DenseMap<LatticeKey, LatticeVal> ValueState;
+
+ /// BBExecutable - Holds the basic blocks that are executable.
+ SmallPtrSet<BasicBlock *, 16> BBExecutable;
+
+ /// ValueWorkList - Holds values that should be processed.
+ SmallVector<Value *, 64> ValueWorkList;
+
+ /// BBWorkList - Holds basic blocks that should be processed.
+ SmallVector<BasicBlock *, 64> BBWorkList;
+
+ using Edge = std::pair<BasicBlock *, BasicBlock *>;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ std::set<Edge> KnownFeasibleEdges;
+
+public:
+ explicit SparseSolver(
+ AbstractLatticeFunction<LatticeKey, LatticeVal> *Lattice)
+ : LatticeFunc(Lattice) {}
+ SparseSolver(const SparseSolver &) = delete;
+ SparseSolver &operator=(const SparseSolver &) = delete;
+
+ /// Solve - Solve for constants and executable blocks.
+ void Solve();
+
+ void Print(raw_ostream &OS) const;
+
+ /// getExistingValueState - Return the LatticeVal object corresponding to the
+ /// given value from the ValueState map. If the value is not in the map,
+ /// UntrackedVal is returned, unlike the getValueState method.
+ LatticeVal getExistingValueState(LatticeKey Key) const {
+ auto I = ValueState.find(Key);
+ return I != ValueState.end() ?
I->second : LatticeFunc->getUntrackedVal(); + } + + /// getValueState - Return the LatticeVal object corresponding to the given + /// value from the ValueState map. If the value is not in the map, its state + /// is initialized. + LatticeVal getValueState(LatticeKey Key); + + /// isEdgeFeasible - Return true if the control flow edge from the 'From' + /// basic block to the 'To' basic block is currently feasible. If + /// AggressiveUndef is true, then this treats values with unknown lattice + /// values as undefined. This is generally only useful when solving the + /// lattice, not when querying it. + bool isEdgeFeasible(BasicBlock *From, BasicBlock *To, + bool AggressiveUndef = false); + + /// isBlockExecutable - Return true if there are any known feasible + /// edges into the basic block. This is generally only useful when + /// querying the lattice. + bool isBlockExecutable(BasicBlock *BB) const { + return BBExecutable.count(BB); + } + + /// MarkBlockExecutable - This method can be used by clients to mark all of + /// the blocks that are known to be intrinsically live in the processed unit. + void MarkBlockExecutable(BasicBlock *BB); + +private: + /// UpdateState - When the state of some LatticeKey is potentially updated to + /// the given LatticeVal, this function notices and adds the LLVM value + /// corresponding the key to the work list, if needed. + void UpdateState(LatticeKey Key, LatticeVal LV); + + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB + /// work list if it is not already executable. + void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); + + /// getFeasibleSuccessors - Return a vector of booleans to indicate which + /// successors are reachable from a given terminator instruction. + void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs, + bool AggressiveUndef); + + void visitInst(Instruction &I); + void visitPHINode(PHINode &I); + void visitTerminator(Instruction &TI); +}; + +//===----------------------------------------------------------------------===// +// AbstractLatticeFunction Implementation +//===----------------------------------------------------------------------===// + +template <class LatticeKey, class LatticeVal> +void AbstractLatticeFunction<LatticeKey, LatticeVal>::PrintLatticeVal( + LatticeVal V, raw_ostream &OS) { + if (V == UndefVal) + OS << "undefined"; + else if (V == OverdefinedVal) + OS << "overdefined"; + else if (V == UntrackedVal) + OS << "untracked"; + else + OS << "unknown lattice value"; +} + +template <class LatticeKey, class LatticeVal> +void AbstractLatticeFunction<LatticeKey, LatticeVal>::PrintLatticeKey( + LatticeKey Key, raw_ostream &OS) { + OS << "unknown lattice key"; +} + +//===----------------------------------------------------------------------===// +// SparseSolver Implementation +//===----------------------------------------------------------------------===// + +template <class LatticeKey, class LatticeVal, class KeyInfo> +LatticeVal +SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getValueState(LatticeKey Key) { + auto I = ValueState.find(Key); + if (I != ValueState.end()) + return I->second; // Common case, in the map + + if (LatticeFunc->IsUntrackedValue(Key)) + return LatticeFunc->getUntrackedVal(); + LatticeVal LV = LatticeFunc->ComputeLatticeVal(Key); + + // If this value is untracked, don't add it to the map. 
+ if (LV == LatticeFunc->getUntrackedVal()) + return LV; + return ValueState[Key] = std::move(LV); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::UpdateState(LatticeKey Key, + LatticeVal LV) { + auto I = ValueState.find(Key); + if (I != ValueState.end() && I->second == LV) + return; // No change. + + // Update the state of the given LatticeKey and add its corresponding LLVM + // value to the work list. + ValueState[Key] = std::move(LV); + if (Value *V = KeyInfo::getValueFromLatticeKey(Key)) + ValueWorkList.push_back(V); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::MarkBlockExecutable( + BasicBlock *BB) { + if (!BBExecutable.insert(BB).second) + return; + LLVM_DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); + BBWorkList.push_back(BB); // Add the block to the work list! +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::markEdgeExecutable( + BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return; // This edge is already known to be executable! + + LLVM_DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << "\n"); + + if (BBExecutable.count(Dest)) { + // The destination is already executable, but we just made an edge + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. + for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I) + visitPHINode(*cast<PHINode>(I)); + } else { + MarkBlockExecutable(Dest); + } +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors( + Instruction &TI, SmallVectorImpl<bool> &Succs, bool AggressiveUndef) { + Succs.resize(TI.getNumSuccessors()); + if (TI.getNumSuccessors() == 0) + return; + + if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + LatticeVal BCValue; + if (AggressiveUndef) + BCValue = + getValueState(KeyInfo::getLatticeKeyFromValue(BI->getCondition())); + else + BCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(BI->getCondition())); + + if (BCValue == LatticeFunc->getOverdefinedVal() || + BCValue == LatticeFunc->getUntrackedVal()) { + // Overdefined condition variables can branch either way. + Succs[0] = Succs[1] = true; + return; + } + + // If undefined, neither is feasible yet. + if (BCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = + dyn_cast_or_null<Constant>(LatticeFunc->GetValueFromLatticeVal( + std::move(BCValue), BI->getCondition()->getType())); + if (!C || !isa<ConstantInt>(C)) { + // Non-constant values can go either way. 
+ Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way + Succs[C->isNullValue()] = true; + return; + } + + if (TI.isExceptionalTerminator() || + TI.isIndirectTerminator()) { + Succs.assign(Succs.size(), true); + return; + } + + SwitchInst &SI = cast<SwitchInst>(TI); + LatticeVal SCValue; + if (AggressiveUndef) + SCValue = getValueState(KeyInfo::getLatticeKeyFromValue(SI.getCondition())); + else + SCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(SI.getCondition())); + + if (SCValue == LatticeFunc->getOverdefinedVal() || + SCValue == LatticeFunc->getUntrackedVal()) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // If undefined, neither is feasible yet. + if (SCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = dyn_cast_or_null<Constant>(LatticeFunc->GetValueFromLatticeVal( + std::move(SCValue), SI.getCondition()->getType())); + if (!C || !isa<ConstantInt>(C)) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C)); + Succs[Case.getSuccessorIndex()] = true; +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +bool SparseSolver<LatticeKey, LatticeVal, KeyInfo>::isEdgeFeasible( + BasicBlock *From, BasicBlock *To, bool AggressiveUndef) { + SmallVector<bool, 16> SuccFeasible; + Instruction *TI = From->getTerminator(); + getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == To && SuccFeasible[i]) + return true; + + return false; +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitTerminator( + Instruction &TI) { + SmallVector<bool, 16> SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible, true); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. + if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + DenseMap<LatticeKey, LatticeVal> ChangedValues; + LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(std::move(ChangedValue.first), + std::move(ChangedValue.second)); + return; + } + + LatticeKey Key = KeyInfo::getLatticeKeyFromValue(&PN); + LatticeVal PNIV = getValueState(Key); + LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); + + // If this value is already overdefined (common) just return. + if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + UpdateState(Key, Overdefined); + return; + } + + // Look at all of the executable operands of the PHI node. 
If any of them + // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the + // transfer function to give us the merge of the incoming values. + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // If the edge is not yet known to be feasible, it doesn't impact the PHI. + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) + continue; + + // Merge in this value. + LatticeVal OpVal = + getValueState(KeyInfo::getLatticeKeyFromValue(PN.getIncomingValue(i))); + if (OpVal != PNIV) + PNIV = LatticeFunc->MergeValues(PNIV, OpVal); + + if (PNIV == Overdefined) + break; // Rest of input values don't matter. + } + + // Update the PHI with the compute value, which is the merge of the inputs. + UpdateState(Key, PNIV); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitInst(Instruction &I) { + // PHIs are handled by the propagation logic, they are never passed into the + // transfer functions. + if (PHINode *PN = dyn_cast<PHINode>(&I)) + return visitPHINode(*PN); + + // Otherwise, ask the transfer function what the result is. If this is + // something that we care about, remember it. + DenseMap<LatticeKey, LatticeVal> ChangedValues; + LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); + + if (I.isTerminator()) + visitTerminator(I); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() { + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !ValueWorkList.empty()) { + // Process the value work list. + while (!ValueWorkList.empty()) { + Value *V = ValueWorkList.back(); + ValueWorkList.pop_back(); + + LLVM_DEBUG(dbgs() << "\nPopped off V-WL: " << *V << "\n"); + + // "V" got into the work list because it made a transition. See if any + // users are both live and in need of updating. + for (User *U : V->users()) + if (Instruction *Inst = dyn_cast<Instruction>(U)) + if (BBExecutable.count(Inst->getParent())) // Inst is executable? + visitInst(*Inst); + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); + + // Notify all instructions in this basic block that they are newly + // executable. 
+ for (Instruction &I : *BB) + visitInst(I); + } + } +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Print( + raw_ostream &OS) const { + if (ValueState.empty()) + return; + + LatticeKey Key; + LatticeVal LV; + + OS << "ValueState:\n"; + for (auto &Entry : ValueState) { + std::tie(Key, LV) = Entry; + if (LV == LatticeFunc->getUntrackedVal()) + continue; + OS << "\t"; + LatticeFunc->PrintLatticeVal(LV, OS); + OS << ": "; + LatticeFunc->PrintLatticeKey(Key, OS); + OS << "\n"; + } +} +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif // LLVM_ANALYSIS_SPARSEPROPAGATION_H diff --git a/clang-r353983e/include/llvm/Analysis/StackSafetyAnalysis.h b/clang-r353983e/include/llvm/Analysis/StackSafetyAnalysis.h new file mode 100644 index 00000000..f9d8b08a --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/StackSafetyAnalysis.h @@ -0,0 +1,119 @@ +//===- StackSafetyAnalysis.h - Stack memory safety analysis -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stack Safety Analysis detects allocas and arguments with safe access. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_STACKSAFETYANALYSIS_H +#define LLVM_ANALYSIS_STACKSAFETYANALYSIS_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// Interface to access stack safety analysis results for single function. +class StackSafetyInfo { +public: + struct FunctionInfo; + +private: + std::unique_ptr<FunctionInfo> Info; + +public: + StackSafetyInfo(); + StackSafetyInfo(FunctionInfo &&Info); + StackSafetyInfo(StackSafetyInfo &&); + StackSafetyInfo &operator=(StackSafetyInfo &&); + ~StackSafetyInfo(); + + // TODO: Add useful for client methods. + void print(raw_ostream &O) const; +}; + +/// StackSafetyInfo wrapper for the new pass manager. +class StackSafetyAnalysis : public AnalysisInfoMixin<StackSafetyAnalysis> { + friend AnalysisInfoMixin<StackSafetyAnalysis>; + static AnalysisKey Key; + +public: + using Result = StackSafetyInfo; + StackSafetyInfo run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c StackSafetyAnalysis results. +class StackSafetyPrinterPass : public PassInfoMixin<StackSafetyPrinterPass> { + raw_ostream &OS; + +public: + explicit StackSafetyPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// StackSafetyInfo wrapper for the legacy pass manager +class StackSafetyInfoWrapperPass : public FunctionPass { + StackSafetyInfo SSI; + +public: + static char ID; + StackSafetyInfoWrapperPass(); + + const StackSafetyInfo &getResult() const { return SSI; } + + void print(raw_ostream &O, const Module *M) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; +}; + +using StackSafetyGlobalInfo = std::map<const GlobalValue *, StackSafetyInfo>; + +/// This pass performs the global (interprocedural) stack safety analysis (new +/// pass manager). 
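+///
+/// With the new pass manager the result is typically queried as (sketch;
+/// MAM is a ModuleAnalysisManager and M the Module being analyzed):
+///
+///   const StackSafetyGlobalInfo &Info =
+///       MAM.getResult<StackSafetyGlobalAnalysis>(M);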
+class StackSafetyGlobalAnalysis + : public AnalysisInfoMixin<StackSafetyGlobalAnalysis> { + friend AnalysisInfoMixin<StackSafetyGlobalAnalysis>; + static AnalysisKey Key; + +public: + using Result = StackSafetyGlobalInfo; + Result run(Module &M, ModuleAnalysisManager &AM); +}; + +/// Printer pass for the \c StackSafetyGlobalAnalysis results. +class StackSafetyGlobalPrinterPass + : public PassInfoMixin<StackSafetyGlobalPrinterPass> { + raw_ostream &OS; + +public: + explicit StackSafetyGlobalPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +/// This pass performs the global (interprocedural) stack safety analysis +/// (legacy pass manager). +class StackSafetyGlobalInfoWrapperPass : public ModulePass { + StackSafetyGlobalInfo SSI; + +public: + static char ID; + + StackSafetyGlobalInfoWrapperPass(); + + const StackSafetyGlobalInfo &getResult() const { return SSI; } + + void print(raw_ostream &O, const Module *M) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnModule(Module &M) override; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_STACKSAFETYANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/SyncDependenceAnalysis.h b/clang-r353983e/include/llvm/Analysis/SyncDependenceAnalysis.h new file mode 100644 index 00000000..099403b4 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/SyncDependenceAnalysis.h @@ -0,0 +1,85 @@ +//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file defines the SyncDependenceAnalysis class, which computes for +// every divergent branch the set of phi nodes that the branch will make +// divergent. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H +#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/LoopInfo.h" +#include <memory> + +namespace llvm { + +class BasicBlock; +class DominatorTree; +class Loop; +class PostDominatorTree; + +using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>; + +/// \brief Relates points of divergent control to join points in +/// reducible CFGs. +/// +/// This analysis relates points of divergent control to points of converging +/// divergent control. The analysis requires all loops to be reducible. +class SyncDependenceAnalysis { + void visitSuccessor(const BasicBlock &succBlock, const Loop *termLoop, + const BasicBlock *defBlock); + +public: + bool inRegion(const BasicBlock &BB) const; + + ~SyncDependenceAnalysis(); + SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT, + const LoopInfo &LI); + + /// \brief Computes divergent join points and loop exits caused by branch + /// divergence in \p Term. + /// + /// The set of blocks which are reachable by disjoint paths from \p Term. + /// The set also contains loop exits if there two disjoint paths: + /// one from \p Term to the loop exit and another from \p Term to the loop + /// header. Those exit blocks are added to the returned set. 
+ /// If L is the parent loop of \p Term and an exit of L is in the returned + /// set then L is a divergent loop. + const ConstBlockSet &join_blocks(const Instruction &Term); + + /// \brief Computes divergent join points and loop exits (in the surrounding + /// loop) caused by the divergent loop exits of\p Loop. + /// + /// The set of blocks which are reachable by disjoint paths from the + /// loop exits of \p Loop. + /// This treats the loop as a single node in \p Loop's parent loop. + /// The returned set has the same properties as for join_blocks(TermInst&). + const ConstBlockSet &join_blocks(const Loop &Loop); + +private: + static ConstBlockSet EmptyBlockSet; + + ReversePostOrderTraversal<const Function *> FuncRPOT; + const DominatorTree &DT; + const PostDominatorTree &PDT; + const LoopInfo &LI; + + std::map<const Loop *, std::unique_ptr<ConstBlockSet>> CachedLoopExitJoins; + std::map<const Instruction *, std::unique_ptr<ConstBlockSet>> + CachedBranchJoins; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/SyntheticCountsUtils.h b/clang-r353983e/include/llvm/Analysis/SyntheticCountsUtils.h new file mode 100644 index 00000000..b9b4c98b --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/SyntheticCountsUtils.h @@ -0,0 +1,52 @@ +//===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for synthetic counts propagation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H +#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Support/ScaledNumber.h" + +namespace llvm { + +class CallGraph; +class Function; + +/// Class with methods to propagate synthetic entry counts. +/// +/// This class is templated on the type of the call graph and designed to work +/// with the traditional per-module callgraph and the summary callgraphs used in +/// ThinLTO. This contains only static methods and alias templates. +template <typename CallGraphType> class SyntheticCountsUtils { +public: + using Scaled64 = ScaledNumber<uint64_t>; + using CGT = GraphTraits<CallGraphType>; + using NodeRef = typename CGT::NodeRef; + using EdgeRef = typename CGT::EdgeRef; + using SccTy = std::vector<NodeRef>; + + // Not all EdgeRef have information about the source of the edge. Hence + // NodeRef corresponding to the source of the EdgeRef is explicitly passed. 
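+ //
+ // A caller supplies both callbacks when propagating, e.g. (sketch over the
+ // per-module call graph; NodeRef/EdgeRef are the aliases above and the
+ // lambda bodies are placeholders):
+ //
+ //   SyntheticCountsUtils<const CallGraph *>::propagate(
+ //       &CG,
+ //       [&](NodeRef N, EdgeRef E) -> Optional<Scaled64> { return None; },
+ //       [&](NodeRef N, Scaled64 C) { /* record C as N's extra count */ });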
+ using GetProfCountTy = function_ref<Optional<Scaled64>(NodeRef, EdgeRef)>; + using AddCountTy = function_ref<void(NodeRef, Scaled64)>; + + static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount, + AddCountTy AddCount); + +private: + static void propagateFromSCC(const SccTy &SCC, GetProfCountTy GetProfCount, + AddCountTy AddCount); +}; +} // namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/TargetFolder.h b/clang-r353983e/include/llvm/Analysis/TargetFolder.h new file mode 100644 index 00000000..b8047a46 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TargetFolder.h @@ -0,0 +1,268 @@ +//====- TargetFolder.h - Constant folding helper ---------------*- C++ -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TargetFolder class, a helper for IRBuilder. +// It provides IRBuilder with a set of methods for creating constants with +// target dependent folding, in addition to the same target-independent +// folding that the ConstantFolder class provides. For general constant +// creation and folding, use ConstantExpr and the routines in +// llvm/Analysis/ConstantFolding.h. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TARGETFOLDER_H +#define LLVM_ANALYSIS_TARGETFOLDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" + +namespace llvm { + +class DataLayout; + +/// TargetFolder - Create constants with target dependent folding. +class TargetFolder { + const DataLayout &DL; + + /// Fold - Fold the constant using target specific information. 
+ Constant *Fold(Constant *C) const { + if (Constant *CF = ConstantFoldConstant(C, DL)) + return CF; + return C; + } + +public: + explicit TargetFolder(const DataLayout &DL) : DL(DL) {} + + //===--------------------------------------------------------------------===// + // Binary Operators + //===--------------------------------------------------------------------===// + + Constant *CreateAdd(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return Fold(ConstantExpr::getAdd(LHS, RHS, HasNUW, HasNSW)); + } + Constant *CreateFAdd(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getFAdd(LHS, RHS)); + } + Constant *CreateSub(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return Fold(ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW)); + } + Constant *CreateFSub(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getFSub(LHS, RHS)); + } + Constant *CreateMul(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return Fold(ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW)); + } + Constant *CreateFMul(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getFMul(LHS, RHS)); + } + Constant *CreateUDiv(Constant *LHS, Constant *RHS, bool isExact = false)const{ + return Fold(ConstantExpr::getUDiv(LHS, RHS, isExact)); + } + Constant *CreateSDiv(Constant *LHS, Constant *RHS, bool isExact = false)const{ + return Fold(ConstantExpr::getSDiv(LHS, RHS, isExact)); + } + Constant *CreateFDiv(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getFDiv(LHS, RHS)); + } + Constant *CreateURem(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getURem(LHS, RHS)); + } + Constant *CreateSRem(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getSRem(LHS, RHS)); + } + Constant *CreateFRem(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getFRem(LHS, RHS)); + } + Constant *CreateShl(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return Fold(ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW)); + } + Constant *CreateLShr(Constant *LHS, Constant *RHS, bool isExact = false)const{ + return Fold(ConstantExpr::getLShr(LHS, RHS, isExact)); + } + Constant *CreateAShr(Constant *LHS, Constant *RHS, bool isExact = false)const{ + return Fold(ConstantExpr::getAShr(LHS, RHS, isExact)); + } + Constant *CreateAnd(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getAnd(LHS, RHS)); + } + Constant *CreateOr(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getOr(LHS, RHS)); + } + Constant *CreateXor(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getXor(LHS, RHS)); + } + + Constant *CreateBinOp(Instruction::BinaryOps Opc, + Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::get(Opc, LHS, RHS)); + } + + //===--------------------------------------------------------------------===// + // Unary Operators + //===--------------------------------------------------------------------===// + + Constant *CreateNeg(Constant *C, + bool HasNUW = false, bool HasNSW = false) const { + return Fold(ConstantExpr::getNeg(C, HasNUW, HasNSW)); + } + Constant *CreateFNeg(Constant *C) const { + return Fold(ConstantExpr::getFNeg(C)); + } + Constant *CreateNot(Constant *C) const { + return Fold(ConstantExpr::getNot(C)); + } + + //===--------------------------------------------------------------------===// + // Memory Instructions + 
//===--------------------------------------------------------------------===// + + Constant *CreateGetElementPtr(Type *Ty, Constant *C, + ArrayRef<Constant *> IdxList) const { + return Fold(ConstantExpr::getGetElementPtr(Ty, C, IdxList)); + } + Constant *CreateGetElementPtr(Type *Ty, Constant *C, Constant *Idx) const { + // This form of the function only exists to avoid ambiguous overload + // warnings about whether to convert Idx to ArrayRef<Constant *> or + // ArrayRef<Value *>. + return Fold(ConstantExpr::getGetElementPtr(Ty, C, Idx)); + } + Constant *CreateGetElementPtr(Type *Ty, Constant *C, + ArrayRef<Value *> IdxList) const { + return Fold(ConstantExpr::getGetElementPtr(Ty, C, IdxList)); + } + + Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C, + ArrayRef<Constant *> IdxList) const { + return Fold(ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList)); + } + Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C, + Constant *Idx) const { + // This form of the function only exists to avoid ambiguous overload + // warnings about whether to convert Idx to ArrayRef<Constant *> or + // ArrayRef<Value *>. + return Fold(ConstantExpr::getInBoundsGetElementPtr(Ty, C, Idx)); + } + Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C, + ArrayRef<Value *> IdxList) const { + return Fold(ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList)); + } + + //===--------------------------------------------------------------------===// + // Cast/Conversion Operators + //===--------------------------------------------------------------------===// + + Constant *CreateCast(Instruction::CastOps Op, Constant *C, + Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getCast(Op, C, DestTy)); + } + Constant *CreateIntCast(Constant *C, Type *DestTy, + bool isSigned) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getIntegerCast(C, DestTy, isSigned)); + } + Constant *CreatePointerCast(Constant *C, Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getPointerCast(C, DestTy)); + } + Constant *CreateFPCast(Constant *C, Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getFPCast(C, DestTy)); + } + Constant *CreateBitCast(Constant *C, Type *DestTy) const { + return CreateCast(Instruction::BitCast, C, DestTy); + } + Constant *CreateIntToPtr(Constant *C, Type *DestTy) const { + return CreateCast(Instruction::IntToPtr, C, DestTy); + } + Constant *CreatePtrToInt(Constant *C, Type *DestTy) const { + return CreateCast(Instruction::PtrToInt, C, DestTy); + } + Constant *CreateZExtOrBitCast(Constant *C, Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getZExtOrBitCast(C, DestTy)); + } + Constant *CreateSExtOrBitCast(Constant *C, Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getSExtOrBitCast(C, DestTy)); + } + Constant *CreateTruncOrBitCast(Constant *C, Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getTruncOrBitCast(C, DestTy)); + } + + Constant *CreatePointerBitCastOrAddrSpaceCast(Constant *C, + Type *DestTy) const { + if (C->getType() == DestTy) + return C; // avoid calling Fold + return Fold(ConstantExpr::getPointerBitCastOrAddrSpaceCast(C, DestTy)); + } + + 
//===--------------------------------------------------------------------===// + // Compare Instructions + //===--------------------------------------------------------------------===// + + Constant *CreateICmp(CmpInst::Predicate P, Constant *LHS, + Constant *RHS) const { + return Fold(ConstantExpr::getCompare(P, LHS, RHS)); + } + Constant *CreateFCmp(CmpInst::Predicate P, Constant *LHS, + Constant *RHS) const { + return Fold(ConstantExpr::getCompare(P, LHS, RHS)); + } + + //===--------------------------------------------------------------------===// + // Other Instructions + //===--------------------------------------------------------------------===// + + Constant *CreateSelect(Constant *C, Constant *True, Constant *False) const { + return Fold(ConstantExpr::getSelect(C, True, False)); + } + + Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const { + return Fold(ConstantExpr::getExtractElement(Vec, Idx)); + } + + Constant *CreateInsertElement(Constant *Vec, Constant *NewElt, + Constant *Idx) const { + return Fold(ConstantExpr::getInsertElement(Vec, NewElt, Idx)); + } + + Constant *CreateShuffleVector(Constant *V1, Constant *V2, + Constant *Mask) const { + return Fold(ConstantExpr::getShuffleVector(V1, V2, Mask)); + } + + Constant *CreateExtractValue(Constant *Agg, + ArrayRef<unsigned> IdxList) const { + return Fold(ConstantExpr::getExtractValue(Agg, IdxList)); + } + + Constant *CreateInsertValue(Constant *Agg, Constant *Val, + ArrayRef<unsigned> IdxList) const { + return Fold(ConstantExpr::getInsertValue(Agg, Val, IdxList)); + } +}; + +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/TargetLibraryInfo.def b/clang-r353983e/include/llvm/Analysis/TargetLibraryInfo.def new file mode 100644 index 00000000..f73c69e5 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TargetLibraryInfo.def @@ -0,0 +1,1353 @@ +//===-- TargetLibraryInfo.def - Library information -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This .def file will either fill in the enum definition or fill in the +// string representation array definition for TargetLibraryInfo. +// Which is defined depends on whether TLI_DEFINE_ENUM is defined or +// TLI_DEFINE_STRING is defined. Only one should be defined at a time. + +#if !(defined(TLI_DEFINE_ENUM) || defined(TLI_DEFINE_STRING)) +#error "Must define TLI_DEFINE_ENUM or TLI_DEFINE_STRING for TLI .def." +#elif defined(TLI_DEFINE_ENUM) && defined(TLI_DEFINE_STRING) +#error "Can only define one of TLI_DEFINE_ENUM or TLI_DEFINE_STRING at a time." +#else +// One of TLI_DEFINE_ENUM/STRING are defined. 
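How this X-macro file is consumed (an illustrative aside, not part of the patched file): a client defines exactly one of TLI_DEFINE_ENUM or TLI_DEFINE_STRING and then includes the .def, so the single list below expands either into enum entries or into string literals. The enum expansion is exactly what the LibFunc enum in TargetLibraryInfo.h (later in this diff) does; the string expansion is how a parallel name table, such as the StandardNames array declared in TargetLibraryInfoImpl, can be populated. Its real definition lives in TargetLibraryInfo.cpp, which is not part of this diff, so the table name LibFuncNames in the sketch below is only a placeholder.

    // Expand the list as an enum: each TLI_DEFINE_ENUM_INTERNAL(x) becomes LibFunc_x,
    // and the string entries expand to nothing.
    enum LibFunc {
    #define TLI_DEFINE_ENUM
    #include "llvm/Analysis/TargetLibraryInfo.def"
      NumLibFuncs
    };

    // Expand the same list as a name table: each TLI_DEFINE_STRING_INTERNAL("s")
    // becomes "s",. The .def #undef's the control macro at its end, so it can be
    // included a second time in the same translation unit.
    static const char *const LibFuncNames[NumLibFuncs] = {
    #define TLI_DEFINE_STRING
    #include "llvm/Analysis/TargetLibraryInfo.def"
    };
    // e.g. LibFuncNames[LibFunc_memcpy] yields "memcpy" and
    // LibFuncNames[LibFunc_ZdlPv] yields "_ZdlPv".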
+ +#if defined(TLI_DEFINE_ENUM) +#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) LibFunc_##enum_variant, +#define TLI_DEFINE_STRING_INTERNAL(string_repr) +#else +#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) +#define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr, +#endif + +/// void *new(unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXI@Z") + +/// void *new(unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new(unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_K@Z") + +/// void *new(unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXI@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX_K@Z") + +/// void *new[](unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXI@Z") + +/// void *new[](unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new[](unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_K@Z") + +/// void *new[](unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXI@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX_K@Z") + +/// int _IO_getc(_IO_FILE * __fp); +TLI_DEFINE_ENUM_INTERNAL(under_IO_getc) +TLI_DEFINE_STRING_INTERNAL("_IO_getc") +/// int _IO_putc(int __c, _IO_FILE * __fp); +TLI_DEFINE_ENUM_INTERNAL(under_IO_putc) +TLI_DEFINE_STRING_INTERNAL("_IO_putc") +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(ZdaPv) 
+TLI_DEFINE_STRING_INTERNAL("_ZdaPv") +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(ZdaPvRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZdaPvRKSt9nothrow_t") +/// void operator delete[](void*, align_val_t); +TLI_DEFINE_ENUM_INTERNAL(ZdaPvSt11align_val_t) +TLI_DEFINE_STRING_INTERNAL("_ZdaPvSt11align_val_t") +/// void operator delete[](void*, align_val_t, nothrow) +TLI_DEFINE_ENUM_INTERNAL(ZdaPvSt11align_val_tRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZdaPvSt11align_val_tRKSt9nothrow_t") +/// void operator delete[](void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(ZdaPvj) +TLI_DEFINE_STRING_INTERNAL("_ZdaPvj") +/// void operator delete[](void*, unsigned long); +TLI_DEFINE_ENUM_INTERNAL(ZdaPvm) +TLI_DEFINE_STRING_INTERNAL("_ZdaPvm") +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(ZdlPv) +TLI_DEFINE_STRING_INTERNAL("_ZdlPv") +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(ZdlPvRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZdlPvRKSt9nothrow_t") +/// void operator delete(void*, align_val_t) +TLI_DEFINE_ENUM_INTERNAL(ZdlPvSt11align_val_t) +TLI_DEFINE_STRING_INTERNAL("_ZdlPvSt11align_val_t") +/// void operator delete(void*, align_val_t, nothrow) +TLI_DEFINE_ENUM_INTERNAL(ZdlPvSt11align_val_tRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZdlPvSt11align_val_tRKSt9nothrow_t") +/// void operator delete(void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(ZdlPvj) +TLI_DEFINE_STRING_INTERNAL("_ZdlPvj") +/// void operator delete(void*, unsigned long); +TLI_DEFINE_ENUM_INTERNAL(ZdlPvm) +TLI_DEFINE_STRING_INTERNAL("_ZdlPvm") +/// void *new[](unsigned int); +TLI_DEFINE_ENUM_INTERNAL(Znaj) +TLI_DEFINE_STRING_INTERNAL("_Znaj") +/// void *new[](unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(ZnajRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnajRKSt9nothrow_t") +/// void *new[](unsigned int, align_val_t) +TLI_DEFINE_ENUM_INTERNAL(ZnajSt11align_val_t) +TLI_DEFINE_STRING_INTERNAL("_ZnajSt11align_val_t") +/// void *new[](unsigned int, align_val_t, nothrow) +TLI_DEFINE_ENUM_INTERNAL(ZnajSt11align_val_tRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnajSt11align_val_tRKSt9nothrow_t") +/// void *new[](unsigned long); +TLI_DEFINE_ENUM_INTERNAL(Znam) +TLI_DEFINE_STRING_INTERNAL("_Znam") +/// void *new[](unsigned long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(ZnamRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnamRKSt9nothrow_t") +/// void *new[](unsigned long, align_val_t) +TLI_DEFINE_ENUM_INTERNAL(ZnamSt11align_val_t) +TLI_DEFINE_STRING_INTERNAL("_ZnamSt11align_val_t") +/// void *new[](unsigned long, align_val_t, nothrow) +TLI_DEFINE_ENUM_INTERNAL(ZnamSt11align_val_tRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnamSt11align_val_tRKSt9nothrow_t") +/// void *new(unsigned int); +TLI_DEFINE_ENUM_INTERNAL(Znwj) +TLI_DEFINE_STRING_INTERNAL("_Znwj") +/// void *new(unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(ZnwjRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnwjRKSt9nothrow_t") +/// void *new(unsigned int, align_val_t) +TLI_DEFINE_ENUM_INTERNAL(ZnwjSt11align_val_t) +TLI_DEFINE_STRING_INTERNAL("_ZnwjSt11align_val_t") +/// void *new(unsigned int, align_val_t, nothrow) +TLI_DEFINE_ENUM_INTERNAL(ZnwjSt11align_val_tRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnwjSt11align_val_tRKSt9nothrow_t") +/// void *new(unsigned long); +TLI_DEFINE_ENUM_INTERNAL(Znwm) +TLI_DEFINE_STRING_INTERNAL("_Znwm") +/// void *new(unsigned long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(ZnwmRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnwmRKSt9nothrow_t") +/// void *new(unsigned long, align_val_t) 
+TLI_DEFINE_ENUM_INTERNAL(ZnwmSt11align_val_t) +TLI_DEFINE_STRING_INTERNAL("_ZnwmSt11align_val_t") +/// void *new(unsigned long, align_val_t, nothrow) +TLI_DEFINE_ENUM_INTERNAL(ZnwmSt11align_val_tRKSt9nothrow_t) +TLI_DEFINE_STRING_INTERNAL("_ZnwmSt11align_val_tRKSt9nothrow_t") +/// double __acos_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(acos_finite) +TLI_DEFINE_STRING_INTERNAL("__acos_finite") +/// float __acosf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(acosf_finite) +TLI_DEFINE_STRING_INTERNAL("__acosf_finite") +/// double __acosh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(acosh_finite) +TLI_DEFINE_STRING_INTERNAL("__acosh_finite") +/// float __acoshf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(acoshf_finite) +TLI_DEFINE_STRING_INTERNAL("__acoshf_finite") +/// long double __acoshl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(acoshl_finite) +TLI_DEFINE_STRING_INTERNAL("__acoshl_finite") +/// long double __acosl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(acosl_finite) +TLI_DEFINE_STRING_INTERNAL("__acosl_finite") +/// double __asin_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(asin_finite) +TLI_DEFINE_STRING_INTERNAL("__asin_finite") +/// float __asinf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(asinf_finite) +TLI_DEFINE_STRING_INTERNAL("__asinf_finite") +/// long double __asinl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(asinl_finite) +TLI_DEFINE_STRING_INTERNAL("__asinl_finite") +/// double atan2_finite(double y, double x); +TLI_DEFINE_ENUM_INTERNAL(atan2_finite) +TLI_DEFINE_STRING_INTERNAL("__atan2_finite") +/// float atan2f_finite(float y, float x); +TLI_DEFINE_ENUM_INTERNAL(atan2f_finite) +TLI_DEFINE_STRING_INTERNAL("__atan2f_finite") +/// long double atan2l_finite(long double y, long double x); +TLI_DEFINE_ENUM_INTERNAL(atan2l_finite) +TLI_DEFINE_STRING_INTERNAL("__atan2l_finite") +/// double __atanh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(atanh_finite) +TLI_DEFINE_STRING_INTERNAL("__atanh_finite") +/// float __atanhf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(atanhf_finite) +TLI_DEFINE_STRING_INTERNAL("__atanhf_finite") +/// long double __atanhl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(atanhl_finite) +TLI_DEFINE_STRING_INTERNAL("__atanhl_finite") +/// double __cosh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(cosh_finite) +TLI_DEFINE_STRING_INTERNAL("__cosh_finite") +/// float __coshf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(coshf_finite) +TLI_DEFINE_STRING_INTERNAL("__coshf_finite") +/// long double __coshl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(coshl_finite) +TLI_DEFINE_STRING_INTERNAL("__coshl_finite") +/// double __cospi(double x); +TLI_DEFINE_ENUM_INTERNAL(cospi) +TLI_DEFINE_STRING_INTERNAL("__cospi") +/// float __cospif(float x); +TLI_DEFINE_ENUM_INTERNAL(cospif) +TLI_DEFINE_STRING_INTERNAL("__cospif") +/// int __cxa_atexit(void (*f)(void *), void *p, void *d); +TLI_DEFINE_ENUM_INTERNAL(cxa_atexit) +TLI_DEFINE_STRING_INTERNAL("__cxa_atexit") +/// void __cxa_guard_abort(guard_t *guard); +/// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi. 
+TLI_DEFINE_ENUM_INTERNAL(cxa_guard_abort) +TLI_DEFINE_STRING_INTERNAL("__cxa_guard_abort") +/// int __cxa_guard_acquire(guard_t *guard); +TLI_DEFINE_ENUM_INTERNAL(cxa_guard_acquire) +TLI_DEFINE_STRING_INTERNAL("__cxa_guard_acquire") +/// void __cxa_guard_release(guard_t *guard); +TLI_DEFINE_ENUM_INTERNAL(cxa_guard_release) +TLI_DEFINE_STRING_INTERNAL("__cxa_guard_release") +/// double __exp10_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(exp10_finite) +TLI_DEFINE_STRING_INTERNAL("__exp10_finite") +/// float __exp10f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(exp10f_finite) +TLI_DEFINE_STRING_INTERNAL("__exp10f_finite") +/// long double __exp10l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(exp10l_finite) +TLI_DEFINE_STRING_INTERNAL("__exp10l_finite") +/// double __exp2_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(exp2_finite) +TLI_DEFINE_STRING_INTERNAL("__exp2_finite") +/// float __exp2f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(exp2f_finite) +TLI_DEFINE_STRING_INTERNAL("__exp2f_finite") +/// long double __exp2l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(exp2l_finite) +TLI_DEFINE_STRING_INTERNAL("__exp2l_finite") +/// double __exp_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(exp_finite) +TLI_DEFINE_STRING_INTERNAL("__exp_finite") +/// float __expf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(expf_finite) +TLI_DEFINE_STRING_INTERNAL("__expf_finite") +/// long double __expl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(expl_finite) +TLI_DEFINE_STRING_INTERNAL("__expl_finite") +/// int __isoc99_scanf (const char *format, ...) +TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_scanf) +TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf") +/// int __isoc99_sscanf(const char *s, const char *format, ...) +TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_sscanf) +TLI_DEFINE_STRING_INTERNAL("__isoc99_sscanf") +/// double __log10_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(log10_finite) +TLI_DEFINE_STRING_INTERNAL("__log10_finite") +/// float __log10f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(log10f_finite) +TLI_DEFINE_STRING_INTERNAL("__log10f_finite") +/// long double __log10l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(log10l_finite) +TLI_DEFINE_STRING_INTERNAL("__log10l_finite") +/// double __log2_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(log2_finite) +TLI_DEFINE_STRING_INTERNAL("__log2_finite") +/// float __log2f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(log2f_finite) +TLI_DEFINE_STRING_INTERNAL("__log2f_finite") +/// long double __log2l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(log2l_finite) +TLI_DEFINE_STRING_INTERNAL("__log2l_finite") +/// double __log_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(log_finite) +TLI_DEFINE_STRING_INTERNAL("__log_finite") +/// float __logf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(logf_finite) +TLI_DEFINE_STRING_INTERNAL("__logf_finite") +/// long double __logl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(logl_finite) +TLI_DEFINE_STRING_INTERNAL("__logl_finite") +/// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); +TLI_DEFINE_ENUM_INTERNAL(memcpy_chk) +TLI_DEFINE_STRING_INTERNAL("__memcpy_chk") +/// void *__memmove_chk(void *s1, const void *s2, size_t n, size_t s1size); +TLI_DEFINE_ENUM_INTERNAL(memmove_chk) +TLI_DEFINE_STRING_INTERNAL("__memmove_chk") +/// void *__memset_chk(void *s, char v, size_t n, size_t s1size); +TLI_DEFINE_ENUM_INTERNAL(memset_chk) +TLI_DEFINE_STRING_INTERNAL("__memset_chk") + +// int __nvvm_reflect(const char *) +TLI_DEFINE_ENUM_INTERNAL(nvvm_reflect) +TLI_DEFINE_STRING_INTERNAL("__nvvm_reflect") +/// double 
__pow_finite(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(pow_finite)
+TLI_DEFINE_STRING_INTERNAL("__pow_finite")
+/// float __powf_finite(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(powf_finite)
+TLI_DEFINE_STRING_INTERNAL("__powf_finite")
+/// long double __powl_finite(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(powl_finite)
+TLI_DEFINE_STRING_INTERNAL("__powl_finite")
+/// double __sincospi_stret(double x);
+TLI_DEFINE_ENUM_INTERNAL(sincospi_stret)
+TLI_DEFINE_STRING_INTERNAL("__sincospi_stret")
+/// float __sincospif_stret(float x);
+TLI_DEFINE_ENUM_INTERNAL(sincospif_stret)
+TLI_DEFINE_STRING_INTERNAL("__sincospif_stret")
+/// double __sinh_finite(double x);
+TLI_DEFINE_ENUM_INTERNAL(sinh_finite)
+TLI_DEFINE_STRING_INTERNAL("__sinh_finite")
+/// float __sinhf_finite(float x);
+TLI_DEFINE_ENUM_INTERNAL(sinhf_finite)
+TLI_DEFINE_STRING_INTERNAL("__sinhf_finite")
+/// long double __sinhl_finite(long double x);
+TLI_DEFINE_ENUM_INTERNAL(sinhl_finite)
+TLI_DEFINE_STRING_INTERNAL("__sinhl_finite")
+/// double __sinpi(double x);
+TLI_DEFINE_ENUM_INTERNAL(sinpi)
+TLI_DEFINE_STRING_INTERNAL("__sinpi")
+/// float __sinpif(float x);
+TLI_DEFINE_ENUM_INTERNAL(sinpif)
+TLI_DEFINE_STRING_INTERNAL("__sinpif")
+/// double __sqrt_finite(double x);
+TLI_DEFINE_ENUM_INTERNAL(sqrt_finite)
+TLI_DEFINE_STRING_INTERNAL("__sqrt_finite")
+/// float __sqrtf_finite(float x);
+TLI_DEFINE_ENUM_INTERNAL(sqrtf_finite)
+TLI_DEFINE_STRING_INTERNAL("__sqrtf_finite")
+/// long double __sqrtl_finite(long double x);
+TLI_DEFINE_ENUM_INTERNAL(sqrtl_finite)
+TLI_DEFINE_STRING_INTERNAL("__sqrtl_finite")
+/// char *__stpcpy_chk(char *s1, const char *s2, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(stpcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__stpcpy_chk")
+/// char *__stpncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(stpncpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__stpncpy_chk")
+/// char *__strcpy_chk(char *s1, const char *s2, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__strcpy_chk")
+/// char * __strdup(const char *s);
+TLI_DEFINE_ENUM_INTERNAL(dunder_strdup)
+TLI_DEFINE_STRING_INTERNAL("__strdup")
+/// char *__strncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strncpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__strncpy_chk")
+/// char *__strndup(const char *s, size_t n);
+TLI_DEFINE_ENUM_INTERNAL(dunder_strndup)
+TLI_DEFINE_STRING_INTERNAL("__strndup")
+/// char * __strtok_r(char *s, const char *delim, char **save_ptr);
+TLI_DEFINE_ENUM_INTERNAL(dunder_strtok_r)
+TLI_DEFINE_STRING_INTERNAL("__strtok_r")
+/// int abs(int j);
+TLI_DEFINE_ENUM_INTERNAL(abs)
+TLI_DEFINE_STRING_INTERNAL("abs")
+/// int access(const char *path, int amode);
+TLI_DEFINE_ENUM_INTERNAL(access)
+TLI_DEFINE_STRING_INTERNAL("access")
+/// double acos(double x);
+TLI_DEFINE_ENUM_INTERNAL(acos)
+TLI_DEFINE_STRING_INTERNAL("acos")
+/// float acosf(float x);
+TLI_DEFINE_ENUM_INTERNAL(acosf)
+TLI_DEFINE_STRING_INTERNAL("acosf")
+/// double acosh(double x);
+TLI_DEFINE_ENUM_INTERNAL(acosh)
+TLI_DEFINE_STRING_INTERNAL("acosh")
+/// float acoshf(float x);
+TLI_DEFINE_ENUM_INTERNAL(acoshf)
+TLI_DEFINE_STRING_INTERNAL("acoshf")
+/// long double acoshl(long double x);
+TLI_DEFINE_ENUM_INTERNAL(acoshl)
+TLI_DEFINE_STRING_INTERNAL("acoshl")
+/// long double acosl(long double x);
+TLI_DEFINE_ENUM_INTERNAL(acosl)
+TLI_DEFINE_STRING_INTERNAL("acosl")
+/// double asin(double x);
+TLI_DEFINE_ENUM_INTERNAL(asin)
+TLI_DEFINE_STRING_INTERNAL("asin")
+/// float
asinf(float x); +TLI_DEFINE_ENUM_INTERNAL(asinf) +TLI_DEFINE_STRING_INTERNAL("asinf") +/// double asinh(double x); +TLI_DEFINE_ENUM_INTERNAL(asinh) +TLI_DEFINE_STRING_INTERNAL("asinh") +/// float asinhf(float x); +TLI_DEFINE_ENUM_INTERNAL(asinhf) +TLI_DEFINE_STRING_INTERNAL("asinhf") +/// long double asinhl(long double x); +TLI_DEFINE_ENUM_INTERNAL(asinhl) +TLI_DEFINE_STRING_INTERNAL("asinhl") +/// long double asinl(long double x); +TLI_DEFINE_ENUM_INTERNAL(asinl) +TLI_DEFINE_STRING_INTERNAL("asinl") +/// double atan(double x); +TLI_DEFINE_ENUM_INTERNAL(atan) +TLI_DEFINE_STRING_INTERNAL("atan") +/// double atan2(double y, double x); +TLI_DEFINE_ENUM_INTERNAL(atan2) +TLI_DEFINE_STRING_INTERNAL("atan2") +/// float atan2f(float y, float x); +TLI_DEFINE_ENUM_INTERNAL(atan2f) +TLI_DEFINE_STRING_INTERNAL("atan2f") +/// long double atan2l(long double y, long double x); +TLI_DEFINE_ENUM_INTERNAL(atan2l) +TLI_DEFINE_STRING_INTERNAL("atan2l") +/// float atanf(float x); +TLI_DEFINE_ENUM_INTERNAL(atanf) +TLI_DEFINE_STRING_INTERNAL("atanf") +/// double atanh(double x); +TLI_DEFINE_ENUM_INTERNAL(atanh) +TLI_DEFINE_STRING_INTERNAL("atanh") +/// float atanhf(float x); +TLI_DEFINE_ENUM_INTERNAL(atanhf) +TLI_DEFINE_STRING_INTERNAL("atanhf") +/// long double atanhl(long double x); +TLI_DEFINE_ENUM_INTERNAL(atanhl) +TLI_DEFINE_STRING_INTERNAL("atanhl") +/// long double atanl(long double x); +TLI_DEFINE_ENUM_INTERNAL(atanl) +TLI_DEFINE_STRING_INTERNAL("atanl") +/// double atof(const char *str); +TLI_DEFINE_ENUM_INTERNAL(atof) +TLI_DEFINE_STRING_INTERNAL("atof") +/// int atoi(const char *str); +TLI_DEFINE_ENUM_INTERNAL(atoi) +TLI_DEFINE_STRING_INTERNAL("atoi") +/// long atol(const char *str); +TLI_DEFINE_ENUM_INTERNAL(atol) +TLI_DEFINE_STRING_INTERNAL("atol") +/// long long atoll(const char *nptr); +TLI_DEFINE_ENUM_INTERNAL(atoll) +TLI_DEFINE_STRING_INTERNAL("atoll") +/// int bcmp(const void *s1, const void *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(bcmp) +TLI_DEFINE_STRING_INTERNAL("bcmp") +/// void bcopy(const void *s1, void *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(bcopy) +TLI_DEFINE_STRING_INTERNAL("bcopy") +/// void bzero(void *s, size_t n); +TLI_DEFINE_ENUM_INTERNAL(bzero) +TLI_DEFINE_STRING_INTERNAL("bzero") +/// double cabs(double complex z) +TLI_DEFINE_ENUM_INTERNAL(cabs) +TLI_DEFINE_STRING_INTERNAL("cabs") +/// float cabs(float complex z) +TLI_DEFINE_ENUM_INTERNAL(cabsf) +TLI_DEFINE_STRING_INTERNAL("cabsf") +/// long double cabs(long double complex z) +TLI_DEFINE_ENUM_INTERNAL(cabsl) +TLI_DEFINE_STRING_INTERNAL("cabsl") +/// void *calloc(size_t count, size_t size); +TLI_DEFINE_ENUM_INTERNAL(calloc) +TLI_DEFINE_STRING_INTERNAL("calloc") +/// double cbrt(double x); +TLI_DEFINE_ENUM_INTERNAL(cbrt) +TLI_DEFINE_STRING_INTERNAL("cbrt") +/// float cbrtf(float x); +TLI_DEFINE_ENUM_INTERNAL(cbrtf) +TLI_DEFINE_STRING_INTERNAL("cbrtf") +/// long double cbrtl(long double x); +TLI_DEFINE_ENUM_INTERNAL(cbrtl) +TLI_DEFINE_STRING_INTERNAL("cbrtl") +/// double ceil(double x); +TLI_DEFINE_ENUM_INTERNAL(ceil) +TLI_DEFINE_STRING_INTERNAL("ceil") +/// float ceilf(float x); +TLI_DEFINE_ENUM_INTERNAL(ceilf) +TLI_DEFINE_STRING_INTERNAL("ceilf") +/// long double ceill(long double x); +TLI_DEFINE_ENUM_INTERNAL(ceill) +TLI_DEFINE_STRING_INTERNAL("ceill") +/// int chmod(const char *path, mode_t mode); +TLI_DEFINE_ENUM_INTERNAL(chmod) +TLI_DEFINE_STRING_INTERNAL("chmod") +/// int chown(const char *path, uid_t owner, gid_t group); +TLI_DEFINE_ENUM_INTERNAL(chown) +TLI_DEFINE_STRING_INTERNAL("chown") +/// void clearerr(FILE 
*stream); +TLI_DEFINE_ENUM_INTERNAL(clearerr) +TLI_DEFINE_STRING_INTERNAL("clearerr") +/// int closedir(DIR *dirp); +TLI_DEFINE_ENUM_INTERNAL(closedir) +TLI_DEFINE_STRING_INTERNAL("closedir") +/// double copysign(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(copysign) +TLI_DEFINE_STRING_INTERNAL("copysign") +/// float copysignf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(copysignf) +TLI_DEFINE_STRING_INTERNAL("copysignf") +/// long double copysignl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(copysignl) +TLI_DEFINE_STRING_INTERNAL("copysignl") +/// double cos(double x); +TLI_DEFINE_ENUM_INTERNAL(cos) +TLI_DEFINE_STRING_INTERNAL("cos") +/// float cosf(float x); +TLI_DEFINE_ENUM_INTERNAL(cosf) +TLI_DEFINE_STRING_INTERNAL("cosf") +/// double cosh(double x); +TLI_DEFINE_ENUM_INTERNAL(cosh) +TLI_DEFINE_STRING_INTERNAL("cosh") +/// float coshf(float x); +TLI_DEFINE_ENUM_INTERNAL(coshf) +TLI_DEFINE_STRING_INTERNAL("coshf") +/// long double coshl(long double x); +TLI_DEFINE_ENUM_INTERNAL(coshl) +TLI_DEFINE_STRING_INTERNAL("coshl") +/// long double cosl(long double x); +TLI_DEFINE_ENUM_INTERNAL(cosl) +TLI_DEFINE_STRING_INTERNAL("cosl") +/// char *ctermid(char *s); +TLI_DEFINE_ENUM_INTERNAL(ctermid) +TLI_DEFINE_STRING_INTERNAL("ctermid") +/// int execl(const char *path, const char *arg, ...); +TLI_DEFINE_ENUM_INTERNAL(execl) +TLI_DEFINE_STRING_INTERNAL("execl") +/// int execle(const char *file, const char *arg, ..., char * const envp[]); +TLI_DEFINE_ENUM_INTERNAL(execle) +TLI_DEFINE_STRING_INTERNAL("execle") +/// int execlp(const char *file, const char *arg, ...); +TLI_DEFINE_ENUM_INTERNAL(execlp) +TLI_DEFINE_STRING_INTERNAL("execlp") +/// int execv(const char *path, char *const argv[]); +TLI_DEFINE_ENUM_INTERNAL(execv) +TLI_DEFINE_STRING_INTERNAL("execv") +/// int execvP(const char *file, const char *search_path, char *const argv[]); +TLI_DEFINE_ENUM_INTERNAL(execvP) +TLI_DEFINE_STRING_INTERNAL("execvP") +/// int execve(const char *filename, char *const argv[], char *const envp[]); +TLI_DEFINE_ENUM_INTERNAL(execve) +TLI_DEFINE_STRING_INTERNAL("execve") +/// int execvp(const char *file, char *const argv[]); +TLI_DEFINE_ENUM_INTERNAL(execvp) +TLI_DEFINE_STRING_INTERNAL("execvp") +/// int execvpe(const char *file, char *const argv[], char *const envp[]); +TLI_DEFINE_ENUM_INTERNAL(execvpe) +TLI_DEFINE_STRING_INTERNAL("execvpe") +/// double exp(double x); +TLI_DEFINE_ENUM_INTERNAL(exp) +TLI_DEFINE_STRING_INTERNAL("exp") +/// double exp10(double x); +TLI_DEFINE_ENUM_INTERNAL(exp10) +TLI_DEFINE_STRING_INTERNAL("exp10") +/// float exp10f(float x); +TLI_DEFINE_ENUM_INTERNAL(exp10f) +TLI_DEFINE_STRING_INTERNAL("exp10f") +/// long double exp10l(long double x); +TLI_DEFINE_ENUM_INTERNAL(exp10l) +TLI_DEFINE_STRING_INTERNAL("exp10l") +/// double exp2(double x); +TLI_DEFINE_ENUM_INTERNAL(exp2) +TLI_DEFINE_STRING_INTERNAL("exp2") +/// float exp2f(float x); +TLI_DEFINE_ENUM_INTERNAL(exp2f) +TLI_DEFINE_STRING_INTERNAL("exp2f") +/// long double exp2l(long double x); +TLI_DEFINE_ENUM_INTERNAL(exp2l) +TLI_DEFINE_STRING_INTERNAL("exp2l") +/// float expf(float x); +TLI_DEFINE_ENUM_INTERNAL(expf) +TLI_DEFINE_STRING_INTERNAL("expf") +/// long double expl(long double x); +TLI_DEFINE_ENUM_INTERNAL(expl) +TLI_DEFINE_STRING_INTERNAL("expl") +/// double expm1(double x); +TLI_DEFINE_ENUM_INTERNAL(expm1) +TLI_DEFINE_STRING_INTERNAL("expm1") +/// float expm1f(float x); +TLI_DEFINE_ENUM_INTERNAL(expm1f) +TLI_DEFINE_STRING_INTERNAL("expm1f") +/// long double expm1l(long double x); +TLI_DEFINE_ENUM_INTERNAL(expm1l) 
+TLI_DEFINE_STRING_INTERNAL("expm1l") +/// double fabs(double x); +TLI_DEFINE_ENUM_INTERNAL(fabs) +TLI_DEFINE_STRING_INTERNAL("fabs") +/// float fabsf(float x); +TLI_DEFINE_ENUM_INTERNAL(fabsf) +TLI_DEFINE_STRING_INTERNAL("fabsf") +/// long double fabsl(long double x); +TLI_DEFINE_ENUM_INTERNAL(fabsl) +TLI_DEFINE_STRING_INTERNAL("fabsl") +/// int fclose(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fclose) +TLI_DEFINE_STRING_INTERNAL("fclose") +/// FILE *fdopen(int fildes, const char *mode); +TLI_DEFINE_ENUM_INTERNAL(fdopen) +TLI_DEFINE_STRING_INTERNAL("fdopen") +/// int feof(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(feof) +TLI_DEFINE_STRING_INTERNAL("feof") +/// int ferror(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(ferror) +TLI_DEFINE_STRING_INTERNAL("ferror") +/// int fflush(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fflush) +TLI_DEFINE_STRING_INTERNAL("fflush") +/// int ffs(int i); +TLI_DEFINE_ENUM_INTERNAL(ffs) +TLI_DEFINE_STRING_INTERNAL("ffs") +/// int ffsl(long int i); +TLI_DEFINE_ENUM_INTERNAL(ffsl) +TLI_DEFINE_STRING_INTERNAL("ffsl") +/// int ffsll(long long int i); +TLI_DEFINE_ENUM_INTERNAL(ffsll) +TLI_DEFINE_STRING_INTERNAL("ffsll") +/// int fgetc(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fgetc) +TLI_DEFINE_STRING_INTERNAL("fgetc") +/// int fgetc_unlocked(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fgetc_unlocked) +TLI_DEFINE_STRING_INTERNAL("fgetc_unlocked") +/// int fgetpos(FILE *stream, fpos_t *pos); +TLI_DEFINE_ENUM_INTERNAL(fgetpos) +TLI_DEFINE_STRING_INTERNAL("fgetpos") +/// char *fgets(char *s, int n, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fgets) +TLI_DEFINE_STRING_INTERNAL("fgets") +/// char *fgets_unlocked(char *s, int n, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fgets_unlocked) +TLI_DEFINE_STRING_INTERNAL("fgets_unlocked") +/// int fileno(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fileno) +TLI_DEFINE_STRING_INTERNAL("fileno") +/// int fiprintf(FILE *stream, const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(fiprintf) +TLI_DEFINE_STRING_INTERNAL("fiprintf") +/// void flockfile(FILE *file); +TLI_DEFINE_ENUM_INTERNAL(flockfile) +TLI_DEFINE_STRING_INTERNAL("flockfile") +/// double floor(double x); +TLI_DEFINE_ENUM_INTERNAL(floor) +TLI_DEFINE_STRING_INTERNAL("floor") +/// float floorf(float x); +TLI_DEFINE_ENUM_INTERNAL(floorf) +TLI_DEFINE_STRING_INTERNAL("floorf") +/// long double floorl(long double x); +TLI_DEFINE_ENUM_INTERNAL(floorl) +TLI_DEFINE_STRING_INTERNAL("floorl") +/// int fls(int i); +TLI_DEFINE_ENUM_INTERNAL(fls) +TLI_DEFINE_STRING_INTERNAL("fls") +/// int flsl(long int i); +TLI_DEFINE_ENUM_INTERNAL(flsl) +TLI_DEFINE_STRING_INTERNAL("flsl") +/// int flsll(long long int i); +TLI_DEFINE_ENUM_INTERNAL(flsll) +TLI_DEFINE_STRING_INTERNAL("flsll") +/// double fmax(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(fmax) +TLI_DEFINE_STRING_INTERNAL("fmax") +/// float fmaxf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(fmaxf) +TLI_DEFINE_STRING_INTERNAL("fmaxf") +/// long double fmaxl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(fmaxl) +TLI_DEFINE_STRING_INTERNAL("fmaxl") +/// double fmin(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(fmin) +TLI_DEFINE_STRING_INTERNAL("fmin") +/// float fminf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(fminf) +TLI_DEFINE_STRING_INTERNAL("fminf") +/// long double fminl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(fminl) +TLI_DEFINE_STRING_INTERNAL("fminl") +/// double fmod(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(fmod) +TLI_DEFINE_STRING_INTERNAL("fmod") +/// float fmodf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(fmodf) 
+TLI_DEFINE_STRING_INTERNAL("fmodf") +/// long double fmodl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(fmodl) +TLI_DEFINE_STRING_INTERNAL("fmodl") +/// FILE *fopen(const char *filename, const char *mode); +TLI_DEFINE_ENUM_INTERNAL(fopen) +TLI_DEFINE_STRING_INTERNAL("fopen") +/// FILE *fopen64(const char *filename, const char *opentype) +TLI_DEFINE_ENUM_INTERNAL(fopen64) +TLI_DEFINE_STRING_INTERNAL("fopen64") +/// int fork(); +TLI_DEFINE_ENUM_INTERNAL(fork) +TLI_DEFINE_STRING_INTERNAL("fork") +/// int fprintf(FILE *stream, const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(fprintf) +TLI_DEFINE_STRING_INTERNAL("fprintf") +/// int fputc(int c, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fputc) +TLI_DEFINE_STRING_INTERNAL("fputc") +/// int fputc_unlocked(int c, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fputc_unlocked) +TLI_DEFINE_STRING_INTERNAL("fputc_unlocked") +/// int fputs(const char *s, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fputs) +TLI_DEFINE_STRING_INTERNAL("fputs") +/// int fputs_unlocked(const char *s, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fputs_unlocked) +TLI_DEFINE_STRING_INTERNAL("fputs_unlocked") +/// size_t fread(void *ptr, size_t size, size_t nitems, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fread) +TLI_DEFINE_STRING_INTERNAL("fread") +/// size_t fread_unlocked(void *ptr, size_t size, size_t nitems, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fread_unlocked) +TLI_DEFINE_STRING_INTERNAL("fread_unlocked") +/// void free(void *ptr); +TLI_DEFINE_ENUM_INTERNAL(free) +TLI_DEFINE_STRING_INTERNAL("free") +/// double frexp(double num, int *exp); +TLI_DEFINE_ENUM_INTERNAL(frexp) +TLI_DEFINE_STRING_INTERNAL("frexp") +/// float frexpf(float num, int *exp); +TLI_DEFINE_ENUM_INTERNAL(frexpf) +TLI_DEFINE_STRING_INTERNAL("frexpf") +/// long double frexpl(long double num, int *exp); +TLI_DEFINE_ENUM_INTERNAL(frexpl) +TLI_DEFINE_STRING_INTERNAL("frexpl") +/// int fscanf(FILE *stream, const char *format, ... 
); +TLI_DEFINE_ENUM_INTERNAL(fscanf) +TLI_DEFINE_STRING_INTERNAL("fscanf") +/// int fseek(FILE *stream, long offset, int whence); +TLI_DEFINE_ENUM_INTERNAL(fseek) +TLI_DEFINE_STRING_INTERNAL("fseek") +/// int fseeko(FILE *stream, off_t offset, int whence); +TLI_DEFINE_ENUM_INTERNAL(fseeko) +TLI_DEFINE_STRING_INTERNAL("fseeko") +/// int fseeko64(FILE *stream, off64_t offset, int whence) +TLI_DEFINE_ENUM_INTERNAL(fseeko64) +TLI_DEFINE_STRING_INTERNAL("fseeko64") +/// int fsetpos(FILE *stream, const fpos_t *pos); +TLI_DEFINE_ENUM_INTERNAL(fsetpos) +TLI_DEFINE_STRING_INTERNAL("fsetpos") +/// int fstat(int fildes, struct stat *buf); +TLI_DEFINE_ENUM_INTERNAL(fstat) +TLI_DEFINE_STRING_INTERNAL("fstat") +/// int fstat64(int filedes, struct stat64 *buf) +TLI_DEFINE_ENUM_INTERNAL(fstat64) +TLI_DEFINE_STRING_INTERNAL("fstat64") +/// int fstatvfs(int fildes, struct statvfs *buf); +TLI_DEFINE_ENUM_INTERNAL(fstatvfs) +TLI_DEFINE_STRING_INTERNAL("fstatvfs") +/// int fstatvfs64(int fildes, struct statvfs64 *buf); +TLI_DEFINE_ENUM_INTERNAL(fstatvfs64) +TLI_DEFINE_STRING_INTERNAL("fstatvfs64") +/// long ftell(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(ftell) +TLI_DEFINE_STRING_INTERNAL("ftell") +/// off_t ftello(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(ftello) +TLI_DEFINE_STRING_INTERNAL("ftello") +/// off64_t ftello64(FILE *stream) +TLI_DEFINE_ENUM_INTERNAL(ftello64) +TLI_DEFINE_STRING_INTERNAL("ftello64") +/// int ftrylockfile(FILE *file); +TLI_DEFINE_ENUM_INTERNAL(ftrylockfile) +TLI_DEFINE_STRING_INTERNAL("ftrylockfile") +/// void funlockfile(FILE *file); +TLI_DEFINE_ENUM_INTERNAL(funlockfile) +TLI_DEFINE_STRING_INTERNAL("funlockfile") +/// size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fwrite) +TLI_DEFINE_STRING_INTERNAL("fwrite") +/// size_t fwrite_unlocked(const void *ptr, size_t size, size_t nitems, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(fwrite_unlocked) +TLI_DEFINE_STRING_INTERNAL("fwrite_unlocked") +/// int getc(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(getc) +TLI_DEFINE_STRING_INTERNAL("getc") +/// int getc_unlocked(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(getc_unlocked) +TLI_DEFINE_STRING_INTERNAL("getc_unlocked") +/// int getchar(void); +TLI_DEFINE_ENUM_INTERNAL(getchar) +TLI_DEFINE_STRING_INTERNAL("getchar") +/// int getchar_unlocked(void); +TLI_DEFINE_ENUM_INTERNAL(getchar_unlocked) +TLI_DEFINE_STRING_INTERNAL("getchar_unlocked") +/// char *getenv(const char *name); +TLI_DEFINE_ENUM_INTERNAL(getenv) +TLI_DEFINE_STRING_INTERNAL("getenv") +/// int getitimer(int which, struct itimerval *value); +TLI_DEFINE_ENUM_INTERNAL(getitimer) +TLI_DEFINE_STRING_INTERNAL("getitimer") +/// int getlogin_r(char *name, size_t namesize); +TLI_DEFINE_ENUM_INTERNAL(getlogin_r) +TLI_DEFINE_STRING_INTERNAL("getlogin_r") +/// struct passwd *getpwnam(const char *name); +TLI_DEFINE_ENUM_INTERNAL(getpwnam) +TLI_DEFINE_STRING_INTERNAL("getpwnam") +/// char *gets(char *s); +TLI_DEFINE_ENUM_INTERNAL(gets) +TLI_DEFINE_STRING_INTERNAL("gets") +/// int gettimeofday(struct timeval *tp, void *tzp); +TLI_DEFINE_ENUM_INTERNAL(gettimeofday) +TLI_DEFINE_STRING_INTERNAL("gettimeofday") +/// uint32_t htonl(uint32_t hostlong); +TLI_DEFINE_ENUM_INTERNAL(htonl) +TLI_DEFINE_STRING_INTERNAL("htonl") +/// uint16_t htons(uint16_t hostshort); +TLI_DEFINE_ENUM_INTERNAL(htons) +TLI_DEFINE_STRING_INTERNAL("htons") +/// int iprintf(const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(iprintf) +TLI_DEFINE_STRING_INTERNAL("iprintf") +/// int isascii(int c); +TLI_DEFINE_ENUM_INTERNAL(isascii) 
+TLI_DEFINE_STRING_INTERNAL("isascii") +/// int isdigit(int c); +TLI_DEFINE_ENUM_INTERNAL(isdigit) +TLI_DEFINE_STRING_INTERNAL("isdigit") +/// long int labs(long int j); +TLI_DEFINE_ENUM_INTERNAL(labs) +TLI_DEFINE_STRING_INTERNAL("labs") +/// int lchown(const char *path, uid_t owner, gid_t group); +TLI_DEFINE_ENUM_INTERNAL(lchown) +TLI_DEFINE_STRING_INTERNAL("lchown") +/// double ldexp(double x, int n); +TLI_DEFINE_ENUM_INTERNAL(ldexp) +TLI_DEFINE_STRING_INTERNAL("ldexp") +/// float ldexpf(float x, int n); +TLI_DEFINE_ENUM_INTERNAL(ldexpf) +TLI_DEFINE_STRING_INTERNAL("ldexpf") +/// long double ldexpl(long double x, int n); +TLI_DEFINE_ENUM_INTERNAL(ldexpl) +TLI_DEFINE_STRING_INTERNAL("ldexpl") +/// long long int llabs(long long int j); +TLI_DEFINE_ENUM_INTERNAL(llabs) +TLI_DEFINE_STRING_INTERNAL("llabs") +/// double log(double x); +TLI_DEFINE_ENUM_INTERNAL(log) +TLI_DEFINE_STRING_INTERNAL("log") +/// double log10(double x); +TLI_DEFINE_ENUM_INTERNAL(log10) +TLI_DEFINE_STRING_INTERNAL("log10") +/// float log10f(float x); +TLI_DEFINE_ENUM_INTERNAL(log10f) +TLI_DEFINE_STRING_INTERNAL("log10f") +/// long double log10l(long double x); +TLI_DEFINE_ENUM_INTERNAL(log10l) +TLI_DEFINE_STRING_INTERNAL("log10l") +/// double log1p(double x); +TLI_DEFINE_ENUM_INTERNAL(log1p) +TLI_DEFINE_STRING_INTERNAL("log1p") +/// float log1pf(float x); +TLI_DEFINE_ENUM_INTERNAL(log1pf) +TLI_DEFINE_STRING_INTERNAL("log1pf") +/// long double log1pl(long double x); +TLI_DEFINE_ENUM_INTERNAL(log1pl) +TLI_DEFINE_STRING_INTERNAL("log1pl") +/// double log2(double x); +TLI_DEFINE_ENUM_INTERNAL(log2) +TLI_DEFINE_STRING_INTERNAL("log2") +/// float log2f(float x); +TLI_DEFINE_ENUM_INTERNAL(log2f) +TLI_DEFINE_STRING_INTERNAL("log2f") +/// double long double log2l(long double x); +TLI_DEFINE_ENUM_INTERNAL(log2l) +TLI_DEFINE_STRING_INTERNAL("log2l") +/// double logb(double x); +TLI_DEFINE_ENUM_INTERNAL(logb) +TLI_DEFINE_STRING_INTERNAL("logb") +/// float logbf(float x); +TLI_DEFINE_ENUM_INTERNAL(logbf) +TLI_DEFINE_STRING_INTERNAL("logbf") +/// long double logbl(long double x); +TLI_DEFINE_ENUM_INTERNAL(logbl) +TLI_DEFINE_STRING_INTERNAL("logbl") +/// float logf(float x); +TLI_DEFINE_ENUM_INTERNAL(logf) +TLI_DEFINE_STRING_INTERNAL("logf") +/// long double logl(long double x); +TLI_DEFINE_ENUM_INTERNAL(logl) +TLI_DEFINE_STRING_INTERNAL("logl") +/// int lstat(const char *path, struct stat *buf); +TLI_DEFINE_ENUM_INTERNAL(lstat) +TLI_DEFINE_STRING_INTERNAL("lstat") +/// int lstat64(const char *path, struct stat64 *buf); +TLI_DEFINE_ENUM_INTERNAL(lstat64) +TLI_DEFINE_STRING_INTERNAL("lstat64") +/// void *malloc(size_t size); +TLI_DEFINE_ENUM_INTERNAL(malloc) +TLI_DEFINE_STRING_INTERNAL("malloc") +/// void *memalign(size_t boundary, size_t size); +TLI_DEFINE_ENUM_INTERNAL(memalign) +TLI_DEFINE_STRING_INTERNAL("memalign") +/// void *memccpy(void *s1, const void *s2, int c, size_t n); +TLI_DEFINE_ENUM_INTERNAL(memccpy) +TLI_DEFINE_STRING_INTERNAL("memccpy") +/// void *memchr(const void *s, int c, size_t n); +TLI_DEFINE_ENUM_INTERNAL(memchr) +TLI_DEFINE_STRING_INTERNAL("memchr") +/// int memcmp(const void *s1, const void *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(memcmp) +TLI_DEFINE_STRING_INTERNAL("memcmp") +/// void *memcpy(void *s1, const void *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(memcpy) +TLI_DEFINE_STRING_INTERNAL("memcpy") +/// void *memmove(void *s1, const void *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(memmove) +TLI_DEFINE_STRING_INTERNAL("memmove") +/// void *mempcpy(void *s1, const void *s2, size_t n); 
+TLI_DEFINE_ENUM_INTERNAL(mempcpy) +TLI_DEFINE_STRING_INTERNAL("mempcpy") +// void *memrchr(const void *s, int c, size_t n); +TLI_DEFINE_ENUM_INTERNAL(memrchr) +TLI_DEFINE_STRING_INTERNAL("memrchr") +/// void *memset(void *b, int c, size_t len); +TLI_DEFINE_ENUM_INTERNAL(memset) +TLI_DEFINE_STRING_INTERNAL("memset") +/// void memset_pattern16(void *b, const void *pattern16, size_t len); +TLI_DEFINE_ENUM_INTERNAL(memset_pattern16) +TLI_DEFINE_STRING_INTERNAL("memset_pattern16") +/// int mkdir(const char *path, mode_t mode); +TLI_DEFINE_ENUM_INTERNAL(mkdir) +TLI_DEFINE_STRING_INTERNAL("mkdir") +/// time_t mktime(struct tm *timeptr); +TLI_DEFINE_ENUM_INTERNAL(mktime) +TLI_DEFINE_STRING_INTERNAL("mktime") +/// double modf(double x, double *iptr); +TLI_DEFINE_ENUM_INTERNAL(modf) +TLI_DEFINE_STRING_INTERNAL("modf") +/// float modff(float, float *iptr); +TLI_DEFINE_ENUM_INTERNAL(modff) +TLI_DEFINE_STRING_INTERNAL("modff") +/// long double modfl(long double value, long double *iptr); +TLI_DEFINE_ENUM_INTERNAL(modfl) +TLI_DEFINE_STRING_INTERNAL("modfl") + +/// double nearbyint(double x); +TLI_DEFINE_ENUM_INTERNAL(nearbyint) +TLI_DEFINE_STRING_INTERNAL("nearbyint") +/// float nearbyintf(float x); +TLI_DEFINE_ENUM_INTERNAL(nearbyintf) +TLI_DEFINE_STRING_INTERNAL("nearbyintf") +/// long double nearbyintl(long double x); +TLI_DEFINE_ENUM_INTERNAL(nearbyintl) +TLI_DEFINE_STRING_INTERNAL("nearbyintl") +/// uint32_t ntohl(uint32_t netlong); +TLI_DEFINE_ENUM_INTERNAL(ntohl) +TLI_DEFINE_STRING_INTERNAL("ntohl") +/// uint16_t ntohs(uint16_t netshort); +TLI_DEFINE_ENUM_INTERNAL(ntohs) +TLI_DEFINE_STRING_INTERNAL("ntohs") +/// int open(const char *path, int oflag, ... ); +TLI_DEFINE_ENUM_INTERNAL(open) +TLI_DEFINE_STRING_INTERNAL("open") +/// int open64(const char *filename, int flags[, mode_t mode]) +TLI_DEFINE_ENUM_INTERNAL(open64) +TLI_DEFINE_STRING_INTERNAL("open64") +/// DIR *opendir(const char *dirname); +TLI_DEFINE_ENUM_INTERNAL(opendir) +TLI_DEFINE_STRING_INTERNAL("opendir") +/// int pclose(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(pclose) +TLI_DEFINE_STRING_INTERNAL("pclose") +/// void perror(const char *s); +TLI_DEFINE_ENUM_INTERNAL(perror) +TLI_DEFINE_STRING_INTERNAL("perror") +/// FILE *popen(const char *command, const char *mode); +TLI_DEFINE_ENUM_INTERNAL(popen) +TLI_DEFINE_STRING_INTERNAL("popen") +/// int posix_memalign(void **memptr, size_t alignment, size_t size); +TLI_DEFINE_ENUM_INTERNAL(posix_memalign) +TLI_DEFINE_STRING_INTERNAL("posix_memalign") +/// double pow(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(pow) +TLI_DEFINE_STRING_INTERNAL("pow") +/// float powf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(powf) +TLI_DEFINE_STRING_INTERNAL("powf") +/// long double powl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(powl) +TLI_DEFINE_STRING_INTERNAL("powl") +/// ssize_t pread(int fildes, void *buf, size_t nbyte, off_t offset); +TLI_DEFINE_ENUM_INTERNAL(pread) +TLI_DEFINE_STRING_INTERNAL("pread") +/// int printf(const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(printf) +TLI_DEFINE_STRING_INTERNAL("printf") +/// int putc(int c, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(putc) +TLI_DEFINE_STRING_INTERNAL("putc") +/// int putc_unlocked(int c, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(putc_unlocked) +TLI_DEFINE_STRING_INTERNAL("putc_unlocked") +/// int putchar(int c); +TLI_DEFINE_ENUM_INTERNAL(putchar) +TLI_DEFINE_STRING_INTERNAL("putchar") +/// int putchar_unlocked(int c); +TLI_DEFINE_ENUM_INTERNAL(putchar_unlocked) +TLI_DEFINE_STRING_INTERNAL("putchar_unlocked") +/// int puts(const 
char *s); +TLI_DEFINE_ENUM_INTERNAL(puts) +TLI_DEFINE_STRING_INTERNAL("puts") +/// ssize_t pwrite(int fildes, const void *buf, size_t nbyte, off_t offset); +TLI_DEFINE_ENUM_INTERNAL(pwrite) +TLI_DEFINE_STRING_INTERNAL("pwrite") +/// void qsort(void *base, size_t nel, size_t width, +/// int (*compar)(const void *, const void *)); +TLI_DEFINE_ENUM_INTERNAL(qsort) +TLI_DEFINE_STRING_INTERNAL("qsort") +/// ssize_t read(int fildes, void *buf, size_t nbyte); +TLI_DEFINE_ENUM_INTERNAL(read) +TLI_DEFINE_STRING_INTERNAL("read") +/// ssize_t readlink(const char *path, char *buf, size_t bufsize); +TLI_DEFINE_ENUM_INTERNAL(readlink) +TLI_DEFINE_STRING_INTERNAL("readlink") +/// void *realloc(void *ptr, size_t size); +TLI_DEFINE_ENUM_INTERNAL(realloc) +TLI_DEFINE_STRING_INTERNAL("realloc") +/// void *reallocf(void *ptr, size_t size); +TLI_DEFINE_ENUM_INTERNAL(reallocf) +TLI_DEFINE_STRING_INTERNAL("reallocf") +/// char *realpath(const char *file_name, char *resolved_name); +TLI_DEFINE_ENUM_INTERNAL(realpath) +TLI_DEFINE_STRING_INTERNAL("realpath") +/// int remove(const char *path); +TLI_DEFINE_ENUM_INTERNAL(remove) +TLI_DEFINE_STRING_INTERNAL("remove") +/// int rename(const char *old, const char *new); +TLI_DEFINE_ENUM_INTERNAL(rename) +TLI_DEFINE_STRING_INTERNAL("rename") +/// void rewind(FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(rewind) +TLI_DEFINE_STRING_INTERNAL("rewind") +/// double rint(double x); +TLI_DEFINE_ENUM_INTERNAL(rint) +TLI_DEFINE_STRING_INTERNAL("rint") +/// float rintf(float x); +TLI_DEFINE_ENUM_INTERNAL(rintf) +TLI_DEFINE_STRING_INTERNAL("rintf") +/// long double rintl(long double x); +TLI_DEFINE_ENUM_INTERNAL(rintl) +TLI_DEFINE_STRING_INTERNAL("rintl") +/// int rmdir(const char *path); +TLI_DEFINE_ENUM_INTERNAL(rmdir) +TLI_DEFINE_STRING_INTERNAL("rmdir") +/// double round(double x); +TLI_DEFINE_ENUM_INTERNAL(round) +TLI_DEFINE_STRING_INTERNAL("round") +/// float roundf(float x); +TLI_DEFINE_ENUM_INTERNAL(roundf) +TLI_DEFINE_STRING_INTERNAL("roundf") +/// long double roundl(long double x); +TLI_DEFINE_ENUM_INTERNAL(roundl) +TLI_DEFINE_STRING_INTERNAL("roundl") +/// int scanf(const char *restrict format, ... 
); +TLI_DEFINE_ENUM_INTERNAL(scanf) +TLI_DEFINE_STRING_INTERNAL("scanf") +/// void setbuf(FILE *stream, char *buf); +TLI_DEFINE_ENUM_INTERNAL(setbuf) +TLI_DEFINE_STRING_INTERNAL("setbuf") +/// int setitimer(int which, const struct itimerval *value, +/// struct itimerval *ovalue); +TLI_DEFINE_ENUM_INTERNAL(setitimer) +TLI_DEFINE_STRING_INTERNAL("setitimer") +/// int setvbuf(FILE *stream, char *buf, int type, size_t size); +TLI_DEFINE_ENUM_INTERNAL(setvbuf) +TLI_DEFINE_STRING_INTERNAL("setvbuf") +/// double sin(double x); +TLI_DEFINE_ENUM_INTERNAL(sin) +TLI_DEFINE_STRING_INTERNAL("sin") +/// float sinf(float x); +TLI_DEFINE_ENUM_INTERNAL(sinf) +TLI_DEFINE_STRING_INTERNAL("sinf") +/// double sinh(double x); +TLI_DEFINE_ENUM_INTERNAL(sinh) +TLI_DEFINE_STRING_INTERNAL("sinh") +/// float sinhf(float x); +TLI_DEFINE_ENUM_INTERNAL(sinhf) +TLI_DEFINE_STRING_INTERNAL("sinhf") +/// long double sinhl(long double x); +TLI_DEFINE_ENUM_INTERNAL(sinhl) +TLI_DEFINE_STRING_INTERNAL("sinhl") +/// long double sinl(long double x); +TLI_DEFINE_ENUM_INTERNAL(sinl) +TLI_DEFINE_STRING_INTERNAL("sinl") +/// int siprintf(char *str, const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(siprintf) +TLI_DEFINE_STRING_INTERNAL("siprintf") +/// int snprintf(char *s, size_t n, const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(snprintf) +TLI_DEFINE_STRING_INTERNAL("snprintf") +/// int sprintf(char *str, const char *format, ...); +TLI_DEFINE_ENUM_INTERNAL(sprintf) +TLI_DEFINE_STRING_INTERNAL("sprintf") +/// double sqrt(double x); +TLI_DEFINE_ENUM_INTERNAL(sqrt) +TLI_DEFINE_STRING_INTERNAL("sqrt") +/// float sqrtf(float x); +TLI_DEFINE_ENUM_INTERNAL(sqrtf) +TLI_DEFINE_STRING_INTERNAL("sqrtf") +/// long double sqrtl(long double x); +TLI_DEFINE_ENUM_INTERNAL(sqrtl) +TLI_DEFINE_STRING_INTERNAL("sqrtl") +/// int sscanf(const char *s, const char *format, ... 
); +TLI_DEFINE_ENUM_INTERNAL(sscanf) +TLI_DEFINE_STRING_INTERNAL("sscanf") +/// int stat(const char *path, struct stat *buf); +TLI_DEFINE_ENUM_INTERNAL(stat) +TLI_DEFINE_STRING_INTERNAL("stat") +/// int stat64(const char *path, struct stat64 *buf); +TLI_DEFINE_ENUM_INTERNAL(stat64) +TLI_DEFINE_STRING_INTERNAL("stat64") +/// int statvfs(const char *path, struct statvfs *buf); +TLI_DEFINE_ENUM_INTERNAL(statvfs) +TLI_DEFINE_STRING_INTERNAL("statvfs") +/// int statvfs64(const char *path, struct statvfs64 *buf) +TLI_DEFINE_ENUM_INTERNAL(statvfs64) +TLI_DEFINE_STRING_INTERNAL("statvfs64") +/// char *stpcpy(char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(stpcpy) +TLI_DEFINE_STRING_INTERNAL("stpcpy") +/// char *stpncpy(char *s1, const char *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(stpncpy) +TLI_DEFINE_STRING_INTERNAL("stpncpy") +/// int strcasecmp(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strcasecmp) +TLI_DEFINE_STRING_INTERNAL("strcasecmp") +/// char *strcat(char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strcat) +TLI_DEFINE_STRING_INTERNAL("strcat") +/// char *strchr(const char *s, int c); +TLI_DEFINE_ENUM_INTERNAL(strchr) +TLI_DEFINE_STRING_INTERNAL("strchr") +/// int strcmp(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strcmp) +TLI_DEFINE_STRING_INTERNAL("strcmp") +/// int strcoll(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strcoll) +TLI_DEFINE_STRING_INTERNAL("strcoll") +/// char *strcpy(char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strcpy) +TLI_DEFINE_STRING_INTERNAL("strcpy") +/// size_t strcspn(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strcspn) +TLI_DEFINE_STRING_INTERNAL("strcspn") +/// char *strdup(const char *s1); +TLI_DEFINE_ENUM_INTERNAL(strdup) +TLI_DEFINE_STRING_INTERNAL("strdup") +/// size_t strlen(const char *s); +TLI_DEFINE_ENUM_INTERNAL(strlen) +TLI_DEFINE_STRING_INTERNAL("strlen") +/// int strncasecmp(const char *s1, const char *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(strncasecmp) +TLI_DEFINE_STRING_INTERNAL("strncasecmp") +/// char *strncat(char *s1, const char *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(strncat) +TLI_DEFINE_STRING_INTERNAL("strncat") +/// int strncmp(const char *s1, const char *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(strncmp) +TLI_DEFINE_STRING_INTERNAL("strncmp") +/// char *strncpy(char *s1, const char *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(strncpy) +TLI_DEFINE_STRING_INTERNAL("strncpy") +/// char *strndup(const char *s1, size_t n); +TLI_DEFINE_ENUM_INTERNAL(strndup) +TLI_DEFINE_STRING_INTERNAL("strndup") +/// size_t strnlen(const char *s, size_t maxlen); +TLI_DEFINE_ENUM_INTERNAL(strnlen) +TLI_DEFINE_STRING_INTERNAL("strnlen") +/// char *strpbrk(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strpbrk) +TLI_DEFINE_STRING_INTERNAL("strpbrk") +/// char *strrchr(const char *s, int c); +TLI_DEFINE_ENUM_INTERNAL(strrchr) +TLI_DEFINE_STRING_INTERNAL("strrchr") +/// size_t strspn(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strspn) +TLI_DEFINE_STRING_INTERNAL("strspn") +/// char *strstr(const char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strstr) +TLI_DEFINE_STRING_INTERNAL("strstr") +/// double strtod(const char *nptr, char **endptr); +TLI_DEFINE_ENUM_INTERNAL(strtod) +TLI_DEFINE_STRING_INTERNAL("strtod") +/// float strtof(const char *nptr, char **endptr); +TLI_DEFINE_ENUM_INTERNAL(strtof) +TLI_DEFINE_STRING_INTERNAL("strtof") +// char *strtok(char *s1, const char *s2); +TLI_DEFINE_ENUM_INTERNAL(strtok) +TLI_DEFINE_STRING_INTERNAL("strtok") +// char 
*strtok_r(char *s, const char *sep, char **lasts); +TLI_DEFINE_ENUM_INTERNAL(strtok_r) +TLI_DEFINE_STRING_INTERNAL("strtok_r") +/// long int strtol(const char *nptr, char **endptr, int base); +TLI_DEFINE_ENUM_INTERNAL(strtol) +TLI_DEFINE_STRING_INTERNAL("strtol") +/// long double strtold(const char *nptr, char **endptr); +TLI_DEFINE_ENUM_INTERNAL(strtold) +TLI_DEFINE_STRING_INTERNAL("strtold") +/// long long int strtoll(const char *nptr, char **endptr, int base); +TLI_DEFINE_ENUM_INTERNAL(strtoll) +TLI_DEFINE_STRING_INTERNAL("strtoll") +/// unsigned long int strtoul(const char *nptr, char **endptr, int base); +TLI_DEFINE_ENUM_INTERNAL(strtoul) +TLI_DEFINE_STRING_INTERNAL("strtoul") +/// unsigned long long int strtoull(const char *nptr, char **endptr, int base); +TLI_DEFINE_ENUM_INTERNAL(strtoull) +TLI_DEFINE_STRING_INTERNAL("strtoull") +/// size_t strxfrm(char *s1, const char *s2, size_t n); +TLI_DEFINE_ENUM_INTERNAL(strxfrm) +TLI_DEFINE_STRING_INTERNAL("strxfrm") +/// int system(const char *command); +TLI_DEFINE_ENUM_INTERNAL(system) +TLI_DEFINE_STRING_INTERNAL("system") +/// double tan(double x); +TLI_DEFINE_ENUM_INTERNAL(tan) +TLI_DEFINE_STRING_INTERNAL("tan") +/// float tanf(float x); +TLI_DEFINE_ENUM_INTERNAL(tanf) +TLI_DEFINE_STRING_INTERNAL("tanf") +/// double tanh(double x); +TLI_DEFINE_ENUM_INTERNAL(tanh) +TLI_DEFINE_STRING_INTERNAL("tanh") +/// float tanhf(float x); +TLI_DEFINE_ENUM_INTERNAL(tanhf) +TLI_DEFINE_STRING_INTERNAL("tanhf") +/// long double tanhl(long double x); +TLI_DEFINE_ENUM_INTERNAL(tanhl) +TLI_DEFINE_STRING_INTERNAL("tanhl") +/// long double tanl(long double x); +TLI_DEFINE_ENUM_INTERNAL(tanl) +TLI_DEFINE_STRING_INTERNAL("tanl") +/// clock_t times(struct tms *buffer); +TLI_DEFINE_ENUM_INTERNAL(times) +TLI_DEFINE_STRING_INTERNAL("times") +/// FILE *tmpfile(void); +TLI_DEFINE_ENUM_INTERNAL(tmpfile) +TLI_DEFINE_STRING_INTERNAL("tmpfile") +/// FILE *tmpfile64(void) +TLI_DEFINE_ENUM_INTERNAL(tmpfile64) +TLI_DEFINE_STRING_INTERNAL("tmpfile64") +/// int toascii(int c); +TLI_DEFINE_ENUM_INTERNAL(toascii) +TLI_DEFINE_STRING_INTERNAL("toascii") +/// double trunc(double x); +TLI_DEFINE_ENUM_INTERNAL(trunc) +TLI_DEFINE_STRING_INTERNAL("trunc") +/// float truncf(float x); +TLI_DEFINE_ENUM_INTERNAL(truncf) +TLI_DEFINE_STRING_INTERNAL("truncf") +/// long double truncl(long double x); +TLI_DEFINE_ENUM_INTERNAL(truncl) +TLI_DEFINE_STRING_INTERNAL("truncl") +/// int uname(struct utsname *name); +TLI_DEFINE_ENUM_INTERNAL(uname) +TLI_DEFINE_STRING_INTERNAL("uname") +/// int ungetc(int c, FILE *stream); +TLI_DEFINE_ENUM_INTERNAL(ungetc) +TLI_DEFINE_STRING_INTERNAL("ungetc") +/// int unlink(const char *path); +TLI_DEFINE_ENUM_INTERNAL(unlink) +TLI_DEFINE_STRING_INTERNAL("unlink") +/// int unsetenv(const char *name); +TLI_DEFINE_ENUM_INTERNAL(unsetenv) +TLI_DEFINE_STRING_INTERNAL("unsetenv") +/// int utime(const char *path, const struct utimbuf *times); +TLI_DEFINE_ENUM_INTERNAL(utime) +TLI_DEFINE_STRING_INTERNAL("utime") +/// int utimes(const char *path, const struct timeval times[2]); +TLI_DEFINE_ENUM_INTERNAL(utimes) +TLI_DEFINE_STRING_INTERNAL("utimes") +/// void *valloc(size_t size); +TLI_DEFINE_ENUM_INTERNAL(valloc) +TLI_DEFINE_STRING_INTERNAL("valloc") +/// int vfprintf(FILE *stream, const char *format, va_list ap); +TLI_DEFINE_ENUM_INTERNAL(vfprintf) +TLI_DEFINE_STRING_INTERNAL("vfprintf") +/// int vfscanf(FILE *stream, const char *format, va_list arg); +TLI_DEFINE_ENUM_INTERNAL(vfscanf) +TLI_DEFINE_STRING_INTERNAL("vfscanf") +/// int vprintf(const char *restrict format, 
va_list ap); +TLI_DEFINE_ENUM_INTERNAL(vprintf) +TLI_DEFINE_STRING_INTERNAL("vprintf") +/// int vscanf(const char *format, va_list arg); +TLI_DEFINE_ENUM_INTERNAL(vscanf) +TLI_DEFINE_STRING_INTERNAL("vscanf") +/// int vsnprintf(char *s, size_t n, const char *format, va_list ap); +TLI_DEFINE_ENUM_INTERNAL(vsnprintf) +TLI_DEFINE_STRING_INTERNAL("vsnprintf") +/// int vsprintf(char *s, const char *format, va_list ap); +TLI_DEFINE_ENUM_INTERNAL(vsprintf) +TLI_DEFINE_STRING_INTERNAL("vsprintf") +/// int vsscanf(const char *s, const char *format, va_list arg); +TLI_DEFINE_ENUM_INTERNAL(vsscanf) +TLI_DEFINE_STRING_INTERNAL("vsscanf") +/// size_t wcslen (const wchar_t* wcs); +TLI_DEFINE_ENUM_INTERNAL(wcslen) +TLI_DEFINE_STRING_INTERNAL("wcslen") +/// ssize_t write(int fildes, const void *buf, size_t nbyte); +TLI_DEFINE_ENUM_INTERNAL(write) +TLI_DEFINE_STRING_INTERNAL("write") + +#undef TLI_DEFINE_ENUM_INTERNAL +#undef TLI_DEFINE_STRING_INTERNAL +#endif // One of TLI_DEFINE_ENUM/STRING are defined. + +#undef TLI_DEFINE_ENUM +#undef TLI_DEFINE_STRING diff --git a/clang-r353983e/include/llvm/Analysis/TargetLibraryInfo.h b/clang-r353983e/include/llvm/Analysis/TargetLibraryInfo.h new file mode 100644 index 00000000..fc6811e8 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TargetLibraryInfo.h @@ -0,0 +1,393 @@ +//===-- TargetLibraryInfo.h - Library information ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TARGETLIBRARYINFO_H +#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { +template <typename T> class ArrayRef; + +/// Describes a possible vectorization of a function. +/// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized +/// by a factor 'VectorizationFactor'. +struct VecDesc { + StringRef ScalarFnName; + StringRef VectorFnName; + unsigned VectorizationFactor; +}; + + enum LibFunc { +#define TLI_DEFINE_ENUM +#include "llvm/Analysis/TargetLibraryInfo.def" + + NumLibFuncs + }; + +/// Implementation of the target library information. +/// +/// This class constructs tables that hold the target library information and +/// make it available. However, it is somewhat expensive to compute and only +/// depends on the triple. So users typically interact with the \c +/// TargetLibraryInfo wrapper below. 
+class TargetLibraryInfoImpl { + friend class TargetLibraryInfo; + + unsigned char AvailableArray[(NumLibFuncs+3)/4]; + llvm::DenseMap<unsigned, std::string> CustomNames; + static StringRef const StandardNames[NumLibFuncs]; + bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param; + + enum AvailabilityState { + StandardName = 3, // (memset to all ones) + CustomName = 1, + Unavailable = 0 // (memset to all zeros) + }; + void setState(LibFunc F, AvailabilityState State) { + AvailableArray[F/4] &= ~(3 << 2*(F&3)); + AvailableArray[F/4] |= State << 2*(F&3); + } + AvailabilityState getState(LibFunc F) const { + return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3); + } + + /// Vectorization descriptors - sorted by ScalarFnName. + std::vector<VecDesc> VectorDescs; + /// Scalarization descriptors - same content as VectorDescs but sorted based + /// on VectorFnName rather than ScalarFnName. + std::vector<VecDesc> ScalarDescs; + + /// Return true if the function type FTy is valid for the library function + /// F, regardless of whether the function is available. + bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, + const DataLayout *DL) const; + +public: + /// List of known vector-functions libraries. + /// + /// The vector-functions library defines, which functions are vectorizable + /// and with which factor. The library can be specified by either frontend, + /// or a commandline option, and then used by + /// addVectorizableFunctionsFromVecLib for filling up the tables of + /// vectorizable functions. + enum VectorLibrary { + NoLibrary, // Don't use any vector library. + Accelerate, // Use Accelerate framework. + SVML // Intel short vector math library. + }; + + TargetLibraryInfoImpl(); + explicit TargetLibraryInfoImpl(const Triple &T); + + // Provide value semantics. + TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI); + TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI); + TargetLibraryInfoImpl &operator=(const TargetLibraryInfoImpl &TLI); + TargetLibraryInfoImpl &operator=(TargetLibraryInfoImpl &&TLI); + + /// Searches for a particular function name. + /// + /// If it is one of the known library functions, return true and set F to the + /// corresponding value. + bool getLibFunc(StringRef funcName, LibFunc &F) const; + + /// Searches for a particular function name, also checking that its type is + /// valid for the library function matching that name. + /// + /// If it is one of the known library functions, return true and set F to the + /// corresponding value. + bool getLibFunc(const Function &FDecl, LibFunc &F) const; + + /// Forces a function to be marked as unavailable. + void setUnavailable(LibFunc F) { + setState(F, Unavailable); + } + + /// Forces a function to be marked as available. + void setAvailable(LibFunc F) { + setState(F, StandardName); + } + + /// Forces a function to be marked as available and provide an alternate name + /// that must be used. + void setAvailableWithName(LibFunc F, StringRef Name) { + if (StandardNames[F] != Name) { + setState(F, CustomName); + CustomNames[F] = Name; + assert(CustomNames.find(F) != CustomNames.end()); + } else { + setState(F, StandardName); + } + } + + /// Disables all builtins. + /// + /// This can be used for options like -fno-builtin. + void disableAllFunctions(); + + /// Add a set of scalar -> vector mappings, queryable via + /// getVectorizedFunction and getScalarizedFunction. 
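The mapping tables can be filled from one of the VectorLibrary presets above or directly; a small illustrative sketch using addVectorizableFunctions (declared just below), with invented vector function names:

  TargetLibraryInfoImpl Impl(TT);          // TT: some llvm::Triple
  const VecDesc Mappings[] = {
      {"sinf", "__example_sinf4", 4},      // vector names here are made up
      {"cosf", "__example_cosf4", 4},      // purely for illustration
  };
  Impl.addVectorizableFunctions(Mappings);
  bool Vectorizable = Impl.isFunctionVectorizable("sinf", 4);       // true
  StringRef VecName = Impl.getVectorizedFunction("sinf", 4);        // "__example_sinf4"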
+ void addVectorizableFunctions(ArrayRef<VecDesc> Fns); + + /// Calls addVectorizableFunctions with a known preset of functions for the + /// given vector library. + void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib); + + /// Return true if the function F has a vector equivalent with vectorization + /// factor VF. + bool isFunctionVectorizable(StringRef F, unsigned VF) const { + return !getVectorizedFunction(F, VF).empty(); + } + + /// Return true if the function F has a vector equivalent with any + /// vectorization factor. + bool isFunctionVectorizable(StringRef F) const; + + /// Return the name of the equivalent of F, vectorized with factor VF. If no + /// such mapping exists, return the empty string. + StringRef getVectorizedFunction(StringRef F, unsigned VF) const; + + /// Return true if the function F has a scalar equivalent, and set VF to be + /// the vectorization factor. + bool isFunctionScalarizable(StringRef F, unsigned &VF) const { + return !getScalarizedFunction(F, VF).empty(); + } + + /// Return the name of the equivalent of F, scalarized. If no such mapping + /// exists, return the empty string. + /// + /// Set VF to the vectorization factor. + StringRef getScalarizedFunction(StringRef F, unsigned &VF) const; + + /// Set to true iff i32 parameters to library functions should have signext + /// or zeroext attributes if they correspond to C-level int or unsigned int, + /// respectively. + void setShouldExtI32Param(bool Val) { + ShouldExtI32Param = Val; + } + + /// Set to true iff i32 results from library functions should have signext + /// or zeroext attributes if they correspond to C-level int or unsigned int, + /// respectively. + void setShouldExtI32Return(bool Val) { + ShouldExtI32Return = Val; + } + + /// Set to true iff i32 parameters to library functions should have signext + /// attribute if they correspond to C-level int or unsigned int. + void setShouldSignExtI32Param(bool Val) { + ShouldSignExtI32Param = Val; + } + + /// Returns the size of the wchar_t type in bytes or 0 if the size is unknown. + /// This queries the 'wchar_size' metadata. + unsigned getWCharSize(const Module &M) const; +}; + +/// Provides information about what library functions are available for +/// the current target. +/// +/// This both allows optimizations to handle them specially and frontends to +/// disable such optimizations through -fno-builtin etc. +class TargetLibraryInfo { + friend class TargetLibraryAnalysis; + friend class TargetLibraryInfoWrapperPass; + + const TargetLibraryInfoImpl *Impl; + +public: + explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl) : Impl(&Impl) {} + + // Provide value semantics. + TargetLibraryInfo(const TargetLibraryInfo &TLI) : Impl(TLI.Impl) {} + TargetLibraryInfo(TargetLibraryInfo &&TLI) : Impl(TLI.Impl) {} + TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) { + Impl = TLI.Impl; + return *this; + } + TargetLibraryInfo &operator=(TargetLibraryInfo &&TLI) { + Impl = TLI.Impl; + return *this; + } + + /// Searches for a particular function name. + /// + /// If it is one of the known library functions, return true and set F to the + /// corresponding value. + bool getLibFunc(StringRef funcName, LibFunc &F) const { + return Impl->getLibFunc(funcName, F); + } + + bool getLibFunc(const Function &FDecl, LibFunc &F) const { + return Impl->getLibFunc(FDecl, F); + } + + /// If a callsite does not have the 'nobuiltin' attribute, return if the + /// called function is a known library function and set F to that function. 
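A typical client pattern, sketched here for illustration (the call-site overload is defined just below, and the helper name is hypothetical): map a call back to a LibFunc and check availability before treating it as a builtin.

  static bool isKnownSqrtCall(const Instruction &I, const TargetLibraryInfo &TLI) {
    ImmutableCallSite CS(&I);
    if (!CS)                       // not a call or invoke
      return false;
    LibFunc F;
    // Rejects 'nobuiltin' and indirect calls, then matches name and prototype
    // against the tables built by TargetLibraryInfoImpl.
    if (!TLI.getLibFunc(CS, F) || !TLI.has(F))
      return false;
    return F == LibFunc_sqrt || F == LibFunc_sqrtf || F == LibFunc_sqrtl;
  }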
+ bool getLibFunc(ImmutableCallSite CS, LibFunc &F) const { + return !CS.isNoBuiltin() && CS.getCalledFunction() && + getLibFunc(*(CS.getCalledFunction()), F); + } + + /// Tests whether a library function is available. + bool has(LibFunc F) const { + return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; + } + bool isFunctionVectorizable(StringRef F, unsigned VF) const { + return Impl->isFunctionVectorizable(F, VF); + } + bool isFunctionVectorizable(StringRef F) const { + return Impl->isFunctionVectorizable(F); + } + StringRef getVectorizedFunction(StringRef F, unsigned VF) const { + return Impl->getVectorizedFunction(F, VF); + } + + /// Tests if the function is both available and a candidate for optimized code + /// generation. + bool hasOptimizedCodeGen(LibFunc F) const { + if (Impl->getState(F) == TargetLibraryInfoImpl::Unavailable) + return false; + switch (F) { + default: break; + case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: + case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: + case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: + case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: + case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: + case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: + case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: + case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: + case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: + case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: + case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: + case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: + case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: + case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: + case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l: + case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: + case LibFunc_memcmp: case LibFunc_strcmp: case LibFunc_strcpy: + case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen: + case LibFunc_memchr: case LibFunc_mempcpy: + return true; + } + return false; + } + + StringRef getName(LibFunc F) const { + auto State = Impl->getState(F); + if (State == TargetLibraryInfoImpl::Unavailable) + return StringRef(); + if (State == TargetLibraryInfoImpl::StandardName) + return Impl->StandardNames[F]; + assert(State == TargetLibraryInfoImpl::CustomName); + return Impl->CustomNames.find(F)->second; + } + + /// Returns extension attribute kind to be used for i32 parameters + /// corresponding to C-level int or unsigned int. May be zeroext, signext, + /// or none. + Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const { + if (Impl->ShouldExtI32Param) + return Signed ? Attribute::SExt : Attribute::ZExt; + if (Impl->ShouldSignExtI32Param) + return Attribute::SExt; + return Attribute::None; + } + + /// Returns extension attribute kind to be used for i32 return values + /// corresponding to C-level int or unsigned int. May be zeroext, signext, + /// or none. + Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const { + if (Impl->ShouldExtI32Return) + return Signed ? Attribute::SExt : Attribute::ZExt; + return Attribute::None; + } + + /// \copydoc TargetLibraryInfoImpl::getWCharSize() + unsigned getWCharSize(const Module &M) const { + return Impl->getWCharSize(M); + } + + /// Handle invalidation from the pass manager. + /// + /// If we try to invalidate this info, just return false. 
It cannot become + /// invalid even if the module or function changes. + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + return false; + } + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } +}; + +/// Analysis pass providing the \c TargetLibraryInfo. +/// +/// Note that this pass's result cannot be invalidated, it is immutable for the +/// life of the module. +class TargetLibraryAnalysis : public AnalysisInfoMixin<TargetLibraryAnalysis> { +public: + typedef TargetLibraryInfo Result; + + /// Default construct the library analysis. + /// + /// This will use the module's triple to construct the library info for that + /// module. + TargetLibraryAnalysis() {} + + /// Construct a library analysis with preset info. + /// + /// This will directly copy the preset info into the result without + /// consulting the module's triple. + TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl) + : PresetInfoImpl(std::move(PresetInfoImpl)) {} + + TargetLibraryInfo run(Module &M, ModuleAnalysisManager &); + TargetLibraryInfo run(Function &F, FunctionAnalysisManager &); + +private: + friend AnalysisInfoMixin<TargetLibraryAnalysis>; + static AnalysisKey Key; + + Optional<TargetLibraryInfoImpl> PresetInfoImpl; + + StringMap<std::unique_ptr<TargetLibraryInfoImpl>> Impls; + + TargetLibraryInfoImpl &lookupInfoImpl(const Triple &T); +}; + +class TargetLibraryInfoWrapperPass : public ImmutablePass { + TargetLibraryInfoImpl TLIImpl; + TargetLibraryInfo TLI; + + virtual void anchor(); + +public: + static char ID; + TargetLibraryInfoWrapperPass(); + explicit TargetLibraryInfoWrapperPass(const Triple &T); + explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI); + + TargetLibraryInfo &getTLI() { return TLI; } + const TargetLibraryInfo &getTLI() const { return TLI; } +}; + +} // end namespace llvm + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/TargetTransformInfo.h b/clang-r353983e/include/llvm/Analysis/TargetTransformInfo.h new file mode 100644 index 00000000..60dbf677 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TargetTransformInfo.h @@ -0,0 +1,1731 @@ +//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This pass exposes codegen information to IR-level passes. Every +/// transformation that uses codegen information is broken into three parts: +/// 1. The IR-level analysis pass. +/// 2. The IR-level transformation interface which provides the needed +/// information. +/// 3. Codegen-level implementation which uses target-specific hooks. +/// +/// This file defines #2, which is the interface that IR-level transformations +/// use for querying the codegen. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H + +#include "llvm/ADT/Optional.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/DataTypes.h" +#include <functional> + +namespace llvm { + +namespace Intrinsic { +enum ID : unsigned; +} + +class Function; +class GlobalValue; +class IntrinsicInst; +class LoadInst; +class Loop; +class SCEV; +class ScalarEvolution; +class StoreInst; +class SwitchInst; +class Type; +class User; +class Value; + +/// Information about a load/store intrinsic defined by the target. +struct MemIntrinsicInfo { + /// This is the pointer that the intrinsic is loading from or storing to. + /// If this is non-null, then analysis/optimization passes can assume that + /// this intrinsic is functionally equivalent to a load/store from this + /// pointer. + Value *PtrVal = nullptr; + + // Ordering for atomic operations. + AtomicOrdering Ordering = AtomicOrdering::NotAtomic; + + // Same Id is set by the target for corresponding load/store intrinsics. + unsigned short MatchingId = 0; + + bool ReadMem = false; + bool WriteMem = false; + bool IsVolatile = false; + + bool isUnordered() const { + return (Ordering == AtomicOrdering::NotAtomic || + Ordering == AtomicOrdering::Unordered) && !IsVolatile; + } +}; + +/// This pass provides access to the codegen interfaces that are needed +/// for IR-level transformations. +class TargetTransformInfo { +public: + /// Construct a TTI object using a type implementing the \c Concept + /// API below. + /// + /// This is used by targets to construct a TTI wrapping their target-specific + /// implementaion that encodes appropriate costs for their target. + template <typename T> TargetTransformInfo(T Impl); + + /// Construct a baseline TTI object using a minimal implementation of + /// the \c Concept API below. + /// + /// The TTI implementation will reflect the information in the DataLayout + /// provided if non-null. + explicit TargetTransformInfo(const DataLayout &DL); + + // Provide move semantics. + TargetTransformInfo(TargetTransformInfo &&Arg); + TargetTransformInfo &operator=(TargetTransformInfo &&RHS); + + // We need to define the destructor out-of-line to define our sub-classes + // out-of-line. + ~TargetTransformInfo(); + + /// Handle the invalidation of this information. + /// + /// When used as a result of \c TargetIRAnalysis this method will be called + /// when the function this was computed for changes. When it returns false, + /// the information is preserved across those changes. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + // FIXME: We should probably in some way ensure that the subtarget + // information for a function hasn't changed. + return false; + } + + /// \name Generic Target Information + /// @{ + + /// The kind of cost model. + /// + /// There are several different cost models that can be customized by the + /// target. The normalization of each cost model may be target specific. + enum TargetCostKind { + TCK_RecipThroughput, ///< Reciprocal throughput. + TCK_Latency, ///< The latency of instruction. + TCK_CodeSize ///< Instruction code size. + }; + + /// Query the cost of a specified instruction. + /// + /// Clients should use this interface to query the cost of an existing + /// instruction. 
The instruction must have a valid parent (basic block). + /// + /// Note, this method does not cache the cost calculation and it + /// can be expensive in some cases. + int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { + switch (kind){ + case TCK_RecipThroughput: + return getInstructionThroughput(I); + + case TCK_Latency: + return getInstructionLatency(I); + + case TCK_CodeSize: + return getUserCost(I); + } + llvm_unreachable("Unknown instruction cost kind"); + } + + /// Underlying constants for 'cost' values in this interface. + /// + /// Many APIs in this interface return a cost. This enum defines the + /// fundamental values that should be used to interpret (and produce) those + /// costs. The costs are returned as an int rather than a member of this + /// enumeration because it is expected that the cost of one IR instruction + /// may have a multiplicative factor to it or otherwise won't fit directly + /// into the enum. Moreover, it is common to sum or average costs which works + /// better as simple integral values. Thus this enum only provides constants. + /// Also note that the returned costs are signed integers to make it natural + /// to add, subtract, and test with zero (a common boundary condition). It is + /// not expected that 2^32 is a realistic cost to be modeling at any point. + /// + /// Note that these costs should usually reflect the intersection of code-size + /// cost and execution cost. A free instruction is typically one that folds + /// into another instruction. For example, reg-to-reg moves can often be + /// skipped by renaming the registers in the CPU, but they still are encoded + /// and thus wouldn't be considered 'free' here. + enum TargetCostConstants { + TCC_Free = 0, ///< Expected to fold away in lowering. + TCC_Basic = 1, ///< The cost of a typical 'add' instruction. + TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. + }; + + /// Estimate the cost of a specific operation when lowered. + /// + /// Note that this is designed to work on an arbitrary synthetic opcode, and + /// thus work for hypothetical queries before an instruction has even been + /// formed. However, this does *not* work for GEPs, and must not be called + /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as + /// analyzing a GEP's cost required more information. + /// + /// Typically only the result type is required, and the operand type can be + /// omitted. However, if the opcode is one of the cast instructions, the + /// operand type is required. + /// + /// The returned cost is defined in terms of \c TargetCostConstants, see its + /// comments for a detailed explanation of the cost values. + int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const; + + /// Estimate the cost of a GEP operation when lowered. + /// + /// The contract for this function is the same as \c getOperationCost except + /// that it supports an interface that provides extra information specific to + /// the GEP operation. + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) const; + + /// Estimate the cost of a EXT operation when lowered. + /// + /// The contract for this function is the same as \c getOperationCost except + /// that it supports an interface that provides extra information specific to + /// the EXT operation. + int getExtCost(const Instruction *I, const Value *Src) const; + + /// Estimate the cost of a function call when lowered. 
+ /// + /// The contract for this is the same as \c getOperationCost except that it + /// supports an interface that provides extra information specific to call + /// instructions. + /// + /// This is the most basic query for estimating call cost: it only knows the + /// function type and (potentially) the number of arguments at the call site. + /// The latter is only interesting for varargs function types. + int getCallCost(FunctionType *FTy, int NumArgs = -1) const; + + /// Estimate the cost of calling a specific function when lowered. + /// + /// This overload adds the ability to reason about the particular function + /// being called in the event it is a library call with special lowering. + int getCallCost(const Function *F, int NumArgs = -1) const; + + /// Estimate the cost of calling a specific function when lowered. + /// + /// This overload allows specifying a set of candidate argument values. + int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const; + + /// \returns A value by which our inlining threshold should be multiplied. + /// This is primarily used to bump up the inlining threshold wholesale on + /// targets where calls are unusually expensive. + /// + /// TODO: This is a rather blunt instrument. Perhaps altering the costs of + /// individual classes of instructions would be better. + unsigned getInliningThresholdMultiplier() const; + + /// Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> ParamTys) const; + + /// Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) const; + + /// \return The estimated number of case clusters when lowering \p 'SI'. + /// \p JTSize Set a jump table size only when \p SI is suitable for a jump + /// table. + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) const; + + /// Estimate the cost of a given IR user when lowered. + /// + /// This can estimate the cost of either a ConstantExpr or Instruction when + /// lowered. It has two primary advantages over the \c getOperationCost and + /// \c getGEPCost above, and one significant disadvantage: it can only be + /// used when the IR construct has already been formed. + /// + /// The advantages are that it can inspect the SSA use graph to reason more + /// accurately about the cost. For example, all-constant-GEPs can often be + /// folded into a load or other instruction, but if they are used in some + /// other context they may not be folded. This routine can distinguish such + /// cases. + /// + /// \p Operands is a list of operands which can be a result of transformations + /// of the current operands. The number of the operands on the list must equal + /// to the number of the current operands the IR user has. Their order on the + /// list must be the same as the order of the current operands the IR user + /// has. + /// + /// The returned cost is defined in terms of \c TargetCostConstants, see its + /// comments for a detailed explanation of the cost values. + int getUserCost(const User *U, ArrayRef<const Value *> Operands) const; + + /// This is a helper function which calls the two-argument getUserCost + /// with \p Operands which are the current operands U has. 
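A sketch of how a client might use these queries (the one-argument getUserCost helper is defined just below; the function name is invented): accumulate a cost over a basic block under a chosen cost kind.

  static int blockCost(const BasicBlock &BB, const TargetTransformInfo &TTI,
                       TargetTransformInfo::TargetCostKind Kind) {
    int Cost = 0;
    for (const Instruction &I : BB)
      Cost += TTI.getInstructionCost(&I, Kind); // e.g. TCK_CodeSize for size heuristics
    return Cost;
  }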
+ int getUserCost(const User *U) const { + SmallVector<const Value *, 4> Operands(U->value_op_begin(), + U->value_op_end()); + return getUserCost(U, Operands); + } + + /// Return true if branch divergence exists. + /// + /// Branch divergence has a significantly negative impact on GPU performance + /// when threads in the same wavefront take different paths due to conditional + /// branches. + bool hasBranchDivergence() const; + + /// Returns whether V is a source of divergence. + /// + /// This function provides the target-dependent information for + /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first + /// builds the dependency graph, and then runs the reachability algorithm + /// starting with the sources of divergence. + bool isSourceOfDivergence(const Value *V) const; + + // Returns true for the target specific + // set of operations which produce uniform result + // even taking non-unform arguments + bool isAlwaysUniform(const Value *V) const; + + /// Returns the address space ID for a target's 'flat' address space. Note + /// this is not necessarily the same as addrspace(0), which LLVM sometimes + /// refers to as the generic address space. The flat address space is a + /// generic address space that can be used access multiple segments of memory + /// with different address spaces. Access of a memory location through a + /// pointer with this address space is expected to be legal but slower + /// compared to the same memory location accessed through a pointer with a + /// different address space. + // + /// This is for targets with different pointer representations which can + /// be converted with the addrspacecast instruction. If a pointer is converted + /// to this address space, optimizations should attempt to replace the access + /// with the source address space. + /// + /// \returns ~0u if the target does not have such a flat address space to + /// optimize away. + unsigned getFlatAddressSpace() const; + + /// Test whether calls to a function lower to actual program function + /// calls. + /// + /// The idea is to test whether the program is likely to require a 'call' + /// instruction or equivalent in order to call the given function. + /// + /// FIXME: It's not clear that this is a good or useful query API. Client's + /// should probably move to simpler cost metrics using the above. + /// Alternatively, we could split the cost interface into distinct code-size + /// and execution-speed costs. This would allow modelling the core of this + /// query more accurately as a call is a single small instruction, but + /// incurs significant execution cost. + bool isLoweredToCall(const Function *F) const; + + struct LSRCost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. + unsigned Insns; + unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + unsigned ScaleCost; + }; + + /// Parameters that control the generic loop unrolling transformation. + struct UnrollingPreferences { + /// The cost threshold for the unrolled loop. Should be relative to the + /// getUserCost values returned by this API, and the expectation is that + /// the unrolled loop's instructions when run through that interface should + /// not exceed this cost. However, this is only an estimate. Also, specific + /// loops may be unrolled even with a cost above this threshold if deemed + /// profitable. 
Set this to UINT_MAX to disable the loop body cost + /// restriction. + unsigned Threshold; + /// If complete unrolling will reduce the cost of the loop, we will boost + /// the Threshold by a certain percent to allow more aggressive complete + /// unrolling. This value provides the maximum boost percentage that we + /// can apply to Threshold (The value should be no less than 100). + /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, + /// MaxPercentThresholdBoost / 100) + /// E.g. if complete unrolling reduces the loop execution time by 50% + /// then we boost the threshold by the factor of 2x. If unrolling is not + /// expected to reduce the running time, then we do not increase the + /// threshold. + unsigned MaxPercentThresholdBoost; + /// The cost threshold for the unrolled loop when optimizing for size (set + /// to UINT_MAX to disable). + unsigned OptSizeThreshold; + /// The cost threshold for the unrolled loop, like Threshold, but used + /// for partial/runtime unrolling (set to UINT_MAX to disable). + unsigned PartialThreshold; + /// The cost threshold for the unrolled loop when optimizing for size, like + /// OptSizeThreshold, but used for partial/runtime unrolling (set to + /// UINT_MAX to disable). + unsigned PartialOptSizeThreshold; + /// A forced unrolling factor (the number of concatenated bodies of the + /// original loop in the unrolled loop body). When set to 0, the unrolling + /// transformation will select an unrolling factor based on the current cost + /// threshold and other factors. + unsigned Count; + /// A forced peeling factor (the number of bodied of the original loop + /// that should be peeled off before the loop body). When set to 0, the + /// unrolling transformation will select a peeling factor based on profile + /// information and other factors. + unsigned PeelCount; + /// Default unroll count for loops with run-time trip count. + unsigned DefaultUnrollRuntimeCount; + // Set the maximum unrolling factor. The unrolling factor may be selected + // using the appropriate cost threshold, but may not exceed this number + // (set to UINT_MAX to disable). This does not apply in cases where the + // loop is being fully unrolled. + unsigned MaxCount; + /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but + /// applies even if full unrolling is selected. This allows a target to fall + /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. + unsigned FullUnrollMaxCount; + // Represents number of instructions optimized when "back edge" + // becomes "fall through" in unrolled loop. + // For now we count a conditional branch on a backedge and a comparison + // feeding it. + unsigned BEInsns; + /// Allow partial unrolling (unrolling of loops to expand the size of the + /// loop body, not only to eliminate small constant-trip-count loops). + bool Partial; + /// Allow runtime unrolling (unrolling of loops to expand the size of the + /// loop body even when the number of loop iterations is not known at + /// compile time). + bool Runtime; + /// Allow generation of a loop remainder (extra iterations after unroll). + bool AllowRemainder; + /// Allow emitting expensive instructions (such as divisions) when computing + /// the trip count of a loop for runtime unrolling. + bool AllowExpensiveTripCount; + /// Apply loop unroll on any kind of loop + /// (mainly to loops that fail runtime unrolling). + bool Force; + /// Allow using trip count upper bound to unroll loops. 
+ bool UpperBound; + /// Allow peeling off loop iterations for loops with low dynamic tripcount. + bool AllowPeeling; + /// Allow unrolling of all the iterations of the runtime loop remainder. + bool UnrollRemainder; + /// Allow unroll and jam. Used to enable unroll and jam for the target. + bool UnrollAndJam; + /// Threshold for unroll and jam, for inner loop size. The 'Threshold' + /// value above is used during unroll and jam for the outer loop size. + /// This value is used in the same manner to limit the size of the inner + /// loop. + unsigned UnrollAndJamInnerLoopThreshold; + }; + + /// Get target-customized preferences for the generic loop unrolling + /// transformation. The caller will initialize UP with the current + /// target-independent defaults. + void getUnrollingPreferences(Loop *L, ScalarEvolution &, + UnrollingPreferences &UP) const; + + /// @} + + /// \name Scalar Target Information + /// @{ + + /// Flags indicating the kind of support for population count. + /// + /// Compared to the SW implementation, HW support is supposed to + /// significantly boost the performance when the population is dense, and it + /// may or may not degrade performance if the population is sparse. A HW + /// support is considered as "Fast" if it can outperform, or is on a par + /// with, SW implementation when the population is sparse; otherwise, it is + /// considered as "Slow". + enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; + + /// Return true if the specified immediate is legal add immediate, that + /// is the target has add instructions which can add a register with the + /// immediate without having to materialize the immediate into a register. + bool isLegalAddImmediate(int64_t Imm) const; + + /// Return true if the specified immediate is legal icmp immediate, + /// that is the target has icmp instructions which can compare a register + /// against the immediate without having to materialize the immediate into a + /// register. + bool isLegalICmpImmediate(int64_t Imm) const; + + /// Return true if the addressing mode represented by AM is legal for + /// this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// If target returns true in LSRWithInstrQueries(), I may be valid. + /// TODO: Handle pre/postinc as well. + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace = 0, + Instruction *I = nullptr) const; + + /// Return true if LSR cost of C1 is lower than C1. + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) const; + + /// Return true if the target can fuse a compare and branch. + /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost + /// calculation for the instructions in a loop. + bool canMacroFuseCmp() const; + + /// \return True is LSR should make efforts to create/preserve post-inc + /// addressing mode expressions. + bool shouldFavorPostInc() const; + + /// Return true if LSR should make efforts to generate indexed addressing + /// modes that operate across loop iterations. 
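An illustrative query against isLegalAddressingMode (declared above), assuming a TargetTransformInfo TTI and an LLVMContext Ctx are in scope: can the target fold a scaled-index form into an i32 load?

  bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
                                         /*BaseGV=*/nullptr,
                                         /*BaseOffset=*/16,
                                         /*HasBaseReg=*/true,
                                         /*Scale=*/4);   // i.e. [base + 4*index + 16]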
+ bool shouldFavorBackedgeIndex(const Loop *L) const; + + /// Return true if the target supports masked load/store + /// AVX2 and AVX-512 targets allow masks for consecutive load and store + bool isLegalMaskedStore(Type *DataType) const; + bool isLegalMaskedLoad(Type *DataType) const; + + /// Return true if the target supports masked gather/scatter + /// AVX-512 fully supports gather and scatter for vectors with 32 and 64 + /// bits scalar type. + bool isLegalMaskedScatter(Type *DataType) const; + bool isLegalMaskedGather(Type *DataType) const; + + /// Return true if the target has a unified operation to calculate division + /// and remainder. If so, the additional implicit multiplication and + /// subtraction required to calculate a remainder from division are free. This + /// can enable more aggressive transformations for division and remainder than + /// would typically be allowed using throughput or size cost models. + bool hasDivRemOp(Type *DataType, bool IsSigned) const; + + /// Return true if the given instruction (assumed to be a memory access + /// instruction) has a volatile variant. If that's the case then we can avoid + /// addrspacecast to generic AS for volatile loads/stores. Default + /// implementation returns false, which prevents address space inference for + /// volatile loads/stores. + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; + + /// Return true if target doesn't mind addresses in vectors. + bool prefersVectorizedAddressing() const; + + /// Return the cost of the scaling factor used in the addressing + /// mode represented by AM for this target, for a load/store + /// of the specified type. + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, it returns a negative value. + /// TODO: Handle pre/postinc as well. + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace = 0) const; + + /// Return true if the loop strength reduce pass should make + /// Instruction* based TTI queries to isLegalAddressingMode(). This is + /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned + /// immediate offset and no index register. + bool LSRWithInstrQueries() const; + + /// Return true if it's free to truncate a value of type Ty1 to type + /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 + /// by referencing its sub-register AX. + bool isTruncateFree(Type *Ty1, Type *Ty2) const; + + /// Return true if it is profitable to hoist instruction in the + /// then/else to before if. + bool isProfitableToHoist(Instruction *I) const; + + bool useAA() const; + + /// Return true if this type is legal. + bool isTypeLegal(Type *Ty) const; + + /// Returns the target's jmp_buf alignment in bytes. + unsigned getJumpBufAlignment() const; + + /// Returns the target's jmp_buf size in bytes. + unsigned getJumpBufSize() const; + + /// Return true if switches should be turned into lookup tables for the + /// target. + bool shouldBuildLookupTables() const; + + /// Return true if switches should be turned into lookup tables + /// containing this constant value for the target. + bool shouldBuildLookupTablesForConstant(Constant *C) const; + + /// Return true if the input function which is cold at all call sites, + /// should use coldcc calling convention. 
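A minimal legality sketch a vectorizer-style client might use (helper name invented for illustration):

  static bool canUseMaskedVectorLoad(Type *VecTy, bool Consecutive,
                                     const TargetTransformInfo &TTI) {
    // Consecutive lanes can use a masked load; arbitrary addresses need a gather.
    return Consecutive ? TTI.isLegalMaskedLoad(VecTy)
                       : TTI.isLegalMaskedGather(VecTy);
  }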
+ bool useColdCCForColdCall(Function &F) const; + + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) const; + + /// If target has efficient vector element load/store instructions, it can + /// return true here so that insertion/extraction costs are not added to + /// the scalarization cost of a load/store. + bool supportsEfficientVectorElementLoadStore() const; + + /// Don't restrict interleaved unrolling to small loops. + bool enableAggressiveInterleaving(bool LoopHasReductions) const; + + /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is + /// true if this is the expansion of memcmp(p1, p2, s) == 0. + struct MemCmpExpansionOptions { + // The list of available load sizes (in bytes), sorted in decreasing order. + SmallVector<unsigned, 8> LoadSizes; + // Set to true to allow overlapping loads. For example, 7-byte compares can + // be done with two 4-byte compares instead of 4+2+1-byte compares. This + // requires all loads in LoadSizes to be doable in an unaligned way. + bool AllowOverlappingLoads = false; + }; + const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const; + + /// Enable matching of interleaved access groups. + bool enableInterleavedAccessVectorization() const; + + /// Enable matching of interleaved access groups that contain predicated + /// accesses or gaps and therefore vectorized using masked + /// vector loads/stores. + bool enableMaskedInterleavedAccessVectorization() const; + + /// Indicate that it is potentially unsafe to automatically vectorize + /// floating-point operations because the semantics of vector and scalar + /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math + /// does not support IEEE-754 denormal numbers, while depending on the + /// platform, scalar floating-point math does. + /// This applies to floating-point math operations and calls, not memory + /// operations, shuffles, or casts. + bool isFPVectorizationPotentiallyUnsafe() const; + + /// Determine if the target supports unaligned memory accesses. + bool allowsMisalignedMemoryAccesses(LLVMContext &Context, + unsigned BitWidth, unsigned AddressSpace = 0, + unsigned Alignment = 1, + bool *Fast = nullptr) const; + + /// Return hardware support for population count. + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + /// Return true if the hardware has a fast square-root instruction. + bool haveFastSqrt(Type *Ty) const; + + /// Return true if it is faster to check if a floating-point value is NaN + /// (or not-NaN) versus a comparison against a constant FP zero value. + /// Targets should override this if materializing a 0.0 for comparison is + /// generally as cheap as checking for ordered/unordered. + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; + + /// Return the expected cost of supporting the floating point operation + /// of the specified type. + int getFPOpCost(Type *Ty) const; + + /// Return the expected cost of materializing for the given integer + /// immediate of the specified type. + int getIntImmCost(const APInt &Imm, Type *Ty) const; + + /// Return the expected cost of materialization for the given integer + /// immediate of the specified type for a given instruction. The cost can be + /// zero if the immediate can be folded into the specified instruction. 
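For example (a sketch, assuming a TargetTransformInfo TTI and an LLVMContext Ctx in scope), a constant-hoisting style client can compare the materialization cost of an immediate against TCC_Basic to decide whether it is worth keeping in a register:

  APInt C(64, 0x00ffff00ff00ff00ULL);
  bool WorthHoisting =
      TTI.getIntImmCost(C, Type::getInt64Ty(Ctx)) > TargetTransformInfo::TCC_Basic;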
+ int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) const; + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const; + + /// Return the expected cost for the given integer when optimising + /// for size. This is different than the other integer immediate cost + /// functions in that it is subtarget agnostic. This is useful when you e.g. + /// target one ISA such as Aarch32 but smaller encodings could be possible + /// with another such as Thumb. This return value is used as a penalty when + /// the total costs for a constant is calculated (the bigger the cost, the + /// more beneficial constant hoisting is). + int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) const; + /// @} + + /// \name Vector Target Information + /// @{ + + /// The various kinds of shuffle patterns for vector queries. + enum ShuffleKind { + SK_Broadcast, ///< Broadcast element 0 to all other elements. + SK_Reverse, ///< Reverse the order of the vector. + SK_Select, ///< Selects elements from the corresponding lane of + ///< either source operand. This is equivalent to a + ///< vector select with a constant condition operand. + SK_Transpose, ///< Transpose two vectors. + SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. + SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset. + SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one + ///< with any shuffle mask. + SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any + ///< shuffle mask. + }; + + /// Additional information about an operand's possible values. + enum OperandValueKind { + OK_AnyValue, // Operand can have any value. + OK_UniformValue, // Operand is uniform (splat of a value). + OK_UniformConstantValue, // Operand is uniform constant. + OK_NonUniformConstantValue // Operand is a non uniform constant value. + }; + + /// Additional properties of an operand's values. + enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; + + /// \return The number of scalar or vector registers that the target has. + /// If 'Vectors' is true, it returns the number of vector registers. If it is + /// set to false, it returns the number of scalar registers. + unsigned getNumberOfRegisters(bool Vector) const; + + /// \return The width of the largest scalar or vector register type. + unsigned getRegisterBitWidth(bool Vector) const; + + /// \return The width of the smallest vector register type. + unsigned getMinVectorRegisterBitWidth() const; + + /// \return True if the vectorization factor should be chosen to + /// make the vector of the smallest element type match the size of a + /// vector register. For wider element types, this could result in + /// creating vectors that span multiple vector registers. + /// If false, the vectorization factor will be chosen based on the + /// size of the widest element type. + bool shouldMaximizeVectorBandwidth(bool OptSize) const; + + /// \return The minimum vectorization factor for types of given element + /// bit width, or 0 if there is no mimimum VF. The returned value only + /// applies when shouldMaximizeVectorBandwidth returns true. + unsigned getMinimumVF(unsigned ElemWidth) const; + + /// \return True if it should be considered for address type promotion. + /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is + /// profitable without finding other extensions fed by the same input. 
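A simplistic sketch of how these register queries could drive a vectorization-factor choice (helper name invented for illustration):

  static unsigned pickVF(unsigned SmallestElemBits, bool OptSize,
                         const TargetTransformInfo &TTI) {
    // Fit as many of the narrowest elements as the widest vector register allows.
    unsigned VF = TTI.getRegisterBitWidth(/*Vector=*/true) / SmallestElemBits;
    // getMinimumVF only applies when the target asks to maximize bandwidth.
    if (TTI.shouldMaximizeVectorBandwidth(OptSize) &&
        TTI.getMinimumVF(SmallestElemBits) > VF)
      VF = TTI.getMinimumVF(SmallestElemBits);
    return VF ? VF : 1;
  }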
+ bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; + + /// \return The size of a cache line in bytes. + unsigned getCacheLineSize() const; + + /// The possible cache levels + enum class CacheLevel { + L1D, // The L1 data cache + L2D, // The L2 data cache + + // We currently do not model L3 caches, as their sizes differ widely between + // microarchitectures. Also, we currently do not have a use for L3 cache + // size modeling yet. + }; + + /// \return The size of the cache level in bytes, if available. + llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const; + + /// \return The associativity of the cache level, if available. + llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; + + /// \return How much before a load we should place the prefetch instruction. + /// This is currently measured in number of instructions. + unsigned getPrefetchDistance() const; + + /// \return Some HW prefetchers can handle accesses up to a certain constant + /// stride. This is the minimum stride in bytes where it makes sense to start + /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. + unsigned getMinPrefetchStride() const; + + /// \return The maximum number of iterations to prefetch ahead. If the + /// required number of iterations is more than this number, no prefetching is + /// performed. + unsigned getMaxPrefetchIterationsAhead() const; + + /// \return The maximum interleave factor that any transform should try to + /// perform for this target. This number depends on the level of parallelism + /// and the number of execution units in the CPU. + unsigned getMaxInterleaveFactor(unsigned VF) const; + + /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. + static OperandValueKind getOperandInfo(Value *V, + OperandValueProperties &OpProps); + + /// This is an approximation of reciprocal throughput of a math/logic op. + /// A higher cost indicates less expected throughput. + /// From Agner Fog's guides, reciprocal throughput is "the average number of + /// clock cycles per instruction when the instructions are not part of a + /// limiting dependency chain." + /// Therefore, costs should be scaled to account for multiple execution units + /// on the target that can process this type of instruction. For example, if + /// there are 5 scalar integer units and 2 vector integer units that can + /// calculate an 'add' in a single cycle, this model should indicate that the + /// cost of the vector add instruction is 2.5 times the cost of the scalar + /// add instruction. + /// \p Args is an optional argument which holds the instruction operands + /// values so the TTI can analyze those values searching for special + /// cases or optimizations based on those values. + int getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None, + ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const; + + /// \return The cost of a shuffle instruction of kind Kind and of type Tp. + /// The index and subtype parameters are used by the subvector insertion and + /// extraction shuffle kinds to show the insert/extract point and the type of + /// the subvector being inserted/extracted. + /// NOTE: For subvector extractions Tp represents the source type. 
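As a sketch of combining these costs (getShuffleCost is declared just below; VecTy and TTI are assumed to be in scope), the cost of multiplying a vector by a scalar splat could be modelled as a broadcast plus the vector multiply:

  int SplatMulCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy) +
      TTI.getArithmeticInstrCost(Instruction::Mul, VecTy,
                                 TargetTransformInfo::OK_AnyValue,
                                 TargetTransformInfo::OK_UniformValue);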
+ int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, + Type *SubTp = nullptr) const; + + /// \return The expected cost of cast instructions, such as bitcast, trunc, + /// zext, etc. If there is an existing instruction that holds Opcode, it + /// may be passed in the 'I' parameter. + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr) const; + + /// \return The expected cost of a sign- or zero-extended vector extract. Use + /// -1 to indicate that there is no information about the index value. + int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, + unsigned Index = -1) const; + + /// \return The expected cost of control-flow related instructions such as + /// Phi, Ret, Br. + int getCFInstrCost(unsigned Opcode) const; + + /// \returns The expected cost of compare and select instructions. If there + /// is an existing instruction that holds Opcode, it may be passed in the + /// 'I' parameter. + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = nullptr, const Instruction *I = nullptr) const; + + /// \return The expected cost of vector Insert and Extract. + /// Use -1 to indicate that there is no information on the index value. + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + + /// \return The cost of Load and Store instructions. + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace, const Instruction *I = nullptr) const; + + /// \return The cost of masked Load and Store instructions. + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const; + + /// \return The cost of Gather or Scatter operation + /// \p Opcode - is a type of memory access Load or Store + /// \p DataTy - a vector type of the data to be loaded or stored + /// \p Ptr - pointer [or vector of pointers] - address[es] in memory + /// \p VariableMask - true when the memory access is predicated with a mask + /// that is not a compile-time constant + /// \p Alignment - alignment of single element + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment) const; + + /// \return The cost of the interleaved memory operation. + /// \p Opcode is the memory operation code + /// \p VecTy is the vector type of the interleaved access. + /// \p Factor is the interleave factor + /// \p Indices is the indices for interleaved load members (as interleaved + /// load allows gaps) + /// \p Alignment is the alignment of the memory operation + /// \p AddressSpace is address space of the pointer. + /// \p UseMaskForCond indicates if the memory access is predicated. + /// \p UseMaskForGaps indicates if gaps should be masked. + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef<unsigned> Indices, unsigned Alignment, + unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) const; + + /// Calculate the cost of performing a vector reduction. + /// + /// This is the cost of reducing the vector value of type \p Ty to a scalar + /// value using the operation denoted by \p Opcode. The form of the reduction + /// can either be a pairwise reduction or a reduction that splits the vector + /// at every reduction level. 
+ /// + /// Pairwise: + /// (v0, v1, v2, v3) + /// ((v0+v1), (v2+v3), undef, undef) + /// Split: + /// (v0, v1, v2, v3) + /// ((v0+v2), (v1+v3), undef, undef) + int getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const; + int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, + bool IsUnsigned) const; + + /// \returns The cost of Intrinsic instructions. Analyses the real arguments. + /// Three cases are handled: 1. scalar instruction 2. vector instruction + /// 3. scalar instruction which is to be vectorized with VF. + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value *> Args, FastMathFlags FMF, + unsigned VF = 1) const; + + /// \returns The cost of Intrinsic instructions. Types analysis only. + /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the + /// arguments and the return value will be computed based on types. + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX) const; + + /// \returns The cost of Call instructions. + int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const; + + /// \returns The number of pieces into which the provided type must be + /// split during legalization. Zero is returned when the answer is unknown. + unsigned getNumberOfParts(Type *Tp) const; + + /// \returns The cost of the address computation. For most targets this can be + /// merged into the instruction indexing mode. Some targets might want to + /// distinguish between address computation for memory operations on vector + /// types and scalar types. Such targets should override this function. + /// The 'SE' parameter holds pointer for the scalar evolution object which + /// is used in order to get the Ptr step value in case of constant stride. + /// The 'Ptr' parameter holds SCEV of the access pointer. + int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr, + const SCEV *Ptr = nullptr) const; + + /// \returns The cost, if any, of keeping values of the given types alive + /// over a callsite. + /// + /// Some types may require the use of register classes that do not have + /// any callee-saved registers, so would require a spill and fill. + unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; + + /// \returns True if the intrinsic is a supported memory intrinsic. Info + /// will contain additional information - whether the intrinsic may write + /// or read to memory, volatility and the pointer. Info is undefined + /// if false is returned. + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; + + /// \returns The maximum element size, in bytes, for an element + /// unordered-atomic memory intrinsic. + unsigned getAtomicMemIntrinsicMaxElementSize() const; + + /// \returns A value which is the result of the given memory intrinsic. New + /// instructions may be created to extract the result from the given intrinsic + /// memory operation. Returns nullptr if the target cannot create a result + /// from the given intrinsic. + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType) const; + + /// \returns The type to use in a loop expansion of a memcpy call. + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, unsigned DestAlign) const; + + /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. + /// \param RemainingBytes The number of bytes to copy. 
+ /// + /// Calculates the operand types to use when copying \p RemainingBytes of + /// memory, where source and destination alignments are \p SrcAlign and + /// \p DestAlign respectively. + void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const; + + /// \returns True if the two functions have compatible attributes for inlining + /// purposes. + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + + /// \returns True if the caller and callee agree on how \p Args will be passed + /// to the callee. + /// \param[out] Args The list of compatible arguments. The implementation may + /// filter out any incompatible args from this list. + bool areFunctionArgsABICompatible(const Function *Caller, + const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const; + + /// The type of load/store indexing. + enum MemIndexedMode { + MIM_Unindexed, ///< No indexing. + MIM_PreInc, ///< Pre-incrementing. + MIM_PreDec, ///< Pre-decrementing. + MIM_PostInc, ///< Post-incrementing. + MIM_PostDec ///< Post-decrementing. + }; + + /// \returns True if the specified indexed load for the given type is legal. + bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; + + /// \returns True if the specified indexed store for the given type is legal. + bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; + + /// \returns The bitwidth of the largest vector type that should be used to + /// load/store in the given address space. + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; + + /// \returns True if the load instruction is legal to vectorize. + bool isLegalToVectorizeLoad(LoadInst *LI) const; + + /// \returns True if the store instruction is legal to vectorize. + bool isLegalToVectorizeStore(StoreInst *SI) const; + + /// \returns True if it is legal to vectorize the given load chain. + bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const; + + /// \returns True if it is legal to vectorize the given store chain. + bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const; + + /// \returns The new vector factor value if the target doesn't support \p + /// SizeInBytes loads or has a better vector factor. + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const; + + /// \returns The new vector factor value if the target doesn't support \p + /// SizeInBytes stores or has a better vector factor. + unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const; + + /// Flags describing the kind of vector reduction. + struct ReductionFlags { + ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} + bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation. + bool IsSigned; ///< Whether the operation is a signed int reduction. + bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. + }; + + /// \returns True if the target wants to handle the given reduction idiom in + /// the intrinsics form instead of the shuffle form. + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const; + + /// \returns True if the target wants to expand the given reduction intrinsic + /// into a shuffle sequence. 
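A sketch of how a vectorizer might consult this pair of hooks when emitting an integer add reduction (shouldExpandReduction is declared just below; VecTy and TTI are assumed to be in scope):

  TargetTransformInfo::ReductionFlags Flags;   // defaults: not min/max, unsigned, NaNs possible
  bool PreferIntrinsicForm =
      TTI.useReductionIntrinsic(Instruction::Add, VecTy, Flags);
  // Even when the intrinsic form is chosen, the target may later ask for it to
  // be expanded back into a shuffle sequence via shouldExpandReduction().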
+ bool shouldExpandReduction(const IntrinsicInst *II) const; + /// @} + +private: + /// Estimate the latency of specified instruction. + /// Returns 1 as the default value. + int getInstructionLatency(const Instruction *I) const; + + /// Returns the expected throughput cost of the instruction. + /// Returns -1 if the cost is unknown. + int getInstructionThroughput(const Instruction *I) const; + + /// The abstract base class used to type erase specific TTI + /// implementations. + class Concept; + + /// The template model for the base class which wraps a concrete + /// implementation in a type erased interface. + template <typename T> class Model; + + std::unique_ptr<Concept> TTIImpl; +}; + +class TargetTransformInfo::Concept { +public: + virtual ~Concept() = 0; + virtual const DataLayout &getDataLayout() const = 0; + virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; + virtual int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) = 0; + virtual int getExtCost(const Instruction *I, const Value *Src) = 0; + virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; + virtual int getCallCost(const Function *F, int NumArgs) = 0; + virtual int getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) = 0; + virtual unsigned getInliningThresholdMultiplier() = 0; + virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> ParamTys) = 0; + virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) = 0; + virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) = 0; + virtual int + getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0; + virtual bool hasBranchDivergence() = 0; + virtual bool isSourceOfDivergence(const Value *V) = 0; + virtual bool isAlwaysUniform(const Value *V) = 0; + virtual unsigned getFlatAddressSpace() = 0; + virtual bool isLoweredToCall(const Function *F) = 0; + virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, + UnrollingPreferences &UP) = 0; + virtual bool isLegalAddImmediate(int64_t Imm) = 0; + virtual bool isLegalICmpImmediate(int64_t Imm) = 0; + virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, + unsigned AddrSpace, + Instruction *I) = 0; + virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) = 0; + virtual bool canMacroFuseCmp() = 0; + virtual bool shouldFavorPostInc() const = 0; + virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual bool isLegalMaskedStore(Type *DataType) = 0; + virtual bool isLegalMaskedLoad(Type *DataType) = 0; + virtual bool isLegalMaskedScatter(Type *DataType) = 0; + virtual bool isLegalMaskedGather(Type *DataType) = 0; + virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; + virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; + virtual bool prefersVectorizedAddressing() = 0; + virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale, unsigned AddrSpace) = 0; + virtual bool LSRWithInstrQueries() = 0; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; + virtual bool isProfitableToHoist(Instruction *I) = 0; + virtual bool useAA() = 0; + virtual bool isTypeLegal(Type *Ty) = 0; + virtual unsigned getJumpBufAlignment() = 0; + virtual unsigned getJumpBufSize() = 0; + virtual bool shouldBuildLookupTables() = 0; + virtual bool 
shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual bool useColdCCForColdCall(Function &F) = 0; + virtual unsigned + getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0; + virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) = 0; + virtual bool supportsEfficientVectorElementLoadStore() = 0; + virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; + virtual const MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const = 0; + virtual bool enableInterleavedAccessVectorization() = 0; + virtual bool enableMaskedInterleavedAccessVectorization() = 0; + virtual bool isFPVectorizationPotentiallyUnsafe() = 0; + virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, + unsigned BitWidth, + unsigned AddressSpace, + unsigned Alignment, + bool *Fast) = 0; + virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; + virtual bool haveFastSqrt(Type *Ty) = 0; + virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; + virtual int getFPOpCost(Type *Ty) = 0; + virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; + virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; + virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; + virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; + virtual unsigned getNumberOfRegisters(bool Vector) = 0; + virtual unsigned getRegisterBitWidth(bool Vector) const = 0; + virtual unsigned getMinVectorRegisterBitWidth() = 0; + virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; + virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; + virtual bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; + virtual unsigned getCacheLineSize() = 0; + virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0; + virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0; + virtual unsigned getPrefetchDistance() = 0; + virtual unsigned getMinPrefetchStride() = 0; + virtual unsigned getMaxPrefetchIterationsAhead() = 0; + virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; + virtual unsigned + getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) = 0; + virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) = 0; + virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) = 0; + virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, unsigned Index) = 0; + virtual int getCFInstrCost(unsigned Opcode) = 0; + virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, const Instruction *I) = 0; + virtual int getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) = 0; + virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace, const Instruction *I) = 0; + virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) = 0; + virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) = 0; + virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, 
+ unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) = 0; + virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) = 0; + virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy, + bool IsPairwiseForm, bool IsUnsigned) = 0; + virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) = 0; + virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0; + virtual int getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys) = 0; + virtual unsigned getNumberOfParts(Type *Tp) = 0; + virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) = 0; + virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; + virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) = 0; + virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; + virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType) = 0; + virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const = 0; + virtual void getMemcpyLoopResidualLoweringType( + SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0; + virtual bool areInlineCompatible(const Function *Caller, + const Function *Callee) const = 0; + virtual bool + areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const = 0; + virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; + virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0; + virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; + virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; + virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; + virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const = 0; + virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const = 0; + virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const = 0; + virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const = 0; + virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags) const = 0; + virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; + virtual int getInstructionLatency(const Instruction *I) = 0; +}; + +template <typename T> +class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { + T Impl; + +public: + Model(T Impl) : Impl(std::move(Impl)) {} + ~Model() override {} + + const DataLayout &getDataLayout() const override { + return Impl.getDataLayout(); + } + + int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { + return Impl.getOperationCost(Opcode, Ty, OpTy); + } + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) override { + return Impl.getGEPCost(PointeeType, Ptr, Operands); + } + int getExtCost(const Instruction *I, const Value *Src) override { + return Impl.getExtCost(I, Src); + } + int getCallCost(FunctionType *FTy, int NumArgs) override { + return 
Impl.getCallCost(FTy, NumArgs); + } + int getCallCost(const Function *F, int NumArgs) override { + return Impl.getCallCost(F, NumArgs); + } + int getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) override { + return Impl.getCallCost(F, Arguments); + } + unsigned getInliningThresholdMultiplier() override { + return Impl.getInliningThresholdMultiplier(); + } + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> ParamTys) override { + return Impl.getIntrinsicCost(IID, RetTy, ParamTys); + } + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) override { + return Impl.getIntrinsicCost(IID, RetTy, Arguments); + } + int getUserCost(const User *U, ArrayRef<const Value *> Operands) override { + return Impl.getUserCost(U, Operands); + } + bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } + bool isSourceOfDivergence(const Value *V) override { + return Impl.isSourceOfDivergence(V); + } + + bool isAlwaysUniform(const Value *V) override { + return Impl.isAlwaysUniform(V); + } + + unsigned getFlatAddressSpace() override { + return Impl.getFlatAddressSpace(); + } + + bool isLoweredToCall(const Function *F) override { + return Impl.isLoweredToCall(F); + } + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + UnrollingPreferences &UP) override { + return Impl.getUnrollingPreferences(L, SE, UP); + } + bool isLegalAddImmediate(int64_t Imm) override { + return Impl.isLegalAddImmediate(Imm); + } + bool isLegalICmpImmediate(int64_t Imm) override { + return Impl.isLegalICmpImmediate(Imm); + } + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace, + Instruction *I) override { + return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale, AddrSpace, I); + } + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) override { + return Impl.isLSRCostLess(C1, C2); + } + bool canMacroFuseCmp() override { + return Impl.canMacroFuseCmp(); + } + bool shouldFavorPostInc() const override { + return Impl.shouldFavorPostInc(); + } + bool shouldFavorBackedgeIndex(const Loop *L) const override { + return Impl.shouldFavorBackedgeIndex(L); + } + bool isLegalMaskedStore(Type *DataType) override { + return Impl.isLegalMaskedStore(DataType); + } + bool isLegalMaskedLoad(Type *DataType) override { + return Impl.isLegalMaskedLoad(DataType); + } + bool isLegalMaskedScatter(Type *DataType) override { + return Impl.isLegalMaskedScatter(DataType); + } + bool isLegalMaskedGather(Type *DataType) override { + return Impl.isLegalMaskedGather(DataType); + } + bool hasDivRemOp(Type *DataType, bool IsSigned) override { + return Impl.hasDivRemOp(DataType, IsSigned); + } + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { + return Impl.hasVolatileVariant(I, AddrSpace); + } + bool prefersVectorizedAddressing() override { + return Impl.prefersVectorizedAddressing(); + } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace) override { + return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale, AddrSpace); + } + bool LSRWithInstrQueries() override { + return Impl.LSRWithInstrQueries(); + } + bool isTruncateFree(Type *Ty1, Type *Ty2) override { + return Impl.isTruncateFree(Ty1, Ty2); + } + bool isProfitableToHoist(Instruction *I) override { + return Impl.isProfitableToHoist(I); + } + bool 
useAA() override { return Impl.useAA(); } + bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } + unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } + unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } + bool shouldBuildLookupTables() override { + return Impl.shouldBuildLookupTables(); + } + bool shouldBuildLookupTablesForConstant(Constant *C) override { + return Impl.shouldBuildLookupTablesForConstant(C); + } + bool useColdCCForColdCall(Function &F) override { + return Impl.useColdCCForColdCall(F); + } + + unsigned getScalarizationOverhead(Type *Ty, bool Insert, + bool Extract) override { + return Impl.getScalarizationOverhead(Ty, Insert, Extract); + } + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) override { + return Impl.getOperandsScalarizationOverhead(Args, VF); + } + + bool supportsEfficientVectorElementLoadStore() override { + return Impl.supportsEfficientVectorElementLoadStore(); + } + + bool enableAggressiveInterleaving(bool LoopHasReductions) override { + return Impl.enableAggressiveInterleaving(LoopHasReductions); + } + const MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const override { + return Impl.enableMemCmpExpansion(IsZeroCmp); + } + bool enableInterleavedAccessVectorization() override { + return Impl.enableInterleavedAccessVectorization(); + } + bool enableMaskedInterleavedAccessVectorization() override { + return Impl.enableMaskedInterleavedAccessVectorization(); + } + bool isFPVectorizationPotentiallyUnsafe() override { + return Impl.isFPVectorizationPotentiallyUnsafe(); + } + bool allowsMisalignedMemoryAccesses(LLVMContext &Context, + unsigned BitWidth, unsigned AddressSpace, + unsigned Alignment, bool *Fast) override { + return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, + Alignment, Fast); + } + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { + return Impl.getPopcntSupport(IntTyWidthInBit); + } + bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } + + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { + return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); + } + + int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } + + int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) override { + return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); + } + int getIntImmCost(const APInt &Imm, Type *Ty) override { + return Impl.getIntImmCost(Imm, Ty); + } + int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) override { + return Impl.getIntImmCost(Opc, Idx, Imm, Ty); + } + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) override { + return Impl.getIntImmCost(IID, Idx, Imm, Ty); + } + unsigned getNumberOfRegisters(bool Vector) override { + return Impl.getNumberOfRegisters(Vector); + } + unsigned getRegisterBitWidth(bool Vector) const override { + return Impl.getRegisterBitWidth(Vector); + } + unsigned getMinVectorRegisterBitWidth() override { + return Impl.getMinVectorRegisterBitWidth(); + } + bool shouldMaximizeVectorBandwidth(bool OptSize) const override { + return Impl.shouldMaximizeVectorBandwidth(OptSize); + } + unsigned getMinimumVF(unsigned ElemWidth) const override { + return Impl.getMinimumVF(ElemWidth); + } + bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { + return Impl.shouldConsiderAddressTypePromotion( + I, 
AllowPromotionWithoutCommonHeader); + } + unsigned getCacheLineSize() override { + return Impl.getCacheLineSize(); + } + llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override { + return Impl.getCacheSize(Level); + } + llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override { + return Impl.getCacheAssociativity(Level); + } + unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } + unsigned getMinPrefetchStride() override { + return Impl.getMinPrefetchStride(); + } + unsigned getMaxPrefetchIterationsAhead() override { + return Impl.getMaxPrefetchIterationsAhead(); + } + unsigned getMaxInterleaveFactor(unsigned VF) override { + return Impl.getMaxInterleaveFactor(VF); + } + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) override { + return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize); + } + unsigned + getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) override { + return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args); + } + int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) override { + return Impl.getShuffleCost(Kind, Tp, Index, SubTp); + } + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) override { + return Impl.getCastInstrCost(Opcode, Dst, Src, I); + } + int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, + unsigned Index) override { + return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); + } + int getCFInstrCost(unsigned Opcode) override { + return Impl.getCFInstrCost(Opcode); + } + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) override { + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + } + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { + return Impl.getVectorInstrCost(Opcode, Val, Index); + } + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace, const Instruction *I) override { + return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + } + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) override { + return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + } + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) override { + return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + } + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef<unsigned> Indices, unsigned Alignment, + unsigned AddressSpace, bool UseMaskForCond, + bool UseMaskForGaps) override { + return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); + } + int getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) override { + return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); + } + int getMinMaxReductionCost(Type *Ty, Type *CondTy, + bool IsPairwiseForm, bool IsUnsigned) override { + return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); + } + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, + FastMathFlags FMF, unsigned 
ScalarizationCostPassed) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF, + ScalarizationCostPassed); + } + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); + } + int getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys) override { + return Impl.getCallInstrCost(F, RetTy, Tys); + } + unsigned getNumberOfParts(Type *Tp) override { + return Impl.getNumberOfParts(Tp); + } + int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) override { + return Impl.getAddressComputationCost(Ty, SE, Ptr); + } + unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { + return Impl.getCostOfKeepingLiveOverCall(Tys); + } + bool getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) override { + return Impl.getTgtMemIntrinsic(Inst, Info); + } + unsigned getAtomicMemIntrinsicMaxElementSize() const override { + return Impl.getAtomicMemIntrinsicMaxElementSize(); + } + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType) override { + return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); + } + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const override { + return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign); + } + void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const override { + Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAlign, DestAlign); + } + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const override { + return Impl.areInlineCompatible(Caller, Callee); + } + bool areFunctionArgsABICompatible( + const Function *Caller, const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const override { + return Impl.areFunctionArgsABICompatible(Caller, Callee, Args); + } + bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { + return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); + } + bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { + return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); + } + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { + return Impl.getLoadStoreVecRegBitWidth(AddrSpace); + } + bool isLegalToVectorizeLoad(LoadInst *LI) const override { + return Impl.isLegalToVectorizeLoad(LI); + } + bool isLegalToVectorizeStore(StoreInst *SI) const override { + return Impl.isLegalToVectorizeStore(SI); + } + bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const override { + return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, + AddrSpace); + } + bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const override { + return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, + AddrSpace); + } + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const override { + return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); + } + unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const override { + return Impl.getStoreVectorFactor(VF, StoreSize, 
ChainSizeInBytes, VecTy); + } + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const override { + return Impl.useReductionIntrinsic(Opcode, Ty, Flags); + } + bool shouldExpandReduction(const IntrinsicInst *II) const override { + return Impl.shouldExpandReduction(II); + } + int getInstructionLatency(const Instruction *I) override { + return Impl.getInstructionLatency(I); + } +}; + +template <typename T> +TargetTransformInfo::TargetTransformInfo(T Impl) + : TTIImpl(new Model<T>(Impl)) {} + +/// Analysis pass providing the \c TargetTransformInfo. +/// +/// The core idea of the TargetIRAnalysis is to expose an interface through +/// which LLVM targets can analyze and provide information about the middle +/// end's target-independent IR. This supports use cases such as target-aware +/// cost modeling of IR constructs. +/// +/// This is a function analysis because much of the cost modeling for targets +/// is done in a subtarget specific way and LLVM supports compiling different +/// functions targeting different subtargets in order to support runtime +/// dispatch according to the observed subtarget. +class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { +public: + typedef TargetTransformInfo Result; + + /// Default construct a target IR analysis. + /// + /// This will use the module's datalayout to construct a baseline + /// conservative TTI result. + TargetIRAnalysis(); + + /// Construct an IR analysis pass around a target-provide callback. + /// + /// The callback will be called with a particular function for which the TTI + /// is needed and must return a TTI object for that function. + TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); + + // Value semantics. We spell out the constructors for MSVC. + TargetIRAnalysis(const TargetIRAnalysis &Arg) + : TTICallback(Arg.TTICallback) {} + TargetIRAnalysis(TargetIRAnalysis &&Arg) + : TTICallback(std::move(Arg.TTICallback)) {} + TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { + TTICallback = RHS.TTICallback; + return *this; + } + TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { + TTICallback = std::move(RHS.TTICallback); + return *this; + } + + Result run(const Function &F, FunctionAnalysisManager &); + +private: + friend AnalysisInfoMixin<TargetIRAnalysis>; + static AnalysisKey Key; + + /// The callback used to produce a result. + /// + /// We use a completely opaque callback so that targets can provide whatever + /// mechanism they desire for constructing the TTI for a given function. + /// + /// FIXME: Should we really use std::function? It's relatively inefficient. + /// It might be possible to arrange for even stateful callbacks to outlive + /// the analysis and thus use a function_ref which would be lighter weight. + /// This may also be less error prone as the callback is likely to reference + /// the external TargetMachine, and that reference needs to never dangle. + std::function<Result(const Function &)> TTICallback; + + /// Helper function used as the callback in the default constructor. + static Result getDefaultTTI(const Function &F); +}; + +/// Wrapper pass for TargetTransformInfo. +/// +/// This pass can be constructed from a TTI object which it stores internally +/// and is queried by passes. 
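// Illustrative usage sketch (not part of the original header): the templated
// constructor above wraps a target-supplied implementation in Model<T>, so
// clients never build a TargetTransformInfo by hand. Under the new pass
// manager a pass asks the analysis manager for the TargetIRAnalysis result;
// a minimal sketch, assuming a hypothetical pass MyPass:
//
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     if (TTI.haveFastSqrt(Type::getFloatTy(F.getContext())))
//       ; // e.g. keep a sqrt-based expansion instead of a libcall
//     return PreservedAnalyses::all();
//   }
//
// Legacy pass-manager code instead calls
// getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F) on the wrapper pass
// declared below.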
+class TargetTransformInfoWrapperPass : public ImmutablePass { + TargetIRAnalysis TIRA; + Optional<TargetTransformInfo> TTI; + + virtual void anchor(); + +public: + static char ID; + + /// We must provide a default constructor for the pass but it should + /// never be used. + /// + /// Use the constructor below or call one of the creation routines. + TargetTransformInfoWrapperPass(); + + explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); + + TargetTransformInfo &getTTI(const Function &F); +}; + +/// Create an analysis pass wrapper around a TTI object. +/// +/// This analysis pass just holds the TTI instance and makes it available to +/// clients. +ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); + +} // End llvm namespace + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/TargetTransformInfoImpl.h b/clang-r353983e/include/llvm/Analysis/TargetTransformInfoImpl.h new file mode 100644 index 00000000..47059337 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -0,0 +1,868 @@ +//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides helpers for the implementation of +/// a TargetTransformInfo-conforming class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H +#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H + +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" + +namespace llvm { + +/// Base class for use as a mix-in that aids implementing +/// a TargetTransformInfo-compatible class. +class TargetTransformInfoImplBase { +protected: + typedef TargetTransformInfo TTI; + + const DataLayout &DL; + + explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} + +public: + // Provide value semantics. MSVC requires that we spell all of these out. + TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) + : DL(Arg.DL) {} + TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} + + const DataLayout &getDataLayout() const { return DL; } + + unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { + switch (Opcode) { + default: + // By default, just classify everything as 'basic'. + return TTI::TCC_Basic; + + case Instruction::GetElementPtr: + llvm_unreachable("Use getGEPCost for GEP operations!"); + + case Instruction::BitCast: + assert(OpTy && "Cast instructions must provide the operand type"); + if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) + // Identity and pointer-to-pointer casts are free. + return TTI::TCC_Free; + + // Otherwise, the default basic cost is used. 
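      // For example, "bitcast i8* %p to i32*" is a pointer-to-pointer cast and
      // is therefore free, while "bitcast i32 %x to float" falls through to
      // the basic cost below.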
+ return TTI::TCC_Basic; + + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::UDiv: + case Instruction::URem: + return TTI::TCC_Expensive; + + case Instruction::IntToPtr: { + // An inttoptr cast is free so long as the input is a legal integer type + // which doesn't contain values outside the range of a pointer. + unsigned OpSize = OpTy->getScalarSizeInBits(); + if (DL.isLegalInteger(OpSize) && + OpSize <= DL.getPointerTypeSizeInBits(Ty)) + return TTI::TCC_Free; + + // Otherwise it's not a no-op. + return TTI::TCC_Basic; + } + case Instruction::PtrToInt: { + // A ptrtoint cast is free so long as the result is large enough to store + // the pointer, and a legal integer type. + unsigned DestSize = Ty->getScalarSizeInBits(); + if (DL.isLegalInteger(DestSize) && + DestSize >= DL.getPointerTypeSizeInBits(OpTy)) + return TTI::TCC_Free; + + // Otherwise it's not a no-op. + return TTI::TCC_Basic; + } + case Instruction::Trunc: + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty))) + return TTI::TCC_Free; + + return TTI::TCC_Basic; + } + } + + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) { + // In the basic model, we just assume that all-constant GEPs will be folded + // into their uses via addressing modes. + for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) + if (!isa<Constant>(Operands[Idx])) + return TTI::TCC_Basic; + + return TTI::TCC_Free; + } + + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) { + JTSize = 0; + return SI.getNumCases(); + } + + int getExtCost(const Instruction *I, const Value *Src) { + return TTI::TCC_Basic; + } + + unsigned getCallCost(FunctionType *FTy, int NumArgs) { + assert(FTy && "FunctionType must be provided to this routine."); + + // The target-independent implementation just measures the size of the + // function by approximating that each argument will take on average one + // instruction to prepare. + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = FTy->getNumParams(); + + return TTI::TCC_Basic * (NumArgs + 1); + } + + unsigned getInliningThresholdMultiplier() { return 1; } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> ParamTys) { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + // FIXME: This is wrong for libc intrinsics. 
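    // Unrecognized intrinsics take the basic cost below; the cases that
    // follow (e.g. llvm.dbg.value, llvm.lifetime.start, llvm.assume) lower to
    // no machine code at all and are therefore modeled as free.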
+ return TTI::TCC_Basic; + + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_param: + case Intrinsic::coro_subfn_addr: + // These intrinsics don't actually represent code after lowering. + return TTI::TCC_Free; + } + } + + bool hasBranchDivergence() { return false; } + + bool isSourceOfDivergence(const Value *V) { return false; } + + bool isAlwaysUniform(const Value *V) { return false; } + + unsigned getFlatAddressSpace () { + return -1; + } + + bool isLoweredToCall(const Function *F) { + assert(F && "A concrete function must be provided to this routine."); + + // FIXME: These should almost certainly not be handled here, and instead + // handled with the help of TLI or the target itself. This was largely + // ported from existing analysis heuristics here so that such refactorings + // can take place in the future. + + if (F->isIntrinsic()) + return false; + + if (F->hasLocalLinkage() || !F->hasName()) + return true; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "fmin" || Name == "fminf" || Name == "fminl" || + Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || + Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || + Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + return false; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || + Name == "exp2l" || Name == "exp2f" || Name == "floor" || + Name == "floorf" || Name == "ceil" || Name == "round" || + Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || + Name == "llabs") + return false; + + return true; + } + + void getUnrollingPreferences(Loop *, ScalarEvolution &, + TTI::UnrollingPreferences &) {} + + bool isLegalAddImmediate(int64_t Imm) { return false; } + + bool isLegalICmpImmediate(int64_t Imm) { return false; } + + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace, Instruction *I = nullptr) { + // Guess that only reg and reg+reg addressing is allowed. This heuristic is + // taken from the implementation of LSR. 
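    // Concretely, this default accepts [reg] and [reg + reg] style addresses
    // and rejects anything with a global base, a non-zero constant offset, or
    // a scaled index (e.g. [reg + 4] or [@g + reg]).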
+ return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); + } + + bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { + return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); + } + + bool canMacroFuseCmp() { return false; } + + bool shouldFavorPostInc() const { return false; } + + bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + + bool isLegalMaskedStore(Type *DataType) { return false; } + + bool isLegalMaskedLoad(Type *DataType) { return false; } + + bool isLegalMaskedScatter(Type *DataType) { return false; } + + bool isLegalMaskedGather(Type *DataType) { return false; } + + bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } + + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } + + bool prefersVectorizedAddressing() { return true; } + + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { + // Guess that all legal addressing mode are free. + if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale, AddrSpace)) + return 0; + return -1; + } + + bool LSRWithInstrQueries() { return false; } + + bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } + + bool isProfitableToHoist(Instruction *I) { return true; } + + bool useAA() { return false; } + + bool isTypeLegal(Type *Ty) { return false; } + + unsigned getJumpBufAlignment() { return 0; } + + unsigned getJumpBufSize() { return 0; } + + bool shouldBuildLookupTables() { return true; } + bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + + bool useColdCCForColdCall(Function &F) { return false; } + + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + return 0; + } + + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) { return 0; } + + bool supportsEfficientVectorElementLoadStore() { return false; } + + bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + + const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const { + return nullptr; + } + + bool enableInterleavedAccessVectorization() { return false; } + + bool enableMaskedInterleavedAccessVectorization() { return false; } + + bool isFPVectorizationPotentiallyUnsafe() { return false; } + + bool allowsMisalignedMemoryAccesses(LLVMContext &Context, + unsigned BitWidth, + unsigned AddressSpace, + unsigned Alignment, + bool *Fast) { return false; } + + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { + return TTI::PSK_Software; + } + + bool haveFastSqrt(Type *Ty) { return false; } + + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } + + unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } + + int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { + return 0; + } + + unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { + return TTI::TCC_Free; + } + + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) { + return TTI::TCC_Free; + } + + unsigned getNumberOfRegisters(bool Vector) { return 8; } + + unsigned getRegisterBitWidth(bool Vector) const { return 32; } + + unsigned getMinVectorRegisterBitWidth() 
{ return 128; } + + bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } + + unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } + + bool + shouldConsiderAddressTypePromotion(const Instruction &I, + bool &AllowPromotionWithoutCommonHeader) { + AllowPromotionWithoutCommonHeader = false; + return false; + } + + unsigned getCacheLineSize() { return 0; } + + llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) { + switch (Level) { + case TargetTransformInfo::CacheLevel::L1D: + LLVM_FALLTHROUGH; + case TargetTransformInfo::CacheLevel::L2D: + return llvm::Optional<unsigned>(); + } + + llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); + } + + llvm::Optional<unsigned> getCacheAssociativity( + TargetTransformInfo::CacheLevel Level) { + switch (Level) { + case TargetTransformInfo::CacheLevel::L1D: + LLVM_FALLTHROUGH; + case TargetTransformInfo::CacheLevel::L2D: + return llvm::Optional<unsigned>(); + } + + llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); + } + + unsigned getPrefetchDistance() { return 0; } + + unsigned getMinPrefetchStride() { return 1; } + + unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) { + return 1; + } + + unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index, + Type *SubTp) { + return 1; + } + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { return 1; } + + unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, + VectorType *VecTy, unsigned Index) { + return 1; + } + + unsigned getCFInstrCost(unsigned Opcode) { return 1; } + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { + return 1; + } + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { + return 1; + } + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace, const Instruction *I) { + return 1; + } + + unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { + return 1; + } + + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, + unsigned Alignment) { + return 1; + } + + unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace, + bool UseMaskForCond = false, + bool UseMaskForGaps = false) { + return 1; + } + + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) { + return 1; + } + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { + return 1; + } + + unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { + return 1; + } + + unsigned getNumberOfParts(Type *Tp) { return 0; } + + unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, + const SCEV *) { + return 0; + } + + unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } + + unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } + + unsigned 
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } + + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { + return false; + } + + unsigned getAtomicMemIntrinsicMaxElementSize() const { + // Note for overrides: You must ensure for all element unordered-atomic + // memory intrinsics that all power-of-2 element sizes up to, and + // including, the return value of this method have a corresponding + // runtime lib call. These runtime lib call definitions can be found + // in RuntimeLibcalls.h + return 0; + } + + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType) { + return nullptr; + } + + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, unsigned DestAlign) const { + return Type::getInt8Ty(Context); + } + + void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const { + for (unsigned i = 0; i != RemainingBytes; ++i) + OpsOut.push_back(Type::getInt8Ty(Context)); + } + + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const { + return (Caller->getFnAttribute("target-cpu") == + Callee->getFnAttribute("target-cpu")) && + (Caller->getFnAttribute("target-features") == + Callee->getFnAttribute("target-features")); + } + + bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const { + return (Caller->getFnAttribute("target-cpu") == + Callee->getFnAttribute("target-cpu")) && + (Caller->getFnAttribute("target-features") == + Callee->getFnAttribute("target-features")); + } + + bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, + const DataLayout &DL) const { + return false; + } + + bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, + const DataLayout &DL) const { + return false; + } + + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } + + bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } + + bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } + + bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const { + return true; + } + + bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, + unsigned Alignment, + unsigned AddrSpace) const { + return true; + } + + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + return VF; + } + + unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + return VF; + } + + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return false; + } + + bool shouldExpandReduction(const IntrinsicInst *II) const { + return true; + } + +protected: + // Obtain the minimum required size to hold the value (without the sign) + // In case of a vector it returns the min required size for one element. 
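  // Worked examples for the helper below: an i32 constant 100 needs 7 bits
  // and is treated as unsigned, an i32 constant -4 needs 2 bits and marks the
  // value as signed, a constant vector takes the maximum over its elements
  // (signed if any element is negative), and a sext/zext from i8 yields
  // 7 signed / 8 unsigned bits respectively.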
+ unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { + if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { + const auto* VectorValue = cast<Constant>(Val); + + // In case of a vector need to pick the max between the min + // required size for each element + auto *VT = cast<VectorType>(Val->getType()); + + // Assume unsigned elements + isSigned = false; + + // The max required size is the total vector width divided by num + // of elements in the vector + unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); + + unsigned MinRequiredSize = 0; + for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { + if (auto* IntElement = + dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { + bool signedElement = IntElement->getValue().isNegative(); + // Get the element min required size. + unsigned ElementMinRequiredSize = + IntElement->getValue().getMinSignedBits() - 1; + // In case one element is signed then all the vector is signed. + isSigned |= signedElement; + // Save the max required bit size between all the elements. + MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); + } + else { + // not an int constant element + return MaxRequiredSize; + } + } + return MinRequiredSize; + } + + if (const auto* CI = dyn_cast<ConstantInt>(Val)) { + isSigned = CI->getValue().isNegative(); + return CI->getValue().getMinSignedBits() - 1; + } + + if (const auto* Cast = dyn_cast<SExtInst>(Val)) { + isSigned = true; + return Cast->getSrcTy()->getScalarSizeInBits() - 1; + } + + if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { + isSigned = false; + return Cast->getSrcTy()->getScalarSizeInBits(); + } + + isSigned = false; + return Val->getType()->getScalarSizeInBits(); + } + + bool isStridedAccess(const SCEV *Ptr) { + return Ptr && isa<SCEVAddRecExpr>(Ptr); + } + + const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, + const SCEV *Ptr) { + if (!isStridedAccess(Ptr)) + return nullptr; + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); + return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); + } + + bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, + int64_t MergeDistance) { + const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); + if (!Step) + return false; + APInt StrideVal = Step->getAPInt(); + if (StrideVal.getBitWidth() > 64) + return false; + // FIXME: Need to take absolute value for negative stride case. + return StrideVal.getSExtValue() < MergeDistance; + } +}; + +/// CRTP base class for use as a mix-in that aids implementing +/// a TargetTransformInfo-compatible class. +template <typename T> +class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { +private: + typedef TargetTransformInfoImplBase BaseT; + +protected: + explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} + +public: + using BaseT::getCallCost; + + unsigned getCallCost(const Function *F, int NumArgs) { + assert(F && "A concrete function must be provided to this routine."); + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. 
+ NumArgs = F->arg_size(); + + if (Intrinsic::ID IID = F->getIntrinsicID()) { + FunctionType *FTy = F->getFunctionType(); + SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); + return static_cast<T *>(this) + ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); + } + + if (!static_cast<T *>(this)->isLoweredToCall(F)) + return TTI::TCC_Basic; // Give a basic cost if it will be lowered + // directly. + + return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs); + } + + unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) { + // Simply delegate to generic handling of the call. + // FIXME: We should use instsimplify or something else to catch calls which + // will constant fold with these arguments. + return static_cast<T *>(this)->getCallCost(F, Arguments.size()); + } + + using BaseT::getGEPCost; + + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands) { + const GlobalValue *BaseGV = nullptr; + if (Ptr != nullptr) { + // TODO: will remove this when pointers have an opaque type. + assert(Ptr->getType()->getScalarType()->getPointerElementType() == + PointeeType && + "explicit pointee type doesn't match operand's pointee type"); + BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); + } + bool HasBaseReg = (BaseGV == nullptr); + + auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); + APInt BaseOffset(PtrSizeBits, 0); + int64_t Scale = 0; + + auto GTI = gep_type_begin(PointeeType, Operands); + Type *TargetType = nullptr; + + // Handle the case where the GEP instruction has a single operand, + // the basis, therefore TargetType is a nullptr. + if (Operands.empty()) + return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; + + for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { + TargetType = GTI.getIndexedType(); + // We assume that the cost of Scalar GEP with constant index and the + // cost of Vector GEP with splat constant index are the same. + const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); + if (!ConstIdx) + if (auto Splat = getSplatValue(*I)) + ConstIdx = dyn_cast<ConstantInt>(Splat); + if (StructType *STy = GTI.getStructTypeOrNull()) { + // For structures the index is always splat or scalar constant + assert(ConstIdx && "Unexpected GEP index"); + uint64_t Field = ConstIdx->getZExtValue(); + BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); + } else { + int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); + if (ConstIdx) { + BaseOffset += + ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; + } else { + // Needs scale register. + if (Scale != 0) + // No addressing mode takes two scale registers. + return TTI::TCC_Basic; + Scale = ElementSize; + } + } + } + + // Assumes the address space is 0 when Ptr is nullptr. + unsigned AS = + (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); + + if (static_cast<T *>(this)->isLegalAddressingMode( + TargetType, const_cast<GlobalValue *>(BaseGV), + BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS)) + return TTI::TCC_Free; + return TTI::TCC_Basic; + } + + using BaseT::getIntrinsicCost; + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) { + // Delegate to the generic intrinsic handling code. This mostly provides an + // opportunity for targets to (for example) special case the cost of + // certain intrinsics based on constants used as arguments. 
+ SmallVector<Type *, 8> ParamTys; + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); + return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys); + } + + unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { + if (isa<PHINode>(U)) + return TTI::TCC_Free; // Model all PHI nodes as free. + + // Static alloca doesn't generate target instructions. + if (auto *A = dyn_cast<AllocaInst>(U)) + if (A->isStaticAlloca()) + return TTI::TCC_Free; + + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { + return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), + GEP->getPointerOperand(), + Operands.drop_front()); + } + + if (auto CS = ImmutableCallSite(U)) { + const Function *F = CS.getCalledFunction(); + if (!F) { + // Just use the called value type. + Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); + return static_cast<T *>(this) + ->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); + } + + SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); + return static_cast<T *>(this)->getCallCost(F, Arguments); + } + + if (const CastInst *CI = dyn_cast<CastInst>(U)) { + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + return TTI::TCC_Free; + if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI)) + return static_cast<T *>(this)->getExtCost(CI, Operands.back()); + } + + return static_cast<T *>(this)->getOperationCost( + Operator::getOpcode(U), U->getType(), + U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); + } + + int getInstructionLatency(const Instruction *I) { + SmallVector<const Value *, 4> Operands(I->value_op_begin(), + I->value_op_end()); + if (getUserCost(I, Operands) == TTI::TCC_Free) + return 0; + + if (isa<LoadInst>(I)) + return 4; + + Type *DstTy = I->getType(); + + // Usually an intrinsic is a simple instruction. + // A real function call is much slower. + if (auto *CI = dyn_cast<CallInst>(I)) { + const Function *F = CI->getCalledFunction(); + if (!F || static_cast<T *>(this)->isLoweredToCall(F)) + return 40; + // Some intrinsics return a value and a flag, we use the value type + // to decide its latency. + if (StructType* StructTy = dyn_cast<StructType>(DstTy)) + DstTy = StructTy->getElementType(0); + // Fall through to simple instructions. + } + + if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) + DstTy = VectorTy->getElementType(); + if (DstTy->isFloatingPointTy()) + return 3; + + return 1; + } +}; +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/Trace.h b/clang-r353983e/include/llvm/Analysis/Trace.h new file mode 100644 index 00000000..a1ffd03c --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/Trace.h @@ -0,0 +1,111 @@ +//===- llvm/Analysis/Trace.h - Represent one trace of LLVM code -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class represents a single trace of LLVM basic blocks. A trace is a +// single entry, multiple exit, region of code that is often hot. 
Trace-based +// optimizations treat traces almost like they are a large, strange, basic +// block: because the trace path is assumed to be hot, optimizations for the +// fall-through path are made at the expense of the non-fall-through paths. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TRACE_H +#define LLVM_ANALYSIS_TRACE_H + +#include <cassert> +#include <vector> + +namespace llvm { + +class BasicBlock; +class Function; +class Module; +class raw_ostream; + +class Trace { + using BasicBlockListType = std::vector<BasicBlock *>; + + BasicBlockListType BasicBlocks; + +public: + /// Trace ctor - Make a new trace from a vector of basic blocks, + /// residing in the function which is the parent of the first + /// basic block in the vector. + Trace(const std::vector<BasicBlock *> &vBB) : BasicBlocks (vBB) {} + + /// getEntryBasicBlock - Return the entry basic block (first block) + /// of the trace. + BasicBlock *getEntryBasicBlock () const { return BasicBlocks[0]; } + + /// operator[]/getBlock - Return basic block N in the trace. + BasicBlock *operator[](unsigned i) const { return BasicBlocks[i]; } + BasicBlock *getBlock(unsigned i) const { return BasicBlocks[i]; } + + /// getFunction - Return this trace's parent function. + Function *getFunction () const; + + /// getModule - Return this Module that contains this trace's parent + /// function. + Module *getModule () const; + + /// getBlockIndex - Return the index of the specified basic block in the + /// trace, or -1 if it is not in the trace. + int getBlockIndex(const BasicBlock *X) const { + for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) + if (BasicBlocks[i] == X) + return i; + return -1; + } + + /// contains - Returns true if this trace contains the given basic + /// block. + bool contains(const BasicBlock *X) const { + return getBlockIndex(X) != -1; + } + + /// Returns true if B1 occurs before B2 in the trace, or if it is the same + /// block as B2.. Both blocks must be in the trace. + bool dominates(const BasicBlock *B1, const BasicBlock *B2) const { + int B1Idx = getBlockIndex(B1), B2Idx = getBlockIndex(B2); + assert(B1Idx != -1 && B2Idx != -1 && "Block is not in the trace!"); + return B1Idx <= B2Idx; + } + + // BasicBlock iterators... + using iterator = BasicBlockListType::iterator; + using const_iterator = BasicBlockListType::const_iterator; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + iterator begin() { return BasicBlocks.begin(); } + const_iterator begin() const { return BasicBlocks.begin(); } + iterator end () { return BasicBlocks.end(); } + const_iterator end () const { return BasicBlocks.end(); } + + reverse_iterator rbegin() { return BasicBlocks.rbegin(); } + const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); } + reverse_iterator rend () { return BasicBlocks.rend(); } + const_reverse_iterator rend () const { return BasicBlocks.rend(); } + + unsigned size() const { return BasicBlocks.size(); } + bool empty() const { return BasicBlocks.empty(); } + + iterator erase(iterator q) { return BasicBlocks.erase (q); } + iterator erase(iterator q1, iterator q2) { return BasicBlocks.erase (q1, q2); } + + /// print - Write trace to output stream. + void print(raw_ostream &O) const; + + /// dump - Debugger convenience method; writes trace to standard error + /// output stream. 
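  // A usage sketch for the Trace class above (illustrative only; Entry, Body
  // and Latch are hypothetical blocks already known to form a hot path):
  //
  //   std::vector<BasicBlock *> Path = {Entry, Body, Latch};
  //   Trace T(Path);
  //   T.dominates(Entry, Latch);  // true: Entry precedes Latch in the trace
  //   T.getBlockIndex(Body);      // 1
  //   T.contains(Latch);          // true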
+ void dump() const; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_TRACE_H diff --git a/clang-r353983e/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/clang-r353983e/include/llvm/Analysis/TypeBasedAliasAnalysis.h new file mode 100644 index 00000000..12350054 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TypeBasedAliasAnalysis.h @@ -0,0 +1,93 @@ +//===- TypeBasedAliasAnalysis.h - Type-Based Alias Analysis -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This is the interface for a metadata-based TBAA. See the source file for +/// details on the algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H +#define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include <memory> + +namespace llvm { + +class Function; +class MDNode; +class MemoryLocation; + +/// A simple AA result that uses TBAA metadata to answer queries. +class TypeBasedAAResult : public AAResultBase<TypeBasedAAResult> { + friend AAResultBase<TypeBasedAAResult>; + +public: + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + FunctionModRefBehavior getModRefBehavior(const CallBase *Call); + FunctionModRefBehavior getModRefBehavior(const Function *F); + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2); + +private: + bool Aliases(const MDNode *A, const MDNode *B) const; + bool PathAliases(const MDNode *A, const MDNode *B) const; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class TypeBasedAA : public AnalysisInfoMixin<TypeBasedAA> { + friend AnalysisInfoMixin<TypeBasedAA>; + + static AnalysisKey Key; + +public: + using Result = TypeBasedAAResult; + + TypeBasedAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the TypeBasedAAResult object. +class TypeBasedAAWrapperPass : public ImmutablePass { + std::unique_ptr<TypeBasedAAResult> Result; + +public: + static char ID; + + TypeBasedAAWrapperPass(); + + TypeBasedAAResult &getResult() { return *Result; } + const TypeBasedAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createTypeBasedAAWrapperPass - This pass implements metadata-based +// type-based alias analysis. 
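// A hedged usage sketch for the new pass manager analysis above (not part of
// the original header): TBAA is normally consumed through the aggregated
// AAManager result rather than queried directly.
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([] { return TypeBasedAA(); });
//   AAManager AA;
//   AA.registerFunctionAnalysis<TypeBasedAA>();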
+// +ImmutablePass *createTypeBasedAAWrapperPass(); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H diff --git a/clang-r353983e/include/llvm/Analysis/TypeMetadataUtils.h b/clang-r353983e/include/llvm/Analysis/TypeMetadataUtils.h new file mode 100644 index 00000000..82cf8efe --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/TypeMetadataUtils.h @@ -0,0 +1,55 @@ +//===- TypeMetadataUtils.h - Utilities related to type metadata --*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains functions that make it easier to manipulate type metadata +// for devirtualization. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TYPEMETADATAUTILS_H +#define LLVM_ANALYSIS_TYPEMETADATAUTILS_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" + +namespace llvm { + +class DominatorTree; + +/// The type of CFI jumptable needed for a function. +enum CfiFunctionLinkage { + CFL_Definition = 0, + CFL_Declaration = 1, + CFL_WeakDeclaration = 2 +}; + +/// A call site that could be devirtualized. +struct DevirtCallSite { + /// The offset from the address point to the virtual function. + uint64_t Offset; + /// The call site itself. + CallSite CS; +}; + +/// Given a call to the intrinsic \@llvm.type.test, find all devirtualizable +/// call sites based on the call and return them in DevirtCalls. +void findDevirtualizableCallsForTypeTest( + SmallVectorImpl<DevirtCallSite> &DevirtCalls, + SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI, + DominatorTree &DT); + +/// Given a call to the intrinsic \@llvm.type.checked.load, find all +/// devirtualizable call sites based on the call and return them in DevirtCalls. +void findDevirtualizableCallsForTypeCheckedLoad( + SmallVectorImpl<DevirtCallSite> &DevirtCalls, + SmallVectorImpl<Instruction *> &LoadedPtrs, + SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, + const CallInst *CI, DominatorTree &DT); +} + +#endif diff --git a/clang-r353983e/include/llvm/Analysis/Utils/Local.h b/clang-r353983e/include/llvm/Analysis/Utils/Local.h new file mode 100644 index 00000000..acbdf5dc --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/Utils/Local.h @@ -0,0 +1,90 @@ +//===- Local.h - Functions to perform local transformations -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform various local transformations to the +// program. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_UTILS_LOCAL_H +#define LLVM_ANALYSIS_UTILS_LOCAL_H + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" + +namespace llvm { + +/// Given a getelementptr instruction/constantexpr, emit the code necessary to +/// compute the offset from the base pointer (without adding in the base +/// pointer). Return the result as a signed integer of intptr size. 
+/// When NoAssumptions is true, no assumptions about index computation not +/// overflowing is made. +template <typename IRBuilderTy> +Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, + bool NoAssumptions = false) { + GEPOperator *GEPOp = cast<GEPOperator>(GEP); + Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); + Value *Result = Constant::getNullValue(IntPtrTy); + + // If the GEP is inbounds, we know that none of the addressing operations will + // overflow in an unsigned sense. + bool isInBounds = GEPOp->isInBounds() && !NoAssumptions; + + // Build a mask for high order bits. + unsigned IntPtrWidth = IntPtrTy->getScalarType()->getIntegerBitWidth(); + uint64_t PtrSizeMask = + std::numeric_limits<uint64_t>::max() >> (64 - IntPtrWidth); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; + ++i, ++GTI) { + Value *Op = *i; + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; + if (Constant *OpC = dyn_cast<Constant>(Op)) { + if (OpC->isZeroValue()) + continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = GTI.getStructTypeOrNull()) { + if (OpC->getType()->isVectorTy()) + OpC = OpC->getSplatValue(); + + uint64_t OpValue = cast<ConstantInt>(OpC)->getZExtValue(); + Size = DL.getStructLayout(STy)->getElementOffset(OpValue); + + if (Size) + Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); + continue; + } + + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); + // Emit an add instruction. + Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); + continue; + } + // Convert to correct type. + if (Op->getType() != IntPtrTy) + Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); + if (Size != 1) { + // We'll let instcombine(mul) convert this to a shl if possible. + Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".idx", isInBounds /*NUW*/); + } + + // Emit an add instruction. + Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); + } + return Result; +} + +} + +#endif // LLVM_TRANSFORMS_UTILS_LOCAL_H diff --git a/clang-r353983e/include/llvm/Analysis/ValueLattice.h b/clang-r353983e/include/llvm/Analysis/ValueLattice.h new file mode 100644 index 00000000..56519d7d --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ValueLattice.h @@ -0,0 +1,322 @@ +//===- ValueLattice.h - Value constraint analysis ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VALUELATTICE_H +#define LLVM_ANALYSIS_VALUELATTICE_H + +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +// +//===----------------------------------------------------------------------===// +// ValueLatticeElement +//===----------------------------------------------------------------------===// + +/// This class represents lattice values for constants. +/// +/// FIXME: This is basically just for bringup, this can be made a lot more rich +/// in the future. 
+/// + +namespace llvm { +class ValueLatticeElement { + enum ValueLatticeElementTy { + /// This Value has no known value yet. As a result, this implies the + /// producing instruction is dead. Caution: We use this as the starting + /// state in our local meet rules. In this usage, it's taken to mean + /// "nothing known yet". + undefined, + + /// This Value has a specific constant value. (For constant integers, + /// constantrange is used instead. Integer typed constantexprs can appear + /// as constant.) + constant, + + /// This Value is known to not have the specified value. (For constant + /// integers, constantrange is used instead. As above, integer typed + /// constantexprs can appear here.) + notconstant, + + /// The Value falls within this range. (Used only for integer typed values.) + constantrange, + + /// We can not precisely model the dynamic values this value might take. + overdefined + }; + + ValueLatticeElementTy Tag; + + /// The union either stores a pointer to a constant or a constant range, + /// associated to the lattice element. We have to ensure that Range is + /// initialized or destroyed when changing state to or from constantrange. + union { + Constant *ConstVal; + ConstantRange Range; + }; + +public: + // Const and Range are initialized on-demand. + ValueLatticeElement() : Tag(undefined) {} + + /// Custom destructor to ensure Range is properly destroyed, when the object + /// is deallocated. + ~ValueLatticeElement() { + switch (Tag) { + case overdefined: + case undefined: + case constant: + case notconstant: + break; + case constantrange: + Range.~ConstantRange(); + break; + }; + } + + /// Custom copy constructor, to ensure Range gets initialized when + /// copying a constant range lattice element. + ValueLatticeElement(const ValueLatticeElement &Other) : Tag(undefined) { + *this = Other; + } + + /// Custom assignment operator, to ensure Range gets initialized when + /// assigning a constant range lattice element. + ValueLatticeElement &operator=(const ValueLatticeElement &Other) { + // If we change the state of this from constant range to non constant range, + // destroy Range. + if (isConstantRange() && !Other.isConstantRange()) + Range.~ConstantRange(); + + // If we change the state of this from a valid ConstVal to another a state + // without a valid ConstVal, zero the pointer. 
+ if ((isConstant() || isNotConstant()) && !Other.isConstant() && + !Other.isNotConstant()) + ConstVal = nullptr; + + switch (Other.Tag) { + case constantrange: + if (!isConstantRange()) + new (&Range) ConstantRange(Other.Range); + else + Range = Other.Range; + break; + case constant: + case notconstant: + ConstVal = Other.ConstVal; + break; + case overdefined: + case undefined: + break; + } + Tag = Other.Tag; + return *this; + } + + static ValueLatticeElement get(Constant *C) { + ValueLatticeElement Res; + if (!isa<UndefValue>(C)) + Res.markConstant(C); + return Res; + } + static ValueLatticeElement getNot(Constant *C) { + ValueLatticeElement Res; + if (!isa<UndefValue>(C)) + Res.markNotConstant(C); + return Res; + } + static ValueLatticeElement getRange(ConstantRange CR) { + ValueLatticeElement Res; + Res.markConstantRange(std::move(CR)); + return Res; + } + static ValueLatticeElement getOverdefined() { + ValueLatticeElement Res; + Res.markOverdefined(); + return Res; + } + + bool isUndefined() const { return Tag == undefined; } + bool isConstant() const { return Tag == constant; } + bool isNotConstant() const { return Tag == notconstant; } + bool isConstantRange() const { return Tag == constantrange; } + bool isOverdefined() const { return Tag == overdefined; } + + Constant *getConstant() const { + assert(isConstant() && "Cannot get the constant of a non-constant!"); + return ConstVal; + } + + Constant *getNotConstant() const { + assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); + return ConstVal; + } + + const ConstantRange &getConstantRange() const { + assert(isConstantRange() && + "Cannot get the constant-range of a non-constant-range!"); + return Range; + } + + Optional<APInt> asConstantInteger() const { + if (isConstant() && isa<ConstantInt>(getConstant())) { + return cast<ConstantInt>(getConstant())->getValue(); + } else if (isConstantRange() && getConstantRange().isSingleElement()) { + return *getConstantRange().getSingleElement(); + } + return None; + } + +private: + void markOverdefined() { + if (isOverdefined()) + return; + if (isConstant() || isNotConstant()) + ConstVal = nullptr; + if (isConstantRange()) + Range.~ConstantRange(); + Tag = overdefined; + } + + void markConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + markConstantRange(ConstantRange(CI->getValue())); + return; + } + if (isa<UndefValue>(V)) + return; + + assert((!isConstant() || getConstant() == V) && + "Marking constant with different value"); + assert(isUndefined()); + Tag = constant; + ConstVal = V; + } + + void markNotConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + markConstantRange(ConstantRange(CI->getValue() + 1, CI->getValue())); + return; + } + if (isa<UndefValue>(V)) + return; + + assert((!isConstant() || getConstant() != V) && + "Marking constant !constant with same value"); + assert((!isNotConstant() || getNotConstant() == V) && + "Marking !constant with different value"); + assert(isUndefined() || isConstant()); + Tag = notconstant; + ConstVal = V; + } + + void markConstantRange(ConstantRange NewR) { + if (isConstantRange()) { + if (NewR.isEmptySet()) + markOverdefined(); + else { + Range = std::move(NewR); + } + return; + } + + assert(isUndefined()); + if (NewR.isEmptySet()) + markOverdefined(); + else { + Tag = constantrange; + new (&Range) ConstantRange(std::move(NewR)); + } + } + +public: + /// Updates this object to 
approximate both this object and RHS. Returns + /// true if this object has been changed. + bool mergeIn(const ValueLatticeElement &RHS, const DataLayout &DL) { + if (RHS.isUndefined() || isOverdefined()) + return false; + if (RHS.isOverdefined()) { + markOverdefined(); + return true; + } + + if (isUndefined()) { + *this = RHS; + return !RHS.isUndefined(); + } + + if (isConstant()) { + if (RHS.isConstant() && getConstant() == RHS.getConstant()) + return false; + markOverdefined(); + return true; + } + + if (isNotConstant()) { + if (RHS.isNotConstant() && getNotConstant() == RHS.getNotConstant()) + return false; + markOverdefined(); + return true; + } + + assert(isConstantRange() && "New ValueLattice type?"); + if (!RHS.isConstantRange()) { + // We can get here if we've encountered a constantexpr of integer type + // and merge it with a constantrange. + markOverdefined(); + return true; + } + ConstantRange NewR = getConstantRange().unionWith(RHS.getConstantRange()); + if (NewR.isFullSet()) + markOverdefined(); + else if (NewR == getConstantRange()) + return false; + else + markConstantRange(std::move(NewR)); + return true; + } + + ConstantInt *getConstantInt() const { + assert(isConstant() && isa<ConstantInt>(getConstant()) && + "No integer constant"); + return cast<ConstantInt>(getConstant()); + } + + /// Compares this symbolic value with Other using Pred and returns either + /// true, false or undef constants, or nullptr if the comparison cannot be + /// evaluated. + Constant *getCompare(CmpInst::Predicate Pred, Type *Ty, + const ValueLatticeElement &Other) const { + if (isUndefined() || Other.isUndefined()) + return UndefValue::get(Ty); + + if (isConstant() && Other.isConstant()) + return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); + + // Integer constants are represented as ConstantRanges with single + // elements. + if (!isConstantRange() || !Other.isConstantRange()) + return nullptr; + + const auto &CR = getConstantRange(); + const auto &OtherCR = Other.getConstantRange(); + if (ConstantRange::makeSatisfyingICmpRegion(Pred, OtherCR).contains(CR)) + return ConstantInt::getTrue(Ty); + if (ConstantRange::makeSatisfyingICmpRegion( + CmpInst::getInversePredicate(Pred), OtherCR) + .contains(CR)) + return ConstantInt::getFalse(Ty); + + return nullptr; + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val); + +} // end namespace llvm +#endif diff --git a/clang-r353983e/include/llvm/Analysis/ValueLatticeUtils.h b/clang-r353983e/include/llvm/Analysis/ValueLatticeUtils.h new file mode 100644 index 00000000..a3bbb961 --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ValueLatticeUtils.h @@ -0,0 +1,40 @@ +//===-- ValueLatticeUtils.h - Utils for solving lattices --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares common functions useful for performing data-flow analyses +// that propagate values across function boundaries. 
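// Worked example for ValueLatticeElement::mergeIn above (a sketch; the ranges
// are illustrative and DL is an in-scope DataLayout):
//
//   ValueLatticeElement A = ValueLatticeElement::getRange(
//       ConstantRange(APInt(32, 0), APInt(32, 4)));  // [0, 4)
//   ValueLatticeElement B = ValueLatticeElement::getRange(
//       ConstantRange(APInt(32, 2), APInt(32, 8)));  // [2, 8)
//   A.mergeIn(B, DL);  // A widens to the union [0, 8); returns true
//   A.mergeIn(B, DL);  // A already covers B; returns false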
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VALUELATTICEUTILS_H +#define LLVM_ANALYSIS_VALUELATTICEUTILS_H + +namespace llvm { + +class Function; +class GlobalVariable; + +/// Determine if the values of the given function's arguments can be tracked +/// interprocedurally. The value of an argument can be tracked if the function +/// has local linkage and its address is not taken. +bool canTrackArgumentsInterprocedurally(Function *F); + +/// Determine if the values of the given function's returns can be tracked +/// interprocedurally. Return values can be tracked if the function has an +/// exact definition and it doesn't have the "naked" attribute. Naked functions +/// may contain assembly code that returns untrackable values. +bool canTrackReturnsInterprocedurally(Function *F); + +/// Determine if the value maintained in the given global variable can be +/// tracked interprocedurally. A value can be tracked if the global variable +/// has local linkage and is only used by non-volatile loads and stores. +bool canTrackGlobalVariableInterprocedurally(GlobalVariable *GV); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_VALUELATTICEUTILS_H diff --git a/clang-r353983e/include/llvm/Analysis/ValueTracking.h b/clang-r353983e/include/llvm/Analysis/ValueTracking.h new file mode 100644 index 00000000..b3c07b1e --- /dev/null +++ b/clang-r353983e/include/llvm/Analysis/ValueTracking.h @@ -0,0 +1,620 @@ +//===- llvm/Analysis/ValueTracking.h - Walk computations --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help analyze properties that chains of +// computations have. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VALUETRACKING_H +#define LLVM_ANALYSIS_VALUETRACKING_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Intrinsics.h" +#include <cassert> +#include <cstdint> + +namespace llvm { + +class AddOperator; +class APInt; +class AssumptionCache; +class DataLayout; +class DominatorTree; +class GEPOperator; +class IntrinsicInst; +struct KnownBits; +class Loop; +class LoopInfo; +class MDNode; +class OptimizationRemarkEmitter; +class StringRef; +class TargetLibraryInfo; +class Value; + + /// Determine which bits of V are known to be either zero or one and return + /// them in the KnownZero/KnownOne bit sets. + /// + /// This function is defined on values with integer type, values with pointer + /// type, and vectors of integers. In the case + /// where V is a vector, the known zero and known one values are the + /// same width as the vector element, and the bit is set only if it is true + /// for all of the elements in the vector. + void computeKnownBits(const Value *V, KnownBits &Known, + const DataLayout &DL, unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, + bool UseInstrInfo = true); + + /// Returns the known bits rather than passing by reference. 
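  // A hedged usage sketch (V is some integer-typed llvm::Value in scope; the
  // DataLayout comes from the enclosing module):
  //
  //   const DataLayout &DL = F.getParent()->getDataLayout();
  //   KnownBits Known = computeKnownBits(V, DL);
  //   if (Known.countMinTrailingZeros() >= 2) {
  //     // V is provably a multiple of 4 here.
  //   }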
+  KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
+                             unsigned Depth = 0, AssumptionCache *AC = nullptr,
+                             const Instruction *CxtI = nullptr,
+                             const DominatorTree *DT = nullptr,
+                             OptimizationRemarkEmitter *ORE = nullptr,
+                             bool UseInstrInfo = true);
+
+  /// Compute known bits from the range metadata.
+  /// \p KnownZero the set of bits that are known to be zero
+  /// \p KnownOne the set of bits that are known to be one
+  void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
+                                         KnownBits &Known);
+
+  /// Return true if LHS and RHS have no common bits set.
+  bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
+                           const DataLayout &DL,
+                           AssumptionCache *AC = nullptr,
+                           const Instruction *CxtI = nullptr,
+                           const DominatorTree *DT = nullptr,
+                           bool UseInstrInfo = true);
+
+  /// Return true if the given value is known to have exactly one bit set when
+  /// defined. For vectors, return true if every element is known to be a power
+  /// of two when defined. Supports values with integer or pointer type and
+  /// vectors of integers. If 'OrZero' is set, then return true if the given
+  /// value is either a power of two or zero.
+  bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
+                              bool OrZero = false, unsigned Depth = 0,
+                              AssumptionCache *AC = nullptr,
+                              const Instruction *CxtI = nullptr,
+                              const DominatorTree *DT = nullptr,
+                              bool UseInstrInfo = true);
+
+  bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
+
+  /// Return true if the given value is known to be non-zero when defined. For
+  /// vectors, return true if every element is known to be non-zero when
+  /// defined. For pointers, if the context instruction and dominator tree are
+  /// specified, perform context-sensitive analysis and return true if the
+  /// pointer couldn't possibly be null at the specified instruction.
+  /// Supports values with integer or pointer type and vectors of integers.
+  bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0,
+                      AssumptionCache *AC = nullptr,
+                      const Instruction *CxtI = nullptr,
+                      const DominatorTree *DT = nullptr,
+                      bool UseInstrInfo = true);
+
+  /// Return true if the two given values are negations of each other.
+  /// Currently it can recognize these Value pairs:
+  /// 1: <X, Y> if X = sub (0, Y) or Y = sub (0, X)
+  /// 2: <X, Y> if X = sub (A, B) and Y = sub (B, A)
+  bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW = false);
+
+  /// Returns true if the given value is known to be non-negative.
+  bool isKnownNonNegative(const Value *V, const DataLayout &DL,
+                          unsigned Depth = 0,
+                          AssumptionCache *AC = nullptr,
+                          const Instruction *CxtI = nullptr,
+                          const DominatorTree *DT = nullptr,
+                          bool UseInstrInfo = true);
+
+  /// Returns true if the given value is known to be positive (i.e.
+  /// non-negative and non-zero).
+  bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0,
+                       AssumptionCache *AC = nullptr,
+                       const Instruction *CxtI = nullptr,
+                       const DominatorTree *DT = nullptr,
+                       bool UseInstrInfo = true);
+
+  /// Returns true if the given value is known to be negative (i.e.
+  /// non-positive and non-zero).
+  bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0,
+                       AssumptionCache *AC = nullptr,
+                       const Instruction *CxtI = nullptr,
+                       const DominatorTree *DT = nullptr,
+                       bool UseInstrInfo = true);
+
+  /// Return true if the given values are known to be non-equal when defined.
+  /// Supports scalar integer types only.
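  // Illustrative client pattern (a sketch; Divisor, AC, CxtI and DT are
  // hypothetical in-scope values): these predicates are typically used to
  // justify local simplifications, e.g. dropping a "divisor == 0" guard when
  // the divisor is provably non-zero at the context instruction.
  //
  //   if (isKnownNonZero(Divisor, DL, /*Depth=*/0, &AC, CxtI, &DT))
  //     ; // safe to emit the unguarded division here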
+ bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + + /// Return true if 'V & Mask' is known to be zero. We use this predicate to + /// simplify operations downstream. Mask is known to be zero for bits that V + /// cannot have. + /// + /// This function is defined on values with integer type, values with pointer + /// type, and vectors of integers. In the case + /// where V is a vector, the mask, known zero, and known one values are the + /// same width as the vector element, and the bit is set only if it is true + /// for all of the elements in the vector. + bool MaskedValueIsZero(const Value *V, const APInt &Mask, + const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + + /// Return the number of times the sign bit of the register is replicated into + /// the other bits. We know that at least 1 bit is always equal to the sign + /// bit (itself), but other cases can give us information. For example, + /// immediately after an "ashr X, 2", we know that the top 3 bits are all + /// equal to each other, so we return 3. For vectors, return the number of + /// sign bits for the vector element with the mininum number of known sign + /// bits. + unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + bool UseInstrInfo = true); + + /// This function computes the integer multiple of Base that equals V. If + /// successful, it returns true and returns the multiple in Multiple. If + /// unsuccessful, it returns false. Also, if V can be simplified to an + /// integer, then the simplified V is returned in Val. Look through sext only + /// if LookThroughSExt=true. + bool ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, + bool LookThroughSExt = false, + unsigned Depth = 0); + + /// Map a call instruction to an intrinsic ID. Libcalls which have equivalent + /// intrinsics are treated as-if they were intrinsics. + Intrinsic::ID getIntrinsicForCallSite(ImmutableCallSite ICS, + const TargetLibraryInfo *TLI); + + /// Return true if we can prove that the specified FP value is never equal to + /// -0.0. + bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + + /// Return true if we can prove that the specified FP value is either NaN or + /// never less than -0.0. + /// + /// NaN --> true + /// +0 --> true + /// -0 --> true + /// x > +0 --> true + /// x < -0 --> false + bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); + + /// Return true if the floating-point scalar value is not a NaN or if the + /// floating-point vector value has no NaN elements. Return false if a value + /// could ever be NaN. + bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth = 0); + + /// Return true if we can prove that the specified FP value's sign bit is 0. 
+ /// + /// NaN --> true/false (depending on the NaN's sign bit) + /// +0 --> true + /// -0 --> false + /// x > +0 --> true + /// x < -0 --> false + bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); + + /// If the specified value can be set by repeating the same byte in memory, + /// return the i8 value that it is represented with. This is true for all i8 + /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double + /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g. + /// i16 0x1234), return null. If the value is entirely undef and padding, + /// return undef. + Value *isBytewiseValue(Value *V); + + /// Given an aggregrate and an sequence of indices, see if the scalar value + /// indexed is already around as a register, for example if it were inserted + /// directly into the aggregrate. + /// + /// If InsertBefore is not null, this function will duplicate (modified) + /// insertvalues when a part of a nested struct is extracted. + Value *FindInsertedValue(Value *V, + ArrayRef<unsigned> idx_range, + Instruction *InsertBefore = nullptr); + + /// Analyze the specified pointer to see if it can be expressed as a base + /// pointer plus a constant offset. Return the base and offset to the caller. + Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const DataLayout &DL); + inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr, + int64_t &Offset, + const DataLayout &DL) { + return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset, + DL); + } + + /// Returns true if the GEP is based on a pointer to a string (array of + // \p CharSize integers) and is indexing into this string. + bool isGEPBasedOnPointerToString(const GEPOperator *GEP, + unsigned CharSize = 8); + + /// Represents offset+length into a ConstantDataArray. + struct ConstantDataArraySlice { + /// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid + /// initializer, it just doesn't fit the ConstantDataArray interface). + const ConstantDataArray *Array; + + /// Slice starts at this Offset. + uint64_t Offset; + + /// Length of the slice. + uint64_t Length; + + /// Moves the Offset and adjusts Length accordingly. + void move(uint64_t Delta) { + assert(Delta < Length); + Offset += Delta; + Length -= Delta; + } + + /// Convenience accessor for elements in the slice. + uint64_t operator[](unsigned I) const { + return Array==nullptr ? 0 : Array->getElementAsInteger(I + Offset); + } + }; + + /// Returns true if the value \p V is a pointer into a ConstantDataArray. + /// If successful \p Slice will point to a ConstantDataArray info object + /// with an appropriate offset. + bool getConstantDataArrayInfo(const Value *V, ConstantDataArraySlice &Slice, + unsigned ElementSize, uint64_t Offset = 0); + + /// This function computes the length of a null-terminated C string pointed to + /// by V. If successful, it returns true and returns the string in Str. If + /// unsuccessful, it returns false. This does not include the trailing null + /// character by default. If TrimAtNul is set to false, then this returns any + /// trailing null characters as well as any other characters that come after + /// it. + bool getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset = 0, bool TrimAtNul = true); + + /// If we can compute the length of the string pointed to by the specified + /// pointer, return 'len+1'. If we can't, return 0. 
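  // A hedged sketch of the usual constant-string client pattern (Ptr and
  // SizeTy are illustrative, in-scope values):
  //
  //   StringRef Str;
  //   if (getConstantStringInfo(Ptr, Str))
  //     // e.g. fold a strlen(Ptr) call to a constant
  //     return ConstantInt::get(SizeTy, Str.size());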
+ uint64_t GetStringLength(const Value *V, unsigned CharSize = 8); + + /// This function returns call pointer argument that is considered the same by + /// aliasing rules. You CAN'T use it to replace one value with another. + const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call); + inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) { + return const_cast<Value *>(getArgumentAliasingToReturnedPointer( + const_cast<const CallBase *>(Call))); + } + + // {launder,strip}.invariant.group returns pointer that aliases its argument, + // and it only captures pointer by returning it. + // These intrinsics are not marked as nocapture, because returning is + // considered as capture. The arguments are not marked as returned neither, + // because it would make it useless. + bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + const CallBase *Call); + + /// This method strips off any GEP address adjustments and pointer casts from + /// the specified value, returning the original object being addressed. Note + /// that the returned value has pointer type if the specified value does. If + /// the MaxLookup value is non-zero, it limits the number of instructions to + /// be stripped off. + Value *GetUnderlyingObject(Value *V, const DataLayout &DL, + unsigned MaxLookup = 6); + inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL, + unsigned MaxLookup = 6) { + return GetUnderlyingObject(const_cast<Value *>(V), DL, MaxLookup); + } + + /// This method is similar to GetUnderlyingObject except that it can + /// look through phi and select instructions and return multiple objects. + /// + /// If LoopInfo is passed, loop phis are further analyzed. If a pointer + /// accesses different objects in each iteration, we don't look through the + /// phi node. E.g. consider this loop nest: + /// + /// int **A; + /// for (i) + /// for (j) { + /// A[i][j] = A[i-1][j] * B[j] + /// } + /// + /// This is transformed by Load-PRE to stash away A[i] for the next iteration + /// of the outer loop: + /// + /// Curr = A[0]; // Prev_0 + /// for (i: 1..N) { + /// Prev = Curr; // Prev = PHI (Prev_0, Curr) + /// Curr = A[i]; + /// for (j: 0..N) { + /// Curr[j] = Prev[j] * B[j] + /// } + /// } + /// + /// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects + /// should not assume that Curr and Prev share the same underlying object thus + /// it shouldn't look through the phi above. + void GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects, + const DataLayout &DL, LoopInfo *LI = nullptr, + unsigned MaxLookup = 6); + + /// This is a wrapper around GetUnderlyingObjects and adds support for basic + /// ptrtoint+arithmetic+inttoptr sequences. + bool getUnderlyingObjectsForCodeGen(const Value *V, + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL); + + /// Return true if the only users of this pointer are lifetime markers. + bool onlyUsedByLifetimeMarkers(const Value *V); + + /// Return true if the instruction does not have any effects besides + /// calculating the result and does not have undefined behavior. + /// + /// This method never returns true for an instruction that returns true for + /// mayHaveSideEffects; however, this method also does some other checks in + /// addition. It checks for undefined behavior, like dividing by zero or + /// loading from an invalid pointer (but not for undefined results, like a + /// shift with a shift amount larger than the width of the result). 
It checks + /// for malloc and alloca because speculatively executing them might cause a + /// memory leak. It also returns false for instructions related to control + /// flow, specifically terminators and PHI nodes. + /// + /// If the CtxI is specified this method performs context-sensitive analysis + /// and returns true if it is safe to execute the instruction immediately + /// before the CtxI. + /// + /// If the CtxI is NOT specified this method only looks at the instruction + /// itself and its operands, so if this method returns true, it is safe to + /// move the instruction as long as the correct dominance relationships for + /// the operands and users hold. + /// + /// This method can return true for instructions that read memory; + /// for such instructions, moving them may change the resulting value. + bool isSafeToSpeculativelyExecute(const Value *V, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr); + + /// Returns true if the result or effects of the given instructions \p I + /// depend on or influence global memory. + /// Memory dependence arises for example if the instruction reads from + /// memory or may produce effects or undefined behaviour. Memory dependent + /// instructions generally cannot be reorderd with respect to other memory + /// dependent instructions or moved into non-dominated basic blocks. + /// Instructions which just compute a value based on the values of their + /// operands are not memory dependent. + bool mayBeMemoryDependent(const Instruction &I); + + /// Return true if it is an intrinsic that cannot be speculated but also + /// cannot trap. + bool isAssumeLikeIntrinsic(const Instruction *I); + + /// Return true if it is valid to use the assumptions provided by an + /// assume intrinsic, I, at the point in the control-flow identified by the + /// context instruction, CxtI. + bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, + const DominatorTree *DT = nullptr); + + enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows }; + + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, + const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT, + bool UseInstrInfo = true); + OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT, + bool UseInstrInfo = true); + OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, + const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT, + bool UseInstrInfo = true); + OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + /// This version also leverages the sign bit of Add if known. 
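  // Illustrative use (a sketch; AddInst and the analysis objects are assumed
  // to be in scope): promote a plain 'add' to 'add nuw' once unsigned
  // overflow has been ruled out.
  //
  //   if (computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT) ==
  //       OverflowResult::NeverOverflows)
  //     AddInst->setHasNoUnsignedWrap(true);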
+ OverflowResult computeOverflowForSignedAdd(const AddOperator *Add, + const DataLayout &DL, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + OverflowResult computeOverflowForUnsignedSub(const Value *LHS, const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT); + OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT); + + /// Returns true if the arithmetic part of the \p II 's result is + /// used only along the paths control dependent on the computation + /// not overflowing, \p II being an <op>.with.overflow intrinsic. + bool isOverflowIntrinsicNoWrap(const IntrinsicInst *II, + const DominatorTree &DT); + + /// Return true if this function can prove that the instruction I will + /// always transfer execution to one of its successors (including the next + /// instruction that follows within a basic block). E.g. this is not + /// guaranteed for function calls that could loop infinitely. + /// + /// In other words, this function returns false for instructions that may + /// transfer execution or fail to transfer execution in a way that is not + /// captured in the CFG nor in the sequence of instructions within a basic + /// block. + /// + /// Undefined behavior is assumed not to happen, so e.g. division is + /// guaranteed to transfer execution to the following instruction even + /// though division by zero might cause undefined behavior. + bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I); + + /// Returns true if this block does not contain a potential implicit exit. + /// This is equivelent to saying that all instructions within the basic block + /// are guaranteed to transfer execution to their successor within the basic + /// block. This has the same assumptions w.r.t. undefined behavior as the + /// instruction variant of this function. + bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB); + + /// Return true if this function can prove that the instruction I + /// is executed for every iteration of the loop L. + /// + /// Note that this currently only considers the loop header. + bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L); + + /// Return true if this function can prove that I is guaranteed to yield + /// full-poison (all bits poison) if at least one of its operands are + /// full-poison (all bits poison). + /// + /// The exact rules for how poison propagates through instructions have + /// not been settled as of 2015-07-10, so this function is conservative + /// and only considers poison to be propagated in uncontroversial + /// cases. There is no attempt to track values that may be only partially + /// poison. + bool propagatesFullPoison(const Instruction *I); + + /// Return either nullptr or an operand of I such that I will trigger + /// undefined behavior if I is executed and that operand has a full-poison + /// value (all bits poison). + const Value *getGuaranteedNonFullPoisonOp(const Instruction *I); + + /// Return true if this function can prove that if PoisonI is executed + /// and yields a full-poison value (all bits poison), then that will + /// trigger undefined behavior. + /// + /// Note that this currently only considers the basic block that is + /// the parent of I. 
+ bool programUndefinedIfFullPoison(const Instruction *PoisonI); + + /// Specific patterns of select instructions we can match. + enum SelectPatternFlavor { + SPF_UNKNOWN = 0, + SPF_SMIN, /// Signed minimum + SPF_UMIN, /// Unsigned minimum + SPF_SMAX, /// Signed maximum + SPF_UMAX, /// Unsigned maximum + SPF_FMINNUM, /// Floating point minnum + SPF_FMAXNUM, /// Floating point maxnum + SPF_ABS, /// Absolute value + SPF_NABS /// Negated absolute value + }; + + /// Behavior when a floating point min/max is given one NaN and one + /// non-NaN as input. + enum SelectPatternNaNBehavior { + SPNB_NA = 0, /// NaN behavior not applicable. + SPNB_RETURNS_NAN, /// Given one NaN input, returns the NaN. + SPNB_RETURNS_OTHER, /// Given one NaN input, returns the non-NaN. + SPNB_RETURNS_ANY /// Given one NaN input, can return either (or + /// it has been determined that no operands can + /// be NaN). + }; + + struct SelectPatternResult { + SelectPatternFlavor Flavor; + SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is + /// SPF_FMINNUM or SPF_FMAXNUM. + bool Ordered; /// When implementing this min/max pattern as + /// fcmp; select, does the fcmp have to be + /// ordered? + + /// Return true if \p SPF is a min or a max pattern. + static bool isMinOrMax(SelectPatternFlavor SPF) { + return SPF != SPF_UNKNOWN && SPF != SPF_ABS && SPF != SPF_NABS; + } + }; + + /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind + /// and providing the out parameter results if we successfully match. + /// + /// For ABS/NABS, LHS will be set to the input to the abs idiom. RHS will be + /// the negation instruction from the idiom. + /// + /// If CastOp is not nullptr, also match MIN/MAX idioms where the type does + /// not match that of the original select. If this is the case, the cast + /// operation (one of Trunc,SExt,Zext) that must be done to transform the + /// type of LHS and RHS into the type of V is returned in CastOp. + /// + /// For example: + /// %1 = icmp slt i32 %a, i32 4 + /// %2 = sext i32 %a to i64 + /// %3 = select i1 %1, i64 %2, i64 4 + /// + /// -> LHS = %a, RHS = i32 4, *CastOp = Instruction::SExt + /// + SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, + Instruction::CastOps *CastOp = nullptr, + unsigned Depth = 0); + inline SelectPatternResult + matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS, + Instruction::CastOps *CastOp = nullptr) { + Value *L = const_cast<Value*>(LHS); + Value *R = const_cast<Value*>(RHS); + auto Result = matchSelectPattern(const_cast<Value*>(V), L, R); + LHS = L; + RHS = R; + return Result; + } + + /// Return the canonical comparison predicate for the specified + /// minimum/maximum flavor. + CmpInst::Predicate getMinMaxPred(SelectPatternFlavor SPF, + bool Ordered = false); + + /// Return the inverse minimum/maximum flavor of the specified flavor. + /// For example, signed minimum is the inverse of signed maximum. + SelectPatternFlavor getInverseMinMaxFlavor(SelectPatternFlavor SPF); + + /// Return the canonical inverse comparison predicate for the specified + /// minimum/maximum flavor. + CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + + /// Return true if RHS is known to be implied true by LHS. Return false if + /// RHS is known to be implied false by LHS. Otherwise, return None if no + /// implication can be made. + /// A & B must be i1 (boolean) values or a vector of such values. 
Note that
+  /// the truth table for implication is the same as <=u on i1 values (but not
+  /// <=s!). The truth table for both is:
+  ///    | T | F (B)
+  ///  T | T | F
+  ///  F | T | T
+  /// (A)
+  Optional<bool> isImpliedCondition(const Value *LHS, const Value *RHS,
+                                    const DataLayout &DL, bool LHSIsTrue = true,
+                                    unsigned Depth = 0);
+
+  /// Return the boolean condition value in the context of the given instruction
+  /// if it is known based on dominating conditions.
+  Optional<bool> isImpliedByDomCondition(const Value *Cond,
+                                         const Instruction *ContextI,
+                                         const DataLayout &DL);
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_VALUETRACKING_H
diff --git a/clang-r353983e/include/llvm/Analysis/VectorUtils.h b/clang-r353983e/include/llvm/Analysis/VectorUtils.h
new file mode 100644
index 00000000..60ef6339
--- /dev/null
+++ b/clang-r353983e/include/llvm/Analysis/VectorUtils.h
@@ -0,0 +1,602 @@
+//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some vectorizer utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_VECTORUTILS_H
+#define LLVM_ANALYSIS_VECTORUTILS_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/IRBuilder.h"
+
+namespace llvm {
+
+template <typename T> class ArrayRef;
+class DemandedBits;
+class GetElementPtrInst;
+template <typename InstTy> class InterleaveGroup;
+class Loop;
+class ScalarEvolution;
+class TargetTransformInfo;
+class Type;
+class Value;
+
+namespace Intrinsic {
+enum ID : unsigned;
+}
+
+/// Identify if the intrinsic is trivially vectorizable.
+/// This method returns true if the intrinsic's argument types are all
+/// scalars for the scalar form of the intrinsic and all vectors for
+/// the vector form of the intrinsic.
+bool isTriviallyVectorizable(Intrinsic::ID ID);
+
+/// Identifies if the intrinsic has a scalar operand. It checks for
+/// ctlz, cttz and powi special intrinsics whose argument is scalar.
+bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
+
+/// Returns the intrinsic ID for a call.
+/// For the input call instruction it finds the mapping intrinsic and returns
+/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
+Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
+                                          const TargetLibraryInfo *TLI);
+
+/// Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
+
+/// If the argument is a GEP, then returns the operand identified by
+/// getGEPInductionOperand. However, if there is some other non-loop-invariant
+/// operand, it returns that instead.
+Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
+
+/// If a value has only one user that is a CastInst, return it.
+Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
+
+/// Get the stride of a pointer access in a loop. Looks for symbolic
+/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
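// A hedged sketch of the usual vectorizer-side check (I and TLI are assumed
// to be in scope):
//
//   if (auto *CI = dyn_cast<CallInst>(&I)) {
//     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//     if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID))
//       ; // the call can be widened to the vector form of the intrinsic
//   }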
+Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp); + +/// Given a vector and an element number, see if the scalar value is +/// already around as a register, for example if it were inserted then extracted +/// from the vector. +Value *findScalarElement(Value *V, unsigned EltNo); + +/// Get splat value if the input is a splat vector or return nullptr. +/// The value may be extracted from a splat constants vector or from +/// a sequence of instructions that broadcast a single value into a vector. +const Value *getSplatValue(const Value *V); + +/// Compute a map of integer instructions to their minimum legal type +/// size. +/// +/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int +/// type (e.g. i32) whenever arithmetic is performed on them. +/// +/// For targets with native i8 or i16 operations, usually InstCombine can shrink +/// the arithmetic type down again. However InstCombine refuses to create +/// illegal types, so for targets without i8 or i16 registers, the lengthening +/// and shrinking remains. +/// +/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when +/// their scalar equivalents do not, so during vectorization it is important to +/// remove these lengthens and truncates when deciding the profitability of +/// vectorization. +/// +/// This function analyzes the given range of instructions and determines the +/// minimum type size each can be converted to. It attempts to remove or +/// minimize type size changes across each def-use chain, so for example in the +/// following code: +/// +/// %1 = load i8, i8* +/// %2 = add i8 %1, 2 +/// %3 = load i16, i16* +/// %4 = zext i8 %2 to i32 +/// %5 = zext i16 %3 to i32 +/// %6 = add i32 %4, %5 +/// %7 = trunc i32 %6 to i16 +/// +/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes +/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}. +/// +/// If the optional TargetTransformInfo is provided, this function tries harder +/// to do less work by only looking at illegal types. +MapVector<Instruction*, uint64_t> +computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks, + DemandedBits &DB, + const TargetTransformInfo *TTI=nullptr); + +/// Compute the union of two access-group lists. +/// +/// If the list contains just one access group, it is returned directly. If the +/// list is empty, returns nullptr. +MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2); + +/// Compute the access-group list of access groups that @p Inst1 and @p Inst2 +/// are both in. If either instruction does not access memory at all, it is +/// considered to be in every list. +/// +/// If the list contains just one access group, it is returned directly. If the +/// list is empty, returns nullptr. +MDNode *intersectAccessGroups(const Instruction *Inst1, + const Instruction *Inst2); + +/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, +/// MD_nontemporal, MD_access_group]. +/// For K in Kinds, we get the MDNode for K from each of the +/// elements of VL, compute their "intersection" (i.e., the most generic +/// metadata value that covers all of the individual values), and set I's +/// metadata for M equal to the intersection value. +/// +/// This function always sets a (possibly null) value for each K in Kinds. +Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL); + +/// Create a mask that filters the members of an interleave group where there +/// are gaps. 
+/// +/// For example, the mask for \p Group with interleave-factor 3 +/// and \p VF 4, that has only its first member present is: +/// +/// <1,0,0,1,0,0,1,0,0,1,0,0> +/// +/// Note: The result is a mask of 0's and 1's, as opposed to the other +/// create[*]Mask() utilities which create a shuffle mask (mask that +/// consists of indices). +Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, + const InterleaveGroup<Instruction> &Group); + +/// Create a mask with replicated elements. +/// +/// This function creates a shuffle mask for replicating each of the \p VF +/// elements in a vector \p ReplicationFactor times. It can be used to +/// transform a mask of \p VF elements into a mask of +/// \p VF * \p ReplicationFactor elements used by a predicated +/// interleaved-group of loads/stores whose Interleaved-factor == +/// \p ReplicationFactor. +/// +/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: +/// +/// <0,0,0,1,1,1,2,2,2,3,3,3> +Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor, + unsigned VF); + +/// Create an interleave shuffle mask. +/// +/// This function creates a shuffle mask for interleaving \p NumVecs vectors of +/// vectorization factor \p VF into a single wide vector. The mask is of the +/// form: +/// +/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...> +/// +/// For example, the mask for VF = 4 and NumVecs = 2 is: +/// +/// <0, 4, 1, 5, 2, 6, 3, 7>. +Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs); + +/// Create a stride shuffle mask. +/// +/// This function creates a shuffle mask whose elements begin at \p Start and +/// are incremented by \p Stride. The mask can be used to deinterleave an +/// interleaved vector into separate vectors of vectorization factor \p VF. The +/// mask is of the form: +/// +/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)> +/// +/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: +/// +/// <0, 2, 4, 6> +Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF); + +/// Create a sequential shuffle mask. +/// +/// This function creates shuffle mask whose elements are sequential and begin +/// at \p Start. The mask contains \p NumInts integers and is padded with \p +/// NumUndefs undef values. The mask is of the form: +/// +/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs> +/// +/// For example, the mask for Start = 0, NumInsts = 4, and NumUndefs = 4 is: +/// +/// <0, 1, 2, 3, undef, undef, undef, undef> +Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs); + +/// Concatenate a list of vectors. +/// +/// This function generates code that concatenate the vectors in \p Vecs into a +/// single large vector. The number of vectors should be greater than one, and +/// their element types should be the same. The number of elements in the +/// vectors should also be the same; however, if the last vector has fewer +/// elements, it will be padded with undefs. +Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs); + +/// The group of interleaved loads/stores sharing the same stride and +/// close to each other. +/// +/// Each member in this group has an index starting from 0, and the largest +/// index should be less than interleaved factor, which is equal to the absolute +/// value of the access's stride. +/// +/// E.g. 
An interleaved load group of factor 4: +/// for (unsigned i = 0; i < 1024; i+=4) { +/// a = A[i]; // Member of index 0 +/// b = A[i+1]; // Member of index 1 +/// d = A[i+3]; // Member of index 3 +/// ... +/// } +/// +/// An interleaved store group of factor 4: +/// for (unsigned i = 0; i < 1024; i+=4) { +/// ... +/// A[i] = a; // Member of index 0 +/// A[i+1] = b; // Member of index 1 +/// A[i+2] = c; // Member of index 2 +/// A[i+3] = d; // Member of index 3 +/// } +/// +/// Note: the interleaved load group could have gaps (missing members), but +/// the interleaved store group doesn't allow gaps. +template <typename InstTy> class InterleaveGroup { +public: + InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align) + : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {} + + InterleaveGroup(InstTy *Instr, int Stride, unsigned Align) + : Align(Align), InsertPos(Instr) { + assert(Align && "The alignment should be non-zero"); + + Factor = std::abs(Stride); + assert(Factor > 1 && "Invalid interleave factor"); + + Reverse = Stride < 0; + Members[0] = Instr; + } + + bool isReverse() const { return Reverse; } + unsigned getFactor() const { return Factor; } + unsigned getAlignment() const { return Align; } + unsigned getNumMembers() const { return Members.size(); } + + /// Try to insert a new member \p Instr with index \p Index and + /// alignment \p NewAlign. The index is related to the leader and it could be + /// negative if it is the new leader. + /// + /// \returns false if the instruction doesn't belong to the group. + bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) { + assert(NewAlign && "The new member's alignment should be non-zero"); + + int Key = Index + SmallestKey; + + // Skip if there is already a member with the same index. + if (Members.find(Key) != Members.end()) + return false; + + if (Key > LargestKey) { + // The largest index is always less than the interleave factor. + if (Index >= static_cast<int>(Factor)) + return false; + + LargestKey = Key; + } else if (Key < SmallestKey) { + // The largest index is always less than the interleave factor. + if (LargestKey - Key >= static_cast<int>(Factor)) + return false; + + SmallestKey = Key; + } + + // It's always safe to select the minimum alignment. + Align = std::min(Align, NewAlign); + Members[Key] = Instr; + return true; + } + + /// Get the member with the given index \p Index + /// + /// \returns nullptr if contains no such member. + InstTy *getMember(unsigned Index) const { + int Key = SmallestKey + Index; + auto Member = Members.find(Key); + if (Member == Members.end()) + return nullptr; + + return Member->second; + } + + /// Get the index for the given member. Unlike the key in the member + /// map, the index starts from 0. + unsigned getIndex(const InstTy *Instr) const { + for (auto I : Members) { + if (I.second == Instr) + return I.first - SmallestKey; + } + + llvm_unreachable("InterleaveGroup contains no such member"); + } + + InstTy *getInsertPos() const { return InsertPos; } + void setInsertPos(InstTy *Inst) { InsertPos = Inst; } + + /// Add metadata (e.g. alias info) from the instructions in this group to \p + /// NewInst. + /// + /// FIXME: this function currently does not add noalias metadata a'la + /// addNewMedata. To do that we need to compute the intersection of the + /// noalias info from all members. + void addMetadata(InstTy *NewInst) const; + + /// Returns true if this Group requires a scalar iteration to handle gaps. 
+ bool requiresScalarEpilogue() const { + // If the last member of the Group exists, then a scalar epilog is not + // needed for this group. + if (getMember(getFactor() - 1)) + return false; + + // We have a group with gaps. It therefore cannot be a group of stores, + // and it can't be a reversed access, because such groups get invalidated. + assert(!getMember(0)->mayWriteToMemory() && + "Group should have been invalidated"); + assert(!isReverse() && "Group should have been invalidated"); + + // This is a group of loads, with gaps, and without a last-member + return true; + } + +private: + unsigned Factor; // Interleave Factor. + bool Reverse; + unsigned Align; + DenseMap<int, InstTy *> Members; + int SmallestKey = 0; + int LargestKey = 0; + + // To avoid breaking dependences, vectorized instructions of an interleave + // group should be inserted at either the first load or the last store in + // program order. + // + // E.g. %even = load i32 // Insert Position + // %add = add i32 %even // Use of %even + // %odd = load i32 + // + // store i32 %even + // %odd = add i32 // Def of %odd + // store i32 %odd // Insert Position + InstTy *InsertPos; +}; + +/// Drive the analysis of interleaved memory accesses in the loop. +/// +/// Use this class to analyze interleaved accesses only when we can vectorize +/// a loop. Otherwise it's meaningless to do analysis as the vectorization +/// on interleaved accesses is unsafe. +/// +/// The analysis collects interleave groups and records the relationships +/// between the member and the group in a map. +class InterleavedAccessInfo { +public: + InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, + DominatorTree *DT, LoopInfo *LI, + const LoopAccessInfo *LAI) + : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} + + ~InterleavedAccessInfo() { reset(); } + + /// Analyze the interleaved accesses and collect them in interleave + /// groups. Substitute symbolic strides using \p Strides. + /// Consider also predicated loads/stores in the analysis if + /// \p EnableMaskedInterleavedGroup is true. + void analyzeInterleaving(bool EnableMaskedInterleavedGroup); + + /// Invalidate groups, e.g., in case all blocks in loop will be predicated + /// contrary to original assumption. Although we currently prevent group + /// formation for predicated accesses, we may be able to relax this limitation + /// in the future once we handle more complicated blocks. + void reset() { + SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet; + // Avoid releasing a pointer twice. + for (auto &I : InterleaveGroupMap) + DelSet.insert(I.second); + for (auto *Ptr : DelSet) + delete Ptr; + InterleaveGroupMap.clear(); + RequiresScalarEpilogue = false; + } + + + /// Check if \p Instr belongs to any interleave group. + bool isInterleaved(Instruction *Instr) const { + return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end(); + } + + /// Get the interleave group that \p Instr belongs to. + /// + /// \returns nullptr if doesn't have such group. + InterleaveGroup<Instruction> * + getInterleaveGroup(const Instruction *Instr) const { + if (InterleaveGroupMap.count(Instr)) + return InterleaveGroupMap.find(Instr)->second; + return nullptr; + } + + iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>> + getInterleaveGroups() { + return make_range(InterleaveGroups.begin(), InterleaveGroups.end()); + } + + /// Returns true if an interleaved group that may access memory + /// out-of-bounds requires a scalar epilogue iteration for correctness. 
+ bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } + + /// Invalidate groups that require a scalar epilogue (due to gaps). This can + /// happen when optimizing for size forbids a scalar epilogue, and the gap + /// cannot be filtered by masking the load/store. + void invalidateGroupsRequiringScalarEpilogue(); + +private: + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. + /// Simplifies SCEV expressions in the context of existing SCEV assumptions. + /// The interleaved access analysis can also add new predicates (for example + /// by versioning strides of pointers). + PredicatedScalarEvolution &PSE; + + Loop *TheLoop; + DominatorTree *DT; + LoopInfo *LI; + const LoopAccessInfo *LAI; + + /// True if the loop may contain non-reversed interleaved groups with + /// out-of-bounds accesses. We ensure we don't speculatively access memory + /// out-of-bounds by executing at least one scalar epilogue iteration. + bool RequiresScalarEpilogue = false; + + /// Holds the relationships between the members and the interleave group. + DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap; + + SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups; + + /// Holds dependences among the memory accesses in the loop. It maps a source + /// access to a set of dependent sink accesses. + DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences; + + /// The descriptor for a strided memory access. + struct StrideDescriptor { + StrideDescriptor() = default; + StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, + unsigned Align) + : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} + + // The access's stride. It is negative for a reverse access. + int64_t Stride = 0; + + // The scalar expression of this access. + const SCEV *Scev = nullptr; + + // The size of the memory object. + uint64_t Size = 0; + + // The alignment of this access. + unsigned Align = 0; + }; + + /// A type for holding instructions and their stride descriptors. + using StrideEntry = std::pair<Instruction *, StrideDescriptor>; + + /// Create a new interleave group with the given instruction \p Instr, + /// stride \p Stride and alignment \p Align. + /// + /// \returns the newly created interleave group. + InterleaveGroup<Instruction> * + createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { + assert(!InterleaveGroupMap.count(Instr) && + "Already in an interleaved access group"); + InterleaveGroupMap[Instr] = + new InterleaveGroup<Instruction>(Instr, Stride, Align); + InterleaveGroups.insert(InterleaveGroupMap[Instr]); + return InterleaveGroupMap[Instr]; + } + + /// Release the group and remove all the relationships. + void releaseGroup(InterleaveGroup<Instruction> *Group) { + for (unsigned i = 0; i < Group->getFactor(); i++) + if (Instruction *Member = Group->getMember(i)) + InterleaveGroupMap.erase(Member); + + InterleaveGroups.erase(Group); + delete Group; + } + + /// Collect all the accesses with a constant stride in program order. + void collectConstStrideAccesses( + MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo, + const ValueToValueMap &Strides); + + /// Returns true if \p Stride is allowed in an interleaved group. + static bool isStrided(int Stride); + + /// Returns true if \p BB is a predicated block. + bool isPredicated(BasicBlock *BB) const { + return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); + } + + /// Returns true if LoopAccessInfo can be used for dependence queries. 
+  bool areDependencesValid() const {
+    return LAI && LAI->getDepChecker().getDependences();
+  }
+
+  /// Returns true if memory accesses \p A and \p B can be reordered, if
+  /// necessary, when constructing interleaved groups.
+  ///
+  /// \p A must precede \p B in program order. We return true if reordering is
+  /// unnecessary or is known to be safe, and false if \p A and \p B may be
+  /// dependent and reordering them cannot be shown to be safe.
+  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
+                                                 StrideEntry *B) const {
+    // Code motion for interleaved accesses can potentially hoist strided loads
+    // and sink strided stores. The code below checks the legality of the
+    // following two conditions:
+    //
+    // 1. Potentially moving a strided load (B) before any store (A) that
+    //    precedes B, or
+    //
+    // 2. Potentially moving a strided store (A) after any load or store (B)
+    //    that A precedes.
+    //
+    // It's legal to reorder A and B if we know there isn't a dependence from A
+    // to B. Note that this determination is conservative since some
+    // dependences could potentially be reordered safely.
+
+    // A is potentially the source of a dependence.
+    auto *Src = A->first;
+    auto SrcDes = A->second;
+
+    // B is potentially the sink of a dependence.
+    auto *Sink = B->first;
+    auto SinkDes = B->second;
+
+    // Code motion for interleaved accesses can't violate WAR dependences.
+    // Thus, reordering is legal if the source isn't a write.
+    if (!Src->mayWriteToMemory())
+      return true;
+
+    // At least one of the accesses must be strided.
+    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
+      return true;
+
+    // If dependence information is not available from LoopAccessInfo,
+    // conservatively assume the instructions can't be reordered.
+    if (!areDependencesValid())
+      return false;
+
+    // If we know there is a dependence from source to sink, assume the
+    // instructions can't be reordered. Otherwise, reordering is legal.
+    return Dependences.find(Src) == Dependences.end() ||
+           !Dependences.lookup(Src).count(Sink);
+  }
+
+  /// Collect the dependences from LoopAccessInfo.
+  ///
+  /// We process the dependences once during the interleaved access analysis to
+  /// enable constant-time dependence queries.
+  void collectDependences() {
+    if (!areDependencesValid())
+      return;
+    auto *Deps = LAI->getDepChecker().getDependences();
+    for (auto Dep : *Deps)
+      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
+  }
+};
+
+} // llvm namespace
+
+#endif
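
The mask-building helpers in this header are specified mainly by the index patterns in their doc comments. As a sanity check on those patterns, the following self-contained C++ sketch recomputes two of them. It does not call the LLVM helpers themselves (createInterleaveMask and createStrideMask return Constant* and require an IRBuilder/LLVMContext); the interleaveMask/strideMask functions below are illustrative stand-ins, not LLVM API.

// Standalone sketch: reproduces the index patterns documented for
// createInterleaveMask and createStrideMask. It only shows the element
// orderings those helpers encode; it does not build LLVM constants.
#include <cstdio>
#include <vector>

// <0, VF, 2*VF, ..., 1, VF+1, ...>: interleave NumVecs vectors of length VF.
static std::vector<unsigned> interleaveMask(unsigned VF, unsigned NumVecs) {
  std::vector<unsigned> Mask;
  for (unsigned I = 0; I < VF; ++I)
    for (unsigned V = 0; V < NumVecs; ++V)
      Mask.push_back(V * VF + I);
  return Mask;
}

// <Start, Start+Stride, ..., Start+Stride*(VF-1)>: deinterleave one lane.
static std::vector<unsigned> strideMask(unsigned Start, unsigned Stride,
                                        unsigned VF) {
  std::vector<unsigned> Mask;
  for (unsigned I = 0; I < VF; ++I)
    Mask.push_back(Start + I * Stride);
  return Mask;
}

int main() {
  for (unsigned Idx : interleaveMask(4, 2)) // expect 0 4 1 5 2 6 3 7
    std::printf("%u ", Idx);
  std::printf("\n");
  for (unsigned Idx : strideMask(0, 2, 4))  // expect 0 2 4 6
    std::printf("%u ", Idx);
  std::printf("\n");
  return 0;
}

Compiled with any C++11 compiler, this prints the same sequences shown in the doc comments for createInterleaveMask (VF = 4, NumVecs = 2) and createStrideMask (Start = 0, Stride = 2, VF = 4).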
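InterleaveGroup::insertMember enforces that all member indices fit within the interleave factor by tracking SmallestKey and LargestKey. The toy model below mirrors that bookkeeping in a self-contained form, under the assumption that a plain std::map and ints standing in for instructions are enough to illustrate the logic; ToyGroup and its members are illustrative names, not LLVM API.

// Simplified standalone model of InterleaveGroup::insertMember's key
// bookkeeping: SmallestKey/LargestKey keep every member index inside the
// interleave factor. std::map replaces DenseMap; ints replace instructions.
#include <cassert>
#include <cstdio>
#include <map>

struct ToyGroup {
  int Factor;
  std::map<int, int> Members; // key -> "instruction"
  int SmallestKey = 0, LargestKey = 0;

  ToyGroup(int Leader, int Factor) : Factor(Factor) {
    assert(Factor > 1 && "Invalid interleave factor");
    Members[0] = Leader; // the leader always starts at key 0
  }

  // Mirrors insertMember: Index is relative to the current leader and may be
  // negative when the new member becomes the new first element of the group.
  bool insertMember(int Instr, int Index) {
    int Key = Index + SmallestKey;
    if (Members.count(Key))
      return false; // a member already occupies this position
    if (Key > LargestKey) {
      if (Index >= Factor)
        return false; // index would exceed the interleave factor
      LargestKey = Key;
    } else if (Key < SmallestKey) {
      if (LargestKey - Key >= Factor)
        return false; // group would span more than Factor slots
      SmallestKey = Key;
    }
    Members[Key] = Instr;
    return true;
  }
};

int main() {
  ToyGroup G(/*Leader=*/100, /*Factor=*/4);
  std::printf("%d\n", G.insertMember(101, 1));  // 1: fits (second slot of four)
  std::printf("%d\n", G.insertMember(102, -1)); // 1: becomes the new leader
  std::printf("%d\n", G.insertMember(103, 4));  // 0: rejected, index >= Factor
  return 0;
}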
