diff options
Diffstat (limited to 'clang-r353983e/include/llvm/ProfileData')
12 files changed, 5364 insertions, 0 deletions
diff --git a/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMapping.h b/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMapping.h new file mode 100644 index 00000000..11758ac4 --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -0,0 +1,818 @@ +//===- CoverageMapping.h - Code coverage mapping support --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Code coverage mapping data is generated by clang and read by +// llvm-cov to show code coverage statistics for a file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPING_H +#define LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPING_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <system_error> +#include <tuple> +#include <utility> +#include <vector> + +namespace llvm { + +class IndexedInstrProfReader; + +namespace coverage { + +class CoverageMappingReader; +struct CoverageMappingRecord; + +enum class coveragemap_error { + success = 0, + eof, + no_data_found, + unsupported_version, + truncated, + malformed +}; + +const std::error_category 
&coveragemap_category(); + +inline std::error_code make_error_code(coveragemap_error E) { + return std::error_code(static_cast<int>(E), coveragemap_category()); +} + +class CoverageMapError : public ErrorInfo<CoverageMapError> { +public: + CoverageMapError(coveragemap_error Err) : Err(Err) { + assert(Err != coveragemap_error::success && "Not an error"); + } + + std::string message() const override; + + void log(raw_ostream &OS) const override { OS << message(); } + + std::error_code convertToErrorCode() const override { + return make_error_code(Err); + } + + coveragemap_error get() const { return Err; } + + static char ID; + +private: + coveragemap_error Err; +}; + +/// A Counter is an abstract value that describes how to compute the +/// execution count for a region of code using the collected profile count data. +struct Counter { + enum CounterKind { Zero, CounterValueReference, Expression }; + static const unsigned EncodingTagBits = 2; + static const unsigned EncodingTagMask = 0x3; + static const unsigned EncodingCounterTagAndExpansionRegionTagBits = + EncodingTagBits + 1; + +private: + CounterKind Kind = Zero; + unsigned ID = 0; + + Counter(CounterKind Kind, unsigned ID) : Kind(Kind), ID(ID) {} + +public: + Counter() = default; + + CounterKind getKind() const { return Kind; } + + bool isZero() const { return Kind == Zero; } + + bool isExpression() const { return Kind == Expression; } + + unsigned getCounterID() const { return ID; } + + unsigned getExpressionID() const { return ID; } + + friend bool operator==(const Counter &LHS, const Counter &RHS) { + return LHS.Kind == RHS.Kind && LHS.ID == RHS.ID; + } + + friend bool operator!=(const Counter &LHS, const Counter &RHS) { + return !(LHS == RHS); + } + + friend bool operator<(const Counter &LHS, const Counter &RHS) { + return std::tie(LHS.Kind, LHS.ID) < std::tie(RHS.Kind, RHS.ID); + } + + /// Return the counter that represents the number zero. 
+ static Counter getZero() { return Counter(); } + + /// Return the counter that corresponds to a specific profile counter. + static Counter getCounter(unsigned CounterId) { + return Counter(CounterValueReference, CounterId); + } + + /// Return the counter that corresponds to a specific addition counter + /// expression. + static Counter getExpression(unsigned ExpressionId) { + return Counter(Expression, ExpressionId); + } +}; + +/// A Counter expression is a value that represents an arithmetic operation +/// with two counters. +struct CounterExpression { + enum ExprKind { Subtract, Add }; + ExprKind Kind; + Counter LHS, RHS; + + CounterExpression(ExprKind Kind, Counter LHS, Counter RHS) + : Kind(Kind), LHS(LHS), RHS(RHS) {} +}; + +/// A Counter expression builder is used to construct the counter expressions. +/// It avoids unnecessary duplication and simplifies algebraic expressions. +class CounterExpressionBuilder { + /// A list of all the counter expressions + std::vector<CounterExpression> Expressions; + + /// A lookup table for the index of a given expression. + DenseMap<CounterExpression, unsigned> ExpressionIndices; + + /// Return the counter which corresponds to the given expression. + /// + /// If the given expression is already stored in the builder, a counter + /// that references that expression is returned. Otherwise, the given + /// expression is added to the builder's collection of expressions. + Counter get(const CounterExpression &E); + + /// Represents a term in a counter expression tree. + struct Term { + unsigned CounterID; + int Factor; + + Term(unsigned CounterID, int Factor) + : CounterID(CounterID), Factor(Factor) {} + }; + + /// Gather the terms of the expression tree for processing. + /// + /// This collects each addition and subtraction referenced by the counter into + /// a sequence that can be sorted and combined to build a simplified counter + /// expression. 
+ void extractTerms(Counter C, int Sign, SmallVectorImpl<Term> &Terms); + + /// Simplifies the given expression tree + /// by getting rid of algebraically redundant operations. + Counter simplify(Counter ExpressionTree); + +public: + ArrayRef<CounterExpression> getExpressions() const { return Expressions; } + + /// Return a counter that represents the expression that adds LHS and RHS. + Counter add(Counter LHS, Counter RHS); + + /// Return a counter that represents the expression that subtracts RHS from + /// LHS. + Counter subtract(Counter LHS, Counter RHS); +}; + +using LineColPair = std::pair<unsigned, unsigned>; + +/// A Counter mapping region associates a source range with a specific counter. +struct CounterMappingRegion { + enum RegionKind { + /// A CodeRegion associates some code with a counter + CodeRegion, + + /// An ExpansionRegion represents a file expansion region that associates + /// a source range with the expansion of a virtual source file, such as + /// for a macro instantiation or #include file. + ExpansionRegion, + + /// A SkippedRegion represents a source range with code that was skipped + /// by a preprocessor or similar means. + SkippedRegion, + + /// A GapRegion is like a CodeRegion, but its count is only set as the + /// line execution count when its the only region in the line. 
+ GapRegion + }; + + Counter Count; + unsigned FileID, ExpandedFileID; + unsigned LineStart, ColumnStart, LineEnd, ColumnEnd; + RegionKind Kind; + + CounterMappingRegion(Counter Count, unsigned FileID, unsigned ExpandedFileID, + unsigned LineStart, unsigned ColumnStart, + unsigned LineEnd, unsigned ColumnEnd, RegionKind Kind) + : Count(Count), FileID(FileID), ExpandedFileID(ExpandedFileID), + LineStart(LineStart), ColumnStart(ColumnStart), LineEnd(LineEnd), + ColumnEnd(ColumnEnd), Kind(Kind) {} + + static CounterMappingRegion + makeRegion(Counter Count, unsigned FileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Count, FileID, 0, LineStart, ColumnStart, + LineEnd, ColumnEnd, CodeRegion); + } + + static CounterMappingRegion + makeExpansion(unsigned FileID, unsigned ExpandedFileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Counter(), FileID, ExpandedFileID, LineStart, + ColumnStart, LineEnd, ColumnEnd, + ExpansionRegion); + } + + static CounterMappingRegion + makeSkipped(unsigned FileID, unsigned LineStart, unsigned ColumnStart, + unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Counter(), FileID, 0, LineStart, ColumnStart, + LineEnd, ColumnEnd, SkippedRegion); + } + + static CounterMappingRegion + makeGapRegion(Counter Count, unsigned FileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Count, FileID, 0, LineStart, ColumnStart, + LineEnd, (1U << 31) | ColumnEnd, GapRegion); + } + + inline LineColPair startLoc() const { + return LineColPair(LineStart, ColumnStart); + } + + inline LineColPair endLoc() const { return LineColPair(LineEnd, ColumnEnd); } +}; + +/// Associates a source range with an execution count. 
+struct CountedRegion : public CounterMappingRegion {
+  /// Execution count associated with the region.
+  uint64_t ExecutionCount;
+
+  CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount)
+      : CounterMappingRegion(R), ExecutionCount(ExecutionCount) {}
+};
+
+/// A Counter mapping context is used to connect the counters, expressions
+/// and the obtained counter values.
+class CounterMappingContext {
+  ArrayRef<CounterExpression> Expressions;
+  ArrayRef<uint64_t> CounterValues;
+
+public:
+  /// \param Expressions   the counter expressions to evaluate against.
+  /// \param CounterValues the collected counter values; may be supplied later
+  ///                      through setCounts().
+  CounterMappingContext(ArrayRef<CounterExpression> Expressions,
+                        ArrayRef<uint64_t> CounterValues = None)
+      : Expressions(Expressions), CounterValues(CounterValues) {}
+
+  /// Point the context at a different set of counter values.
+  void setCounts(ArrayRef<uint64_t> Counts) { CounterValues = Counts; }
+
+  void dump(const Counter &C, raw_ostream &OS) const;
+  /// Convenience overload that dumps to dbgs().
+  void dump(const Counter &C) const { dump(C, dbgs()); }
+
+  /// Return the number of times that a region of code associated with this
+  /// counter was executed.
+  Expected<int64_t> evaluate(const Counter &C) const;
+};
+
+/// Code coverage information for a single function.
+struct FunctionRecord {
+  /// Raw function name.
+  std::string Name;
+  /// Associated files.
+  std::vector<std::string> Filenames;
+  /// Regions in the function along with their counts.
+  std::vector<CountedRegion> CountedRegions;
+  /// The number of times this function was executed. This is the count of the
+  /// first region handed to pushRegion(). It starts at zero so that a record
+  /// without any regions still reports a well-defined value instead of an
+  /// indeterminate one.
+  uint64_t ExecutionCount = 0;
+
+  /// Create a record with no regions; \p Filenames is copied into owned
+  /// strings.
+  FunctionRecord(StringRef Name, ArrayRef<StringRef> Filenames)
+      : Name(Name), Filenames(Filenames.begin(), Filenames.end()) {}
+
+  FunctionRecord(FunctionRecord &&FR) = default;
+  FunctionRecord &operator=(FunctionRecord &&) = default;
+
+  /// Append \p Region together with its execution \p Count. The count of the
+  /// first region pushed also becomes the function's ExecutionCount.
+  void pushRegion(CounterMappingRegion Region, uint64_t Count) {
+    if (CountedRegions.empty())
+      ExecutionCount = Count;
+    CountedRegions.emplace_back(Region, Count);
+  }
+};
+
+/// Iterator over Functions, optionally filtered to a single file.
+class FunctionRecordIterator + : public iterator_facade_base<FunctionRecordIterator, + std::forward_iterator_tag, FunctionRecord> { + ArrayRef<FunctionRecord> Records; + ArrayRef<FunctionRecord>::iterator Current; + StringRef Filename; + + /// Skip records whose primary file is not \c Filename. + void skipOtherFiles(); + +public: + FunctionRecordIterator(ArrayRef<FunctionRecord> Records_, + StringRef Filename = "") + : Records(Records_), Current(Records.begin()), Filename(Filename) { + skipOtherFiles(); + } + + FunctionRecordIterator() : Current(Records.begin()) {} + + bool operator==(const FunctionRecordIterator &RHS) const { + return Current == RHS.Current && Filename == RHS.Filename; + } + + const FunctionRecord &operator*() const { return *Current; } + + FunctionRecordIterator &operator++() { + assert(Current != Records.end() && "incremented past end"); + ++Current; + skipOtherFiles(); + return *this; + } +}; + +/// Coverage information for a macro expansion or #included file. +/// +/// When covered code has pieces that can be expanded for more detail, such as a +/// preprocessor macro use and its definition, these are represented as +/// expansions whose coverage can be looked up independently. +struct ExpansionRecord { + /// The abstract file this expansion covers. + unsigned FileID; + /// The region that expands to this record. + const CountedRegion &Region; + /// Coverage for the expansion. + const FunctionRecord &Function; + + ExpansionRecord(const CountedRegion &Region, + const FunctionRecord &Function) + : FileID(Region.ExpandedFileID), Region(Region), Function(Function) {} +}; + +/// The execution count information starting at a point in a file. +/// +/// A sequence of CoverageSegments gives execution counts for a file in format +/// that's simple to iterate through for processing. +struct CoverageSegment { + /// The line where this segment begins. + unsigned Line; + /// The column where this segment begins. 
+ unsigned Col; + /// The execution count, or zero if no count was recorded. + uint64_t Count; + /// When false, the segment was uninstrumented or skipped. + bool HasCount; + /// Whether this enters a new region or returns to a previous count. + bool IsRegionEntry; + /// Whether this enters a gap region. + bool IsGapRegion; + + CoverageSegment(unsigned Line, unsigned Col, bool IsRegionEntry) + : Line(Line), Col(Col), Count(0), HasCount(false), + IsRegionEntry(IsRegionEntry), IsGapRegion(false) {} + + CoverageSegment(unsigned Line, unsigned Col, uint64_t Count, + bool IsRegionEntry, bool IsGapRegion = false) + : Line(Line), Col(Col), Count(Count), HasCount(true), + IsRegionEntry(IsRegionEntry), IsGapRegion(IsGapRegion) {} + + friend bool operator==(const CoverageSegment &L, const CoverageSegment &R) { + return std::tie(L.Line, L.Col, L.Count, L.HasCount, L.IsRegionEntry, + L.IsGapRegion) == std::tie(R.Line, R.Col, R.Count, + R.HasCount, R.IsRegionEntry, + R.IsGapRegion); + } +}; + +/// An instantiation group contains a \c FunctionRecord list, such that each +/// record corresponds to a distinct instantiation of the same function. +/// +/// Note that it's possible for a function to have more than one instantiation +/// (consider C++ template specializations or static inline functions). +class InstantiationGroup { + friend class CoverageMapping; + + unsigned Line; + unsigned Col; + std::vector<const FunctionRecord *> Instantiations; + + InstantiationGroup(unsigned Line, unsigned Col, + std::vector<const FunctionRecord *> Instantiations) + : Line(Line), Col(Col), Instantiations(std::move(Instantiations)) {} + +public: + InstantiationGroup(const InstantiationGroup &) = delete; + InstantiationGroup(InstantiationGroup &&) = default; + + /// Get the number of instantiations in this group. + size_t size() const { return Instantiations.size(); } + + /// Get the line where the common function was defined. 
+ unsigned getLine() const { return Line; } + + /// Get the column where the common function was defined. + unsigned getColumn() const { return Col; } + + /// Check if the instantiations in this group have a common mangled name. + bool hasName() const { + for (unsigned I = 1, E = Instantiations.size(); I < E; ++I) + if (Instantiations[I]->Name != Instantiations[0]->Name) + return false; + return true; + } + + /// Get the common mangled name for instantiations in this group. + StringRef getName() const { + assert(hasName() && "Instantiations don't have a shared name"); + return Instantiations[0]->Name; + } + + /// Get the total execution count of all instantiations in this group. + uint64_t getTotalExecutionCount() const { + uint64_t Count = 0; + for (const FunctionRecord *F : Instantiations) + Count += F->ExecutionCount; + return Count; + } + + /// Get the instantiations in this group. + ArrayRef<const FunctionRecord *> getInstantiations() const { + return Instantiations; + } +}; + +/// Coverage information to be processed or displayed. +/// +/// This represents the coverage of an entire file, expansion, or function. It +/// provides a sequence of CoverageSegments to iterate through, as well as the +/// list of expansions that can be further processed. +class CoverageData { + friend class CoverageMapping; + + std::string Filename; + std::vector<CoverageSegment> Segments; + std::vector<ExpansionRecord> Expansions; + +public: + CoverageData() = default; + + CoverageData(StringRef Filename) : Filename(Filename) {} + + /// Get the name of the file this data covers. + StringRef getFilename() const { return Filename; } + + /// Get an iterator over the coverage segments for this object. The segments + /// are guaranteed to be uniqued and sorted by location. 
+ std::vector<CoverageSegment>::const_iterator begin() const { + return Segments.begin(); + } + + std::vector<CoverageSegment>::const_iterator end() const { + return Segments.end(); + } + + bool empty() const { return Segments.empty(); } + + /// Expansions that can be further processed. + ArrayRef<ExpansionRecord> getExpansions() const { return Expansions; } +}; + +/// The mapping of profile information to coverage data. +/// +/// This is the main interface to get coverage information, using a profile to +/// fill out execution counts. +class CoverageMapping { + DenseMap<size_t, DenseSet<size_t>> RecordProvenance; + std::vector<FunctionRecord> Functions; + std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches; + + CoverageMapping() = default; + + /// Add a function record corresponding to \p Record. + Error loadFunctionRecord(const CoverageMappingRecord &Record, + IndexedInstrProfReader &ProfileReader); + +public: + CoverageMapping(const CoverageMapping &) = delete; + CoverageMapping &operator=(const CoverageMapping &) = delete; + + /// Load the coverage mapping using the given readers. + static Expected<std::unique_ptr<CoverageMapping>> + load(ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders, + IndexedInstrProfReader &ProfileReader); + + /// Load the coverage mapping from the given object files and profile. If + /// \p Arches is non-empty, it must specify an architecture for each object. + static Expected<std::unique_ptr<CoverageMapping>> + load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename, + ArrayRef<StringRef> Arches = None); + + /// The number of functions that couldn't have their profiles mapped. + /// + /// This is a count of functions whose profile is out of date or otherwise + /// can't be associated with any coverage information. 
+ unsigned getMismatchedCount() const { return FuncHashMismatches.size(); } + + /// A hash mismatch occurs when a profile record for a symbol does not have + /// the same hash as a coverage mapping record for the same symbol. This + /// returns a list of hash mismatches, where each mismatch is a pair of the + /// symbol name and its coverage mapping hash. + ArrayRef<std::pair<std::string, uint64_t>> getHashMismatches() const { + return FuncHashMismatches; + } + + /// Returns a lexicographically sorted, unique list of files that are + /// covered. + std::vector<StringRef> getUniqueSourceFiles() const; + + /// Get the coverage for a particular file. + /// + /// The given filename must be the name as recorded in the coverage + /// information. That is, only names returned from getUniqueSourceFiles will + /// yield a result. + CoverageData getCoverageForFile(StringRef Filename) const; + + /// Get the coverage for a particular function. + CoverageData getCoverageForFunction(const FunctionRecord &Function) const; + + /// Get the coverage for an expansion within a coverage set. + CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion) const; + + /// Gets all of the functions covered by this profile. + iterator_range<FunctionRecordIterator> getCoveredFunctions() const { + return make_range(FunctionRecordIterator(Functions), + FunctionRecordIterator()); + } + + /// Gets all of the functions in a particular file. + iterator_range<FunctionRecordIterator> + getCoveredFunctions(StringRef Filename) const { + return make_range(FunctionRecordIterator(Functions, Filename), + FunctionRecordIterator()); + } + + /// Get the list of function instantiation groups in a particular file. + /// + /// Every instantiation group in a program is attributed to exactly one file: + /// the file in which the definition for the common function begins. + std::vector<InstantiationGroup> + getInstantiationGroups(StringRef Filename) const; +}; + +/// Coverage statistics for a single line. 
+class LineCoverageStats { + uint64_t ExecutionCount; + bool HasMultipleRegions; + bool Mapped; + unsigned Line; + ArrayRef<const CoverageSegment *> LineSegments; + const CoverageSegment *WrappedSegment; + + friend class LineCoverageIterator; + LineCoverageStats() = default; + +public: + LineCoverageStats(ArrayRef<const CoverageSegment *> LineSegments, + const CoverageSegment *WrappedSegment, unsigned Line); + + uint64_t getExecutionCount() const { return ExecutionCount; } + + bool hasMultipleRegions() const { return HasMultipleRegions; } + + bool isMapped() const { return Mapped; } + + unsigned getLine() const { return Line; } + + ArrayRef<const CoverageSegment *> getLineSegments() const { + return LineSegments; + } + + const CoverageSegment *getWrappedSegment() const { return WrappedSegment; } +}; + +/// An iterator over the \c LineCoverageStats objects for lines described by +/// a \c CoverageData instance. +class LineCoverageIterator + : public iterator_facade_base< + LineCoverageIterator, std::forward_iterator_tag, LineCoverageStats> { +public: + /// Start at the line of the first segment in \p CD. + /// NOTE(review): this dereferences CD.begin() unconditionally, so \p CD + /// presumably must be non-empty -- confirm at call sites. + LineCoverageIterator(const CoverageData &CD) + : LineCoverageIterator(CD, CD.begin()->Line) {} + + /// Start at \p Line. The constructor calls operator++ once before returning. + LineCoverageIterator(const CoverageData &CD, unsigned Line) + : CD(CD), WrappedSegment(nullptr), Next(CD.begin()), Ended(false), + Line(Line), Segments(), Stats() { + this->operator++(); + } + + /// Two iterators are equal when they refer to the same CoverageData object + /// and agree on Next and Ended. + bool operator==(const LineCoverageIterator &R) const { + return &CD == &R.CD && Next == R.Next && Ended == R.Ended; + } + + const LineCoverageStats &operator*() const { return Stats; } + + LineCoverageStats &operator*() { return Stats; } + + LineCoverageIterator &operator++(); + + /// Return a copy of this iterator with Next moved to CD.end() and Ended set. + LineCoverageIterator getEnd() const { + auto EndIt = *this; + EndIt.Next = CD.end(); + EndIt.Ended = true; + return EndIt; + } + +private: + const CoverageData &CD; + const CoverageSegment *WrappedSegment; + std::vector<CoverageSegment>::const_iterator Next; + bool Ended; + unsigned Line; + SmallVector<const CoverageSegment *, 4> Segments; +
LineCoverageStats Stats; +}; + +/// Get a \c LineCoverageIterator range for the lines described by \p CD. +static inline iterator_range<LineCoverageIterator> +getLineCoverageStats(const coverage::CoverageData &CD) { + auto Begin = LineCoverageIterator(CD); + auto End = Begin.getEnd(); + return make_range(Begin, End); +} + +// Profile coverage map has the following layout: +// [CoverageMapFileHeader] +// [ArrayStart] +// [CovMapFunctionRecord] +// [CovMapFunctionRecord] +// ... +// [ArrayEnd] +// [Encoded Region Mapping Data] +LLVM_PACKED_START +template <class IntPtrT> struct CovMapFunctionRecordV1 { +#define COVMAP_V1 +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +#undef COVMAP_V1 + + // Return the structural hash associated with the function. + template <support::endianness Endian> uint64_t getFuncHash() const { + return support::endian::byte_swap<uint64_t, Endian>(FuncHash); + } + + // Return the coverage map data size for the function. + template <support::endianness Endian> uint32_t getDataSize() const { + return support::endian::byte_swap<uint32_t, Endian>(DataSize); + } + + // Return function lookup key. The value is considered opaque.
+ template <support::endianness Endian> IntPtrT getFuncNameRef() const { + return support::endian::byte_swap<IntPtrT, Endian>(NamePtr); + } + + // Return the PGO name of the function through FuncName. Fails with + // coveragemap_error::malformed when the recorded name size is non-zero but + // the looked-up name is empty. + template <support::endianness Endian> + Error getFuncName(InstrProfSymtab &ProfileNames, StringRef &FuncName) const { + IntPtrT NameRef = getFuncNameRef<Endian>(); + uint32_t NameS = support::endian::byte_swap<uint32_t, Endian>(NameSize); + FuncName = ProfileNames.getFuncName(NameRef, NameS); + if (NameS && FuncName.empty()) + return make_error<CoverageMapError>(coveragemap_error::malformed); + return Error::success(); + } +}; + +struct CovMapFunctionRecord { +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + + // Return the structural hash associated with the function. + template <support::endianness Endian> uint64_t getFuncHash() const { + return support::endian::byte_swap<uint64_t, Endian>(FuncHash); + } + + // Return the coverage map data size for the function. + template <support::endianness Endian> uint32_t getDataSize() const { + return support::endian::byte_swap<uint32_t, Endian>(DataSize); + } + + // Return function lookup key. The value is considered opaque. + template <support::endianness Endian> uint64_t getFuncNameRef() const { + return support::endian::byte_swap<uint64_t, Endian>(NameRef); + } + + // Return the PGO name of the function through FuncName. This version + // always returns Error::success(). + template <support::endianness Endian> + Error getFuncName(InstrProfSymtab &ProfileNames, StringRef &FuncName) const { + uint64_t NameRef = getFuncNameRef<Endian>(); + FuncName = ProfileNames.getFuncName(NameRef); + return Error::success(); + } +}; + +// Per module coverage mapping data header, i.e. CoverageMapFileHeader +// documented above.
+struct CovMapHeader { +#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + template <support::endianness Endian> uint32_t getNRecords() const { + return support::endian::byte_swap<uint32_t, Endian>(NRecords); + } + + template <support::endianness Endian> uint32_t getFilenamesSize() const { + return support::endian::byte_swap<uint32_t, Endian>(FilenamesSize); + } + + template <support::endianness Endian> uint32_t getCoverageSize() const { + return support::endian::byte_swap<uint32_t, Endian>(CoverageSize); + } + + template <support::endianness Endian> uint32_t getVersion() const { + return support::endian::byte_swap<uint32_t, Endian>(Version); + } +}; + +LLVM_PACKED_END + +enum CovMapVersion { + Version1 = 0, + // Function's name reference from CovMapFuncRecord is changed from raw + // name string pointer to MD5 to support name section compression. Name + // section is also compressed. + Version2 = 1, + // A new interpretation of the columnEnd field is added in order to mark + // regions as gap areas. 
+ Version3 = 2, + // The current version is Version3 + CurrentVersion = INSTR_PROF_COVMAP_VERSION +}; + +template <int CovMapVersion, class IntPtrT> struct CovMapTraits { + using CovMapFuncRecordType = CovMapFunctionRecord; + using NameRefType = uint64_t; +}; + +template <class IntPtrT> struct CovMapTraits<CovMapVersion::Version1, IntPtrT> { + using CovMapFuncRecordType = CovMapFunctionRecordV1<IntPtrT>; + using NameRefType = IntPtrT; +}; + +} // end namespace coverage + +/// Provide DenseMapInfo for CounterExpression +template<> struct DenseMapInfo<coverage::CounterExpression> { + static inline coverage::CounterExpression getEmptyKey() { + using namespace coverage; + + return CounterExpression(CounterExpression::ExprKind::Subtract, + Counter::getCounter(~0U), + Counter::getCounter(~0U)); + } + + static inline coverage::CounterExpression getTombstoneKey() { + using namespace coverage; + + return CounterExpression(CounterExpression::ExprKind::Add, + Counter::getCounter(~0U), + Counter::getCounter(~0U)); + } + + static unsigned getHashValue(const coverage::CounterExpression &V) { + return static_cast<unsigned>( + hash_combine(V.Kind, V.LHS.getKind(), V.LHS.getCounterID(), + V.RHS.getKind(), V.RHS.getCounterID())); + } + + static bool isEqual(const coverage::CounterExpression &LHS, + const coverage::CounterExpression &RHS) { + return LHS.Kind == RHS.Kind && LHS.LHS == RHS.LHS && LHS.RHS == RHS.RHS; + } +}; + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPING_H diff --git a/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMappingReader.h new file mode 100644 index 00000000..dbb1976d --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMappingReader.h @@ -0,0 +1,216 @@ +//===- CoverageMappingReader.h - Code coverage mapping reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGREADER_H +#define LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGREADER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ProfileData/Coverage/CoverageMapping.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <memory> +#include <vector> + +namespace llvm { +namespace coverage { + +class CoverageMappingReader; + +/// Coverage mapping information for a single function. +struct CoverageMappingRecord { + StringRef FunctionName; + uint64_t FunctionHash; + ArrayRef<StringRef> Filenames; + ArrayRef<CounterExpression> Expressions; + ArrayRef<CounterMappingRegion> MappingRegions; +}; + +/// A file format agnostic iterator over coverage mapping data. 
+class CoverageMappingIterator
+    : public std::iterator<std::input_iterator_tag, CoverageMappingRecord> {
+  /// Reader that supplies the records; null for a default-constructed
+  /// iterator such as the one returned by CoverageMappingReader::end().
+  CoverageMappingReader *Reader;
+  /// The record handed out by operator* and operator->.
+  CoverageMappingRecord Record;
+  /// Pending error code; reported and cleared by the next dereference.
+  coveragemap_error ReadErr;
+
+  /// Advance to the next record. Defined out of line.
+  /// NOTE(review): presumably fills Record/ReadErr from Reader and nulls
+  /// Reader at end of input so the iterator compares equal to end() --
+  /// confirm in the implementation file.
+  void increment();
+
+public:
+  /// Create an iterator with no reader attached.
+  CoverageMappingIterator()
+      : Reader(nullptr), Record(), ReadErr(coveragemap_error::success) {}
+
+  /// Create an iterator over \p Reader and immediately read the first record.
+  CoverageMappingIterator(CoverageMappingReader *Reader)
+      : Reader(Reader), Record(), ReadErr(coveragemap_error::success) {
+    increment();
+  }
+
+  /// A pending error that was never surfaced through operator* or operator->
+  /// is treated as a programming error.
+  ~CoverageMappingIterator() {
+    if (ReadErr != coveragemap_error::success)
+      llvm_unreachable("Unexpected error in coverage mapping iterator");
+  }
+
+  CoverageMappingIterator &operator++() {
+    increment();
+    return *this;
+  }
+
+  /// Iterators compare equal when they refer to the same reader. Neither the
+  /// current record nor a pending error takes part in the comparison. The
+  /// comparison operators are const-qualified so that const iterators can be
+  /// compared as well.
+  bool operator==(const CoverageMappingIterator &RHS) const {
+    return Reader == RHS.Reader;
+  }
+  bool operator!=(const CoverageMappingIterator &RHS) const {
+    return Reader != RHS.Reader;
+  }
+
+  /// Return the current record, or the pending read error if there is one.
+  /// Returning the error clears it from the iterator.
+  Expected<CoverageMappingRecord &> operator*() {
+    if (ReadErr != coveragemap_error::success) {
+      auto E = make_error<CoverageMapError>(ReadErr);
+      ReadErr = coveragemap_error::success;
+      return std::move(E);
+    }
+    return Record;
+  }
+
+  /// Pointer flavour of operator*, with the same error behaviour.
+  Expected<CoverageMappingRecord *> operator->() {
+    if (ReadErr != coveragemap_error::success) {
+      auto E = make_error<CoverageMapError>(ReadErr);
+      ReadErr = coveragemap_error::success;
+      return std::move(E);
+    }
+    return &Record;
+  }
+};
+
+/// Abstract source of coverage mapping records.
+class CoverageMappingReader {
+public:
+  virtual ~CoverageMappingReader() = default;
+
+  /// Read the next record into \p Record.
+  virtual Error readNextRecord(CoverageMappingRecord &Record) = 0;
+  /// Iterator attached to this reader; constructing it reads the first record.
+  CoverageMappingIterator begin() { return CoverageMappingIterator(this); }
+  /// Iterator with no reader attached.
+  CoverageMappingIterator end() { return CoverageMappingIterator(); }
+};
+
+/// Base class for the raw coverage mapping and filenames data readers.
+class RawCoverageReader { +protected: + StringRef Data; + + RawCoverageReader(StringRef Data) : Data(Data) {} + + Error readULEB128(uint64_t &Result); + Error readIntMax(uint64_t &Result, uint64_t MaxPlus1); + Error readSize(uint64_t &Result); + Error readString(StringRef &Result); +}; + +/// Reader for the raw coverage filenames. +/// +/// Keeps a reference to the caller's \c Filenames vector rather than a copy, +/// and is neither copy-constructible nor copy-assignable. +class RawCoverageFilenamesReader : public RawCoverageReader { + std::vector<StringRef> &Filenames; + +public: + RawCoverageFilenamesReader(StringRef Data, std::vector<StringRef> &Filenames) + : RawCoverageReader(Data), Filenames(Filenames) {} + RawCoverageFilenamesReader(const RawCoverageFilenamesReader &) = delete; + RawCoverageFilenamesReader & + operator=(const RawCoverageFilenamesReader &) = delete; + + Error read(); +}; + +/// Checks if the given coverage mapping data is exported for +/// an unused function. +class RawCoverageMappingDummyChecker : public RawCoverageReader { +public: + RawCoverageMappingDummyChecker(StringRef MappingData) + : RawCoverageReader(MappingData) {} + + /// The answer is wrapped in Expected, so the check itself can fail. + Expected<bool> isDummy(); +}; + +/// Reader for the raw coverage mapping data.
+class RawCoverageMappingReader : public RawCoverageReader { + ArrayRef<StringRef> TranslationUnitFilenames; + // The three vectors below are bound by reference in the constructor; this + // class stores no copies of them. + std::vector<StringRef> &Filenames; + std::vector<CounterExpression> &Expressions; + std::vector<CounterMappingRegion> &MappingRegions; + +public: + RawCoverageMappingReader(StringRef MappingData, + ArrayRef<StringRef> TranslationUnitFilenames, + std::vector<StringRef> &Filenames, + std::vector<CounterExpression> &Expressions, + std::vector<CounterMappingRegion> &MappingRegions) + : RawCoverageReader(MappingData), + TranslationUnitFilenames(TranslationUnitFilenames), + Filenames(Filenames), Expressions(Expressions), + MappingRegions(MappingRegions) {} + RawCoverageMappingReader(const RawCoverageMappingReader &) = delete; + RawCoverageMappingReader & + operator=(const RawCoverageMappingReader &) = delete; + + Error read(); + +private: + Error decodeCounter(unsigned Value, Counter &C); + Error readCounter(Counter &C); + Error + readMappingRegionsSubArray(std::vector<CounterMappingRegion> &MappingRegions, + unsigned InferredFileID, size_t NumFileIDs); +}; + +/// Reader for the coverage mapping data that is emitted by the +/// frontend and stored in an object file.
+class BinaryCoverageReader : public CoverageMappingReader { +public: + /// Per-function record; the constructor copies each argument into the + /// field of the same name. + struct ProfileMappingRecord { + CovMapVersion Version; + StringRef FunctionName; + uint64_t FunctionHash; + StringRef CoverageMapping; + size_t FilenamesBegin; + size_t FilenamesSize; + + ProfileMappingRecord(CovMapVersion Version, StringRef FunctionName, + uint64_t FunctionHash, StringRef CoverageMapping, + size_t FilenamesBegin, size_t FilenamesSize) + : Version(Version), FunctionName(FunctionName), + FunctionHash(FunctionHash), CoverageMapping(CoverageMapping), + FilenamesBegin(FilenamesBegin), FilenamesSize(FilenamesSize) {} + }; + +private: + std::vector<StringRef> Filenames; + std::vector<ProfileMappingRecord> MappingRecords; + InstrProfSymtab ProfileNames; + size_t CurrentRecord = 0; + std::vector<StringRef> FunctionsFilenames; + std::vector<CounterExpression> Expressions; + std::vector<CounterMappingRegion> MappingRegions; + + // The default constructor is private; create() below is the public way to + // obtain an instance. + BinaryCoverageReader() = default; + +public: + BinaryCoverageReader(const BinaryCoverageReader &) = delete; + BinaryCoverageReader &operator=(const BinaryCoverageReader &) = delete; + + static Expected<std::unique_ptr<BinaryCoverageReader>> + create(std::unique_ptr<MemoryBuffer> &ObjectBuffer, + StringRef Arch); + + Error readNextRecord(CoverageMappingRecord &Record) override; +}; + +} // end namespace coverage +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGREADER_H diff --git a/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h new file mode 100644 index 00000000..5f88cacd --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h @@ -0,0 +1,61 @@ +//===- CoverageMappingWriter.h - Code coverage mapping writer ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGWRITER_H +#define LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGWRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ProfileData/Coverage/CoverageMapping.h" + +namespace llvm { + +class raw_ostream; + +namespace coverage { + +/// Writer of the filenames section for the instrumentation +/// based code coverage. +class CoverageFilenamesSectionWriter { + ArrayRef<StringRef> Filenames; + +public: + CoverageFilenamesSectionWriter(ArrayRef<StringRef> Filenames) + : Filenames(Filenames) {} + + /// Write encoded filenames to the given output stream. + void write(raw_ostream &OS); +}; + +/// Writer for instrumentation based coverage mapping data. +/// +/// The members are ArrayRef / MutableArrayRef objects initialised from the +/// constructor arguments. +class CoverageMappingWriter { + ArrayRef<unsigned> VirtualFileMapping; + ArrayRef<CounterExpression> Expressions; + MutableArrayRef<CounterMappingRegion> MappingRegions; + +public: + CoverageMappingWriter(ArrayRef<unsigned> VirtualFileMapping, + ArrayRef<CounterExpression> Expressions, + MutableArrayRef<CounterMappingRegion> MappingRegions) + : VirtualFileMapping(VirtualFileMapping), Expressions(Expressions), + MappingRegions(MappingRegions) {} + + /// Write encoded coverage mapping data to the given output stream.
+ void write(raw_ostream &OS); +}; + +} // end namespace coverage + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_COVERAGE_COVERAGEMAPPINGWRITER_H diff --git a/clang-r353983e/include/llvm/ProfileData/GCOV.h b/clang-r353983e/include/llvm/ProfileData/GCOV.h new file mode 100644 index 00000000..27b76b57 --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/GCOV.h @@ -0,0 +1,476 @@ +//===- GCOV.h - LLVM coverage tool ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header provides the interface to read and write coverage files that +// use 'gcov' format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_GCOV_H +#define LLVM_PROFILEDATA_GCOV_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <memory> +#include <string> +#include <utility> + +namespace llvm { + +class GCOVFunction; +class GCOVBlock; +class FileInfo; + +namespace GCOV { + +enum GCOVVersion { V402, V404, V704 }; + +/// A struct for passing gcov options between functions.
+struct Options { + Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N) + : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), + PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {} + + bool AllBlocks; + bool BranchInfo; + bool BranchCount; + bool FuncCoverage; + bool PreservePaths; + bool UncondBranch; + bool LongFileNames; + bool NoOutput; +}; + +} // end namespace GCOV + +/// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific +/// read operations. +class GCOVBuffer { +public: + GCOVBuffer(MemoryBuffer *B) : Buffer(B) {} + + /// readGCNOFormat - Check GCNO signature is valid at the beginning of buffer. + bool readGCNOFormat() { + StringRef File = Buffer->getBuffer().slice(0, 4); + if (File != "oncg") { + errs() << "Unexpected file type: " << File << ".\n"; + return false; + } + Cursor = 4; + return true; + } + + /// readGCDAFormat - Check GCDA signature is valid at the beginning of buffer. + bool readGCDAFormat() { + StringRef File = Buffer->getBuffer().slice(0, 4); + if (File != "adcg") { + errs() << "Unexpected file type: " << File << ".\n"; + return false; + } + Cursor = 4; + return true; + } + + /// readGCOVVersion - Read GCOV version. + bool readGCOVVersion(GCOV::GCOVVersion &Version) { + StringRef VersionStr = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (VersionStr == "*204") { + Cursor += 4; + Version = GCOV::V402; + return true; + } + if (VersionStr == "*404") { + Cursor += 4; + Version = GCOV::V404; + return true; + } + if (VersionStr == "*704") { + Cursor += 4; + Version = GCOV::V704; + return true; + } + errs() << "Unexpected version: " << VersionStr << ".\n"; + return false; + } + + /// readFunctionTag - If cursor points to a function tag then increment the + /// cursor and return true otherwise return false. 
+ bool readFunctionTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\0' || + Tag[3] != '\1') { + return false; + } + Cursor += 4; + return true; + } + + /// readBlockTag - If cursor points to a block tag then increment the + /// cursor and return true otherwise return false. + bool readBlockTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\x41' || + Tag[3] != '\x01') { + return false; + } + Cursor += 4; + return true; + } + + /// readEdgeTag - If cursor points to an edge tag then increment the + /// cursor and return true otherwise return false. + bool readEdgeTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\x43' || + Tag[3] != '\x01') { + return false; + } + Cursor += 4; + return true; + } + + /// readLineTag - If cursor points to a line tag then increment the + /// cursor and return true otherwise return false. + bool readLineTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\x45' || + Tag[3] != '\x01') { + return false; + } + Cursor += 4; + return true; + } + + /// readArcTag - If cursor points to an gcda arc tag then increment the + /// cursor and return true otherwise return false. + bool readArcTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\xa1' || + Tag[3] != '\1') { + return false; + } + Cursor += 4; + return true; + } + + /// readObjectTag - If cursor points to an object summary tag then increment + /// the cursor and return true otherwise return false. 
+ bool readObjectTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\0' || + Tag[3] != '\xa1') { + return false; + } + Cursor += 4; + return true; + } + + /// readProgramTag - If cursor points to a program summary tag then increment + /// the cursor and return true otherwise return false. + bool readProgramTag() { + StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4); + if (Tag.empty() || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\0' || + Tag[3] != '\xa3') { + return false; + } + Cursor += 4; + return true; + } + + bool readInt(uint32_t &Val) { + if (Buffer->getBuffer().size() < Cursor + 4) { + errs() << "Unexpected end of memory buffer: " << Cursor + 4 << ".\n"; + return false; + } + StringRef Str = Buffer->getBuffer().slice(Cursor, Cursor + 4); + Cursor += 4; + Val = *(const uint32_t *)(Str.data()); + return true; + } + + bool readInt64(uint64_t &Val) { + uint32_t Lo, Hi; + if (!readInt(Lo) || !readInt(Hi)) + return false; + Val = ((uint64_t)Hi << 32) | Lo; + return true; + } + + bool readString(StringRef &Str) { + uint32_t Len = 0; + // Keep reading until we find a non-zero length. This emulates gcov's + // behaviour, which appears to do the same. + while (Len == 0) + if (!readInt(Len)) + return false; + Len *= 4; + if (Buffer->getBuffer().size() < Cursor + Len) { + errs() << "Unexpected end of memory buffer: " << Cursor + Len << ".\n"; + return false; + } + Str = Buffer->getBuffer().slice(Cursor, Cursor + Len).split('\0').first; + Cursor += Len; + return true; + } + + uint64_t getCursor() const { return Cursor; } + void advanceCursor(uint32_t n) { Cursor += n * 4; } + +private: + MemoryBuffer *Buffer; + uint64_t Cursor = 0; +}; + +/// GCOVFile - Collects coverage information for one pair of coverage file +/// (.gcno and .gcda). 
+class GCOVFile { +public: + GCOVFile() = default; + + bool readGCNO(GCOVBuffer &Buffer); + bool readGCDA(GCOVBuffer &Buffer); + uint32_t getChecksum() const { return Checksum; } + void print(raw_ostream &OS) const; + void dump() const; + void collectLineCounts(FileInfo &FI); + +private: + bool GCNOInitialized = false; + GCOV::GCOVVersion Version; + uint32_t Checksum = 0; + SmallVector<std::unique_ptr<GCOVFunction>, 16> Functions; + uint32_t RunCount = 0; + uint32_t ProgramCount = 0; +}; + +/// GCOVEdge - Collects edge information. +struct GCOVEdge { + GCOVEdge(GCOVBlock &S, GCOVBlock &D) : Src(S), Dst(D) {} + + GCOVBlock &Src; + GCOVBlock &Dst; + uint64_t Count = 0; + uint64_t CyclesCount = 0; +}; + +/// GCOVFunction - Collects function information. +class GCOVFunction { +public: + using BlockIterator = pointee_iterator< + SmallVectorImpl<std::unique_ptr<GCOVBlock>>::const_iterator>; + + GCOVFunction(GCOVFile &P) : Parent(P) {} + + bool readGCNO(GCOVBuffer &Buffer, GCOV::GCOVVersion Version); + bool readGCDA(GCOVBuffer &Buffer, GCOV::GCOVVersion Version); + StringRef getName() const { return Name; } + StringRef getFilename() const { return Filename; } + size_t getNumBlocks() const { return Blocks.size(); } + uint64_t getEntryCount() const; + uint64_t getExitCount() const; + + BlockIterator block_begin() const { return Blocks.begin(); } + BlockIterator block_end() const { return Blocks.end(); } + iterator_range<BlockIterator> blocks() const { + return make_range(block_begin(), block_end()); + } + + void print(raw_ostream &OS) const; + void dump() const; + void collectLineCounts(FileInfo &FI); + +private: + GCOVFile &Parent; + uint32_t Ident = 0; + uint32_t Checksum; + uint32_t LineNumber = 0; + StringRef Name; + StringRef Filename; + SmallVector<std::unique_ptr<GCOVBlock>, 16> Blocks; + SmallVector<std::unique_ptr<GCOVEdge>, 16> Edges; +}; + +/// GCOVBlock - Collects block information. 
+class GCOVBlock { + struct EdgeWeight { + EdgeWeight(GCOVBlock *D) : Dst(D) {} + + GCOVBlock *Dst; + uint64_t Count = 0; + }; + + struct SortDstEdgesFunctor { + bool operator()(const GCOVEdge *E1, const GCOVEdge *E2) { + return E1->Dst.Number < E2->Dst.Number; + } + }; + +public: + using EdgeIterator = SmallVectorImpl<GCOVEdge *>::const_iterator; + using BlockVector = SmallVector<const GCOVBlock *, 4>; + using BlockVectorLists = SmallVector<BlockVector, 4>; + using Edges = SmallVector<GCOVEdge *, 4>; + + GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {} + ~GCOVBlock(); + + const GCOVFunction &getParent() const { return Parent; } + void addLine(uint32_t N) { Lines.push_back(N); } + uint32_t getLastLine() const { return Lines.back(); } + void addCount(size_t DstEdgeNo, uint64_t N); + uint64_t getCount() const { return Counter; } + + void addSrcEdge(GCOVEdge *Edge) { + assert(&Edge->Dst == this); // up to caller to ensure edge is valid + SrcEdges.push_back(Edge); + } + + void addDstEdge(GCOVEdge *Edge) { + assert(&Edge->Src == this); // up to caller to ensure edge is valid + // Check if adding this edge causes list to become unsorted. 
+ if (DstEdges.size() && DstEdges.back()->Dst.Number > Edge->Dst.Number) + DstEdgesAreSorted = false; + DstEdges.push_back(Edge); + } + + size_t getNumSrcEdges() const { return SrcEdges.size(); } + size_t getNumDstEdges() const { return DstEdges.size(); } + void sortDstEdges(); + + EdgeIterator src_begin() const { return SrcEdges.begin(); } + EdgeIterator src_end() const { return SrcEdges.end(); } + iterator_range<EdgeIterator> srcs() const { + return make_range(src_begin(), src_end()); + } + + EdgeIterator dst_begin() const { return DstEdges.begin(); } + EdgeIterator dst_end() const { return DstEdges.end(); } + iterator_range<EdgeIterator> dsts() const { + return make_range(dst_begin(), dst_end()); + } + + void print(raw_ostream &OS) const; + void dump() const; + void collectLineCounts(FileInfo &FI); + + static uint64_t getCycleCount(const Edges &Path); + static void unblock(const GCOVBlock *U, BlockVector &Blocked, + BlockVectorLists &BlockLists); + static bool lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start, + Edges &Path, BlockVector &Blocked, + BlockVectorLists &BlockLists, + const BlockVector &Blocks, uint64_t &Count); + static void getCyclesCount(const BlockVector &Blocks, uint64_t &Count); + static uint64_t getLineCount(const BlockVector &Blocks); + +private: + GCOVFunction &Parent; + uint32_t Number; + uint64_t Counter = 0; + bool DstEdgesAreSorted = true; + SmallVector<GCOVEdge *, 16> SrcEdges; + SmallVector<GCOVEdge *, 16> DstEdges; + SmallVector<uint32_t, 16> Lines; +}; + +class FileInfo { +protected: + // It is unlikely--but possible--for multiple functions to be on the same + // line. + // Therefore this typedef allows LineData.Functions to store multiple + // functions + // per instance. This is rare, however, so optimize for the common case. 
+ using FunctionVector = SmallVector<const GCOVFunction *, 1>; + using FunctionLines = DenseMap<uint32_t, FunctionVector>; + using BlockVector = SmallVector<const GCOVBlock *, 4>; + using BlockLines = DenseMap<uint32_t, BlockVector>; + + struct LineData { + LineData() = default; + + BlockLines Blocks; + FunctionLines Functions; + uint32_t LastLine = 0; + }; + + struct GCOVCoverage { + GCOVCoverage(StringRef Name) : Name(Name) {} + + StringRef Name; + + uint32_t LogicalLines = 0; + uint32_t LinesExec = 0; + + uint32_t Branches = 0; + uint32_t BranchesExec = 0; + uint32_t BranchesTaken = 0; + }; + +public: + FileInfo(const GCOV::Options &Options) : Options(Options) {} + + void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) { + if (Line > LineInfo[Filename].LastLine) + LineInfo[Filename].LastLine = Line; + LineInfo[Filename].Blocks[Line - 1].push_back(Block); + } + + void addFunctionLine(StringRef Filename, uint32_t Line, + const GCOVFunction *Function) { + if (Line > LineInfo[Filename].LastLine) + LineInfo[Filename].LastLine = Line; + LineInfo[Filename].Functions[Line - 1].push_back(Function); + } + + void setRunCount(uint32_t Runs) { RunCount = Runs; } + void setProgramCount(uint32_t Programs) { ProgramCount = Programs; } + void print(raw_ostream &OS, StringRef MainFilename, StringRef GCNOFile, + StringRef GCDAFile); + +protected: + std::string getCoveragePath(StringRef Filename, StringRef MainFilename); + std::unique_ptr<raw_ostream> openCoveragePath(StringRef CoveragePath); + void printFunctionSummary(raw_ostream &OS, const FunctionVector &Funcs) const; + void printBlockInfo(raw_ostream &OS, const GCOVBlock &Block, + uint32_t LineIndex, uint32_t &BlockNo) const; + void printBranchInfo(raw_ostream &OS, const GCOVBlock &Block, + GCOVCoverage &Coverage, uint32_t &EdgeNo); + void printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo, + uint64_t Count) const; + + void printCoverage(raw_ostream &OS, const GCOVCoverage &Coverage) const; + void 
printFuncCoverage(raw_ostream &OS) const; + void printFileCoverage(raw_ostream &OS) const; + + const GCOV::Options &Options; + StringMap<LineData> LineInfo; + uint32_t RunCount = 0; + uint32_t ProgramCount = 0; + + using FileCoverageList = SmallVector<std::pair<std::string, GCOVCoverage>, 4>; + using FuncCoverageMap = MapVector<const GCOVFunction *, GCOVCoverage>; + + FileCoverageList FileCoverages; + FuncCoverageMap FuncCoverages; +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_GCOV_H diff --git a/clang-r353983e/include/llvm/ProfileData/InstrProf.h b/clang-r353983e/include/llvm/ProfileData/InstrProf.h new file mode 100644 index 00000000..9ea1b9bd --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/InstrProf.h @@ -0,0 +1,1048 @@ +//===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Instrumentation-based profiling data is generated by instrumented +// binaries through library functions in compiler-rt, and read by the clang +// frontend to feed PGO. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_INSTRPROF_H +#define LLVM_PROFILEDATA_INSTRPROF_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/InstrProfData.inc" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <list> +#include <memory> +#include <string> +#include <system_error> +#include <utility> +#include <vector> + +namespace llvm { + +class Function; +class GlobalVariable; +struct InstrProfRecord; +class InstrProfSymtab; +class Instruction; +class MDNode; +class Module; + +enum InstrProfSectKind { +#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +/// Return the name of the profile section corresponding to \p IPSK. +/// +/// The name of the section depends on the object format type \p OF. If +/// \p AddSegmentInfo is true, a segment prefix and additional linker hints may +/// be added to the section name (this is the default). +std::string getInstrProfSectionName(InstrProfSectKind IPSK, + Triple::ObjectFormatType OF, + bool AddSegmentInfo = true); + +/// Return the name of the profile runtime entry point to do value profiling +/// for a given site. +inline StringRef getInstrProfValueProfFuncName() { + return INSTR_PROF_VALUE_PROF_FUNC_STR; +} + +/// Return the name of the profile runtime entry point to do value range +/// profiling.
+inline StringRef getInstrProfValueRangeProfFuncName() { + return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; +} + +/// Return the name prefix of variables containing instrumented function names. +inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } + +/// Return the name prefix of variables containing per-function control data. +inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } + +/// Return the name prefix of profile counter variables. +inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } + +/// Return the name prefix of value profile variables. +inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } + +/// Return the name of value profile node array variables. +inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } + +/// Return the name prefix of the COMDAT group for instrumentation variables +/// associated with a COMDAT function. +inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } + +/// Return the name of the variable holding the strings (possibly compressed) +/// of all functions' PGO names. +inline StringRef getInstrProfNamesVarName() { + return "__llvm_prf_nm"; +} + +/// Return the name of a coverage mapping variable (internal linkage) +/// for each instrumented source module. Such variables are allocated +/// in the __llvm_covmap section. +inline StringRef getCoverageMappingVarName() { + return "__llvm_coverage_mapping"; +} + +/// Return the name of the internal variable recording the array +/// of PGO name vars referenced by the coverage mapping. The owning +/// functions of those names are not emitted by FE (e.g., unused inline +/// functions.) +inline StringRef getCoverageUnusedNamesVarName() { + return "__llvm_coverage_names"; +} + +/// Return the name of the function that registers all the per-function control +/// data at program startup time by calling __llvm_profile_register_function.
This +/// function has internal linkage and is called by the __llvm_profile_init +/// runtime method. This function is not generated for these platforms: +/// Darwin, Linux, and FreeBSD. +inline StringRef getInstrProfRegFuncsName() { + return "__llvm_profile_register_functions"; +} + +/// Return the name of the runtime interface that registers per-function control +/// data for one instrumented function. +inline StringRef getInstrProfRegFuncName() { + return "__llvm_profile_register_function"; +} + +/// Return the name of the runtime interface that registers the PGO name strings. +inline StringRef getInstrProfNamesRegFuncName() { + return "__llvm_profile_register_names_function"; +} + +/// Return the name of the runtime initialization method that is generated by +/// the compiler. The function calls __llvm_profile_register_functions and +/// __llvm_profile_override_default_filename functions if needed. This function +/// has internal linkage and is invoked at startup time via init_array. +inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } + +/// Return the name of the hook variable defined in the profile runtime library. +/// A reference to the variable causes the linker to link in the runtime +/// initialization module (which defines the hook variable). +inline StringRef getInstrProfRuntimeHookVarName() { + return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR); +} + +/// Return the name of the compiler generated function that references the +/// runtime hook variable. The function is a weak global. +inline StringRef getInstrProfRuntimeHookVarUseFuncName() { + return "__llvm_profile_runtime_user"; +} + +/// Return the marker used to separate PGO names during serialization. +inline StringRef getInstrProfNameSeparator() { return "\01"; } + +/// Return the modified name for function \c F suitable to be +/// used as the key for profile lookup. Variable \c InLTO indicates if this +/// is called in LTO optimization passes.
+std::string getPGOFuncName(const Function &F, bool InLTO = false, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Return the modified name for a function suitable to be +/// used as the key for profile lookup. The function's original +/// name is \c RawFuncName and has linkage of type \c Linkage. +/// The function is defined in module \c FileName. +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Return the name of the global variable used to store a function +/// name in PGO instrumentation. \c FuncName is the name of the function +/// returned by the \c getPGOFuncName call. +std::string getPGOFuncNameVarName(StringRef FuncName, + GlobalValue::LinkageTypes Linkage); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c PGOFuncName is the name of the function returned +/// by \c getPGOFuncName call. +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c PGOFuncName is the name of the function +/// returned by \c getPGOFuncName call, \c M is the owning module, +/// and \c Linkage is the linkage of the instrumented function. +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef PGOFuncName); + +/// Return the initializer in string of the PGO name var \c NameVar. +StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar); + +/// Given a PGO function name, remove the filename prefix and return +/// the original (static) function name. +StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, + StringRef FileName = "<unknown>"); + +/// Given a vector of strings (function PGO names) \c NameStrs, the +/// method generates a combined string \c Result that is ready to be +/// serialized.
The \c Result string is comprised of three fields: +/// The first field is the length of the uncompressed strings, and +/// the second field is the length of the zlib-compressed string. +/// Both fields are encoded in ULEB128. If \c doCompression is false, the +/// third field is the uncompressed strings; otherwise it is the +/// compressed string. When the string compression is off, the +/// second field will have value zero. +Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs, + bool doCompression, std::string &Result); + +/// Produce \c Result string with the same format described above. The input +/// is a vector of PGO function name variables that are referenced. +Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars, + std::string &Result, bool doCompression = true); + +/// \c NameStrings is a string composed of one or more sub-strings encoded in +/// the format described above. The substrings are separated by 0 or more zero +/// bytes. This method decodes the string and populates the \c Symtab. +Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab); + +/// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being +/// set in IR PGO compilation. +bool isIRPGOFlagSet(const Module *M); + +/// Check if we can safely rename this Comdat function. Instances of the same +/// comdat function may have different control flows thus can not share the +/// same counter variable. +bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false); + +enum InstrProfValueKind : uint32_t { +#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +/// Get the value profile data for value site \p SiteIndx from \p InstrProfR +/// and annotate the instruction \p Inst with the value profile meta data. +/// Annotate up to \p MaxMDCount (default 3) number of records per value site.
+void annotateValueSite(Module &M, Instruction &Inst, + const InstrProfRecord &InstrProfR, + InstrProfValueKind ValueKind, uint32_t SiteIndx, + uint32_t MaxMDCount = 3); + +/// Same as the above interface but using an ArrayRef, as well as \p Sum. +void annotateValueSite(Module &M, Instruction &Inst, + ArrayRef<InstrProfValueData> VDs, uint64_t Sum, + InstrProfValueKind ValueKind, uint32_t MaxMDCount); + +/// Extract the value profile data from \p Inst which is annotated with +/// value profile meta data. Return false if there is no value data annotated, +/// otherwise return true. +bool getValueProfDataFromInst(const Instruction &Inst, + InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC); + +inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } + +/// Return the PGOFuncName meta data associated with a function. +MDNode *getPGOFuncNameMetadata(const Function &F); + +/// Create the PGOFuncName meta data if PGOFuncName is different from the +/// function's raw name. This should only apply to internal linkage functions +/// declared by users only. +void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); + +/// Check if we can use Comdat for profile variables. This will eliminate +/// the duplicated profile variables for Comdat functions.
+bool needsComdatForCounter(const Function &F, const Module &M); + +const std::error_category &instrprof_category(); + +enum class instrprof_error { + success = 0, + eof, + unrecognized_format, + bad_magic, + bad_header, + unsupported_version, + unsupported_hash_type, + too_large, + truncated, + malformed, + unknown_function, + hash_mismatch, + count_mismatch, + counter_overflow, + value_site_count_mismatch, + compress_failed, + uncompress_failed, + empty_raw_profile, + zlib_unavailable +}; + +inline std::error_code make_error_code(instrprof_error E) { + return std::error_code(static_cast<int>(E), instrprof_category()); +} + +class InstrProfError : public ErrorInfo<InstrProfError> { +public: + InstrProfError(instrprof_error Err) : Err(Err) { + assert(Err != instrprof_error::success && "Not an error"); + } + + std::string message() const override; + + void log(raw_ostream &OS) const override { OS << message(); } + + std::error_code convertToErrorCode() const override { + return make_error_code(Err); + } + + instrprof_error get() const { return Err; } + + /// Consume an Error and return the raw enum value contained within it. The + /// Error must either be a success value, or contain a single InstrProfError. + static instrprof_error take(Error E) { + auto Err = instrprof_error::success; + handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) { + assert(Err == instrprof_error::success && "Multiple errors encountered"); + Err = IPE.get(); + }); + return Err; + } + + static char ID; + +private: + instrprof_error Err; +}; + +class SoftInstrProfErrors { + /// Count the number of soft instrprof_errors encountered and keep track of + /// the first such error for reporting purposes. + + /// The first soft error encountered. + instrprof_error FirstError = instrprof_error::success; + + /// The number of hash mismatches. + unsigned NumHashMismatches = 0; + + /// The number of count mismatches. + unsigned NumCountMismatches = 0; + + /// The number of counter overflows. 
+ unsigned NumCounterOverflows = 0; + + /// The number of value site count mismatches. + unsigned NumValueSiteCountMismatches = 0; + +public: + SoftInstrProfErrors() = default; + + ~SoftInstrProfErrors() { + assert(FirstError == instrprof_error::success && + "Unchecked soft error encountered"); + } + + /// Track a soft error (\p IE) and increment its associated counter. + void addError(instrprof_error IE); + + /// Get the number of hash mismatches. + unsigned getNumHashMismatches() const { return NumHashMismatches; } + + /// Get the number of count mismatches. + unsigned getNumCountMismatches() const { return NumCountMismatches; } + + /// Get the number of counter overflows. + unsigned getNumCounterOverflows() const { return NumCounterOverflows; } + + /// Get the number of value site count mismatches. + unsigned getNumValueSiteCountMismatches() const { + return NumValueSiteCountMismatches; + } + + /// Return the first encountered error and reset FirstError to a success + /// value. + Error takeError() { + if (FirstError == instrprof_error::success) + return Error::success(); + auto E = make_error<InstrProfError>(FirstError); + FirstError = instrprof_error::success; + return E; + } +}; + +namespace object { + +class SectionRef; + +} // end namespace object + +namespace IndexedInstrProf { + +uint64_t ComputeHash(StringRef K); + +} // end namespace IndexedInstrProf + +/// A symbol table used for function PGO name look-up with keys +/// (such as pointers, md5hash values) to the function. A function's +/// PGO name or name's md5hash are used in retrieving the profile +/// data of the function. See \c getPGOFuncName() method for details +/// on how PGO name is formed. +class InstrProfSymtab { +public: + using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>; + +private: + StringRef Data; + uint64_t Address = 0; + // Unique name strings. + StringSet<> NameTab; + // A map from MD5 keys to function name strings. 
+ std::vector<std::pair<uint64_t, StringRef>> MD5NameMap; + // A map from MD5 keys to function define. We only populate this map + // when build the Symtab from a Module. + std::vector<std::pair<uint64_t, Function *>> MD5FuncMap; + // A map from function runtime address to function name MD5 hash. + // This map is only populated and used by raw instr profile reader. + AddrHashMap AddrToMD5Map; + bool Sorted = false; + + static StringRef getExternalSymbol() { + return "** External Symbol **"; + } + + // If the symtab is created by a series of calls to \c addFuncName, \c + // finalizeSymtab needs to be called before looking up function names. + // This is required because the underlying map is a vector (for space + // efficiency) which needs to be sorted. + inline void finalizeSymtab(); + +public: + InstrProfSymtab() = default; + + /// Create InstrProfSymtab from an object file section which + /// contains function PGO names. When section may contain raw + /// string data or string data in compressed form. This method + /// only initialize the symtab with reference to the data and + /// the section base address. The decompression will be delayed + /// until before it is used. See also \c create(StringRef) method. + Error create(object::SectionRef &Section); + + /// This interface is used by reader of CoverageMapping test + /// format. + inline Error create(StringRef D, uint64_t BaseAddr); + + /// \c NameStrings is a string composed of one of more sub-strings + /// encoded in the format described in \c collectPGOFuncNameStrings. + /// This method is a wrapper to \c readPGOFuncNameStrings method. + inline Error create(StringRef NameStrings); + + /// A wrapper interface to populate the PGO symtab with functions + /// decls from module \c M. This interface is used by transformation + /// passes such as indirect function call promotion. Variable \c InLTO + /// indicates if this is called from LTO optimization passes. 
+ Error create(Module &M, bool InLTO = false); + + /// Create InstrProfSymtab from a set of names iteratable from + /// \p IterRange. This interface is used by IndexedProfReader. + template <typename NameIterRange> Error create(const NameIterRange &IterRange); + + /// Update the symtab by adding \p FuncName to the table. This interface + /// is used by the raw and text profile readers. + Error addFuncName(StringRef FuncName) { + if (FuncName.empty()) + return make_error<InstrProfError>(instrprof_error::malformed); + auto Ins = NameTab.insert(FuncName); + if (Ins.second) { + MD5NameMap.push_back(std::make_pair( + IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + Sorted = false; + } + return Error::success(); + } + + /// Map a function address to its name's MD5 hash. This interface + /// is only used by the raw profiler reader. + void mapAddress(uint64_t Addr, uint64_t MD5Val) { + AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); + } + + /// Return a function's hash, or 0, if the function isn't in this SymTab. + uint64_t getFunctionHashFromAddress(uint64_t Address); + + /// Return function's PGO name from the function name's symbol + /// address in the object file. If an error occurs, return + /// an empty string. + StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); + + /// Return function's PGO name from the name's md5 hash value. + /// If not found, return an empty string. + inline StringRef getFuncName(uint64_t FuncMD5Hash); + + /// Just like getFuncName, except that it will return a non-empty StringRef + /// if the function is external to this symbol table. All such cases + /// will be represented using the same StringRef value. + inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash); + + /// True if Symbol is the value used to represent external symbols. 
+ static bool isExternalSymbol(const StringRef &Symbol) { + return Symbol == InstrProfSymtab::getExternalSymbol(); + } + + /// Return function from the name's md5 hash. Return nullptr if not found. + inline Function *getFunction(uint64_t FuncMD5Hash); + + /// Return the function's original assembly name by stripping off + /// the prefix attached (to symbols with priviate linkage). For + /// global functions, it returns the same string as getFuncName. + inline StringRef getOrigFuncName(uint64_t FuncMD5Hash); + + /// Return the name section data. + inline StringRef getNameData() const { return Data; } +}; + +Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { + Data = D; + Address = BaseAddr; + return Error::success(); +} + +Error InstrProfSymtab::create(StringRef NameStrings) { + return readPGOFuncNameStrings(NameStrings, *this); +} + +template <typename NameIterRange> +Error InstrProfSymtab::create(const NameIterRange &IterRange) { + for (auto Name : IterRange) + if (Error E = addFuncName(Name)) + return E; + + finalizeSymtab(); + return Error::success(); +} + +void InstrProfSymtab::finalizeSymtab() { + if (Sorted) + return; + llvm::sort(MD5NameMap, less_first()); + llvm::sort(MD5FuncMap, less_first()); + llvm::sort(AddrToMD5Map, less_first()); + AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), + AddrToMD5Map.end()); + Sorted = true; +} + +StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { + StringRef ret = getFuncName(FuncMD5Hash); + if (ret.empty()) + return InstrProfSymtab::getExternalSymbol(); + return ret; +} + +StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { + finalizeSymtab(); + auto Result = + std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash, + [](const std::pair<uint64_t, std::string> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash) + return Result->second; + return StringRef(); +} + 
+Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { + finalizeSymtab(); + auto Result = + std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash, + [](const std::pair<uint64_t, Function*> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash) + return Result->second; + return nullptr; +} + +// See also getPGOFuncName implementation. These two need to be +// matched. +StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) { + StringRef PGOName = getFuncName(FuncMD5Hash); + size_t S = PGOName.find_first_of(':'); + if (S == StringRef::npos) + return PGOName; + return PGOName.drop_front(S + 1); +} + +struct InstrProfValueSiteRecord { + /// Value profiling data pairs at a given value site. + std::list<InstrProfValueData> ValueData; + + InstrProfValueSiteRecord() { ValueData.clear(); } + template <class InputIterator> + InstrProfValueSiteRecord(InputIterator F, InputIterator L) + : ValueData(F, L) {} + + /// Sort ValueData ascending by Value + void sortByTargetValues() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Value < right.Value; + }); + } + /// Sort ValueData Descending by Count + inline void sortByCount(); + + /// Merge data from another InstrProfValueSiteRecord + /// Optionally scale merged counts by \p Weight. + void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, + function_ref<void(instrprof_error)> Warn); + /// Scale up value profile data counts. + void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn); +}; + +/// Profiling information for a single function. +struct InstrProfRecord { + std::vector<uint64_t> Counts; + + InstrProfRecord() = default; + InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {} + InstrProfRecord(InstrProfRecord &&) = default; + InstrProfRecord(const InstrProfRecord &RHS) + : Counts(RHS.Counts), + ValueData(RHS.ValueData + ? 
llvm::make_unique<ValueProfData>(*RHS.ValueData) + : nullptr) {} + InstrProfRecord &operator=(InstrProfRecord &&) = default; + InstrProfRecord &operator=(const InstrProfRecord &RHS) { + Counts = RHS.Counts; + if (!RHS.ValueData) { + ValueData = nullptr; + return *this; + } + if (!ValueData) + ValueData = llvm::make_unique<ValueProfData>(*RHS.ValueData); + else + *ValueData = *RHS.ValueData; + return *this; + } + + /// Return the number of value profile kinds with non-zero number + /// of profile sites. + inline uint32_t getNumValueKinds() const; + /// Return the number of instrumented sites for ValueKind. + inline uint32_t getNumValueSites(uint32_t ValueKind) const; + + /// Return the total number of ValueData for ValueKind. + inline uint32_t getNumValueData(uint32_t ValueKind) const; + + /// Return the number of value data collected for ValueKind at profiling + /// site: Site. + inline uint32_t getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const; + + /// Return the array of profiled values at \p Site. If \p TotalC + /// is not null, the total count of all target values at this site + /// will be stored in \c *TotalC. + inline std::unique_ptr<InstrProfValueData[]> + getValueForSite(uint32_t ValueKind, uint32_t Site, + uint64_t *TotalC = nullptr) const; + + /// Get the target value/counts of kind \p ValueKind collected at site + /// \p Site and store the result in array \p Dest. Return the total + /// counts of all target values at this site. + inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, + uint32_t Site) const; + + /// Reserve space for NumValueSites sites. + inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); + + /// Add ValueData for ValueKind at value Site. + void addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + InstrProfSymtab *SymTab); + + /// Merge the counts in \p Other into this one. + /// Optionally scale merged counts by \p Weight. 
+ void merge(InstrProfRecord &Other, uint64_t Weight, + function_ref<void(instrprof_error)> Warn); + + /// Scale up profile counts (including value profile data) by + /// \p Weight. + void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn); + + /// Sort value profile data (per site) by count. + void sortValueData() { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + for (auto &SR : getValueSitesForKind(Kind)) + SR.sortByCount(); + } + + /// Clear value data entries and edge counters. + void Clear() { + Counts.clear(); + clearValueData(); + } + + /// Clear value data entries + void clearValueData() { ValueData = nullptr; } + +private: + struct ValueProfData { + std::vector<InstrProfValueSiteRecord> IndirectCallSites; + std::vector<InstrProfValueSiteRecord> MemOPSizes; + }; + std::unique_ptr<ValueProfData> ValueData; + + MutableArrayRef<InstrProfValueSiteRecord> + getValueSitesForKind(uint32_t ValueKind) { + // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever + // implemented in LLVM) to call the const overload of this function, then + // cast away the constness from the result. 
+ auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind( + ValueKind); + return makeMutableArrayRef( + const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size()); + } + ArrayRef<InstrProfValueSiteRecord> + getValueSitesForKind(uint32_t ValueKind) const { + if (!ValueData) + return None; + switch (ValueKind) { + case IPVK_IndirectCallTarget: + return ValueData->IndirectCallSites; + case IPVK_MemOPSize: + return ValueData->MemOPSizes; + default: + llvm_unreachable("Unknown value kind!"); + } + } + + std::vector<InstrProfValueSiteRecord> & + getOrCreateValueSitesForKind(uint32_t ValueKind) { + if (!ValueData) + ValueData = llvm::make_unique<ValueProfData>(); + switch (ValueKind) { + case IPVK_IndirectCallTarget: + return ValueData->IndirectCallSites; + case IPVK_MemOPSize: + return ValueData->MemOPSizes; + default: + llvm_unreachable("Unknown value kind!"); + } + } + + // Map indirect call target name hash to name string. + uint64_t remapValue(uint64_t Value, uint32_t ValueKind, + InstrProfSymtab *SymTab); + + // Merge Value Profile data from Src record to this record for ValueKind. + // Scale merged value counts by \p Weight. + void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src, + uint64_t Weight, + function_ref<void(instrprof_error)> Warn); + + // Scale up value profile data count. 
+ void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + function_ref<void(instrprof_error)> Warn); +}; + +struct NamedInstrProfRecord : InstrProfRecord { + StringRef Name; + uint64_t Hash; + + NamedInstrProfRecord() = default; + NamedInstrProfRecord(StringRef Name, uint64_t Hash, + std::vector<uint64_t> Counts) + : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} +}; + +uint32_t InstrProfRecord::getNumValueKinds() const { + uint32_t NumValueKinds = 0; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + NumValueKinds += !(getValueSitesForKind(Kind).empty()); + return NumValueKinds; +} + +uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { + uint32_t N = 0; + for (auto &SR : getValueSitesForKind(ValueKind)) + N += SR.ValueData.size(); + return N; +} + +uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { + return getValueSitesForKind(ValueKind).size(); +} + +uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const { + return getValueSitesForKind(ValueKind)[Site].ValueData.size(); +} + +std::unique_ptr<InstrProfValueData[]> +InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, + uint64_t *TotalC) const { + uint64_t Dummy; + uint64_t &TotalCount = (TotalC == nullptr ? 
Dummy : *TotalC); + uint32_t N = getNumValueDataForSite(ValueKind, Site); + if (N == 0) { + TotalCount = 0; + return std::unique_ptr<InstrProfValueData[]>(nullptr); + } + + auto VD = llvm::make_unique<InstrProfValueData[]>(N); + TotalCount = getValueForSite(VD.get(), ValueKind, Site); + + return VD; +} + +uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[], + uint32_t ValueKind, + uint32_t Site) const { + uint32_t I = 0; + uint64_t TotalCount = 0; + for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { + Dest[I].Value = V.Value; + Dest[I].Count = V.Count; + TotalCount = SaturatingAdd(TotalCount, V.Count); + I++; + } + return TotalCount; +} + +void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { + if (!NumValueSites) + return; + getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites); +} + +inline support::endianness getHostEndianness() { + return sys::IsLittleEndianHost ? support::little : support::big; +} + +// Include definitions for value profile data +#define INSTR_PROF_VALUE_PROF_DATA +#include "llvm/ProfileData/InstrProfData.inc" + +void InstrProfValueSiteRecord::sortByCount() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Count > right.Count; + }); + // Now truncate + size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE; + if (ValueData.size() > max_s) + ValueData.resize(max_s); +} + +namespace IndexedInstrProf { + +enum class HashT : uint32_t { + MD5, + Last = MD5 +}; + +inline uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" + +enum ProfVersion { + // Version 1 is the first version. In this version, the value of + // a key/value pair can only include profile data of a single function. 
+ // Due to this restriction, the number of block counters for a given + // function is not recorded but derived from the length of the value. + Version1 = 1, + // The version 2 format supports recording profile data of multiple + // functions which share the same key in one value field. To support this, + // the number block counters is recorded as an uint64_t field right after the + // function structural hash. + Version2 = 2, + // Version 3 supports value profile data. The value profile data is expected + // to follow the block counter profile data. + Version3 = 3, + // In this version, profile summary data \c IndexedInstrProf::Summary is + // stored after the profile header. + Version4 = 4, + // In this version, the frontend PGO stable hash algorithm defaults to V2. + Version5 = 5, + // The current version is 5. + CurrentVersion = INSTR_PROF_INDEX_VERSION +}; +const uint64_t Version = ProfVersion::CurrentVersion; + +const HashT HashType = HashT::MD5; + +inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } + +// This structure defines the file header of the LLVM profile +// data file in indexed-format. +struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t Unused; // Becomes unused since version 4 + uint64_t HashType; + uint64_t HashOffset; +}; + +// Profile summary data recorded in the profile data file in indexed +// format. It is introduced in version 4. The summary data follows +// right after the profile file header. +struct Summary { + struct Entry { + uint64_t Cutoff; ///< The required percentile of total execution count. + uint64_t + MinBlockCount; ///< The minimum execution count for this percentile. + uint64_t NumBlocks; ///< Number of blocks >= the minumum execution count. + }; + // The field kind enumerator to assigned value mapping should remain + // unchanged when a new kind is added or an old kind gets deleted in + // the future. + enum SummaryFieldKind { + /// The total number of functions instrumented. 
+ TotalNumFunctions = 0, + /// Total number of instrumented blocks/edges. + TotalNumBlocks = 1, + /// The maximal execution count among all functions. + /// This field does not exist for profile data from IR based + /// instrumentation. + MaxFunctionCount = 2, + /// Max block count of the program. + MaxBlockCount = 3, + /// Max internal block count of the program (excluding entry blocks). + MaxInternalBlockCount = 4, + /// The sum of all instrumented block counts. + TotalBlockCount = 5, + NumKinds = TotalBlockCount + 1 + }; + + // The number of summmary fields following the summary header. + uint64_t NumSummaryFields; + // The number of Cutoff Entries (Summary::Entry) following summary fields. + uint64_t NumCutoffEntries; + + Summary() = delete; + Summary(uint32_t Size) { memset(this, 0, Size); } + + void operator delete(void *ptr) { ::operator delete(ptr); } + + static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) { + return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) + + NumSumFields * sizeof(uint64_t); + } + + const uint64_t *getSummaryDataBase() const { + return reinterpret_cast<const uint64_t *>(this + 1); + } + + uint64_t *getSummaryDataBase() { + return reinterpret_cast<uint64_t *>(this + 1); + } + + const Entry *getCutoffEntryBase() const { + return reinterpret_cast<const Entry *>( + &getSummaryDataBase()[NumSummaryFields]); + } + + Entry *getCutoffEntryBase() { + return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]); + } + + uint64_t get(SummaryFieldKind K) const { + return getSummaryDataBase()[K]; + } + + void set(SummaryFieldKind K, uint64_t V) { + getSummaryDataBase()[K] = V; + } + + const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; } + + void setEntry(uint32_t I, const ProfileSummaryEntry &E) { + Entry &ER = getCutoffEntryBase()[I]; + ER.Cutoff = E.Cutoff; + ER.MinBlockCount = E.MinCount; + ER.NumBlocks = E.NumCounts; + } +}; + +inline std::unique_ptr<Summary> allocSummary(uint32_t 
TotalSize) { + return std::unique_ptr<Summary>(new (::operator new(TotalSize)) + Summary(TotalSize)); +} + +} // end namespace IndexedInstrProf + +namespace RawInstrProf { + +// Version 1: First version +// Version 2: Added value profile data section. Per-function control data +// struct has more fields to describe value profile information. +// Version 3: Compressed name section support. Function PGO name reference +// from control data struct is changed from raw pointer to Name's MD5 value. +// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the +// raw header. +const uint64_t Version = INSTR_PROF_RAW_VERSION; + +template <class IntPtrT> inline uint64_t getMagic(); +template <> inline uint64_t getMagic<uint64_t>() { + return INSTR_PROF_RAW_MAGIC_64; +} + +template <> inline uint64_t getMagic<uint32_t>() { + return INSTR_PROF_RAW_MAGIC_32; +} + +// Per-function profile data header/control structure. +// The definition should match the structure defined in +// compiler-rt/lib/profile/InstrProfiling.h. +// It should also match the synthesized type in +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. +template <class IntPtrT> struct alignas(8) ProfileData { + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; + #include "llvm/ProfileData/InstrProfData.inc" +}; + +// File header structure of the LLVM profile data in raw format. +// The definition should match the header referenced in +// compiler-rt/lib/profile/InstrProfilingFile.c and +// InstrProfilingBuffer.c. +struct Header { +#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +} // end namespace RawInstrProf + +// Parse MemOP Size range option. +void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, + int64_t &RangeLast); + +// Create the variable for the profile file name. 
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_INSTRPROF_H diff --git a/clang-r353983e/include/llvm/ProfileData/InstrProfData.inc b/clang-r353983e/include/llvm/ProfileData/InstrProfData.inc new file mode 100644 index 00000000..e1e2df55 --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/InstrProfData.inc @@ -0,0 +1,728 @@ +/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the master file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * compiler (instrumentation), and host tools (reader/writer). The entities + * defined in this file affect the profile runtime ABI, the raw profile format, + * or both. + * + * The file has two identical copies. The master copy lives in LLVM and + * the other one sits in compiler-rt/lib/profile directory. To make changes + * in this file, first modify the master copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * + * The first part of the file includes macros that defines types, names, and + * initializers for the member fields of the core data structures. The field + * declarations for one structure is enabled by defining the field activation + * macro associated with that structure. Only one field activation record + * can be defined at one time and the rest definitions will be filtered out by + * the preprocessor. + * + * Examples of how the template is used to instantiate structure definition: + * 1. 
To declare a structure: + * + * struct ProfData { + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * Type Name; + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * 2. To construct LLVM type arrays for the struct type: + * + * Type *DataTypes[] = { + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * LLVMType, + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * 3. To construct constant array for the initializers: + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * Initializer, + * Constant *ConstantVals[] = { + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * + * The second part of the file includes definitions of all other entities that + * are related to runtime ABI and format. When no field activation macro is + * defined, this file can be included to introduce the definitions. + * +\*===----------------------------------------------------------------------===*/ + +/* Functions marked with INSTR_PROF_VISIBILITY must have hidden visibility in + * the compiler runtime. */ +#ifndef INSTR_PROF_VISIBILITY +#define INSTR_PROF_VISIBILITY +#endif + +/* INSTR_PROF_DATA start. */ +/* Definition of member fields of the per-function control structure. 
*/ +#ifndef INSTR_PROF_DATA +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \ + ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + IndexedInstrProf::ComputeHash(getPGOFuncNameVarInitializer(Inc->getName())))) +INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ + ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + Inc->getHash()->getZExtValue())) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \ + ConstantExpr::getBitCast(CounterPtr, \ + llvm::Type::getInt64PtrTy(Ctx))) +/* This is used to map function pointers for the indirect call targets to + * function name hashes during the conversion from raw to merged profile + * data. + */ +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \ + FunctionAddr) +INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ + ValuesPtrExpr) +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) +INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) +#undef INSTR_PROF_DATA +/* INSTR_PROF_DATA end. */ + + +/* This is an internal data structure used by value profiler. It + * is defined here to allow serialization code sharing by LLVM + * to be used in unit test. + * + * typedef struct ValueProfNode { + * // InstrProfValueData VData; + * uint64_t Value; + * uint64_t Count; + * struct ValueProfNode *Next; + * } ValueProfNode; + */ +/* INSTR_PROF_VALUE_NODE start. 
*/ +#ifndef INSTR_PROF_VALUE_NODE +#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_VALUE_NODE(uint64_t, llvm::Type::getInt64Ty(Ctx), Value, \ + ConstantInt::get(llvm::Type::GetInt64Ty(Ctx), 0)) +INSTR_PROF_VALUE_NODE(uint64_t, llvm::Type::getInt64Ty(Ctx), Count, \ + ConstantInt::get(llvm::Type::GetInt64Ty(Ctx), 0)) +INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ + ConstantInt::get(llvm::Type::GetInt8PtrTy(Ctx), 0)) +#undef INSTR_PROF_VALUE_NODE +/* INSTR_PROF_VALUE_NODE end. */ + +/* INSTR_PROF_RAW_HEADER start */ +/* Definition of member fields of the raw profile header data structure. */ +#ifndef INSTR_PROF_RAW_HEADER +#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) +#undef INSTR_PROF_RAW_HEADER +/* INSTR_PROF_RAW_HEADER end */ + +/* VALUE_PROF_FUNC_PARAM start */ +/* Definition of parameter types of the runtime API used to do value profiling + * for a given value site. 
+ */ +#ifndef VALUE_PROF_FUNC_PARAM +#define VALUE_PROF_FUNC_PARAM(ArgType, ArgName, ArgLLVMType) +#define INSTR_PROF_COMMA +#else +#define INSTR_PROF_DATA_DEFINED +#define INSTR_PROF_COMMA , +#endif +VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA +#ifndef VALUE_RANGE_PROF +VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) +#else /* VALUE_RANGE_PROF */ +VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeLast, Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx)) +#endif /*VALUE_RANGE_PROF */ +#undef VALUE_PROF_FUNC_PARAM +#undef INSTR_PROF_COMMA +/* VALUE_PROF_FUNC_PARAM end */ + +/* VALUE_PROF_KIND start */ +#ifndef VALUE_PROF_KIND +#define VALUE_PROF_KIND(Enumerator, Value) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +/* For indirect function call value profiling, the addresses of the target + * functions are profiled by the instrumented code. The target addresses are + * written in the raw profile data and converted to target function name's MD5 + * hash by the profile reader during deserialization. Typically, this happens + * when the raw profile data is read during profile merging. + * + * For this remapping the ProfData is used. ProfData contains both the function + * name hash and the function address. + */ +VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0) +/* For memory intrinsic functions size profiling. */ +VALUE_PROF_KIND(IPVK_MemOPSize, 1) +/* These two kinds must be the last to be + * declared. This is to make sure the string + * array created with the template can be + * indexed with the kind value. 
+ */ +VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget) +VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize) + +#undef VALUE_PROF_KIND +/* VALUE_PROF_KIND end */ + +/* COVMAP_FUNC_RECORD start */ +/* Definition of member fields of the function record structure in coverage + * map. + */ +#ifndef COVMAP_FUNC_RECORD +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +#ifdef COVMAP_V1 +COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \ + NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \ + llvm::Type::getInt8PtrTy(Ctx))) +COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ + NameValue.size())) +#else +COVMAP_FUNC_RECORD(const int64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \ + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + llvm::IndexedInstrProf::ComputeHash(NameValue))) +#endif +COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ + CoverageMapping.size())) +COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), FuncHash)) +#undef COVMAP_FUNC_RECORD +/* COVMAP_FUNC_RECORD end. */ + +/* COVMAP_HEADER start */ +/* Definition of member fields of coverage map header. + */ +#ifndef COVMAP_HEADER +#define COVMAP_HEADER(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \ + llvm::ConstantInt::get(Int32Ty, FunctionRecords.size())) +COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \ + llvm::ConstantInt::get(Int32Ty, FilenamesSize)) +COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \ + llvm::ConstantInt::get(Int32Ty, CoverageMappingSize)) +COVMAP_HEADER(uint32_t, Int32Ty, Version, \ + llvm::ConstantInt::get(Int32Ty, CovMapVersion::CurrentVersion)) +#undef COVMAP_HEADER +/* COVMAP_HEADER end. 
*/ + + +#ifdef INSTR_PROF_SECT_ENTRY +#define INSTR_PROF_DATA_DEFINED +INSTR_PROF_SECT_ENTRY(IPSK_data, \ + INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \ + INSTR_PROF_DATA_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ + INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ + INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_name, \ + INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ + INSTR_PROF_NAME_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vals, \ + INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \ + INSTR_PROF_VALS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \ + INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \ + INSTR_PROF_VNODES_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_covmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \ + INSTR_PROF_COVMAP_COFF, "__LLVM_COV,") + +#undef INSTR_PROF_SECT_ENTRY +#endif + + +#ifdef INSTR_PROF_VALUE_PROF_DATA +#define INSTR_PROF_DATA_DEFINED + +#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255 +/*! + * This is the header of the data structure that defines the on-disk + * layout of the value profile data of a particular kind for one function. + */ +typedef struct ValueProfRecord { + /* The kind of the value profile record. */ + uint32_t Kind; + /* + * The number of value profile sites. It is guaranteed to be non-zero; + * otherwise the record for this kind won't be emitted. + */ + uint32_t NumValueSites; + /* + * The first element of the array that stores the number of profiled + * values for each value site. The size of the array is NumValueSites. + * Since NumValueSites is greater than zero, there is at least one + * element in the array. + */ + uint8_t SiteCountArray[1]; + + /* + * The fake declaration is for documentation purpose only. + * Align the start of next field to be on 8 byte boundaries. + uint8_t Padding[X]; + */ + + /* The array of value profile data. The size of the array is the sum + * of all elements in SiteCountArray[]. + InstrProfValueData ValueData[]; + */ + +#ifdef __cplusplus + /*! 
+ * Return the number of value sites. + */ + uint32_t getNumValueSites() const { return NumValueSites; } + /*! + * Read data from this record and save it to Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfSymtab *SymTab); + /* + * In-place byte swap: + * Do byte swap for this instance. \c Old is the original order before + * the swap, and \c New is the New byte order. + */ + void swapBytes(support::endianness Old, support::endianness New); +#endif +} ValueProfRecord; + +/*! + * Per-function header/control data structure for value profiling + * data in indexed format. + */ +typedef struct ValueProfData { + /* + * Total size in bytes including this field. It must be a multiple + * of sizeof(uint64_t). + */ + uint32_t TotalSize; + /* + * The number of value profile kinds that have value profile data. + * In this implementation, a value profile kind is considered to + * have profile data if the number of value profile sites for the + * kind is not zero. More aggressively, the implementation can + * choose to check the actual data value: if none of the value sites + * has any profiled values, the kind can be skipped. + */ + uint32_t NumValueKinds; + + /* + * Following are a sequence of variable length records. The prefix/header + * of each record is defined by ValueProfRecord type. The number of + * records is NumValueKinds. + * ValueProfRecord Record_1; + * ValueProfRecord Record_N; + */ + +#if __cplusplus + /*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ + static uint32_t getSize(const InstrProfRecord &Record); + /*! + * Return a pointer to \c ValueProfData instance ready to be streamed. + */ + static std::unique_ptr<ValueProfData> + serializeFrom(const InstrProfRecord &Record); + /*! + * Check the integrity of the record. + */ + Error checkIntegrity(); + /*! + * Return a pointer to \c ValueProfData instance ready to be read.
+ * All data in the instance are properly byte swapped. The input + * data is assumed to be in little endian order. + */ + static Expected<std::unique_ptr<ValueProfData>> + getValueProfData(const unsigned char *SrcBuffer, + const unsigned char *const SrcBufferEnd, + support::endianness SrcDataEndianness); + /*! + * Swap byte order from \c Endianness order to host byte order. + */ + void swapBytesToHost(support::endianness Endianness); + /*! + * Swap byte order from host byte order to \c Endianness order. + */ + void swapBytesFromHost(support::endianness Endianness); + /*! + * Return the total size of \c ValueProfData. + */ + uint32_t getSize() const { return TotalSize; } + /*! + * Read data from this data and save it to \c Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfSymtab *SymTab); + void operator delete(void *ptr) { ::operator delete(ptr); } +#endif +} ValueProfData; + +/* + * The closure is designed to abstract away two types of value profile data: + * - InstrProfRecord which is the primary data structure used to + * represent profile data in host tools (reader, writer, and profile-use) + * - value profile runtime data structure suitable to be used by C + * runtime library. + * + * Both sources of data need to serialize to disk/memory-buffer in common + * format: ValueProfData. The abstraction allows compiler-rt's raw profiler + * writer to share the same format and code with indexed profile writer. + * + * For documentation of the member methods below, refer to corresponding methods + * in class InstrProfRecord.
+ */ +typedef struct ValueProfRecordClosure { + const void *Record; + uint32_t (*GetNumValueKinds)(const void *Record); + uint32_t (*GetNumValueSites)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S); + + /* + * After extracting the value profile data from the value profile record, + * this method is used to map the in-memory value to on-disk value. If + * the method is null, value will be written out untranslated. + */ + uint64_t (*RemapValueData)(uint32_t, uint64_t Value); + void (*GetValueForSite)(const void *R, InstrProfValueData *Dst, uint32_t K, + uint32_t S); + ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes); +} ValueProfRecordClosure; + +INSTR_PROF_VISIBILITY ValueProfRecord * +getFirstValueProfRecord(ValueProfData *VPD); +INSTR_PROF_VISIBILITY ValueProfRecord * +getValueProfRecordNext(ValueProfRecord *VPR); +INSTR_PROF_VISIBILITY InstrProfValueData * +getValueProfRecordValueData(ValueProfRecord *VPR); +INSTR_PROF_VISIBILITY uint32_t +getValueProfRecordHeaderSize(uint32_t NumValueSites); + +#undef INSTR_PROF_VALUE_PROF_DATA +#endif /* INSTR_PROF_VALUE_PROF_DATA */ + + +#ifdef INSTR_PROF_COMMON_API_IMPL +#define INSTR_PROF_DATA_DEFINED +#ifdef __cplusplus +#define INSTR_PROF_INLINE inline +#define INSTR_PROF_NULLPTR nullptr +#else +#define INSTR_PROF_INLINE +#define INSTR_PROF_NULLPTR NULL +#endif + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +/*! + * Return the \c ValueProfRecord header size including the + * padding bytes. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) { + uint32_t Size = offsetof(ValueProfRecord, SiteCountArray) + + sizeof(uint8_t) * NumValueSites; + /* Round the size to multiple of 8 bytes. */ + Size = (Size + 7) & ~7; + return Size; +} + +/*! 
+ * Return the total size of the value profile record including the + * header and the value data. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +uint32_t getValueProfRecordSize(uint32_t NumValueSites, + uint32_t NumValueData) { + return getValueProfRecordHeaderSize(NumValueSites) + + sizeof(InstrProfValueData) * NumValueData; +} + +/*! + * Return the pointer to the start of value data array. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) { + return (InstrProfValueData *)((char *)This + getValueProfRecordHeaderSize( + This->NumValueSites)); +} + +/*! + * Return the total number of value data for \c This record. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) { + uint32_t NumValueData = 0; + uint32_t I; + for (I = 0; I < This->NumValueSites; I++) + NumValueData += This->SiteCountArray[I]; + return NumValueData; +} + +/*! + * Use this method to advance to the next \c This \c ValueProfRecord. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +ValueProfRecord *getValueProfRecordNext(ValueProfRecord *This) { + uint32_t NumValueData = getValueProfRecordNumValueData(This); + return (ValueProfRecord *)((char *)This + + getValueProfRecordSize(This->NumValueSites, + NumValueData)); +} + +/*! + * Return the first \c ValueProfRecord instance. + */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) { + return (ValueProfRecord *)((char *)This + sizeof(ValueProfData)); +} + +/* Closure based interfaces. */ + +/*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. 
+ */ +INSTR_PROF_VISIBILITY uint32_t +getValueProfDataSize(ValueProfRecordClosure *Closure) { + uint32_t Kind; + uint32_t TotalSize = sizeof(ValueProfData); + const void *Record = Closure->Record; + + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Record, Kind); + if (!NumValueSites) + continue; + TotalSize += getValueProfRecordSize(NumValueSites, + Closure->GetNumValueData(Record, Kind)); + } + return TotalSize; +} + +/*! + * Extract value profile data of a function for the profile kind \c ValueKind + * from the \c Closure and serialize the data into \c This record instance. + */ +INSTR_PROF_VISIBILITY void +serializeValueProfRecordFrom(ValueProfRecord *This, + ValueProfRecordClosure *Closure, + uint32_t ValueKind, uint32_t NumValueSites) { + uint32_t S; + const void *Record = Closure->Record; + This->Kind = ValueKind; + This->NumValueSites = NumValueSites; + InstrProfValueData *DstVD = getValueProfRecordValueData(This); + + for (S = 0; S < NumValueSites; S++) { + uint32_t ND = Closure->GetNumValueDataForSite(Record, ValueKind, S); + This->SiteCountArray[S] = ND; + Closure->GetValueForSite(Record, DstVD, ValueKind, S); + DstVD += ND; + } +} + +/*! + * Extract value profile data of a function from the \c Closure + * and serialize the data into \c DstData if it is not NULL or heap + * memory allocated by the \c Closure's allocator method. If \c + * DstData is not null, the caller is expected to set the TotalSize + * in DstData. + */ +INSTR_PROF_VISIBILITY ValueProfData * +serializeValueProfDataFrom(ValueProfRecordClosure *Closure, + ValueProfData *DstData) { + uint32_t Kind; + uint32_t TotalSize = + DstData ? DstData->TotalSize : getValueProfDataSize(Closure); + + ValueProfData *VPD = + DstData ? 
DstData : Closure->AllocValueProfData(TotalSize); + + VPD->TotalSize = TotalSize; + VPD->NumValueKinds = Closure->GetNumValueKinds(Closure->Record); + ValueProfRecord *VR = getFirstValueProfRecord(VPD); + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Closure->Record, Kind); + if (!NumValueSites) + continue; + serializeValueProfRecordFrom(VR, Closure, Kind, NumValueSites); + VR = getValueProfRecordNext(VR); + } + return VPD; +} + +#undef INSTR_PROF_COMMON_API_IMPL +#endif /* INSTR_PROF_COMMON_API_IMPL */ + +/*============================================================================*/ + +#ifndef INSTR_PROF_DATA_DEFINED + +#ifndef INSTR_PROF_DATA_INC +#define INSTR_PROF_DATA_INC + +/* Helper macros. */ +#define INSTR_PROF_SIMPLE_QUOTE(x) #x +#define INSTR_PROF_QUOTE(x) INSTR_PROF_SIMPLE_QUOTE(x) +#define INSTR_PROF_SIMPLE_CONCAT(x,y) x ## y +#define INSTR_PROF_CONCAT(x,y) INSTR_PROF_SIMPLE_CONCAT(x,y) + +/* Magic number to detect file format and endianness. + * Use 255 at one end, since no UTF-8 file can use that character. Avoid 0, + * so that utilities, like strings, don't grab it as a string. 129 is also + * invalid UTF-8, and high enough to be interesting. + * Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR" + * for 32-bit platforms. + */ +#define INSTR_PROF_RAW_MAGIC_64 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \ + (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \ + (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129 +#define INSTR_PROF_RAW_MAGIC_32 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \ + (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \ + (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 + +/* Raw profile format version (start from 1). */ +#define INSTR_PROF_RAW_VERSION 4 +/* Indexed profile format version (start from 1). */ +#define INSTR_PROF_INDEX_VERSION 5 +/* Coverage mapping format version (start from 0).
*/ +#define INSTR_PROF_COVMAP_VERSION 2 + +/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the + * version for other variants of profile. We set the lowest bit of the upper 8 + * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation + * generated profile, and 0 if this is a Clang FE generated profile. + */ +#define VARIANT_MASKS_ALL 0xff00000000000000ULL +#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) +#define VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version +#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime + +/* The variable that holds the name of the profile data + * specified via command line. */ +#define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename + +/* section name strings common to all targets other + than WIN32 */ +#define INSTR_PROF_DATA_COMMON __llvm_prf_data +#define INSTR_PROF_NAME_COMMON __llvm_prf_names +#define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_VALS_COMMON __llvm_prf_vals +#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds +#define INSTR_PROF_COVMAP_COMMON __llvm_covmap +/* Windows section names. Because these section names contain dollar characters, + * they must be quoted. + */ +#define INSTR_PROF_DATA_COFF ".lprfd$M" +#define INSTR_PROF_NAME_COFF ".lprfn$M" +#define INSTR_PROF_CNTS_COFF ".lprfc$M" +#define INSTR_PROF_VALS_COFF ".lprfv$M" +#define INSTR_PROF_VNODES_COFF ".lprfnd$M" +#define INSTR_PROF_COVMAP_COFF ".lcovmap$M" + +#ifdef _WIN32 +/* Runtime section names and name strings. */ +#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF +#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF +#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF +/* Array of pointers. Each pointer points to a list + * of value nodes associated with one value site. + */ +#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COFF +/* Value profile nodes section.
*/ +#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF +#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF +#else +/* Runtime section names and name strings. */ +#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON) +#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON) +#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON) +/* Array of pointers. Each pointer points to a list + * of value nodes associated with one value site. + */ +#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON) +/* Value profile nodes section. */ +#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON) +#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON) +#endif + +/* Macros to define start/stop section symbol for a given + * section on Linux. For instance + * INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON) will + * expand to __start___llvm_prf_data + */ +#define INSTR_PROF_SECT_START(Sect) \ + INSTR_PROF_CONCAT(__start_,Sect) +#define INSTR_PROF_SECT_STOP(Sect) \ + INSTR_PROF_CONCAT(__stop_,Sect) + +/* Value Profiling API linkage name. */ +#define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target +#define INSTR_PROF_VALUE_PROF_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) +#define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range +#define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) + +/* InstrProfile per-function control data alignment. */ +#define INSTR_PROF_DATA_ALIGNMENT 8 + +/* The data structure that represents a tracked value by the + * value profiler. + */ +typedef struct InstrProfValueData { + /* Profiled value. */ + uint64_t Value; + /* Number of times the value appears in the training run.
*/ + uint64_t Count; +} InstrProfValueData; + +#endif /* INSTR_PROF_DATA_INC */ + +#else +#undef INSTR_PROF_DATA_DEFINED +#endif diff --git a/clang-r353983e/include/llvm/ProfileData/InstrProfReader.h b/clang-r353983e/include/llvm/ProfileData/InstrProfReader.h new file mode 100644 index 00000000..d465420f --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/InstrProfReader.h @@ -0,0 +1,477 @@ +//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading profiling data for instrumentation +// based PGO and coverage. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H +#define LLVM_PROFILEDATA_INSTRPROFREADER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/OnDiskHashTable.h" +#include "llvm/Support/SwapByteOrder.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <memory> +#include <utility> +#include <vector> + +namespace llvm { + +class InstrProfReader; + +/// A file format agnostic iterator over profiling data. 
+class InstrProfIterator : public std::iterator<std::input_iterator_tag, + NamedInstrProfRecord> { + InstrProfReader *Reader = nullptr; + value_type Record; + + void Increment(); + +public: + InstrProfIterator() = default; + InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } + + InstrProfIterator &operator++() { Increment(); return *this; } + bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; } + bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; } + value_type &operator*() { return Record; } + value_type *operator->() { return &Record; } +}; + +/// Base class and interface for reading profiling data of any known instrprof +/// format. Provides an iterator over NamedInstrProfRecords. +class InstrProfReader { + instrprof_error LastError = instrprof_error::success; + +public: + InstrProfReader() = default; + virtual ~InstrProfReader() = default; + + /// Read the header. Required before reading first record. + virtual Error readHeader() = 0; + + /// Read a single record. + virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; + + /// Iterator over profile data. + InstrProfIterator begin() { return InstrProfIterator(this); } + InstrProfIterator end() { return InstrProfIterator(); } + + virtual bool isIRLevelProfile() const = 0; + + /// Return the PGO symtab. There are three different readers: + /// Raw, Text, and Indexed profile readers. The first two types + /// of readers are used only by llvm-profdata tool, while the indexed + /// profile reader is also used by llvm-cov tool and the compiler ( + /// backend or frontend). Since creating PGO symtab can create + /// significant runtime and memory overhead (as it touches data + /// for the whole program), InstrProfSymtab for the indexed profile + /// reader should be created on demand and it is recommended to be + /// only used for dumping purpose with llvm-profdata, not with the + /// compiler.
+ virtual InstrProfSymtab &getSymtab() = 0; + +protected: + std::unique_ptr<InstrProfSymtab> Symtab; + + /// Set the current error and return same. + Error error(instrprof_error Err) { + LastError = Err; + if (Err == instrprof_error::success) + return Error::success(); + return make_error<InstrProfError>(Err); + } + + Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } + + /// Clear the current error and return a successful one. + Error success() { return error(instrprof_error::success); } + +public: + /// Return true if the reader has finished reading the profile data. + bool isEOF() { return LastError == instrprof_error::eof; } + + /// Return true if the reader encountered an error reading profiling data. + bool hasError() { return LastError != instrprof_error::success && !isEOF(); } + + /// Get the current error. + Error getError() { + if (hasError()) + return make_error<InstrProfError>(LastError); + return Error::success(); + } + + /// Factory method to create an appropriately typed reader for the given + /// instrprof file. + static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); + + static Expected<std::unique_ptr<InstrProfReader>> + create(std::unique_ptr<MemoryBuffer> Buffer); +}; + +/// Reader for the simple text based instrprof format. +/// +/// This format is a simple text format that's suitable for test data. Records +/// are separated by one or more blank lines, and record fields are separated by +/// new lines. +/// +/// Each record consists of a function name, a function hash, a number of +/// counters, and then each counter value, in that order. +class TextInstrProfReader : public InstrProfReader { +private: + /// The profile data file contents. + std::unique_ptr<MemoryBuffer> DataBuffer; + /// Iterator over the profile data. 
+ line_iterator Line; + bool IsIRLevelProfile = false; + + Error readValueProfileData(InstrProfRecord &Record); + +public: + TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) + : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} + TextInstrProfReader(const TextInstrProfReader &) = delete; + TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; + + /// Return true if the given buffer is in text instrprof format. + static bool hasFormat(const MemoryBuffer &Buffer); + + bool isIRLevelProfile() const override { return IsIRLevelProfile; } + + /// Read the header. + Error readHeader() override; + + /// Read a single record. + Error readNextRecord(NamedInstrProfRecord &Record) override; + + InstrProfSymtab &getSymtab() override { + assert(Symtab.get()); + return *Symtab.get(); + } +}; + +/// Reader for the raw instrprof binary format from runtime. +/// +/// This format is a raw memory dump of the instrumentation-based profiling data +/// from the runtime. It has no index. +/// +/// Templated on the unsigned type whose size matches pointers on the platform +/// that wrote the profile. +template <class IntPtrT> +class RawInstrProfReader : public InstrProfReader { +private: + /// The profile data file contents. + std::unique_ptr<MemoryBuffer> DataBuffer; + bool ShouldSwapBytes; + // The value of the version field of the raw profile data header. The lower 56 + // bits specifies the format version and the most significant 8 bits specify + // the variant types of the profile.
+ uint64_t Version; + uint64_t CountersDelta; + uint64_t NamesDelta; + const RawInstrProf::ProfileData<IntPtrT> *Data; + const RawInstrProf::ProfileData<IntPtrT> *DataEnd; + const uint64_t *CountersStart; + const char *NamesStart; + uint64_t NamesSize; + // After value profile is all read, this pointer points to + // the header of next profile data (if exists) + const uint8_t *ValueDataStart; + uint32_t ValueKindLast; + uint32_t CurValueDataSize; + +public: + RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) + : DataBuffer(std::move(DataBuffer)) {} + RawInstrProfReader(const RawInstrProfReader &) = delete; + RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; + + static bool hasFormat(const MemoryBuffer &DataBuffer); + Error readHeader() override; + Error readNextRecord(NamedInstrProfRecord &Record) override; + + bool isIRLevelProfile() const override { + return (Version & VARIANT_MASK_IR_PROF) != 0; + } + + InstrProfSymtab &getSymtab() override { + assert(Symtab.get()); + return *Symtab.get(); + } + +private: + Error createSymtab(InstrProfSymtab &Symtab); + Error readNextHeader(const char *CurrentPos); + Error readHeader(const RawInstrProf::Header &Header); + + template <class IntT> IntT swap(IntT Int) const { + return ShouldSwapBytes ? 
sys::getSwappedBytes(Int) : Int; + } + + support::endianness getDataEndianness() const { + support::endianness HostEndian = getHostEndianness(); + if (!ShouldSwapBytes) + return HostEndian; + if (HostEndian == support::little) + return support::big; + else + return support::little; + } + + inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { + return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); + } + + Error readName(NamedInstrProfRecord &Record); + Error readFuncHash(NamedInstrProfRecord &Record); + Error readRawCounts(InstrProfRecord &Record); + Error readValueProfilingData(InstrProfRecord &Record); + bool atEnd() const { return Data == DataEnd; } + + void advanceData() { + Data++; + ValueDataStart += CurValueDataSize; + } + + const char *getNextHeaderPos() const { + assert(atEnd()); + return (const char *)ValueDataStart; + } + + const uint64_t *getCounter(IntPtrT CounterPtr) const { + ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); + return CountersStart + Offset; + } + + StringRef getName(uint64_t NameRef) const { + return Symtab->getFuncName(swap(NameRef)); + } +}; + +using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; +using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; + +namespace IndexedInstrProf { + +enum class HashT : uint32_t; + +} // end namespace IndexedInstrProf + +/// Trait for lookups into the on-disk hash table for the binary instrprof +/// format. +class InstrProfLookupTrait { + std::vector<NamedInstrProfRecord> DataBuffer; + IndexedInstrProf::HashT HashType; + unsigned FormatVersion; + // Endianness of the input value profile data. + // It should be LE by default, but can be changed + // for testing purpose. 
+ support::endianness ValueProfDataEndianness = support::little; + +public: + InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) + : HashType(HashType), FormatVersion(FormatVersion) {} + + using data_type = ArrayRef<NamedInstrProfRecord>; + + using internal_key_type = StringRef; + using external_key_type = StringRef; + using hash_value_type = uint64_t; + using offset_type = uint64_t; + + static bool EqualKey(StringRef A, StringRef B) { return A == B; } + static StringRef GetInternalKey(StringRef K) { return K; } + static StringRef GetExternalKey(StringRef K) { return K; } + + hash_value_type ComputeHash(StringRef K); + + static std::pair<offset_type, offset_type> + ReadKeyDataLength(const unsigned char *&D) { + using namespace support; + + offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); + offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); + return std::make_pair(KeyLen, DataLen); + } + + StringRef ReadKey(const unsigned char *D, offset_type N) { + return StringRef((const char *)D, N); + } + + bool readValueProfilingData(const unsigned char *&D, + const unsigned char *const End); + data_type ReadData(StringRef K, const unsigned char *D, offset_type N); + + // Used for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness) { + ValueProfDataEndianness = Endianness; + } +}; + +struct InstrProfReaderIndexBase { + virtual ~InstrProfReaderIndexBase() = default; + + // Read all the profile records with the same key pointed to the current + // iterator. 
+ virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; + + // Read all the profile records with the key equal to FuncName + virtual Error getRecords(StringRef FuncName, + ArrayRef<NamedInstrProfRecord> &Data) = 0; + virtual void advanceToNextKey() = 0; + virtual bool atEnd() const = 0; + virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; + virtual uint64_t getVersion() const = 0; + virtual bool isIRLevelProfile() const = 0; + virtual Error populateSymtab(InstrProfSymtab &) = 0; +}; + +using OnDiskHashTableImplV3 = + OnDiskIterableChainedHashTable<InstrProfLookupTrait>; + +template <typename HashTableImpl> +class InstrProfReaderItaniumRemapper; + +template <typename HashTableImpl> +class InstrProfReaderIndex : public InstrProfReaderIndexBase { +private: + std::unique_ptr<HashTableImpl> HashTable; + typename HashTableImpl::data_iterator RecordIterator; + uint64_t FormatVersion; + + friend class InstrProfReaderItaniumRemapper<HashTableImpl>; + +public: + InstrProfReaderIndex(const unsigned char *Buckets, + const unsigned char *const Payload, + const unsigned char *const Base, + IndexedInstrProf::HashT HashType, uint64_t Version); + ~InstrProfReaderIndex() override = default; + + Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; + Error getRecords(StringRef FuncName, + ArrayRef<NamedInstrProfRecord> &Data) override; + void advanceToNextKey() override { RecordIterator++; } + + bool atEnd() const override { + return RecordIterator == HashTable->data_end(); + } + + void setValueProfDataEndianness(support::endianness Endianness) override { + HashTable->getInfoObj().setValueProfDataEndianness(Endianness); + } + + uint64_t getVersion() const override { return GET_VERSION(FormatVersion); } + + bool isIRLevelProfile() const override { + return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; + } + + Error populateSymtab(InstrProfSymtab &Symtab) override { + return Symtab.create(HashTable->keys()); + } +}; + +/// Name matcher 
supporting fuzzy matching of symbol names to names in profiles. +class InstrProfReaderRemapper { +public: + virtual ~InstrProfReaderRemapper() {} + virtual Error populateRemappings() { return Error::success(); } + virtual Error getRecords(StringRef FuncName, + ArrayRef<NamedInstrProfRecord> &Data) = 0; +}; + +/// Reader for the indexed binary instrprof format. +class IndexedInstrProfReader : public InstrProfReader { +private: + /// The profile data file contents. + std::unique_ptr<MemoryBuffer> DataBuffer; + /// The profile remapping file contents. + std::unique_ptr<MemoryBuffer> RemappingBuffer; + /// The index into the profile data. + std::unique_ptr<InstrProfReaderIndexBase> Index; + /// The remapper used to match symbol names to names in the profile. + std::unique_ptr<InstrProfReaderRemapper> Remapper; + /// Profile summary data. + std::unique_ptr<ProfileSummary> Summary; + // Index to the current record in the record array. + unsigned RecordIndex; + + // Read the profile summary. Return a pointer pointing to one byte past the + // end of the summary data if it exists or the input \c Cur. + const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, + const unsigned char *Cur); + +public: + IndexedInstrProfReader( + std::unique_ptr<MemoryBuffer> DataBuffer, + std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) + : DataBuffer(std::move(DataBuffer)), + RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} + IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; + IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; + + /// Return the profile version. + uint64_t getVersion() const { return Index->getVersion(); } + bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } + + /// Return true if the given buffer is in an indexed instrprof format. + static bool hasFormat(const MemoryBuffer &DataBuffer); + + /// Read the file header. + Error readHeader() override; + /// Read a single record.
  Error readNextRecord(NamedInstrProfRecord &Record) override;

  /// Return the InstrProfRecord associated with FuncName and FuncHash, or an
  /// error (the result is wrapped in Expected).
  Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
                                               uint64_t FuncHash);

  /// Fill Counts with the profile data for the given function name.
  Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
                          std::vector<uint64_t> &Counts);

  /// Return the maximum of all known function counts.
  /// Dereferences Summary without a null check.
  uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }

  /// Factory method to create an indexed reader.
  static Expected<std::unique_ptr<IndexedInstrProfReader>>
  create(const Twine &Path, const Twine &RemappingPath = "");

  /// Factory method taking already-loaded buffers instead of paths.
  static Expected<std::unique_ptr<IndexedInstrProfReader>>
  create(std::unique_ptr<MemoryBuffer> Buffer,
         std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);

  // Used for testing purpose only.
  void setValueProfDataEndianness(support::endianness Endianness) {
    Index->setValueProfDataEndianness(Endianness);
  }

  // See description in the base class. This interface is designed
  // to be used by llvm-profdata (for dumping). Avoid using this when
  // the client is the compiler.
  InstrProfSymtab &getSymtab() override;
  /// Return the profile summary. Dereferences Summary without a null check.
  ProfileSummary &getSummary() { return *(Summary.get()); }
};

} // end namespace llvm

#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
diff --git a/clang-r353983e/include/llvm/ProfileData/InstrProfWriter.h b/clang-r353983e/include/llvm/ProfileData/InstrProfWriter.h
new file mode 100644
index 00000000..b0ab31dd
--- /dev/null
+++ b/clang-r353983e/include/llvm/ProfileData/InstrProfWriter.h
@@ -0,0 +1,101 @@
//===- InstrProfWriter.h - Instrumented profiling writer --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing profiling data for instrumentation
// based PGO and coverage.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H
#define LLVM_PROFILEDATA_INSTRPROFWRITER_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstdint>
#include <memory>

namespace llvm {

// Forward declarations of types that are only used by pointer or reference in
// this header.
class InstrProfRecordWriterTrait;
class ProfOStream;
class raw_fd_ostream;

/// Writer for instrumentation based profile data.
class InstrProfWriter {
public:
  /// Records for one function name, keyed by a 64-bit value.
  /// NOTE(review): presumably the key is the function hash (compare the
  /// private addRecord(Name, Hash, ...) overload) -- confirm.
  using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>;
  /// Kind of profile being written; PF_Unknown until setIsIRLevelProfile() is
  /// first called.
  enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel };

private:
  bool Sparse;
  /// Profiling data keyed by a string.
  /// NOTE(review): presumably the function name -- confirm.
  StringMap<ProfilingData> FunctionData;
  ProfKind ProfileKind = PF_Unknown;
  // Use raw pointer here for the incomplete type object.
  InstrProfRecordWriterTrait *InfoObj;

public:
  InstrProfWriter(bool Sparse = false);
  ~InstrProfWriter();

  /// Add function counts for the given function. If there are already counts
  /// for this function and the hash and number of counts match, each counter is
  /// summed. Optionally scale counts by \p Weight.
  void addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                 function_ref<void(Error)> Warn);
  /// Convenience overload: same as above with a Weight of 1.
  void addRecord(NamedInstrProfRecord &&I, function_ref<void(Error)> Warn) {
    addRecord(std::move(I), 1, Warn);
  }

  /// Merge existing function counts from the given writer.
+ void mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref<void(Error)> Warn); + + /// Write the profile to \c OS + void write(raw_fd_ostream &OS); + + /// Write the profile in text format to \c OS + Error writeText(raw_fd_ostream &OS); + + /// Write \c Record in text format to \c OS + static void writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Counters, + InstrProfSymtab &Symtab, raw_fd_ostream &OS); + + /// Write the profile, returning the raw data. For testing. + std::unique_ptr<MemoryBuffer> writeBuffer(); + + /// Set the ProfileKind. Report error if mixing FE and IR level profiles. + Error setIsIRLevelProfile(bool IsIRLevel) { + if (ProfileKind == PF_Unknown) { + ProfileKind = IsIRLevel ? PF_IRLevel: PF_FE; + return Error::success(); + } + return (IsIRLevel == (ProfileKind == PF_IRLevel)) + ? Error::success() + : make_error<InstrProfError>( + instrprof_error::unsupported_version); + } + + // Internal interface for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness); + void setOutputSparse(bool Sparse); + +private: + void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I, + uint64_t Weight, function_ref<void(Error)> Warn); + bool shouldEncodeData(const ProfilingData &PD); + void writeImpl(ProfOStream &OS); +}; + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_INSTRPROFWRITER_H diff --git a/clang-r353983e/include/llvm/ProfileData/ProfileCommon.h b/clang-r353983e/include/llvm/ProfileData/ProfileCommon.h new file mode 100644 index 00000000..b52f8864 --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/ProfileCommon.h @@ -0,0 +1,101 @@ +//===- ProfileCommon.h - Common profiling APIs. -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains data structures and functions common to both instrumented
// and sample profiling.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_PROFILECOMMON_H
#define LLVM_PROFILEDATA_PROFILECOMMON_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <vector>

namespace llvm {

namespace sampleprof {

class FunctionSamples;

} // end namespace sampleprof

/// Section-name prefix string for hot code.
inline const char *getHotSectionPrefix() { return ".hot"; }
/// Section-name prefix string for unlikely-executed code.
inline const char *getUnlikelySectionPrefix() { return ".unlikely"; }

/// Common base for the profile summary builders. Accumulates totals, maxima
/// and a histogram of counts; construction and destruction are protected, so
/// it is only usable through derived builders.
class ProfileSummaryBuilder {
private:
  /// We keep track of the number of times a count (block count or samples)
  /// appears in the profile. The map is kept sorted in the descending order of
  /// counts.
  std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies;
  std::vector<uint32_t> DetailedSummaryCutoffs;

protected:
  SummaryEntryVector DetailedSummary;
  /// Sum of every count passed to addCount().
  uint64_t TotalCount = 0;
  /// Largest count passed to addCount().
  uint64_t MaxCount = 0;
  uint64_t MaxFunctionCount = 0;
  /// Number of addCount() calls.
  uint32_t NumCounts = 0;
  uint32_t NumFunctions = 0;

  /// Takes the cutoff list by value and moves it into the builder.
  ProfileSummaryBuilder(std::vector<uint32_t> Cutoffs)
      : DetailedSummaryCutoffs(std::move(Cutoffs)) {}
  ~ProfileSummaryBuilder() = default;

  inline void addCount(uint64_t Count);
  void computeDetailedSummary();

public:
  /// A vector of useful cutoff values for detailed summary.
+ static const ArrayRef<uint32_t> DefaultCutoffs; +}; + +class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { + uint64_t MaxInternalBlockCount = 0; + + inline void addEntryCount(uint64_t Count); + inline void addInternalCount(uint64_t Count); + +public: + InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs) + : ProfileSummaryBuilder(std::move(Cutoffs)) {} + + void addRecord(const InstrProfRecord &); + std::unique_ptr<ProfileSummary> getSummary(); +}; + +class SampleProfileSummaryBuilder final : public ProfileSummaryBuilder { +public: + SampleProfileSummaryBuilder(std::vector<uint32_t> Cutoffs) + : ProfileSummaryBuilder(std::move(Cutoffs)) {} + + void addRecord(const sampleprof::FunctionSamples &FS); + std::unique_ptr<ProfileSummary> getSummary(); +}; + +/// This is called when a count is seen in the profile. +void ProfileSummaryBuilder::addCount(uint64_t Count) { + TotalCount += Count; + if (Count > MaxCount) + MaxCount = Count; + NumCounts++; + CountFrequencies[Count]++; +} + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_PROFILECOMMON_H diff --git a/clang-r353983e/include/llvm/ProfileData/SampleProf.h b/clang-r353983e/include/llvm/ProfileData/SampleProf.h new file mode 100644 index 00000000..ca3e2dec --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/SampleProf.h @@ -0,0 +1,564 @@ +//===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains common definitions used in the reading and writing of +// sample profile data. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H +#define LLVM_PROFILEDATA_SAMPLEPROF_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cstdint> +#include <map> +#include <string> +#include <system_error> +#include <utility> + +namespace llvm { + +class raw_ostream; + +const std::error_category &sampleprof_category(); + +enum class sampleprof_error { + success = 0, + bad_magic, + unsupported_version, + too_large, + truncated, + malformed, + unrecognized_format, + unsupported_writing_format, + truncated_name_table, + not_implemented, + counter_overflow, + ostream_seek_unsupported +}; + +inline std::error_code make_error_code(sampleprof_error E) { + return std::error_code(static_cast<int>(E), sampleprof_category()); +} + +inline sampleprof_error MergeResult(sampleprof_error &Accumulator, + sampleprof_error Result) { + // Prefer first error encountered as later errors may be secondary effects of + // the initial problem. 
+ if (Accumulator == sampleprof_error::success && + Result != sampleprof_error::success) + Accumulator = Result; + return Accumulator; +} + +} // end namespace llvm + +namespace std { + +template <> +struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; + +} // end namespace std + +namespace llvm { +namespace sampleprof { + +enum SampleProfileFormat { + SPF_None = 0, + SPF_Text = 0x1, + SPF_Compact_Binary = 0x2, + SPF_GCC = 0x3, + SPF_Binary = 0xff +}; + +static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { + return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | + uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | + uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | + uint64_t('2') << (64 - 56) | uint64_t(Format); +} + +// Get the proper representation of a string in the input Format. +static inline StringRef getRepInFormat(StringRef Name, + SampleProfileFormat Format, + std::string &GUIDBuf) { + if (Name.empty()) + return Name; + GUIDBuf = std::to_string(Function::getGUID(Name)); + return (Format == SPF_Compact_Binary) ? StringRef(GUIDBuf) : Name; +} + +static inline uint64_t SPVersion() { return 103; } + +/// Represents the relative location of an instruction. +/// +/// Instruction locations are specified by the line offset from the +/// beginning of the function (marked by the line where the function +/// header is) and the discriminator value within that line. +/// +/// The discriminator value is useful to distinguish instructions +/// that are on the same line but belong to different basic blocks +/// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). 
+struct LineLocation { + LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} + + void print(raw_ostream &OS) const; + void dump() const; + + bool operator<(const LineLocation &O) const { + return LineOffset < O.LineOffset || + (LineOffset == O.LineOffset && Discriminator < O.Discriminator); + } + + uint32_t LineOffset; + uint32_t Discriminator; +}; + +raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); + +/// Representation of a single sample record. +/// +/// A sample record is represented by a positive integer value, which +/// indicates how frequently was the associated line location executed. +/// +/// Additionally, if the associated location contains a function call, +/// the record will hold a list of all the possible called targets. For +/// direct calls, this will be the exact function being invoked. For +/// indirect calls (function pointers, virtual table dispatch), this +/// will be a list of one or more functions. +class SampleRecord { +public: + using CallTargetMap = StringMap<uint64_t>; + + SampleRecord() = default; + + /// Increment the number of samples for this record by \p S. + /// Optionally scale sample count \p S by \p Weight. + /// + /// Sample counts accumulate using saturating arithmetic, to avoid wrapping + /// around unsigned integers. + sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { + bool Overflowed; + NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + /// Add called function \p F with samples \p S. + /// Optionally scale sample count \p S by \p Weight. + /// + /// Sample counts accumulate using saturating arithmetic, to avoid wrapping + /// around unsigned integers. 
+ sampleprof_error addCalledTarget(StringRef F, uint64_t S, + uint64_t Weight = 1) { + uint64_t &TargetSamples = CallTargets[F]; + bool Overflowed; + TargetSamples = + SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + /// Return true if this sample record contains function calls. + bool hasCalls() const { return !CallTargets.empty(); } + + uint64_t getSamples() const { return NumSamples; } + const CallTargetMap &getCallTargets() const { return CallTargets; } + + /// Merge the samples in \p Other into this record. + /// Optionally scale sample counts by \p Weight. + sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { + sampleprof_error Result = addSamples(Other.getSamples(), Weight); + for (const auto &I : Other.getCallTargets()) { + MergeResult(Result, addCalledTarget(I.first(), I.second, Weight)); + } + return Result; + } + + void print(raw_ostream &OS, unsigned Indent) const; + void dump() const; + +private: + uint64_t NumSamples = 0; + CallTargetMap CallTargets; +}; + +raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); + +class FunctionSamples; + +using BodySampleMap = std::map<LineLocation, SampleRecord>; +// NOTE: Using a StringMap here makes parsed profiles consume around 17% more +// memory, which is *very* significant for large profiles. +using FunctionSamplesMap = std::map<std::string, FunctionSamples>; +using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; + +/// Representation of the samples collected for a function. +/// +/// This data structure contains all the collected samples for the body +/// of a function. Each sample corresponds to a LineLocation instance +/// within the body of the function. 
+class FunctionSamples { +public: + FunctionSamples() = default; + + void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; + void dump() const; + + sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + TotalSamples = + SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + TotalHeadSamples = + SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); + return Overflowed ? sampleprof_error::counter_overflow + : sampleprof_error::success; + } + + sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, + uint64_t Num, uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( + Num, Weight); + } + + sampleprof_error addCalledTargetSamples(uint32_t LineOffset, + uint32_t Discriminator, + StringRef FName, uint64_t Num, + uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( + FName, Num, Weight); + } + + /// Return the number of samples collected at the given location. + /// Each location is specified by \p LineOffset and \p Discriminator. + /// If the location is not found in profile, return error. + ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, + uint32_t Discriminator) const { + const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (ret == BodySamples.end()) + return std::error_code(); + else + return ret->second.getSamples(); + } + + /// Returns the call target map collected at a given location. + /// Each location is specified by \p LineOffset and \p Discriminator. + /// If the location is not found in profile, return error. 
+ ErrorOr<SampleRecord::CallTargetMap> + findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { + const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (ret == BodySamples.end()) + return std::error_code(); + return ret->second.getCallTargets(); + } + + /// Return the function samples at the given callsite location. + FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { + return CallsiteSamples[Loc]; + } + + /// Returns the FunctionSamplesMap at the given \p Loc. + const FunctionSamplesMap * + findFunctionSamplesMapAt(const LineLocation &Loc) const { + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + return &iter->second; + } + + /// Returns a pointer to FunctionSamples at the given callsite location \p Loc + /// with callee \p CalleeName. If no callsite can be found, relax the + /// restriction to return the FunctionSamples at callsite location \p Loc + /// with the maximum total sample count. + const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, + StringRef CalleeName) const { + std::string CalleeGUID; + CalleeName = getRepInFormat(CalleeName, Format, CalleeGUID); + + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + auto FS = iter->second.find(CalleeName); + if (FS != iter->second.end()) + return &FS->second; + // If we cannot find exact match of the callee name, return the FS with + // the max total count. + uint64_t MaxTotalSamples = 0; + const FunctionSamples *R = nullptr; + for (const auto &NameFS : iter->second) + if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { + MaxTotalSamples = NameFS.second.getTotalSamples(); + R = &NameFS.second; + } + return R; + } + + bool empty() const { return TotalSamples == 0; } + + /// Return the total number of samples collected inside the function. 
+ uint64_t getTotalSamples() const { return TotalSamples; } + + /// Return the total number of branch samples that have the function as the + /// branch target. This should be equivalent to the sample of the first + /// instruction of the symbol. But as we directly get this info for raw + /// profile without referring to potentially inaccurate debug info, this + /// gives more accurate profile data and is preferred for standalone symbols. + uint64_t getHeadSamples() const { return TotalHeadSamples; } + + /// Return the sample count of the first instruction of the function. + /// The function can be either a standalone symbol or an inlined function. + uint64_t getEntrySamples() const { + // Use either BodySamples or CallsiteSamples which ever has the smaller + // lineno. + if (!BodySamples.empty() && + (CallsiteSamples.empty() || + BodySamples.begin()->first < CallsiteSamples.begin()->first)) + return BodySamples.begin()->second.getSamples(); + if (!CallsiteSamples.empty()) { + uint64_t T = 0; + // An indirect callsite may be promoted to several inlined direct calls. + // We need to get the sum of them. + for (const auto &N_FS : CallsiteSamples.begin()->second) + T += N_FS.second.getEntrySamples(); + return T; + } + return 0; + } + + /// Return all the samples collected in the body of the function. + const BodySampleMap &getBodySamples() const { return BodySamples; } + + /// Return all the callsite samples collected in the body of the function. + const CallsiteSampleMap &getCallsiteSamples() const { + return CallsiteSamples; + } + + /// Merge the samples in \p Other into this one. + /// Optionally scale samples by \p Weight. 
+ sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { + sampleprof_error Result = sampleprof_error::success; + Name = Other.getName(); + MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); + MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); + for (const auto &I : Other.getBodySamples()) { + const LineLocation &Loc = I.first; + const SampleRecord &Rec = I.second; + MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); + } + for (const auto &I : Other.getCallsiteSamples()) { + const LineLocation &Loc = I.first; + FunctionSamplesMap &FSMap = functionSamplesAt(Loc); + for (const auto &Rec : I.second) + MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); + } + return Result; + } + + /// Recursively traverses all children, if the total sample count of the + /// corresponding function is no less than \p Threshold, add its corresponding + /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID + /// to \p S. + void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M, + uint64_t Threshold) const { + if (TotalSamples <= Threshold) + return; + S.insert(getGUID(Name)); + // Import hot CallTargets, which may not be available in IR because full + // profile annotation cannot be done until backend compilation in ThinLTO. + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + const Function *Callee = + M->getFunction(getNameInModule(TS.getKey(), M)); + if (!Callee || !Callee->getSubprogram()) + S.insert(getGUID(TS.getKey())); + } + for (const auto &CS : CallsiteSamples) + for (const auto &NameFS : CS.second) + NameFS.second.findInlinedFunctions(S, M, Threshold); + } + + /// Set the name of the function. + void setName(StringRef FunctionName) { Name = FunctionName; } + + /// Return the function name. 
+ StringRef getName() const { return Name; } + + /// Return the original function name if it exists in Module \p M. + StringRef getFuncNameInModule(const Module *M) const { + return getNameInModule(Name, M); + } + + /// Translate \p Name into its original name in Module. + /// When the Format is not SPF_Compact_Binary, \p Name needs no translation. + /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples + /// is actually GUID of the original function name. getNameInModule will + /// translate \p Name in current FunctionSamples into its original name. + /// If the original name doesn't exist in \p M, return empty StringRef. + StringRef getNameInModule(StringRef Name, const Module *M) const { + if (Format != SPF_Compact_Binary) + return Name; + // Expect CurrentModule to be initialized by GUIDToFuncNameMapper. + if (M != CurrentModule) + llvm_unreachable("Input Module should be the same as CurrentModule"); + auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data())); + if (iter == GUIDToFuncNameMap.end()) + return StringRef(); + return iter->second; + } + + /// Returns the line offset to the start line of the subprogram. + /// We assume that a single function will not exceed 65535 LOC. + static unsigned getOffset(const DILocation *DIL); + + /// Get the FunctionSamples of the inline instance where DIL originates + /// from. + /// + /// The FunctionSamples of the instruction (Machine or IR) associated to + /// \p DIL is the inlined instance in which that instruction is coming from. + /// We traverse the inline stack of that instruction, and match it with the + /// tree nodes in the profile. + /// + /// \returns the FunctionSamples pointer to the inlined instance. + const FunctionSamples *findFunctionSamples(const DILocation *DIL) const; + + static SampleProfileFormat Format; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + /// all the function symbols defined or declared in CurrentModule. 
+ static DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; + static Module *CurrentModule; + + class GUIDToFuncNameMapper { + public: + GUIDToFuncNameMapper(Module &M) { + if (Format != SPF_Compact_Binary) + return; + + for (const auto &F : M) { + StringRef OrigName = F.getName(); + GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName}); + /// Local to global var promotion used by optimization like thinlto + /// will rename the var and add suffix like ".llvm.xxx" to the + /// original local name. In sample profile, the suffixes of function + /// names are all stripped. Since it is possible that the mapper is + /// built in post-thin-link phase and var promotion has been done, + /// we need to add the substring of function name without the suffix + /// into the GUIDToFuncNameMap. + auto pos = OrigName.find('.'); + if (pos != StringRef::npos) { + StringRef NewName = OrigName.substr(0, pos); + GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName}); + } + } + CurrentModule = &M; + } + + ~GUIDToFuncNameMapper() { + if (Format != SPF_Compact_Binary) + return; + + GUIDToFuncNameMap.clear(); + CurrentModule = nullptr; + } + }; + + // Assume the input \p Name is a name coming from FunctionSamples itself. + // If the format is SPF_Compact_Binary, the name is already a GUID and we + // don't want to return the GUID of GUID. + static uint64_t getGUID(StringRef Name) { + return (Format == SPF_Compact_Binary) ? std::stoull(Name.data()) + : Function::getGUID(Name); + } + +private: + /// Mangled name of the function. + StringRef Name; + + /// Total number of samples collected inside this function. + /// + /// Samples are cumulative, they include all the samples collected + /// inside this function and all its inlined callees. + uint64_t TotalSamples = 0; + + /// Total number of samples collected at the head of the function. + /// This is an approximation of the number of calls made to this function + /// at runtime. 
  uint64_t TotalHeadSamples = 0;

  /// Map instruction locations to collected samples.
  ///
  /// Each entry in this map contains the number of samples
  /// collected at the corresponding line offset. All line locations
  /// are an offset from the start of the function.
  BodySampleMap BodySamples;

  /// Map call sites to collected samples for the called function.
  ///
  /// Each entry in this map corresponds to all the samples
  /// collected for the inlined function call at the given
  /// location. For example, given:
  ///
  ///     void foo() {
  ///  1    bar();
  ///  ...
  ///  8    baz();
  ///     }
  ///
  /// If the bar() and baz() calls were inlined inside foo(), this
  /// map will contain two entries.  One for all the samples collected
  /// in the call to bar() at line offset 1, the other for all the samples
  /// collected in the call to baz() at line offset 8.
  CallsiteSampleMap CallsiteSamples;
};

/// Stream-insertion operator for FunctionSamples; only declared here.
raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);

/// Sort a LocationT->SampleT map by LocationT.
///
/// It produces a sorted list of <LocationT, SampleT> records by ascending
/// order of LocationT.
+template <class LocationT, class SampleT> class SampleSorter { +public: + using SamplesWithLoc = std::pair<const LocationT, SampleT>; + using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; + + SampleSorter(const std::map<LocationT, SampleT> &Samples) { + for (const auto &I : Samples) + V.push_back(&I); + std::stable_sort(V.begin(), V.end(), + [](const SamplesWithLoc *A, const SamplesWithLoc *B) { + return A->first < B->first; + }); + } + + const SamplesWithLocList &get() const { return V; } + +private: + SamplesWithLocList V; +}; + +} // end namespace sampleprof +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_SAMPLEPROF_H diff --git a/clang-r353983e/include/llvm/ProfileData/SampleProfReader.h b/clang-r353983e/include/llvm/ProfileData/SampleProfReader.h new file mode 100644 index 00000000..32b8f91d --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/SampleProfReader.h @@ -0,0 +1,581 @@ +//===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed for reading sample profiles. +// +// NOTE: If you are making changes to this file format, please remember +// to document them in the Clang documentation at +// tools/clang/docs/UsersManual.rst. +// +// Text format +// ----------- +// +// Sample profiles are written as ASCII text. The file is divided into +// sections, which correspond to each of the functions executed at runtime. +// Each section has the following format +// +// function1:total_samples:total_head_samples +// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] +// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] +// ... 
+// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] +// offsetA[.discriminator]: fnA:num_of_total_samples +// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ] +// ... +// +// This is a nested tree in which the indentation represents the nesting level +// of the inline stack. There are no blank lines in the file. And the spacing +// within a single line is fixed. Additional spaces will result in an error +// while reading the file. +// +// Any line starting with the '#' character is completely ignored. +// +// Inlined calls are represented with indentation. The Inline stack is a +// stack of source locations in which the top of the stack represents the +// leaf function, and the bottom of the stack represents the actual +// symbol to which the instruction belongs. +// +// Function names must be mangled in order for the profile loader to +// match them in the current translation unit. The two numbers in the +// function header specify how many total samples were accumulated in the +// function (first number), and the total number of samples accumulated +// in the prologue of the function (second number). This head sample +// count provides an indicator of how frequently the function is invoked. +// +// There are two types of lines in the function body. +// +// * Sampled line represents the profile information of a source location. +// * Callsite line represents the profile information of a callsite. +// +// Each sampled line may contain several items. Some are optional (marked +// below): +// +// a. Source line offset. This number represents the line number +// in the function where the sample was collected. The line number is +// always relative to the line where symbol of the function is +// defined. So, if the function has its header at line 280, the offset +// 13 is at line 293 in the file. +// +// Note that this offset should never be a negative number. This could +// happen in cases like macros. 
The debug machinery will register the +// line number at the point of macro expansion. So, if the macro was +// expanded in a line before the start of the function, the profile +// converter should emit a 0 as the offset (this means that the optimizers +// will not be able to associate a meaningful weight to the instructions +// in the macro). +// +// b. [OPTIONAL] Discriminator. This is used if the sampled program +// was compiled with DWARF discriminator support +// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). +// DWARF discriminators are unsigned integer values that allow the +// compiler to distinguish between multiple execution paths on the +// same source line location. +// +// For example, consider the line of code ``if (cond) foo(); else bar();``. +// If the predicate ``cond`` is true 80% of the time, then the edge +// into function ``foo`` should be considered to be taken most of the +// time. But both calls to ``foo`` and ``bar`` are at the same source +// line, so a sample count at that line is not sufficient. The +// compiler needs to know which part of that line is taken more +// frequently. +// +// This is what discriminators provide. In this case, the calls to +// ``foo`` and ``bar`` will be at the same line, but will have +// different discriminator values. This allows the compiler to correctly +// set edge weights into ``foo`` and ``bar``. +// +// c. Number of samples. This is an integer quantity representing the +// number of samples collected by the profiler at this source +// location. +// +// d. [OPTIONAL] Potential call targets and samples. If present, this +// line contains a call instruction. This models both direct and +// indirect calls. Each called target is listed together with the +// number of samples. For example, +// +// 130: 7 foo:3 bar:2 baz:7 +// +// The above means that at relative line offset 130 there is a call +// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, +// with ``baz()`` being the relatively more frequently called target. 
+// +// Each callsite line may contain several items. Some are optional. +// +// a. Source line offset. This number represents the line number of the +// callsite that is inlined in the profiled binary. +// +// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line. +// +// c. Number of samples. This is an integer quantity representing the +// total number of samples collected for the inlined instance at this +// callsite +// +// +// Binary format +// ------------- +// +// This is a more compact encoding. Numbers are encoded as ULEB128 values +// and all strings are encoded in a name table. The file is organized in +// the following sections: +// +// MAGIC (uint64_t) +// File identifier computed by function SPMagic() (0x5350524f463432ff) +// +// VERSION (uint32_t) +// File format version number computed by SPVersion() +// +// SUMMARY +// TOTAL_COUNT (uint64_t) +// Total number of samples in the profile. +// MAX_COUNT (uint64_t) +// Maximum value of samples on a line. +// MAX_FUNCTION_COUNT (uint64_t) +// Maximum number of samples at function entry (head samples). +// NUM_COUNTS (uint64_t) +// Number of lines with samples. +// NUM_FUNCTIONS (uint64_t) +// Number of functions with samples. +// NUM_DETAILED_SUMMARY_ENTRIES (size_t) +// Number of entries in detailed summary +// DETAILED_SUMMARY +// A list of detailed summary entries. Each entry consists of +// CUTOFF (uint32_t) +// Required percentile of total sample count expressed as a fraction +// multiplied by 1000000. +// MIN_COUNT (uint64_t) +// The minimum number of samples required to reach the target +// CUTOFF. +// NUM_COUNTS (uint64_t) +// Number of samples to get to the desired percentile. +// +// NAME TABLE +// SIZE (uint32_t) +// Number of entries in the name table. +// NAMES +// A NUL-separated list of SIZE strings. 
+// +// FUNCTION BODY (one for each uninlined function body present in the profile) +// HEAD_SAMPLES (uint64_t) [only for top-level functions] +// Total number of samples collected at the head (prologue) of the +// function. +// NOTE: This field should only be present for top-level functions +// (i.e., not inlined into any caller). Inlined function calls +// have no prologue, so they don't need this. +// NAME_IDX (uint32_t) +// Index into the name table indicating the function name. +// SAMPLES (uint64_t) +// Total number of samples collected in this function. +// NRECS (uint32_t) +// Total number of sampling records in this function's profile. +// BODY RECORDS +// A list of NRECS entries. Each entry contains: +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// SAMPLES (uint64_t) +// Number of samples collected at this location. +// NUM_CALLS (uint32_t) +// Number of non-inlined function calls made at this location. In the +// case of direct calls, this number will always be 1. For indirect +// calls (virtual functions and function pointers) this will +// represent all the actual functions called at runtime. +// CALL_TARGETS +// A list of NUM_CALLS entries for each called function: +// NAME_IDX (uint32_t) +// Index into the name table with the callee name. +// SAMPLES (uint64_t) +// Number of samples collected at the call site. +// NUM_INLINED_FUNCTIONS (uint32_t) +// Number of callees inlined into this function. +// INLINED FUNCTION RECORDS +// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined +// callees. +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// FUNCTION BODY +// A FUNCTION BODY entry describing the inlined function. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H +#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/GCOV.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SymbolRemappingReader.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <vector> + +namespace llvm { + +class raw_ostream; + +namespace sampleprof { + +/// Sample-based profile reader. +/// +/// Each profile contains sample counts for all the functions +/// executed. Inside each function, statements are annotated with the +/// collected samples on all the instructions associated with that +/// statement. +/// +/// For this to produce meaningful data, the program needs to be +/// compiled with some debug information (at minimum, line numbers: +/// -gline-tables-only). Otherwise, it will be impossible to match IR +/// instructions to the line numbers collected by the profiler. +/// +/// From the profile file, we are interested in collecting the +/// following information: +/// +/// * A list of functions included in the profile (mangled names). +/// +/// * For each function F: +/// 1. The total number of samples collected in F. +/// +/// 2. The samples collected at each line in F. To provide some +/// protection against source code shuffling, line numbers should +/// be relative to the start of the function. +/// +/// The reader supports two file formats: text and binary. 
The text format +/// is useful for debugging and testing, while the binary format is more +/// compact and I/O efficient. They can both be used interchangeably. +class SampleProfileReader { +public: + SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_None) + : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {} + + virtual ~SampleProfileReader() = default; + + /// Read and validate the file header. + virtual std::error_code readHeader() = 0; + + /// Read sample profiles from the associated file. + virtual std::error_code read() = 0; + + /// Print the profile for \p FName on stream \p OS. + void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs()); + + virtual void collectFuncsToUse(const Module &M) {} + + /// Print all the profiles on stream \p OS. + void dump(raw_ostream &OS = dbgs()); + + /// Return the samples collected for function \p F. + FunctionSamples *getSamplesFor(const Function &F) { + // The function name may have been updated by adding suffix. In sample + // profile, the function names are all stripped, so we need to strip + // the function name suffix before matching with profile. + return getSamplesFor(F.getName().split('.').first); + } + + /// Return the samples collected for function \p F. + virtual FunctionSamples *getSamplesFor(StringRef Fname) { + std::string FGUID; + Fname = getRepInFormat(Fname, getFormat(), FGUID); + auto It = Profiles.find(Fname); + if (It != Profiles.end()) + return &It->second; + return nullptr; + } + + /// Return all the profiles. + StringMap<FunctionSamples> &getProfiles() { return Profiles; } + + /// Report a parse error message. + void reportError(int64_t LineNumber, Twine Msg) const { + Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(), + LineNumber, Msg)); + } + + /// Create a sample profile reader appropriate to the file format. 
+ static ErrorOr<std::unique_ptr<SampleProfileReader>> + create(const Twine &Filename, LLVMContext &C); + + /// Create a sample profile reader from the supplied memory buffer. + static ErrorOr<std::unique_ptr<SampleProfileReader>> + create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C); + + /// Return the profile summary. + ProfileSummary &getSummary() { return *(Summary.get()); } + + /// \brief Return the profile format. + SampleProfileFormat getFormat() { return Format; } + +protected: + /// Map every function to its associated profile. + /// + /// The profile of every function executed at runtime is collected + /// in the structure FunctionSamples. This maps function objects + /// to their corresponding profiles. + StringMap<FunctionSamples> Profiles; + + /// LLVM context used to emit diagnostics. + LLVMContext &Ctx; + + /// Memory buffer holding the profile file. + std::unique_ptr<MemoryBuffer> Buffer; + + /// Profile summary information. + std::unique_ptr<ProfileSummary> Summary; + + /// Take ownership of the summary of this reader. + static std::unique_ptr<ProfileSummary> + takeSummary(SampleProfileReader &Reader) { + return std::move(Reader.Summary); + } + + /// Compute summary for this profile. + void computeSummary(); + + /// \brief The format of sample. + SampleProfileFormat Format = SPF_None; +}; + +class SampleProfileReaderText : public SampleProfileReader { +public: + SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C) + : SampleProfileReader(std::move(B), C, SPF_Text) {} + + /// Read and validate the file header. + std::error_code readHeader() override { return sampleprof_error::success; } + + /// Read sample profiles from the associated file. + std::error_code read() override; + + /// Return true if \p Buffer is in the format supported by this class. 
+ static bool hasFormat(const MemoryBuffer &Buffer); +}; + +class SampleProfileReaderBinary : public SampleProfileReader { +public: + SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_None) + : SampleProfileReader(std::move(B), C, Format) {} + + /// Read and validate the file header. + virtual std::error_code readHeader() override; + + /// Read sample profiles from the associated file. + std::error_code read() override; + +protected: + /// Read a numeric value of type T from the profile. + /// + /// If an error occurs during decoding, a diagnostic message is emitted and + /// EC is set. + /// + /// \returns the read value. + template <typename T> ErrorOr<T> readNumber(); + + /// Read a numeric value of type T from the profile. The value is saved + /// without encoded. + template <typename T> ErrorOr<T> readUnencodedNumber(); + + /// Read a string from the profile. + /// + /// If an error occurs during decoding, a diagnostic message is emitted and + /// EC is set. + /// + /// \returns the read value. + ErrorOr<StringRef> readString(); + + /// Read the string index and check whether it overflows the table. + template <typename T> inline ErrorOr<uint32_t> readStringIndex(T &Table); + + /// Return true if we've reached the end of file. + bool at_eof() const { return Data >= End; } + + /// Read the next function profile instance. + std::error_code readFuncProfile(); + + /// Read the contents of the given profile instance. + std::error_code readProfile(FunctionSamples &FProfile); + + /// Points to the current location in the buffer. + const uint8_t *Data = nullptr; + + /// Points to the end of the buffer. + const uint8_t *End = nullptr; + +private: + std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries); + virtual std::error_code verifySPMagic(uint64_t Magic) = 0; + + /// Read profile summary. + std::error_code readSummary(); + + /// Read the whole name table. 
+ virtual std::error_code readNameTable() = 0; + + /// Read a string indirectly via the name table. + virtual ErrorOr<StringRef> readStringFromTable() = 0; +}; + +class SampleProfileReaderRawBinary : public SampleProfileReaderBinary { +private: + /// Function name table. + std::vector<StringRef> NameTable; + virtual std::error_code verifySPMagic(uint64_t Magic) override; + virtual std::error_code readNameTable() override; + /// Read a string indirectly via the name table. + virtual ErrorOr<StringRef> readStringFromTable() override; + +public: + SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C) + : SampleProfileReaderBinary(std::move(B), C, SPF_Binary) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); +}; + +class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { +private: + /// Function name table. + std::vector<std::string> NameTable; + /// The table mapping from function name to the offset of its FunctionSample + /// towards file start. + DenseMap<StringRef, uint64_t> FuncOffsetTable; + /// The set containing the functions to use when compiling a module. + DenseSet<StringRef> FuncsToUse; + virtual std::error_code verifySPMagic(uint64_t Magic) override; + virtual std::error_code readNameTable() override; + /// Read a string indirectly via the name table. + virtual ErrorOr<StringRef> readStringFromTable() override; + virtual std::error_code readHeader() override; + std::error_code readFuncOffsetTable(); + +public: + SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B, + LLVMContext &C) + : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + + /// Read samples only for functions to use. 
+ std::error_code read() override; + + /// Collect functions to be used when compiling Module \p M. + void collectFuncsToUse(const Module &M) override; +}; + +using InlineCallStack = SmallVector<FunctionSamples *, 10>; + +// Supported histogram types in GCC. Currently, we only need support for +// call target histograms. +enum HistType { + HIST_TYPE_INTERVAL, + HIST_TYPE_POW2, + HIST_TYPE_SINGLE_VALUE, + HIST_TYPE_CONST_DELTA, + HIST_TYPE_INDIR_CALL, + HIST_TYPE_AVERAGE, + HIST_TYPE_IOR, + HIST_TYPE_INDIR_CALL_TOPN +}; + +class SampleProfileReaderGCC : public SampleProfileReader { +public: + SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C) + : SampleProfileReader(std::move(B), C, SPF_GCC), + GcovBuffer(Buffer.get()) {} + + /// Read and validate the file header. + std::error_code readHeader() override; + + /// Read sample profiles from the associated file. + std::error_code read() override; + + /// Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + +protected: + std::error_code readNameTable(); + std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack, + bool Update, uint32_t Offset); + std::error_code readFunctionProfiles(); + std::error_code skipNextWord(); + template <typename T> ErrorOr<T> readNumber(); + ErrorOr<StringRef> readString(); + + /// Read the section tag and check that it's the same as \p Expected. + std::error_code readSectionTag(uint32_t Expected); + + /// GCOV buffer containing the profile. + GCOVBuffer GcovBuffer; + + /// Function names in this profile. + std::vector<std::string> Names; + + /// GCOV tags used to separate sections in the profile file. 
+ static const uint32_t GCOVTagAFDOFileNames = 0xaa000000; + static const uint32_t GCOVTagAFDOFunction = 0xac000000; +}; + +/// A profile data reader proxy that remaps the profile data from another +/// sample profile data reader, by applying a provided set of equivalences +/// between components of the symbol names in the profile. +class SampleProfileReaderItaniumRemapper : public SampleProfileReader { +public: + SampleProfileReaderItaniumRemapper( + std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + std::unique_ptr<SampleProfileReader> Underlying) + : SampleProfileReader(std::move(B), C, Underlying->getFormat()) { + Profiles = std::move(Underlying->getProfiles()); + Summary = takeSummary(*Underlying); + // Keep the underlying reader alive; the profile data may contain + // StringRefs referencing names in its name table. + UnderlyingReader = std::move(Underlying); + } + + /// Create a remapped sample profile from the given remapping file and + /// underlying samples. + static ErrorOr<std::unique_ptr<SampleProfileReader>> + create(const Twine &Filename, LLVMContext &C, + std::unique_ptr<SampleProfileReader> Underlying); + + /// Read and validate the file header. + std::error_code readHeader() override { return sampleprof_error::success; } + + /// Read remapping file and apply it to the sample profile. + std::error_code read() override; + + /// Return the samples collected for function \p F. 
+ FunctionSamples *getSamplesFor(StringRef FunctionName) override; + using SampleProfileReader::getSamplesFor; + +private: + SymbolRemappingReader Remappings; + DenseMap<SymbolRemappingReader::Key, FunctionSamples*> SampleMap; + std::unique_ptr<SampleProfileReader> UnderlyingReader; +}; + +} // end namespace sampleprof + +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H diff --git a/clang-r353983e/include/llvm/ProfileData/SampleProfWriter.h b/clang-r353983e/include/llvm/ProfileData/SampleProfWriter.h new file mode 100644 index 00000000..81e6e3ab --- /dev/null +++ b/clang-r353983e/include/llvm/ProfileData/SampleProfWriter.h @@ -0,0 +1,193 @@ +//===- SampleProfWriter.h - Write LLVM sample profile data ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions needed for writing sample profiles. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_PROFILEDATA_SAMPLEPROFWRITER_H +#define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> +#include <set> +#include <system_error> + +namespace llvm { +namespace sampleprof { + +/// Sample-based profile writer. Base class. +class SampleProfileWriter { +public: + virtual ~SampleProfileWriter() = default; + + /// Write sample profiles in \p S. + /// + /// \returns status code of the file update operation. 
+ virtual std::error_code write(const FunctionSamples &S) = 0; + + /// Write all the sample profiles in the given map of samples. + /// + /// \returns status code of the file update operation. + virtual std::error_code write(const StringMap<FunctionSamples> &ProfileMap); + + raw_ostream &getOutputStream() { return *OutputStream; } + + /// Profile writer factory. + /// + /// Create a new file writer based on the value of \p Format. + static ErrorOr<std::unique_ptr<SampleProfileWriter>> + create(StringRef Filename, SampleProfileFormat Format); + + /// Create a new stream writer based on the value of \p Format. + /// For testing. + static ErrorOr<std::unique_ptr<SampleProfileWriter>> + create(std::unique_ptr<raw_ostream> &OS, SampleProfileFormat Format); + +protected: + SampleProfileWriter(std::unique_ptr<raw_ostream> &OS) + : OutputStream(std::move(OS)) {} + + /// Write a file header for the profile file. + virtual std::error_code + writeHeader(const StringMap<FunctionSamples> &ProfileMap) = 0; + + /// Output stream where to emit the profile to. + std::unique_ptr<raw_ostream> OutputStream; + + /// Profile summary. + std::unique_ptr<ProfileSummary> Summary; + + /// Compute summary for this profile. + void computeSummary(const StringMap<FunctionSamples> &ProfileMap); +}; + +/// Sample-based profile writer (text format). +class SampleProfileWriterText : public SampleProfileWriter { +public: + std::error_code write(const FunctionSamples &S) override; + +protected: + SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS) + : SampleProfileWriter(OS), Indent(0) {} + + std::error_code + writeHeader(const StringMap<FunctionSamples> &ProfileMap) override { + return sampleprof_error::success; + } + +private: + /// Indent level to use when writing. + /// + /// This is used when printing inlined callees. 
+ unsigned Indent; + + friend ErrorOr<std::unique_ptr<SampleProfileWriter>> + SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, + SampleProfileFormat Format); +}; + +/// Sample-based profile writer (binary format). +class SampleProfileWriterBinary : public SampleProfileWriter { +public: + virtual std::error_code write(const FunctionSamples &S) override; + SampleProfileWriterBinary(std::unique_ptr<raw_ostream> &OS) + : SampleProfileWriter(OS) {} + +protected: + virtual std::error_code writeNameTable() = 0; + virtual std::error_code writeMagicIdent() = 0; + virtual std::error_code + writeHeader(const StringMap<FunctionSamples> &ProfileMap) override; + std::error_code writeSummary(); + std::error_code writeNameIdx(StringRef FName); + std::error_code writeBody(const FunctionSamples &S); + inline void stablizeNameTable(std::set<StringRef> &V); + + MapVector<StringRef, uint32_t> NameTable; + +private: + void addName(StringRef FName); + void addNames(const FunctionSamples &S); + + friend ErrorOr<std::unique_ptr<SampleProfileWriter>> + SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, + SampleProfileFormat Format); +}; + +class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; + +protected: + virtual std::error_code writeNameTable() override; + virtual std::error_code writeMagicIdent() override; +}; + +// CompactBinary is a compact format of binary profile which both reduces +// the profile size and the load time needed when compiling. It has two +// major difference with Binary format. +// 1. It represents all the strings in name table using md5 hash. +// 2. 
It saves a function offset table which maps function name index to +// the offset of its function profile to the start of the binary profile, +// so by using the function offset table, for those function profiles which +// will not be needed when compiling a module, the profile reader does't +// have to read them and it saves compile time if the profile size is huge. +// The layout of the compact format is shown as follows: +// +// Part1: Profile header, the same as binary format, containing magic +// number, version, summary, name table... +// Part2: Function Offset Table Offset, which saves the position of +// Part4. +// Part3: Function profile collection +// function1 profile start +// .... +// function2 profile start +// .... +// function3 profile start +// .... +// ...... +// Part4: Function Offset Table +// function1 name index --> function1 profile start +// function2 name index --> function2 profile start +// function3 name index --> function3 profile start +// +// We need Part2 because profile reader can use it to find out and read +// function offset table without reading Part3 first. +class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; + +public: + virtual std::error_code write(const FunctionSamples &S) override; + virtual std::error_code + write(const StringMap<FunctionSamples> &ProfileMap) override; + +protected: + /// The table mapping from function name to the offset of its FunctionSample + /// towards profile start. + MapVector<StringRef, uint64_t> FuncOffsetTable; + /// The offset of the slot to be filled with the offset of FuncOffsetTable + /// towards profile start. 
+ uint64_t TableOffset; + virtual std::error_code writeNameTable() override; + virtual std::error_code writeMagicIdent() override; + virtual std::error_code + writeHeader(const StringMap<FunctionSamples> &ProfileMap) override; + std::error_code writeFuncOffsetTable(); +}; + +} // end namespace sampleprof +} // end namespace llvm + +#endif // LLVM_PROFILEDATA_SAMPLEPROFWRITER_H |
