Clang Project

clang_source_code/lib/CodeGen/CodeGenPGO.cpp
1//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Instrumentation-based profile-guided optimization
10//
11//===----------------------------------------------------------------------===//
12
13#include "CodeGenPGO.h"
14#include "CodeGenFunction.h"
15#include "CoverageMappingGen.h"
16#include "clang/AST/RecursiveASTVisitor.h"
17#include "clang/AST/StmtVisitor.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/MDBuilder.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/FileSystem.h"
22#include "llvm/Support/MD5.h"
23
24static llvm::cl::opt<bool>
25    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
26                         llvm::cl::desc("Enable value profiling"),
27                         llvm::cl::Hidden, llvm::cl::init(false));
28
29using namespace clang;
30using namespace CodeGen;
31
32void CodeGenPGO::setFuncName(StringRef Name,
33                             llvm::GlobalValue::LinkageTypes Linkage) {
34  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
35  FuncName = llvm::getPGOFuncName(
36      Name, Linkage, CGM.getCodeGenOpts().MainFileName,
37      PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
38
39  // If we're generating a profile, create a variable for the name.
40  if (CGM.getCodeGenOpts().hasProfileClangInstr())
41    FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
42}
43
44void CodeGenPGO::setFuncName(llvm::Function *Fn) {
45  setFuncName(Fn->getName(), Fn->getLinkage());
46  // Create PGOFuncName meta data.
47  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
48}
49
50/// The version of the PGO hash algorithm.
51enum PGOHashVersion : unsigned {
52  PGO_HASH_V1,
53  PGO_HASH_V2,
54
55  // Keep this set to the latest hash version.
56  PGO_HASH_LATEST = PGO_HASH_V2
57};
58
59namespace {
60/// Stable hasher for PGO region counters.
61///
62/// PGOHash produces a stable hash of a given function's control flow.
63///
64/// Changing the output of this hash will invalidate all previously generated
65/// profiles -- i.e., don't do it.
66///
67/// \note  When this hash does eventually change (years?), we still need to
68/// support old hashes.  We'll need to pull in the version number from the
69/// profile data format and use the matching hash function.
70class PGOHash {
71  uint64_t Working;
72  unsigned Count;
73  PGOHashVersion HashVersion;
74  llvm::MD5 MD5;
75
76  static const int NumBitsPerType = 6;
77  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
78  static const unsigned TooBig = 1u << NumBitsPerType;
79
80public:
81  /// Hash values for AST nodes.
82  ///
83  /// Distinct values for AST nodes that have region counters attached.
84  ///
85  /// These values must be stable.  All new members must be added at the end,
86  /// and no members should be removed.  Changing the enumeration value for an
87  /// AST node will affect the hash of every function that contains that node.
88  enum HashType : unsigned char {
89    None = 0,
90    LabelStmt = 1,
91    WhileStmt,
92    DoStmt,
93    ForStmt,
94    CXXForRangeStmt,
95    ObjCForCollectionStmt,
96    SwitchStmt,
97    CaseStmt,
98    DefaultStmt,
99    IfStmt,
100    CXXTryStmt,
101    CXXCatchStmt,
102    ConditionalOperator,
103    BinaryOperatorLAnd,
104    BinaryOperatorLOr,
105    BinaryConditionalOperator,
106    // The preceding values are available with PGO_HASH_V1.
107
108    EndOfScope,
109    IfThenBranch,
110    IfElseBranch,
111    GotoStmt,
112    IndirectGotoStmt,
113    BreakStmt,
114    ContinueStmt,
115    ReturnStmt,
116    ThrowExpr,
117    UnaryOperatorLNot,
118    BinaryOperatorLT,
119    BinaryOperatorGT,
120    BinaryOperatorLE,
121    BinaryOperatorGE,
122    BinaryOperatorEQ,
123    BinaryOperatorNE,
124    // The preceding values are available with PGO_HASH_V2.
125
126    // Keep this last.  It's for the static assert that follows.
127    LastHashType
128  };
129  static_assert(LastHashType <= TooBig"Too many types in HashType");
130
131  PGOHash(PGOHashVersion HashVersion)
132      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
133  void combine(HashType Type);
134  uint64_t finalize();
135  PGOHashVersion getHashVersion() const { return HashVersion; }
136};
137const int PGOHash::NumBitsPerType;
138const unsigned PGOHash::NumTypesPerWord;
139const unsigned PGOHash::TooBig;
140
141/// Get the PGO hash version used in the given indexed profile.
142static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
143                                        CodeGenModule &CGM) {
144  if (PGOReader->getVersion() <= 4)
145    return PGO_HASH_V1;
146  return PGO_HASH_V2;
147}
148
149/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
150struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
151  using Base = RecursiveASTVisitor<MapRegionCounters>;
152
153  /// The next counter value to assign.
154  unsigned NextCounter;
155  /// The function hash.
156  PGOHash Hash;
157  /// The map of statements to counters.
158  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
159
160  MapRegionCounters(PGOHashVersion HashVersion,
161                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
162      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {}
163
164  // Blocks and lambdas are handled as separate functions, so we need not
165  // traverse them in the parent context.
166  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
167  bool TraverseLambdaExpr(LambdaExpr *LE) {
168    // Traverse the captures, but not the body.
169    for (const auto &C : zip(LE->captures(), LE->capture_inits()))
170      TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C));
171    return true;
172  }
173  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
174
175  bool VisitDecl(const Decl *D) {
176    switch (D->getKind()) {
177    default:
178      break;
179    case Decl::Function:
180    case Decl::CXXMethod:
181    case Decl::CXXConstructor:
182    case Decl::CXXDestructor:
183    case Decl::CXXConversion:
184    case Decl::ObjCMethod:
185    case Decl::Block:
186    case Decl::Captured:
187      CounterMap[D->getBody()] = NextCounter++;
188      break;
189    }
190    return true;
191  }
192
193  /// If \p S gets a fresh counter, update the counter mappings. Return the
194  /// V1 hash of \p S.
195  PGOHash::HashType updateCounterMappings(Stmt *S) {
196    auto Type = getHashType(PGO_HASH_V1S);
197    if (Type != PGOHash::None)
198      CounterMap[S] = NextCounter++;
199    return Type;
200  }
201
202  /// Include \p S in the function hash.
203  bool VisitStmt(Stmt *S) {
204    auto Type = updateCounterMappings(S);
205    if (Hash.getHashVersion() != PGO_HASH_V1)
206      Type = getHashType(Hash.getHashVersion(), S);
207    if (Type != PGOHash::None)
208      Hash.combine(Type);
209    return true;
210  }
211
212  bool TraverseIfStmt(IfStmt *If) {
213    // If we used the V1 hash, use the default traversal.
214    if (Hash.getHashVersion() == PGO_HASH_V1)
215      return Base::TraverseIfStmt(If);
216
217    // Otherwise, keep track of which branch we're in while traversing.
218    VisitStmt(If);
219    for (Stmt *CS : If->children()) {
220      if (!CS)
221        continue;
222      if (CS == If->getThen())
223        Hash.combine(PGOHash::IfThenBranch);
224      else if (CS == If->getElse())
225        Hash.combine(PGOHash::IfElseBranch);
226      TraverseStmt(CS);
227    }
228    Hash.combine(PGOHash::EndOfScope);
229    return true;
230  }
231
232// If the statement type \p N is nestable, and its nesting impacts profile
233// stability, define a custom traversal which tracks the end of the statement
234// in the hash (provided we're not using the V1 hash).
235#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
236  bool Traverse##N(N *S) {                                                     \
237    Base::Traverse##N(S);                                                      \
238    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
239      Hash.combine(PGOHash::EndOfScope);                                       \
240    return true;                                                               \
241  }
242
243  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
244  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
245  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
246  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
247  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
248  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
249  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
250
251  /// Get version \p HashVersion of the PGO hash for \p S.
252  PGOHash::HashType getHashType(PGOHashVersion HashVersionconst Stmt *S) {
253    switch (S->getStmtClass()) {
254    default:
255      break;
256    case Stmt::LabelStmtClass:
257      return PGOHash::LabelStmt;
258    case Stmt::WhileStmtClass:
259      return PGOHash::WhileStmt;
260    case Stmt::DoStmtClass:
261      return PGOHash::DoStmt;
262    case Stmt::ForStmtClass:
263      return PGOHash::ForStmt;
264    case Stmt::CXXForRangeStmtClass:
265      return PGOHash::CXXForRangeStmt;
266    case Stmt::ObjCForCollectionStmtClass:
267      return PGOHash::ObjCForCollectionStmt;
268    case Stmt::SwitchStmtClass:
269      return PGOHash::SwitchStmt;
270    case Stmt::CaseStmtClass:
271      return PGOHash::CaseStmt;
272    case Stmt::DefaultStmtClass:
273      return PGOHash::DefaultStmt;
274    case Stmt::IfStmtClass:
275      return PGOHash::IfStmt;
276    case Stmt::CXXTryStmtClass:
277      return PGOHash::CXXTryStmt;
278    case Stmt::CXXCatchStmtClass:
279      return PGOHash::CXXCatchStmt;
280    case Stmt::ConditionalOperatorClass:
281      return PGOHash::ConditionalOperator;
282    case Stmt::BinaryConditionalOperatorClass:
283      return PGOHash::BinaryConditionalOperator;
284    case Stmt::BinaryOperatorClass: {
285      const BinaryOperator *BO = cast<BinaryOperator>(S);
286      if (BO->getOpcode() == BO_LAnd)
287        return PGOHash::BinaryOperatorLAnd;
288      if (BO->getOpcode() == BO_LOr)
289        return PGOHash::BinaryOperatorLOr;
290      if (HashVersion == PGO_HASH_V2) {
291        switch (BO->getOpcode()) {
292        default:
293          break;
294        case BO_LT:
295          return PGOHash::BinaryOperatorLT;
296        case BO_GT:
297          return PGOHash::BinaryOperatorGT;
298        case BO_LE:
299          return PGOHash::BinaryOperatorLE;
300        case BO_GE:
301          return PGOHash::BinaryOperatorGE;
302        case BO_EQ:
303          return PGOHash::BinaryOperatorEQ;
304        case BO_NE:
305          return PGOHash::BinaryOperatorNE;
306        }
307      }
308      break;
309    }
310    }
311
312    if (HashVersion == PGO_HASH_V2) {
313      switch (S->getStmtClass()) {
314      default:
315        break;
316      case Stmt::GotoStmtClass:
317        return PGOHash::GotoStmt;
318      case Stmt::IndirectGotoStmtClass:
319        return PGOHash::IndirectGotoStmt;
320      case Stmt::BreakStmtClass:
321        return PGOHash::BreakStmt;
322      case Stmt::ContinueStmtClass:
323        return PGOHash::ContinueStmt;
324      case Stmt::ReturnStmtClass:
325        return PGOHash::ReturnStmt;
326      case Stmt::CXXThrowExprClass:
327        return PGOHash::ThrowExpr;
328      case Stmt::UnaryOperatorClass: {
329        const UnaryOperator *UO = cast<UnaryOperator>(S);
330        if (UO->getOpcode() == UO_LNot)
331          return PGOHash::UnaryOperatorLNot;
332        break;
333      }
334      }
335    }
336
337    return PGOHash::None;
338  }
339};
340
341/// A StmtVisitor that propagates the raw counts through the AST and
342/// records the count at statements where the value may change.
343struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
344  /// PGO state.
345  CodeGenPGO &PGO;
346
347  /// A flag that is set when the current count should be recorded on the
348  /// next statement, such as at the exit of a loop.
349  bool RecordNextStmtCount;
350
351  /// The count at the current location in the traversal.
352  uint64_t CurrentCount;
353
354  /// The map of statements to count values.
355  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
356
357  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
358  struct BreakContinue {
359    uint64_t BreakCount;
360    uint64_t ContinueCount;
361    BreakContinue() : BreakCount(0), ContinueCount(0) {}
362  };
363  SmallVector<BreakContinue8BreakContinueStack;
364
365  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
366                      CodeGenPGO &PGO)
367      : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
368
369  void RecordStmtCount(const Stmt *S) {
370    if (RecordNextStmtCount) {
371      CountMap[S] = CurrentCount;
372      RecordNextStmtCount = false;
373    }
374  }
375
376  /// Set and return the current count.
377  uint64_t setCount(uint64_t Count) {
378    CurrentCount = Count;
379    return Count;
380  }
381
382  void VisitStmt(const Stmt *S) {
383    RecordStmtCount(S);
384    for (const Stmt *Child : S->children())
385      if (Child)
386        this->Visit(Child);
387  }
388
389  void VisitFunctionDecl(const FunctionDecl *D) {
390    // Counter tracks entry to the function body.
391    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
392    CountMap[D->getBody()] = BodyCount;
393    Visit(D->getBody());
394  }
395
396  // Skip lambda expressions. We visit these as FunctionDecls when we're
397  // generating them and aren't interested in the body when generating a
398  // parent context.
399  void VisitLambdaExpr(const LambdaExpr *LE) {}
400
401  void VisitCapturedDecl(const CapturedDecl *D) {
402    // Counter tracks entry to the capture body.
403    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
404    CountMap[D->getBody()] = BodyCount;
405    Visit(D->getBody());
406  }
407
408  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
409    // Counter tracks entry to the method body.
410    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
411    CountMap[D->getBody()] = BodyCount;
412    Visit(D->getBody());
413  }
414
415  void VisitBlockDecl(const BlockDecl *D) {
416    // Counter tracks entry to the block body.
417    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
418    CountMap[D->getBody()] = BodyCount;
419    Visit(D->getBody());
420  }
421
422  void VisitReturnStmt(const ReturnStmt *S) {
423    RecordStmtCount(S);
424    if (S->getRetValue())
425      Visit(S->getRetValue());
426    CurrentCount = 0;
427    RecordNextStmtCount = true;
428  }
429
430  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
431    RecordStmtCount(E);
432    if (E->getSubExpr())
433      Visit(E->getSubExpr());
434    CurrentCount = 0;
435    RecordNextStmtCount = true;
436  }
437
438  void VisitGotoStmt(const GotoStmt *S) {
439    RecordStmtCount(S);
440    CurrentCount = 0;
441    RecordNextStmtCount = true;
442  }
443
444  void VisitLabelStmt(const LabelStmt *S) {
445    RecordNextStmtCount = false;
446    // Counter tracks the block following the label.
447    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
448    CountMap[S] = BlockCount;
449    Visit(S->getSubStmt());
450  }
451
452  void VisitBreakStmt(const BreakStmt *S) {
453    RecordStmtCount(S);
454     (0) . __assert_fail ("!BreakContinueStack.empty() && \"break not in a loop or switch!\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 454, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
455    BreakContinueStack.back().BreakCount += CurrentCount;
456    CurrentCount = 0;
457    RecordNextStmtCount = true;
458  }
459
460  void VisitContinueStmt(const ContinueStmt *S) {
461    RecordStmtCount(S);
462     (0) . __assert_fail ("!BreakContinueStack.empty() && \"continue stmt not in a loop!\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 462, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
463    BreakContinueStack.back().ContinueCount += CurrentCount;
464    CurrentCount = 0;
465    RecordNextStmtCount = true;
466  }
467
468  void VisitWhileStmt(const WhileStmt *S) {
469    RecordStmtCount(S);
470    uint64_t ParentCount = CurrentCount;
471
472    BreakContinueStack.push_back(BreakContinue());
473    // Visit the body region first so the break/continue adjustments can be
474    // included when visiting the condition.
475    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
476    CountMap[S->getBody()] = CurrentCount;
477    Visit(S->getBody());
478    uint64_t BackedgeCount = CurrentCount;
479
480    // ...then go back and propagate counts through the condition. The count
481    // at the start of the condition is the sum of the incoming edges,
482    // the backedge from the end of the loop body, and the edges from
483    // continue statements.
484    BreakContinue BC = BreakContinueStack.pop_back_val();
485    uint64_t CondCount =
486        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
487    CountMap[S->getCond()] = CondCount;
488    Visit(S->getCond());
489    setCount(BC.BreakCount + CondCount - BodyCount);
490    RecordNextStmtCount = true;
491  }
492
493  void VisitDoStmt(const DoStmt *S) {
494    RecordStmtCount(S);
495    uint64_t LoopCount = PGO.getRegionCount(S);
496
497    BreakContinueStack.push_back(BreakContinue());
498    // The count doesn't include the fallthrough from the parent scope. Add it.
499    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
500    CountMap[S->getBody()] = BodyCount;
501    Visit(S->getBody());
502    uint64_t BackedgeCount = CurrentCount;
503
504    BreakContinue BC = BreakContinueStack.pop_back_val();
505    // The count at the start of the condition is equal to the count at the
506    // end of the body, plus any continues.
507    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
508    CountMap[S->getCond()] = CondCount;
509    Visit(S->getCond());
510    setCount(BC.BreakCount + CondCount - LoopCount);
511    RecordNextStmtCount = true;
512  }
513
514  void VisitForStmt(const ForStmt *S) {
515    RecordStmtCount(S);
516    if (S->getInit())
517      Visit(S->getInit());
518
519    uint64_t ParentCount = CurrentCount;
520
521    BreakContinueStack.push_back(BreakContinue());
522    // Visit the body region first. (This is basically the same as a while
523    // loop; see further comments in VisitWhileStmt.)
524    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
525    CountMap[S->getBody()] = BodyCount;
526    Visit(S->getBody());
527    uint64_t BackedgeCount = CurrentCount;
528    BreakContinue BC = BreakContinueStack.pop_back_val();
529
530    // The increment is essentially part of the body but it needs to include
531    // the count for all the continue statements.
532    if (S->getInc()) {
533      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
534      CountMap[S->getInc()] = IncCount;
535      Visit(S->getInc());
536    }
537
538    // ...then go back and propagate counts through the condition.
539    uint64_t CondCount =
540        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
541    if (S->getCond()) {
542      CountMap[S->getCond()] = CondCount;
543      Visit(S->getCond());
544    }
545    setCount(BC.BreakCount + CondCount - BodyCount);
546    RecordNextStmtCount = true;
547  }
548
549  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
550    RecordStmtCount(S);
551    if (S->getInit())
552      Visit(S->getInit());
553    Visit(S->getLoopVarStmt());
554    Visit(S->getRangeStmt());
555    Visit(S->getBeginStmt());
556    Visit(S->getEndStmt());
557
558    uint64_t ParentCount = CurrentCount;
559    BreakContinueStack.push_back(BreakContinue());
560    // Visit the body region first. (This is basically the same as a while
561    // loop; see further comments in VisitWhileStmt.)
562    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
563    CountMap[S->getBody()] = BodyCount;
564    Visit(S->getBody());
565    uint64_t BackedgeCount = CurrentCount;
566    BreakContinue BC = BreakContinueStack.pop_back_val();
567
568    // The increment is essentially part of the body but it needs to include
569    // the count for all the continue statements.
570    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
571    CountMap[S->getInc()] = IncCount;
572    Visit(S->getInc());
573
574    // ...then go back and propagate counts through the condition.
575    uint64_t CondCount =
576        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
577    CountMap[S->getCond()] = CondCount;
578    Visit(S->getCond());
579    setCount(BC.BreakCount + CondCount - BodyCount);
580    RecordNextStmtCount = true;
581  }
582
583  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
584    RecordStmtCount(S);
585    Visit(S->getElement());
586    uint64_t ParentCount = CurrentCount;
587    BreakContinueStack.push_back(BreakContinue());
588    // Counter tracks the body of the loop.
589    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
590    CountMap[S->getBody()] = BodyCount;
591    Visit(S->getBody());
592    uint64_t BackedgeCount = CurrentCount;
593    BreakContinue BC = BreakContinueStack.pop_back_val();
594
595    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
596             BodyCount);
597    RecordNextStmtCount = true;
598  }
599
600  void VisitSwitchStmt(const SwitchStmt *S) {
601    RecordStmtCount(S);
602    if (S->getInit())
603      Visit(S->getInit());
604    Visit(S->getCond());
605    CurrentCount = 0;
606    BreakContinueStack.push_back(BreakContinue());
607    Visit(S->getBody());
608    // If the switch is inside a loop, add the continue counts.
609    BreakContinue BC = BreakContinueStack.pop_back_val();
610    if (!BreakContinueStack.empty())
611      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
612    // Counter tracks the exit block of the switch.
613    setCount(PGO.getRegionCount(S));
614    RecordNextStmtCount = true;
615  }
616
617  void VisitSwitchCase(const SwitchCase *S) {
618    RecordNextStmtCount = false;
619    // Counter for this particular case. This counts only jumps from the
620    // switch header and does not include fallthrough from the case before
621    // this one.
622    uint64_t CaseCount = PGO.getRegionCount(S);
623    setCount(CurrentCount + CaseCount);
624    // We need the count without fallthrough in the mapping, so it's more useful
625    // for branch probabilities.
626    CountMap[S] = CaseCount;
627    RecordNextStmtCount = true;
628    Visit(S->getSubStmt());
629  }
630
631  void VisitIfStmt(const IfStmt *S) {
632    RecordStmtCount(S);
633    uint64_t ParentCount = CurrentCount;
634    if (S->getInit())
635      Visit(S->getInit());
636    Visit(S->getCond());
637
638    // Counter tracks the "then" part of an if statement. The count for
639    // the "else" part, if it exists, will be calculated from this counter.
640    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
641    CountMap[S->getThen()] = ThenCount;
642    Visit(S->getThen());
643    uint64_t OutCount = CurrentCount;
644
645    uint64_t ElseCount = ParentCount - ThenCount;
646    if (S->getElse()) {
647      setCount(ElseCount);
648      CountMap[S->getElse()] = ElseCount;
649      Visit(S->getElse());
650      OutCount += CurrentCount;
651    } else
652      OutCount += ElseCount;
653    setCount(OutCount);
654    RecordNextStmtCount = true;
655  }
656
657  void VisitCXXTryStmt(const CXXTryStmt *S) {
658    RecordStmtCount(S);
659    Visit(S->getTryBlock());
660    for (unsigned I = 0E = S->getNumHandlers(); I < E; ++I)
661      Visit(S->getHandler(I));
662    // Counter tracks the continuation block of the try statement.
663    setCount(PGO.getRegionCount(S));
664    RecordNextStmtCount = true;
665  }
666
667  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
668    RecordNextStmtCount = false;
669    // Counter tracks the catch statement's handler block.
670    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
671    CountMap[S] = CatchCount;
672    Visit(S->getHandlerBlock());
673  }
674
675  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
676    RecordStmtCount(E);
677    uint64_t ParentCount = CurrentCount;
678    Visit(E->getCond());
679
680    // Counter tracks the "true" part of a conditional operator. The
681    // count in the "false" part will be calculated from this counter.
682    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
683    CountMap[E->getTrueExpr()] = TrueCount;
684    Visit(E->getTrueExpr());
685    uint64_t OutCount = CurrentCount;
686
687    uint64_t FalseCount = setCount(ParentCount - TrueCount);
688    CountMap[E->getFalseExpr()] = FalseCount;
689    Visit(E->getFalseExpr());
690    OutCount += CurrentCount;
691
692    setCount(OutCount);
693    RecordNextStmtCount = true;
694  }
695
696  void VisitBinLAnd(const BinaryOperator *E) {
697    RecordStmtCount(E);
698    uint64_t ParentCount = CurrentCount;
699    Visit(E->getLHS());
700    // Counter tracks the right hand side of a logical and operator.
701    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
702    CountMap[E->getRHS()] = RHSCount;
703    Visit(E->getRHS());
704    setCount(ParentCount + RHSCount - CurrentCount);
705    RecordNextStmtCount = true;
706  }
707
708  void VisitBinLOr(const BinaryOperator *E) {
709    RecordStmtCount(E);
710    uint64_t ParentCount = CurrentCount;
711    Visit(E->getLHS());
712    // Counter tracks the right hand side of a logical or operator.
713    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
714    CountMap[E->getRHS()] = RHSCount;
715    Visit(E->getRHS());
716    setCount(ParentCount + RHSCount - CurrentCount);
717    RecordNextStmtCount = true;
718  }
719};
720// end anonymous namespace
721
722void PGOHash::combine(HashType Type) {
723  // Check that we never combine 0 and only have six bits.
724   (0) . __assert_fail ("Type && \"Hash is invalid. unexpected type 0\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 724, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Type && "Hash is invalid: unexpected type 0");
725   (0) . __assert_fail ("unsigned(Type) < TooBig && \"Hash is invalid. too many types\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 725, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
726
727  // Pass through MD5 if enough work has built up.
728  if (Count && Count % NumTypesPerWord == 0) {
729    using namespace llvm::support;
730    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
731    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
732    Working = 0;
733  }
734
735  // Accumulate the current type.
736  ++Count;
737  Working = Working << NumBitsPerType | Type;
738}
739
740uint64_t PGOHash::finalize() {
741  // Use Working as the hash directly if we never used MD5.
742  if (Count <= NumTypesPerWord)
743    // No need to byte swap here, since none of the math was endian-dependent.
744    // This number will be byte-swapped as required on endianness transitions,
745    // so we will see the same value on the other side.
746    return Working;
747
748  // Check for remaining work in Working.
749  if (Working)
750    MD5.update(Working);
751
752  // Finalize the MD5 and return the hash.
753  llvm::MD5::MD5Result Result;
754  MD5.final(Result);
755  using namespace llvm::support;
756  return Result.low();
757}
758
759void CodeGenPGO::assignRegionCounters(GlobalDecl GDllvm::Function *Fn) {
760  const Decl *D = GD.getDecl();
761  if (!D->hasBody())
762    return;
763
764  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
765  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
766  if (!InstrumentRegions && !PGOReader)
767    return;
768  if (D->isImplicit())
769    return;
770  // Constructors and destructors may be represented by several functions in IR.
771  // If so, instrument only base variant, others are implemented by delegation
772  // to the base one, it would be counted twice otherwise.
773  if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
774    if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
775      if (GD.getCtorType() != Ctor_Base &&
776          CodeGenFunction::IsConstructorDelegationValid(CCD))
777        return;
778  }
779  if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
780    return;
781
782  CGM.ClearUnusedCoverageMapping(D);
783  setFuncName(Fn);
784
785  mapRegionCounters(D);
786  if (CGM.getCodeGenOpts().CoverageMapping)
787    emitCounterRegionMapping(D);
788  if (PGOReader) {
789    SourceManager &SM = CGM.getContext().getSourceManager();
790    loadRegionCounts(PGOReaderSM.isInMainFile(D->getLocation()));
791    computeRegionCounts(D);
792    applyFunctionAttributes(PGOReaderFn);
793  }
794}
795
796void CodeGenPGO::mapRegionCounters(const Decl *D) {
797  // Use the latest hash version when inserting instrumentation, but use the
798  // version in the indexed profile if we're reading PGO data.
799  PGOHashVersion HashVersion = PGO_HASH_LATEST;
800  if (auto *PGOReader = CGM.getPGOReader())
801    HashVersion = getPGOHashVersion(PGOReaderCGM);
802
803  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
804  MapRegionCounters Walker(HashVersion, *RegionCounterMap);
805  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
806    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
807  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
808    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
809  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
810    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
811  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
812    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
813   (0) . __assert_fail ("Walker.NextCounter > 0 && \"no entry counter mapped for decl\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 813, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
814  NumRegionCounters = Walker.NextCounter;
815  FunctionHash = Walker.Hash.finalize();
816}
817
818bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
819  if (!D->getBody())
820    return true;
821
822  // Don't map the functions in system headers.
823  const auto &SM = CGM.getContext().getSourceManager();
824  auto Loc = D->getBody()->getBeginLoc();
825  return SM.isInSystemHeader(Loc);
826}
827
828void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
829  if (skipRegionMappingForDecl(D))
830    return;
831
832  std::string CoverageMapping;
833  llvm::raw_string_ostream OS(CoverageMapping);
834  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
835                                CGM.getContext().getSourceManager(),
836                                CGM.getLangOpts(), RegionCounterMap.get());
837  MappingGen.emitCounterMapping(D, OS);
838  OS.flush();
839
840  if (CoverageMapping.empty())
841    return;
842
843  CGM.getCoverageMapping()->addFunctionMappingRecord(
844      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
845}
846
847void
848CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
849                                    llvm::GlobalValue::LinkageTypes Linkage) {
850  if (skipRegionMappingForDecl(D))
851    return;
852
853  std::string CoverageMapping;
854  llvm::raw_string_ostream OS(CoverageMapping);
855  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
856                                CGM.getContext().getSourceManager(),
857                                CGM.getLangOpts());
858  MappingGen.emitEmptyMapping(D, OS);
859  OS.flush();
860
861  if (CoverageMapping.empty())
862    return;
863
864  setFuncName(Name, Linkage);
865  CGM.getCoverageMapping()->addFunctionMappingRecord(
866      FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
867}
868
869void CodeGenPGO::computeRegionCounts(const Decl *D) {
870  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
871  ComputeRegionCounts Walker(*StmtCountMap, *this);
872  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
873    Walker.VisitFunctionDecl(FD);
874  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
875    Walker.VisitObjCMethodDecl(MD);
876  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
877    Walker.VisitBlockDecl(BD);
878  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
879    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
880}
881
882void
883CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
884                                    llvm::Function *Fn) {
885  if (!haveRegionCounts())
886    return;
887
888  uint64_t FunctionCount = getRegionCount(nullptr);
889  Fn->setEntryCount(FunctionCount);
890}
891
892void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
893                                      llvm::Value *StepV) {
894  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
895    return;
896  if (!Builder.GetInsertBlock())
897    return;
898
899  unsigned Counter = (*RegionCounterMap)[S];
900  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
901
902  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
903                         Builder.getInt64(FunctionHash),
904                         Builder.getInt32(NumRegionCounters),
905                         Builder.getInt32(Counter), StepV};
906  if (!StepV)
907    Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
908                       makeArrayRef(Args, 4));
909  else
910    Builder.CreateCall(
911        CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
912        makeArrayRef(Args));
913}
914
915// This method either inserts a call to the profile run-time during
916// instrumentation or puts profile data into metadata for PGO use.
917void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
918    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
919
920  if (!EnableValueProfiling)
921    return;
922
923  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
924    return;
925
926  if (isa<llvm::Constant>(ValuePtr))
927    return;
928
929  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
930  if (InstrumentValueSites && RegionCounterMap) {
931    auto BuilderInsertPoint = Builder.saveIP();
932    Builder.SetInsertPoint(ValueSite);
933    llvm::Value *Args[5] = {
934        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
935        Builder.getInt64(FunctionHash),
936        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
937        Builder.getInt32(ValueKind),
938        Builder.getInt32(NumValueSites[ValueKind]++)
939    };
940    Builder.CreateCall(
941        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
942    Builder.restoreIP(BuilderInsertPoint);
943    return;
944  }
945
946  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
947  if (PGOReader && haveRegionCounts()) {
948    // We record the top most called three functions at each call site.
949    // Profile metadata contains "VP" string identifying this metadata
950    // as value profiling data, then a uint32_t value for the value profiling
951    // kind, a uint64_t value for the total number of times the call is
952    // executed, followed by the function hash and execution count (uint64_t)
953    // pairs for each function.
954    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
955      return;
956
957    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
958                            (llvm::InstrProfValueKind)ValueKind,
959                            NumValueSites[ValueKind]);
960
961    NumValueSites[ValueKind]++;
962  }
963}
964
965void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
966                                  bool IsInMainFile) {
967  CGM.getPGOStats().addVisited(IsInMainFile);
968  RegionCounts.clear();
969  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
970      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
971  if (auto E = RecordExpected.takeError()) {
972    auto IPE = llvm::InstrProfError::take(std::move(E));
973    if (IPE == llvm::instrprof_error::unknown_function)
974      CGM.getPGOStats().addMissing(IsInMainFile);
975    else if (IPE == llvm::instrprof_error::hash_mismatch)
976      CGM.getPGOStats().addMismatched(IsInMainFile);
977    else if (IPE == llvm::instrprof_error::malformed)
978      // TODO: Consider a more specific warning for this case.
979      CGM.getPGOStats().addMismatched(IsInMainFile);
980    return;
981  }
982  ProfRecord =
983      llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
984  RegionCounts = ProfRecord->Counts;
985}
986
987/// Calculate what to divide by to scale weights.
988///
989/// Given the maximum weight, calculate a divisor that will scale all the
990/// weights to strictly less than UINT32_MAX.
991static uint64_t calculateWeightScale(uint64_t MaxWeight) {
992  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
993}
994
995/// Scale an individual branch weight (and add 1).
996///
997/// Scale a 64-bit weight down to 32-bits using \c Scale.
998///
999/// According to Laplace's Rule of Succession, it is better to compute the
1000/// weight based on the count plus 1, so universally add 1 to the value.
1001///
1002/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
1003/// greater than \c Weight.
1004static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
1005   (0) . __assert_fail ("Scale && \"scale by 0?\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 1005, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Scale && "scale by 0?");
1006  uint64_t Scaled = Weight / Scale + 1;
1007   (0) . __assert_fail ("Scaled <= UINT32_MAX && \"overflow 32-bits\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 1007, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Scaled <= UINT32_MAX && "overflow 32-bits");
1008  return Scaled;
1009}
1010
1011llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
1012                                                    uint64_t FalseCount) {
1013  // Check for empty weights.
1014  if (!TrueCount && !FalseCount)
1015    return nullptr;
1016
1017  // Calculate how to scale down to 32-bits.
1018  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
1019
1020  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1021  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
1022                                      scaleBranchWeight(FalseCount, Scale));
1023}
1024
1025llvm::MDNode *
1026CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
1027  // We need at least two elements to create meaningful weights.
1028  if (Weights.size() < 2)
1029    return nullptr;
1030
1031  // Check for empty weights.
1032  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
1033  if (MaxWeight == 0)
1034    return nullptr;
1035
1036  // Calculate how to scale down to 32-bits.
1037  uint64_t Scale = calculateWeightScale(MaxWeight);
1038
1039  SmallVector<uint32_t, 16> ScaledWeights;
1040  ScaledWeights.reserve(Weights.size());
1041  for (uint64_t W : Weights)
1042    ScaledWeights.push_back(scaleBranchWeight(W, Scale));
1043
1044  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1045  return MDHelper.createBranchWeights(ScaledWeights);
1046}
1047
1048llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
1049                                                           uint64_t LoopCount) {
1050  if (!PGO.haveRegionCounts())
1051    return nullptr;
1052  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
1053   (0) . __assert_fail ("CondCount.hasValue() && \"missing expected loop condition count\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CodeGenPGO.cpp", 1053, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(CondCount.hasValue() && "missing expected loop condition count");
1054  if (*CondCount == 0)
1055    return nullptr;
1056  return createProfileWeights(LoopCount,
1057                              std::max(*CondCount, LoopCount) - LoopCount);
1058}
1059
clang::CodeGen::CodeGenPGO::setFuncName
clang::CodeGen::CodeGenPGO::setFuncName
clang::CodeGen::CodeGenPGO::assignRegionCounters
clang::CodeGen::CodeGenPGO::mapRegionCounters