1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
#include "Latency.h"

#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h"
#include "PerfHelper.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

#include <algorithm>
#include <cstdint>
#include <limits>
20 
21 namespace exegesis {
22 
hasUnknownOperand(const llvm::MCOperandInfo & OpInfo)23 static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
24   return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN;
25 }
26 
27 // FIXME: Handle memory, see PR36905.
hasMemoryOperand(const llvm::MCOperandInfo & OpInfo)28 static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
29   return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
30 }
31 
32 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
33 
isInfeasible(const llvm::MCInstrDesc & MCInstrDesc) const34 llvm::Error LatencyBenchmarkRunner::isInfeasible(
35     const llvm::MCInstrDesc &MCInstrDesc) const {
36   if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand))
37     return llvm::make_error<BenchmarkFailure>(
38         "Infeasible : has unknown operands");
39   if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand))
40     return llvm::make_error<BenchmarkFailure>(
41         "Infeasible : has memory operands");
42   return llvm::Error::success();
43 }
44 
45 llvm::Expected<SnippetPrototype>
generateTwoInstructionPrototype(const Instruction & Instr) const46 LatencyBenchmarkRunner::generateTwoInstructionPrototype(
47     const Instruction &Instr) const {
48   std::vector<unsigned> Opcodes;
49   Opcodes.resize(State.getInstrInfo().getNumOpcodes());
50   std::iota(Opcodes.begin(), Opcodes.end(), 0U);
51   std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
52   for (const unsigned OtherOpcode : Opcodes) {
53     if (OtherOpcode == Instr.Description->Opcode)
54       continue;
55     const auto &OtherInstrDesc = State.getInstrInfo().get(OtherOpcode);
56     if (auto E = isInfeasible(OtherInstrDesc)) {
57       llvm::consumeError(std::move(E));
58       continue;
59     }
60     const Instruction OtherInstr(OtherInstrDesc, RATC);
61     const AliasingConfigurations Forward(Instr, OtherInstr);
62     const AliasingConfigurations Back(OtherInstr, Instr);
63     if (Forward.empty() || Back.empty())
64       continue;
65     InstructionInstance ThisII(Instr);
66     InstructionInstance OtherII(OtherInstr);
67     if (!Forward.hasImplicitAliasing())
68       setRandomAliasing(Forward, ThisII, OtherII);
69     if (!Back.hasImplicitAliasing())
70       setRandomAliasing(Back, OtherII, ThisII);
71     SnippetPrototype Prototype;
72     Prototype.Explanation =
73         llvm::formatv("creating cycle through {0}.",
74                       State.getInstrInfo().getName(OtherOpcode));
75     Prototype.Snippet.push_back(std::move(ThisII));
76     Prototype.Snippet.push_back(std::move(OtherII));
77     return std::move(Prototype);
78   }
79   return llvm::make_error<BenchmarkFailure>(
80       "Infeasible : Didn't find any scheme to make the instruction serial");
81 }
82 
83 llvm::Expected<SnippetPrototype>
generatePrototype(unsigned Opcode) const84 LatencyBenchmarkRunner::generatePrototype(unsigned Opcode) const {
85   const auto &InstrDesc = State.getInstrInfo().get(Opcode);
86   if (auto E = isInfeasible(InstrDesc))
87     return std::move(E);
88   const Instruction Instr(InstrDesc, RATC);
89   if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr))
90     return SelfAliasingPrototype;
91   else
92     llvm::consumeError(SelfAliasingPrototype.takeError());
93   // No self aliasing, trying to create a dependency through another opcode.
94   return generateTwoInstructionPrototype(Instr);
95 }
96 
getCounterName() const97 const char *LatencyBenchmarkRunner::getCounterName() const {
98   if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
99     llvm::report_fatal_error("sched model is missing extra processor info!");
100   const char *CounterName = State.getSubtargetInfo()
101                                 .getSchedModel()
102                                 .getExtraProcessorInfo()
103                                 .PfmCounters.CycleCounter;
104   if (!CounterName)
105     llvm::report_fatal_error("sched model does not define a cycle counter");
106   return CounterName;
107 }
108 
109 std::vector<BenchmarkMeasure>
runMeasurements(const ExecutableFunction & Function,const unsigned NumRepetitions) const110 LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
111                                         const unsigned NumRepetitions) const {
112   // Cycle measurements include some overhead from the kernel. Repeat the
113   // measure several times and take the minimum value.
114   constexpr const int NumMeasurements = 30;
115   int64_t MinLatency = std::numeric_limits<int64_t>::max();
116   const char *CounterName = getCounterName();
117   if (!CounterName)
118     llvm::report_fatal_error("could not determine cycle counter name");
119   const pfm::PerfEvent CyclesPerfEvent(CounterName);
120   if (!CyclesPerfEvent.valid())
121     llvm::report_fatal_error("invalid perf event");
122   for (size_t I = 0; I < NumMeasurements; ++I) {
123     pfm::Counter Counter(CyclesPerfEvent);
124     Counter.start();
125     Function();
126     Counter.stop();
127     const int64_t Value = Counter.read();
128     if (Value < MinLatency)
129       MinLatency = Value;
130   }
131   return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}};
132 }
133 
134 } // namespace exegesis
135