1 // Copyright (c) 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "tools/stats/stats_analyzer.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <iostream>
21 #include <map>
22 #include <sstream>
23 #include <string>
24 #include <unordered_map>
25 #include <unordered_set>
26 #include <utility>
27 #include <vector>
28 
29 #include "source/comp/markv_model.h"
30 #include "source/enum_string_mapping.h"
31 #include "source/latest_version_spirv_header.h"
32 #include "source/opcode.h"
33 #include "source/operand.h"
34 #include "source/spirv_constant.h"
35 
36 namespace spvtools {
37 namespace stats {
38 namespace {
39 
40 // Signals that the value is not in the coding scheme and a fallback method
41 // needs to be used.
42 const uint64_t kMarkvNoneOfTheAbove =
43     comp::MarkvModel::GetMarkvNoneOfTheAbove();
44 
GetVersionString(uint32_t word)45 std::string GetVersionString(uint32_t word) {
46   std::stringstream ss;
47   ss << "Version " << SPV_SPIRV_VERSION_MAJOR_PART(word) << "."
48      << SPV_SPIRV_VERSION_MINOR_PART(word);
49   return ss.str();
50 }
51 
GetGeneratorString(uint32_t word)52 std::string GetGeneratorString(uint32_t word) {
53   return spvGeneratorStr(SPV_GENERATOR_TOOL_PART(word));
54 }
55 
GetOpcodeString(uint32_t word)56 std::string GetOpcodeString(uint32_t word) {
57   return spvOpcodeString(static_cast<SpvOp>(word));
58 }
59 
GetCapabilityString(uint32_t word)60 std::string GetCapabilityString(uint32_t word) {
61   return CapabilityToString(static_cast<SpvCapability>(word));
62 }
63 
64 template <class T>
KeyIsLabel(T key)65 std::string KeyIsLabel(T key) {
66   std::stringstream ss;
67   ss << key;
68   return ss.str();
69 }
70 
71 template <class Key>
GetRecall(const std::unordered_map<Key,uint32_t> & hist,uint64_t total)72 std::unordered_map<Key, double> GetRecall(
73     const std::unordered_map<Key, uint32_t>& hist, uint64_t total) {
74   std::unordered_map<Key, double> freq;
75   for (const auto& pair : hist) {
76     const double frequency =
77         static_cast<double>(pair.second) / static_cast<double>(total);
78     freq.emplace(pair.first, frequency);
79   }
80   return freq;
81 }
82 
83 template <class Key>
GetPrevalence(const std::unordered_map<Key,uint32_t> & hist)84 std::unordered_map<Key, double> GetPrevalence(
85     const std::unordered_map<Key, uint32_t>& hist) {
86   uint64_t total = 0;
87   for (const auto& pair : hist) {
88     total += pair.second;
89   }
90 
91   return GetRecall(hist, total);
92 }
93 
94 // Writes |freq| to |out| sorted by frequency in the following format:
95 // LABEL3 70%
96 // LABEL1 20%
97 // LABEL2 10%
98 // |label_from_key| is used to convert |Key| to label.
99 template <class Key>
WriteFreq(std::ostream & out,const std::unordered_map<Key,double> & freq,std::string (* label_from_key)(Key))100 void WriteFreq(std::ostream& out, const std::unordered_map<Key, double>& freq,
101                std::string (*label_from_key)(Key)) {
102   std::vector<std::pair<Key, double>> sorted_freq(freq.begin(), freq.end());
103   std::sort(sorted_freq.begin(), sorted_freq.end(),
104             [](const std::pair<Key, double>& left,
105                const std::pair<Key, double>& right) {
106               return left.second > right.second;
107             });
108 
109   for (const auto& pair : sorted_freq) {
110     if (pair.second < 0.001) break;
111     out << label_from_key(pair.first) << " " << pair.second * 100.0 << "%"
112         << std::endl;
113   }
114 }
115 
116 }  // namespace
117 
StatsAnalyzer(const SpirvStats & stats)118 StatsAnalyzer::StatsAnalyzer(const SpirvStats& stats) : stats_(stats) {
119   num_modules_ = 0;
120   for (const auto& pair : stats_.version_hist) {
121     num_modules_ += pair.second;
122   }
123 
124   version_freq_ = GetRecall(stats_.version_hist, num_modules_);
125   generator_freq_ = GetRecall(stats_.generator_hist, num_modules_);
126   capability_freq_ = GetRecall(stats_.capability_hist, num_modules_);
127   extension_freq_ = GetRecall(stats_.extension_hist, num_modules_);
128   opcode_freq_ = GetPrevalence(stats_.opcode_hist);
129 }
130 
WriteVersion(std::ostream & out)131 void StatsAnalyzer::WriteVersion(std::ostream& out) {
132   WriteFreq(out, version_freq_, GetVersionString);
133 }
134 
WriteGenerator(std::ostream & out)135 void StatsAnalyzer::WriteGenerator(std::ostream& out) {
136   WriteFreq(out, generator_freq_, GetGeneratorString);
137 }
138 
WriteCapability(std::ostream & out)139 void StatsAnalyzer::WriteCapability(std::ostream& out) {
140   WriteFreq(out, capability_freq_, GetCapabilityString);
141 }
142 
WriteExtension(std::ostream & out)143 void StatsAnalyzer::WriteExtension(std::ostream& out) {
144   WriteFreq(out, extension_freq_, KeyIsLabel);
145 }
146 
WriteOpcode(std::ostream & out)147 void StatsAnalyzer::WriteOpcode(std::ostream& out) {
148   out << "Total unique opcodes used: " << opcode_freq_.size() << std::endl;
149   WriteFreq(out, opcode_freq_, GetOpcodeString);
150 }
151 
WriteConstantLiterals(std::ostream & out)152 void StatsAnalyzer::WriteConstantLiterals(std::ostream& out) {
153   out << "Constant literals" << std::endl;
154 
155   out << "Float 32" << std::endl;
156   WriteFreq(out, GetPrevalence(stats_.f32_constant_hist), KeyIsLabel);
157 
158   out << std::endl << "Float 64" << std::endl;
159   WriteFreq(out, GetPrevalence(stats_.f64_constant_hist), KeyIsLabel);
160 
161   out << std::endl << "Unsigned int 16" << std::endl;
162   WriteFreq(out, GetPrevalence(stats_.u16_constant_hist), KeyIsLabel);
163 
164   out << std::endl << "Signed int 16" << std::endl;
165   WriteFreq(out, GetPrevalence(stats_.s16_constant_hist), KeyIsLabel);
166 
167   out << std::endl << "Unsigned int 32" << std::endl;
168   WriteFreq(out, GetPrevalence(stats_.u32_constant_hist), KeyIsLabel);
169 
170   out << std::endl << "Signed int 32" << std::endl;
171   WriteFreq(out, GetPrevalence(stats_.s32_constant_hist), KeyIsLabel);
172 
173   out << std::endl << "Unsigned int 64" << std::endl;
174   WriteFreq(out, GetPrevalence(stats_.u64_constant_hist), KeyIsLabel);
175 
176   out << std::endl << "Signed int 64" << std::endl;
177   WriteFreq(out, GetPrevalence(stats_.s64_constant_hist), KeyIsLabel);
178 }
179 
WriteOpcodeMarkov(std::ostream & out)180 void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) {
181   if (stats_.opcode_markov_hist.empty()) return;
182 
183   const std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
184       cue_to_hist = stats_.opcode_markov_hist[0];
185 
186   // Sort by prevalence of the opcodes in opcode_freq_ (descending).
187   std::vector<std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>>
188       sorted_cue_to_hist(cue_to_hist.begin(), cue_to_hist.end());
189   std::sort(
190       sorted_cue_to_hist.begin(), sorted_cue_to_hist.end(),
191       [this](const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
192                  left,
193              const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
194                  right) {
195         const double lf = opcode_freq_[left.first];
196         const double rf = opcode_freq_[right.first];
197         if (lf == rf) return right.first > left.first;
198         return lf > rf;
199       });
200 
201   for (const auto& kv : sorted_cue_to_hist) {
202     const uint32_t cue = kv.first;
203     const double kFrequentEnoughToAnalyze = 0.0001;
204     if (opcode_freq_[cue] < kFrequentEnoughToAnalyze) continue;
205 
206     const std::unordered_map<uint32_t, uint32_t>& hist = kv.second;
207 
208     uint32_t total = 0;
209     for (const auto& pair : hist) {
210       total += pair.second;
211     }
212 
213     std::vector<std::pair<uint32_t, uint32_t>> sorted_hist(hist.begin(),
214                                                            hist.end());
215     std::sort(sorted_hist.begin(), sorted_hist.end(),
216               [](const std::pair<uint32_t, uint32_t>& left,
217                  const std::pair<uint32_t, uint32_t>& right) {
218                 if (left.second == right.second)
219                   return right.first > left.first;
220                 return left.second > right.second;
221               });
222 
223     for (const auto& pair : sorted_hist) {
224       const double prior = opcode_freq_[pair.first];
225       const double posterior =
226           static_cast<double>(pair.second) / static_cast<double>(total);
227       out << GetOpcodeString(cue) << " -> " << GetOpcodeString(pair.first)
228           << " " << posterior * 100 << "% (base rate " << prior * 100
229           << "%, pair occurrences " << pair.second << ")" << std::endl;
230     }
231   }
232 }
233 
234 }  // namespace stats
235 }  // namespace spvtools
236