1 // Copyright (c) 2017 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "tools/stats/stats_analyzer.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <iostream>
21 #include <map>
22 #include <sstream>
23 #include <string>
24 #include <unordered_map>
25 #include <unordered_set>
26 #include <utility>
27 #include <vector>
28
29 #include "source/comp/markv_model.h"
30 #include "source/enum_string_mapping.h"
31 #include "source/latest_version_spirv_header.h"
32 #include "source/opcode.h"
33 #include "source/operand.h"
34 #include "source/spirv_constant.h"
35
36 namespace spvtools {
37 namespace stats {
38 namespace {
39
40 // Signals that the value is not in the coding scheme and a fallback method
41 // needs to be used.
42 const uint64_t kMarkvNoneOfTheAbove =
43 comp::MarkvModel::GetMarkvNoneOfTheAbove();
44
GetVersionString(uint32_t word)45 std::string GetVersionString(uint32_t word) {
46 std::stringstream ss;
47 ss << "Version " << SPV_SPIRV_VERSION_MAJOR_PART(word) << "."
48 << SPV_SPIRV_VERSION_MINOR_PART(word);
49 return ss.str();
50 }
51
GetGeneratorString(uint32_t word)52 std::string GetGeneratorString(uint32_t word) {
53 return spvGeneratorStr(SPV_GENERATOR_TOOL_PART(word));
54 }
55
GetOpcodeString(uint32_t word)56 std::string GetOpcodeString(uint32_t word) {
57 return spvOpcodeString(static_cast<SpvOp>(word));
58 }
59
GetCapabilityString(uint32_t word)60 std::string GetCapabilityString(uint32_t word) {
61 return CapabilityToString(static_cast<SpvCapability>(word));
62 }
63
64 template <class T>
KeyIsLabel(T key)65 std::string KeyIsLabel(T key) {
66 std::stringstream ss;
67 ss << key;
68 return ss.str();
69 }
70
71 template <class Key>
GetRecall(const std::unordered_map<Key,uint32_t> & hist,uint64_t total)72 std::unordered_map<Key, double> GetRecall(
73 const std::unordered_map<Key, uint32_t>& hist, uint64_t total) {
74 std::unordered_map<Key, double> freq;
75 for (const auto& pair : hist) {
76 const double frequency =
77 static_cast<double>(pair.second) / static_cast<double>(total);
78 freq.emplace(pair.first, frequency);
79 }
80 return freq;
81 }
82
83 template <class Key>
GetPrevalence(const std::unordered_map<Key,uint32_t> & hist)84 std::unordered_map<Key, double> GetPrevalence(
85 const std::unordered_map<Key, uint32_t>& hist) {
86 uint64_t total = 0;
87 for (const auto& pair : hist) {
88 total += pair.second;
89 }
90
91 return GetRecall(hist, total);
92 }
93
94 // Writes |freq| to |out| sorted by frequency in the following format:
95 // LABEL3 70%
96 // LABEL1 20%
97 // LABEL2 10%
98 // |label_from_key| is used to convert |Key| to label.
99 template <class Key>
WriteFreq(std::ostream & out,const std::unordered_map<Key,double> & freq,std::string (* label_from_key)(Key))100 void WriteFreq(std::ostream& out, const std::unordered_map<Key, double>& freq,
101 std::string (*label_from_key)(Key)) {
102 std::vector<std::pair<Key, double>> sorted_freq(freq.begin(), freq.end());
103 std::sort(sorted_freq.begin(), sorted_freq.end(),
104 [](const std::pair<Key, double>& left,
105 const std::pair<Key, double>& right) {
106 return left.second > right.second;
107 });
108
109 for (const auto& pair : sorted_freq) {
110 if (pair.second < 0.001) break;
111 out << label_from_key(pair.first) << " " << pair.second * 100.0 << "%"
112 << std::endl;
113 }
114 }
115
116 } // namespace
117
StatsAnalyzer(const SpirvStats & stats)118 StatsAnalyzer::StatsAnalyzer(const SpirvStats& stats) : stats_(stats) {
119 num_modules_ = 0;
120 for (const auto& pair : stats_.version_hist) {
121 num_modules_ += pair.second;
122 }
123
124 version_freq_ = GetRecall(stats_.version_hist, num_modules_);
125 generator_freq_ = GetRecall(stats_.generator_hist, num_modules_);
126 capability_freq_ = GetRecall(stats_.capability_hist, num_modules_);
127 extension_freq_ = GetRecall(stats_.extension_hist, num_modules_);
128 opcode_freq_ = GetPrevalence(stats_.opcode_hist);
129 }
130
WriteVersion(std::ostream & out)131 void StatsAnalyzer::WriteVersion(std::ostream& out) {
132 WriteFreq(out, version_freq_, GetVersionString);
133 }
134
WriteGenerator(std::ostream & out)135 void StatsAnalyzer::WriteGenerator(std::ostream& out) {
136 WriteFreq(out, generator_freq_, GetGeneratorString);
137 }
138
WriteCapability(std::ostream & out)139 void StatsAnalyzer::WriteCapability(std::ostream& out) {
140 WriteFreq(out, capability_freq_, GetCapabilityString);
141 }
142
WriteExtension(std::ostream & out)143 void StatsAnalyzer::WriteExtension(std::ostream& out) {
144 WriteFreq(out, extension_freq_, KeyIsLabel);
145 }
146
WriteOpcode(std::ostream & out)147 void StatsAnalyzer::WriteOpcode(std::ostream& out) {
148 out << "Total unique opcodes used: " << opcode_freq_.size() << std::endl;
149 WriteFreq(out, opcode_freq_, GetOpcodeString);
150 }
151
WriteConstantLiterals(std::ostream & out)152 void StatsAnalyzer::WriteConstantLiterals(std::ostream& out) {
153 out << "Constant literals" << std::endl;
154
155 out << "Float 32" << std::endl;
156 WriteFreq(out, GetPrevalence(stats_.f32_constant_hist), KeyIsLabel);
157
158 out << std::endl << "Float 64" << std::endl;
159 WriteFreq(out, GetPrevalence(stats_.f64_constant_hist), KeyIsLabel);
160
161 out << std::endl << "Unsigned int 16" << std::endl;
162 WriteFreq(out, GetPrevalence(stats_.u16_constant_hist), KeyIsLabel);
163
164 out << std::endl << "Signed int 16" << std::endl;
165 WriteFreq(out, GetPrevalence(stats_.s16_constant_hist), KeyIsLabel);
166
167 out << std::endl << "Unsigned int 32" << std::endl;
168 WriteFreq(out, GetPrevalence(stats_.u32_constant_hist), KeyIsLabel);
169
170 out << std::endl << "Signed int 32" << std::endl;
171 WriteFreq(out, GetPrevalence(stats_.s32_constant_hist), KeyIsLabel);
172
173 out << std::endl << "Unsigned int 64" << std::endl;
174 WriteFreq(out, GetPrevalence(stats_.u64_constant_hist), KeyIsLabel);
175
176 out << std::endl << "Signed int 64" << std::endl;
177 WriteFreq(out, GetPrevalence(stats_.s64_constant_hist), KeyIsLabel);
178 }
179
WriteOpcodeMarkov(std::ostream & out)180 void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) {
181 if (stats_.opcode_markov_hist.empty()) return;
182
183 const std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
184 cue_to_hist = stats_.opcode_markov_hist[0];
185
186 // Sort by prevalence of the opcodes in opcode_freq_ (descending).
187 std::vector<std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>>
188 sorted_cue_to_hist(cue_to_hist.begin(), cue_to_hist.end());
189 std::sort(
190 sorted_cue_to_hist.begin(), sorted_cue_to_hist.end(),
191 [this](const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
192 left,
193 const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
194 right) {
195 const double lf = opcode_freq_[left.first];
196 const double rf = opcode_freq_[right.first];
197 if (lf == rf) return right.first > left.first;
198 return lf > rf;
199 });
200
201 for (const auto& kv : sorted_cue_to_hist) {
202 const uint32_t cue = kv.first;
203 const double kFrequentEnoughToAnalyze = 0.0001;
204 if (opcode_freq_[cue] < kFrequentEnoughToAnalyze) continue;
205
206 const std::unordered_map<uint32_t, uint32_t>& hist = kv.second;
207
208 uint32_t total = 0;
209 for (const auto& pair : hist) {
210 total += pair.second;
211 }
212
213 std::vector<std::pair<uint32_t, uint32_t>> sorted_hist(hist.begin(),
214 hist.end());
215 std::sort(sorted_hist.begin(), sorted_hist.end(),
216 [](const std::pair<uint32_t, uint32_t>& left,
217 const std::pair<uint32_t, uint32_t>& right) {
218 if (left.second == right.second)
219 return right.first > left.first;
220 return left.second > right.second;
221 });
222
223 for (const auto& pair : sorted_hist) {
224 const double prior = opcode_freq_[pair.first];
225 const double posterior =
226 static_cast<double>(pair.second) / static_cast<double>(total);
227 out << GetOpcodeString(cue) << " -> " << GetOpcodeString(pair.first)
228 << " " << posterior * 100 << "% (base rate " << prior * 100
229 << "%, pair occurrences " << pair.second << ")" << std::endl;
230 }
231 }
232 }
233
234 } // namespace stats
235 } // namespace spvtools
236