1 // Copyright (c) 2018 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef SOURCE_COMP_MARKV_MODEL_H_
16 #define SOURCE_COMP_MARKV_MODEL_H_
17 
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/comp/huffman_codec.h"
#include "source/latest_version_spirv_header.h"
#include "spirv-tools/libspirv.hpp"
23 
24 namespace spvtools {
25 namespace comp {
26 
27 // Base class for MARK-V models.
28 // The class contains encoding/decoding model with various constants and
29 // codecs used by the compression algorithm.
30 class MarkvModel {
31  public:
MarkvModel()32   MarkvModel()
33       : operand_chunk_lengths_(
34             static_cast<size_t>(SPV_OPERAND_TYPE_NUM_OPERAND_TYPES), 0) {
35     // Set default values.
36     operand_chunk_lengths_[SPV_OPERAND_TYPE_TYPE_ID] = 4;
37     operand_chunk_lengths_[SPV_OPERAND_TYPE_RESULT_ID] = 8;
38     operand_chunk_lengths_[SPV_OPERAND_TYPE_ID] = 8;
39     operand_chunk_lengths_[SPV_OPERAND_TYPE_SCOPE_ID] = 8;
40     operand_chunk_lengths_[SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID] = 8;
41     operand_chunk_lengths_[SPV_OPERAND_TYPE_LITERAL_INTEGER] = 6;
42     operand_chunk_lengths_[SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER] = 6;
43     operand_chunk_lengths_[SPV_OPERAND_TYPE_CAPABILITY] = 6;
44     operand_chunk_lengths_[SPV_OPERAND_TYPE_SOURCE_LANGUAGE] = 3;
45     operand_chunk_lengths_[SPV_OPERAND_TYPE_EXECUTION_MODEL] = 3;
46     operand_chunk_lengths_[SPV_OPERAND_TYPE_ADDRESSING_MODEL] = 2;
47     operand_chunk_lengths_[SPV_OPERAND_TYPE_MEMORY_MODEL] = 2;
48     operand_chunk_lengths_[SPV_OPERAND_TYPE_EXECUTION_MODE] = 6;
49     operand_chunk_lengths_[SPV_OPERAND_TYPE_STORAGE_CLASS] = 4;
50     operand_chunk_lengths_[SPV_OPERAND_TYPE_DIMENSIONALITY] = 3;
51     operand_chunk_lengths_[SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE] = 3;
52     operand_chunk_lengths_[SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE] = 2;
53     operand_chunk_lengths_[SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT] = 6;
54     operand_chunk_lengths_[SPV_OPERAND_TYPE_FP_ROUNDING_MODE] = 2;
55     operand_chunk_lengths_[SPV_OPERAND_TYPE_LINKAGE_TYPE] = 2;
56     operand_chunk_lengths_[SPV_OPERAND_TYPE_ACCESS_QUALIFIER] = 2;
57     operand_chunk_lengths_[SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER] = 2;
58     operand_chunk_lengths_[SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE] = 3;
59     operand_chunk_lengths_[SPV_OPERAND_TYPE_DECORATION] = 6;
60     operand_chunk_lengths_[SPV_OPERAND_TYPE_BUILT_IN] = 6;
61     operand_chunk_lengths_[SPV_OPERAND_TYPE_GROUP_OPERATION] = 2;
62     operand_chunk_lengths_[SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS] = 2;
63     operand_chunk_lengths_[SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO] = 2;
64     operand_chunk_lengths_[SPV_OPERAND_TYPE_FP_FAST_MATH_MODE] = 4;
65     operand_chunk_lengths_[SPV_OPERAND_TYPE_FUNCTION_CONTROL] = 4;
66     operand_chunk_lengths_[SPV_OPERAND_TYPE_LOOP_CONTROL] = 4;
67     operand_chunk_lengths_[SPV_OPERAND_TYPE_IMAGE] = 4;
68     operand_chunk_lengths_[SPV_OPERAND_TYPE_OPTIONAL_IMAGE] = 4;
69     operand_chunk_lengths_[SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS] = 4;
70     operand_chunk_lengths_[SPV_OPERAND_TYPE_SELECTION_CONTROL] = 4;
71     operand_chunk_lengths_[SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER] = 6;
72     operand_chunk_lengths_[SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER] = 6;
73   }
74 
model_type()75   uint32_t model_type() const { return model_type_; }
model_version()76   uint32_t model_version() const { return model_version_; }
77 
opcode_chunk_length()78   uint32_t opcode_chunk_length() const { return opcode_chunk_length_; }
num_operands_chunk_length()79   uint32_t num_operands_chunk_length() const {
80     return num_operands_chunk_length_;
81   }
mtf_rank_chunk_length()82   uint32_t mtf_rank_chunk_length() const { return mtf_rank_chunk_length_; }
83 
u64_chunk_length()84   uint32_t u64_chunk_length() const { return u64_chunk_length_; }
s64_chunk_length()85   uint32_t s64_chunk_length() const { return s64_chunk_length_; }
s64_block_exponent()86   uint32_t s64_block_exponent() const { return s64_block_exponent_; }
87 
88   enum class IdFallbackStrategy {
89     kRuleBased = 0,
90     kShortDescriptor,
91   };
92 
id_fallback_strategy()93   IdFallbackStrategy id_fallback_strategy() const {
94     return id_fallback_strategy_;
95   }
96 
97   // Returns a codec for common opcode_and_num_operands words for the given
98   // previous opcode. May return nullptr if the codec doesn't exist.
GetOpcodeAndNumOperandsMarkovHuffmanCodec(uint32_t prev_opcode)99   const HuffmanCodec<uint64_t>* GetOpcodeAndNumOperandsMarkovHuffmanCodec(
100       uint32_t prev_opcode) const {
101     if (prev_opcode == SpvOpNop)
102       return opcode_and_num_operands_huffman_codec_.get();
103 
104     const auto it =
105         opcode_and_num_operands_markov_huffman_codecs_.find(prev_opcode);
106     if (it == opcode_and_num_operands_markov_huffman_codecs_.end())
107       return nullptr;
108     return it->second.get();
109   }
110 
111   // Returns a codec for common non-id words used for given operand slot.
112   // Operand slot is defined by the opcode and the operand index.
113   // May return nullptr if the codec doesn't exist.
GetNonIdWordHuffmanCodec(uint32_t opcode,uint32_t operand_index)114   const HuffmanCodec<uint64_t>* GetNonIdWordHuffmanCodec(
115       uint32_t opcode, uint32_t operand_index) const {
116     const auto it = non_id_word_huffman_codecs_.find(
117         std::pair<uint32_t, uint32_t>(opcode, operand_index));
118     if (it == non_id_word_huffman_codecs_.end()) return nullptr;
119     return it->second.get();
120   }
121 
122   // Returns a codec for common id descriptos used for given operand slot.
123   // Operand slot is defined by the opcode and the operand index.
124   // May return nullptr if the codec doesn't exist.
GetIdDescriptorHuffmanCodec(uint32_t opcode,uint32_t operand_index)125   const HuffmanCodec<uint64_t>* GetIdDescriptorHuffmanCodec(
126       uint32_t opcode, uint32_t operand_index) const {
127     const auto it = id_descriptor_huffman_codecs_.find(
128         std::pair<uint32_t, uint32_t>(opcode, operand_index));
129     if (it == id_descriptor_huffman_codecs_.end()) return nullptr;
130     return it->second.get();
131   }
132 
133   // Returns a codec for common strings used by the given opcode.
134   // Operand slot is defined by the opcode and the operand index.
135   // May return nullptr if the codec doesn't exist.
GetLiteralStringHuffmanCodec(uint32_t opcode)136   const HuffmanCodec<std::string>* GetLiteralStringHuffmanCodec(
137       uint32_t opcode) const {
138     const auto it = literal_string_huffman_codecs_.find(opcode);
139     if (it == literal_string_huffman_codecs_.end()) return nullptr;
140     return it->second.get();
141   }
142 
143   // Checks if |descriptor| has a coding scheme in any of
144   // id_descriptor_huffman_codecs_.
DescriptorHasCodingScheme(uint32_t descriptor)145   bool DescriptorHasCodingScheme(uint32_t descriptor) const {
146     return descriptors_with_coding_scheme_.count(descriptor);
147   }
148 
149   // Checks if any descriptor has a coding scheme.
AnyDescriptorHasCodingScheme()150   bool AnyDescriptorHasCodingScheme() const {
151     return !descriptors_with_coding_scheme_.empty();
152   }
153 
154   // Returns chunk length used for variable length encoding of spirv operand
155   // words.
GetOperandVariableWidthChunkLength(spv_operand_type_t type)156   uint32_t GetOperandVariableWidthChunkLength(spv_operand_type_t type) const {
157     return operand_chunk_lengths_.at(static_cast<size_t>(type));
158   }
159 
160   // Sets model type.
SetModelType(uint32_t in_model_type)161   void SetModelType(uint32_t in_model_type) { model_type_ = in_model_type; }
162 
163   // Sets model version.
SetModelVersion(uint32_t in_model_version)164   void SetModelVersion(uint32_t in_model_version) {
165     model_version_ = in_model_version;
166   }
167 
168   // Returns value used by Huffman codecs as a signal that a value is not in the
169   // coding table.
GetMarkvNoneOfTheAbove()170   static uint64_t GetMarkvNoneOfTheAbove() {
171     // Magic number.
172     return 1111111111111111111;
173   }
174 
175   MarkvModel(const MarkvModel&) = delete;
176   const MarkvModel& operator=(const MarkvModel&) = delete;
177 
178  protected:
179   // Huffman codec for base-rate of opcode_and_num_operands.
180   std::unique_ptr<HuffmanCodec<uint64_t>>
181       opcode_and_num_operands_huffman_codec_;
182 
183   // Huffman codecs for opcode_and_num_operands. The map key is previous opcode.
184   std::map<uint32_t, std::unique_ptr<HuffmanCodec<uint64_t>>>
185       opcode_and_num_operands_markov_huffman_codecs_;
186 
187   // Huffman codecs for non-id single-word operand values.
188   // The map key is pair <opcode, operand_index>.
189   std::map<std::pair<uint32_t, uint32_t>,
190            std::unique_ptr<HuffmanCodec<uint64_t>>>
191       non_id_word_huffman_codecs_;
192 
193   // Huffman codecs for id descriptors. The map key is pair
194   // <opcode, operand_index>.
195   std::map<std::pair<uint32_t, uint32_t>,
196            std::unique_ptr<HuffmanCodec<uint64_t>>>
197       id_descriptor_huffman_codecs_;
198 
199   // Set of all descriptors which have a coding scheme in any of
200   // id_descriptor_huffman_codecs_.
201   std::unordered_set<uint32_t> descriptors_with_coding_scheme_;
202 
203   // Huffman codecs for literal strings. The map key is the opcode of the
204   // current instruction. This assumes, that there is no more than one literal
205   // string operand per instruction, but would still work even if this is not
206   // the case. Names and debug information strings are not collected.
207   std::map<uint32_t, std::unique_ptr<HuffmanCodec<std::string>>>
208       literal_string_huffman_codecs_;
209 
210   // Chunk lengths used for variable width encoding of operands (index is
211   // spv_operand_type of the operand).
212   std::vector<uint32_t> operand_chunk_lengths_;
213 
214   uint32_t opcode_chunk_length_ = 7;
215   uint32_t num_operands_chunk_length_ = 3;
216   uint32_t mtf_rank_chunk_length_ = 5;
217 
218   uint32_t u64_chunk_length_ = 8;
219   uint32_t s64_chunk_length_ = 8;
220   uint32_t s64_block_exponent_ = 10;
221 
222   IdFallbackStrategy id_fallback_strategy_ =
223       IdFallbackStrategy::kShortDescriptor;
224 
225   uint32_t model_type_ = 0;
226   uint32_t model_version_ = 0;
227 };
228 
229 }  // namespace comp
230 }  // namespace spvtools
231 
232 #endif  // SOURCE_COMP_MARKV_MODEL_H_
233