1 // Copyright (c) 2018 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef SOURCE_COMP_MARKV_CODEC_H_
16 #define SOURCE_COMP_MARKV_CODEC_H_
17 
18 #include <list>
19 #include <map>
20 #include <memory>
21 #include <vector>
22 
23 #include "source/assembly_grammar.h"
24 #include "source/comp/huffman_codec.h"
25 #include "source/comp/markv_model.h"
26 #include "source/comp/move_to_front.h"
27 #include "source/diagnostic.h"
28 #include "source/id_descriptor.h"
29 
30 #include "source/val/instruction.h"
31 
32 // Base class for MARK-V encoder and decoder. Contains common functionality
33 // such as:
34 // - Validator connection and validation state.
35 // - SPIR-V grammar and helper functions.
36 
37 namespace spvtools {
38 namespace comp {
39 
40 class MarkvLogger;
41 
42 // Handles for move-to-front sequences. Enums which end with "Begin" define
43 // handle spaces which start at that value and span 16 or 32 bit wide.
44 enum : uint64_t {
45   kMtfNone = 0,
46   // All ids.
47   kMtfAll,
48   // All forward declared ids.
49   kMtfForwardDeclared,
50   // All type ids except for generated by OpTypeFunction.
51   kMtfTypeNonFunction,
52   // All labels.
53   kMtfLabel,
54   // All ids created by instructions which had type_id.
55   kMtfObject,
56   // All types generated by OpTypeFloat, OpTypeInt, OpTypeBool.
57   kMtfTypeScalar,
58   // All composite types.
59   kMtfTypeComposite,
60   // Boolean type or any vector type of it.
61   kMtfTypeBoolScalarOrVector,
62   // All float types or any vector floats type.
63   kMtfTypeFloatScalarOrVector,
64   // All int types or any vector int type.
65   kMtfTypeIntScalarOrVector,
66   // All types declared as return types in OpTypeFunction.
67   kMtfTypeReturnedByFunction,
68   // All composite objects.
69   kMtfComposite,
70   // All bool objects or vectors of bools.
71   kMtfBoolScalarOrVector,
72   // All float objects or vectors of float.
73   kMtfFloatScalarOrVector,
74   // All int objects or vectors of int.
75   kMtfIntScalarOrVector,
76   // All pointer types which point to composited.
77   kMtfTypePointerToComposite,
78   // Used by EncodeMtfRankHuffman.
79   kMtfGenericNonZeroRank,
80   // Handle space for ids of specific type.
81   kMtfIdOfTypeBegin = 0x10000,
82   // Handle space for ids generated by specific opcode.
83   kMtfIdGeneratedByOpcode = 0x20000,
84   // Handle space for ids of objects with type generated by specific opcode.
85   kMtfIdWithTypeGeneratedByOpcodeBegin = 0x30000,
86   // All vectors of specific component type.
87   kMtfVectorOfComponentTypeBegin = 0x40000,
88   // All vector types of specific size.
89   kMtfTypeVectorOfSizeBegin = 0x50000,
90   // All pointer types to specific type.
91   kMtfPointerToTypeBegin = 0x60000,
92   // All function types which return specific type.
93   kMtfFunctionTypeWithReturnTypeBegin = 0x70000,
94   // All function objects which return specific type.
95   kMtfFunctionWithReturnTypeBegin = 0x80000,
96   // Short id descriptor space (max 16-bit).
97   kMtfShortIdDescriptorSpaceBegin = 0x90000,
98   // Long id descriptor space (32-bit).
99   kMtfLongIdDescriptorSpaceBegin = 0x100000000,
100 };
101 
102 class MarkvCodec {
103  public:
104   static const uint32_t kMarkvMagicNumber;
105 
106   // Mtf ranks smaller than this are encoded with Huffman coding.
107   static const uint32_t kMtfSmallestRankEncodedByValue;
108 
109   // Signals that the mtf rank is too large to be encoded with Huffman.
110   static const uint32_t kMtfRankEncodedByValueSignal;
111 
112   static const uint32_t kShortDescriptorNumBits;
113 
114   static const size_t kByteBreakAfterInstIfLessThanUntilNextByte;
115 
116   static uint32_t GetMarkvVersion();
117 
118   virtual ~MarkvCodec();
119 
120  protected:
121   struct MarkvHeader {
122     MarkvHeader();
123 
124     uint32_t magic_number;
125     uint32_t markv_version;
126     // Magic number to identify or verify MarkvModel used for encoding.
127     uint32_t markv_model = 0;
128     uint32_t markv_length_in_bits = 0;
129     uint32_t spirv_version = 0;
130     uint32_t spirv_generator = 0;
131   };
132 
133   // |model| is owned by the caller, must be not null and valid during the
134   // lifetime of the codec.
135   MarkvCodec(spv_const_context context, spv_validator_options validator_options,
136              const MarkvModel* model);
137 
138   // Returns instruction which created |id| or nullptr if such instruction was
139   // not registered.
FindDef(uint32_t id)140   const val::Instruction* FindDef(uint32_t id) const {
141     const auto it = id_to_def_instruction_.find(id);
142     if (it == id_to_def_instruction_.end()) return nullptr;
143     return it->second;
144   }
145 
146   size_t GetNumBitsToNextByte(size_t bit_pos) const;
147   bool OpcodeHasFixedNumberOfOperands(SpvOp opcode) const;
148 
149   // Returns type id of vector type component.
GetVectorComponentType(uint32_t vector_type_id)150   uint32_t GetVectorComponentType(uint32_t vector_type_id) const {
151     const val::Instruction* type_inst = FindDef(vector_type_id);
152     assert(type_inst);
153     assert(type_inst->opcode() == SpvOpTypeVector);
154 
155     const uint32_t component_type =
156         type_inst->word(type_inst->operands()[1].offset);
157     return component_type;
158   }
159 
160   // Returns mtf handle for ids of given type.
GetMtfIdOfType(uint32_t type_id)161   uint64_t GetMtfIdOfType(uint32_t type_id) const {
162     return kMtfIdOfTypeBegin + type_id;
163   }
164 
165   // Returns mtf handle for ids generated by given opcode.
GetMtfIdGeneratedByOpcode(SpvOp opcode)166   uint64_t GetMtfIdGeneratedByOpcode(SpvOp opcode) const {
167     return kMtfIdGeneratedByOpcode + opcode;
168   }
169 
170   // Returns mtf handle for ids of type generated by given opcode.
GetMtfIdWithTypeGeneratedByOpcode(SpvOp opcode)171   uint64_t GetMtfIdWithTypeGeneratedByOpcode(SpvOp opcode) const {
172     return kMtfIdWithTypeGeneratedByOpcodeBegin + opcode;
173   }
174 
175   // Returns mtf handle for vectors of specific component type.
GetMtfVectorOfComponentType(uint32_t type_id)176   uint64_t GetMtfVectorOfComponentType(uint32_t type_id) const {
177     return kMtfVectorOfComponentTypeBegin + type_id;
178   }
179 
180   // Returns mtf handle for vector type of specific size.
GetMtfTypeVectorOfSize(uint32_t size)181   uint64_t GetMtfTypeVectorOfSize(uint32_t size) const {
182     return kMtfTypeVectorOfSizeBegin + size;
183   }
184 
185   // Returns mtf handle for pointers to specific size.
GetMtfPointerToType(uint32_t type_id)186   uint64_t GetMtfPointerToType(uint32_t type_id) const {
187     return kMtfPointerToTypeBegin + type_id;
188   }
189 
190   // Returns mtf handle for function types with given return type.
GetMtfFunctionTypeWithReturnType(uint32_t type_id)191   uint64_t GetMtfFunctionTypeWithReturnType(uint32_t type_id) const {
192     return kMtfFunctionTypeWithReturnTypeBegin + type_id;
193   }
194 
195   // Returns mtf handle for functions with given return type.
GetMtfFunctionWithReturnType(uint32_t type_id)196   uint64_t GetMtfFunctionWithReturnType(uint32_t type_id) const {
197     return kMtfFunctionWithReturnTypeBegin + type_id;
198   }
199 
200   // Returns mtf handle for the given long id descriptor.
GetMtfLongIdDescriptor(uint32_t descriptor)201   uint64_t GetMtfLongIdDescriptor(uint32_t descriptor) const {
202     return kMtfLongIdDescriptorSpaceBegin + descriptor;
203   }
204 
205   // Returns mtf handle for the given short id descriptor.
GetMtfShortIdDescriptor(uint32_t descriptor)206   uint64_t GetMtfShortIdDescriptor(uint32_t descriptor) const {
207     return kMtfShortIdDescriptorSpaceBegin + descriptor;
208   }
209 
210   // Process data from the current instruction. This would update MTFs and
211   // other data containers.
212   void ProcessCurInstruction();
213 
214   // Returns move-to-front handle to be used for the current operand slot.
215   // Mtf handle is chosen based on a set of rules defined by SPIR-V grammar.
216   uint64_t GetRuleBasedMtf();
217 
218   // Returns words of the current instruction. Decoder has a different
219   // implementation and the array is valid only until the previously decoded
220   // word.
GetInstWords()221   virtual const uint32_t* GetInstWords() const { return inst_.words; }
222 
223   // Returns the opcode of the previous instruction.
GetPrevOpcode()224   SpvOp GetPrevOpcode() const {
225     if (instructions_.empty()) return SpvOpNop;
226 
227     return instructions_.back()->opcode();
228   }
229 
230   // Returns diagnostic stream, position index is set to instruction number.
Diag(spv_result_t error_code)231   DiagnosticStream Diag(spv_result_t error_code) const {
232     return DiagnosticStream({0, 0, instructions_.size()}, context_->consumer,
233                             "", error_code);
234   }
235 
236   // Returns current id bound.
GetIdBound()237   uint32_t GetIdBound() const { return id_bound_; }
238 
239   // Sets current id bound, expected to be no lower than the previous one.
SetIdBound(uint32_t id_bound)240   void SetIdBound(uint32_t id_bound) {
241     assert(id_bound >= id_bound_);
242     id_bound_ = id_bound;
243   }
244 
245   // Returns Huffman codec for ranks of the mtf with given |handle|.
246   // Different mtfs can use different rank distributions.
247   // May return nullptr if the codec doesn't exist.
GetMtfHuffmanCodec(uint64_t handle)248   const HuffmanCodec<uint32_t>* GetMtfHuffmanCodec(uint64_t handle) const {
249     const auto it = mtf_huffman_codecs_.find(handle);
250     if (it == mtf_huffman_codecs_.end()) return nullptr;
251     return it->second.get();
252   }
253 
254   // Promotes id in all move-to-front sequences if ids can be shared by multiple
255   // sequences.
PromoteIfNeeded(uint32_t id)256   void PromoteIfNeeded(uint32_t id) {
257     if (!model_->AnyDescriptorHasCodingScheme() &&
258         model_->id_fallback_strategy() ==
259             MarkvModel::IdFallbackStrategy::kShortDescriptor) {
260       // Move-to-front sequences do not share ids. Nothing to do.
261       return;
262     }
263     multi_mtf_.Promote(id);
264   }
265 
266   spv_validator_options validator_options_ = nullptr;
267   const AssemblyGrammar grammar_;
268   MarkvHeader header_;
269 
270   // MARK-V model, not owned.
271   const MarkvModel* model_ = nullptr;
272 
273   // Current instruction, current operand and current operand index.
274   spv_parsed_instruction_t inst_;
275   spv_parsed_operand_t operand_;
276   uint32_t operand_index_;
277 
278   // Maps a result ID to its type ID.  By convention:
279   //  - a result ID that is a type definition maps to itself.
280   //  - a result ID without a type maps to 0.  (E.g. for OpLabel)
281   std::unordered_map<uint32_t, uint32_t> id_to_type_id_;
282 
283   // Container for all move-to-front sequences.
284   MultiMoveToFront multi_mtf_;
285 
286   // Id of the current function or zero if outside of function.
287   uint32_t cur_function_id_ = 0;
288 
289   // Return type of the current function.
290   uint32_t cur_function_return_type_ = 0;
291 
292   // Remaining function parameter types. This container is filled on OpFunction,
293   // and drained on OpFunctionParameter.
294   std::list<uint32_t> remaining_function_parameter_types_;
295 
296   // List of ids local to the current function.
297   std::vector<uint32_t> ids_local_to_cur_function_;
298 
299   // List of instructions in the order they are given in the module.
300   std::vector<std::unique_ptr<const val::Instruction>> instructions_;
301 
302   // Container/computer for long (32-bit) id descriptors.
303   IdDescriptorCollection long_id_descriptors_;
304 
305   // Container/computer for short id descriptors.
306   // Short descriptors are stored in uint32_t, but their actual bit width is
307   // defined with kShortDescriptorNumBits.
308   // It doesn't seem logical to have a different computer for short id
309   // descriptors, since one could actually map/truncate long descriptors.
310   // But as short descriptors have collisions, the efficiency of
311   // compression depends on the collision pattern, and short descriptors
312   // produced by function ShortHashU32Array have been empirically proven to
313   // produce better results.
314   IdDescriptorCollection short_id_descriptors_;
315 
316   // Huffman codecs for move-to-front ranks. The map key is mtf handle. Doesn't
317   // need to contain a different codec for every handle as most use one and the
318   // same.
319   std::map<uint64_t, std::unique_ptr<HuffmanCodec<uint32_t>>>
320       mtf_huffman_codecs_;
321 
322   // If not nullptr, codec will log comments on the compression process.
323   std::unique_ptr<MarkvLogger> logger_;
324 
325   spv_const_context context_ = nullptr;
326 
327  private:
328   // Maps result id to the instruction which defined it.
329   std::unordered_map<uint32_t, const val::Instruction*> id_to_def_instruction_;
330 
331   uint32_t id_bound_ = 1;
332 };
333 
334 }  // namespace comp
335 }  // namespace spvtools
336 
337 #endif  // SOURCE_COMP_MARKV_CODEC_H_
338