1 // Copyright (c) 2018 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef SOURCE_COMP_MARKV_CODEC_H_ 16 #define SOURCE_COMP_MARKV_CODEC_H_ 17 18 #include <list> 19 #include <map> 20 #include <memory> 21 #include <vector> 22 23 #include "source/assembly_grammar.h" 24 #include "source/comp/huffman_codec.h" 25 #include "source/comp/markv_model.h" 26 #include "source/comp/move_to_front.h" 27 #include "source/diagnostic.h" 28 #include "source/id_descriptor.h" 29 30 #include "source/val/instruction.h" 31 32 // Base class for MARK-V encoder and decoder. Contains common functionality 33 // such as: 34 // - Validator connection and validation state. 35 // - SPIR-V grammar and helper functions. 36 37 namespace spvtools { 38 namespace comp { 39 40 class MarkvLogger; 41 42 // Handles for move-to-front sequences. Enums which end with "Begin" define 43 // handle spaces which start at that value and span 16 or 32 bit wide. 44 enum : uint64_t { 45 kMtfNone = 0, 46 // All ids. 47 kMtfAll, 48 // All forward declared ids. 49 kMtfForwardDeclared, 50 // All type ids except for generated by OpTypeFunction. 51 kMtfTypeNonFunction, 52 // All labels. 53 kMtfLabel, 54 // All ids created by instructions which had type_id. 55 kMtfObject, 56 // All types generated by OpTypeFloat, OpTypeInt, OpTypeBool. 57 kMtfTypeScalar, 58 // All composite types. 59 kMtfTypeComposite, 60 // Boolean type or any vector type of it. 61 kMtfTypeBoolScalarOrVector, 62 // All float types or any vector floats type. 63 kMtfTypeFloatScalarOrVector, 64 // All int types or any vector int type. 65 kMtfTypeIntScalarOrVector, 66 // All types declared as return types in OpTypeFunction. 67 kMtfTypeReturnedByFunction, 68 // All composite objects. 69 kMtfComposite, 70 // All bool objects or vectors of bools. 71 kMtfBoolScalarOrVector, 72 // All float objects or vectors of float. 73 kMtfFloatScalarOrVector, 74 // All int objects or vectors of int. 75 kMtfIntScalarOrVector, 76 // All pointer types which point to composited. 77 kMtfTypePointerToComposite, 78 // Used by EncodeMtfRankHuffman. 79 kMtfGenericNonZeroRank, 80 // Handle space for ids of specific type. 81 kMtfIdOfTypeBegin = 0x10000, 82 // Handle space for ids generated by specific opcode. 83 kMtfIdGeneratedByOpcode = 0x20000, 84 // Handle space for ids of objects with type generated by specific opcode. 85 kMtfIdWithTypeGeneratedByOpcodeBegin = 0x30000, 86 // All vectors of specific component type. 87 kMtfVectorOfComponentTypeBegin = 0x40000, 88 // All vector types of specific size. 89 kMtfTypeVectorOfSizeBegin = 0x50000, 90 // All pointer types to specific type. 91 kMtfPointerToTypeBegin = 0x60000, 92 // All function types which return specific type. 93 kMtfFunctionTypeWithReturnTypeBegin = 0x70000, 94 // All function objects which return specific type. 95 kMtfFunctionWithReturnTypeBegin = 0x80000, 96 // Short id descriptor space (max 16-bit). 97 kMtfShortIdDescriptorSpaceBegin = 0x90000, 98 // Long id descriptor space (32-bit). 99 kMtfLongIdDescriptorSpaceBegin = 0x100000000, 100 }; 101 102 class MarkvCodec { 103 public: 104 static const uint32_t kMarkvMagicNumber; 105 106 // Mtf ranks smaller than this are encoded with Huffman coding. 107 static const uint32_t kMtfSmallestRankEncodedByValue; 108 109 // Signals that the mtf rank is too large to be encoded with Huffman. 110 static const uint32_t kMtfRankEncodedByValueSignal; 111 112 static const uint32_t kShortDescriptorNumBits; 113 114 static const size_t kByteBreakAfterInstIfLessThanUntilNextByte; 115 116 static uint32_t GetMarkvVersion(); 117 118 virtual ~MarkvCodec(); 119 120 protected: 121 struct MarkvHeader { 122 MarkvHeader(); 123 124 uint32_t magic_number; 125 uint32_t markv_version; 126 // Magic number to identify or verify MarkvModel used for encoding. 127 uint32_t markv_model = 0; 128 uint32_t markv_length_in_bits = 0; 129 uint32_t spirv_version = 0; 130 uint32_t spirv_generator = 0; 131 }; 132 133 // |model| is owned by the caller, must be not null and valid during the 134 // lifetime of the codec. 135 MarkvCodec(spv_const_context context, spv_validator_options validator_options, 136 const MarkvModel* model); 137 138 // Returns instruction which created |id| or nullptr if such instruction was 139 // not registered. FindDef(uint32_t id)140 const val::Instruction* FindDef(uint32_t id) const { 141 const auto it = id_to_def_instruction_.find(id); 142 if (it == id_to_def_instruction_.end()) return nullptr; 143 return it->second; 144 } 145 146 size_t GetNumBitsToNextByte(size_t bit_pos) const; 147 bool OpcodeHasFixedNumberOfOperands(SpvOp opcode) const; 148 149 // Returns type id of vector type component. GetVectorComponentType(uint32_t vector_type_id)150 uint32_t GetVectorComponentType(uint32_t vector_type_id) const { 151 const val::Instruction* type_inst = FindDef(vector_type_id); 152 assert(type_inst); 153 assert(type_inst->opcode() == SpvOpTypeVector); 154 155 const uint32_t component_type = 156 type_inst->word(type_inst->operands()[1].offset); 157 return component_type; 158 } 159 160 // Returns mtf handle for ids of given type. GetMtfIdOfType(uint32_t type_id)161 uint64_t GetMtfIdOfType(uint32_t type_id) const { 162 return kMtfIdOfTypeBegin + type_id; 163 } 164 165 // Returns mtf handle for ids generated by given opcode. GetMtfIdGeneratedByOpcode(SpvOp opcode)166 uint64_t GetMtfIdGeneratedByOpcode(SpvOp opcode) const { 167 return kMtfIdGeneratedByOpcode + opcode; 168 } 169 170 // Returns mtf handle for ids of type generated by given opcode. GetMtfIdWithTypeGeneratedByOpcode(SpvOp opcode)171 uint64_t GetMtfIdWithTypeGeneratedByOpcode(SpvOp opcode) const { 172 return kMtfIdWithTypeGeneratedByOpcodeBegin + opcode; 173 } 174 175 // Returns mtf handle for vectors of specific component type. GetMtfVectorOfComponentType(uint32_t type_id)176 uint64_t GetMtfVectorOfComponentType(uint32_t type_id) const { 177 return kMtfVectorOfComponentTypeBegin + type_id; 178 } 179 180 // Returns mtf handle for vector type of specific size. GetMtfTypeVectorOfSize(uint32_t size)181 uint64_t GetMtfTypeVectorOfSize(uint32_t size) const { 182 return kMtfTypeVectorOfSizeBegin + size; 183 } 184 185 // Returns mtf handle for pointers to specific size. GetMtfPointerToType(uint32_t type_id)186 uint64_t GetMtfPointerToType(uint32_t type_id) const { 187 return kMtfPointerToTypeBegin + type_id; 188 } 189 190 // Returns mtf handle for function types with given return type. GetMtfFunctionTypeWithReturnType(uint32_t type_id)191 uint64_t GetMtfFunctionTypeWithReturnType(uint32_t type_id) const { 192 return kMtfFunctionTypeWithReturnTypeBegin + type_id; 193 } 194 195 // Returns mtf handle for functions with given return type. GetMtfFunctionWithReturnType(uint32_t type_id)196 uint64_t GetMtfFunctionWithReturnType(uint32_t type_id) const { 197 return kMtfFunctionWithReturnTypeBegin + type_id; 198 } 199 200 // Returns mtf handle for the given long id descriptor. GetMtfLongIdDescriptor(uint32_t descriptor)201 uint64_t GetMtfLongIdDescriptor(uint32_t descriptor) const { 202 return kMtfLongIdDescriptorSpaceBegin + descriptor; 203 } 204 205 // Returns mtf handle for the given short id descriptor. GetMtfShortIdDescriptor(uint32_t descriptor)206 uint64_t GetMtfShortIdDescriptor(uint32_t descriptor) const { 207 return kMtfShortIdDescriptorSpaceBegin + descriptor; 208 } 209 210 // Process data from the current instruction. This would update MTFs and 211 // other data containers. 212 void ProcessCurInstruction(); 213 214 // Returns move-to-front handle to be used for the current operand slot. 215 // Mtf handle is chosen based on a set of rules defined by SPIR-V grammar. 216 uint64_t GetRuleBasedMtf(); 217 218 // Returns words of the current instruction. Decoder has a different 219 // implementation and the array is valid only until the previously decoded 220 // word. GetInstWords()221 virtual const uint32_t* GetInstWords() const { return inst_.words; } 222 223 // Returns the opcode of the previous instruction. GetPrevOpcode()224 SpvOp GetPrevOpcode() const { 225 if (instructions_.empty()) return SpvOpNop; 226 227 return instructions_.back()->opcode(); 228 } 229 230 // Returns diagnostic stream, position index is set to instruction number. Diag(spv_result_t error_code)231 DiagnosticStream Diag(spv_result_t error_code) const { 232 return DiagnosticStream({0, 0, instructions_.size()}, context_->consumer, 233 "", error_code); 234 } 235 236 // Returns current id bound. GetIdBound()237 uint32_t GetIdBound() const { return id_bound_; } 238 239 // Sets current id bound, expected to be no lower than the previous one. SetIdBound(uint32_t id_bound)240 void SetIdBound(uint32_t id_bound) { 241 assert(id_bound >= id_bound_); 242 id_bound_ = id_bound; 243 } 244 245 // Returns Huffman codec for ranks of the mtf with given |handle|. 246 // Different mtfs can use different rank distributions. 247 // May return nullptr if the codec doesn't exist. GetMtfHuffmanCodec(uint64_t handle)248 const HuffmanCodec<uint32_t>* GetMtfHuffmanCodec(uint64_t handle) const { 249 const auto it = mtf_huffman_codecs_.find(handle); 250 if (it == mtf_huffman_codecs_.end()) return nullptr; 251 return it->second.get(); 252 } 253 254 // Promotes id in all move-to-front sequences if ids can be shared by multiple 255 // sequences. PromoteIfNeeded(uint32_t id)256 void PromoteIfNeeded(uint32_t id) { 257 if (!model_->AnyDescriptorHasCodingScheme() && 258 model_->id_fallback_strategy() == 259 MarkvModel::IdFallbackStrategy::kShortDescriptor) { 260 // Move-to-front sequences do not share ids. Nothing to do. 261 return; 262 } 263 multi_mtf_.Promote(id); 264 } 265 266 spv_validator_options validator_options_ = nullptr; 267 const AssemblyGrammar grammar_; 268 MarkvHeader header_; 269 270 // MARK-V model, not owned. 271 const MarkvModel* model_ = nullptr; 272 273 // Current instruction, current operand and current operand index. 274 spv_parsed_instruction_t inst_; 275 spv_parsed_operand_t operand_; 276 uint32_t operand_index_; 277 278 // Maps a result ID to its type ID. By convention: 279 // - a result ID that is a type definition maps to itself. 280 // - a result ID without a type maps to 0. (E.g. for OpLabel) 281 std::unordered_map<uint32_t, uint32_t> id_to_type_id_; 282 283 // Container for all move-to-front sequences. 284 MultiMoveToFront multi_mtf_; 285 286 // Id of the current function or zero if outside of function. 287 uint32_t cur_function_id_ = 0; 288 289 // Return type of the current function. 290 uint32_t cur_function_return_type_ = 0; 291 292 // Remaining function parameter types. This container is filled on OpFunction, 293 // and drained on OpFunctionParameter. 294 std::list<uint32_t> remaining_function_parameter_types_; 295 296 // List of ids local to the current function. 297 std::vector<uint32_t> ids_local_to_cur_function_; 298 299 // List of instructions in the order they are given in the module. 300 std::vector<std::unique_ptr<const val::Instruction>> instructions_; 301 302 // Container/computer for long (32-bit) id descriptors. 303 IdDescriptorCollection long_id_descriptors_; 304 305 // Container/computer for short id descriptors. 306 // Short descriptors are stored in uint32_t, but their actual bit width is 307 // defined with kShortDescriptorNumBits. 308 // It doesn't seem logical to have a different computer for short id 309 // descriptors, since one could actually map/truncate long descriptors. 310 // But as short descriptors have collisions, the efficiency of 311 // compression depends on the collision pattern, and short descriptors 312 // produced by function ShortHashU32Array have been empirically proven to 313 // produce better results. 314 IdDescriptorCollection short_id_descriptors_; 315 316 // Huffman codecs for move-to-front ranks. The map key is mtf handle. Doesn't 317 // need to contain a different codec for every handle as most use one and the 318 // same. 319 std::map<uint64_t, std::unique_ptr<HuffmanCodec<uint32_t>>> 320 mtf_huffman_codecs_; 321 322 // If not nullptr, codec will log comments on the compression process. 323 std::unique_ptr<MarkvLogger> logger_; 324 325 spv_const_context context_ = nullptr; 326 327 private: 328 // Maps result id to the instruction which defined it. 329 std::unordered_map<uint32_t, const val::Instruction*> id_to_def_instruction_; 330 331 uint32_t id_bound_ = 1; 332 }; 333 334 } // namespace comp 335 } // namespace spvtools 336 337 #endif // SOURCE_COMP_MARKV_CODEC_H_ 338