1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_ASMJS_ASM_PARSER_H_ 6 #define V8_ASMJS_ASM_PARSER_H_ 7 8 #include <memory> 9 #include <string> 10 11 #include "src/asmjs/asm-scanner.h" 12 #include "src/asmjs/asm-types.h" 13 #include "src/wasm/wasm-module-builder.h" 14 #include "src/zone/zone-containers.h" 15 16 namespace v8 { 17 namespace internal { 18 19 class Utf16CharacterStream; 20 21 namespace wasm { 22 23 // A custom parser + validator + wasm converter for asm.js: 24 // http://asmjs.org/spec/latest/ 25 // This parser intentionally avoids the portion of JavaScript parsing 26 // that are not required to determine if code is valid asm.js code. 27 // * It is mostly one pass. 28 // * It bails out on unexpected input. 29 // * It assumes strict ordering insofar as permitted by asm.js validation rules. 30 // * It relies on a custom scanner that provides de-duped identifiers in two 31 // scopes (local + module wide). 32 class AsmJsParser { 33 public: 34 // clang-format off 35 enum StandardMember { 36 kInfinity, 37 kNaN, 38 #define V(_unused1, name, _unused2, _unused3) kMath##name, 39 STDLIB_MATH_FUNCTION_LIST(V) 40 #undef V 41 #define V(name, _unused1) kMath##name, 42 STDLIB_MATH_VALUE_LIST(V) 43 #undef V 44 #define V(name, _unused1, _unused2, _unused3) k##name, 45 STDLIB_ARRAY_TYPE_LIST(V) 46 #undef V 47 }; 48 // clang-format on 49 50 typedef EnumSet<StandardMember, uint64_t> StdlibSet; 51 52 explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, 53 Utf16CharacterStream* stream); 54 bool Run(); failure_message()55 const char* failure_message() const { return failure_message_; } failure_location()56 int failure_location() const { return failure_location_; } module_builder()57 WasmModuleBuilder* module_builder() { return module_builder_; } stdlib_uses()58 const StdlibSet* stdlib_uses() const { return &stdlib_uses_; } 59 60 private: 61 // clang-format off 62 enum class VarKind { 63 kUnused, 64 kLocal, 65 kGlobal, 66 kSpecial, 67 kFunction, 68 kTable, 69 kImportedFunction, 70 #define V(_unused0, Name, _unused1, _unused2) kMath##Name, 71 STDLIB_MATH_FUNCTION_LIST(V) 72 #undef V 73 #define V(Name, _unused1) kMath##Name, 74 STDLIB_MATH_VALUE_LIST(V) 75 #undef V 76 }; 77 // clang-format on 78 79 // A single import in asm.js can require multiple imports in wasm, if the 80 // function is used with different signatures. {cache} keeps the wasm 81 // imports for the single asm.js import of name {function_name}. 82 struct FunctionImportInfo { 83 Vector<const char> function_name; 84 ZoneUnorderedMap<FunctionSig, uint32_t> cache; 85 86 // Constructor. FunctionImportInfoFunctionImportInfo87 FunctionImportInfo(Vector<const char> name, Zone* zone) 88 : function_name(name), cache(zone) {} 89 }; 90 91 struct VarInfo { 92 AsmType* type = AsmType::None(); 93 WasmFunctionBuilder* function_builder = nullptr; 94 FunctionImportInfo* import = nullptr; 95 uint32_t mask = 0; 96 uint32_t index = 0; 97 VarKind kind = VarKind::kUnused; 98 bool mutable_variable = true; 99 bool function_defined = false; 100 }; 101 102 struct GlobalImport { 103 Vector<const char> import_name; 104 ValueType value_type; 105 VarInfo* var_info; 106 }; 107 108 enum class BlockKind { kRegular, kLoop, kOther }; 109 110 struct BlockInfo { 111 BlockKind kind; 112 AsmJsScanner::token_t label; 113 }; 114 115 // Helper class to make {TempVariable} safe for nesting. 116 class TemporaryVariableScope; 117 118 template <typename T> 119 class CachedVectors { 120 public: CachedVectors(Zone * zone)121 explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {} 122 zone()123 Zone* zone() const { return reusable_vectors_.get_allocator().zone(); } 124 fill(ZoneVector<T> * vec)125 inline void fill(ZoneVector<T>* vec) { 126 if (reusable_vectors_.empty()) return; 127 reusable_vectors_.back().swap(*vec); 128 reusable_vectors_.pop_back(); 129 vec->clear(); 130 } 131 reuse(ZoneVector<T> * vec)132 inline void reuse(ZoneVector<T>* vec) { 133 reusable_vectors_.emplace_back(std::move(*vec)); 134 } 135 136 private: 137 ZoneVector<ZoneVector<T>> reusable_vectors_; 138 }; 139 140 template <typename T> 141 class CachedVector final : public ZoneVector<T> { 142 public: CachedVector(CachedVectors<T> & cache)143 explicit CachedVector(CachedVectors<T>& cache) 144 : ZoneVector<T>(cache.zone()), cache_(&cache) { 145 cache.fill(this); 146 } ~CachedVector()147 ~CachedVector() { cache_->reuse(this); } 148 149 private: 150 CachedVectors<T>* cache_; 151 }; 152 153 Zone* zone_; 154 AsmJsScanner scanner_; 155 WasmModuleBuilder* module_builder_; 156 WasmFunctionBuilder* current_function_builder_; 157 AsmType* return_type_; 158 uintptr_t stack_limit_; 159 StdlibSet stdlib_uses_; 160 ZoneVector<VarInfo> global_var_info_; 161 ZoneVector<VarInfo> local_var_info_; 162 163 CachedVectors<ValueType> cached_valuetype_vectors_{zone_}; 164 CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_}; 165 CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_}; 166 CachedVectors<int32_t> cached_int_vectors_{zone_}; 167 168 int function_temp_locals_offset_; 169 int function_temp_locals_used_; 170 int function_temp_locals_depth_; 171 172 // Error Handling related 173 bool failed_; 174 const char* failure_message_; 175 int failure_location_; 176 177 // Module Related. 178 AsmJsScanner::token_t stdlib_name_; 179 AsmJsScanner::token_t foreign_name_; 180 AsmJsScanner::token_t heap_name_; 181 182 static const AsmJsScanner::token_t kTokenNone = 0; 183 184 // Track if parsing a heap assignment. 185 bool inside_heap_assignment_; 186 AsmType* heap_access_type_; 187 188 ZoneVector<BlockInfo> block_stack_; 189 190 // Types used for stdlib function and their set up. 191 AsmType* stdlib_dq2d_; 192 AsmType* stdlib_dqdq2d_; 193 AsmType* stdlib_i2s_; 194 AsmType* stdlib_ii2s_; 195 AsmType* stdlib_minmax_; 196 AsmType* stdlib_abs_; 197 AsmType* stdlib_ceil_like_; 198 AsmType* stdlib_fround_; 199 200 // When making calls, the return type is needed to lookup signatures. 201 // For `+callsite(..)` or `fround(callsite(..))` use this value to pass 202 // along the coercion. 203 AsmType* call_coercion_; 204 205 // The source position associated with the above {call_coercion}. 206 size_t call_coercion_position_; 207 208 // When making calls, the coercion can also appear in the source stream 209 // syntactically "behind" the call site. For `callsite(..)|0` use this 210 // value to flag that such a coercion must happen. 211 AsmType* call_coercion_deferred_; 212 213 // The source position at which requesting a deferred coercion via the 214 // aforementioned {call_coercion_deferred} is allowed. 215 size_t call_coercion_deferred_position_; 216 217 // The code position of the last heap access shift by an immediate value. 218 // For `heap[expr >> value:NumericLiteral]` this indicates from where to 219 // delete code when the expression is used as part of a valid heap access. 220 // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched. 221 size_t heap_access_shift_position_; 222 uint32_t heap_access_shift_value_; 223 static const size_t kNoHeapAccessShift = -1; 224 225 // Used to track the last label we've seen so it can be matched to later 226 // statements it's attached to. 227 AsmJsScanner::token_t pending_label_; 228 229 // Global imports. The list of imported variables that are copied during 230 // module instantiation into a corresponding global variable. 231 ZoneLinkedList<GlobalImport> global_imports_; 232 zone()233 Zone* zone() { return zone_; } 234 Peek(AsmJsScanner::token_t token)235 inline bool Peek(AsmJsScanner::token_t token) { 236 return scanner_.Token() == token; 237 } 238 Check(AsmJsScanner::token_t token)239 inline bool Check(AsmJsScanner::token_t token) { 240 if (scanner_.Token() == token) { 241 scanner_.Next(); 242 return true; 243 } else { 244 return false; 245 } 246 } 247 CheckForZero()248 inline bool CheckForZero() { 249 if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) { 250 scanner_.Next(); 251 return true; 252 } else { 253 return false; 254 } 255 } 256 CheckForDouble(double * value)257 inline bool CheckForDouble(double* value) { 258 if (scanner_.IsDouble()) { 259 *value = scanner_.AsDouble(); 260 scanner_.Next(); 261 return true; 262 } else { 263 return false; 264 } 265 } 266 CheckForUnsigned(uint32_t * value)267 inline bool CheckForUnsigned(uint32_t* value) { 268 if (scanner_.IsUnsigned()) { 269 *value = scanner_.AsUnsigned(); 270 scanner_.Next(); 271 return true; 272 } else { 273 return false; 274 } 275 } 276 CheckForUnsignedBelow(uint32_t limit,uint32_t * value)277 inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) { 278 if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) { 279 *value = scanner_.AsUnsigned(); 280 scanner_.Next(); 281 return true; 282 } else { 283 return false; 284 } 285 } 286 Consume()287 inline AsmJsScanner::token_t Consume() { 288 AsmJsScanner::token_t ret = scanner_.Token(); 289 scanner_.Next(); 290 return ret; 291 } 292 293 void SkipSemicolon(); 294 295 VarInfo* GetVarInfo(AsmJsScanner::token_t token); 296 uint32_t VarIndex(VarInfo* info); 297 void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type, 298 ValueType vtype, 299 const WasmInitExpr& init = WasmInitExpr()); 300 void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type); 301 void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype, 302 bool mutable_variable, VarInfo* info); 303 304 // Allocates a temporary local variable. The given {index} is absolute within 305 // the function body, consider using {TemporaryVariableScope} when nesting. 306 uint32_t TempVariable(int index); 307 308 // Preserves a copy of the scanner's current identifier string in the zone. 309 Vector<const char> CopyCurrentIdentifierString(); 310 311 // Use to set up block stack layers (including synthetic ones for if-else). 312 // Begin/Loop/End below are implemented with these plus code generation. 313 void BareBegin(BlockKind kind = BlockKind::kOther, 314 AsmJsScanner::token_t label = 0); 315 void BareEnd(); 316 int FindContinueLabelDepth(AsmJsScanner::token_t label); 317 int FindBreakLabelDepth(AsmJsScanner::token_t label); 318 319 // Use to set up actual wasm blocks/loops. 320 void Begin(AsmJsScanner::token_t label = 0); 321 void Loop(AsmJsScanner::token_t label = 0); 322 void End(); 323 324 void InitializeStdlibTypes(); 325 326 FunctionSig* ConvertSignature(AsmType* return_type, 327 const ZoneVector<AsmType*>& params); 328 329 void ValidateModule(); // 6.1 ValidateModule 330 void ValidateModuleParameters(); // 6.1 ValidateModule - parameters 331 void ValidateModuleVars(); // 6.1 ValidateModule - variables 332 void ValidateModuleVar(bool mutable_variable); 333 void ValidateModuleVarImport(VarInfo* info, bool mutable_variable); 334 void ValidateModuleVarStdlib(VarInfo* info); 335 void ValidateModuleVarNewStdlib(VarInfo* info); 336 void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable); 337 338 void ValidateExport(); // 6.2 ValidateExport 339 void ValidateFunctionTable(); // 6.3 ValidateFunctionTable 340 void ValidateFunction(); // 6.4 ValidateFunction 341 void ValidateFunctionParams(ZoneVector<AsmType*>* params); 342 void ValidateFunctionLocals(size_t param_count, 343 ZoneVector<ValueType>* locals); 344 void ValidateStatement(); // 6.5 ValidateStatement 345 void Block(); // 6.5.1 Block 346 void ExpressionStatement(); // 6.5.2 ExpressionStatement 347 void EmptyStatement(); // 6.5.3 EmptyStatement 348 void IfStatement(); // 6.5.4 IfStatement 349 void ReturnStatement(); // 6.5.5 ReturnStatement 350 bool IterationStatement(); // 6.5.6 IterationStatement 351 void WhileStatement(); // 6.5.6 IterationStatement - while 352 void DoStatement(); // 6.5.6 IterationStatement - do 353 void ForStatement(); // 6.5.6 IterationStatement - for 354 void BreakStatement(); // 6.5.7 BreakStatement 355 void ContinueStatement(); // 6.5.8 ContinueStatement 356 void LabelledStatement(); // 6.5.9 LabelledStatement 357 void SwitchStatement(); // 6.5.10 SwitchStatement 358 void ValidateCase(); // 6.6. ValidateCase 359 void ValidateDefault(); // 6.7 ValidateDefault 360 AsmType* ValidateExpression(); // 6.8 ValidateExpression 361 AsmType* Expression(AsmType* expect); // 6.8.1 Expression 362 AsmType* NumericLiteral(); // 6.8.2 NumericLiteral 363 AsmType* Identifier(); // 6.8.3 Identifier 364 AsmType* CallExpression(); // 6.8.4 CallExpression 365 AsmType* MemberExpression(); // 6.8.5 MemberExpression 366 AsmType* AssignmentExpression(); // 6.8.6 AssignmentExpression 367 AsmType* UnaryExpression(); // 6.8.7 UnaryExpression 368 AsmType* MultiplicativeExpression(); // 6.8.8 MultiplicativeExpression 369 AsmType* AdditiveExpression(); // 6.8.9 AdditiveExpression 370 AsmType* ShiftExpression(); // 6.8.10 ShiftExpression 371 AsmType* RelationalExpression(); // 6.8.11 RelationalExpression 372 AsmType* EqualityExpression(); // 6.8.12 EqualityExpression 373 AsmType* BitwiseANDExpression(); // 6.8.13 BitwiseANDExpression 374 AsmType* BitwiseXORExpression(); // 6.8.14 BitwiseXORExpression 375 AsmType* BitwiseORExpression(); // 6.8.15 BitwiseORExpression 376 AsmType* ConditionalExpression(); // 6.8.16 ConditionalExpression 377 AsmType* ParenthesizedExpression(); // 6.8.17 ParenthesiedExpression 378 AsmType* ValidateCall(); // 6.9 ValidateCall 379 bool PeekCall(); // 6.9 ValidateCall - helper 380 void ValidateHeapAccess(); // 6.10 ValidateHeapAccess 381 void ValidateFloatCoercion(); // 6.11 ValidateFloatCoercion 382 383 // Used as part of {ForStatement}. Scans forward to the next `)` in order to 384 // skip over the third expression in a for-statement. This is one piece that 385 // makes this parser not be a pure single-pass. 386 void ScanToClosingParenthesis(); 387 388 // Used as part of {SwitchStatement}. Collects all case labels in the current 389 // switch-statement, then resets the scanner position. This is one piece that 390 // makes this parser not be a pure single-pass. 391 void GatherCases(ZoneVector<int32_t>* cases); 392 }; 393 394 } // namespace wasm 395 } // namespace internal 396 } // namespace v8 397 398 #endif // V8_ASMJS_ASM_PARSER_H_ 399