1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ASMJS_ASM_PARSER_H_
6 #define V8_ASMJS_ASM_PARSER_H_
7 
8 #include <memory>
9 #include <string>
10 
11 #include "src/asmjs/asm-scanner.h"
12 #include "src/asmjs/asm-types.h"
13 #include "src/wasm/wasm-module-builder.h"
14 #include "src/zone/zone-containers.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 class Utf16CharacterStream;
20 
21 namespace wasm {
22 
// A custom parser + validator + wasm converter for asm.js:
// http://asmjs.org/spec/latest/
// This parser intentionally avoids the portions of JavaScript parsing
// that are not required to determine if code is valid asm.js code.
// * It is mostly one pass.
// * It bails out on unexpected input.
// * It assumes strict ordering insofar as permitted by asm.js validation rules.
// * It relies on a custom scanner that provides de-duped identifiers in two
//   scopes (local + module wide).
32 class AsmJsParser {
33  public:
34   // clang-format off
35   enum StandardMember {
36     kInfinity,
37     kNaN,
38 #define V(_unused1, name, _unused2, _unused3) kMath##name,
39     STDLIB_MATH_FUNCTION_LIST(V)
40 #undef V
41 #define V(name, _unused1) kMath##name,
42     STDLIB_MATH_VALUE_LIST(V)
43 #undef V
44 #define V(name, _unused1, _unused2, _unused3) k##name,
45     STDLIB_ARRAY_TYPE_LIST(V)
46 #undef V
47   };
48   // clang-format on
49 
50   typedef EnumSet<StandardMember, uint64_t> StdlibSet;
51 
52   explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
53                        Utf16CharacterStream* stream);
54   bool Run();
failure_message()55   const char* failure_message() const { return failure_message_; }
failure_location()56   int failure_location() const { return failure_location_; }
module_builder()57   WasmModuleBuilder* module_builder() { return module_builder_; }
stdlib_uses()58   const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
59 
60  private:
61   // clang-format off
62   enum class VarKind {
63     kUnused,
64     kLocal,
65     kGlobal,
66     kSpecial,
67     kFunction,
68     kTable,
69     kImportedFunction,
70 #define V(_unused0, Name, _unused1, _unused2) kMath##Name,
71     STDLIB_MATH_FUNCTION_LIST(V)
72 #undef V
73 #define V(Name, _unused1) kMath##Name,
74     STDLIB_MATH_VALUE_LIST(V)
75 #undef V
76   };
77   // clang-format on
78 
79   // A single import in asm.js can require multiple imports in wasm, if the
80   // function is used with different signatures. {cache} keeps the wasm
81   // imports for the single asm.js import of name {function_name}.
82   struct FunctionImportInfo {
83     Vector<const char> function_name;
84     ZoneUnorderedMap<FunctionSig, uint32_t> cache;
85 
86     // Constructor.
FunctionImportInfoFunctionImportInfo87     FunctionImportInfo(Vector<const char> name, Zone* zone)
88         : function_name(name), cache(zone) {}
89   };
90 
91   struct VarInfo {
92     AsmType* type = AsmType::None();
93     WasmFunctionBuilder* function_builder = nullptr;
94     FunctionImportInfo* import = nullptr;
95     uint32_t mask = 0;
96     uint32_t index = 0;
97     VarKind kind = VarKind::kUnused;
98     bool mutable_variable = true;
99     bool function_defined = false;
100   };
101 
102   struct GlobalImport {
103     Vector<const char> import_name;
104     ValueType value_type;
105     VarInfo* var_info;
106   };
107 
108   enum class BlockKind { kRegular, kLoop, kOther };
109 
110   struct BlockInfo {
111     BlockKind kind;
112     AsmJsScanner::token_t label;
113   };
114 
115   // Helper class to make {TempVariable} safe for nesting.
116   class TemporaryVariableScope;
117 
118   template <typename T>
119   class CachedVectors {
120    public:
CachedVectors(Zone * zone)121     explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}
122 
zone()123     Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }
124 
fill(ZoneVector<T> * vec)125     inline void fill(ZoneVector<T>* vec) {
126       if (reusable_vectors_.empty()) return;
127       reusable_vectors_.back().swap(*vec);
128       reusable_vectors_.pop_back();
129       vec->clear();
130     }
131 
reuse(ZoneVector<T> * vec)132     inline void reuse(ZoneVector<T>* vec) {
133       reusable_vectors_.emplace_back(std::move(*vec));
134     }
135 
136    private:
137     ZoneVector<ZoneVector<T>> reusable_vectors_;
138   };
139 
140   template <typename T>
141   class CachedVector final : public ZoneVector<T> {
142    public:
CachedVector(CachedVectors<T> & cache)143     explicit CachedVector(CachedVectors<T>& cache)
144         : ZoneVector<T>(cache.zone()), cache_(&cache) {
145       cache.fill(this);
146     }
~CachedVector()147     ~CachedVector() { cache_->reuse(this); }
148 
149    private:
150     CachedVectors<T>* cache_;
151   };
152 
153   Zone* zone_;
154   AsmJsScanner scanner_;
155   WasmModuleBuilder* module_builder_;
156   WasmFunctionBuilder* current_function_builder_;
157   AsmType* return_type_;
158   uintptr_t stack_limit_;
159   StdlibSet stdlib_uses_;
160   ZoneVector<VarInfo> global_var_info_;
161   ZoneVector<VarInfo> local_var_info_;
162 
163   CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
164   CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
165   CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
166   CachedVectors<int32_t> cached_int_vectors_{zone_};
167 
168   int function_temp_locals_offset_;
169   int function_temp_locals_used_;
170   int function_temp_locals_depth_;
171 
172   // Error Handling related
173   bool failed_;
174   const char* failure_message_;
175   int failure_location_;
176 
177   // Module Related.
178   AsmJsScanner::token_t stdlib_name_;
179   AsmJsScanner::token_t foreign_name_;
180   AsmJsScanner::token_t heap_name_;
181 
182   static const AsmJsScanner::token_t kTokenNone = 0;
183 
184   // Track if parsing a heap assignment.
185   bool inside_heap_assignment_;
186   AsmType* heap_access_type_;
187 
188   ZoneVector<BlockInfo> block_stack_;
189 
190   // Types used for stdlib function and their set up.
191   AsmType* stdlib_dq2d_;
192   AsmType* stdlib_dqdq2d_;
193   AsmType* stdlib_i2s_;
194   AsmType* stdlib_ii2s_;
195   AsmType* stdlib_minmax_;
196   AsmType* stdlib_abs_;
197   AsmType* stdlib_ceil_like_;
198   AsmType* stdlib_fround_;
199 
200   // When making calls, the return type is needed to lookup signatures.
201   // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
202   // along the coercion.
203   AsmType* call_coercion_;
204 
205   // The source position associated with the above {call_coercion}.
206   size_t call_coercion_position_;
207 
208   // When making calls, the coercion can also appear in the source stream
209   // syntactically "behind" the call site. For `callsite(..)|0` use this
210   // value to flag that such a coercion must happen.
211   AsmType* call_coercion_deferred_;
212 
213   // The source position at which requesting a deferred coercion via the
214   // aforementioned {call_coercion_deferred} is allowed.
215   size_t call_coercion_deferred_position_;
216 
217   // The code position of the last heap access shift by an immediate value.
218   // For `heap[expr >> value:NumericLiteral]` this indicates from where to
219   // delete code when the expression is used as part of a valid heap access.
220   // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
221   size_t heap_access_shift_position_;
222   uint32_t heap_access_shift_value_;
223   static const size_t kNoHeapAccessShift = -1;
224 
225   // Used to track the last label we've seen so it can be matched to later
226   // statements it's attached to.
227   AsmJsScanner::token_t pending_label_;
228 
229   // Global imports. The list of imported variables that are copied during
230   // module instantiation into a corresponding global variable.
231   ZoneLinkedList<GlobalImport> global_imports_;
232 
zone()233   Zone* zone() { return zone_; }
234 
Peek(AsmJsScanner::token_t token)235   inline bool Peek(AsmJsScanner::token_t token) {
236     return scanner_.Token() == token;
237   }
238 
Check(AsmJsScanner::token_t token)239   inline bool Check(AsmJsScanner::token_t token) {
240     if (scanner_.Token() == token) {
241       scanner_.Next();
242       return true;
243     } else {
244       return false;
245     }
246   }
247 
CheckForZero()248   inline bool CheckForZero() {
249     if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
250       scanner_.Next();
251       return true;
252     } else {
253       return false;
254     }
255   }
256 
CheckForDouble(double * value)257   inline bool CheckForDouble(double* value) {
258     if (scanner_.IsDouble()) {
259       *value = scanner_.AsDouble();
260       scanner_.Next();
261       return true;
262     } else {
263       return false;
264     }
265   }
266 
CheckForUnsigned(uint32_t * value)267   inline bool CheckForUnsigned(uint32_t* value) {
268     if (scanner_.IsUnsigned()) {
269       *value = scanner_.AsUnsigned();
270       scanner_.Next();
271       return true;
272     } else {
273       return false;
274     }
275   }
276 
CheckForUnsignedBelow(uint32_t limit,uint32_t * value)277   inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
278     if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
279       *value = scanner_.AsUnsigned();
280       scanner_.Next();
281       return true;
282     } else {
283       return false;
284     }
285   }
286 
Consume()287   inline AsmJsScanner::token_t Consume() {
288     AsmJsScanner::token_t ret = scanner_.Token();
289     scanner_.Next();
290     return ret;
291   }
292 
293   void SkipSemicolon();
294 
295   VarInfo* GetVarInfo(AsmJsScanner::token_t token);
296   uint32_t VarIndex(VarInfo* info);
297   void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
298                      ValueType vtype,
299                      const WasmInitExpr& init = WasmInitExpr());
300   void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
301   void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
302                        bool mutable_variable, VarInfo* info);
303 
304   // Allocates a temporary local variable. The given {index} is absolute within
305   // the function body, consider using {TemporaryVariableScope} when nesting.
306   uint32_t TempVariable(int index);
307 
308   // Preserves a copy of the scanner's current identifier string in the zone.
309   Vector<const char> CopyCurrentIdentifierString();
310 
311   // Use to set up block stack layers (including synthetic ones for if-else).
312   // Begin/Loop/End below are implemented with these plus code generation.
313   void BareBegin(BlockKind kind = BlockKind::kOther,
314                  AsmJsScanner::token_t label = 0);
315   void BareEnd();
316   int FindContinueLabelDepth(AsmJsScanner::token_t label);
317   int FindBreakLabelDepth(AsmJsScanner::token_t label);
318 
319   // Use to set up actual wasm blocks/loops.
320   void Begin(AsmJsScanner::token_t label = 0);
321   void Loop(AsmJsScanner::token_t label = 0);
322   void End();
323 
324   void InitializeStdlibTypes();
325 
326   FunctionSig* ConvertSignature(AsmType* return_type,
327                                 const ZoneVector<AsmType*>& params);
328 
329   void ValidateModule();            // 6.1 ValidateModule
330   void ValidateModuleParameters();  // 6.1 ValidateModule - parameters
331   void ValidateModuleVars();        // 6.1 ValidateModule - variables
332   void ValidateModuleVar(bool mutable_variable);
333   void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
334   void ValidateModuleVarStdlib(VarInfo* info);
335   void ValidateModuleVarNewStdlib(VarInfo* info);
336   void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
337 
338   void ValidateExport();         // 6.2 ValidateExport
339   void ValidateFunctionTable();  // 6.3 ValidateFunctionTable
340   void ValidateFunction();       // 6.4 ValidateFunction
341   void ValidateFunctionParams(ZoneVector<AsmType*>* params);
342   void ValidateFunctionLocals(size_t param_count,
343                               ZoneVector<ValueType>* locals);
344   void ValidateStatement();              // 6.5 ValidateStatement
345   void Block();                          // 6.5.1 Block
346   void ExpressionStatement();            // 6.5.2 ExpressionStatement
347   void EmptyStatement();                 // 6.5.3 EmptyStatement
348   void IfStatement();                    // 6.5.4 IfStatement
349   void ReturnStatement();                // 6.5.5 ReturnStatement
350   bool IterationStatement();             // 6.5.6 IterationStatement
351   void WhileStatement();                 // 6.5.6 IterationStatement - while
352   void DoStatement();                    // 6.5.6 IterationStatement - do
353   void ForStatement();                   // 6.5.6 IterationStatement - for
354   void BreakStatement();                 // 6.5.7 BreakStatement
355   void ContinueStatement();              // 6.5.8 ContinueStatement
356   void LabelledStatement();              // 6.5.9 LabelledStatement
357   void SwitchStatement();                // 6.5.10 SwitchStatement
358   void ValidateCase();                   // 6.6. ValidateCase
359   void ValidateDefault();                // 6.7 ValidateDefault
360   AsmType* ValidateExpression();         // 6.8 ValidateExpression
361   AsmType* Expression(AsmType* expect);  // 6.8.1 Expression
362   AsmType* NumericLiteral();             // 6.8.2 NumericLiteral
363   AsmType* Identifier();                 // 6.8.3 Identifier
364   AsmType* CallExpression();             // 6.8.4 CallExpression
365   AsmType* MemberExpression();           // 6.8.5 MemberExpression
366   AsmType* AssignmentExpression();       // 6.8.6 AssignmentExpression
367   AsmType* UnaryExpression();            // 6.8.7 UnaryExpression
368   AsmType* MultiplicativeExpression();   // 6.8.8 MultiplicativeExpression
369   AsmType* AdditiveExpression();         // 6.8.9 AdditiveExpression
370   AsmType* ShiftExpression();            // 6.8.10 ShiftExpression
371   AsmType* RelationalExpression();       // 6.8.11 RelationalExpression
372   AsmType* EqualityExpression();         // 6.8.12 EqualityExpression
373   AsmType* BitwiseANDExpression();       // 6.8.13 BitwiseANDExpression
374   AsmType* BitwiseXORExpression();       // 6.8.14 BitwiseXORExpression
375   AsmType* BitwiseORExpression();        // 6.8.15 BitwiseORExpression
376   AsmType* ConditionalExpression();      // 6.8.16 ConditionalExpression
377   AsmType* ParenthesizedExpression();    // 6.8.17 ParenthesiedExpression
378   AsmType* ValidateCall();               // 6.9 ValidateCall
379   bool PeekCall();                       // 6.9 ValidateCall - helper
380   void ValidateHeapAccess();             // 6.10 ValidateHeapAccess
381   void ValidateFloatCoercion();          // 6.11 ValidateFloatCoercion
382 
383   // Used as part of {ForStatement}. Scans forward to the next `)` in order to
384   // skip over the third expression in a for-statement. This is one piece that
385   // makes this parser not be a pure single-pass.
386   void ScanToClosingParenthesis();
387 
388   // Used as part of {SwitchStatement}. Collects all case labels in the current
389   // switch-statement, then resets the scanner position. This is one piece that
390   // makes this parser not be a pure single-pass.
391   void GatherCases(ZoneVector<int32_t>* cases);
392 };
393 
394 }  // namespace wasm
395 }  // namespace internal
396 }  // namespace v8
397 
398 #endif  // V8_ASMJS_ASM_PARSER_H_
399