1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_AST_AST_VALUE_FACTORY_H_
29 #define V8_AST_AST_VALUE_FACTORY_H_
30 
31 #include "src/api.h"
32 #include "src/base/hashmap.h"
33 #include "src/globals.h"
34 #include "src/utils.h"
35 
36 // AstString, AstValue and AstValueFactory are for storing strings and values
37 // independent of the V8 heap and internalizing them later. During parsing,
38 // AstStrings and AstValues are created and stored outside the heap, in
39 // AstValueFactory. After parsing, the strings and values are internalized
40 // (moved into the V8 heap).
41 namespace v8 {
42 namespace internal {
43 
44 class AstString : public ZoneObject {
45  public:
AstString(bool is_raw)46   explicit AstString(bool is_raw)
47       : next_(nullptr), bit_field_(IsRawStringBits::encode(is_raw)) {}
48 
49   int length() const;
IsEmpty()50   bool IsEmpty() const { return length() == 0; }
51 
52   // Puts the string into the V8 heap.
53   void Internalize(Isolate* isolate);
54 
55   // This function can be called after internalizing.
string()56   V8_INLINE Handle<String> string() const {
57     DCHECK_NOT_NULL(string_);
58     return Handle<String>(string_);
59   }
60 
next()61   AstString* next() { return next_; }
next_location()62   AstString** next_location() { return &next_; }
63 
64  protected:
set_string(Handle<String> string)65   void set_string(Handle<String> string) { string_ = string.location(); }
66   // {string_} is stored as String** instead of a Handle<String> so it can be
67   // stored in a union with {next_}.
68   union {
69     AstString* next_;
70     String** string_;
71   };
72   // Poor-man's virtual dispatch to AstRawString / AstConsString. Takes less
73   // memory.
74   class IsRawStringBits : public BitField<bool, 0, 1> {};
75   int bit_field_;
76 };
77 
78 
79 class AstRawString final : public AstString {
80  public:
length()81   int length() const {
82     if (is_one_byte()) return literal_bytes_.length();
83     return literal_bytes_.length() / 2;
84   }
85 
byte_length()86   int byte_length() const { return literal_bytes_.length(); }
87 
88   void Internalize(Isolate* isolate);
89 
90   bool AsArrayIndex(uint32_t* index) const;
91 
92   // The string is not null-terminated, use length() to find out the length.
raw_data()93   const unsigned char* raw_data() const {
94     return literal_bytes_.start();
95   }
96 
is_one_byte()97   bool is_one_byte() const { return IsOneByteBits::decode(bit_field_); }
98 
99   bool IsOneByteEqualTo(const char* data) const;
FirstCharacter()100   uint16_t FirstCharacter() const {
101     if (is_one_byte()) return literal_bytes_[0];
102     const uint16_t* c =
103         reinterpret_cast<const uint16_t*>(literal_bytes_.start());
104     return *c;
105   }
106 
107   // For storing AstRawStrings in a hash map.
hash()108   uint32_t hash() const {
109     return hash_;
110   }
111 
112  private:
113   friend class AstValueFactory;
114   friend class AstRawStringInternalizationKey;
115 
AstRawString(bool is_one_byte,const Vector<const byte> & literal_bytes,uint32_t hash)116   AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
117                uint32_t hash)
118       : AstString(true), hash_(hash), literal_bytes_(literal_bytes) {
119     bit_field_ |= IsOneByteBits::encode(is_one_byte);
120   }
121 
AstRawString()122   AstRawString() : AstString(true), hash_(0) {
123     bit_field_ |= IsOneByteBits::encode(true);
124   }
125 
126   class IsOneByteBits : public BitField<bool, IsRawStringBits::kNext, 1> {};
127 
128   uint32_t hash_;
129   // Points to memory owned by Zone.
130   Vector<const byte> literal_bytes_;
131 };
132 
133 
134 class AstConsString final : public AstString {
135  public:
AstConsString(const AstString * left,const AstString * right)136   AstConsString(const AstString* left, const AstString* right)
137       : AstString(false),
138         length_(left->length() + right->length()),
139         left_(left),
140         right_(right) {}
141 
length()142   int length() const { return length_; }
143 
144   void Internalize(Isolate* isolate);
145 
146  private:
147   const int length_;
148   const AstString* left_;
149   const AstString* right_;
150 };
151 
152 
153 // AstValue is either a string, a number, a string array, a boolean, or a
154 // special value (null, undefined, the hole).
155 class AstValue : public ZoneObject {
156  public:
IsString()157   bool IsString() const {
158     return type_ == STRING;
159   }
160 
IsNumber()161   bool IsNumber() const {
162     return type_ == NUMBER || type_ == NUMBER_WITH_DOT || type_ == SMI ||
163            type_ == SMI_WITH_DOT;
164   }
165 
ContainsDot()166   bool ContainsDot() const {
167     return type_ == NUMBER_WITH_DOT || type_ == SMI_WITH_DOT;
168   }
169 
AsString()170   const AstRawString* AsString() const {
171     CHECK_EQ(STRING, type_);
172     return string_;
173   }
174 
AsNumber()175   double AsNumber() const {
176     if (type_ == NUMBER || type_ == NUMBER_WITH_DOT)
177       return number_;
178     if (type_ == SMI || type_ == SMI_WITH_DOT)
179       return smi_;
180     UNREACHABLE();
181     return 0;
182   }
183 
AsSmi()184   Smi* AsSmi() const {
185     CHECK(type_ == SMI || type_ == SMI_WITH_DOT);
186     return Smi::FromInt(smi_);
187   }
188 
EqualsString(const AstRawString * string)189   bool EqualsString(const AstRawString* string) const {
190     return type_ == STRING && string_ == string;
191   }
192 
193   bool IsPropertyName() const;
194 
195   bool BooleanValue() const;
196 
IsSmi()197   bool IsSmi() const { return type_ == SMI || type_ == SMI_WITH_DOT; }
IsFalse()198   bool IsFalse() const { return type_ == BOOLEAN && !bool_; }
IsTrue()199   bool IsTrue() const { return type_ == BOOLEAN && bool_; }
IsUndefined()200   bool IsUndefined() const { return type_ == UNDEFINED; }
IsTheHole()201   bool IsTheHole() const { return type_ == THE_HOLE; }
IsNull()202   bool IsNull() const { return type_ == NULL_TYPE; }
203 
204   void Internalize(Isolate* isolate);
205 
206   // Can be called after Internalize has been called.
value()207   V8_INLINE Handle<Object> value() const {
208     if (type_ == STRING) {
209       return string_->string();
210     }
211     DCHECK_NOT_NULL(value_);
212     return Handle<Object>(value_);
213   }
next()214   AstValue* next() const { return next_; }
set_next(AstValue * next)215   void set_next(AstValue* next) { next_ = next; }
216 
217  private:
set_value(Handle<Object> object)218   void set_value(Handle<Object> object) { value_ = object.location(); }
219   friend class AstValueFactory;
220 
221   enum Type {
222     STRING,
223     SYMBOL,
224     NUMBER,
225     NUMBER_WITH_DOT,
226     SMI,
227     SMI_WITH_DOT,
228     BOOLEAN,
229     NULL_TYPE,
230     UNDEFINED,
231     THE_HOLE
232   };
233 
AstValue(const AstRawString * s)234   explicit AstValue(const AstRawString* s) : type_(STRING), next_(nullptr) {
235     string_ = s;
236   }
237 
AstValue(const char * name)238   explicit AstValue(const char* name) : type_(SYMBOL), next_(nullptr) {
239     symbol_name_ = name;
240   }
241 
AstValue(double n,bool with_dot)242   explicit AstValue(double n, bool with_dot) : next_(nullptr) {
243     int int_value;
244     if (DoubleToSmiInteger(n, &int_value)) {
245       type_ = with_dot ? SMI_WITH_DOT : SMI;
246       smi_ = int_value;
247     } else {
248       type_ = with_dot ? NUMBER_WITH_DOT : NUMBER;
249       number_ = n;
250     }
251   }
252 
AstValue(Type t,int i)253   AstValue(Type t, int i) : type_(t), next_(nullptr) {
254     DCHECK(type_ == SMI);
255     smi_ = i;
256   }
257 
AstValue(bool b)258   explicit AstValue(bool b) : type_(BOOLEAN), next_(nullptr) { bool_ = b; }
259 
AstValue(Type t)260   explicit AstValue(Type t) : type_(t), next_(nullptr) {
261     DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE);
262   }
263 
264   Type type_;
265 
266   // {value_} is stored as Object** instead of a Handle<Object> so it can be
267   // stored in a union with {next_}.
268   union {
269     Object** value_;  // if internalized
270     AstValue* next_;  // if !internalized
271   };
272 
273   // Uninternalized value.
274   union {
275     const AstRawString* string_;
276     double number_;
277     int smi_;
278     bool bool_;
279     const char* symbol_name_;
280   };
281 };
282 
283 
// X-macro list for generating string constants. The macro passed as V is
// expanded once per entry, in the order listed, as V(identifier, "text"),
// where |identifier| is a name fragment meant for token pasting (several
// entries are C++ keywords, so it cannot be used as a bare identifier) and
// "text" is the string's contents.
#define STRING_CONSTANTS(V)                     \
  V(anonymous_function, "(anonymous function)") \
  V(arguments, "arguments")                     \
  V(async, "async")                             \
  V(await, "await")                             \
  V(constructor, "constructor")                 \
  V(default, "default")                         \
  V(done, "done")                               \
  V(dot, ".")                                   \
  V(dot_class_field_init, ".class-field-init")  \
  V(dot_for, ".for")                            \
  V(dot_generator_object, ".generator_object")  \
  V(dot_iterator, ".iterator")                  \
  V(dot_result, ".result")                      \
  V(dot_switch_tag, ".switch_tag")              \
  V(dot_catch, ".catch")                        \
  V(empty, "")                                  \
  V(eval, "eval")                               \
  V(function, "function")                       \
  V(get_space, "get ")                          \
  V(length, "length")                           \
  V(let, "let")                                 \
  V(native, "native")                           \
  V(new_target, ".new.target")                  \
  V(next, "next")                               \
  V(proto, "__proto__")                         \
  V(prototype, "prototype")                     \
  V(return, "return")                           \
  V(set_space, "set ")                          \
  V(star_default_star, "*default*")             \
  V(this, "this")                               \
  V(this_function, ".this_function")            \
  V(throw, "throw")                             \
  V(undefined, "undefined")                     \
  V(use_asm, "use asm")                         \
  V(use_strict, "use strict")                   \
  V(value, "value")
322 
// X-macro list of the non-string constants. The macro passed as V is expanded
// once per entry, in the order listed, as V(identifier).
#define OTHER_CONSTANTS(V) \
  V(true_value)            \
  V(false_value)           \
  V(null_value)            \
  V(undefined_value)       \
  V(the_hole_value)
329 
330 class AstValueFactory {
331  public:
AstValueFactory(Zone * zone,uint32_t hash_seed)332   AstValueFactory(Zone* zone, uint32_t hash_seed)
333       : string_table_(AstRawStringCompare),
334         values_(nullptr),
335         smis_(),
336         strings_(nullptr),
337         strings_end_(&strings_),
338         zone_(zone),
339         hash_seed_(hash_seed) {
340 #define F(name, str) name##_string_ = NULL;
341     STRING_CONSTANTS(F)
342 #undef F
343 #define F(name) name##_ = NULL;
344     OTHER_CONSTANTS(F)
345 #undef F
346     std::fill(smis_, smis_ + arraysize(smis_), nullptr);
347   }
348 
zone()349   Zone* zone() const { return zone_; }
350 
GetOneByteString(Vector<const uint8_t> literal)351   const AstRawString* GetOneByteString(Vector<const uint8_t> literal) {
352     return GetOneByteStringInternal(literal);
353   }
GetOneByteString(const char * string)354   const AstRawString* GetOneByteString(const char* string) {
355     return GetOneByteString(Vector<const uint8_t>(
356         reinterpret_cast<const uint8_t*>(string), StrLength(string)));
357   }
GetTwoByteString(Vector<const uint16_t> literal)358   const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) {
359     return GetTwoByteStringInternal(literal);
360   }
361   const AstRawString* GetString(Handle<String> literal);
362   const AstConsString* NewConsString(const AstString* left,
363                                      const AstString* right);
364   const AstRawString* ConcatStrings(const AstRawString* left,
365                                     const AstRawString* right);
366 
367   void Internalize(Isolate* isolate);
368 
369 #define F(name, str)                                                    \
370   const AstRawString* name##_string() {                                 \
371     if (name##_string_ == NULL) {                                       \
372       const char* data = str;                                           \
373       name##_string_ = GetOneByteString(                                \
374           Vector<const uint8_t>(reinterpret_cast<const uint8_t*>(data), \
375                                 static_cast<int>(strlen(data))));       \
376     }                                                                   \
377     return name##_string_;                                              \
378   }
379   STRING_CONSTANTS(F)
380 #undef F
381 
382   const AstValue* NewString(const AstRawString* string);
383   // A JavaScript symbol (ECMA-262 edition 6).
384   const AstValue* NewSymbol(const char* name);
385   const AstValue* NewNumber(double number, bool with_dot = false);
386   const AstValue* NewSmi(uint32_t number);
387   const AstValue* NewBoolean(bool b);
388   const AstValue* NewStringList(ZoneList<const AstRawString*>* strings);
389   const AstValue* NewNull();
390   const AstValue* NewUndefined();
391   const AstValue* NewTheHole();
392 
393  private:
394   static const uint32_t kMaxCachedSmi = 1 << 10;
395 
396   STATIC_ASSERT(kMaxCachedSmi <= Smi::kMaxValue);
397 
AddValue(AstValue * value)398   AstValue* AddValue(AstValue* value) {
399     value->set_next(values_);
400     values_ = value;
401     return value;
402   }
AddString(AstString * string)403   AstString* AddString(AstString* string) {
404     *strings_end_ = string;
405     strings_end_ = string->next_location();
406     return string;
407   }
ResetStrings()408   void ResetStrings() {
409     strings_ = nullptr;
410     strings_end_ = &strings_;
411   }
412   V8_EXPORT_PRIVATE AstRawString* GetOneByteStringInternal(
413       Vector<const uint8_t> literal);
414   AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal);
415   AstRawString* GetString(uint32_t hash, bool is_one_byte,
416                           Vector<const byte> literal_bytes);
417 
418   static bool AstRawStringCompare(void* a, void* b);
419 
420   // All strings are copied here, one after another (no NULLs inbetween).
421   base::CustomMatcherHashMap string_table_;
422   // For keeping track of all AstValues and AstRawStrings we've created (so that
423   // they can be internalized later).
424   AstValue* values_;
425 
426   AstValue* smis_[kMaxCachedSmi + 1];
427   // We need to keep track of strings_ in order since cons strings require their
428   // members to be internalized first.
429   AstString* strings_;
430   AstString** strings_end_;
431   Zone* zone_;
432 
433   uint32_t hash_seed_;
434 
435 #define F(name, str) const AstRawString* name##_string_;
436   STRING_CONSTANTS(F)
437 #undef F
438 
439 #define F(name) AstValue* name##_;
440   OTHER_CONSTANTS(F)
441 #undef F
442 };
443 }  // namespace internal
444 }  // namespace v8
445 
446 #undef STRING_CONSTANTS
447 #undef OTHER_CONSTANTS
448 
449 #endif  // V8_AST_AST_VALUE_FACTORY_H_
450