1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_AST_VALUE_FACTORY_H_
29 #define V8_AST_VALUE_FACTORY_H_
30 
31 #include "src/api.h"
32 #include "src/hashmap.h"
33 #include "src/utils.h"
34 
35 // AstString, AstValue and AstValueFactory are for storing strings and values
36 // independent of the V8 heap and internalizing them later. During parsing,
37 // AstStrings and AstValues are created and stored outside the heap, in
38 // AstValueFactory. After parsing, the strings and values are internalized
39 // (moved into the V8 heap).
40 namespace v8 {
41 namespace internal {
42 
43 class AstString : public ZoneObject {
44  public:
~AstString()45   virtual ~AstString() {}
46 
47   virtual int length() const = 0;
IsEmpty()48   bool IsEmpty() const { return length() == 0; }
49 
50   // Puts the string into the V8 heap.
51   virtual void Internalize(Isolate* isolate) = 0;
52 
53   // This function can be called after internalizing.
string()54   V8_INLINE Handle<String> string() const {
55     DCHECK(!string_.is_null());
56     return string_;
57   }
58 
59  protected:
60   // This is null until the string is internalized.
61   Handle<String> string_;
62 };
63 
64 
65 class AstRawString : public AstString {
66  public:
length()67   virtual int length() const OVERRIDE {
68     if (is_one_byte_)
69       return literal_bytes_.length();
70     return literal_bytes_.length() / 2;
71   }
72 
73   virtual void Internalize(Isolate* isolate) OVERRIDE;
74 
75   bool AsArrayIndex(uint32_t* index) const;
76 
77   // The string is not null-terminated, use length() to find out the length.
raw_data()78   const unsigned char* raw_data() const {
79     return literal_bytes_.start();
80   }
is_one_byte()81   bool is_one_byte() const { return is_one_byte_; }
82   bool IsOneByteEqualTo(const char* data) const;
FirstCharacter()83   uint16_t FirstCharacter() const {
84     if (is_one_byte_)
85       return literal_bytes_[0];
86     const uint16_t* c =
87         reinterpret_cast<const uint16_t*>(literal_bytes_.start());
88     return *c;
89   }
90 
91   // For storing AstRawStrings in a hash map.
hash()92   uint32_t hash() const {
93     return hash_;
94   }
95   static bool Compare(void* a, void* b);
96 
97  private:
98   friend class AstValueFactory;
99   friend class AstRawStringInternalizationKey;
100 
AstRawString(bool is_one_byte,const Vector<const byte> & literal_bytes,uint32_t hash)101   AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
102             uint32_t hash)
103       : is_one_byte_(is_one_byte), literal_bytes_(literal_bytes), hash_(hash) {}
104 
AstRawString()105   AstRawString()
106       : is_one_byte_(true),
107         hash_(0) {}
108 
109   bool is_one_byte_;
110 
111   // Points to memory owned by Zone.
112   Vector<const byte> literal_bytes_;
113   uint32_t hash_;
114 };
115 
116 
117 class AstConsString : public AstString {
118  public:
AstConsString(const AstString * left,const AstString * right)119   AstConsString(const AstString* left, const AstString* right)
120       : left_(left),
121         right_(right) {}
122 
length()123   virtual int length() const OVERRIDE {
124     return left_->length() + right_->length();
125   }
126 
127   virtual void Internalize(Isolate* isolate) OVERRIDE;
128 
129  private:
130   friend class AstValueFactory;
131 
132   const AstString* left_;
133   const AstString* right_;
134 };
135 
136 
// AstValue is either a string, a symbol, a number, a string array, a boolean,
// or a special value (null, undefined, the hole).
139 class AstValue : public ZoneObject {
140  public:
IsString()141   bool IsString() const {
142     return type_ == STRING;
143   }
144 
IsNumber()145   bool IsNumber() const {
146     return type_ == NUMBER || type_ == SMI;
147   }
148 
AsString()149   const AstRawString* AsString() const {
150     if (type_ == STRING)
151       return string_;
152     UNREACHABLE();
153     return 0;
154   }
155 
AsNumber()156   double AsNumber() const {
157     if (type_ == NUMBER)
158       return number_;
159     if (type_ == SMI)
160       return smi_;
161     UNREACHABLE();
162     return 0;
163   }
164 
EqualsString(const AstRawString * string)165   bool EqualsString(const AstRawString* string) const {
166     return type_ == STRING && string_ == string;
167   }
168 
169   bool IsPropertyName() const;
170 
171   bool BooleanValue() const;
172 
173   void Internalize(Isolate* isolate);
174 
175   // Can be called after Internalize has been called.
value()176   V8_INLINE Handle<Object> value() const {
177     if (type_ == STRING) {
178       return string_->string();
179     }
180     DCHECK(!value_.is_null());
181     return value_;
182   }
183 
184  private:
185   friend class AstValueFactory;
186 
187   enum Type {
188     STRING,
189     SYMBOL,
190     NUMBER,
191     SMI,
192     BOOLEAN,
193     STRING_ARRAY,
194     NULL_TYPE,
195     UNDEFINED,
196     THE_HOLE
197   };
198 
AstValue(const AstRawString * s)199   explicit AstValue(const AstRawString* s) : type_(STRING) { string_ = s; }
200 
AstValue(const char * name)201   explicit AstValue(const char* name) : type_(SYMBOL) { symbol_name_ = name; }
202 
AstValue(double n)203   explicit AstValue(double n) : type_(NUMBER) { number_ = n; }
204 
AstValue(Type t,int i)205   AstValue(Type t, int i) : type_(t) {
206     DCHECK(type_ == SMI);
207     smi_ = i;
208   }
209 
AstValue(bool b)210   explicit AstValue(bool b) : type_(BOOLEAN) { bool_ = b; }
211 
AstValue(ZoneList<const AstRawString * > * s)212   explicit AstValue(ZoneList<const AstRawString*>* s) : type_(STRING_ARRAY) {
213     strings_ = s;
214   }
215 
AstValue(Type t)216   explicit AstValue(Type t) : type_(t) {
217     DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE);
218   }
219 
220   Type type_;
221 
222   // Uninternalized value.
223   union {
224     const AstRawString* string_;
225     double number_;
226     int smi_;
227     bool bool_;
228     ZoneList<const AstRawString*>* strings_;
229     const char* symbol_name_;
230   };
231 
232   // Internalized value (empty before internalized).
233   Handle<Object> value_;
234 };
235 
236 
// X-macro for generating string constants. Each entry has the form
// V(identifier, "literal"); pass a two-argument macro as V to get one
// expansion per entry, in the order listed here.
#define STRING_CONSTANTS(V) \
  V(anonymous_function, "(anonymous function)") \
  V(arguments, "arguments") \
  V(constructor, "constructor") \
  V(done, "done") \
  V(dot, ".") \
  V(dot_for, ".for") \
  V(dot_generator, ".generator") \
  V(dot_generator_object, ".generator_object") \
  V(dot_iterator, ".iterator") \
  V(dot_module, ".module") \
  V(dot_result, ".result") \
  V(empty, "") \
  V(eval, "eval") \
  V(initialize_const_global, "initializeConstGlobal") \
  V(initialize_var_global, "initializeVarGlobal") \
  V(make_reference_error, "MakeReferenceError") \
  V(make_syntax_error, "MakeSyntaxError") \
  V(make_type_error, "MakeTypeError") \
  V(module, "module") \
  V(native, "native") \
  V(next, "next") \
  V(proto, "__proto__") \
  V(prototype, "prototype") \
  V(this, "this") \
  V(use_asm, "use asm") \
  V(use_strict, "use strict") \
  V(value, "value")
266 
267 
268 class AstValueFactory {
269  public:
AstValueFactory(Zone * zone,uint32_t hash_seed)270   AstValueFactory(Zone* zone, uint32_t hash_seed)
271       : string_table_(AstRawString::Compare),
272         zone_(zone),
273         isolate_(NULL),
274         hash_seed_(hash_seed) {
275 #define F(name, str) \
276     name##_string_ = NULL;
277     STRING_CONSTANTS(F)
278 #undef F
279   }
280 
281   const AstRawString* GetOneByteString(Vector<const uint8_t> literal);
GetOneByteString(const char * string)282   const AstRawString* GetOneByteString(const char* string) {
283     return GetOneByteString(Vector<const uint8_t>(
284         reinterpret_cast<const uint8_t*>(string), StrLength(string)));
285   }
286   const AstRawString* GetTwoByteString(Vector<const uint16_t> literal);
287   const AstRawString* GetString(Handle<String> literal);
288   const AstConsString* NewConsString(const AstString* left,
289                                      const AstString* right);
290 
291   void Internalize(Isolate* isolate);
IsInternalized()292   bool IsInternalized() {
293     return isolate_ != NULL;
294   }
295 
296 #define F(name, str) \
297   const AstRawString* name##_string() { \
298     if (name##_string_ == NULL) { \
299       const char* data = str; \
300       name##_string_ = GetOneByteString( \
301           Vector<const uint8_t>(reinterpret_cast<const uint8_t*>(data), \
302                                 static_cast<int>(strlen(data)))); \
303     } \
304     return name##_string_; \
305   }
306   STRING_CONSTANTS(F)
307 #undef F
308 
309   const AstValue* NewString(const AstRawString* string);
310   // A JavaScript symbol (ECMA-262 edition 6).
311   const AstValue* NewSymbol(const char* name);
312   const AstValue* NewNumber(double number);
313   const AstValue* NewSmi(int number);
314   const AstValue* NewBoolean(bool b);
315   const AstValue* NewStringList(ZoneList<const AstRawString*>* strings);
316   const AstValue* NewNull();
317   const AstValue* NewUndefined();
318   const AstValue* NewTheHole();
319 
320  private:
321   const AstRawString* GetString(uint32_t hash, bool is_one_byte,
322                                 Vector<const byte> literal_bytes);
323 
324   // All strings are copied here, one after another (no NULLs inbetween).
325   HashMap string_table_;
326   // For keeping track of all AstValues and AstRawStrings we've created (so that
327   // they can be internalized later).
328   List<AstValue*> values_;
329   List<AstString*> strings_;
330   Zone* zone_;
331   Isolate* isolate_;
332 
333   uint32_t hash_seed_;
334 
335 #define F(name, str) \
336   const AstRawString* name##_string_;
337   STRING_CONSTANTS(F)
338 #undef F
339 };
340 
341 } }  // namespace v8::internal
342 
343 #undef STRING_CONSTANTS
344 
345 #endif  // V8_AST_VALUE_FACTORY_H_
346