1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #ifndef V8_AST_AST_VALUE_FACTORY_H_
29 #define V8_AST_AST_VALUE_FACTORY_H_
30 
31 #include <forward_list>
32 
33 #include "src/base/hashmap.h"
34 #include "src/conversions.h"
35 #include "src/globals.h"
36 #include "src/heap/factory.h"
37 #include "src/isolate.h"
38 #include "src/utils.h"
39 
// Ast(Raw|Cons)String and AstValueFactory are for storing strings and
// values independent of the V8 heap and internalizing them later. During
// parsing, they are created and stored outside the heap, in AstValueFactory.
// After parsing, the strings and values are internalized (moved into the V8
// heap).
45 namespace v8 {
46 namespace internal {
47 
48 class AstRawString final : public ZoneObject {
49  public:
IsEmpty()50   bool IsEmpty() const { return literal_bytes_.length() == 0; }
length()51   int length() const {
52     return is_one_byte() ? literal_bytes_.length()
53                          : literal_bytes_.length() / 2;
54   }
55   bool AsArrayIndex(uint32_t* index) const;
56   bool IsOneByteEqualTo(const char* data) const;
57   uint16_t FirstCharacter() const;
58 
59   void Internalize(Isolate* isolate);
60 
61   // Access the physical representation:
is_one_byte()62   bool is_one_byte() const { return is_one_byte_; }
byte_length()63   int byte_length() const { return literal_bytes_.length(); }
raw_data()64   const unsigned char* raw_data() const {
65     return literal_bytes_.start();
66   }
67 
68   // For storing AstRawStrings in a hash map.
hash_field()69   uint32_t hash_field() const { return hash_field_; }
Hash()70   uint32_t Hash() const { return hash_field_ >> Name::kHashShift; }
71 
72   // This function can be called after internalizing.
string()73   V8_INLINE Handle<String> string() const {
74     DCHECK_NOT_NULL(string_);
75     DCHECK(has_string_);
76     return Handle<String>(string_);
77   }
78 
79  private:
80   friend class AstRawStringInternalizationKey;
81   friend class AstStringConstants;
82   friend class AstValueFactory;
83 
84   // Members accessed only by the AstValueFactory & related classes:
85   static bool Compare(void* a, void* b);
AstRawString(bool is_one_byte,const Vector<const byte> & literal_bytes,uint32_t hash_field)86   AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
87                uint32_t hash_field)
88       : next_(nullptr),
89         literal_bytes_(literal_bytes),
90         hash_field_(hash_field),
91         is_one_byte_(is_one_byte) {}
next()92   AstRawString* next() {
93     DCHECK(!has_string_);
94     return next_;
95   }
next_location()96   AstRawString** next_location() {
97     DCHECK(!has_string_);
98     return &next_;
99   }
100 
set_string(Handle<String> string)101   void set_string(Handle<String> string) {
102     DCHECK(!string.is_null());
103     DCHECK(!has_string_);
104     string_ = string.location();
105 #ifdef DEBUG
106     has_string_ = true;
107 #endif
108   }
109 
110   // {string_} is stored as String** instead of a Handle<String> so it can be
111   // stored in a union with {next_}.
112   union {
113     AstRawString* next_;
114     String** string_;
115   };
116 
117   Vector<const byte> literal_bytes_;  // Memory owned by Zone.
118   uint32_t hash_field_;
119   bool is_one_byte_;
120 #ifdef DEBUG
121   // (Debug-only:) Verify the object life-cylce: Some functions may only be
122   // called after internalization (that is, after a v8::internal::String has
123   // been set); some only before.
124   bool has_string_ = false;
125 #endif
126 };
127 
128 class AstConsString final : public ZoneObject {
129  public:
AddString(Zone * zone,const AstRawString * s)130   AstConsString* AddString(Zone* zone, const AstRawString* s) {
131     if (s->IsEmpty()) return this;
132     if (!IsEmpty()) {
133       // We're putting the new string to the head of the list, meaning
134       // the string segments will be in reverse order.
135       Segment* tmp = new (zone->New(sizeof(Segment))) Segment;
136       *tmp = segment_;
137       segment_.next = tmp;
138     }
139     segment_.string = s;
140     return this;
141   }
142 
IsEmpty()143   bool IsEmpty() const {
144     DCHECK_IMPLIES(segment_.string == nullptr, segment_.next == nullptr);
145     DCHECK_IMPLIES(segment_.string != nullptr, !segment_.string->IsEmpty());
146     return segment_.string == nullptr;
147   }
148 
149   void Internalize(Isolate* isolate);
150 
string()151   V8_INLINE Handle<String> string() const {
152     DCHECK_NOT_NULL(string_);
153     return Handle<String>(string_);
154   }
155 
156   std::forward_list<const AstRawString*> ToRawStrings() const;
157 
158  private:
159   friend class AstValueFactory;
160 
AstConsString()161   AstConsString() : next_(nullptr), segment_({nullptr, nullptr}) {}
162 
next()163   AstConsString* next() const { return next_; }
next_location()164   AstConsString** next_location() { return &next_; }
165 
166   // {string_} is stored as String** instead of a Handle<String> so it can be
167   // stored in a union with {next_}.
set_string(Handle<String> string)168   void set_string(Handle<String> string) { string_ = string.location(); }
169   union {
170     AstConsString* next_;
171     String** string_;
172   };
173 
174   struct Segment {
175     const AstRawString* string;
176     AstConsString::Segment* next;
177   };
178   Segment segment_;
179 };
180 
// Identifies a symbol by enumerator; stored in a single byte (uint8_t).
enum class AstSymbol : uint8_t { kHomeObjectSymbol };
182 
// Thin wrapper around the textual form of a BigInt literal.
//
// Only the pointer is stored; the characters are not copied, so the caller
// must keep the underlying buffer alive for as long as c_str() is used.
class AstBigInt {
 public:
  // |bigint| must be a NUL-terminated string of ASCII characters
  // representing a BigInt (suitable for passing to BigIntLiteral()
  // from conversions.h).
  explicit AstBigInt(const char* bigint) : bigint_(bigint) {}

  // Returns the exact pointer that was passed to the constructor.
  const char* c_str() const { return bigint_; }

 private:
  const char* bigint_;
};
195 
// For generating constants.
// Invokes F(name, str) once per entry, where |name| is the identifier stem
// used to build member and accessor names (name##_string_,
// name##_string()) and |str| is the literal text of the string.
#define AST_STRING_CONSTANTS(F)                                      \
  F(anonymous_function, "(anonymous function)")                      \
  F(arguments, "arguments")                                          \
  F(async, "async")                                                  \
  F(await, "await")                                                  \
  F(bigint, "bigint")                                                \
  F(boolean, "boolean")                                              \
  F(constructor, "constructor")                                      \
  F(default, "default")                                              \
  F(done, "done")                                                    \
  F(dot, ".")                                                        \
  F(dot_for, ".for")                                                 \
  F(dot_generator_object, ".generator_object")                       \
  F(dot_iterator, ".iterator")                                       \
  F(dot_result, ".result")                                           \
  F(dot_switch_tag, ".switch_tag")                                   \
  F(dot_catch, ".catch")                                             \
  F(empty, "")                                                       \
  F(eval, "eval")                                                    \
  F(function, "function")                                            \
  F(get_space, "get ")                                               \
  F(length, "length")                                                \
  F(let, "let")                                                      \
  F(name, "name")                                                    \
  F(native, "native")                                                \
  F(new_target, ".new.target")                                       \
  F(next, "next")                                                    \
  F(number, "number")                                                \
  F(object, "object")                                                \
  F(proto, "__proto__")                                              \
  F(prototype, "prototype")                                          \
  F(return, "return")                                                \
  F(set_space, "set ")                                               \
  F(star_default_star, "*default*")                                  \
  F(string, "string")                                                \
  F(symbol, "symbol")                                                \
  F(this, "this")                                                    \
  F(this_function, ".this_function")                                 \
  F(throw, "throw")                                                  \
  F(undefined, "undefined")                                          \
  F(use_asm, "use asm")                                              \
  F(use_strict, "use strict")                                        \
  F(value, "value")
240 
241 class AstStringConstants final {
242  public:
243   AstStringConstants(Isolate* isolate, uint64_t hash_seed);
244 
245 #define F(name, str) \
246   const AstRawString* name##_string() const { return name##_string_; }
AST_STRING_CONSTANTS(F)247   AST_STRING_CONSTANTS(F)
248 #undef F
249 
250   uint64_t hash_seed() const { return hash_seed_; }
string_table()251   const base::CustomMatcherHashMap* string_table() const {
252     return &string_table_;
253   }
254 
255  private:
256   Zone zone_;
257   base::CustomMatcherHashMap string_table_;
258   uint64_t hash_seed_;
259 
260 #define F(name, str) AstRawString* name##_string_;
261   AST_STRING_CONSTANTS(F)
262 #undef F
263 
264   DISALLOW_COPY_AND_ASSIGN(AstStringConstants);
265 };
266 
267 class AstValueFactory {
268  public:
AstValueFactory(Zone * zone,const AstStringConstants * string_constants,uint64_t hash_seed)269   AstValueFactory(Zone* zone, const AstStringConstants* string_constants,
270                   uint64_t hash_seed)
271       : string_table_(string_constants->string_table()),
272         strings_(nullptr),
273         strings_end_(&strings_),
274         cons_strings_(nullptr),
275         cons_strings_end_(&cons_strings_),
276         string_constants_(string_constants),
277         empty_cons_string_(nullptr),
278         zone_(zone),
279         hash_seed_(hash_seed) {
280     DCHECK_EQ(hash_seed, string_constants->hash_seed());
281     std::fill(one_character_strings_,
282               one_character_strings_ + arraysize(one_character_strings_),
283               nullptr);
284     empty_cons_string_ = NewConsString();
285   }
286 
zone()287   Zone* zone() const { return zone_; }
288 
GetOneByteString(Vector<const uint8_t> literal)289   const AstRawString* GetOneByteString(Vector<const uint8_t> literal) {
290     return GetOneByteStringInternal(literal);
291   }
GetOneByteString(const char * string)292   const AstRawString* GetOneByteString(const char* string) {
293     return GetOneByteString(Vector<const uint8_t>(
294         reinterpret_cast<const uint8_t*>(string), StrLength(string)));
295   }
GetTwoByteString(Vector<const uint16_t> literal)296   const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) {
297     return GetTwoByteStringInternal(literal);
298   }
299   const AstRawString* GetString(Handle<String> literal);
300   V8_EXPORT_PRIVATE AstConsString* NewConsString();
301   AstConsString* NewConsString(const AstRawString* str);
302   AstConsString* NewConsString(const AstRawString* str1,
303                                const AstRawString* str2);
304 
305   V8_EXPORT_PRIVATE void Internalize(Isolate* isolate);
306 
307 #define F(name, str)                           \
308   const AstRawString* name##_string() const {  \
309     return string_constants_->name##_string(); \
310   }
AST_STRING_CONSTANTS(F)311   AST_STRING_CONSTANTS(F)
312 #undef F
313   const AstConsString* empty_cons_string() const { return empty_cons_string_; }
314 
315  private:
AddString(AstRawString * string)316   AstRawString* AddString(AstRawString* string) {
317     *strings_end_ = string;
318     strings_end_ = string->next_location();
319     return string;
320   }
AddConsString(AstConsString * string)321   AstConsString* AddConsString(AstConsString* string) {
322     *cons_strings_end_ = string;
323     cons_strings_end_ = string->next_location();
324     return string;
325   }
ResetStrings()326   void ResetStrings() {
327     strings_ = nullptr;
328     strings_end_ = &strings_;
329     cons_strings_ = nullptr;
330     cons_strings_end_ = &cons_strings_;
331   }
332   V8_EXPORT_PRIVATE AstRawString* GetOneByteStringInternal(
333       Vector<const uint8_t> literal);
334   AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal);
335   AstRawString* GetString(uint32_t hash, bool is_one_byte,
336                           Vector<const byte> literal_bytes);
337 
338   // All strings are copied here, one after another (no zeroes inbetween).
339   base::CustomMatcherHashMap string_table_;
340 
341   // We need to keep track of strings_ in order since cons strings require their
342   // members to be internalized first.
343   AstRawString* strings_;
344   AstRawString** strings_end_;
345   AstConsString* cons_strings_;
346   AstConsString** cons_strings_end_;
347 
348   // Holds constant string values which are shared across the isolate.
349   const AstStringConstants* string_constants_;
350   const AstConsString* empty_cons_string_;
351 
352   // Caches one character lowercase strings (for minified code).
353   AstRawString* one_character_strings_[26];
354 
355   Zone* zone_;
356 
357   uint64_t hash_seed_;
358 };
359 }  // namespace internal
360 }  // namespace v8
361 
362 #endif  // V8_AST_AST_VALUE_FACTORY_H_
363