1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "src/ast/ast-value-factory.h"
29 
30 #include "src/char-predicates-inl.h"
31 #include "src/objects-inl.h"
32 #include "src/objects.h"
33 #include "src/string-hasher.h"
34 #include "src/utils-inl.h"
35 
36 namespace v8 {
37 namespace internal {
38 
39 namespace {
40 
41 // For using StringToArrayIndex.
42 class OneByteStringStream {
43  public:
OneByteStringStream(Vector<const byte> lb)44   explicit OneByteStringStream(Vector<const byte> lb) :
45       literal_bytes_(lb), pos_(0) {}
46 
HasMore()47   bool HasMore() { return pos_ < literal_bytes_.length(); }
GetNext()48   uint16_t GetNext() { return literal_bytes_[pos_++]; }
49 
50  private:
51   Vector<const byte> literal_bytes_;
52   int pos_;
53 };
54 
55 }  // namespace
56 
57 class AstRawStringInternalizationKey : public StringTableKey {
58  public:
AstRawStringInternalizationKey(const AstRawString * string)59   explicit AstRawStringInternalizationKey(const AstRawString* string)
60       : StringTableKey(string->hash_field()), string_(string) {}
61 
IsMatch(Object * other)62   bool IsMatch(Object* other) override {
63     if (string_->is_one_byte())
64       return String::cast(other)->IsOneByteEqualTo(string_->literal_bytes_);
65     return String::cast(other)->IsTwoByteEqualTo(
66         Vector<const uint16_t>::cast(string_->literal_bytes_));
67   }
68 
AsHandle(Isolate * isolate)69   Handle<String> AsHandle(Isolate* isolate) override {
70     if (string_->is_one_byte())
71       return isolate->factory()->NewOneByteInternalizedString(
72           string_->literal_bytes_, string_->hash_field());
73     return isolate->factory()->NewTwoByteInternalizedString(
74         Vector<const uint16_t>::cast(string_->literal_bytes_),
75         string_->hash_field());
76   }
77 
78  private:
79   const AstRawString* string_;
80 };
81 
Internalize(Isolate * isolate)82 void AstRawString::Internalize(Isolate* isolate) {
83   DCHECK(!has_string_);
84   if (literal_bytes_.length() == 0) {
85     set_string(isolate->factory()->empty_string());
86   } else {
87     AstRawStringInternalizationKey key(this);
88     set_string(StringTable::LookupKey(isolate, &key));
89   }
90 }
91 
AsArrayIndex(uint32_t * index) const92 bool AstRawString::AsArrayIndex(uint32_t* index) const {
93   // The StringHasher will set up the hash in such a way that we can use it to
94   // figure out whether the string is convertible to an array index.
95   if ((hash_field_ & Name::kIsNotArrayIndexMask) != 0) return false;
96   if (length() <= Name::kMaxCachedArrayIndexLength) {
97     *index = Name::ArrayIndexValueBits::decode(hash_field_);
98   } else {
99     OneByteStringStream stream(literal_bytes_);
100     CHECK(StringToArrayIndex(&stream, index));
101   }
102   return true;
103 }
104 
IsOneByteEqualTo(const char * data) const105 bool AstRawString::IsOneByteEqualTo(const char* data) const {
106   if (!is_one_byte()) return false;
107 
108   size_t length = static_cast<size_t>(literal_bytes_.length());
109   if (length != strlen(data)) return false;
110 
111   return 0 == strncmp(reinterpret_cast<const char*>(literal_bytes_.start()),
112                       data, length);
113 }
114 
FirstCharacter() const115 uint16_t AstRawString::FirstCharacter() const {
116   if (is_one_byte()) return literal_bytes_[0];
117   const uint16_t* c = reinterpret_cast<const uint16_t*>(literal_bytes_.start());
118   return *c;
119 }
120 
Compare(void * a,void * b)121 bool AstRawString::Compare(void* a, void* b) {
122   const AstRawString* lhs = static_cast<AstRawString*>(a);
123   const AstRawString* rhs = static_cast<AstRawString*>(b);
124   DCHECK_EQ(lhs->Hash(), rhs->Hash());
125 
126   if (lhs->length() != rhs->length()) return false;
127   const unsigned char* l = lhs->raw_data();
128   const unsigned char* r = rhs->raw_data();
129   size_t length = rhs->length();
130   if (lhs->is_one_byte()) {
131     if (rhs->is_one_byte()) {
132       return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(l),
133                                   reinterpret_cast<const uint8_t*>(r),
134                                   length) == 0;
135     } else {
136       return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(l),
137                                   reinterpret_cast<const uint16_t*>(r),
138                                   length) == 0;
139     }
140   } else {
141     if (rhs->is_one_byte()) {
142       return CompareCharsUnsigned(reinterpret_cast<const uint16_t*>(l),
143                                   reinterpret_cast<const uint8_t*>(r),
144                                   length) == 0;
145     } else {
146       return CompareCharsUnsigned(reinterpret_cast<const uint16_t*>(l),
147                                   reinterpret_cast<const uint16_t*>(r),
148                                   length) == 0;
149     }
150   }
151 }
152 
Internalize(Isolate * isolate)153 void AstConsString::Internalize(Isolate* isolate) {
154   if (IsEmpty()) {
155     set_string(isolate->factory()->empty_string());
156     return;
157   }
158   // AstRawStrings are internalized before AstConsStrings, so
159   // AstRawString::string() will just work.
160   Handle<String> tmp(segment_.string->string());
161   for (AstConsString::Segment* current = segment_.next; current != nullptr;
162        current = current->next) {
163     tmp = isolate->factory()
164               ->NewConsString(current->string->string(), tmp)
165               .ToHandleChecked();
166   }
167   set_string(tmp);
168 }
169 
ToRawStrings() const170 std::forward_list<const AstRawString*> AstConsString::ToRawStrings() const {
171   std::forward_list<const AstRawString*> result;
172   if (IsEmpty()) {
173     return result;
174   }
175 
176   result.emplace_front(segment_.string);
177   for (AstConsString::Segment* current = segment_.next; current != nullptr;
178        current = current->next) {
179     result.emplace_front(current->string);
180   }
181   return result;
182 }
183 
AstStringConstants(Isolate * isolate,uint64_t hash_seed)184 AstStringConstants::AstStringConstants(Isolate* isolate, uint64_t hash_seed)
185     : zone_(isolate->allocator(), ZONE_NAME),
186       string_table_(AstRawString::Compare),
187       hash_seed_(hash_seed) {
188   DCHECK(ThreadId::Current().Equals(isolate->thread_id()));
189 #define F(name, str)                                                       \
190   {                                                                        \
191     const char* data = str;                                                \
192     Vector<const uint8_t> literal(reinterpret_cast<const uint8_t*>(data),  \
193                                   static_cast<int>(strlen(data)));         \
194     uint32_t hash_field = StringHasher::HashSequentialString<uint8_t>(     \
195         literal.start(), literal.length(), hash_seed_);                    \
196     name##_string_ = new (&zone_) AstRawString(true, literal, hash_field); \
197     /* The Handle returned by the factory is located on the roots */       \
198     /* array, not on the temporary HandleScope, so this is safe.  */       \
199     name##_string_->set_string(isolate->factory()->name##_string());       \
200     base::HashMap::Entry* entry =                                          \
201         string_table_.InsertNew(name##_string_, name##_string_->Hash());   \
202     DCHECK_NULL(entry->value);                                             \
203     entry->value = reinterpret_cast<void*>(1);                             \
204   }
205   AST_STRING_CONSTANTS(F)
206 #undef F
207 }
208 
GetOneByteStringInternal(Vector<const uint8_t> literal)209 AstRawString* AstValueFactory::GetOneByteStringInternal(
210     Vector<const uint8_t> literal) {
211   if (literal.length() == 1 && IsInRange(literal[0], 'a', 'z')) {
212     int key = literal[0] - 'a';
213     if (one_character_strings_[key] == nullptr) {
214       uint32_t hash_field = StringHasher::HashSequentialString<uint8_t>(
215           literal.start(), literal.length(), hash_seed_);
216       one_character_strings_[key] = GetString(hash_field, true, literal);
217     }
218     return one_character_strings_[key];
219   }
220   uint32_t hash_field = StringHasher::HashSequentialString<uint8_t>(
221       literal.start(), literal.length(), hash_seed_);
222   return GetString(hash_field, true, literal);
223 }
224 
GetTwoByteStringInternal(Vector<const uint16_t> literal)225 AstRawString* AstValueFactory::GetTwoByteStringInternal(
226     Vector<const uint16_t> literal) {
227   uint32_t hash_field = StringHasher::HashSequentialString<uint16_t>(
228       literal.start(), literal.length(), hash_seed_);
229   return GetString(hash_field, false, Vector<const byte>::cast(literal));
230 }
231 
GetString(Handle<String> literal)232 const AstRawString* AstValueFactory::GetString(Handle<String> literal) {
233   AstRawString* result = nullptr;
234   DisallowHeapAllocation no_gc;
235   String::FlatContent content = literal->GetFlatContent();
236   if (content.IsOneByte()) {
237     result = GetOneByteStringInternal(content.ToOneByteVector());
238   } else {
239     DCHECK(content.IsTwoByte());
240     result = GetTwoByteStringInternal(content.ToUC16Vector());
241   }
242   return result;
243 }
244 
NewConsString()245 AstConsString* AstValueFactory::NewConsString() {
246   AstConsString* new_string = new (zone_) AstConsString;
247   DCHECK_NOT_NULL(new_string);
248   AddConsString(new_string);
249   return new_string;
250 }
251 
NewConsString(const AstRawString * str)252 AstConsString* AstValueFactory::NewConsString(const AstRawString* str) {
253   return NewConsString()->AddString(zone_, str);
254 }
255 
NewConsString(const AstRawString * str1,const AstRawString * str2)256 AstConsString* AstValueFactory::NewConsString(const AstRawString* str1,
257                                               const AstRawString* str2) {
258   return NewConsString()->AddString(zone_, str1)->AddString(zone_, str2);
259 }
260 
Internalize(Isolate * isolate)261 void AstValueFactory::Internalize(Isolate* isolate) {
262   // Strings need to be internalized before values, because values refer to
263   // strings.
264   for (AstRawString* current = strings_; current != nullptr;) {
265     AstRawString* next = current->next();
266     current->Internalize(isolate);
267     current = next;
268   }
269 
270   // AstConsStrings refer to AstRawStrings.
271   for (AstConsString* current = cons_strings_; current != nullptr;) {
272     AstConsString* next = current->next();
273     current->Internalize(isolate);
274     current = next;
275   }
276 
277   ResetStrings();
278 }
279 
GetString(uint32_t hash_field,bool is_one_byte,Vector<const byte> literal_bytes)280 AstRawString* AstValueFactory::GetString(uint32_t hash_field, bool is_one_byte,
281                                          Vector<const byte> literal_bytes) {
282   // literal_bytes here points to whatever the user passed, and this is OK
283   // because we use vector_compare (which checks the contents) to compare
284   // against the AstRawStrings which are in the string_table_. We should not
285   // return this AstRawString.
286   AstRawString key(is_one_byte, literal_bytes, hash_field);
287   base::HashMap::Entry* entry = string_table_.LookupOrInsert(&key, key.Hash());
288   if (entry->value == nullptr) {
289     // Copy literal contents for later comparison.
290     int length = literal_bytes.length();
291     byte* new_literal_bytes = zone_->NewArray<byte>(length);
292     memcpy(new_literal_bytes, literal_bytes.start(), length);
293     AstRawString* new_string = new (zone_) AstRawString(
294         is_one_byte, Vector<const byte>(new_literal_bytes, length), hash_field);
295     CHECK_NOT_NULL(new_string);
296     AddString(new_string);
297     entry->key = new_string;
298     entry->value = reinterpret_cast<void*>(1);
299   }
300   return reinterpret_cast<AstRawString*>(entry->key);
301 }
302 
303 }  // namespace internal
304 }  // namespace v8
305