1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/json-parser.h"
6 
7 #include "src/char-predicates-inl.h"
8 #include "src/conversions.h"
9 #include "src/debug/debug.h"
10 #include "src/field-type.h"
11 #include "src/messages.h"
12 #include "src/objects-inl.h"
13 #include "src/objects/hash-table-inl.h"
14 #include "src/property-descriptor.h"
15 #include "src/string-hasher.h"
16 #include "src/transitions.h"
17 #include "src/unicode-cache.h"
18 
19 namespace v8 {
20 namespace internal {
21 
22 namespace {
23 
24 // A vector-like data structure that uses a larger vector for allocation, and
25 // provides limited utility access. The original vector must not be used for the
26 // duration, and it may even be reallocated. This allows vector storage to be
27 // reused for the properties of sibling objects.
28 template <typename Container>
29 class VectorSegment {
30  public:
31   using value_type = typename Container::value_type;
32 
VectorSegment(Container * container)33   explicit VectorSegment(Container* container)
34       : container_(*container), begin_(container->size()) {}
~VectorSegment()35   ~VectorSegment() { container_.resize(begin_); }
36 
GetVector() const37   Vector<const value_type> GetVector() const {
38     return Vector<const value_type>(container_.data() + begin_,
39                                     container_.size() - begin_);
40   }
41 
42   template <typename T>
push_back(T && value)43   void push_back(T&& value) {
44     container_.push_back(std::forward<T>(value));
45   }
46 
47  private:
48   Container& container_;
49   const typename Container::size_type begin_;
50 };
51 
52 }  // namespace
53 
Internalize(Isolate * isolate,Handle<Object> object,Handle<Object> reviver)54 MaybeHandle<Object> JsonParseInternalizer::Internalize(Isolate* isolate,
55                                                        Handle<Object> object,
56                                                        Handle<Object> reviver) {
57   DCHECK(reviver->IsCallable());
58   JsonParseInternalizer internalizer(isolate,
59                                      Handle<JSReceiver>::cast(reviver));
60   Handle<JSObject> holder =
61       isolate->factory()->NewJSObject(isolate->object_function());
62   Handle<String> name = isolate->factory()->empty_string();
63   JSObject::AddProperty(isolate, holder, name, object, NONE);
64   return internalizer.InternalizeJsonProperty(holder, name);
65 }
66 
InternalizeJsonProperty(Handle<JSReceiver> holder,Handle<String> name)67 MaybeHandle<Object> JsonParseInternalizer::InternalizeJsonProperty(
68     Handle<JSReceiver> holder, Handle<String> name) {
69   HandleScope outer_scope(isolate_);
70   Handle<Object> value;
71   ASSIGN_RETURN_ON_EXCEPTION(
72       isolate_, value, Object::GetPropertyOrElement(isolate_, holder, name),
73       Object);
74   if (value->IsJSReceiver()) {
75     Handle<JSReceiver> object = Handle<JSReceiver>::cast(value);
76     Maybe<bool> is_array = Object::IsArray(object);
77     if (is_array.IsNothing()) return MaybeHandle<Object>();
78     if (is_array.FromJust()) {
79       Handle<Object> length_object;
80       ASSIGN_RETURN_ON_EXCEPTION(
81           isolate_, length_object,
82           Object::GetLengthFromArrayLike(isolate_, object), Object);
83       double length = length_object->Number();
84       for (double i = 0; i < length; i++) {
85         HandleScope inner_scope(isolate_);
86         Handle<Object> index = isolate_->factory()->NewNumber(i);
87         Handle<String> name = isolate_->factory()->NumberToString(index);
88         if (!RecurseAndApply(object, name)) return MaybeHandle<Object>();
89       }
90     } else {
91       Handle<FixedArray> contents;
92       ASSIGN_RETURN_ON_EXCEPTION(
93           isolate_, contents,
94           KeyAccumulator::GetKeys(object, KeyCollectionMode::kOwnOnly,
95                                   ENUMERABLE_STRINGS,
96                                   GetKeysConversion::kConvertToString),
97           Object);
98       for (int i = 0; i < contents->length(); i++) {
99         HandleScope inner_scope(isolate_);
100         Handle<String> name(String::cast(contents->get(i)), isolate_);
101         if (!RecurseAndApply(object, name)) return MaybeHandle<Object>();
102       }
103     }
104   }
105   Handle<Object> argv[] = {name, value};
106   Handle<Object> result;
107   ASSIGN_RETURN_ON_EXCEPTION(
108       isolate_, result, Execution::Call(isolate_, reviver_, holder, 2, argv),
109       Object);
110   return outer_scope.CloseAndEscape(result);
111 }
112 
RecurseAndApply(Handle<JSReceiver> holder,Handle<String> name)113 bool JsonParseInternalizer::RecurseAndApply(Handle<JSReceiver> holder,
114                                             Handle<String> name) {
115   STACK_CHECK(isolate_, false);
116 
117   Handle<Object> result;
118   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
119       isolate_, result, InternalizeJsonProperty(holder, name), false);
120   Maybe<bool> change_result = Nothing<bool>();
121   if (result->IsUndefined(isolate_)) {
122     change_result = JSReceiver::DeletePropertyOrElement(holder, name,
123                                                         LanguageMode::kSloppy);
124   } else {
125     PropertyDescriptor desc;
126     desc.set_value(result);
127     desc.set_configurable(true);
128     desc.set_enumerable(true);
129     desc.set_writable(true);
130     change_result = JSReceiver::DefineOwnProperty(isolate_, holder, name, &desc,
131                                                   kDontThrow);
132   }
133   MAYBE_RETURN(change_result, false);
134   return true;
135 }
136 
137 template <bool seq_one_byte>
JsonParser(Isolate * isolate,Handle<String> source)138 JsonParser<seq_one_byte>::JsonParser(Isolate* isolate, Handle<String> source)
139     : source_(source),
140       source_length_(source->length()),
141       isolate_(isolate),
142       zone_(isolate_->allocator(), ZONE_NAME),
143       object_constructor_(isolate_->native_context()->object_function(),
144                           isolate_),
145       position_(-1),
146       properties_(&zone_) {
147   source_ = String::Flatten(isolate, source_);
148   pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
149 
150   // Optimized fast case where we only have Latin1 characters.
151   if (seq_one_byte) {
152     seq_source_ = Handle<SeqOneByteString>::cast(source_);
153   }
154 }
155 
156 template <bool seq_one_byte>
ParseJson()157 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
158   // Advance to the first character (possibly EOS)
159   AdvanceSkipWhitespace();
160   Handle<Object> result = ParseJsonValue();
161   if (result.is_null() || c0_ != kEndOfString) {
162     // Some exception (for example stack overflow) is already pending.
163     if (isolate_->has_pending_exception()) return Handle<Object>::null();
164 
165     // Parse failed. Current character is the unexpected token.
166     Factory* factory = this->factory();
167     MessageTemplate::Template message;
168     Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate());
169     Handle<Object> arg2;
170 
171     switch (c0_) {
172       case kEndOfString:
173         message = MessageTemplate::kJsonParseUnexpectedEOS;
174         break;
175       case '-':
176       case '0':
177       case '1':
178       case '2':
179       case '3':
180       case '4':
181       case '5':
182       case '6':
183       case '7':
184       case '8':
185       case '9':
186         message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
187         break;
188       case '"':
189         message = MessageTemplate::kJsonParseUnexpectedTokenString;
190         break;
191       default:
192         message = MessageTemplate::kJsonParseUnexpectedToken;
193         arg2 = arg1;
194         arg1 = factory->LookupSingleCharacterStringFromCode(c0_);
195         break;
196     }
197 
198     Handle<Script> script(factory->NewScript(source_));
199     if (isolate()->NeedsSourcePositionsForProfiling()) {
200       Script::InitLineEnds(script);
201     }
202     // We should sent compile error event because we compile JSON object in
203     // separated source file.
204     isolate()->debug()->OnCompileError(script);
205     MessageLocation location(script, position_, position_ + 1);
206     Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
207     return isolate()->template Throw<Object>(error, &location);
208   }
209   return result;
210 }
211 
// Forward declaration of a free function named InternalizeJsonProperty.
// NOTE(review): no definition or call of this free function is visible in this
// part of the file (the member JsonParseInternalizer::InternalizeJsonProperty
// has a different signature) -- confirm whether the declaration is still
// needed.
MaybeHandle<Object> InternalizeJsonProperty(Handle<JSObject> holder,
                                            Handle<String> key);
214 
215 template <bool seq_one_byte>
Advance()216 void JsonParser<seq_one_byte>::Advance() {
217   position_++;
218   if (position_ >= source_length_) {
219     c0_ = kEndOfString;
220   } else if (seq_one_byte) {
221     c0_ = seq_source_->SeqOneByteStringGet(position_);
222   } else {
223     c0_ = source_->Get(position_);
224   }
225 }
226 
227 template <bool seq_one_byte>
AdvanceSkipWhitespace()228 void JsonParser<seq_one_byte>::AdvanceSkipWhitespace() {
229   do {
230     Advance();
231   } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
232 }
233 
234 template <bool seq_one_byte>
SkipWhitespace()235 void JsonParser<seq_one_byte>::SkipWhitespace() {
236   while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
237     Advance();
238   }
239 }
240 
// Advances the cursor by one character and returns the character now under
// it (kEndOfString once the end of the source has been reached).
template <bool seq_one_byte>
uc32 JsonParser<seq_one_byte>::AdvanceGetChar() {
  Advance();
  return c0_;
}
246 
247 template <bool seq_one_byte>
MatchSkipWhiteSpace(uc32 c)248 bool JsonParser<seq_one_byte>::MatchSkipWhiteSpace(uc32 c) {
249   if (c0_ == c) {
250     AdvanceSkipWhitespace();
251     return true;
252   }
253   return false;
254 }
255 
256 template <bool seq_one_byte>
ParseJsonString(Handle<String> expected)257 bool JsonParser<seq_one_byte>::ParseJsonString(Handle<String> expected) {
258   int length = expected->length();
259   if (source_->length() - position_ - 1 > length) {
260     DisallowHeapAllocation no_gc;
261     String::FlatContent content = expected->GetFlatContent();
262     if (content.IsOneByte()) {
263       DCHECK_EQ('"', c0_);
264       const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
265       const uint8_t* expected_chars = content.ToOneByteVector().start();
266       for (int i = 0; i < length; i++) {
267         uint8_t c0 = input_chars[i];
268         if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
269           return false;
270         }
271       }
272       if (input_chars[length] == '"') {
273         position_ = position_ + length + 1;
274         AdvanceSkipWhitespace();
275         return true;
276       }
277     }
278   }
279   return false;
280 }
281 
282 // Parse any JSON value.
283 template <bool seq_one_byte>
ParseJsonValue()284 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
285   StackLimitCheck stack_check(isolate_);
286   if (stack_check.HasOverflowed()) {
287     isolate_->StackOverflow();
288     return Handle<Object>::null();
289   }
290 
291   if (stack_check.InterruptRequested() &&
292       isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
293     return Handle<Object>::null();
294   }
295 
296   if (c0_ == '"') return ParseJsonString();
297   if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
298   if (c0_ == '{') return ParseJsonObject();
299   if (c0_ == '[') return ParseJsonArray();
300   if (c0_ == 'f') {
301     if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
302         AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
303       AdvanceSkipWhitespace();
304       return factory()->false_value();
305     }
306     return ReportUnexpectedCharacter();
307   }
308   if (c0_ == 't') {
309     if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
310         AdvanceGetChar() == 'e') {
311       AdvanceSkipWhitespace();
312       return factory()->true_value();
313     }
314     return ReportUnexpectedCharacter();
315   }
316   if (c0_ == 'n') {
317     if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
318         AdvanceGetChar() == 'l') {
319       AdvanceSkipWhitespace();
320       return factory()->null_value();
321     }
322     return ReportUnexpectedCharacter();
323   }
324   return ReportUnexpectedCharacter();
325 }
326 
327 template <bool seq_one_byte>
ParseElement(Handle<JSObject> json_object)328 ParseElementResult JsonParser<seq_one_byte>::ParseElement(
329     Handle<JSObject> json_object) {
330   uint32_t index = 0;
331   // Maybe an array index, try to parse it.
332   if (c0_ == '0') {
333     // With a leading zero, the string has to be "0" only to be an index.
334     Advance();
335   } else {
336     do {
337       int d = c0_ - '0';
338       if (index > 429496729U - ((d + 3) >> 3)) break;
339       index = (index * 10) + d;
340       Advance();
341     } while (IsDecimalDigit(c0_));
342   }
343 
344   if (c0_ == '"') {
345     // Successfully parsed index, parse and store element.
346     AdvanceSkipWhitespace();
347 
348     if (c0_ == ':') {
349       AdvanceSkipWhitespace();
350       Handle<Object> value = ParseJsonValue();
351       if (!value.is_null()) {
352         JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
353             .Assert();
354         return kElementFound;
355       } else {
356         return kNullHandle;
357       }
358     }
359   }
360   return kElementNotFound;
361 }
362 
// Parse a JSON object. Position must be right at '{'.
//
// Parsing runs in two phases. While |transitioning| is true, named property
// values are buffered in |properties| and |map| is advanced along existing map
// transitions; nothing is written to the object's fields until
// CommitStateToJsonObject() is called. As soon as a key has no matching
// transition, or a value does not fit the expected representation, the
// buffered state is committed and the remaining properties are defined one by
// one. Keys that look like array indices are stored as elements through
// ParseElement() in both phases.
//
// Returns the new object (escaped from the local handle scope), a null handle
// if an element value failed to parse, or the result of
// ReportUnexpectedCharacter() on malformed input.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
  HandleScope scope(isolate());
  Handle<JSObject> json_object =
      factory()->NewJSObject(object_constructor(), pretenure_);
  Handle<Map> map(json_object->map(), isolate());
  // Index of the next descriptor to be filled while following transitions.
  int descriptor = 0;
  // Borrows the tail of the shared properties_ vector; the segment's
  // destructor shrinks the vector back when this function returns.
  VectorSegment<ZoneVector<Handle<Object>>> properties(&properties_);
  DCHECK_EQ(c0_, '{');

  bool transitioning = true;

  AdvanceSkipWhitespace();
  if (c0_ != '}') {
    // Phase 1: follow map transitions for as long as possible.
    do {
      if (c0_ != '"') return ReportUnexpectedCharacter();

      // Remember where the key starts so it can be re-parsed if the index
      // fast path below does not apply.
      int start_position = position_;
      Advance();

      if (IsDecimalDigit(c0_)) {
        ParseElementResult element_result = ParseElement(json_object);
        if (element_result == kNullHandle) return Handle<Object>::null();
        // Note: |continue| in a do/while jumps to the loop condition.
        if (element_result == kElementFound) continue;
      }
      // Not an index, fallback to the slow path.

      position_ = start_position;
#ifdef DEBUG
      // Only restored in debug builds, where DCHECKs inspect c0_.
      c0_ = '"';
#endif

      Handle<String> key;
      Handle<Object> value;

      // Try to follow existing transitions as long as possible. Once we stop
      // transitioning, no transition can be found anymore.
      DCHECK(transitioning);
      // First check whether there is a single expected transition. If so, try
      // to parse it first.
      bool follow_expected = false;
      Handle<Map> target;
      if (seq_one_byte) {
        DisallowHeapAllocation no_gc;
        TransitionsAccessor transitions(isolate(), *map, &no_gc);
        key = transitions.ExpectedTransitionKey();
        follow_expected = !key.is_null() && ParseJsonString(key);
        // If the expected transition hits, follow it.
        if (follow_expected) {
          target = transitions.ExpectedTransitionTarget();
        }
      }
      if (!follow_expected) {
        // If the expected transition failed, parse an internalized string and
        // try to find a matching transition.
        key = ParseJsonString();
        if (key.is_null()) return ReportUnexpectedCharacter();

        // If a transition was found, follow it and continue.
        transitioning = TransitionsAccessor(isolate(), map)
                            .FindTransitionToField(key)
                            .ToHandle(&target);
      }
      if (c0_ != ':') return ReportUnexpectedCharacter();

      AdvanceSkipWhitespace();
      value = ParseJsonValue();
      if (value.is_null()) return ReportUnexpectedCharacter();

      if (transitioning) {
        PropertyDetails details =
            target->instance_descriptors()->GetDetails(descriptor);
        Representation expected_representation = details.representation();

        if (value->FitsRepresentation(expected_representation)) {
          // Widen the field type of the target map if the value is a heap
          // object that the current field type does not contain.
          if (expected_representation.IsHeapObject() &&
              !target->instance_descriptors()
                   ->GetFieldType(descriptor)
                   ->NowContains(value)) {
            Handle<FieldType> value_type(
                value->OptimalType(isolate(), expected_representation));
            Map::GeneralizeField(isolate(), target, descriptor,
                                 details.constness(), expected_representation,
                                 value_type);
          }
          DCHECK(target->instance_descriptors()
                     ->GetFieldType(descriptor)
                     ->NowContains(value));
          // Buffer the value and adopt the transition target as current map.
          properties.push_back(value);
          map = target;
          descriptor++;
          continue;
        } else {
          // The value does not fit the target's representation; give up on
          // transitions for the rest of this object.
          transitioning = false;
        }
      }

      DCHECK(!transitioning);

      // Commit the intermediate state to the object and stop transitioning.
      CommitStateToJsonObject(json_object, map, properties.GetVector());

      // The property that broke the transition chain is defined directly.
      JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
          .Check();
    } while (transitioning && MatchSkipWhiteSpace(','));

    // If we transitioned until the very end, transition the map now.
    if (transitioning) {
      CommitStateToJsonObject(json_object, map, properties.GetVector());
    } else {
      // Phase 2: define every remaining property individually.
      while (MatchSkipWhiteSpace(',')) {
        HandleScope local_scope(isolate());
        if (c0_ != '"') return ReportUnexpectedCharacter();

        int start_position = position_;
        Advance();

        if (IsDecimalDigit(c0_)) {
          ParseElementResult element_result = ParseElement(json_object);
          if (element_result == kNullHandle) return Handle<Object>::null();
          if (element_result == kElementFound) continue;
        }
        // Not an index, fallback to the slow path.

        position_ = start_position;
#ifdef DEBUG
        c0_ = '"';
#endif

        Handle<String> key;
        Handle<Object> value;

        key = ParseJsonString();
        if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();

        AdvanceSkipWhitespace();
        value = ParseJsonValue();
        if (value.is_null()) return ReportUnexpectedCharacter();

        JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
                                                          value)
            .Check();
      }
    }

    if (c0_ != '}') {
      return ReportUnexpectedCharacter();
    }
  }
  // Step past the closing '}' and any trailing whitespace.
  AdvanceSkipWhitespace();
  return scope.CloseAndEscape(json_object);
}
516 
517 template <bool seq_one_byte>
CommitStateToJsonObject(Handle<JSObject> json_object,Handle<Map> map,Vector<const Handle<Object>> properties)518 void JsonParser<seq_one_byte>::CommitStateToJsonObject(
519     Handle<JSObject> json_object, Handle<Map> map,
520     Vector<const Handle<Object>> properties) {
521   JSObject::AllocateStorageForMap(json_object, map);
522   DCHECK(!json_object->map()->is_dictionary_map());
523 
524   DisallowHeapAllocation no_gc;
525   DescriptorArray* descriptors = json_object->map()->instance_descriptors();
526   for (int i = 0; i < properties.length(); i++) {
527     Handle<Object> value = properties[i];
528     // Initializing store.
529     json_object->WriteToField(i, descriptors->GetDetails(i), *value);
530   }
531 }
532 
533 class ElementKindLattice {
534  private:
535   enum {
536     SMI_ELEMENTS,
537     NUMBER_ELEMENTS,
538     OBJECT_ELEMENTS,
539   };
540 
541  public:
ElementKindLattice()542   ElementKindLattice() : value_(SMI_ELEMENTS) {}
543 
Update(Handle<Object> o)544   void Update(Handle<Object> o) {
545     if (o->IsSmi()) {
546       return;
547     } else if (o->IsHeapNumber()) {
548       if (value_ < NUMBER_ELEMENTS) value_ = NUMBER_ELEMENTS;
549     } else {
550       DCHECK(!o->IsNumber());
551       value_ = OBJECT_ELEMENTS;
552     }
553   }
554 
GetElementsKind() const555   ElementsKind GetElementsKind() const {
556     switch (value_) {
557       case SMI_ELEMENTS:
558         return PACKED_SMI_ELEMENTS;
559       case NUMBER_ELEMENTS:
560         return PACKED_DOUBLE_ELEMENTS;
561       case OBJECT_ELEMENTS:
562         return PACKED_ELEMENTS;
563       default:
564         UNREACHABLE();
565         return PACKED_ELEMENTS;
566     }
567   }
568 
569  private:
570   int value_;
571 };
572 
573 // Parse a JSON array. Position must be right at '['.
574 template <bool seq_one_byte>
ParseJsonArray()575 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
576   HandleScope scope(isolate());
577   ZoneVector<Handle<Object>> elements(zone());
578   DCHECK_EQ(c0_, '[');
579 
580   ElementKindLattice lattice;
581 
582   AdvanceSkipWhitespace();
583   if (c0_ != ']') {
584     do {
585       Handle<Object> element = ParseJsonValue();
586       if (element.is_null()) return ReportUnexpectedCharacter();
587       elements.push_back(element);
588       lattice.Update(element);
589     } while (MatchSkipWhiteSpace(','));
590     if (c0_ != ']') {
591       return ReportUnexpectedCharacter();
592     }
593   }
594   AdvanceSkipWhitespace();
595 
596   // Allocate a fixed array with all the elements.
597 
598   Handle<Object> json_array;
599   const ElementsKind kind = lattice.GetElementsKind();
600   int elements_size = static_cast<int>(elements.size());
601 
602   switch (kind) {
603     case PACKED_ELEMENTS:
604     case PACKED_SMI_ELEMENTS: {
605       Handle<FixedArray> elems =
606           factory()->NewFixedArray(elements_size, pretenure_);
607       for (int i = 0; i < elements_size; i++) elems->set(i, *elements[i]);
608       json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
609       break;
610     }
611     case PACKED_DOUBLE_ELEMENTS: {
612       Handle<FixedDoubleArray> elems = Handle<FixedDoubleArray>::cast(
613           factory()->NewFixedDoubleArray(elements_size, pretenure_));
614       for (int i = 0; i < elements_size; i++) {
615         elems->set(i, elements[i]->Number());
616       }
617       json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
618       break;
619     }
620     default:
621       UNREACHABLE();
622   }
623 
624   return scope.CloseAndEscape(json_array);
625 }
626 
627 template <bool seq_one_byte>
ParseJsonNumber()628 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
629   bool negative = false;
630   int beg_pos = position_;
631   if (c0_ == '-') {
632     Advance();
633     negative = true;
634   }
635   if (c0_ == '0') {
636     Advance();
637     // Prefix zero is only allowed if it's the only digit before
638     // a decimal point or exponent.
639     if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
640   } else {
641     int i = 0;
642     int digits = 0;
643     if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
644     do {
645       i = i * 10 + c0_ - '0';
646       digits++;
647       Advance();
648     } while (IsDecimalDigit(c0_));
649     if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
650       SkipWhitespace();
651       return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
652     }
653   }
654   if (c0_ == '.') {
655     Advance();
656     if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
657     do {
658       Advance();
659     } while (IsDecimalDigit(c0_));
660   }
661   if (AsciiAlphaToLower(c0_) == 'e') {
662     Advance();
663     if (c0_ == '-' || c0_ == '+') Advance();
664     if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
665     do {
666       Advance();
667     } while (IsDecimalDigit(c0_));
668   }
669   int length = position_ - beg_pos;
670   double number;
671   if (seq_one_byte) {
672     Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
673     number = StringToDouble(isolate()->unicode_cache(), chars,
674                             NO_FLAGS,  // Hex, octal or trailing junk.
675                             std::numeric_limits<double>::quiet_NaN());
676   } else {
677     Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
678     String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
679     Vector<const uint8_t> result =
680         Vector<const uint8_t>(buffer.start(), length);
681     number = StringToDouble(isolate()->unicode_cache(), result,
682                             NO_FLAGS,  // Hex, octal or trailing junk.
683                             0.0);
684     buffer.Dispose();
685   }
686   SkipWhitespace();
687   return factory()->NewNumber(number, pretenure_);
688 }
689 
690 template <typename StringType>
691 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
692 
693 template <>
SeqStringSet(Handle<SeqTwoByteString> seq_str,int i,uc32 c)694 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
695   seq_str->SeqTwoByteStringSet(i, c);
696 }
697 
698 template <>
SeqStringSet(Handle<SeqOneByteString> seq_str,int i,uc32 c)699 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
700   seq_str->SeqOneByteStringSet(i, c);
701 }
702 
703 template <typename StringType>
704 inline Handle<StringType> NewRawString(Factory* factory, int length,
705                                        PretenureFlag pretenure);
706 
707 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)708 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length,
709                                              PretenureFlag pretenure) {
710   return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked();
711 }
712 
713 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)714 inline Handle<SeqOneByteString> NewRawString(Factory* factory, int length,
715                                              PretenureFlag pretenure) {
716   return factory->NewRawOneByteString(length, pretenure).ToHandleChecked();
717 }
718 
// Scans the rest of a JSON string starting from position_ and writes
// prefix[start..end] along with the scanned characters into a
// sequential string of type StringType.
//
// The function re-enters itself, passing the partially filled string as the
// new prefix, in two situations: when the buffer is full (a larger buffer is
// allocated by the nested call), and when a character that does not fit a
// one-byte sink is encountered (the nested call uses a two-byte sink).
// Returns a null handle on control characters, end of input, or an invalid
// escape sequence. On success the cursor is left after the closing '"' and
// any whitespace that follows it.
template <bool seq_one_byte>
template <typename StringType, typename SinkChar>
Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
    Handle<String> prefix, int start, int end) {
  // Number of characters written to the sink so far (initially the prefix).
  int count = end - start;
  // Upper bound on the result length: the prefix plus everything left in the
  // source.
  int max_length = count + source_length_ - position_;
  int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
  Handle<StringType> seq_string =
      NewRawString<StringType>(factory(), length, pretenure_);
  // Copy prefix into seq_str.
  SinkChar* dest = seq_string->GetChars();
  String::WriteToFlat(*prefix, dest, start, end);

  while (c0_ != '"') {
    // Check for control character (0x00-0x1F) or unterminated string (<0).
    if (c0_ < 0x20) return Handle<String>::null();
    if (count >= length) {
      // We need to create a longer sequential string for the result.
      return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
    }
    if (c0_ != '\\') {
      // If the sink can contain UC16 characters, or source_ contains only
      // Latin1 characters, there's no need to test whether we can store the
      // character. Otherwise check whether the UC16 source character can fit
      // in the Latin1 sink.
      if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
          c0_ <= String::kMaxOneByteCharCode) {
        SeqStringSet(seq_string, count++, c0_);
        Advance();
      } else {
        // StringType is SeqOneByteString and we just read a non-Latin1 char.
        // The character has not been consumed, so the nested call re-reads it.
        return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
      }
    } else {
      Advance();  // Advance past the \.
      switch (c0_) {
        // Characters that stand for themselves after a backslash.
        case '"':
        case '\\':
        case '/':
          SeqStringSet(seq_string, count++, c0_);
          break;
        case 'b':
          SeqStringSet(seq_string, count++, '\x08');
          break;
        case 'f':
          SeqStringSet(seq_string, count++, '\x0C');
          break;
        case 'n':
          SeqStringSet(seq_string, count++, '\x0A');
          break;
        case 'r':
          SeqStringSet(seq_string, count++, '\x0D');
          break;
        case 't':
          SeqStringSet(seq_string, count++, '\x09');
          break;
        case 'u': {
          // \uXXXX: exactly four hex digits form one UTF-16 code unit.
          uc32 value = 0;
          for (int i = 0; i < 4; i++) {
            Advance();
            int digit = HexValue(c0_);
            if (digit < 0) {
              return Handle<String>::null();
            }
            value = value * 16 + digit;
          }
          if (sizeof(SinkChar) == kUC16Size ||
              value <= String::kMaxOneByteCharCode) {
            SeqStringSet(seq_string, count++, value);
            break;
          } else {
            // StringType is SeqOneByteString and we just read a non-Latin1
            // char.
            position_ -= 6;  // Rewind position_ to \ in \uxxxx.
            // position_ now sits one before the backslash; Advance() lands on
            // it and reloads c0_, so the nested call re-parses the escape.
            Advance();
            return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0,
                                                              count);
          }
        }
        default:
          // Any other character after a backslash is not a valid escape.
          return Handle<String>::null();
      }
      // Step past the last character of the escape sequence.
      Advance();
    }
  }

  DCHECK_EQ('"', c0_);
  // Advance past the last '"'.
  AdvanceSkipWhitespace();

  // Shrink seq_string length to count and return.
  return SeqString::Truncate(seq_string, count);
}
815 
816 template <bool seq_one_byte>
ScanJsonString()817 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
818   DCHECK_EQ('"', c0_);
819   Advance();
820   if (c0_ == '"') {
821     AdvanceSkipWhitespace();
822     return factory()->empty_string();
823   }
824 
825   if (seq_one_byte) {
    // Fast path for existing internalized strings.  If the string being
    // parsed is not a known internalized string, contains backslashes or
    // unexpectedly reaches the end of string, return with an empty handle.
829 
830     // We intentionally use local variables instead of fields, compute hash
831     // while we are iterating a string and manually inline StringTable lookup
832     // here.
833 
834     int position = position_;
835     uc32 c0 = c0_;
836     uint32_t running_hash =
837         static_cast<uint32_t>(isolate()->heap()->HashSeed());
838     uint32_t index = 0;
839     bool is_array_index = true;
840 
841     do {
842       if (c0 == '\\') {
843         c0_ = c0;
844         int beg_pos = position_;
845         position_ = position;
846         return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
847                                                              position_);
848       }
849       if (c0 < 0x20) {
850         c0_ = c0;
851         position_ = position;
852         return Handle<String>::null();
853       }
854       if (is_array_index) {
855         // With leading zero, the string has to be "0" to be a valid index.
856         if (!IsDecimalDigit(c0) || (position > position_ && index == 0)) {
857           is_array_index = false;
858         } else {
859           int d = c0 - '0';
860           is_array_index = index <= 429496729U - ((d + 3) >> 3);
861           index = (index * 10) + d;
862         }
863       }
864       running_hash = StringHasher::AddCharacterCore(running_hash,
865                                                     static_cast<uint16_t>(c0));
866       position++;
867       if (position >= source_length_) {
868         c0_ = kEndOfString;
869         position_ = position;
870         return Handle<String>::null();
871       }
872       c0 = seq_source_->SeqOneByteStringGet(position);
873     } while (c0 != '"');
874     int length = position - position_;
875     uint32_t hash;
876     if (is_array_index) {
877       hash =
878           StringHasher::MakeArrayIndexHash(index, length) >> String::kHashShift;
879     } else if (length <= String::kMaxHashCalcLength) {
880       hash = StringHasher::GetHashCore(running_hash);
881     } else {
882       hash = static_cast<uint32_t>(length);
883     }
884     Vector<const uint8_t> string_vector(seq_source_->GetChars() + position_,
885                                         length);
886     StringTable* string_table = isolate()->heap()->string_table();
887     uint32_t capacity = string_table->Capacity();
888     uint32_t entry = StringTable::FirstProbe(hash, capacity);
889     uint32_t count = 1;
890     Handle<String> result;
891     while (true) {
892       Object* element = string_table->KeyAt(entry);
893       if (element->IsUndefined(isolate())) {
894         // Lookup failure.
895         result =
896             factory()->InternalizeOneByteString(seq_source_, position_, length);
897         break;
898       }
899       if (!element->IsTheHole(isolate()) &&
900           String::cast(element)->IsOneByteEqualTo(string_vector)) {
901         result = Handle<String>(String::cast(element), isolate());
902         DCHECK_EQ(result->Hash(),
903                   (hash << String::kHashShift) >> String::kHashShift);
904         break;
905       }
906       entry = StringTable::NextProbe(entry, count++, capacity);
907     }
908     position_ = position;
909     // Advance past the last '"'.
910     AdvanceSkipWhitespace();
911     return result;
912   }
913 
914   int beg_pos = position_;
915   // Fast case for Latin1 only without escape characters.
916   do {
917     // Check for control character (0x00-0x1F) or unterminated string (<0).
918     if (c0_ < 0x20) return Handle<String>::null();
919     if (c0_ != '\\') {
920       if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
921         Advance();
922       } else {
923         return SlowScanJsonString<SeqTwoByteString, uc16>(source_, beg_pos,
924                                                           position_);
925       }
926     } else {
927       return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
928                                                            position_);
929     }
930   } while (c0_ != '"');
931   int length = position_ - beg_pos;
932   Handle<String> result =
933       factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked();
934   uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
935   String::WriteToFlat(*source_, dest, beg_pos, position_);
936 
937   DCHECK_EQ('"', c0_);
938   // Advance past the last '"'.
939   AdvanceSkipWhitespace();
940   return result;
941 }
942 
// Explicit instantiation. The JsonParser member templates defined in this
// file are instantiated here for both values of the seq_one_byte template
// parameter.
template class JsonParser<true>;
template class JsonParser<false>;
946 
947 }  // namespace internal
948 }  // namespace v8
949