1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/json-parser.h"
6
7 #include "src/char-predicates-inl.h"
8 #include "src/conversions.h"
9 #include "src/debug/debug.h"
10 #include "src/field-type.h"
11 #include "src/messages.h"
12 #include "src/objects-inl.h"
13 #include "src/objects/hash-table-inl.h"
14 #include "src/property-descriptor.h"
15 #include "src/string-hasher.h"
16 #include "src/transitions.h"
17 #include "src/unicode-cache.h"
18
19 namespace v8 {
20 namespace internal {
21
22 namespace {
23
24 // A vector-like data structure that uses a larger vector for allocation, and
25 // provides limited utility access. The original vector must not be used for the
26 // duration, and it may even be reallocated. This allows vector storage to be
27 // reused for the properties of sibling objects.
28 template <typename Container>
29 class VectorSegment {
30 public:
31 using value_type = typename Container::value_type;
32
VectorSegment(Container * container)33 explicit VectorSegment(Container* container)
34 : container_(*container), begin_(container->size()) {}
~VectorSegment()35 ~VectorSegment() { container_.resize(begin_); }
36
GetVector() const37 Vector<const value_type> GetVector() const {
38 return Vector<const value_type>(container_.data() + begin_,
39 container_.size() - begin_);
40 }
41
42 template <typename T>
push_back(T && value)43 void push_back(T&& value) {
44 container_.push_back(std::forward<T>(value));
45 }
46
47 private:
48 Container& container_;
49 const typename Container::size_type begin_;
50 };
51
52 } // namespace
53
Internalize(Isolate * isolate,Handle<Object> object,Handle<Object> reviver)54 MaybeHandle<Object> JsonParseInternalizer::Internalize(Isolate* isolate,
55 Handle<Object> object,
56 Handle<Object> reviver) {
57 DCHECK(reviver->IsCallable());
58 JsonParseInternalizer internalizer(isolate,
59 Handle<JSReceiver>::cast(reviver));
60 Handle<JSObject> holder =
61 isolate->factory()->NewJSObject(isolate->object_function());
62 Handle<String> name = isolate->factory()->empty_string();
63 JSObject::AddProperty(isolate, holder, name, object, NONE);
64 return internalizer.InternalizeJsonProperty(holder, name);
65 }
66
InternalizeJsonProperty(Handle<JSReceiver> holder,Handle<String> name)67 MaybeHandle<Object> JsonParseInternalizer::InternalizeJsonProperty(
68 Handle<JSReceiver> holder, Handle<String> name) {
69 HandleScope outer_scope(isolate_);
70 Handle<Object> value;
71 ASSIGN_RETURN_ON_EXCEPTION(
72 isolate_, value, Object::GetPropertyOrElement(isolate_, holder, name),
73 Object);
74 if (value->IsJSReceiver()) {
75 Handle<JSReceiver> object = Handle<JSReceiver>::cast(value);
76 Maybe<bool> is_array = Object::IsArray(object);
77 if (is_array.IsNothing()) return MaybeHandle<Object>();
78 if (is_array.FromJust()) {
79 Handle<Object> length_object;
80 ASSIGN_RETURN_ON_EXCEPTION(
81 isolate_, length_object,
82 Object::GetLengthFromArrayLike(isolate_, object), Object);
83 double length = length_object->Number();
84 for (double i = 0; i < length; i++) {
85 HandleScope inner_scope(isolate_);
86 Handle<Object> index = isolate_->factory()->NewNumber(i);
87 Handle<String> name = isolate_->factory()->NumberToString(index);
88 if (!RecurseAndApply(object, name)) return MaybeHandle<Object>();
89 }
90 } else {
91 Handle<FixedArray> contents;
92 ASSIGN_RETURN_ON_EXCEPTION(
93 isolate_, contents,
94 KeyAccumulator::GetKeys(object, KeyCollectionMode::kOwnOnly,
95 ENUMERABLE_STRINGS,
96 GetKeysConversion::kConvertToString),
97 Object);
98 for (int i = 0; i < contents->length(); i++) {
99 HandleScope inner_scope(isolate_);
100 Handle<String> name(String::cast(contents->get(i)), isolate_);
101 if (!RecurseAndApply(object, name)) return MaybeHandle<Object>();
102 }
103 }
104 }
105 Handle<Object> argv[] = {name, value};
106 Handle<Object> result;
107 ASSIGN_RETURN_ON_EXCEPTION(
108 isolate_, result, Execution::Call(isolate_, reviver_, holder, 2, argv),
109 Object);
110 return outer_scope.CloseAndEscape(result);
111 }
112
RecurseAndApply(Handle<JSReceiver> holder,Handle<String> name)113 bool JsonParseInternalizer::RecurseAndApply(Handle<JSReceiver> holder,
114 Handle<String> name) {
115 STACK_CHECK(isolate_, false);
116
117 Handle<Object> result;
118 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
119 isolate_, result, InternalizeJsonProperty(holder, name), false);
120 Maybe<bool> change_result = Nothing<bool>();
121 if (result->IsUndefined(isolate_)) {
122 change_result = JSReceiver::DeletePropertyOrElement(holder, name,
123 LanguageMode::kSloppy);
124 } else {
125 PropertyDescriptor desc;
126 desc.set_value(result);
127 desc.set_configurable(true);
128 desc.set_enumerable(true);
129 desc.set_writable(true);
130 change_result = JSReceiver::DefineOwnProperty(isolate_, holder, name, &desc,
131 kDontThrow);
132 }
133 MAYBE_RETURN(change_result, false);
134 return true;
135 }
136
137 template <bool seq_one_byte>
JsonParser(Isolate * isolate,Handle<String> source)138 JsonParser<seq_one_byte>::JsonParser(Isolate* isolate, Handle<String> source)
139 : source_(source),
140 source_length_(source->length()),
141 isolate_(isolate),
142 zone_(isolate_->allocator(), ZONE_NAME),
143 object_constructor_(isolate_->native_context()->object_function(),
144 isolate_),
145 position_(-1),
146 properties_(&zone_) {
147 source_ = String::Flatten(isolate, source_);
148 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
149
150 // Optimized fast case where we only have Latin1 characters.
151 if (seq_one_byte) {
152 seq_source_ = Handle<SeqOneByteString>::cast(source_);
153 }
154 }
155
156 template <bool seq_one_byte>
ParseJson()157 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
158 // Advance to the first character (possibly EOS)
159 AdvanceSkipWhitespace();
160 Handle<Object> result = ParseJsonValue();
161 if (result.is_null() || c0_ != kEndOfString) {
162 // Some exception (for example stack overflow) is already pending.
163 if (isolate_->has_pending_exception()) return Handle<Object>::null();
164
165 // Parse failed. Current character is the unexpected token.
166 Factory* factory = this->factory();
167 MessageTemplate::Template message;
168 Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate());
169 Handle<Object> arg2;
170
171 switch (c0_) {
172 case kEndOfString:
173 message = MessageTemplate::kJsonParseUnexpectedEOS;
174 break;
175 case '-':
176 case '0':
177 case '1':
178 case '2':
179 case '3':
180 case '4':
181 case '5':
182 case '6':
183 case '7':
184 case '8':
185 case '9':
186 message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
187 break;
188 case '"':
189 message = MessageTemplate::kJsonParseUnexpectedTokenString;
190 break;
191 default:
192 message = MessageTemplate::kJsonParseUnexpectedToken;
193 arg2 = arg1;
194 arg1 = factory->LookupSingleCharacterStringFromCode(c0_);
195 break;
196 }
197
198 Handle<Script> script(factory->NewScript(source_));
199 if (isolate()->NeedsSourcePositionsForProfiling()) {
200 Script::InitLineEnds(script);
201 }
202 // We should sent compile error event because we compile JSON object in
203 // separated source file.
204 isolate()->debug()->OnCompileError(script);
205 MessageLocation location(script, position_, position_ + 1);
206 Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
207 return isolate()->template Throw<Object>(error, &location);
208 }
209 return result;
210 }
211
212 MaybeHandle<Object> InternalizeJsonProperty(Handle<JSObject> holder,
213 Handle<String> key);
214
215 template <bool seq_one_byte>
Advance()216 void JsonParser<seq_one_byte>::Advance() {
217 position_++;
218 if (position_ >= source_length_) {
219 c0_ = kEndOfString;
220 } else if (seq_one_byte) {
221 c0_ = seq_source_->SeqOneByteStringGet(position_);
222 } else {
223 c0_ = source_->Get(position_);
224 }
225 }
226
227 template <bool seq_one_byte>
AdvanceSkipWhitespace()228 void JsonParser<seq_one_byte>::AdvanceSkipWhitespace() {
229 do {
230 Advance();
231 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
232 }
233
234 template <bool seq_one_byte>
SkipWhitespace()235 void JsonParser<seq_one_byte>::SkipWhitespace() {
236 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
237 Advance();
238 }
239 }
240
241 template <bool seq_one_byte>
AdvanceGetChar()242 uc32 JsonParser<seq_one_byte>::AdvanceGetChar() {
243 Advance();
244 return c0_;
245 }
246
247 template <bool seq_one_byte>
MatchSkipWhiteSpace(uc32 c)248 bool JsonParser<seq_one_byte>::MatchSkipWhiteSpace(uc32 c) {
249 if (c0_ == c) {
250 AdvanceSkipWhitespace();
251 return true;
252 }
253 return false;
254 }
255
256 template <bool seq_one_byte>
ParseJsonString(Handle<String> expected)257 bool JsonParser<seq_one_byte>::ParseJsonString(Handle<String> expected) {
258 int length = expected->length();
259 if (source_->length() - position_ - 1 > length) {
260 DisallowHeapAllocation no_gc;
261 String::FlatContent content = expected->GetFlatContent();
262 if (content.IsOneByte()) {
263 DCHECK_EQ('"', c0_);
264 const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
265 const uint8_t* expected_chars = content.ToOneByteVector().start();
266 for (int i = 0; i < length; i++) {
267 uint8_t c0 = input_chars[i];
268 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
269 return false;
270 }
271 }
272 if (input_chars[length] == '"') {
273 position_ = position_ + length + 1;
274 AdvanceSkipWhitespace();
275 return true;
276 }
277 }
278 }
279 return false;
280 }
281
282 // Parse any JSON value.
283 template <bool seq_one_byte>
ParseJsonValue()284 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
285 StackLimitCheck stack_check(isolate_);
286 if (stack_check.HasOverflowed()) {
287 isolate_->StackOverflow();
288 return Handle<Object>::null();
289 }
290
291 if (stack_check.InterruptRequested() &&
292 isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
293 return Handle<Object>::null();
294 }
295
296 if (c0_ == '"') return ParseJsonString();
297 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
298 if (c0_ == '{') return ParseJsonObject();
299 if (c0_ == '[') return ParseJsonArray();
300 if (c0_ == 'f') {
301 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
302 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
303 AdvanceSkipWhitespace();
304 return factory()->false_value();
305 }
306 return ReportUnexpectedCharacter();
307 }
308 if (c0_ == 't') {
309 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
310 AdvanceGetChar() == 'e') {
311 AdvanceSkipWhitespace();
312 return factory()->true_value();
313 }
314 return ReportUnexpectedCharacter();
315 }
316 if (c0_ == 'n') {
317 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
318 AdvanceGetChar() == 'l') {
319 AdvanceSkipWhitespace();
320 return factory()->null_value();
321 }
322 return ReportUnexpectedCharacter();
323 }
324 return ReportUnexpectedCharacter();
325 }
326
327 template <bool seq_one_byte>
ParseElement(Handle<JSObject> json_object)328 ParseElementResult JsonParser<seq_one_byte>::ParseElement(
329 Handle<JSObject> json_object) {
330 uint32_t index = 0;
331 // Maybe an array index, try to parse it.
332 if (c0_ == '0') {
333 // With a leading zero, the string has to be "0" only to be an index.
334 Advance();
335 } else {
336 do {
337 int d = c0_ - '0';
338 if (index > 429496729U - ((d + 3) >> 3)) break;
339 index = (index * 10) + d;
340 Advance();
341 } while (IsDecimalDigit(c0_));
342 }
343
344 if (c0_ == '"') {
345 // Successfully parsed index, parse and store element.
346 AdvanceSkipWhitespace();
347
348 if (c0_ == ':') {
349 AdvanceSkipWhitespace();
350 Handle<Object> value = ParseJsonValue();
351 if (!value.is_null()) {
352 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
353 .Assert();
354 return kElementFound;
355 } else {
356 return kNullHandle;
357 }
358 }
359 }
360 return kElementNotFound;
361 }
362
// Parse a JSON object. Position must be right at '{'.
//
// Parsing runs in two phases. While |transitioning| is true, property values
// are buffered in |properties| and |map| is advanced along existing map
// transitions. As soon as a key has no matching field transition, or a value
// does not fit the representation expected by the transition target, the
// buffered state is committed to the object and every remaining property is
// defined on the object directly. Keys that look like array indices are
// stored as elements in both phases.
//
// On success the position is past the closing '}' and any whitespace after
// it. On a syntax error the result of ReportUnexpectedCharacter() is
// returned; a null handle is returned when an element value fails to parse.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
  HandleScope scope(isolate());
  Handle<JSObject> json_object =
      factory()->NewJSObject(object_constructor(), pretenure_);
  Handle<Map> map(json_object->map(), isolate());
  // Index of the next descriptor to fill while following transitions.
  int descriptor = 0;
  // Buffered property values; released again when this function returns.
  VectorSegment<ZoneVector<Handle<Object>>> properties(&properties_);
  DCHECK_EQ(c0_, '{');

  bool transitioning = true;

  AdvanceSkipWhitespace();
  if (c0_ != '}') {
    // Phase one: follow map transitions. Note that `continue` in this
    // do-while jumps to the loop condition, i.e. it consumes the ','.
    do {
      if (c0_ != '"') return ReportUnexpectedCharacter();

      // Remember the opening quote so the key can be re-parsed as a string.
      int start_position = position_;
      Advance();

      if (IsDecimalDigit(c0_)) {
        ParseElementResult element_result = ParseElement(json_object);
        if (element_result == kNullHandle) return Handle<Object>::null();
        if (element_result == kElementFound) continue;
      }
      // Not an index, fallback to the slow path.

      position_ = start_position;
#ifdef DEBUG
      // Only needed to satisfy the DCHECKs in the string parsers; they do not
      // read c0_ before advancing otherwise.
      c0_ = '"';
#endif

      Handle<String> key;
      Handle<Object> value;

      // Try to follow existing transitions as long as possible. Once we stop
      // transitioning, no transition can be found anymore.
      DCHECK(transitioning);
      // First check whether there is a single expected transition. If so, try
      // to parse it first.
      bool follow_expected = false;
      Handle<Map> target;
      if (seq_one_byte) {
        DisallowHeapAllocation no_gc;
        TransitionsAccessor transitions(isolate(), *map, &no_gc);
        key = transitions.ExpectedTransitionKey();
        follow_expected = !key.is_null() && ParseJsonString(key);
        // If the expected transition hits, follow it.
        if (follow_expected) {
          target = transitions.ExpectedTransitionTarget();
        }
      }
      if (!follow_expected) {
        // If the expected transition failed, parse an internalized string and
        // try to find a matching transition.
        key = ParseJsonString();
        if (key.is_null()) return ReportUnexpectedCharacter();

        // If a transition was found, follow it and continue.
        transitioning = TransitionsAccessor(isolate(), map)
                            .FindTransitionToField(key)
                            .ToHandle(&target);
      }
      if (c0_ != ':') return ReportUnexpectedCharacter();

      AdvanceSkipWhitespace();
      value = ParseJsonValue();
      if (value.is_null()) return ReportUnexpectedCharacter();

      if (transitioning) {
        PropertyDetails details =
            target->instance_descriptors()->GetDetails(descriptor);
        Representation expected_representation = details.representation();

        if (value->FitsRepresentation(expected_representation)) {
          // Widen the field type of the target map if it does not yet admit
          // this heap object value.
          if (expected_representation.IsHeapObject() &&
              !target->instance_descriptors()
                   ->GetFieldType(descriptor)
                   ->NowContains(value)) {
            Handle<FieldType> value_type(
                value->OptimalType(isolate(), expected_representation));
            Map::GeneralizeField(isolate(), target, descriptor,
                                 details.constness(), expected_representation,
                                 value_type);
          }
          DCHECK(target->instance_descriptors()
                     ->GetFieldType(descriptor)
                     ->NowContains(value));
          // Buffer the value and move on to the target map.
          properties.push_back(value);
          map = target;
          descriptor++;
          continue;
        } else {
          // The value does not fit the target's representation; give up on
          // transitions and add this property the generic way below.
          transitioning = false;
        }
      }

      DCHECK(!transitioning);

      // Commit the intermediate state to the object and stop transitioning.
      CommitStateToJsonObject(json_object, map, properties.GetVector());

      JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
          .Check();
    } while (transitioning && MatchSkipWhiteSpace(','));

    // If we transitioned until the very end, transition the map now.
    if (transitioning) {
      CommitStateToJsonObject(json_object, map, properties.GetVector());
    } else {
      // Phase two: the object state has been committed, so each remaining
      // property is defined directly on the object.
      while (MatchSkipWhiteSpace(',')) {
        HandleScope local_scope(isolate());
        if (c0_ != '"') return ReportUnexpectedCharacter();

        int start_position = position_;
        Advance();

        if (IsDecimalDigit(c0_)) {
          ParseElementResult element_result = ParseElement(json_object);
          if (element_result == kNullHandle) return Handle<Object>::null();
          if (element_result == kElementFound) continue;
        }
        // Not an index, fallback to the slow path.

        position_ = start_position;
#ifdef DEBUG
        c0_ = '"';
#endif

        Handle<String> key;
        Handle<Object> value;

        key = ParseJsonString();
        if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();

        AdvanceSkipWhitespace();
        value = ParseJsonValue();
        if (value.is_null()) return ReportUnexpectedCharacter();

        JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
                                                          value)
            .Check();
      }
    }

    if (c0_ != '}') {
      return ReportUnexpectedCharacter();
    }
  }
  // Step past the closing '}'.
  AdvanceSkipWhitespace();
  return scope.CloseAndEscape(json_object);
}
516
517 template <bool seq_one_byte>
CommitStateToJsonObject(Handle<JSObject> json_object,Handle<Map> map,Vector<const Handle<Object>> properties)518 void JsonParser<seq_one_byte>::CommitStateToJsonObject(
519 Handle<JSObject> json_object, Handle<Map> map,
520 Vector<const Handle<Object>> properties) {
521 JSObject::AllocateStorageForMap(json_object, map);
522 DCHECK(!json_object->map()->is_dictionary_map());
523
524 DisallowHeapAllocation no_gc;
525 DescriptorArray* descriptors = json_object->map()->instance_descriptors();
526 for (int i = 0; i < properties.length(); i++) {
527 Handle<Object> value = properties[i];
528 // Initializing store.
529 json_object->WriteToField(i, descriptors->GetDetails(i), *value);
530 }
531 }
532
533 class ElementKindLattice {
534 private:
535 enum {
536 SMI_ELEMENTS,
537 NUMBER_ELEMENTS,
538 OBJECT_ELEMENTS,
539 };
540
541 public:
ElementKindLattice()542 ElementKindLattice() : value_(SMI_ELEMENTS) {}
543
Update(Handle<Object> o)544 void Update(Handle<Object> o) {
545 if (o->IsSmi()) {
546 return;
547 } else if (o->IsHeapNumber()) {
548 if (value_ < NUMBER_ELEMENTS) value_ = NUMBER_ELEMENTS;
549 } else {
550 DCHECK(!o->IsNumber());
551 value_ = OBJECT_ELEMENTS;
552 }
553 }
554
GetElementsKind() const555 ElementsKind GetElementsKind() const {
556 switch (value_) {
557 case SMI_ELEMENTS:
558 return PACKED_SMI_ELEMENTS;
559 case NUMBER_ELEMENTS:
560 return PACKED_DOUBLE_ELEMENTS;
561 case OBJECT_ELEMENTS:
562 return PACKED_ELEMENTS;
563 default:
564 UNREACHABLE();
565 return PACKED_ELEMENTS;
566 }
567 }
568
569 private:
570 int value_;
571 };
572
573 // Parse a JSON array. Position must be right at '['.
574 template <bool seq_one_byte>
ParseJsonArray()575 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
576 HandleScope scope(isolate());
577 ZoneVector<Handle<Object>> elements(zone());
578 DCHECK_EQ(c0_, '[');
579
580 ElementKindLattice lattice;
581
582 AdvanceSkipWhitespace();
583 if (c0_ != ']') {
584 do {
585 Handle<Object> element = ParseJsonValue();
586 if (element.is_null()) return ReportUnexpectedCharacter();
587 elements.push_back(element);
588 lattice.Update(element);
589 } while (MatchSkipWhiteSpace(','));
590 if (c0_ != ']') {
591 return ReportUnexpectedCharacter();
592 }
593 }
594 AdvanceSkipWhitespace();
595
596 // Allocate a fixed array with all the elements.
597
598 Handle<Object> json_array;
599 const ElementsKind kind = lattice.GetElementsKind();
600 int elements_size = static_cast<int>(elements.size());
601
602 switch (kind) {
603 case PACKED_ELEMENTS:
604 case PACKED_SMI_ELEMENTS: {
605 Handle<FixedArray> elems =
606 factory()->NewFixedArray(elements_size, pretenure_);
607 for (int i = 0; i < elements_size; i++) elems->set(i, *elements[i]);
608 json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
609 break;
610 }
611 case PACKED_DOUBLE_ELEMENTS: {
612 Handle<FixedDoubleArray> elems = Handle<FixedDoubleArray>::cast(
613 factory()->NewFixedDoubleArray(elements_size, pretenure_));
614 for (int i = 0; i < elements_size; i++) {
615 elems->set(i, elements[i]->Number());
616 }
617 json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
618 break;
619 }
620 default:
621 UNREACHABLE();
622 }
623
624 return scope.CloseAndEscape(json_array);
625 }
626
627 template <bool seq_one_byte>
ParseJsonNumber()628 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
629 bool negative = false;
630 int beg_pos = position_;
631 if (c0_ == '-') {
632 Advance();
633 negative = true;
634 }
635 if (c0_ == '0') {
636 Advance();
637 // Prefix zero is only allowed if it's the only digit before
638 // a decimal point or exponent.
639 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
640 } else {
641 int i = 0;
642 int digits = 0;
643 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
644 do {
645 i = i * 10 + c0_ - '0';
646 digits++;
647 Advance();
648 } while (IsDecimalDigit(c0_));
649 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
650 SkipWhitespace();
651 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
652 }
653 }
654 if (c0_ == '.') {
655 Advance();
656 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
657 do {
658 Advance();
659 } while (IsDecimalDigit(c0_));
660 }
661 if (AsciiAlphaToLower(c0_) == 'e') {
662 Advance();
663 if (c0_ == '-' || c0_ == '+') Advance();
664 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
665 do {
666 Advance();
667 } while (IsDecimalDigit(c0_));
668 }
669 int length = position_ - beg_pos;
670 double number;
671 if (seq_one_byte) {
672 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
673 number = StringToDouble(isolate()->unicode_cache(), chars,
674 NO_FLAGS, // Hex, octal or trailing junk.
675 std::numeric_limits<double>::quiet_NaN());
676 } else {
677 Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
678 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
679 Vector<const uint8_t> result =
680 Vector<const uint8_t>(buffer.start(), length);
681 number = StringToDouble(isolate()->unicode_cache(), result,
682 NO_FLAGS, // Hex, octal or trailing junk.
683 0.0);
684 buffer.Dispose();
685 }
686 SkipWhitespace();
687 return factory()->NewNumber(number, pretenure_);
688 }
689
690 template <typename StringType>
691 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
692
693 template <>
SeqStringSet(Handle<SeqTwoByteString> seq_str,int i,uc32 c)694 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
695 seq_str->SeqTwoByteStringSet(i, c);
696 }
697
698 template <>
SeqStringSet(Handle<SeqOneByteString> seq_str,int i,uc32 c)699 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
700 seq_str->SeqOneByteStringSet(i, c);
701 }
702
703 template <typename StringType>
704 inline Handle<StringType> NewRawString(Factory* factory, int length,
705 PretenureFlag pretenure);
706
707 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)708 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length,
709 PretenureFlag pretenure) {
710 return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked();
711 }
712
713 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)714 inline Handle<SeqOneByteString> NewRawString(Factory* factory, int length,
715 PretenureFlag pretenure) {
716 return factory->NewRawOneByteString(length, pretenure).ToHandleChecked();
717 }
718
// Scans the rest of a JSON string starting from position_ and writes
// prefix[start..end] along with the scanned characters into a
// sequential string of type StringType.
//
// The result buffer is sized as a guess. When it fills up, or when a
// character does not fit a one-byte sink, the function restarts itself with
// the characters written so far as the new prefix (and a two-byte sink where
// needed). Returns a null handle on a control character, an unterminated
// string or an invalid escape sequence. On success the closing quote and any
// following whitespace have been consumed.
template <bool seq_one_byte>
template <typename StringType, typename SinkChar>
Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
    Handle<String> prefix, int start, int end) {
  // Number of characters written to the result so far.
  int count = end - start;
  // The prefix plus everything that is left of the source.
  int max_length = count + source_length_ - position_;
  int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
  Handle<StringType> seq_string =
      NewRawString<StringType>(factory(), length, pretenure_);
  // Copy prefix into seq_str.
  SinkChar* dest = seq_string->GetChars();
  String::WriteToFlat(*prefix, dest, start, end);

  while (c0_ != '"') {
    // Check for control character (0x00-0x1F) or unterminated string (<0).
    if (c0_ < 0x20) return Handle<String>::null();
    if (count >= length) {
      // We need to create a longer sequential string for the result.
      return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
    }
    if (c0_ != '\\') {
      // If the sink can contain UC16 characters, or source_ contains only
      // Latin1 characters, there's no need to test whether we can store the
      // character. Otherwise check whether the UC16 source character can fit
      // in the Latin1 sink.
      if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
          c0_ <= String::kMaxOneByteCharCode) {
        SeqStringSet(seq_string, count++, c0_);
        Advance();
      } else {
        // StringType is SeqOneByteString and we just read a non-Latin1 char.
        return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
      }
    } else {
      Advance();  // Advance past the \.
      // Translate the escape sequence into the character it denotes.
      switch (c0_) {
        case '"':
        case '\\':
        case '/':
          SeqStringSet(seq_string, count++, c0_);
          break;
        case 'b':
          SeqStringSet(seq_string, count++, '\x08');
          break;
        case 'f':
          SeqStringSet(seq_string, count++, '\x0C');
          break;
        case 'n':
          SeqStringSet(seq_string, count++, '\x0A');
          break;
        case 'r':
          SeqStringSet(seq_string, count++, '\x0D');
          break;
        case 't':
          SeqStringSet(seq_string, count++, '\x09');
          break;
        case 'u': {
          // \uXXXX: exactly four hex digits are required.
          uc32 value = 0;
          for (int i = 0; i < 4; i++) {
            Advance();
            int digit = HexValue(c0_);
            if (digit < 0) {
              return Handle<String>::null();
            }
            value = value * 16 + digit;
          }
          if (sizeof(SinkChar) == kUC16Size ||
              value <= String::kMaxOneByteCharCode) {
            SeqStringSet(seq_string, count++, value);
            break;
          } else {
            // StringType is SeqOneByteString and we just read a non-Latin1
            // char.
            // position_ is at the last hex digit; step back so that the
            // Advance() below lands on the backslash again and the two-byte
            // scan re-reads the whole escape.
            position_ -= 6;  // Rewind position_ to \ in \uxxxx.
            Advance();
            return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0,
                                                              count);
          }
        }
        default:
          // Unknown escape (this also covers reaching the end of the source).
          return Handle<String>::null();
      }
      // Step past the last character of the escape sequence.
      Advance();
    }
  }

  DCHECK_EQ('"', c0_);
  // Advance past the last '"'.
  AdvanceSkipWhitespace();

  // Shrink seq_string length to count and return.
  return SeqString::Truncate(seq_string, count);
}
815
// Scans a JSON string literal; the current character must be its opening
// quote. Returns the string contents, or a null handle on a control
// character or an unterminated string. On success the closing quote and any
// following whitespace have been consumed.
//
// In the one-byte specialization, strings without escapes are looked up in
// (or added to) the string table. Strings containing a backslash, and in the
// generic specialization also strings containing non-Latin1 characters, are
// handed over to SlowScanJsonString.
template <bool seq_one_byte>
Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
  DCHECK_EQ('"', c0_);
  Advance();
  if (c0_ == '"') {
    // Empty string literal.
    AdvanceSkipWhitespace();
    return factory()->empty_string();
  }

  if (seq_one_byte) {
    // Fast path for existing internalized strings. If the string being
    // parsed is not a known internalized string, contains backslashes or
    // unexpectedly reaches the end of string, return with an empty handle.

    // We intentionally use local variables instead of fields, compute hash
    // while we are iterating a string and manually inline StringTable lookup
    // here.

    // |position| and |c0| run ahead; position_ keeps pointing at the first
    // character of the string until the scan is finished.
    int position = position_;
    uc32 c0 = c0_;
    uint32_t running_hash =
        static_cast<uint32_t>(isolate()->heap()->HashSeed());
    uint32_t index = 0;
    bool is_array_index = true;

    do {
      if (c0 == '\\') {
        // Escape sequence: continue on the slow path, passing the characters
        // scanned so far as the prefix.
        c0_ = c0;
        int beg_pos = position_;
        position_ = position;
        return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
                                                             position_);
      }
      if (c0 < 0x20) {
        // Control character inside the string.
        c0_ = c0;
        position_ = position;
        return Handle<String>::null();
      }
      if (is_array_index) {
        // With leading zero, the string has to be "0" to be a valid index.
        if (!IsDecimalDigit(c0) || (position > position_ && index == 0)) {
          is_array_index = false;
        } else {
          int d = c0 - '0';
          is_array_index = index <= 429496729U - ((d + 3) >> 3);
          index = (index * 10) + d;
        }
      }
      running_hash = StringHasher::AddCharacterCore(running_hash,
                                                    static_cast<uint16_t>(c0));
      position++;
      if (position >= source_length_) {
        // Ran off the end of the source before the closing quote.
        c0_ = kEndOfString;
        position_ = position;
        return Handle<String>::null();
      }
      c0 = seq_source_->SeqOneByteStringGet(position);
    } while (c0 != '"');
    int length = position - position_;
    // Pick the hash used for probing the string table, depending on whether
    // the string is an array index and on its length.
    uint32_t hash;
    if (is_array_index) {
      hash =
          StringHasher::MakeArrayIndexHash(index, length) >> String::kHashShift;
    } else if (length <= String::kMaxHashCalcLength) {
      hash = StringHasher::GetHashCore(running_hash);
    } else {
      hash = static_cast<uint32_t>(length);
    }
    Vector<const uint8_t> string_vector(seq_source_->GetChars() + position_,
                                        length);
    // Manually inlined string table lookup.
    StringTable* string_table = isolate()->heap()->string_table();
    uint32_t capacity = string_table->Capacity();
    uint32_t entry = StringTable::FirstProbe(hash, capacity);
    uint32_t count = 1;
    Handle<String> result;
    while (true) {
      Object* element = string_table->KeyAt(entry);
      if (element->IsUndefined(isolate())) {
        // Lookup failure.
        result =
            factory()->InternalizeOneByteString(seq_source_, position_, length);
        break;
      }
      if (!element->IsTheHole(isolate()) &&
          String::cast(element)->IsOneByteEqualTo(string_vector)) {
        // Found an existing entry with the same contents.
        result = Handle<String>(String::cast(element), isolate());
        DCHECK_EQ(result->Hash(),
                  (hash << String::kHashShift) >> String::kHashShift);
        break;
      }
      entry = StringTable::NextProbe(entry, count++, capacity);
    }
    position_ = position;
    // Advance past the last '"'.
    AdvanceSkipWhitespace();
    return result;
  }

  int beg_pos = position_;
  // Fast case for Latin1 only without escape characters.
  do {
    // Check for control character (0x00-0x1F) or unterminated string (<0).
    if (c0_ < 0x20) return Handle<String>::null();
    if (c0_ != '\\') {
      if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
        Advance();
      } else {
        // Non-Latin1 character: continue with a two-byte result.
        return SlowScanJsonString<SeqTwoByteString, uc16>(source_, beg_pos,
                                                          position_);
      }
    } else {
      // Escape sequence: continue on the slow path with a one-byte result.
      return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, beg_pos,
                                                           position_);
    }
  } while (c0_ != '"');
  // No escapes and only Latin1 characters: copy the range into a new
  // one-byte string.
  int length = position_ - beg_pos;
  Handle<String> result =
      factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked();
  uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
  String::WriteToFlat(*source_, dest, beg_pos, position_);

  DCHECK_EQ('"', c0_);
  // Advance past the last '"'.
  AdvanceSkipWhitespace();
  return result;
}
942
943 // Explicit instantiation.
944 template class JsonParser<true>;
945 template class JsonParser<false>;
946
947 } // namespace internal
948 } // namespace v8
949