1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_PARSING_JSON_PARSER_H_
6 #define V8_PARSING_JSON_PARSER_H_
7
8 #include "src/char-predicates.h"
9 #include "src/conversions.h"
10 #include "src/debug/debug.h"
11 #include "src/factory.h"
12 #include "src/messages.h"
13 #include "src/parsing/scanner.h"
14 #include "src/parsing/token.h"
15 #include "src/transitions.h"
16 #include "src/types.h"
17
18 namespace v8 {
19 namespace internal {
20
// Outcome of JsonParser::ParseElement().
enum ParseElementResult {
  // The key had array-index form and its value was parsed and stored.
  kElementFound,
  // The key (or what follows it) did not match the `"<index>": value` form;
  // nothing was stored.
  kElementNotFound,
  // The key matched, but parsing the value produced a null handle.
  kNullHandle
};
22
23
24 // A simple json parser.
25 template <bool seq_one_byte>
26 class JsonParser BASE_EMBEDDED {
27 public:
Parse(Handle<String> source)28 MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) {
29 return JsonParser(source).ParseJson();
30 }
31
32 static const int kEndOfString = -1;
33
34 private:
JsonParser(Handle<String> source)35 explicit JsonParser(Handle<String> source)
36 : source_(source),
37 source_length_(source->length()),
38 isolate_(source->map()->GetHeap()->isolate()),
39 factory_(isolate_->factory()),
40 object_constructor_(isolate_->native_context()->object_function(),
41 isolate_),
42 position_(-1) {
43 source_ = String::Flatten(source_);
44 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
45
46 // Optimized fast case where we only have Latin1 characters.
47 if (seq_one_byte) {
48 seq_source_ = Handle<SeqOneByteString>::cast(source_);
49 }
50 }
51
52 // Parse a string containing a single JSON value.
53 MaybeHandle<Object> ParseJson();
54
Advance()55 inline void Advance() {
56 position_++;
57 if (position_ >= source_length_) {
58 c0_ = kEndOfString;
59 } else if (seq_one_byte) {
60 c0_ = seq_source_->SeqOneByteStringGet(position_);
61 } else {
62 c0_ = source_->Get(position_);
63 }
64 }
65
66 // The JSON lexical grammar is specified in the ECMAScript 5 standard,
67 // section 15.12.1.1. The only allowed whitespace characters between tokens
68 // are tab, carriage-return, newline and space.
69
AdvanceSkipWhitespace()70 inline void AdvanceSkipWhitespace() {
71 do {
72 Advance();
73 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
74 }
75
SkipWhitespace()76 inline void SkipWhitespace() {
77 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
78 Advance();
79 }
80 }
81
AdvanceGetChar()82 inline uc32 AdvanceGetChar() {
83 Advance();
84 return c0_;
85 }
86
87 // Checks that current charater is c.
88 // If so, then consume c and skip whitespace.
MatchSkipWhiteSpace(uc32 c)89 inline bool MatchSkipWhiteSpace(uc32 c) {
90 if (c0_ == c) {
91 AdvanceSkipWhitespace();
92 return true;
93 }
94 return false;
95 }
96
97 // A JSON string (production JSONString) is subset of valid JavaScript string
98 // literals. The string must only be double-quoted (not single-quoted), and
99 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
100 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
ParseJsonString()101 Handle<String> ParseJsonString() {
102 return ScanJsonString<false>();
103 }
104
ParseJsonString(Handle<String> expected)105 bool ParseJsonString(Handle<String> expected) {
106 int length = expected->length();
107 if (source_->length() - position_ - 1 > length) {
108 DisallowHeapAllocation no_gc;
109 String::FlatContent content = expected->GetFlatContent();
110 if (content.IsOneByte()) {
111 DCHECK_EQ('"', c0_);
112 const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
113 const uint8_t* expected_chars = content.ToOneByteVector().start();
114 for (int i = 0; i < length; i++) {
115 uint8_t c0 = input_chars[i];
116 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
117 return false;
118 }
119 }
120 if (input_chars[length] == '"') {
121 position_ = position_ + length + 1;
122 AdvanceSkipWhitespace();
123 return true;
124 }
125 }
126 }
127 return false;
128 }
129
ParseJsonInternalizedString()130 Handle<String> ParseJsonInternalizedString() {
131 return ScanJsonString<true>();
132 }
133
134 template <bool is_internalized>
135 Handle<String> ScanJsonString();
136 // Creates a new string and copies prefix[start..end] into the beginning
137 // of it. Then scans the rest of the string, adding characters after the
138 // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char.
139 template <typename StringType, typename SinkChar>
140 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
141
142 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
143 // decimal number literals.
144 // It includes an optional minus sign, must have at least one
145 // digit before and after a decimal point, may not have prefixed zeros (unless
146 // the integer part is zero), and may include an exponent part (e.g., "e-10").
147 // Hexadecimal and octal numbers are not allowed.
148 Handle<Object> ParseJsonNumber();
149
150 // Parse a single JSON value from input (grammar production JSONValue).
151 // A JSON value is either a (double-quoted) string literal, a number literal,
152 // one of "true", "false", or "null", or an object or array literal.
153 Handle<Object> ParseJsonValue();
154
155 // Parse a JSON object literal (grammar production JSONObject).
156 // An object literal is a squiggly-braced and comma separated sequence
157 // (possibly empty) of key/value pairs, where the key is a JSON string
158 // literal, the value is a JSON value, and the two are separated by a colon.
159 // A JSON array doesn't allow numbers and identifiers as keys, like a
160 // JavaScript array.
161 Handle<Object> ParseJsonObject();
162
163 // Helper for ParseJsonObject. Parses the form "123": obj, which is recorded
164 // as an element, not a property.
165 ParseElementResult ParseElement(Handle<JSObject> json_object);
166
167 // Parses a JSON array literal (grammar production JSONArray). An array
168 // literal is a square-bracketed and comma separated sequence (possibly empty)
169 // of JSON values.
170 // A JSON array doesn't allow leaving out values from the sequence, nor does
171 // it allow a terminal comma, like a JavaScript array does.
172 Handle<Object> ParseJsonArray();
173
174
175 // Mark that a parsing error has happened at the current token, and
176 // return a null handle. Primarily for readability.
ReportUnexpectedCharacter()177 inline Handle<Object> ReportUnexpectedCharacter() {
178 return Handle<Object>::null();
179 }
180
isolate()181 inline Isolate* isolate() { return isolate_; }
factory()182 inline Factory* factory() { return factory_; }
object_constructor()183 inline Handle<JSFunction> object_constructor() { return object_constructor_; }
184
185 static const int kInitialSpecialStringLength = 32;
186 static const int kPretenureTreshold = 100 * 1024;
187
188
189 private:
zone()190 Zone* zone() { return &zone_; }
191
192 void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
193 ZoneList<Handle<Object> >* properties);
194
195 Handle<String> source_;
196 int source_length_;
197 Handle<SeqOneByteString> seq_source_;
198
199 PretenureFlag pretenure_;
200 Isolate* isolate_;
201 Factory* factory_;
202 Zone zone_;
203 Handle<JSFunction> object_constructor_;
204 uc32 c0_;
205 int position_;
206 };
207
208 template <bool seq_one_byte>
ParseJson()209 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
210 // Advance to the first character (possibly EOS)
211 AdvanceSkipWhitespace();
212 Handle<Object> result = ParseJsonValue();
213 if (result.is_null() || c0_ != kEndOfString) {
214 // Some exception (for example stack overflow) is already pending.
215 if (isolate_->has_pending_exception()) return Handle<Object>::null();
216
217 // Parse failed. Current character is the unexpected token.
218 Factory* factory = this->factory();
219 MessageTemplate::Template message;
220 Handle<String> argument;
221
222 switch (c0_) {
223 case kEndOfString:
224 message = MessageTemplate::kUnexpectedEOS;
225 break;
226 case '-':
227 case '0':
228 case '1':
229 case '2':
230 case '3':
231 case '4':
232 case '5':
233 case '6':
234 case '7':
235 case '8':
236 case '9':
237 message = MessageTemplate::kUnexpectedTokenNumber;
238 break;
239 case '"':
240 message = MessageTemplate::kUnexpectedTokenString;
241 break;
242 default:
243 message = MessageTemplate::kUnexpectedToken;
244 argument = factory->LookupSingleCharacterStringFromCode(c0_);
245 break;
246 }
247
248 Handle<Script> script(factory->NewScript(source_));
249 // We should sent compile error event because we compile JSON object in
250 // separated source file.
251 isolate()->debug()->OnCompileError(script);
252 MessageLocation location(script, position_, position_ + 1);
253 Handle<Object> error = factory->NewSyntaxError(message, argument);
254 return isolate()->template Throw<Object>(error, &location);
255 }
256 return result;
257 }
258
259
260 // Parse any JSON value.
261 template <bool seq_one_byte>
ParseJsonValue()262 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
263 StackLimitCheck stack_check(isolate_);
264 if (stack_check.HasOverflowed()) {
265 isolate_->StackOverflow();
266 return Handle<Object>::null();
267 }
268
269 if (stack_check.InterruptRequested()) {
270 ExecutionAccess access(isolate_);
271 // Avoid blocking GC in long running parser (v8:3974).
272 isolate_->stack_guard()->HandleGCInterrupt();
273 }
274
275 if (c0_ == '"') return ParseJsonString();
276 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
277 if (c0_ == '{') return ParseJsonObject();
278 if (c0_ == '[') return ParseJsonArray();
279 if (c0_ == 'f') {
280 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
281 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
282 AdvanceSkipWhitespace();
283 return factory()->false_value();
284 }
285 return ReportUnexpectedCharacter();
286 }
287 if (c0_ == 't') {
288 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
289 AdvanceGetChar() == 'e') {
290 AdvanceSkipWhitespace();
291 return factory()->true_value();
292 }
293 return ReportUnexpectedCharacter();
294 }
295 if (c0_ == 'n') {
296 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
297 AdvanceGetChar() == 'l') {
298 AdvanceSkipWhitespace();
299 return factory()->null_value();
300 }
301 return ReportUnexpectedCharacter();
302 }
303 return ReportUnexpectedCharacter();
304 }
305
306
307 template <bool seq_one_byte>
ParseElement(Handle<JSObject> json_object)308 ParseElementResult JsonParser<seq_one_byte>::ParseElement(
309 Handle<JSObject> json_object) {
310 uint32_t index = 0;
311 // Maybe an array index, try to parse it.
312 if (c0_ == '0') {
313 // With a leading zero, the string has to be "0" only to be an index.
314 Advance();
315 } else {
316 do {
317 int d = c0_ - '0';
318 if (index > 429496729U - ((d + 3) >> 3)) break;
319 index = (index * 10) + d;
320 Advance();
321 } while (IsDecimalDigit(c0_));
322 }
323
324 if (c0_ == '"') {
325 // Successfully parsed index, parse and store element.
326 AdvanceSkipWhitespace();
327
328 if (c0_ == ':') {
329 AdvanceSkipWhitespace();
330 Handle<Object> value = ParseJsonValue();
331 if (!value.is_null()) {
332 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
333 .Assert();
334 return kElementFound;
335 } else {
336 return kNullHandle;
337 }
338 }
339 }
340 return kElementNotFound;
341 }
342
343 // Parse a JSON object. Position must be right at '{'.
344 template <bool seq_one_byte>
ParseJsonObject()345 Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
346 HandleScope scope(isolate());
347 Handle<JSObject> json_object =
348 factory()->NewJSObject(object_constructor(), pretenure_);
349 Handle<Map> map(json_object->map());
350 int descriptor = 0;
351 ZoneList<Handle<Object> > properties(8, zone());
352 DCHECK_EQ(c0_, '{');
353
354 bool transitioning = true;
355
356 AdvanceSkipWhitespace();
357 if (c0_ != '}') {
358 do {
359 if (c0_ != '"') return ReportUnexpectedCharacter();
360
361 int start_position = position_;
362 Advance();
363
364 if (IsDecimalDigit(c0_)) {
365 ParseElementResult element_result = ParseElement(json_object);
366 if (element_result == kNullHandle) return Handle<Object>::null();
367 if (element_result == kElementFound) continue;
368 }
369 // Not an index, fallback to the slow path.
370
371 position_ = start_position;
372 #ifdef DEBUG
373 c0_ = '"';
374 #endif
375
376 Handle<String> key;
377 Handle<Object> value;
378
379 // Try to follow existing transitions as long as possible. Once we stop
380 // transitioning, no transition can be found anymore.
381 DCHECK(transitioning);
382 // First check whether there is a single expected transition. If so, try
383 // to parse it first.
384 bool follow_expected = false;
385 Handle<Map> target;
386 if (seq_one_byte) {
387 key = TransitionArray::ExpectedTransitionKey(map);
388 follow_expected = !key.is_null() && ParseJsonString(key);
389 }
390 // If the expected transition hits, follow it.
391 if (follow_expected) {
392 target = TransitionArray::ExpectedTransitionTarget(map);
393 } else {
394 // If the expected transition failed, parse an internalized string and
395 // try to find a matching transition.
396 key = ParseJsonInternalizedString();
397 if (key.is_null()) return ReportUnexpectedCharacter();
398
399 target = TransitionArray::FindTransitionToField(map, key);
400 // If a transition was found, follow it and continue.
401 transitioning = !target.is_null();
402 }
403 if (c0_ != ':') return ReportUnexpectedCharacter();
404
405 AdvanceSkipWhitespace();
406 value = ParseJsonValue();
407 if (value.is_null()) return ReportUnexpectedCharacter();
408
409 if (transitioning) {
410 PropertyDetails details =
411 target->instance_descriptors()->GetDetails(descriptor);
412 Representation expected_representation = details.representation();
413
414 if (value->FitsRepresentation(expected_representation)) {
415 if (expected_representation.IsHeapObject() &&
416 !target->instance_descriptors()
417 ->GetFieldType(descriptor)
418 ->NowContains(value)) {
419 Handle<HeapType> value_type(
420 value->OptimalType(isolate(), expected_representation));
421 Map::GeneralizeFieldType(target, descriptor,
422 expected_representation, value_type);
423 }
424 DCHECK(target->instance_descriptors()
425 ->GetFieldType(descriptor)
426 ->NowContains(value));
427 properties.Add(value, zone());
428 map = target;
429 descriptor++;
430 continue;
431 } else {
432 transitioning = false;
433 }
434 }
435
436 DCHECK(!transitioning);
437
438 // Commit the intermediate state to the object and stop transitioning.
439 CommitStateToJsonObject(json_object, map, &properties);
440
441 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
442 .Check();
443 } while (transitioning && MatchSkipWhiteSpace(','));
444
445 // If we transitioned until the very end, transition the map now.
446 if (transitioning) {
447 CommitStateToJsonObject(json_object, map, &properties);
448 } else {
449 while (MatchSkipWhiteSpace(',')) {
450 HandleScope local_scope(isolate());
451 if (c0_ != '"') return ReportUnexpectedCharacter();
452
453 int start_position = position_;
454 Advance();
455
456 if (IsDecimalDigit(c0_)) {
457 ParseElementResult element_result = ParseElement(json_object);
458 if (element_result == kNullHandle) return Handle<Object>::null();
459 if (element_result == kElementFound) continue;
460 }
461 // Not an index, fallback to the slow path.
462
463 position_ = start_position;
464 #ifdef DEBUG
465 c0_ = '"';
466 #endif
467
468 Handle<String> key;
469 Handle<Object> value;
470
471 key = ParseJsonInternalizedString();
472 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
473
474 AdvanceSkipWhitespace();
475 value = ParseJsonValue();
476 if (value.is_null()) return ReportUnexpectedCharacter();
477
478 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
479 value).Check();
480 }
481 }
482
483 if (c0_ != '}') {
484 return ReportUnexpectedCharacter();
485 }
486 }
487 AdvanceSkipWhitespace();
488 return scope.CloseAndEscape(json_object);
489 }
490
491
492 template <bool seq_one_byte>
CommitStateToJsonObject(Handle<JSObject> json_object,Handle<Map> map,ZoneList<Handle<Object>> * properties)493 void JsonParser<seq_one_byte>::CommitStateToJsonObject(
494 Handle<JSObject> json_object, Handle<Map> map,
495 ZoneList<Handle<Object> >* properties) {
496 JSObject::AllocateStorageForMap(json_object, map);
497 DCHECK(!json_object->map()->is_dictionary_map());
498
499 DisallowHeapAllocation no_gc;
500
501 int length = properties->length();
502 for (int i = 0; i < length; i++) {
503 Handle<Object> value = (*properties)[i];
504 json_object->WriteToField(i, *value);
505 }
506 }
507
508
509 // Parse a JSON array. Position must be right at '['.
510 template <bool seq_one_byte>
ParseJsonArray()511 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
512 HandleScope scope(isolate());
513 ZoneList<Handle<Object> > elements(4, zone());
514 DCHECK_EQ(c0_, '[');
515
516 AdvanceSkipWhitespace();
517 if (c0_ != ']') {
518 do {
519 Handle<Object> element = ParseJsonValue();
520 if (element.is_null()) return ReportUnexpectedCharacter();
521 elements.Add(element, zone());
522 } while (MatchSkipWhiteSpace(','));
523 if (c0_ != ']') {
524 return ReportUnexpectedCharacter();
525 }
526 }
527 AdvanceSkipWhitespace();
528 // Allocate a fixed array with all the elements.
529 Handle<FixedArray> fast_elements =
530 factory()->NewFixedArray(elements.length(), pretenure_);
531 for (int i = 0, n = elements.length(); i < n; i++) {
532 fast_elements->set(i, *elements[i]);
533 }
534 Handle<Object> json_array = factory()->NewJSArrayWithElements(
535 fast_elements, FAST_ELEMENTS, Strength::WEAK, pretenure_);
536 return scope.CloseAndEscape(json_array);
537 }
538
539
540 template <bool seq_one_byte>
ParseJsonNumber()541 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
542 bool negative = false;
543 int beg_pos = position_;
544 if (c0_ == '-') {
545 Advance();
546 negative = true;
547 }
548 if (c0_ == '0') {
549 Advance();
550 // Prefix zero is only allowed if it's the only digit before
551 // a decimal point or exponent.
552 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
553 } else {
554 int i = 0;
555 int digits = 0;
556 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
557 do {
558 i = i * 10 + c0_ - '0';
559 digits++;
560 Advance();
561 } while (IsDecimalDigit(c0_));
562 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
563 SkipWhitespace();
564 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
565 }
566 }
567 if (c0_ == '.') {
568 Advance();
569 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
570 do {
571 Advance();
572 } while (IsDecimalDigit(c0_));
573 }
574 if (AsciiAlphaToLower(c0_) == 'e') {
575 Advance();
576 if (c0_ == '-' || c0_ == '+') Advance();
577 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
578 do {
579 Advance();
580 } while (IsDecimalDigit(c0_));
581 }
582 int length = position_ - beg_pos;
583 double number;
584 if (seq_one_byte) {
585 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
586 number = StringToDouble(isolate()->unicode_cache(), chars,
587 NO_FLAGS, // Hex, octal or trailing junk.
588 std::numeric_limits<double>::quiet_NaN());
589 } else {
590 Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
591 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
592 Vector<const uint8_t> result =
593 Vector<const uint8_t>(buffer.start(), length);
594 number = StringToDouble(isolate()->unicode_cache(),
595 result,
596 NO_FLAGS, // Hex, octal or trailing junk.
597 0.0);
598 buffer.Dispose();
599 }
600 SkipWhitespace();
601 return factory()->NewNumber(number, pretenure_);
602 }
603
604
605 template <typename StringType>
606 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
607
608 template <>
SeqStringSet(Handle<SeqTwoByteString> seq_str,int i,uc32 c)609 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
610 seq_str->SeqTwoByteStringSet(i, c);
611 }
612
613 template <>
SeqStringSet(Handle<SeqOneByteString> seq_str,int i,uc32 c)614 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
615 seq_str->SeqOneByteStringSet(i, c);
616 }
617
618 template <typename StringType>
619 inline Handle<StringType> NewRawString(Factory* factory,
620 int length,
621 PretenureFlag pretenure);
622
623 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)624 inline Handle<SeqTwoByteString> NewRawString(Factory* factory,
625 int length,
626 PretenureFlag pretenure) {
627 return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked();
628 }
629
630 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)631 inline Handle<SeqOneByteString> NewRawString(Factory* factory,
632 int length,
633 PretenureFlag pretenure) {
634 return factory->NewRawOneByteString(length, pretenure).ToHandleChecked();
635 }
636
637
638 // Scans the rest of a JSON string starting from position_ and writes
639 // prefix[start..end] along with the scanned characters into a
640 // sequential string of type StringType.
641 template <bool seq_one_byte>
642 template <typename StringType, typename SinkChar>
SlowScanJsonString(Handle<String> prefix,int start,int end)643 Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
644 Handle<String> prefix, int start, int end) {
645 int count = end - start;
646 int max_length = count + source_length_ - position_;
647 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
648 Handle<StringType> seq_string =
649 NewRawString<StringType>(factory(), length, pretenure_);
650 // Copy prefix into seq_str.
651 SinkChar* dest = seq_string->GetChars();
652 String::WriteToFlat(*prefix, dest, start, end);
653
654 while (c0_ != '"') {
655 // Check for control character (0x00-0x1f) or unterminated string (<0).
656 if (c0_ < 0x20) return Handle<String>::null();
657 if (count >= length) {
658 // We need to create a longer sequential string for the result.
659 return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
660 }
661 if (c0_ != '\\') {
662 // If the sink can contain UC16 characters, or source_ contains only
663 // Latin1 characters, there's no need to test whether we can store the
664 // character. Otherwise check whether the UC16 source character can fit
665 // in the Latin1 sink.
666 if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
667 c0_ <= String::kMaxOneByteCharCode) {
668 SeqStringSet(seq_string, count++, c0_);
669 Advance();
670 } else {
671 // StringType is SeqOneByteString and we just read a non-Latin1 char.
672 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
673 }
674 } else {
675 Advance(); // Advance past the \.
676 switch (c0_) {
677 case '"':
678 case '\\':
679 case '/':
680 SeqStringSet(seq_string, count++, c0_);
681 break;
682 case 'b':
683 SeqStringSet(seq_string, count++, '\x08');
684 break;
685 case 'f':
686 SeqStringSet(seq_string, count++, '\x0c');
687 break;
688 case 'n':
689 SeqStringSet(seq_string, count++, '\x0a');
690 break;
691 case 'r':
692 SeqStringSet(seq_string, count++, '\x0d');
693 break;
694 case 't':
695 SeqStringSet(seq_string, count++, '\x09');
696 break;
697 case 'u': {
698 uc32 value = 0;
699 for (int i = 0; i < 4; i++) {
700 Advance();
701 int digit = HexValue(c0_);
702 if (digit < 0) {
703 return Handle<String>::null();
704 }
705 value = value * 16 + digit;
706 }
707 if (sizeof(SinkChar) == kUC16Size ||
708 value <= String::kMaxOneByteCharCode) {
709 SeqStringSet(seq_string, count++, value);
710 break;
711 } else {
712 // StringType is SeqOneByteString and we just read a non-Latin1
713 // char.
714 position_ -= 6; // Rewind position_ to \ in \uxxxx.
715 Advance();
716 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string,
717 0,
718 count);
719 }
720 }
721 default:
722 return Handle<String>::null();
723 }
724 Advance();
725 }
726 }
727
728 DCHECK_EQ('"', c0_);
729 // Advance past the last '"'.
730 AdvanceSkipWhitespace();
731
732 // Shrink seq_string length to count and return.
733 return SeqString::Truncate(seq_string, count);
734 }
735
736
737 template <bool seq_one_byte>
738 template <bool is_internalized>
ScanJsonString()739 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
740 DCHECK_EQ('"', c0_);
741 Advance();
742 if (c0_ == '"') {
743 AdvanceSkipWhitespace();
744 return factory()->empty_string();
745 }
746
747 if (seq_one_byte && is_internalized) {
748 // Fast path for existing internalized strings. If the the string being
749 // parsed is not a known internalized string, contains backslashes or
750 // unexpectedly reaches the end of string, return with an empty handle.
751 uint32_t running_hash = isolate()->heap()->HashSeed();
752 int position = position_;
753 uc32 c0 = c0_;
754 do {
755 if (c0 == '\\') {
756 c0_ = c0;
757 int beg_pos = position_;
758 position_ = position;
759 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
760 beg_pos,
761 position_);
762 }
763 if (c0 < 0x20) return Handle<String>::null();
764 running_hash = StringHasher::AddCharacterCore(running_hash,
765 static_cast<uint16_t>(c0));
766 position++;
767 if (position >= source_length_) return Handle<String>::null();
768 c0 = seq_source_->SeqOneByteStringGet(position);
769 } while (c0 != '"');
770 int length = position - position_;
771 uint32_t hash = (length <= String::kMaxHashCalcLength)
772 ? StringHasher::GetHashCore(running_hash)
773 : static_cast<uint32_t>(length);
774 Vector<const uint8_t> string_vector(
775 seq_source_->GetChars() + position_, length);
776 StringTable* string_table = isolate()->heap()->string_table();
777 uint32_t capacity = string_table->Capacity();
778 uint32_t entry = StringTable::FirstProbe(hash, capacity);
779 uint32_t count = 1;
780 Handle<String> result;
781 while (true) {
782 Object* element = string_table->KeyAt(entry);
783 if (element == isolate()->heap()->undefined_value()) {
784 // Lookup failure.
785 result = factory()->InternalizeOneByteString(
786 seq_source_, position_, length);
787 break;
788 }
789 if (element != isolate()->heap()->the_hole_value() &&
790 String::cast(element)->IsOneByteEqualTo(string_vector)) {
791 result = Handle<String>(String::cast(element), isolate());
792 #ifdef DEBUG
793 uint32_t hash_field =
794 (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
795 DCHECK_EQ(static_cast<int>(result->Hash()),
796 static_cast<int>(hash_field >> String::kHashShift));
797 #endif
798 break;
799 }
800 entry = StringTable::NextProbe(entry, count++, capacity);
801 }
802 position_ = position;
803 // Advance past the last '"'.
804 AdvanceSkipWhitespace();
805 return result;
806 }
807
808 int beg_pos = position_;
809 // Fast case for Latin1 only without escape characters.
810 do {
811 // Check for control character (0x00-0x1f) or unterminated string (<0).
812 if (c0_ < 0x20) return Handle<String>::null();
813 if (c0_ != '\\') {
814 if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
815 Advance();
816 } else {
817 return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
818 beg_pos,
819 position_);
820 }
821 } else {
822 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
823 beg_pos,
824 position_);
825 }
826 } while (c0_ != '"');
827 int length = position_ - beg_pos;
828 Handle<String> result =
829 factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked();
830 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
831 String::WriteToFlat(*source_, dest, beg_pos, position_);
832
833 DCHECK_EQ('"', c0_);
834 // Advance past the last '"'.
835 AdvanceSkipWhitespace();
836 return result;
837 }
838
839 } // namespace internal
840 } // namespace v8
841
842 #endif // V8_PARSING_JSON_PARSER_H_
843