1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_PARSING_JSON_PARSER_H_
6 #define V8_PARSING_JSON_PARSER_H_
7 
8 #include "src/char-predicates.h"
9 #include "src/conversions.h"
10 #include "src/debug/debug.h"
11 #include "src/factory.h"
12 #include "src/messages.h"
13 #include "src/parsing/scanner.h"
14 #include "src/parsing/token.h"
15 #include "src/transitions.h"
16 #include "src/types.h"
17 
18 namespace v8 {
19 namespace internal {
20 
// Outcome of JsonParser::ParseElement():
//   kElementFound    - the key was an element index and the value was stored.
//   kElementNotFound - the key is not (or could not be parsed as) an element
//                      index; the caller re-parses it as a named property.
//   kNullHandle      - parsing the element's value returned a null handle.
enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle };
22 
23 
24 // A simple json parser.
25 template <bool seq_one_byte>
26 class JsonParser BASE_EMBEDDED {
27  public:
Parse(Handle<String> source)28   MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) {
29     return JsonParser(source).ParseJson();
30   }
31 
32   static const int kEndOfString = -1;
33 
34  private:
JsonParser(Handle<String> source)35   explicit JsonParser(Handle<String> source)
36       : source_(source),
37         source_length_(source->length()),
38         isolate_(source->map()->GetHeap()->isolate()),
39         factory_(isolate_->factory()),
40         object_constructor_(isolate_->native_context()->object_function(),
41                             isolate_),
42         position_(-1) {
43     source_ = String::Flatten(source_);
44     pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
45 
46     // Optimized fast case where we only have Latin1 characters.
47     if (seq_one_byte) {
48       seq_source_ = Handle<SeqOneByteString>::cast(source_);
49     }
50   }
51 
52   // Parse a string containing a single JSON value.
53   MaybeHandle<Object> ParseJson();
54 
Advance()55   inline void Advance() {
56     position_++;
57     if (position_ >= source_length_) {
58       c0_ = kEndOfString;
59     } else if (seq_one_byte) {
60       c0_ = seq_source_->SeqOneByteStringGet(position_);
61     } else {
62       c0_ = source_->Get(position_);
63     }
64   }
65 
66   // The JSON lexical grammar is specified in the ECMAScript 5 standard,
67   // section 15.12.1.1. The only allowed whitespace characters between tokens
68   // are tab, carriage-return, newline and space.
69 
AdvanceSkipWhitespace()70   inline void AdvanceSkipWhitespace() {
71     do {
72       Advance();
73     } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
74   }
75 
SkipWhitespace()76   inline void SkipWhitespace() {
77     while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
78       Advance();
79     }
80   }
81 
AdvanceGetChar()82   inline uc32 AdvanceGetChar() {
83     Advance();
84     return c0_;
85   }
86 
87   // Checks that current charater is c.
88   // If so, then consume c and skip whitespace.
MatchSkipWhiteSpace(uc32 c)89   inline bool MatchSkipWhiteSpace(uc32 c) {
90     if (c0_ == c) {
91       AdvanceSkipWhitespace();
92       return true;
93     }
94     return false;
95   }
96 
97   // A JSON string (production JSONString) is subset of valid JavaScript string
98   // literals. The string must only be double-quoted (not single-quoted), and
99   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
100   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
ParseJsonString()101   Handle<String> ParseJsonString() {
102     return ScanJsonString<false>();
103   }
104 
ParseJsonString(Handle<String> expected)105   bool ParseJsonString(Handle<String> expected) {
106     int length = expected->length();
107     if (source_->length() - position_ - 1 > length) {
108       DisallowHeapAllocation no_gc;
109       String::FlatContent content = expected->GetFlatContent();
110       if (content.IsOneByte()) {
111         DCHECK_EQ('"', c0_);
112         const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
113         const uint8_t* expected_chars = content.ToOneByteVector().start();
114         for (int i = 0; i < length; i++) {
115           uint8_t c0 = input_chars[i];
116           if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') {
117             return false;
118           }
119         }
120         if (input_chars[length] == '"') {
121           position_ = position_ + length + 1;
122           AdvanceSkipWhitespace();
123           return true;
124         }
125       }
126     }
127     return false;
128   }
129 
ParseJsonInternalizedString()130   Handle<String> ParseJsonInternalizedString() {
131     return ScanJsonString<true>();
132   }
133 
134   template <bool is_internalized>
135   Handle<String> ScanJsonString();
136   // Creates a new string and copies prefix[start..end] into the beginning
137   // of it. Then scans the rest of the string, adding characters after the
138   // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char.
139   template <typename StringType, typename SinkChar>
140   Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
141 
142   // A JSON number (production JSONNumber) is a subset of the valid JavaScript
143   // decimal number literals.
144   // It includes an optional minus sign, must have at least one
145   // digit before and after a decimal point, may not have prefixed zeros (unless
146   // the integer part is zero), and may include an exponent part (e.g., "e-10").
147   // Hexadecimal and octal numbers are not allowed.
148   Handle<Object> ParseJsonNumber();
149 
150   // Parse a single JSON value from input (grammar production JSONValue).
151   // A JSON value is either a (double-quoted) string literal, a number literal,
152   // one of "true", "false", or "null", or an object or array literal.
153   Handle<Object> ParseJsonValue();
154 
155   // Parse a JSON object literal (grammar production JSONObject).
156   // An object literal is a squiggly-braced and comma separated sequence
157   // (possibly empty) of key/value pairs, where the key is a JSON string
158   // literal, the value is a JSON value, and the two are separated by a colon.
159   // A JSON array doesn't allow numbers and identifiers as keys, like a
160   // JavaScript array.
161   Handle<Object> ParseJsonObject();
162 
163   // Helper for ParseJsonObject. Parses the form "123": obj, which is recorded
164   // as an element, not a property.
165   ParseElementResult ParseElement(Handle<JSObject> json_object);
166 
167   // Parses a JSON array literal (grammar production JSONArray). An array
168   // literal is a square-bracketed and comma separated sequence (possibly empty)
169   // of JSON values.
170   // A JSON array doesn't allow leaving out values from the sequence, nor does
171   // it allow a terminal comma, like a JavaScript array does.
172   Handle<Object> ParseJsonArray();
173 
174 
175   // Mark that a parsing error has happened at the current token, and
176   // return a null handle. Primarily for readability.
ReportUnexpectedCharacter()177   inline Handle<Object> ReportUnexpectedCharacter() {
178     return Handle<Object>::null();
179   }
180 
isolate()181   inline Isolate* isolate() { return isolate_; }
factory()182   inline Factory* factory() { return factory_; }
object_constructor()183   inline Handle<JSFunction> object_constructor() { return object_constructor_; }
184 
185   static const int kInitialSpecialStringLength = 32;
186   static const int kPretenureTreshold = 100 * 1024;
187 
188 
189  private:
zone()190   Zone* zone() { return &zone_; }
191 
192   void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
193                                ZoneList<Handle<Object> >* properties);
194 
195   Handle<String> source_;
196   int source_length_;
197   Handle<SeqOneByteString> seq_source_;
198 
199   PretenureFlag pretenure_;
200   Isolate* isolate_;
201   Factory* factory_;
202   Zone zone_;
203   Handle<JSFunction> object_constructor_;
204   uc32 c0_;
205   int position_;
206 };
207 
208 template <bool seq_one_byte>
ParseJson()209 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
210   // Advance to the first character (possibly EOS)
211   AdvanceSkipWhitespace();
212   Handle<Object> result = ParseJsonValue();
213   if (result.is_null() || c0_ != kEndOfString) {
214     // Some exception (for example stack overflow) is already pending.
215     if (isolate_->has_pending_exception()) return Handle<Object>::null();
216 
217     // Parse failed. Current character is the unexpected token.
218     Factory* factory = this->factory();
219     MessageTemplate::Template message;
220     Handle<String> argument;
221 
222     switch (c0_) {
223       case kEndOfString:
224         message = MessageTemplate::kUnexpectedEOS;
225         break;
226       case '-':
227       case '0':
228       case '1':
229       case '2':
230       case '3':
231       case '4':
232       case '5':
233       case '6':
234       case '7':
235       case '8':
236       case '9':
237         message = MessageTemplate::kUnexpectedTokenNumber;
238         break;
239       case '"':
240         message = MessageTemplate::kUnexpectedTokenString;
241         break;
242       default:
243         message = MessageTemplate::kUnexpectedToken;
244         argument = factory->LookupSingleCharacterStringFromCode(c0_);
245         break;
246     }
247 
248     Handle<Script> script(factory->NewScript(source_));
249     // We should sent compile error event because we compile JSON object in
250     // separated source file.
251     isolate()->debug()->OnCompileError(script);
252     MessageLocation location(script, position_, position_ + 1);
253     Handle<Object> error = factory->NewSyntaxError(message, argument);
254     return isolate()->template Throw<Object>(error, &location);
255   }
256   return result;
257 }
258 
259 
260 // Parse any JSON value.
261 template <bool seq_one_byte>
ParseJsonValue()262 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
263   StackLimitCheck stack_check(isolate_);
264   if (stack_check.HasOverflowed()) {
265     isolate_->StackOverflow();
266     return Handle<Object>::null();
267   }
268 
269   if (stack_check.InterruptRequested()) {
270     ExecutionAccess access(isolate_);
271     // Avoid blocking GC in long running parser (v8:3974).
272     isolate_->stack_guard()->HandleGCInterrupt();
273   }
274 
275   if (c0_ == '"') return ParseJsonString();
276   if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
277   if (c0_ == '{') return ParseJsonObject();
278   if (c0_ == '[') return ParseJsonArray();
279   if (c0_ == 'f') {
280     if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
281         AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
282       AdvanceSkipWhitespace();
283       return factory()->false_value();
284     }
285     return ReportUnexpectedCharacter();
286   }
287   if (c0_ == 't') {
288     if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
289         AdvanceGetChar() == 'e') {
290       AdvanceSkipWhitespace();
291       return factory()->true_value();
292     }
293     return ReportUnexpectedCharacter();
294   }
295   if (c0_ == 'n') {
296     if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
297         AdvanceGetChar() == 'l') {
298       AdvanceSkipWhitespace();
299       return factory()->null_value();
300     }
301     return ReportUnexpectedCharacter();
302   }
303   return ReportUnexpectedCharacter();
304 }
305 
306 
307 template <bool seq_one_byte>
ParseElement(Handle<JSObject> json_object)308 ParseElementResult JsonParser<seq_one_byte>::ParseElement(
309     Handle<JSObject> json_object) {
310   uint32_t index = 0;
311   // Maybe an array index, try to parse it.
312   if (c0_ == '0') {
313     // With a leading zero, the string has to be "0" only to be an index.
314     Advance();
315   } else {
316     do {
317       int d = c0_ - '0';
318       if (index > 429496729U - ((d + 3) >> 3)) break;
319       index = (index * 10) + d;
320       Advance();
321     } while (IsDecimalDigit(c0_));
322   }
323 
324   if (c0_ == '"') {
325     // Successfully parsed index, parse and store element.
326     AdvanceSkipWhitespace();
327 
328     if (c0_ == ':') {
329       AdvanceSkipWhitespace();
330       Handle<Object> value = ParseJsonValue();
331       if (!value.is_null()) {
332         JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE)
333             .Assert();
334         return kElementFound;
335       } else {
336         return kNullHandle;
337       }
338     }
339   }
340   return kElementNotFound;
341 }
342 
// Parse a JSON object. Position must be right at '{'.
//
// Parsing runs in two phases:
//  1. While |transitioning| is true, each named property is matched against a
//     map transition from the current |map|. Values are buffered in
//     |properties| and only written to the object by CommitStateToJsonObject.
//  2. Once no transition matches (or a value does not fit the expected
//     representation), the buffered state is committed and the remaining
//     properties are defined directly on the object.
// Keys that look like element indices are handled by ParseElement in both
// phases. Returns a null handle on any syntax error.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
  HandleScope scope(isolate());
  Handle<JSObject> json_object =
      factory()->NewJSObject(object_constructor(), pretenure_);
  Handle<Map> map(json_object->map());
  // Index of the next descriptor to be filled while following transitions.
  int descriptor = 0;
  // Values of the properties parsed so far while transitioning.
  ZoneList<Handle<Object> > properties(8, zone());
  DCHECK_EQ(c0_, '{');

  bool transitioning = true;

  AdvanceSkipWhitespace();
  if (c0_ != '}') {
    do {
      if (c0_ != '"') return ReportUnexpectedCharacter();

      // Remember the opening quote so the key can be re-parsed as a string.
      int start_position = position_;
      Advance();

      if (IsDecimalDigit(c0_)) {
        ParseElementResult element_result = ParseElement(json_object);
        if (element_result == kNullHandle) return Handle<Object>::null();
        // In a do/while, 'continue' jumps to the loop condition below.
        if (element_result == kElementFound) continue;
      }
      // Not an index, fallback to the slow path.

      // Rewind to the opening quote. c0_ is only restored in debug builds,
      // where the string scanners DCHECK it; they do not otherwise read it
      // before advancing.
      position_ = start_position;
#ifdef DEBUG
      c0_ = '"';
#endif

      Handle<String> key;
      Handle<Object> value;

      // Try to follow existing transitions as long as possible. Once we stop
      // transitioning, no transition can be found anymore.
      DCHECK(transitioning);
      // First check whether there is a single expected transition. If so, try
      // to parse it first.
      bool follow_expected = false;
      Handle<Map> target;
      if (seq_one_byte) {
        key = TransitionArray::ExpectedTransitionKey(map);
        follow_expected = !key.is_null() && ParseJsonString(key);
      }
      // If the expected transition hits, follow it.
      if (follow_expected) {
        target = TransitionArray::ExpectedTransitionTarget(map);
      } else {
        // If the expected transition failed, parse an internalized string and
        // try to find a matching transition.
        key = ParseJsonInternalizedString();
        if (key.is_null()) return ReportUnexpectedCharacter();

        target = TransitionArray::FindTransitionToField(map, key);
        // If a transition was found, follow it and continue.
        transitioning = !target.is_null();
      }
      if (c0_ != ':') return ReportUnexpectedCharacter();

      AdvanceSkipWhitespace();
      value = ParseJsonValue();
      if (value.is_null()) return ReportUnexpectedCharacter();

      if (transitioning) {
        PropertyDetails details =
            target->instance_descriptors()->GetDetails(descriptor);
        Representation expected_representation = details.representation();

        if (value->FitsRepresentation(expected_representation)) {
          // Generalize the target's field type when it does not yet contain
          // this heap-object value.
          if (expected_representation.IsHeapObject() &&
              !target->instance_descriptors()
                   ->GetFieldType(descriptor)
                   ->NowContains(value)) {
            Handle<HeapType> value_type(
                value->OptimalType(isolate(), expected_representation));
            Map::GeneralizeFieldType(target, descriptor,
                                     expected_representation, value_type);
          }
          DCHECK(target->instance_descriptors()
                     ->GetFieldType(descriptor)
                     ->NowContains(value));
          // Buffer the value and move on to the target map.
          properties.Add(value, zone());
          map = target;
          descriptor++;
          continue;
        } else {
          // The value does not fit the expected representation; stop
          // following transitions and define this property directly below.
          transitioning = false;
        }
      }

      DCHECK(!transitioning);

      // Commit the intermediate state to the object and stop transitioning.
      CommitStateToJsonObject(json_object, map, &properties);

      JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
          .Check();
    } while (transitioning && MatchSkipWhiteSpace(','));

    // If we transitioned until the very end, transition the map now.
    if (transitioning) {
      CommitStateToJsonObject(json_object, map, &properties);
    } else {
      // Second phase: define the remaining properties directly on the object.
      while (MatchSkipWhiteSpace(',')) {
        HandleScope local_scope(isolate());
        if (c0_ != '"') return ReportUnexpectedCharacter();

        int start_position = position_;
        Advance();

        if (IsDecimalDigit(c0_)) {
          ParseElementResult element_result = ParseElement(json_object);
          if (element_result == kNullHandle) return Handle<Object>::null();
          if (element_result == kElementFound) continue;
        }
        // Not an index, fallback to the slow path.

        // Rewind to the opening quote (see the note in the first loop).
        position_ = start_position;
#ifdef DEBUG
        c0_ = '"';
#endif

        Handle<String> key;
        Handle<Object> value;

        key = ParseJsonInternalizedString();
        if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();

        AdvanceSkipWhitespace();
        value = ParseJsonValue();
        if (value.is_null()) return ReportUnexpectedCharacter();

        JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
                                                          value).Check();
      }
    }

    if (c0_ != '}') {
      return ReportUnexpectedCharacter();
    }
  }
  // Consume the closing '}' and any whitespace after it.
  AdvanceSkipWhitespace();
  return scope.CloseAndEscape(json_object);
}
490 
491 
492 template <bool seq_one_byte>
CommitStateToJsonObject(Handle<JSObject> json_object,Handle<Map> map,ZoneList<Handle<Object>> * properties)493 void JsonParser<seq_one_byte>::CommitStateToJsonObject(
494     Handle<JSObject> json_object, Handle<Map> map,
495     ZoneList<Handle<Object> >* properties) {
496   JSObject::AllocateStorageForMap(json_object, map);
497   DCHECK(!json_object->map()->is_dictionary_map());
498 
499   DisallowHeapAllocation no_gc;
500 
501   int length = properties->length();
502   for (int i = 0; i < length; i++) {
503     Handle<Object> value = (*properties)[i];
504     json_object->WriteToField(i, *value);
505   }
506 }
507 
508 
509 // Parse a JSON array. Position must be right at '['.
510 template <bool seq_one_byte>
ParseJsonArray()511 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
512   HandleScope scope(isolate());
513   ZoneList<Handle<Object> > elements(4, zone());
514   DCHECK_EQ(c0_, '[');
515 
516   AdvanceSkipWhitespace();
517   if (c0_ != ']') {
518     do {
519       Handle<Object> element = ParseJsonValue();
520       if (element.is_null()) return ReportUnexpectedCharacter();
521       elements.Add(element, zone());
522     } while (MatchSkipWhiteSpace(','));
523     if (c0_ != ']') {
524       return ReportUnexpectedCharacter();
525     }
526   }
527   AdvanceSkipWhitespace();
528   // Allocate a fixed array with all the elements.
529   Handle<FixedArray> fast_elements =
530       factory()->NewFixedArray(elements.length(), pretenure_);
531   for (int i = 0, n = elements.length(); i < n; i++) {
532     fast_elements->set(i, *elements[i]);
533   }
534   Handle<Object> json_array = factory()->NewJSArrayWithElements(
535       fast_elements, FAST_ELEMENTS, Strength::WEAK, pretenure_);
536   return scope.CloseAndEscape(json_array);
537 }
538 
539 
540 template <bool seq_one_byte>
ParseJsonNumber()541 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
542   bool negative = false;
543   int beg_pos = position_;
544   if (c0_ == '-') {
545     Advance();
546     negative = true;
547   }
548   if (c0_ == '0') {
549     Advance();
550     // Prefix zero is only allowed if it's the only digit before
551     // a decimal point or exponent.
552     if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
553   } else {
554     int i = 0;
555     int digits = 0;
556     if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
557     do {
558       i = i * 10 + c0_ - '0';
559       digits++;
560       Advance();
561     } while (IsDecimalDigit(c0_));
562     if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
563       SkipWhitespace();
564       return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
565     }
566   }
567   if (c0_ == '.') {
568     Advance();
569     if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
570     do {
571       Advance();
572     } while (IsDecimalDigit(c0_));
573   }
574   if (AsciiAlphaToLower(c0_) == 'e') {
575     Advance();
576     if (c0_ == '-' || c0_ == '+') Advance();
577     if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
578     do {
579       Advance();
580     } while (IsDecimalDigit(c0_));
581   }
582   int length = position_ - beg_pos;
583   double number;
584   if (seq_one_byte) {
585     Vector<const uint8_t> chars(seq_source_->GetChars() +  beg_pos, length);
586     number = StringToDouble(isolate()->unicode_cache(), chars,
587                             NO_FLAGS,  // Hex, octal or trailing junk.
588                             std::numeric_limits<double>::quiet_NaN());
589   } else {
590     Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
591     String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
592     Vector<const uint8_t> result =
593         Vector<const uint8_t>(buffer.start(), length);
594     number = StringToDouble(isolate()->unicode_cache(),
595                             result,
596                             NO_FLAGS,  // Hex, octal or trailing junk.
597                             0.0);
598     buffer.Dispose();
599   }
600   SkipWhitespace();
601   return factory()->NewNumber(number, pretenure_);
602 }
603 
604 
// Stores character |c| at index |i| of the sequential string |seq_str|.
// Only the explicit specializations below are defined; each forwards to the
// setter of its concrete string type so that templated scanning code can
// write to either kind of string.
template <typename StringType>
inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);

// Two-byte specialization: forwards to SeqTwoByteStringSet.
template <>
inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
  seq_str->SeqTwoByteStringSet(i, c);
}

// One-byte specialization: forwards to SeqOneByteStringSet.
template <>
inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
  seq_str->SeqOneByteStringSet(i, c);
}
617 
618 template <typename StringType>
619 inline Handle<StringType> NewRawString(Factory* factory,
620                                        int length,
621                                        PretenureFlag pretenure);
622 
623 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)624 inline Handle<SeqTwoByteString> NewRawString(Factory* factory,
625                                              int length,
626                                              PretenureFlag pretenure) {
627   return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked();
628 }
629 
630 template <>
NewRawString(Factory * factory,int length,PretenureFlag pretenure)631 inline Handle<SeqOneByteString> NewRawString(Factory* factory,
632                                            int length,
633                                            PretenureFlag pretenure) {
634   return factory->NewRawOneByteString(length, pretenure).ToHandleChecked();
635 }
636 
637 
// Scans the rest of a JSON string starting from position_ and writes
// prefix[start..end] along with the scanned characters into a
// sequential string of type StringType.
//
// The destination buffer is sized from the prefix length. When it fills up,
// or when a character does not fit a one-byte sink, the function calls itself
// with the partially built string as the new prefix (switching to a two-byte
// sink in the latter case). Returns a null handle on a control character,
// end of input, or an invalid escape sequence; otherwise consumes the closing
// quote plus trailing whitespace and returns the string truncated to the
// number of characters written.
template <bool seq_one_byte>
template <typename StringType, typename SinkChar>
Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
    Handle<String> prefix, int start, int end) {
  // Number of characters written to the destination so far.
  int count = end - start;
  // Upper bound: the prefix plus everything left in the source.
  int max_length = count + source_length_ - position_;
  int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
  Handle<StringType> seq_string =
      NewRawString<StringType>(factory(), length, pretenure_);
  // Copy prefix into seq_str.
  SinkChar* dest = seq_string->GetChars();
  String::WriteToFlat(*prefix, dest, start, end);

  while (c0_ != '"') {
    // Check for control character (0x00-0x1f) or unterminated string (<0).
    if (c0_ < 0x20) return Handle<String>::null();
    if (count >= length) {
      // We need to create a longer sequential string for the result.
      return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
    }
    if (c0_ != '\\') {
      // If the sink can contain UC16 characters, or source_ contains only
      // Latin1 characters, there's no need to test whether we can store the
      // character. Otherwise check whether the UC16 source character can fit
      // in the Latin1 sink.
      if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
          c0_ <= String::kMaxOneByteCharCode) {
        SeqStringSet(seq_string, count++, c0_);
        Advance();
      } else {
        // StringType is SeqOneByteString and we just read a non-Latin1 char.
        return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
      }
    } else {
      Advance();  // Advance past the \.
      switch (c0_) {
        // Escapes that stand for the escaped character itself.
        case '"':
        case '\\':
        case '/':
          SeqStringSet(seq_string, count++, c0_);
          break;
        case 'b':
          SeqStringSet(seq_string, count++, '\x08');
          break;
        case 'f':
          SeqStringSet(seq_string, count++, '\x0c');
          break;
        case 'n':
          SeqStringSet(seq_string, count++, '\x0a');
          break;
        case 'r':
          SeqStringSet(seq_string, count++, '\x0d');
          break;
        case 't':
          SeqStringSet(seq_string, count++, '\x09');
          break;
        case 'u': {
          // \uXXXX: exactly four hex digits are required.
          uc32 value = 0;
          for (int i = 0; i < 4; i++) {
            Advance();
            int digit = HexValue(c0_);
            if (digit < 0) {
              return Handle<String>::null();
            }
            value = value * 16 + digit;
          }
          if (sizeof(SinkChar) == kUC16Size ||
              value <= String::kMaxOneByteCharCode) {
            SeqStringSet(seq_string, count++, value);
            break;
          } else {
            // StringType is SeqOneByteString and we just read a non-Latin1
            // char.
            // position_ is on the last hex digit; step back so that the
            // following Advance() lands on the backslash again and the
            // two-byte scan re-reads the whole escape.
            position_ -= 6;  // Rewind position_ to \ in \uxxxx.
            Advance();
            return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string,
                                                              0,
                                                              count);
          }
        }
        default:
          // Any other escape is invalid.
          return Handle<String>::null();
      }
      // Step past the last character of the escape sequence.
      Advance();
    }
  }

  DCHECK_EQ('"', c0_);
  // Advance past the last '"'.
  AdvanceSkipWhitespace();

  // Shrink seq_string length to count and return.
  return SeqString::Truncate(seq_string, count);
}
735 
736 
// Scans a JSON string literal. The current character must be the opening
// quote. On success the closing quote and trailing whitespace are consumed.
//
// Three paths are used:
//  - seq_one_byte && is_internalized: hash the characters while scanning and
//    probe the string table directly, internalizing on a miss.
//  - otherwise, while only characters that fit a one-byte string and no
//    escapes are seen: scan ahead and copy the range into a new one-byte
//    string.
//  - on a backslash or a character above kMaxOneByteCharCode: hand over to
//    SlowScanJsonString with the characters scanned so far as prefix.
// Returns a null handle on a control character or unterminated string.
template <bool seq_one_byte>
template <bool is_internalized>
Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
  DCHECK_EQ('"', c0_);
  Advance();
  if (c0_ == '"') {
    AdvanceSkipWhitespace();
    return factory()->empty_string();
  }

  if (seq_one_byte && is_internalized) {
    // Fast path for existing internalized strings. A backslash switches to
    // SlowScanJsonString; a control character or reaching the end of the
    // source before the closing quote returns an empty handle. A string that
    // is not yet in the string table is internalized.
    uint32_t running_hash = isolate()->heap()->HashSeed();
    // Scan with local copies; position_ and c0_ are only updated when
    // leaving this path via the slow scanner or on success.
    int position = position_;
    uc32 c0 = c0_;
    do {
      if (c0 == '\\') {
        c0_ = c0;
        int beg_pos = position_;
        position_ = position;
        return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
                                                             beg_pos,
                                                             position_);
      }
      if (c0 < 0x20) return Handle<String>::null();
      running_hash = StringHasher::AddCharacterCore(running_hash,
                                                    static_cast<uint16_t>(c0));
      position++;
      if (position >= source_length_) return Handle<String>::null();
      c0 = seq_source_->SeqOneByteStringGet(position);
    } while (c0 != '"');
    int length = position - position_;
    // Strings longer than kMaxHashCalcLength use their length as the hash.
    uint32_t hash = (length <= String::kMaxHashCalcLength)
                        ? StringHasher::GetHashCore(running_hash)
                        : static_cast<uint32_t>(length);
    Vector<const uint8_t> string_vector(
        seq_source_->GetChars() + position_, length);
    StringTable* string_table = isolate()->heap()->string_table();
    uint32_t capacity = string_table->Capacity();
    uint32_t entry = StringTable::FirstProbe(hash, capacity);
    uint32_t count = 1;
    Handle<String> result;
    // Probe the string table until a match or an undefined slot is found.
    while (true) {
      Object* element = string_table->KeyAt(entry);
      if (element == isolate()->heap()->undefined_value()) {
        // Lookup failure.
        result = factory()->InternalizeOneByteString(
            seq_source_, position_, length);
        break;
      }
      if (element != isolate()->heap()->the_hole_value() &&
          String::cast(element)->IsOneByteEqualTo(string_vector)) {
        result = Handle<String>(String::cast(element), isolate());
#ifdef DEBUG
        uint32_t hash_field =
            (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
        DCHECK_EQ(static_cast<int>(result->Hash()),
                  static_cast<int>(hash_field >> String::kHashShift));
#endif
        break;
      }
      entry = StringTable::NextProbe(entry, count++, capacity);
    }
    // Move onto the closing quote found by the local scan.
    position_ = position;
    // Advance past the last '"'.
    AdvanceSkipWhitespace();
    return result;
  }

  int beg_pos = position_;
  // Fast case for Latin1 only without escape characters.
  do {
    // Check for control character (0x00-0x1f) or unterminated string (<0).
    if (c0_ < 0x20) return Handle<String>::null();
    if (c0_ != '\\') {
      if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
        Advance();
      } else {
        // Non-Latin1 character: continue in a two-byte string.
        return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
                                                          beg_pos,
                                                          position_);
      }
    } else {
      // Escape sequence: continue in the slow scanner.
      return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
                                                           beg_pos,
                                                           position_);
    }
  } while (c0_ != '"');
  // Copy the scanned range [beg_pos, position_) into a new one-byte string.
  int length = position_ - beg_pos;
  Handle<String> result =
      factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked();
  uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
  String::WriteToFlat(*source_, dest, beg_pos, position_);

  DCHECK_EQ('"', c0_);
  // Advance past the last '"'.
  AdvanceSkipWhitespace();
  return result;
}
838 
839 }  // namespace internal
840 }  // namespace v8
841 
842 #endif  // V8_PARSING_JSON_PARSER_H_
843