// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "base/json/json_parser.h"
6 
7 #include <cmath>
8 
9 #include "base/logging.h"
10 #include "base/macros.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/strings/utf_string_conversion_utils.h"
17 #include "base/third_party/icu/icu_utf.h"
18 #include "base/values.h"
19 
20 namespace base {
21 namespace internal {
22 
23 namespace {
24 
// Nesting depth at which StackMarker::IsTooDeep() starts reporting true, which
// makes the dictionary/list consumers fail with JSON_TOO_MUCH_NESTING.
const int kStackMaxDepth = 100;

// Code points below this value are appended as a single char; anything at or
// above it is encoded as a multi-byte UTF-8 sequence.
const int32_t kExtendedASCIIStart = 0x80;
28 
29 // This and the class below are used to own the JSON input string for when
30 // string tokens are stored as StringPiece instead of std::string. This
31 // optimization avoids about 2/3rds of string memory copies. The constructor
32 // takes ownership of the input string. The real root value is Swap()ed into
33 // the new instance.
34 class DictionaryHiddenRootValue : public DictionaryValue {
35  public:
DictionaryHiddenRootValue(std::string * json,Value * root)36   DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
37     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
38     DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
39   }
40 
Swap(DictionaryValue * other)41   void Swap(DictionaryValue* other) override {
42     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
43 
44     // First deep copy to convert JSONStringValue to std::string and swap that
45     // copy with |other|, which contains the new contents of |this|.
46     scoped_ptr<DictionaryValue> copy(DeepCopy());
47     copy->Swap(other);
48 
49     // Then erase the contents of the current dictionary and swap in the
50     // new contents, originally from |other|.
51     Clear();
52     json_.reset();
53     DictionaryValue::Swap(copy.get());
54   }
55 
56   // Not overriding DictionaryValue::Remove because it just calls through to
57   // the method below.
58 
RemoveWithoutPathExpansion(const std::string & key,scoped_ptr<Value> * out)59   bool RemoveWithoutPathExpansion(const std::string& key,
60                                   scoped_ptr<Value>* out) override {
61     // If the caller won't take ownership of the removed value, just call up.
62     if (!out)
63       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
64 
65     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
66 
67     // Otherwise, remove the value while its still "owned" by this and copy it
68     // to convert any JSONStringValues to std::string.
69     scoped_ptr<Value> out_owned;
70     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
71       return false;
72 
73     out->reset(out_owned->DeepCopy());
74 
75     return true;
76   }
77 
78  private:
79   scoped_ptr<std::string> json_;
80 
81   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
82 };
83 
84 class ListHiddenRootValue : public ListValue {
85  public:
ListHiddenRootValue(std::string * json,Value * root)86   ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
87     DCHECK(root->IsType(Value::TYPE_LIST));
88     ListValue::Swap(static_cast<ListValue*>(root));
89   }
90 
Swap(ListValue * other)91   void Swap(ListValue* other) override {
92     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
93 
94     // First deep copy to convert JSONStringValue to std::string and swap that
95     // copy with |other|, which contains the new contents of |this|.
96     scoped_ptr<ListValue> copy(DeepCopy());
97     copy->Swap(other);
98 
99     // Then erase the contents of the current list and swap in the new contents,
100     // originally from |other|.
101     Clear();
102     json_.reset();
103     ListValue::Swap(copy.get());
104   }
105 
Remove(size_t index,scoped_ptr<Value> * out)106   bool Remove(size_t index, scoped_ptr<Value>* out) override {
107     // If the caller won't take ownership of the removed value, just call up.
108     if (!out)
109       return ListValue::Remove(index, out);
110 
111     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
112 
113     // Otherwise, remove the value while its still "owned" by this and copy it
114     // to convert any JSONStringValues to std::string.
115     scoped_ptr<Value> out_owned;
116     if (!ListValue::Remove(index, &out_owned))
117       return false;
118 
119     out->reset(out_owned->DeepCopy());
120 
121     return true;
122   }
123 
124  private:
125   scoped_ptr<std::string> json_;
126 
127   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
128 };
129 
130 // A variant on StringValue that uses StringPiece instead of copying the string
131 // into the Value. This can only be stored in a child of hidden root (above),
132 // otherwise the referenced string will not be guaranteed to outlive it.
133 class JSONStringValue : public Value {
134  public:
JSONStringValue(const StringPiece & piece)135   explicit JSONStringValue(const StringPiece& piece)
136       : Value(TYPE_STRING),
137         string_piece_(piece) {
138   }
139 
140   // Overridden from Value:
GetAsString(std::string * out_value) const141   bool GetAsString(std::string* out_value) const override {
142     string_piece_.CopyToString(out_value);
143     return true;
144   }
DeepCopy() const145   Value* DeepCopy() const override {
146     return new StringValue(string_piece_.as_string());
147   }
Equals(const Value * other) const148   bool Equals(const Value* other) const override {
149     std::string other_string;
150     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
151         StringPiece(other_string) == string_piece_;
152   }
153 
154  private:
155   // The location in the original input stream.
156   StringPiece string_piece_;
157 
158   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
159 };
160 
161 // Simple class that checks for maximum recursion/"stack overflow."
162 class StackMarker {
163  public:
StackMarker(int * depth)164   explicit StackMarker(int* depth) : depth_(depth) {
165     ++(*depth_);
166     DCHECK_LE(*depth_, kStackMaxDepth);
167   }
~StackMarker()168   ~StackMarker() {
169     --(*depth_);
170   }
171 
IsTooDeep() const172   bool IsTooDeep() const {
173     return *depth_ >= kStackMaxDepth;
174   }
175 
176  private:
177   int* const depth_;
178 
179   DISALLOW_COPY_AND_ASSIGN(StackMarker);
180 };
181 
182 }  // namespace
183 
JSONParser(int options)184 JSONParser::JSONParser(int options)
185     : options_(options),
186       start_pos_(NULL),
187       pos_(NULL),
188       end_pos_(NULL),
189       index_(0),
190       stack_depth_(0),
191       line_number_(0),
192       index_last_line_(0),
193       error_code_(JSONReader::JSON_NO_ERROR),
194       error_line_(0),
195       error_column_(0) {
196 }
197 
~JSONParser()198 JSONParser::~JSONParser() {
199 }
200 
Parse(const StringPiece & input)201 Value* JSONParser::Parse(const StringPiece& input) {
202   scoped_ptr<std::string> input_copy;
203   // If the children of a JSON root can be detached, then hidden roots cannot
204   // be used, so do not bother copying the input because StringPiece will not
205   // be used anywhere.
206   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
207     input_copy.reset(new std::string(input.as_string()));
208     start_pos_ = input_copy->data();
209   } else {
210     start_pos_ = input.data();
211   }
212   pos_ = start_pos_;
213   end_pos_ = start_pos_ + input.length();
214   index_ = 0;
215   line_number_ = 1;
216   index_last_line_ = 0;
217 
218   error_code_ = JSONReader::JSON_NO_ERROR;
219   error_line_ = 0;
220   error_column_ = 0;
221 
222   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
223   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
224   // ParseNextToken function mis-treating a Unicode BOM as an invalid
225   // character and returning NULL.
226   if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
227       static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
228       static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
229     NextNChars(3);
230   }
231 
232   // Parse the first and any nested tokens.
233   scoped_ptr<Value> root(ParseNextToken());
234   if (!root.get())
235     return NULL;
236 
237   // Make sure the input stream is at an end.
238   if (GetNextToken() != T_END_OF_INPUT) {
239     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
240       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
241       return NULL;
242     }
243   }
244 
245   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
246   // hidden root.
247   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
248     if (root->IsType(Value::TYPE_DICTIONARY)) {
249       return new DictionaryHiddenRootValue(input_copy.release(), root.get());
250     } else if (root->IsType(Value::TYPE_LIST)) {
251       return new ListHiddenRootValue(input_copy.release(), root.get());
252     } else if (root->IsType(Value::TYPE_STRING)) {
253       // A string type could be a JSONStringValue, but because there's no
254       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
255       // preserve it.
256       return root->DeepCopy();
257     }
258   }
259 
260   // All other values can be returned directly.
261   return root.release();
262 }
263 
error_code() const264 JSONReader::JsonParseError JSONParser::error_code() const {
265   return error_code_;
266 }
267 
GetErrorMessage() const268 std::string JSONParser::GetErrorMessage() const {
269   return FormatErrorMessage(error_line_, error_column_,
270       JSONReader::ErrorCodeToString(error_code_));
271 }
272 
error_line() const273 int JSONParser::error_line() const {
274   return error_line_;
275 }
276 
error_column() const277 int JSONParser::error_column() const {
278   return error_column_;
279 }
280 
// StringBuilder ///////////////////////////////////////////////////////////////
282 
StringBuilder()283 JSONParser::StringBuilder::StringBuilder()
284     : pos_(NULL),
285       length_(0),
286       string_(NULL) {
287 }
288 
StringBuilder(const char * pos)289 JSONParser::StringBuilder::StringBuilder(const char* pos)
290     : pos_(pos),
291       length_(0),
292       string_(NULL) {
293 }
294 
Swap(StringBuilder * other)295 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
296   std::swap(other->string_, string_);
297   std::swap(other->pos_, pos_);
298   std::swap(other->length_, length_);
299 }
300 
~StringBuilder()301 JSONParser::StringBuilder::~StringBuilder() {
302   delete string_;
303 }
304 
Append(const char & c)305 void JSONParser::StringBuilder::Append(const char& c) {
306   DCHECK_GE(c, 0);
307   DCHECK_LT(c, 128);
308 
309   if (string_)
310     string_->push_back(c);
311   else
312     ++length_;
313 }
314 
AppendString(const std::string & str)315 void JSONParser::StringBuilder::AppendString(const std::string& str) {
316   DCHECK(string_);
317   string_->append(str);
318 }
319 
Convert()320 void JSONParser::StringBuilder::Convert() {
321   if (string_)
322     return;
323   string_  = new std::string(pos_, length_);
324 }
325 
CanBeStringPiece() const326 bool JSONParser::StringBuilder::CanBeStringPiece() const {
327   return !string_;
328 }
329 
AsStringPiece()330 StringPiece JSONParser::StringBuilder::AsStringPiece() {
331   if (string_)
332     return StringPiece();
333   return StringPiece(pos_, length_);
334 }
335 
AsString()336 const std::string& JSONParser::StringBuilder::AsString() {
337   if (!string_)
338     Convert();
339   return *string_;
340 }
341 
// JSONParser private //////////////////////////////////////////////////////////
343 
CanConsume(int length)344 inline bool JSONParser::CanConsume(int length) {
345   return pos_ + length <= end_pos_;
346 }
347 
NextChar()348 const char* JSONParser::NextChar() {
349   DCHECK(CanConsume(1));
350   ++index_;
351   ++pos_;
352   return pos_;
353 }
354 
NextNChars(int n)355 void JSONParser::NextNChars(int n) {
356   DCHECK(CanConsume(n));
357   index_ += n;
358   pos_ += n;
359 }
360 
GetNextToken()361 JSONParser::Token JSONParser::GetNextToken() {
362   EatWhitespaceAndComments();
363   if (!CanConsume(1))
364     return T_END_OF_INPUT;
365 
366   switch (*pos_) {
367     case '{':
368       return T_OBJECT_BEGIN;
369     case '}':
370       return T_OBJECT_END;
371     case '[':
372       return T_ARRAY_BEGIN;
373     case ']':
374       return T_ARRAY_END;
375     case '"':
376       return T_STRING;
377     case '0':
378     case '1':
379     case '2':
380     case '3':
381     case '4':
382     case '5':
383     case '6':
384     case '7':
385     case '8':
386     case '9':
387     case '-':
388       return T_NUMBER;
389     case 't':
390       return T_BOOL_TRUE;
391     case 'f':
392       return T_BOOL_FALSE;
393     case 'n':
394       return T_NULL;
395     case ',':
396       return T_LIST_SEPARATOR;
397     case ':':
398       return T_OBJECT_PAIR_SEPARATOR;
399     default:
400       return T_INVALID_TOKEN;
401   }
402 }
403 
EatWhitespaceAndComments()404 void JSONParser::EatWhitespaceAndComments() {
405   while (pos_ < end_pos_) {
406     switch (*pos_) {
407       case '\r':
408       case '\n':
409         index_last_line_ = index_;
410         // Don't increment line_number_ twice for "\r\n".
411         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
412           ++line_number_;
413         // Fall through.
414       case ' ':
415       case '\t':
416         NextChar();
417         break;
418       case '/':
419         if (!EatComment())
420           return;
421         break;
422       default:
423         return;
424     }
425   }
426 }
427 
EatComment()428 bool JSONParser::EatComment() {
429   if (*pos_ != '/' || !CanConsume(1))
430     return false;
431 
432   char next_char = *NextChar();
433   if (next_char == '/') {
434     // Single line comment, read to newline.
435     while (CanConsume(1)) {
436       next_char = *NextChar();
437       if (next_char == '\n' || next_char == '\r')
438         return true;
439     }
440   } else if (next_char == '*') {
441     char previous_char = '\0';
442     // Block comment, read until end marker.
443     while (CanConsume(1)) {
444       next_char = *NextChar();
445       if (previous_char == '*' && next_char == '/') {
446         // EatWhitespaceAndComments will inspect pos_, which will still be on
447         // the last / of the comment, so advance once more (which may also be
448         // end of input).
449         NextChar();
450         return true;
451       }
452       previous_char = next_char;
453     }
454 
455     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
456   }
457 
458   return false;
459 }
460 
ParseNextToken()461 Value* JSONParser::ParseNextToken() {
462   return ParseToken(GetNextToken());
463 }
464 
ParseToken(Token token)465 Value* JSONParser::ParseToken(Token token) {
466   switch (token) {
467     case T_OBJECT_BEGIN:
468       return ConsumeDictionary();
469     case T_ARRAY_BEGIN:
470       return ConsumeList();
471     case T_STRING:
472       return ConsumeString();
473     case T_NUMBER:
474       return ConsumeNumber();
475     case T_BOOL_TRUE:
476     case T_BOOL_FALSE:
477     case T_NULL:
478       return ConsumeLiteral();
479     default:
480       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
481       return NULL;
482   }
483 }
484 
ConsumeDictionary()485 Value* JSONParser::ConsumeDictionary() {
486   if (*pos_ != '{') {
487     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
488     return NULL;
489   }
490 
491   StackMarker depth_check(&stack_depth_);
492   if (depth_check.IsTooDeep()) {
493     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
494     return NULL;
495   }
496 
497   scoped_ptr<DictionaryValue> dict(new DictionaryValue);
498 
499   NextChar();
500   Token token = GetNextToken();
501   while (token != T_OBJECT_END) {
502     if (token != T_STRING) {
503       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
504       return NULL;
505     }
506 
507     // First consume the key.
508     StringBuilder key;
509     if (!ConsumeStringRaw(&key)) {
510       return NULL;
511     }
512 
513     // Read the separator.
514     NextChar();
515     token = GetNextToken();
516     if (token != T_OBJECT_PAIR_SEPARATOR) {
517       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
518       return NULL;
519     }
520 
521     // The next token is the value. Ownership transfers to |dict|.
522     NextChar();
523     Value* value = ParseNextToken();
524     if (!value) {
525       // ReportError from deeper level.
526       return NULL;
527     }
528 
529     dict->SetWithoutPathExpansion(key.AsString(), value);
530 
531     NextChar();
532     token = GetNextToken();
533     if (token == T_LIST_SEPARATOR) {
534       NextChar();
535       token = GetNextToken();
536       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
537         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
538         return NULL;
539       }
540     } else if (token != T_OBJECT_END) {
541       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
542       return NULL;
543     }
544   }
545 
546   return dict.release();
547 }
548 
ConsumeList()549 Value* JSONParser::ConsumeList() {
550   if (*pos_ != '[') {
551     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
552     return NULL;
553   }
554 
555   StackMarker depth_check(&stack_depth_);
556   if (depth_check.IsTooDeep()) {
557     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
558     return NULL;
559   }
560 
561   scoped_ptr<ListValue> list(new ListValue);
562 
563   NextChar();
564   Token token = GetNextToken();
565   while (token != T_ARRAY_END) {
566     Value* item = ParseToken(token);
567     if (!item) {
568       // ReportError from deeper level.
569       return NULL;
570     }
571 
572     list->Append(item);
573 
574     NextChar();
575     token = GetNextToken();
576     if (token == T_LIST_SEPARATOR) {
577       NextChar();
578       token = GetNextToken();
579       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
580         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
581         return NULL;
582       }
583     } else if (token != T_ARRAY_END) {
584       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
585       return NULL;
586     }
587   }
588 
589   return list.release();
590 }
591 
ConsumeString()592 Value* JSONParser::ConsumeString() {
593   StringBuilder string;
594   if (!ConsumeStringRaw(&string))
595     return NULL;
596 
597   // Create the Value representation, using a hidden root, if configured
598   // to do so, and if the string can be represented by StringPiece.
599   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
600     return new JSONStringValue(string.AsStringPiece());
601   } else {
602     if (string.CanBeStringPiece())
603       string.Convert();
604     return new StringValue(string.AsString());
605   }
606 }
607 
ConsumeStringRaw(StringBuilder * out)608 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
609   if (*pos_ != '"') {
610     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
611     return false;
612   }
613 
614   // StringBuilder will internally build a StringPiece unless a UTF-16
615   // conversion occurs, at which point it will perform a copy into a
616   // std::string.
617   StringBuilder string(NextChar());
618 
619   int length = end_pos_ - start_pos_;
620   int32_t next_char = 0;
621 
622   while (CanConsume(1)) {
623     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
624     CBU8_NEXT(start_pos_, index_, length, next_char);
625     if (next_char < 0 || !IsValidCharacter(next_char)) {
626       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
627       return false;
628     }
629 
630     // If this character is an escape sequence...
631     if (next_char == '\\') {
632       // The input string will be adjusted (either by combining the two
633       // characters of an encoded escape sequence, or with a UTF conversion),
634       // so using StringPiece isn't possible -- force a conversion.
635       string.Convert();
636 
637       if (!CanConsume(1)) {
638         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
639         return false;
640       }
641 
642       switch (*NextChar()) {
643         // Allowed esape sequences:
644         case 'x': {  // UTF-8 sequence.
645           // UTF-8 \x escape sequences are not allowed in the spec, but they
646           // are supported here for backwards-compatiblity with the old parser.
647           if (!CanConsume(2)) {
648             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
649             return false;
650           }
651 
652           int hex_digit = 0;
653           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
654             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
655             return false;
656           }
657           NextChar();
658 
659           if (hex_digit < kExtendedASCIIStart)
660             string.Append(static_cast<char>(hex_digit));
661           else
662             DecodeUTF8(hex_digit, &string);
663           break;
664         }
665         case 'u': {  // UTF-16 sequence.
666           // UTF units are of the form \uXXXX.
667           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
668             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
669             return false;
670           }
671 
672           // Skip the 'u'.
673           NextChar();
674 
675           std::string utf8_units;
676           if (!DecodeUTF16(&utf8_units)) {
677             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
678             return false;
679           }
680 
681           string.AppendString(utf8_units);
682           break;
683         }
684         case '"':
685           string.Append('"');
686           break;
687         case '\\':
688           string.Append('\\');
689           break;
690         case '/':
691           string.Append('/');
692           break;
693         case 'b':
694           string.Append('\b');
695           break;
696         case 'f':
697           string.Append('\f');
698           break;
699         case 'n':
700           string.Append('\n');
701           break;
702         case 'r':
703           string.Append('\r');
704           break;
705         case 't':
706           string.Append('\t');
707           break;
708         case 'v':  // Not listed as valid escape sequence in the RFC.
709           string.Append('\v');
710           break;
711         // All other escape squences are illegal.
712         default:
713           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
714           return false;
715       }
716     } else if (next_char == '"') {
717       --index_;  // Rewind by one because of CBU8_NEXT.
718       out->Swap(&string);
719       return true;
720     } else {
721       if (next_char < kExtendedASCIIStart)
722         string.Append(static_cast<char>(next_char));
723       else
724         DecodeUTF8(next_char, &string);
725     }
726   }
727 
728   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
729   return false;
730 }
731 
732 // Entry is at the first X in \uXXXX.
DecodeUTF16(std::string * dest_string)733 bool JSONParser::DecodeUTF16(std::string* dest_string) {
734   if (!CanConsume(4))
735     return false;
736 
737   // This is a 32-bit field because the shift operations in the
738   // conversion process below cause MSVC to error about "data loss."
739   // This only stores UTF-16 code units, though.
740   // Consume the UTF-16 code unit, which may be a high surrogate.
741   int code_unit16_high = 0;
742   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
743     return false;
744 
745   // Only add 3, not 4, because at the end of this iteration, the parser has
746   // finished working with the last digit of the UTF sequence, meaning that
747   // the next iteration will advance to the next byte.
748   NextNChars(3);
749 
750   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
751   // code unit sequence.
752   char code_unit8[8] = { 0 };
753   size_t offset = 0;
754 
755   // If this is a high surrogate, consume the next code unit to get the
756   // low surrogate.
757   if (CBU16_IS_SURROGATE(code_unit16_high)) {
758     // Make sure this is the high surrogate. If not, it's an encoding
759     // error.
760     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
761       return false;
762 
763     // Make sure that the token has more characters to consume the
764     // lower surrogate.
765     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
766       return false;
767     if (*NextChar() != '\\' || *NextChar() != 'u')
768       return false;
769 
770     NextChar();  // Read past 'u'.
771     int code_unit16_low = 0;
772     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
773       return false;
774 
775     NextNChars(3);
776 
777     if (!CBU16_IS_TRAIL(code_unit16_low)) {
778       return false;
779     }
780 
781     uint32_t code_point =
782         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
783     if (!IsValidCharacter(code_point))
784       return false;
785 
786     offset = 0;
787     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
788   } else {
789     // Not a surrogate.
790     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
791     if (!IsValidCharacter(code_unit16_high))
792       return false;
793 
794     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
795   }
796 
797   dest_string->append(code_unit8);
798   return true;
799 }
800 
DecodeUTF8(const int32_t & point,StringBuilder * dest)801 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
802   DCHECK(IsValidCharacter(point));
803 
804   // Anything outside of the basic ASCII plane will need to be decoded from
805   // int32_t to a multi-byte sequence.
806   if (point < kExtendedASCIIStart) {
807     dest->Append(static_cast<char>(point));
808   } else {
809     char utf8_units[4] = { 0 };
810     int offset = 0;
811     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
812     dest->Convert();
813     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
814     // zero terminated at this point.  |offset| contains the correct length.
815     dest->AppendString(std::string(utf8_units, offset));
816   }
817 }
818 
ConsumeNumber()819 Value* JSONParser::ConsumeNumber() {
820   const char* num_start = pos_;
821   const int start_index = index_;
822   int end_index = start_index;
823 
824   if (*pos_ == '-')
825     NextChar();
826 
827   if (!ReadInt(false)) {
828     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
829     return NULL;
830   }
831   end_index = index_;
832 
833   // The optional fraction part.
834   if (*pos_ == '.') {
835     if (!CanConsume(1)) {
836       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
837       return NULL;
838     }
839     NextChar();
840     if (!ReadInt(true)) {
841       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
842       return NULL;
843     }
844     end_index = index_;
845   }
846 
847   // Optional exponent part.
848   if (*pos_ == 'e' || *pos_ == 'E') {
849     NextChar();
850     if (*pos_ == '-' || *pos_ == '+')
851       NextChar();
852     if (!ReadInt(true)) {
853       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
854       return NULL;
855     }
856     end_index = index_;
857   }
858 
859   // ReadInt is greedy because numbers have no easily detectable sentinel,
860   // so save off where the parser should be on exit (see Consume invariant at
861   // the top of the header), then make sure the next token is one which is
862   // valid.
863   const char* exit_pos = pos_ - 1;
864   int exit_index = index_ - 1;
865 
866   switch (GetNextToken()) {
867     case T_OBJECT_END:
868     case T_ARRAY_END:
869     case T_LIST_SEPARATOR:
870     case T_END_OF_INPUT:
871       break;
872     default:
873       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
874       return NULL;
875   }
876 
877   pos_ = exit_pos;
878   index_ = exit_index;
879 
880   StringPiece num_string(num_start, end_index - start_index);
881 
882   int num_int;
883   if (StringToInt(num_string, &num_int))
884     return new FundamentalValue(num_int);
885 
886   double num_double;
887   if (StringToDouble(num_string.as_string(), &num_double) &&
888       std::isfinite(num_double)) {
889     return new FundamentalValue(num_double);
890   }
891 
892   return NULL;
893 }
894 
ReadInt(bool allow_leading_zeros)895 bool JSONParser::ReadInt(bool allow_leading_zeros) {
896   char first = *pos_;
897   int len = 0;
898 
899   char c = first;
900   while (CanConsume(1) && std::isdigit(c)) {
901     c = *NextChar();
902     ++len;
903   }
904 
905   if (len == 0)
906     return false;
907 
908   if (!allow_leading_zeros && len > 1 && first == '0')
909     return false;
910 
911   return true;
912 }
913 
ConsumeLiteral()914 Value* JSONParser::ConsumeLiteral() {
915   switch (*pos_) {
916     case 't': {
917       const char kTrueLiteral[] = "true";
918       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
919       if (!CanConsume(kTrueLen - 1) ||
920           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
921         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
922         return NULL;
923       }
924       NextNChars(kTrueLen - 1);
925       return new FundamentalValue(true);
926     }
927     case 'f': {
928       const char kFalseLiteral[] = "false";
929       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
930       if (!CanConsume(kFalseLen - 1) ||
931           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
932         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
933         return NULL;
934       }
935       NextNChars(kFalseLen - 1);
936       return new FundamentalValue(false);
937     }
938     case 'n': {
939       const char kNullLiteral[] = "null";
940       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
941       if (!CanConsume(kNullLen - 1) ||
942           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
943         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
944         return NULL;
945       }
946       NextNChars(kNullLen - 1);
947       return Value::CreateNullValue().release();
948     }
949     default:
950       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
951       return NULL;
952   }
953 }
954 
955 // static
StringsAreEqual(const char * one,const char * two,size_t len)956 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
957   return strncmp(one, two, len) == 0;
958 }
959 
ReportError(JSONReader::JsonParseError code,int column_adjust)960 void JSONParser::ReportError(JSONReader::JsonParseError code,
961                              int column_adjust) {
962   error_code_ = code;
963   error_line_ = line_number_;
964   error_column_ = index_ - index_last_line_ + column_adjust;
965 }
966 
967 // static
FormatErrorMessage(int line,int column,const std::string & description)968 std::string JSONParser::FormatErrorMessage(int line, int column,
969                                            const std::string& description) {
970   if (line || column) {
971     return StringPrintf("Line: %i, column: %i, %s",
972         line, column, description.c_str());
973   }
974   return description;
975 }
976 
977 }  // namespace internal
978 }  // namespace base
979