1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/json_parser.h"
6 
7 #include <cmath>
8 
9 #include "base/logging.h"
10 #include "base/macros.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/strings/utf_string_conversion_utils.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/third_party/icu/icu_utf.h"
19 #include "base/values.h"
20 
21 namespace base {
22 namespace internal {
23 
24 namespace {
25 
// Nesting limit for the parser: StackMarker reports "too deep" once the
// shared depth counter is at or above this value.
const int kStackMaxDepth = 100;

// First code point outside of 7-bit ASCII. Code points below this value are
// appended as a single char; those at or above it are emitted as a multi-byte
// UTF-8 sequence.
const int32_t kExtendedASCIIStart = 0x80;
29 
30 // This and the class below are used to own the JSON input string for when
31 // string tokens are stored as StringPiece instead of std::string. This
32 // optimization avoids about 2/3rds of string memory copies. The constructor
33 // takes ownership of the input string. The real root value is Swap()ed into
34 // the new instance.
35 class DictionaryHiddenRootValue : public DictionaryValue {
36  public:
DictionaryHiddenRootValue(std::string * json,Value * root)37   DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
38     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
39     DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
40   }
41 
Swap(DictionaryValue * other)42   void Swap(DictionaryValue* other) override {
43     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
44 
45     // First deep copy to convert JSONStringValue to std::string and swap that
46     // copy with |other|, which contains the new contents of |this|.
47     scoped_ptr<DictionaryValue> copy(DeepCopy());
48     copy->Swap(other);
49 
50     // Then erase the contents of the current dictionary and swap in the
51     // new contents, originally from |other|.
52     Clear();
53     json_.reset();
54     DictionaryValue::Swap(copy.get());
55   }
56 
57   // Not overriding DictionaryValue::Remove because it just calls through to
58   // the method below.
59 
RemoveWithoutPathExpansion(const std::string & key,scoped_ptr<Value> * out)60   bool RemoveWithoutPathExpansion(const std::string& key,
61                                   scoped_ptr<Value>* out) override {
62     // If the caller won't take ownership of the removed value, just call up.
63     if (!out)
64       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
65 
66     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
67 
68     // Otherwise, remove the value while its still "owned" by this and copy it
69     // to convert any JSONStringValues to std::string.
70     scoped_ptr<Value> out_owned;
71     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
72       return false;
73 
74     out->reset(out_owned->DeepCopy());
75 
76     return true;
77   }
78 
79  private:
80   scoped_ptr<std::string> json_;
81 
82   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
83 };
84 
85 class ListHiddenRootValue : public ListValue {
86  public:
ListHiddenRootValue(std::string * json,Value * root)87   ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
88     DCHECK(root->IsType(Value::TYPE_LIST));
89     ListValue::Swap(static_cast<ListValue*>(root));
90   }
91 
Swap(ListValue * other)92   void Swap(ListValue* other) override {
93     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
94 
95     // First deep copy to convert JSONStringValue to std::string and swap that
96     // copy with |other|, which contains the new contents of |this|.
97     scoped_ptr<ListValue> copy(DeepCopy());
98     copy->Swap(other);
99 
100     // Then erase the contents of the current list and swap in the new contents,
101     // originally from |other|.
102     Clear();
103     json_.reset();
104     ListValue::Swap(copy.get());
105   }
106 
Remove(size_t index,scoped_ptr<Value> * out)107   bool Remove(size_t index, scoped_ptr<Value>* out) override {
108     // If the caller won't take ownership of the removed value, just call up.
109     if (!out)
110       return ListValue::Remove(index, out);
111 
112     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
113 
114     // Otherwise, remove the value while its still "owned" by this and copy it
115     // to convert any JSONStringValues to std::string.
116     scoped_ptr<Value> out_owned;
117     if (!ListValue::Remove(index, &out_owned))
118       return false;
119 
120     out->reset(out_owned->DeepCopy());
121 
122     return true;
123   }
124 
125  private:
126   scoped_ptr<std::string> json_;
127 
128   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
129 };
130 
131 // A variant on StringValue that uses StringPiece instead of copying the string
132 // into the Value. This can only be stored in a child of hidden root (above),
133 // otherwise the referenced string will not be guaranteed to outlive it.
134 class JSONStringValue : public Value {
135  public:
JSONStringValue(const StringPiece & piece)136   explicit JSONStringValue(const StringPiece& piece)
137       : Value(TYPE_STRING),
138         string_piece_(piece) {
139   }
140 
141   // Overridden from Value:
GetAsString(std::string * out_value) const142   bool GetAsString(std::string* out_value) const override {
143     string_piece_.CopyToString(out_value);
144     return true;
145   }
GetAsString(string16 * out_value) const146   bool GetAsString(string16* out_value) const override {
147     *out_value = UTF8ToUTF16(string_piece_);
148     return true;
149   }
DeepCopy() const150   Value* DeepCopy() const override {
151     return new StringValue(string_piece_.as_string());
152   }
Equals(const Value * other) const153   bool Equals(const Value* other) const override {
154     std::string other_string;
155     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
156         StringPiece(other_string) == string_piece_;
157   }
158 
159  private:
160   // The location in the original input stream.
161   StringPiece string_piece_;
162 
163   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
164 };
165 
166 // Simple class that checks for maximum recursion/"stack overflow."
167 class StackMarker {
168  public:
StackMarker(int * depth)169   explicit StackMarker(int* depth) : depth_(depth) {
170     ++(*depth_);
171     DCHECK_LE(*depth_, kStackMaxDepth);
172   }
~StackMarker()173   ~StackMarker() {
174     --(*depth_);
175   }
176 
IsTooDeep() const177   bool IsTooDeep() const {
178     return *depth_ >= kStackMaxDepth;
179   }
180 
181  private:
182   int* const depth_;
183 
184   DISALLOW_COPY_AND_ASSIGN(StackMarker);
185 };
186 
187 }  // namespace
188 
JSONParser(int options)189 JSONParser::JSONParser(int options)
190     : options_(options),
191       start_pos_(NULL),
192       pos_(NULL),
193       end_pos_(NULL),
194       index_(0),
195       stack_depth_(0),
196       line_number_(0),
197       index_last_line_(0),
198       error_code_(JSONReader::JSON_NO_ERROR),
199       error_line_(0),
200       error_column_(0) {
201 }
202 
~JSONParser()203 JSONParser::~JSONParser() {
204 }
205 
Parse(const StringPiece & input)206 Value* JSONParser::Parse(const StringPiece& input) {
207   scoped_ptr<std::string> input_copy;
208   // If the children of a JSON root can be detached, then hidden roots cannot
209   // be used, so do not bother copying the input because StringPiece will not
210   // be used anywhere.
211   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
212     input_copy.reset(new std::string(input.as_string()));
213     start_pos_ = input_copy->data();
214   } else {
215     start_pos_ = input.data();
216   }
217   pos_ = start_pos_;
218   end_pos_ = start_pos_ + input.length();
219   index_ = 0;
220   line_number_ = 1;
221   index_last_line_ = 0;
222 
223   error_code_ = JSONReader::JSON_NO_ERROR;
224   error_line_ = 0;
225   error_column_ = 0;
226 
227   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
228   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
229   // ParseNextToken function mis-treating a Unicode BOM as an invalid
230   // character and returning NULL.
231   if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
232       static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
233       static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
234     NextNChars(3);
235   }
236 
237   // Parse the first and any nested tokens.
238   scoped_ptr<Value> root(ParseNextToken());
239   if (!root.get())
240     return NULL;
241 
242   // Make sure the input stream is at an end.
243   if (GetNextToken() != T_END_OF_INPUT) {
244     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
245       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
246       return NULL;
247     }
248   }
249 
250   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
251   // hidden root.
252   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
253     if (root->IsType(Value::TYPE_DICTIONARY)) {
254       return new DictionaryHiddenRootValue(input_copy.release(), root.get());
255     } else if (root->IsType(Value::TYPE_LIST)) {
256       return new ListHiddenRootValue(input_copy.release(), root.get());
257     } else if (root->IsType(Value::TYPE_STRING)) {
258       // A string type could be a JSONStringValue, but because there's no
259       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
260       // preserve it.
261       return root->DeepCopy();
262     }
263   }
264 
265   // All other values can be returned directly.
266   return root.release();
267 }
268 
// Returns the error code recorded by the most recent ReportError() call, or
// the value stored when Parse() last reset the error state.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
272 
GetErrorMessage() const273 std::string JSONParser::GetErrorMessage() const {
274   return FormatErrorMessage(error_line_, error_column_,
275       JSONReader::ErrorCodeToString(error_code_));
276 }
277 
// Returns the line number captured by the most recent ReportError() call
// (0 after Parse() resets the error state).
int JSONParser::error_line() const {
  return error_line_;
}
281 
// Returns the column captured by the most recent ReportError() call
// (0 after Parse() resets the error state).
int JSONParser::error_column() const {
  return error_column_;
}
285 
286 // StringBuilder ///////////////////////////////////////////////////////////////
287 
StringBuilder()288 JSONParser::StringBuilder::StringBuilder()
289     : pos_(NULL),
290       length_(0),
291       string_(NULL) {
292 }
293 
StringBuilder(const char * pos)294 JSONParser::StringBuilder::StringBuilder(const char* pos)
295     : pos_(pos),
296       length_(0),
297       string_(NULL) {
298 }
299 
Swap(StringBuilder * other)300 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
301   std::swap(other->string_, string_);
302   std::swap(other->pos_, pos_);
303   std::swap(other->length_, length_);
304 }
305 
// Frees the heap string created by Convert(), if any. Deleting a null pointer
// is a no-op, so a builder that never converted needs no special handling.
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
309 
Append(const char & c)310 void JSONParser::StringBuilder::Append(const char& c) {
311   DCHECK_GE(c, 0);
312   DCHECK_LT(c, 128);
313 
314   if (string_)
315     string_->push_back(c);
316   else
317     ++length_;
318 }
319 
AppendString(const std::string & str)320 void JSONParser::StringBuilder::AppendString(const std::string& str) {
321   DCHECK(string_);
322   string_->append(str);
323 }
324 
Convert()325 void JSONParser::StringBuilder::Convert() {
326   if (string_)
327     return;
328   string_  = new std::string(pos_, length_);
329 }
330 
CanBeStringPiece() const331 bool JSONParser::StringBuilder::CanBeStringPiece() const {
332   return !string_;
333 }
334 
AsStringPiece()335 StringPiece JSONParser::StringBuilder::AsStringPiece() {
336   if (string_)
337     return StringPiece();
338   return StringPiece(pos_, length_);
339 }
340 
AsString()341 const std::string& JSONParser::StringBuilder::AsString() {
342   if (!string_)
343     Convert();
344   return *string_;
345 }
346 
347 // JSONParser private //////////////////////////////////////////////////////////
348 
CanConsume(int length)349 inline bool JSONParser::CanConsume(int length) {
350   return pos_ + length <= end_pos_;
351 }
352 
NextChar()353 const char* JSONParser::NextChar() {
354   DCHECK(CanConsume(1));
355   ++index_;
356   ++pos_;
357   return pos_;
358 }
359 
NextNChars(int n)360 void JSONParser::NextNChars(int n) {
361   DCHECK(CanConsume(n));
362   index_ += n;
363   pos_ += n;
364 }
365 
GetNextToken()366 JSONParser::Token JSONParser::GetNextToken() {
367   EatWhitespaceAndComments();
368   if (!CanConsume(1))
369     return T_END_OF_INPUT;
370 
371   switch (*pos_) {
372     case '{':
373       return T_OBJECT_BEGIN;
374     case '}':
375       return T_OBJECT_END;
376     case '[':
377       return T_ARRAY_BEGIN;
378     case ']':
379       return T_ARRAY_END;
380     case '"':
381       return T_STRING;
382     case '0':
383     case '1':
384     case '2':
385     case '3':
386     case '4':
387     case '5':
388     case '6':
389     case '7':
390     case '8':
391     case '9':
392     case '-':
393       return T_NUMBER;
394     case 't':
395       return T_BOOL_TRUE;
396     case 'f':
397       return T_BOOL_FALSE;
398     case 'n':
399       return T_NULL;
400     case ',':
401       return T_LIST_SEPARATOR;
402     case ':':
403       return T_OBJECT_PAIR_SEPARATOR;
404     default:
405       return T_INVALID_TOKEN;
406   }
407 }
408 
EatWhitespaceAndComments()409 void JSONParser::EatWhitespaceAndComments() {
410   while (pos_ < end_pos_) {
411     switch (*pos_) {
412       case '\r':
413       case '\n':
414         index_last_line_ = index_;
415         // Don't increment line_number_ twice for "\r\n".
416         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
417           ++line_number_;
418         // Fall through.
419       case ' ':
420       case '\t':
421         NextChar();
422         break;
423       case '/':
424         if (!EatComment())
425           return;
426         break;
427       default:
428         return;
429     }
430   }
431 }
432 
EatComment()433 bool JSONParser::EatComment() {
434   if (*pos_ != '/' || !CanConsume(1))
435     return false;
436 
437   char next_char = *NextChar();
438   if (next_char == '/') {
439     // Single line comment, read to newline.
440     while (CanConsume(1)) {
441       next_char = *NextChar();
442       if (next_char == '\n' || next_char == '\r')
443         return true;
444     }
445   } else if (next_char == '*') {
446     char previous_char = '\0';
447     // Block comment, read until end marker.
448     while (CanConsume(1)) {
449       next_char = *NextChar();
450       if (previous_char == '*' && next_char == '/') {
451         // EatWhitespaceAndComments will inspect pos_, which will still be on
452         // the last / of the comment, so advance once more (which may also be
453         // end of input).
454         NextChar();
455         return true;
456       }
457       previous_char = next_char;
458     }
459 
460     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
461   }
462 
463   return false;
464 }
465 
ParseNextToken()466 Value* JSONParser::ParseNextToken() {
467   return ParseToken(GetNextToken());
468 }
469 
ParseToken(Token token)470 Value* JSONParser::ParseToken(Token token) {
471   switch (token) {
472     case T_OBJECT_BEGIN:
473       return ConsumeDictionary();
474     case T_ARRAY_BEGIN:
475       return ConsumeList();
476     case T_STRING:
477       return ConsumeString();
478     case T_NUMBER:
479       return ConsumeNumber();
480     case T_BOOL_TRUE:
481     case T_BOOL_FALSE:
482     case T_NULL:
483       return ConsumeLiteral();
484     default:
485       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
486       return NULL;
487   }
488 }
489 
ConsumeDictionary()490 Value* JSONParser::ConsumeDictionary() {
491   if (*pos_ != '{') {
492     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
493     return NULL;
494   }
495 
496   StackMarker depth_check(&stack_depth_);
497   if (depth_check.IsTooDeep()) {
498     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
499     return NULL;
500   }
501 
502   scoped_ptr<DictionaryValue> dict(new DictionaryValue);
503 
504   NextChar();
505   Token token = GetNextToken();
506   while (token != T_OBJECT_END) {
507     if (token != T_STRING) {
508       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
509       return NULL;
510     }
511 
512     // First consume the key.
513     StringBuilder key;
514     if (!ConsumeStringRaw(&key)) {
515       return NULL;
516     }
517 
518     // Read the separator.
519     NextChar();
520     token = GetNextToken();
521     if (token != T_OBJECT_PAIR_SEPARATOR) {
522       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
523       return NULL;
524     }
525 
526     // The next token is the value. Ownership transfers to |dict|.
527     NextChar();
528     Value* value = ParseNextToken();
529     if (!value) {
530       // ReportError from deeper level.
531       return NULL;
532     }
533 
534     dict->SetWithoutPathExpansion(key.AsString(), value);
535 
536     NextChar();
537     token = GetNextToken();
538     if (token == T_LIST_SEPARATOR) {
539       NextChar();
540       token = GetNextToken();
541       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
542         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
543         return NULL;
544       }
545     } else if (token != T_OBJECT_END) {
546       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
547       return NULL;
548     }
549   }
550 
551   return dict.release();
552 }
553 
ConsumeList()554 Value* JSONParser::ConsumeList() {
555   if (*pos_ != '[') {
556     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
557     return NULL;
558   }
559 
560   StackMarker depth_check(&stack_depth_);
561   if (depth_check.IsTooDeep()) {
562     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
563     return NULL;
564   }
565 
566   scoped_ptr<ListValue> list(new ListValue);
567 
568   NextChar();
569   Token token = GetNextToken();
570   while (token != T_ARRAY_END) {
571     Value* item = ParseToken(token);
572     if (!item) {
573       // ReportError from deeper level.
574       return NULL;
575     }
576 
577     list->Append(item);
578 
579     NextChar();
580     token = GetNextToken();
581     if (token == T_LIST_SEPARATOR) {
582       NextChar();
583       token = GetNextToken();
584       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
585         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
586         return NULL;
587       }
588     } else if (token != T_ARRAY_END) {
589       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
590       return NULL;
591     }
592   }
593 
594   return list.release();
595 }
596 
ConsumeString()597 Value* JSONParser::ConsumeString() {
598   StringBuilder string;
599   if (!ConsumeStringRaw(&string))
600     return NULL;
601 
602   // Create the Value representation, using a hidden root, if configured
603   // to do so, and if the string can be represented by StringPiece.
604   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
605     return new JSONStringValue(string.AsStringPiece());
606   } else {
607     if (string.CanBeStringPiece())
608       string.Convert();
609     return new StringValue(string.AsString());
610   }
611 }
612 
ConsumeStringRaw(StringBuilder * out)613 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
614   if (*pos_ != '"') {
615     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
616     return false;
617   }
618 
619   // StringBuilder will internally build a StringPiece unless a UTF-16
620   // conversion occurs, at which point it will perform a copy into a
621   // std::string.
622   StringBuilder string(NextChar());
623 
624   int length = end_pos_ - start_pos_;
625   int32_t next_char = 0;
626 
627   while (CanConsume(1)) {
628     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
629     CBU8_NEXT(start_pos_, index_, length, next_char);
630     if (next_char < 0 || !IsValidCharacter(next_char)) {
631       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
632       return false;
633     }
634 
635     // If this character is an escape sequence...
636     if (next_char == '\\') {
637       // The input string will be adjusted (either by combining the two
638       // characters of an encoded escape sequence, or with a UTF conversion),
639       // so using StringPiece isn't possible -- force a conversion.
640       string.Convert();
641 
642       if (!CanConsume(1)) {
643         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
644         return false;
645       }
646 
647       switch (*NextChar()) {
648         // Allowed esape sequences:
649         case 'x': {  // UTF-8 sequence.
650           // UTF-8 \x escape sequences are not allowed in the spec, but they
651           // are supported here for backwards-compatiblity with the old parser.
652           if (!CanConsume(2)) {
653             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
654             return false;
655           }
656 
657           int hex_digit = 0;
658           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
659             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
660             return false;
661           }
662           NextChar();
663 
664           if (hex_digit < kExtendedASCIIStart)
665             string.Append(static_cast<char>(hex_digit));
666           else
667             DecodeUTF8(hex_digit, &string);
668           break;
669         }
670         case 'u': {  // UTF-16 sequence.
671           // UTF units are of the form \uXXXX.
672           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
673             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
674             return false;
675           }
676 
677           // Skip the 'u'.
678           NextChar();
679 
680           std::string utf8_units;
681           if (!DecodeUTF16(&utf8_units)) {
682             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
683             return false;
684           }
685 
686           string.AppendString(utf8_units);
687           break;
688         }
689         case '"':
690           string.Append('"');
691           break;
692         case '\\':
693           string.Append('\\');
694           break;
695         case '/':
696           string.Append('/');
697           break;
698         case 'b':
699           string.Append('\b');
700           break;
701         case 'f':
702           string.Append('\f');
703           break;
704         case 'n':
705           string.Append('\n');
706           break;
707         case 'r':
708           string.Append('\r');
709           break;
710         case 't':
711           string.Append('\t');
712           break;
713         case 'v':  // Not listed as valid escape sequence in the RFC.
714           string.Append('\v');
715           break;
716         // All other escape squences are illegal.
717         default:
718           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
719           return false;
720       }
721     } else if (next_char == '"') {
722       --index_;  // Rewind by one because of CBU8_NEXT.
723       out->Swap(&string);
724       return true;
725     } else {
726       if (next_char < kExtendedASCIIStart)
727         string.Append(static_cast<char>(next_char));
728       else
729         DecodeUTF8(next_char, &string);
730     }
731   }
732 
733   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
734   return false;
735 }
736 
737 // Entry is at the first X in \uXXXX.
DecodeUTF16(std::string * dest_string)738 bool JSONParser::DecodeUTF16(std::string* dest_string) {
739   if (!CanConsume(4))
740     return false;
741 
742   // This is a 32-bit field because the shift operations in the
743   // conversion process below cause MSVC to error about "data loss."
744   // This only stores UTF-16 code units, though.
745   // Consume the UTF-16 code unit, which may be a high surrogate.
746   int code_unit16_high = 0;
747   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
748     return false;
749 
750   // Only add 3, not 4, because at the end of this iteration, the parser has
751   // finished working with the last digit of the UTF sequence, meaning that
752   // the next iteration will advance to the next byte.
753   NextNChars(3);
754 
755   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
756   // code unit sequence.
757   char code_unit8[8] = { 0 };
758   size_t offset = 0;
759 
760   // If this is a high surrogate, consume the next code unit to get the
761   // low surrogate.
762   if (CBU16_IS_SURROGATE(code_unit16_high)) {
763     // Make sure this is the high surrogate. If not, it's an encoding
764     // error.
765     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
766       return false;
767 
768     // Make sure that the token has more characters to consume the
769     // lower surrogate.
770     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
771       return false;
772     if (*NextChar() != '\\' || *NextChar() != 'u')
773       return false;
774 
775     NextChar();  // Read past 'u'.
776     int code_unit16_low = 0;
777     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
778       return false;
779 
780     NextNChars(3);
781 
782     if (!CBU16_IS_TRAIL(code_unit16_low)) {
783       return false;
784     }
785 
786     uint32_t code_point =
787         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
788     if (!IsValidCharacter(code_point))
789       return false;
790 
791     offset = 0;
792     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
793   } else {
794     // Not a surrogate.
795     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
796     if (!IsValidCharacter(code_unit16_high))
797       return false;
798 
799     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
800   }
801 
802   dest_string->append(code_unit8);
803   return true;
804 }
805 
DecodeUTF8(const int32_t & point,StringBuilder * dest)806 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
807   DCHECK(IsValidCharacter(point));
808 
809   // Anything outside of the basic ASCII plane will need to be decoded from
810   // int32_t to a multi-byte sequence.
811   if (point < kExtendedASCIIStart) {
812     dest->Append(static_cast<char>(point));
813   } else {
814     char utf8_units[4] = { 0 };
815     int offset = 0;
816     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
817     dest->Convert();
818     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
819     // zero terminated at this point.  |offset| contains the correct length.
820     dest->AppendString(std::string(utf8_units, offset));
821   }
822 }
823 
ConsumeNumber()824 Value* JSONParser::ConsumeNumber() {
825   const char* num_start = pos_;
826   const int start_index = index_;
827   int end_index = start_index;
828 
829   if (*pos_ == '-')
830     NextChar();
831 
832   if (!ReadInt(false)) {
833     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
834     return NULL;
835   }
836   end_index = index_;
837 
838   // The optional fraction part.
839   if (*pos_ == '.') {
840     if (!CanConsume(1)) {
841       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
842       return NULL;
843     }
844     NextChar();
845     if (!ReadInt(true)) {
846       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
847       return NULL;
848     }
849     end_index = index_;
850   }
851 
852   // Optional exponent part.
853   if (*pos_ == 'e' || *pos_ == 'E') {
854     NextChar();
855     if (*pos_ == '-' || *pos_ == '+')
856       NextChar();
857     if (!ReadInt(true)) {
858       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
859       return NULL;
860     }
861     end_index = index_;
862   }
863 
864   // ReadInt is greedy because numbers have no easily detectable sentinel,
865   // so save off where the parser should be on exit (see Consume invariant at
866   // the top of the header), then make sure the next token is one which is
867   // valid.
868   const char* exit_pos = pos_ - 1;
869   int exit_index = index_ - 1;
870 
871   switch (GetNextToken()) {
872     case T_OBJECT_END:
873     case T_ARRAY_END:
874     case T_LIST_SEPARATOR:
875     case T_END_OF_INPUT:
876       break;
877     default:
878       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
879       return NULL;
880   }
881 
882   pos_ = exit_pos;
883   index_ = exit_index;
884 
885   StringPiece num_string(num_start, end_index - start_index);
886 
887   int num_int;
888   if (StringToInt(num_string, &num_int))
889     return new FundamentalValue(num_int);
890 
891   double num_double;
892   if (StringToDouble(num_string.as_string(), &num_double) &&
893       std::isfinite(num_double)) {
894     return new FundamentalValue(num_double);
895   }
896 
897   return NULL;
898 }
899 
ReadInt(bool allow_leading_zeros)900 bool JSONParser::ReadInt(bool allow_leading_zeros) {
901   char first = *pos_;
902   int len = 0;
903 
904   char c = first;
905   while (CanConsume(1) && IsAsciiDigit(c)) {
906     c = *NextChar();
907     ++len;
908   }
909 
910   if (len == 0)
911     return false;
912 
913   if (!allow_leading_zeros && len > 1 && first == '0')
914     return false;
915 
916   return true;
917 }
918 
ConsumeLiteral()919 Value* JSONParser::ConsumeLiteral() {
920   switch (*pos_) {
921     case 't': {
922       const char kTrueLiteral[] = "true";
923       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
924       if (!CanConsume(kTrueLen - 1) ||
925           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
926         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
927         return NULL;
928       }
929       NextNChars(kTrueLen - 1);
930       return new FundamentalValue(true);
931     }
932     case 'f': {
933       const char kFalseLiteral[] = "false";
934       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
935       if (!CanConsume(kFalseLen - 1) ||
936           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
937         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
938         return NULL;
939       }
940       NextNChars(kFalseLen - 1);
941       return new FundamentalValue(false);
942     }
943     case 'n': {
944       const char kNullLiteral[] = "null";
945       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
946       if (!CanConsume(kNullLen - 1) ||
947           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
948         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
949         return NULL;
950       }
951       NextNChars(kNullLen - 1);
952       return Value::CreateNullValue().release();
953     }
954     default:
955       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
956       return NULL;
957   }
958 }
959 
960 // static
StringsAreEqual(const char * one,const char * two,size_t len)961 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
962   return strncmp(one, two, len) == 0;
963 }
964 
ReportError(JSONReader::JsonParseError code,int column_adjust)965 void JSONParser::ReportError(JSONReader::JsonParseError code,
966                              int column_adjust) {
967   error_code_ = code;
968   error_line_ = line_number_;
969   error_column_ = index_ - index_last_line_ + column_adjust;
970 }
971 
972 // static
FormatErrorMessage(int line,int column,const std::string & description)973 std::string JSONParser::FormatErrorMessage(int line, int column,
974                                            const std::string& description) {
975   if (line || column) {
976     return StringPrintf("Line: %i, column: %i, %s",
977         line, column, description.c_str());
978   }
979   return description;
980 }
981 
982 }  // namespace internal
983 }  // namespace base
984