1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/json_parser.h"
6 
7 #include <cmath>
8 #include <utility>
9 
10 #include "base/logging.h"
11 #include "base/macros.h"
12 #include "base/memory/ptr_util.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversion_utils.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/third_party/icu/icu_utf.h"
20 #include "base/values.h"
21 
22 namespace base {
23 namespace internal {
24 
25 namespace {
26 
// Nesting limit for objects and arrays: StackMarker::IsTooDeep() reports true
// once the shared depth counter reaches this value.
const int kStackMaxDepth = 100;

// Code points below this value are appended to a string as a single byte;
// values at or above it are re-encoded as a multi-byte sequence.
const int32_t kExtendedASCIIStart = 0x80;
30 
31 // DictionaryHiddenRootValue and ListHiddenRootValue are used in conjunction
32 // with JSONStringValue as an optimization for reducing the number of string
33 // copies. When this optimization is active, the parser uses a hidden root to
34 // keep the original JSON input string live and creates JSONStringValue children
35 // holding StringPiece references to the input string, avoiding about 2/3rds of
36 // string memory copies. The real root value is Swap()ed into the new instance.
37 class DictionaryHiddenRootValue : public DictionaryValue {
38  public:
DictionaryHiddenRootValue(std::unique_ptr<std::string> json,std::unique_ptr<Value> root)39   DictionaryHiddenRootValue(std::unique_ptr<std::string> json,
40                             std::unique_ptr<Value> root)
41       : json_(std::move(json)) {
42     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
43     DictionaryValue::Swap(static_cast<DictionaryValue*>(root.get()));
44   }
45 
Swap(DictionaryValue * other)46   void Swap(DictionaryValue* other) override {
47     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
48 
49     // First deep copy to convert JSONStringValue to std::string and swap that
50     // copy with |other|, which contains the new contents of |this|.
51     std::unique_ptr<DictionaryValue> copy(CreateDeepCopy());
52     copy->Swap(other);
53 
54     // Then erase the contents of the current dictionary and swap in the
55     // new contents, originally from |other|.
56     Clear();
57     json_.reset();
58     DictionaryValue::Swap(copy.get());
59   }
60 
61   // Not overriding DictionaryValue::Remove because it just calls through to
62   // the method below.
63 
RemoveWithoutPathExpansion(const std::string & key,std::unique_ptr<Value> * out)64   bool RemoveWithoutPathExpansion(const std::string& key,
65                                   std::unique_ptr<Value>* out) override {
66     // If the caller won't take ownership of the removed value, just call up.
67     if (!out)
68       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
69 
70     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
71 
72     // Otherwise, remove the value while its still "owned" by this and copy it
73     // to convert any JSONStringValues to std::string.
74     std::unique_ptr<Value> out_owned;
75     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
76       return false;
77 
78     *out = out_owned->CreateDeepCopy();
79 
80     return true;
81   }
82 
83  private:
84   std::unique_ptr<std::string> json_;
85 
86   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
87 };
88 
89 class ListHiddenRootValue : public ListValue {
90  public:
ListHiddenRootValue(std::unique_ptr<std::string> json,std::unique_ptr<Value> root)91   ListHiddenRootValue(std::unique_ptr<std::string> json,
92                       std::unique_ptr<Value> root)
93       : json_(std::move(json)) {
94     DCHECK(root->IsType(Value::TYPE_LIST));
95     ListValue::Swap(static_cast<ListValue*>(root.get()));
96   }
97 
Swap(ListValue * other)98   void Swap(ListValue* other) override {
99     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
100 
101     // First deep copy to convert JSONStringValue to std::string and swap that
102     // copy with |other|, which contains the new contents of |this|.
103     std::unique_ptr<ListValue> copy(CreateDeepCopy());
104     copy->Swap(other);
105 
106     // Then erase the contents of the current list and swap in the new contents,
107     // originally from |other|.
108     Clear();
109     json_.reset();
110     ListValue::Swap(copy.get());
111   }
112 
Remove(size_t index,std::unique_ptr<Value> * out)113   bool Remove(size_t index, std::unique_ptr<Value>* out) override {
114     // If the caller won't take ownership of the removed value, just call up.
115     if (!out)
116       return ListValue::Remove(index, out);
117 
118     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
119 
120     // Otherwise, remove the value while its still "owned" by this and copy it
121     // to convert any JSONStringValues to std::string.
122     std::unique_ptr<Value> out_owned;
123     if (!ListValue::Remove(index, &out_owned))
124       return false;
125 
126     *out = out_owned->CreateDeepCopy();
127 
128     return true;
129   }
130 
131  private:
132   std::unique_ptr<std::string> json_;
133 
134   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
135 };
136 
137 // A variant on StringValue that uses StringPiece instead of copying the string
138 // into the Value. This can only be stored in a child of hidden root (above),
139 // otherwise the referenced string will not be guaranteed to outlive it.
140 class JSONStringValue : public Value {
141  public:
JSONStringValue(StringPiece piece)142   explicit JSONStringValue(StringPiece piece)
143       : Value(TYPE_STRING), string_piece_(piece) {}
144 
145   // Overridden from Value:
GetAsString(std::string * out_value) const146   bool GetAsString(std::string* out_value) const override {
147     string_piece_.CopyToString(out_value);
148     return true;
149   }
GetAsString(string16 * out_value) const150   bool GetAsString(string16* out_value) const override {
151     *out_value = UTF8ToUTF16(string_piece_);
152     return true;
153   }
DeepCopy() const154   Value* DeepCopy() const override {
155     return new StringValue(string_piece_.as_string());
156   }
Equals(const Value * other) const157   bool Equals(const Value* other) const override {
158     std::string other_string;
159     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
160         StringPiece(other_string) == string_piece_;
161   }
162 
163  private:
164   // The location in the original input stream.
165   StringPiece string_piece_;
166 
167   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
168 };
169 
170 // Simple class that checks for maximum recursion/"stack overflow."
171 class StackMarker {
172  public:
StackMarker(int * depth)173   explicit StackMarker(int* depth) : depth_(depth) {
174     ++(*depth_);
175     DCHECK_LE(*depth_, kStackMaxDepth);
176   }
~StackMarker()177   ~StackMarker() {
178     --(*depth_);
179   }
180 
IsTooDeep() const181   bool IsTooDeep() const {
182     return *depth_ >= kStackMaxDepth;
183   }
184 
185  private:
186   int* const depth_;
187 
188   DISALLOW_COPY_AND_ASSIGN(StackMarker);
189 };
190 
191 }  // namespace
192 
// Creates a parser configured with |options|, a bitmask (this file tests the
// JSON_DETACHABLE_CHILDREN and JSON_ALLOW_TRAILING_COMMAS bits). All cursor
// and error fields start out cleared; Parse() re-initializes the cursor and
// error fields for each input.
JSONParser::JSONParser(int options)
    : options_(options),
      start_pos_(nullptr),
      pos_(nullptr),
      end_pos_(nullptr),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSONReader::JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
}
206 
~JSONParser()207 JSONParser::~JSONParser() {
208 }
209 
Parse(StringPiece input)210 std::unique_ptr<Value> JSONParser::Parse(StringPiece input) {
211   std::unique_ptr<std::string> input_copy;
212   // If the children of a JSON root can be detached, then hidden roots cannot
213   // be used, so do not bother copying the input because StringPiece will not
214   // be used anywhere.
215   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
216     input_copy = MakeUnique<std::string>(input.as_string());
217     start_pos_ = input_copy->data();
218   } else {
219     start_pos_ = input.data();
220   }
221   pos_ = start_pos_;
222   end_pos_ = start_pos_ + input.length();
223   index_ = 0;
224   line_number_ = 1;
225   index_last_line_ = 0;
226 
227   error_code_ = JSONReader::JSON_NO_ERROR;
228   error_line_ = 0;
229   error_column_ = 0;
230 
231   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
232   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
233   // ParseNextToken function mis-treating a Unicode BOM as an invalid
234   // character and returning NULL.
235   if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
236       static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
237       static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
238     NextNChars(3);
239   }
240 
241   // Parse the first and any nested tokens.
242   std::unique_ptr<Value> root(ParseNextToken());
243   if (!root)
244     return nullptr;
245 
246   // Make sure the input stream is at an end.
247   if (GetNextToken() != T_END_OF_INPUT) {
248     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
249       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
250       return nullptr;
251     }
252   }
253 
254   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
255   // hidden root.
256   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
257     if (root->IsType(Value::TYPE_DICTIONARY)) {
258       return MakeUnique<DictionaryHiddenRootValue>(std::move(input_copy),
259                                                    std::move(root));
260     }
261     if (root->IsType(Value::TYPE_LIST)) {
262       return MakeUnique<ListHiddenRootValue>(std::move(input_copy),
263                                              std::move(root));
264     }
265     if (root->IsType(Value::TYPE_STRING)) {
266       // A string type could be a JSONStringValue, but because there's no
267       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
268       // preserve it.
269       return root->CreateDeepCopy();
270     }
271   }
272 
273   // All other values can be returned directly.
274   return root;
275 }
276 
// Returns the error code recorded by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been recorded since the state was last reset.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
280 
GetErrorMessage() const281 std::string JSONParser::GetErrorMessage() const {
282   return FormatErrorMessage(error_line_, error_column_,
283       JSONReader::ErrorCodeToString(error_code_));
284 }
285 
// Returns the line number captured when the current error was reported
// (0 when no error has been recorded).
int JSONParser::error_line() const {
  return error_line_;
}
289 
// Returns the column captured when the current error was reported
// (0 when no error has been recorded).
int JSONParser::error_column() const {
  return error_column_;
}
293 
294 // StringBuilder ///////////////////////////////////////////////////////////////
295 
StringBuilder()296 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
297 
// Starts a builder whose text begins at |pos| in the input. No characters are
// counted yet and no owned std::string exists until Convert() is called.
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos),
      length_(0),
      string_(nullptr) {
}
303 
Swap(StringBuilder * other)304 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
305   std::swap(other->string_, string_);
306   std::swap(other->pos_, pos_);
307   std::swap(other->length_, length_);
308 }
309 
// Frees the owned std::string, if Convert() ever created one (deleting a null
// pointer is a no-op).
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
313 
Append(const char & c)314 void JSONParser::StringBuilder::Append(const char& c) {
315   DCHECK_GE(c, 0);
316   DCHECK_LT(static_cast<unsigned char>(c), 128);
317 
318   if (string_)
319     string_->push_back(c);
320   else
321     ++length_;
322 }
323 
AppendString(const std::string & str)324 void JSONParser::StringBuilder::AppendString(const std::string& str) {
325   DCHECK(string_);
326   string_->append(str);
327 }
328 
Convert()329 void JSONParser::StringBuilder::Convert() {
330   if (string_)
331     return;
332   string_  = new std::string(pos_, length_);
333 }
334 
CanBeStringPiece() const335 bool JSONParser::StringBuilder::CanBeStringPiece() const {
336   return !string_;
337 }
338 
AsStringPiece()339 StringPiece JSONParser::StringBuilder::AsStringPiece() {
340   if (string_)
341     return StringPiece();
342   return StringPiece(pos_, length_);
343 }
344 
AsString()345 const std::string& JSONParser::StringBuilder::AsString() {
346   if (!string_)
347     Convert();
348   return *string_;
349 }
350 
351 // JSONParser private //////////////////////////////////////////////////////////
352 
CanConsume(int length)353 inline bool JSONParser::CanConsume(int length) {
354   return pos_ + length <= end_pos_;
355 }
356 
NextChar()357 const char* JSONParser::NextChar() {
358   DCHECK(CanConsume(1));
359   ++index_;
360   ++pos_;
361   return pos_;
362 }
363 
NextNChars(int n)364 void JSONParser::NextNChars(int n) {
365   DCHECK(CanConsume(n));
366   index_ += n;
367   pos_ += n;
368 }
369 
GetNextToken()370 JSONParser::Token JSONParser::GetNextToken() {
371   EatWhitespaceAndComments();
372   if (!CanConsume(1))
373     return T_END_OF_INPUT;
374 
375   switch (*pos_) {
376     case '{':
377       return T_OBJECT_BEGIN;
378     case '}':
379       return T_OBJECT_END;
380     case '[':
381       return T_ARRAY_BEGIN;
382     case ']':
383       return T_ARRAY_END;
384     case '"':
385       return T_STRING;
386     case '0':
387     case '1':
388     case '2':
389     case '3':
390     case '4':
391     case '5':
392     case '6':
393     case '7':
394     case '8':
395     case '9':
396     case '-':
397       return T_NUMBER;
398     case 't':
399       return T_BOOL_TRUE;
400     case 'f':
401       return T_BOOL_FALSE;
402     case 'n':
403       return T_NULL;
404     case ',':
405       return T_LIST_SEPARATOR;
406     case ':':
407       return T_OBJECT_PAIR_SEPARATOR;
408     default:
409       return T_INVALID_TOKEN;
410   }
411 }
412 
EatWhitespaceAndComments()413 void JSONParser::EatWhitespaceAndComments() {
414   while (pos_ < end_pos_) {
415     switch (*pos_) {
416       case '\r':
417       case '\n':
418         index_last_line_ = index_;
419         // Don't increment line_number_ twice for "\r\n".
420         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
421           ++line_number_;
422         // Fall through.
423       case ' ':
424       case '\t':
425         NextChar();
426         break;
427       case '/':
428         if (!EatComment())
429           return;
430         break;
431       default:
432         return;
433     }
434   }
435 }
436 
EatComment()437 bool JSONParser::EatComment() {
438   if (*pos_ != '/' || !CanConsume(1))
439     return false;
440 
441   char next_char = *NextChar();
442   if (next_char == '/') {
443     // Single line comment, read to newline.
444     while (CanConsume(1)) {
445       next_char = *NextChar();
446       if (next_char == '\n' || next_char == '\r')
447         return true;
448     }
449   } else if (next_char == '*') {
450     char previous_char = '\0';
451     // Block comment, read until end marker.
452     while (CanConsume(1)) {
453       next_char = *NextChar();
454       if (previous_char == '*' && next_char == '/') {
455         // EatWhitespaceAndComments will inspect pos_, which will still be on
456         // the last / of the comment, so advance once more (which may also be
457         // end of input).
458         NextChar();
459         return true;
460       }
461       previous_char = next_char;
462     }
463 
464     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
465   }
466 
467   return false;
468 }
469 
ParseNextToken()470 Value* JSONParser::ParseNextToken() {
471   return ParseToken(GetNextToken());
472 }
473 
ParseToken(Token token)474 Value* JSONParser::ParseToken(Token token) {
475   switch (token) {
476     case T_OBJECT_BEGIN:
477       return ConsumeDictionary();
478     case T_ARRAY_BEGIN:
479       return ConsumeList();
480     case T_STRING:
481       return ConsumeString();
482     case T_NUMBER:
483       return ConsumeNumber();
484     case T_BOOL_TRUE:
485     case T_BOOL_FALSE:
486     case T_NULL:
487       return ConsumeLiteral();
488     default:
489       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
490       return nullptr;
491   }
492 }
493 
ConsumeDictionary()494 Value* JSONParser::ConsumeDictionary() {
495   if (*pos_ != '{') {
496     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
497     return nullptr;
498   }
499 
500   StackMarker depth_check(&stack_depth_);
501   if (depth_check.IsTooDeep()) {
502     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
503     return nullptr;
504   }
505 
506   std::unique_ptr<DictionaryValue> dict(new DictionaryValue);
507 
508   NextChar();
509   Token token = GetNextToken();
510   while (token != T_OBJECT_END) {
511     if (token != T_STRING) {
512       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
513       return nullptr;
514     }
515 
516     // First consume the key.
517     StringBuilder key;
518     if (!ConsumeStringRaw(&key)) {
519       return nullptr;
520     }
521 
522     // Read the separator.
523     NextChar();
524     token = GetNextToken();
525     if (token != T_OBJECT_PAIR_SEPARATOR) {
526       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
527       return nullptr;
528     }
529 
530     // The next token is the value. Ownership transfers to |dict|.
531     NextChar();
532     Value* value = ParseNextToken();
533     if (!value) {
534       // ReportError from deeper level.
535       return nullptr;
536     }
537 
538     dict->SetWithoutPathExpansion(key.AsString(), value);
539 
540     NextChar();
541     token = GetNextToken();
542     if (token == T_LIST_SEPARATOR) {
543       NextChar();
544       token = GetNextToken();
545       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
546         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
547         return nullptr;
548       }
549     } else if (token != T_OBJECT_END) {
550       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
551       return nullptr;
552     }
553   }
554 
555   return dict.release();
556 }
557 
ConsumeList()558 Value* JSONParser::ConsumeList() {
559   if (*pos_ != '[') {
560     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
561     return nullptr;
562   }
563 
564   StackMarker depth_check(&stack_depth_);
565   if (depth_check.IsTooDeep()) {
566     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
567     return nullptr;
568   }
569 
570   std::unique_ptr<ListValue> list(new ListValue);
571 
572   NextChar();
573   Token token = GetNextToken();
574   while (token != T_ARRAY_END) {
575     Value* item = ParseToken(token);
576     if (!item) {
577       // ReportError from deeper level.
578       return nullptr;
579     }
580 
581     list->Append(item);
582 
583     NextChar();
584     token = GetNextToken();
585     if (token == T_LIST_SEPARATOR) {
586       NextChar();
587       token = GetNextToken();
588       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
589         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
590         return nullptr;
591       }
592     } else if (token != T_ARRAY_END) {
593       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
594       return nullptr;
595     }
596   }
597 
598   return list.release();
599 }
600 
ConsumeString()601 Value* JSONParser::ConsumeString() {
602   StringBuilder string;
603   if (!ConsumeStringRaw(&string))
604     return nullptr;
605 
606   // Create the Value representation, using a hidden root, if configured
607   // to do so, and if the string can be represented by StringPiece.
608   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN))
609     return new JSONStringValue(string.AsStringPiece());
610 
611   if (string.CanBeStringPiece())
612     string.Convert();
613   return new StringValue(string.AsString());
614 }
615 
ConsumeStringRaw(StringBuilder * out)616 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
617   if (*pos_ != '"') {
618     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
619     return false;
620   }
621 
622   // StringBuilder will internally build a StringPiece unless a UTF-16
623   // conversion occurs, at which point it will perform a copy into a
624   // std::string.
625   StringBuilder string(NextChar());
626 
627   int length = end_pos_ - start_pos_;
628   int32_t next_char = 0;
629 
630   while (CanConsume(1)) {
631     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
632     CBU8_NEXT(start_pos_, index_, length, next_char);
633     if (next_char < 0 || !IsValidCharacter(next_char)) {
634       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
635       return false;
636     }
637 
638     if (next_char == '"') {
639       --index_;  // Rewind by one because of CBU8_NEXT.
640       out->Swap(&string);
641       return true;
642     }
643 
644     // If this character is not an escape sequence...
645     if (next_char != '\\') {
646       if (next_char < kExtendedASCIIStart)
647         string.Append(static_cast<char>(next_char));
648       else
649         DecodeUTF8(next_char, &string);
650     } else {
651       // And if it is an escape sequence, the input string will be adjusted
652       // (either by combining the two characters of an encoded escape sequence,
653       // or with a UTF conversion), so using StringPiece isn't possible -- force
654       // a conversion.
655       string.Convert();
656 
657       if (!CanConsume(1)) {
658         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
659         return false;
660       }
661 
662       switch (*NextChar()) {
663         // Allowed esape sequences:
664         case 'x': {  // UTF-8 sequence.
665           // UTF-8 \x escape sequences are not allowed in the spec, but they
666           // are supported here for backwards-compatiblity with the old parser.
667           if (!CanConsume(2)) {
668             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
669             return false;
670           }
671 
672           int hex_digit = 0;
673           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
674             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
675             return false;
676           }
677           NextChar();
678 
679           if (hex_digit < kExtendedASCIIStart)
680             string.Append(static_cast<char>(hex_digit));
681           else
682             DecodeUTF8(hex_digit, &string);
683           break;
684         }
685         case 'u': {  // UTF-16 sequence.
686           // UTF units are of the form \uXXXX.
687           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
688             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
689             return false;
690           }
691 
692           // Skip the 'u'.
693           NextChar();
694 
695           std::string utf8_units;
696           if (!DecodeUTF16(&utf8_units)) {
697             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
698             return false;
699           }
700 
701           string.AppendString(utf8_units);
702           break;
703         }
704         case '"':
705           string.Append('"');
706           break;
707         case '\\':
708           string.Append('\\');
709           break;
710         case '/':
711           string.Append('/');
712           break;
713         case 'b':
714           string.Append('\b');
715           break;
716         case 'f':
717           string.Append('\f');
718           break;
719         case 'n':
720           string.Append('\n');
721           break;
722         case 'r':
723           string.Append('\r');
724           break;
725         case 't':
726           string.Append('\t');
727           break;
728         case 'v':  // Not listed as valid escape sequence in the RFC.
729           string.Append('\v');
730           break;
731         // All other escape squences are illegal.
732         default:
733           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
734           return false;
735       }
736     }
737   }
738 
739   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
740   return false;
741 }
742 
743 // Entry is at the first X in \uXXXX.
DecodeUTF16(std::string * dest_string)744 bool JSONParser::DecodeUTF16(std::string* dest_string) {
745   if (!CanConsume(4))
746     return false;
747 
748   // This is a 32-bit field because the shift operations in the
749   // conversion process below cause MSVC to error about "data loss."
750   // This only stores UTF-16 code units, though.
751   // Consume the UTF-16 code unit, which may be a high surrogate.
752   int code_unit16_high = 0;
753   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
754     return false;
755 
756   // Only add 3, not 4, because at the end of this iteration, the parser has
757   // finished working with the last digit of the UTF sequence, meaning that
758   // the next iteration will advance to the next byte.
759   NextNChars(3);
760 
761   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
762   // code unit sequence.
763   char code_unit8[8] = { 0 };
764   size_t offset = 0;
765 
766   // If this is a high surrogate, consume the next code unit to get the
767   // low surrogate.
768   if (CBU16_IS_SURROGATE(code_unit16_high)) {
769     // Make sure this is the high surrogate. If not, it's an encoding
770     // error.
771     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
772       return false;
773 
774     // Make sure that the token has more characters to consume the
775     // lower surrogate.
776     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
777       return false;
778     if (*NextChar() != '\\' || *NextChar() != 'u')
779       return false;
780 
781     NextChar();  // Read past 'u'.
782     int code_unit16_low = 0;
783     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
784       return false;
785 
786     NextNChars(3);
787 
788     if (!CBU16_IS_TRAIL(code_unit16_low)) {
789       return false;
790     }
791 
792     uint32_t code_point =
793         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
794     if (!IsValidCharacter(code_point))
795       return false;
796 
797     offset = 0;
798     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
799   } else {
800     // Not a surrogate.
801     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
802     if (!IsValidCharacter(code_unit16_high))
803       return false;
804 
805     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
806   }
807 
808   dest_string->append(code_unit8);
809   return true;
810 }
811 
DecodeUTF8(const int32_t & point,StringBuilder * dest)812 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
813   DCHECK(IsValidCharacter(point));
814 
815   // Anything outside of the basic ASCII plane will need to be decoded from
816   // int32_t to a multi-byte sequence.
817   if (point < kExtendedASCIIStart) {
818     dest->Append(static_cast<char>(point));
819   } else {
820     char utf8_units[4] = { 0 };
821     int offset = 0;
822     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
823     dest->Convert();
824     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
825     // zero terminated at this point.  |offset| contains the correct length.
826     dest->AppendString(std::string(utf8_units, offset));
827   }
828 }
829 
ConsumeNumber()830 Value* JSONParser::ConsumeNumber() {
831   const char* num_start = pos_;
832   const int start_index = index_;
833   int end_index = start_index;
834 
835   if (*pos_ == '-')
836     NextChar();
837 
838   if (!ReadInt(false)) {
839     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
840     return nullptr;
841   }
842   end_index = index_;
843 
844   // The optional fraction part.
845   if (*pos_ == '.') {
846     if (!CanConsume(1)) {
847       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
848       return nullptr;
849     }
850     NextChar();
851     if (!ReadInt(true)) {
852       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
853       return nullptr;
854     }
855     end_index = index_;
856   }
857 
858   // Optional exponent part.
859   if (*pos_ == 'e' || *pos_ == 'E') {
860     NextChar();
861     if (*pos_ == '-' || *pos_ == '+')
862       NextChar();
863     if (!ReadInt(true)) {
864       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
865       return nullptr;
866     }
867     end_index = index_;
868   }
869 
870   // ReadInt is greedy because numbers have no easily detectable sentinel,
871   // so save off where the parser should be on exit (see Consume invariant at
872   // the top of the header), then make sure the next token is one which is
873   // valid.
874   const char* exit_pos = pos_ - 1;
875   int exit_index = index_ - 1;
876 
877   switch (GetNextToken()) {
878     case T_OBJECT_END:
879     case T_ARRAY_END:
880     case T_LIST_SEPARATOR:
881     case T_END_OF_INPUT:
882       break;
883     default:
884       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
885       return nullptr;
886   }
887 
888   pos_ = exit_pos;
889   index_ = exit_index;
890 
891   StringPiece num_string(num_start, end_index - start_index);
892 
893   int num_int;
894   if (StringToInt(num_string, &num_int))
895     return new FundamentalValue(num_int);
896 
897   double num_double;
898   if (StringToDouble(num_string.as_string(), &num_double) &&
899       std::isfinite(num_double)) {
900     return new FundamentalValue(num_double);
901   }
902 
903   return nullptr;
904 }
905 
ReadInt(bool allow_leading_zeros)906 bool JSONParser::ReadInt(bool allow_leading_zeros) {
907   char first = *pos_;
908   int len = 0;
909 
910   char c = first;
911   while (CanConsume(1) && IsAsciiDigit(c)) {
912     c = *NextChar();
913     ++len;
914   }
915 
916   if (len == 0)
917     return false;
918 
919   if (!allow_leading_zeros && len > 1 && first == '0')
920     return false;
921 
922   return true;
923 }
924 
ConsumeLiteral()925 Value* JSONParser::ConsumeLiteral() {
926   switch (*pos_) {
927     case 't': {
928       const char kTrueLiteral[] = "true";
929       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
930       if (!CanConsume(kTrueLen - 1) ||
931           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
932         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
933         return nullptr;
934       }
935       NextNChars(kTrueLen - 1);
936       return new FundamentalValue(true);
937     }
938     case 'f': {
939       const char kFalseLiteral[] = "false";
940       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
941       if (!CanConsume(kFalseLen - 1) ||
942           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
943         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
944         return nullptr;
945       }
946       NextNChars(kFalseLen - 1);
947       return new FundamentalValue(false);
948     }
949     case 'n': {
950       const char kNullLiteral[] = "null";
951       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
952       if (!CanConsume(kNullLen - 1) ||
953           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
954         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
955         return nullptr;
956       }
957       NextNChars(kNullLen - 1);
958       return Value::CreateNullValue().release();
959     }
960     default:
961       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
962       return nullptr;
963   }
964 }
965 
966 // static
StringsAreEqual(const char * one,const char * two,size_t len)967 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
968   return strncmp(one, two, len) == 0;
969 }
970 
ReportError(JSONReader::JsonParseError code,int column_adjust)971 void JSONParser::ReportError(JSONReader::JsonParseError code,
972                              int column_adjust) {
973   error_code_ = code;
974   error_line_ = line_number_;
975   error_column_ = index_ - index_last_line_ + column_adjust;
976 }
977 
978 // static
FormatErrorMessage(int line,int column,const std::string & description)979 std::string JSONParser::FormatErrorMessage(int line, int column,
980                                            const std::string& description) {
981   if (line || column) {
982     return StringPrintf("Line: %i, column: %i, %s",
983         line, column, description.c_str());
984   }
985   return description;
986 }
987 
988 }  // namespace internal
989 }  // namespace base
990