1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include <cmath>
8
9 #include "base/logging.h"
10 #include "base/macros.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/strings/utf_string_conversion_utils.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/third_party/icu/icu_utf.h"
19 #include "base/values.h"
20
21 namespace base {
22 namespace internal {
23
24 namespace {
25
// Nesting depth at which StackMarker::IsTooDeep() starts returning true, making
// ConsumeDictionary()/ConsumeList() fail with JSON_TOO_MUCH_NESTING.
const int kStackMaxDepth = 100;

// First code point outside of 7-bit ASCII. Code points below this value are
// appended to a StringBuilder as a single char; anything at or above it is
// routed through DecodeUTF8().
const int32_t kExtendedASCIIStart = 0x80;
29
30 // This and the class below are used to own the JSON input string for when
31 // string tokens are stored as StringPiece instead of std::string. This
32 // optimization avoids about 2/3rds of string memory copies. The constructor
33 // takes ownership of the input string. The real root value is Swap()ed into
34 // the new instance.
35 class DictionaryHiddenRootValue : public DictionaryValue {
36 public:
DictionaryHiddenRootValue(std::string * json,Value * root)37 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
38 DCHECK(root->IsType(Value::TYPE_DICTIONARY));
39 DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
40 }
41
Swap(DictionaryValue * other)42 void Swap(DictionaryValue* other) override {
43 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
44
45 // First deep copy to convert JSONStringValue to std::string and swap that
46 // copy with |other|, which contains the new contents of |this|.
47 scoped_ptr<DictionaryValue> copy(DeepCopy());
48 copy->Swap(other);
49
50 // Then erase the contents of the current dictionary and swap in the
51 // new contents, originally from |other|.
52 Clear();
53 json_.reset();
54 DictionaryValue::Swap(copy.get());
55 }
56
57 // Not overriding DictionaryValue::Remove because it just calls through to
58 // the method below.
59
RemoveWithoutPathExpansion(const std::string & key,scoped_ptr<Value> * out)60 bool RemoveWithoutPathExpansion(const std::string& key,
61 scoped_ptr<Value>* out) override {
62 // If the caller won't take ownership of the removed value, just call up.
63 if (!out)
64 return DictionaryValue::RemoveWithoutPathExpansion(key, out);
65
66 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
67
68 // Otherwise, remove the value while its still "owned" by this and copy it
69 // to convert any JSONStringValues to std::string.
70 scoped_ptr<Value> out_owned;
71 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
72 return false;
73
74 out->reset(out_owned->DeepCopy());
75
76 return true;
77 }
78
79 private:
80 scoped_ptr<std::string> json_;
81
82 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
83 };
84
85 class ListHiddenRootValue : public ListValue {
86 public:
ListHiddenRootValue(std::string * json,Value * root)87 ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
88 DCHECK(root->IsType(Value::TYPE_LIST));
89 ListValue::Swap(static_cast<ListValue*>(root));
90 }
91
Swap(ListValue * other)92 void Swap(ListValue* other) override {
93 DVLOG(1) << "Swap()ing a ListValue inefficiently.";
94
95 // First deep copy to convert JSONStringValue to std::string and swap that
96 // copy with |other|, which contains the new contents of |this|.
97 scoped_ptr<ListValue> copy(DeepCopy());
98 copy->Swap(other);
99
100 // Then erase the contents of the current list and swap in the new contents,
101 // originally from |other|.
102 Clear();
103 json_.reset();
104 ListValue::Swap(copy.get());
105 }
106
Remove(size_t index,scoped_ptr<Value> * out)107 bool Remove(size_t index, scoped_ptr<Value>* out) override {
108 // If the caller won't take ownership of the removed value, just call up.
109 if (!out)
110 return ListValue::Remove(index, out);
111
112 DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
113
114 // Otherwise, remove the value while its still "owned" by this and copy it
115 // to convert any JSONStringValues to std::string.
116 scoped_ptr<Value> out_owned;
117 if (!ListValue::Remove(index, &out_owned))
118 return false;
119
120 out->reset(out_owned->DeepCopy());
121
122 return true;
123 }
124
125 private:
126 scoped_ptr<std::string> json_;
127
128 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
129 };
130
131 // A variant on StringValue that uses StringPiece instead of copying the string
132 // into the Value. This can only be stored in a child of hidden root (above),
133 // otherwise the referenced string will not be guaranteed to outlive it.
134 class JSONStringValue : public Value {
135 public:
JSONStringValue(const StringPiece & piece)136 explicit JSONStringValue(const StringPiece& piece)
137 : Value(TYPE_STRING),
138 string_piece_(piece) {
139 }
140
141 // Overridden from Value:
GetAsString(std::string * out_value) const142 bool GetAsString(std::string* out_value) const override {
143 string_piece_.CopyToString(out_value);
144 return true;
145 }
GetAsString(string16 * out_value) const146 bool GetAsString(string16* out_value) const override {
147 *out_value = UTF8ToUTF16(string_piece_);
148 return true;
149 }
DeepCopy() const150 Value* DeepCopy() const override {
151 return new StringValue(string_piece_.as_string());
152 }
Equals(const Value * other) const153 bool Equals(const Value* other) const override {
154 std::string other_string;
155 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
156 StringPiece(other_string) == string_piece_;
157 }
158
159 private:
160 // The location in the original input stream.
161 StringPiece string_piece_;
162
163 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
164 };
165
166 // Simple class that checks for maximum recursion/"stack overflow."
167 class StackMarker {
168 public:
StackMarker(int * depth)169 explicit StackMarker(int* depth) : depth_(depth) {
170 ++(*depth_);
171 DCHECK_LE(*depth_, kStackMaxDepth);
172 }
~StackMarker()173 ~StackMarker() {
174 --(*depth_);
175 }
176
IsTooDeep() const177 bool IsTooDeep() const {
178 return *depth_ >= kStackMaxDepth;
179 }
180
181 private:
182 int* const depth_;
183
184 DISALLOW_COPY_AND_ASSIGN(StackMarker);
185 };
186
187 } // namespace
188
JSONParser(int options)189 JSONParser::JSONParser(int options)
190 : options_(options),
191 start_pos_(NULL),
192 pos_(NULL),
193 end_pos_(NULL),
194 index_(0),
195 stack_depth_(0),
196 line_number_(0),
197 index_last_line_(0),
198 error_code_(JSONReader::JSON_NO_ERROR),
199 error_line_(0),
200 error_column_(0) {
201 }
202
~JSONParser()203 JSONParser::~JSONParser() {
204 }
205
Parse(const StringPiece & input)206 Value* JSONParser::Parse(const StringPiece& input) {
207 scoped_ptr<std::string> input_copy;
208 // If the children of a JSON root can be detached, then hidden roots cannot
209 // be used, so do not bother copying the input because StringPiece will not
210 // be used anywhere.
211 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
212 input_copy.reset(new std::string(input.as_string()));
213 start_pos_ = input_copy->data();
214 } else {
215 start_pos_ = input.data();
216 }
217 pos_ = start_pos_;
218 end_pos_ = start_pos_ + input.length();
219 index_ = 0;
220 line_number_ = 1;
221 index_last_line_ = 0;
222
223 error_code_ = JSONReader::JSON_NO_ERROR;
224 error_line_ = 0;
225 error_column_ = 0;
226
227 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
228 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
229 // ParseNextToken function mis-treating a Unicode BOM as an invalid
230 // character and returning NULL.
231 if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
232 static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
233 static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
234 NextNChars(3);
235 }
236
237 // Parse the first and any nested tokens.
238 scoped_ptr<Value> root(ParseNextToken());
239 if (!root.get())
240 return NULL;
241
242 // Make sure the input stream is at an end.
243 if (GetNextToken() != T_END_OF_INPUT) {
244 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
245 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
246 return NULL;
247 }
248 }
249
250 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
251 // hidden root.
252 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
253 if (root->IsType(Value::TYPE_DICTIONARY)) {
254 return new DictionaryHiddenRootValue(input_copy.release(), root.get());
255 } else if (root->IsType(Value::TYPE_LIST)) {
256 return new ListHiddenRootValue(input_copy.release(), root.get());
257 } else if (root->IsType(Value::TYPE_STRING)) {
258 // A string type could be a JSONStringValue, but because there's no
259 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
260 // preserve it.
261 return root->DeepCopy();
262 }
263 }
264
265 // All other values can be returned directly.
266 return root.release();
267 }
268
// Returns the code stored by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been reported since Parse() last reset it.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
272
GetErrorMessage() const273 std::string JSONParser::GetErrorMessage() const {
274 return FormatErrorMessage(error_line_, error_column_,
275 JSONReader::ErrorCodeToString(error_code_));
276 }
277
// Returns the line number captured by the last ReportError() call; 0 when no
// error has been reported since Parse() last reset it.
int JSONParser::error_line() const {
  return error_line_;
}
281
// Returns the column computed by the last ReportError() call; 0 when no error
// has been reported since Parse() last reset it.
int JSONParser::error_column() const {
  return error_column_;
}
285
286 // StringBuilder ///////////////////////////////////////////////////////////////
287
StringBuilder()288 JSONParser::StringBuilder::StringBuilder()
289 : pos_(NULL),
290 length_(0),
291 string_(NULL) {
292 }
293
StringBuilder(const char * pos)294 JSONParser::StringBuilder::StringBuilder(const char* pos)
295 : pos_(pos),
296 length_(0),
297 string_(NULL) {
298 }
299
Swap(StringBuilder * other)300 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
301 std::swap(other->string_, string_);
302 std::swap(other->pos_, pos_);
303 std::swap(other->length_, length_);
304 }
305
// Frees the converted string, if Convert() ever allocated one. |string_| is
// NULL otherwise, and deleting a null pointer is a no-op.
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
309
Append(const char & c)310 void JSONParser::StringBuilder::Append(const char& c) {
311 DCHECK_GE(c, 0);
312 DCHECK_LT(c, 128);
313
314 if (string_)
315 string_->push_back(c);
316 else
317 ++length_;
318 }
319
AppendString(const std::string & str)320 void JSONParser::StringBuilder::AppendString(const std::string& str) {
321 DCHECK(string_);
322 string_->append(str);
323 }
324
Convert()325 void JSONParser::StringBuilder::Convert() {
326 if (string_)
327 return;
328 string_ = new std::string(pos_, length_);
329 }
330
CanBeStringPiece() const331 bool JSONParser::StringBuilder::CanBeStringPiece() const {
332 return !string_;
333 }
334
AsStringPiece()335 StringPiece JSONParser::StringBuilder::AsStringPiece() {
336 if (string_)
337 return StringPiece();
338 return StringPiece(pos_, length_);
339 }
340
AsString()341 const std::string& JSONParser::StringBuilder::AsString() {
342 if (!string_)
343 Convert();
344 return *string_;
345 }
346
347 // JSONParser private //////////////////////////////////////////////////////////
348
CanConsume(int length)349 inline bool JSONParser::CanConsume(int length) {
350 return pos_ + length <= end_pos_;
351 }
352
NextChar()353 const char* JSONParser::NextChar() {
354 DCHECK(CanConsume(1));
355 ++index_;
356 ++pos_;
357 return pos_;
358 }
359
NextNChars(int n)360 void JSONParser::NextNChars(int n) {
361 DCHECK(CanConsume(n));
362 index_ += n;
363 pos_ += n;
364 }
365
GetNextToken()366 JSONParser::Token JSONParser::GetNextToken() {
367 EatWhitespaceAndComments();
368 if (!CanConsume(1))
369 return T_END_OF_INPUT;
370
371 switch (*pos_) {
372 case '{':
373 return T_OBJECT_BEGIN;
374 case '}':
375 return T_OBJECT_END;
376 case '[':
377 return T_ARRAY_BEGIN;
378 case ']':
379 return T_ARRAY_END;
380 case '"':
381 return T_STRING;
382 case '0':
383 case '1':
384 case '2':
385 case '3':
386 case '4':
387 case '5':
388 case '6':
389 case '7':
390 case '8':
391 case '9':
392 case '-':
393 return T_NUMBER;
394 case 't':
395 return T_BOOL_TRUE;
396 case 'f':
397 return T_BOOL_FALSE;
398 case 'n':
399 return T_NULL;
400 case ',':
401 return T_LIST_SEPARATOR;
402 case ':':
403 return T_OBJECT_PAIR_SEPARATOR;
404 default:
405 return T_INVALID_TOKEN;
406 }
407 }
408
EatWhitespaceAndComments()409 void JSONParser::EatWhitespaceAndComments() {
410 while (pos_ < end_pos_) {
411 switch (*pos_) {
412 case '\r':
413 case '\n':
414 index_last_line_ = index_;
415 // Don't increment line_number_ twice for "\r\n".
416 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
417 ++line_number_;
418 // Fall through.
419 case ' ':
420 case '\t':
421 NextChar();
422 break;
423 case '/':
424 if (!EatComment())
425 return;
426 break;
427 default:
428 return;
429 }
430 }
431 }
432
EatComment()433 bool JSONParser::EatComment() {
434 if (*pos_ != '/' || !CanConsume(1))
435 return false;
436
437 char next_char = *NextChar();
438 if (next_char == '/') {
439 // Single line comment, read to newline.
440 while (CanConsume(1)) {
441 next_char = *NextChar();
442 if (next_char == '\n' || next_char == '\r')
443 return true;
444 }
445 } else if (next_char == '*') {
446 char previous_char = '\0';
447 // Block comment, read until end marker.
448 while (CanConsume(1)) {
449 next_char = *NextChar();
450 if (previous_char == '*' && next_char == '/') {
451 // EatWhitespaceAndComments will inspect pos_, which will still be on
452 // the last / of the comment, so advance once more (which may also be
453 // end of input).
454 NextChar();
455 return true;
456 }
457 previous_char = next_char;
458 }
459
460 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
461 }
462
463 return false;
464 }
465
ParseNextToken()466 Value* JSONParser::ParseNextToken() {
467 return ParseToken(GetNextToken());
468 }
469
ParseToken(Token token)470 Value* JSONParser::ParseToken(Token token) {
471 switch (token) {
472 case T_OBJECT_BEGIN:
473 return ConsumeDictionary();
474 case T_ARRAY_BEGIN:
475 return ConsumeList();
476 case T_STRING:
477 return ConsumeString();
478 case T_NUMBER:
479 return ConsumeNumber();
480 case T_BOOL_TRUE:
481 case T_BOOL_FALSE:
482 case T_NULL:
483 return ConsumeLiteral();
484 default:
485 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
486 return NULL;
487 }
488 }
489
ConsumeDictionary()490 Value* JSONParser::ConsumeDictionary() {
491 if (*pos_ != '{') {
492 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
493 return NULL;
494 }
495
496 StackMarker depth_check(&stack_depth_);
497 if (depth_check.IsTooDeep()) {
498 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
499 return NULL;
500 }
501
502 scoped_ptr<DictionaryValue> dict(new DictionaryValue);
503
504 NextChar();
505 Token token = GetNextToken();
506 while (token != T_OBJECT_END) {
507 if (token != T_STRING) {
508 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
509 return NULL;
510 }
511
512 // First consume the key.
513 StringBuilder key;
514 if (!ConsumeStringRaw(&key)) {
515 return NULL;
516 }
517
518 // Read the separator.
519 NextChar();
520 token = GetNextToken();
521 if (token != T_OBJECT_PAIR_SEPARATOR) {
522 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
523 return NULL;
524 }
525
526 // The next token is the value. Ownership transfers to |dict|.
527 NextChar();
528 Value* value = ParseNextToken();
529 if (!value) {
530 // ReportError from deeper level.
531 return NULL;
532 }
533
534 dict->SetWithoutPathExpansion(key.AsString(), value);
535
536 NextChar();
537 token = GetNextToken();
538 if (token == T_LIST_SEPARATOR) {
539 NextChar();
540 token = GetNextToken();
541 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
542 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
543 return NULL;
544 }
545 } else if (token != T_OBJECT_END) {
546 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
547 return NULL;
548 }
549 }
550
551 return dict.release();
552 }
553
ConsumeList()554 Value* JSONParser::ConsumeList() {
555 if (*pos_ != '[') {
556 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
557 return NULL;
558 }
559
560 StackMarker depth_check(&stack_depth_);
561 if (depth_check.IsTooDeep()) {
562 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
563 return NULL;
564 }
565
566 scoped_ptr<ListValue> list(new ListValue);
567
568 NextChar();
569 Token token = GetNextToken();
570 while (token != T_ARRAY_END) {
571 Value* item = ParseToken(token);
572 if (!item) {
573 // ReportError from deeper level.
574 return NULL;
575 }
576
577 list->Append(item);
578
579 NextChar();
580 token = GetNextToken();
581 if (token == T_LIST_SEPARATOR) {
582 NextChar();
583 token = GetNextToken();
584 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
585 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
586 return NULL;
587 }
588 } else if (token != T_ARRAY_END) {
589 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
590 return NULL;
591 }
592 }
593
594 return list.release();
595 }
596
ConsumeString()597 Value* JSONParser::ConsumeString() {
598 StringBuilder string;
599 if (!ConsumeStringRaw(&string))
600 return NULL;
601
602 // Create the Value representation, using a hidden root, if configured
603 // to do so, and if the string can be represented by StringPiece.
604 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
605 return new JSONStringValue(string.AsStringPiece());
606 } else {
607 if (string.CanBeStringPiece())
608 string.Convert();
609 return new StringValue(string.AsString());
610 }
611 }
612
ConsumeStringRaw(StringBuilder * out)613 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
614 if (*pos_ != '"') {
615 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
616 return false;
617 }
618
619 // StringBuilder will internally build a StringPiece unless a UTF-16
620 // conversion occurs, at which point it will perform a copy into a
621 // std::string.
622 StringBuilder string(NextChar());
623
624 int length = end_pos_ - start_pos_;
625 int32_t next_char = 0;
626
627 while (CanConsume(1)) {
628 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
629 CBU8_NEXT(start_pos_, index_, length, next_char);
630 if (next_char < 0 || !IsValidCharacter(next_char)) {
631 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
632 return false;
633 }
634
635 // If this character is an escape sequence...
636 if (next_char == '\\') {
637 // The input string will be adjusted (either by combining the two
638 // characters of an encoded escape sequence, or with a UTF conversion),
639 // so using StringPiece isn't possible -- force a conversion.
640 string.Convert();
641
642 if (!CanConsume(1)) {
643 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
644 return false;
645 }
646
647 switch (*NextChar()) {
648 // Allowed esape sequences:
649 case 'x': { // UTF-8 sequence.
650 // UTF-8 \x escape sequences are not allowed in the spec, but they
651 // are supported here for backwards-compatiblity with the old parser.
652 if (!CanConsume(2)) {
653 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
654 return false;
655 }
656
657 int hex_digit = 0;
658 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
659 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
660 return false;
661 }
662 NextChar();
663
664 if (hex_digit < kExtendedASCIIStart)
665 string.Append(static_cast<char>(hex_digit));
666 else
667 DecodeUTF8(hex_digit, &string);
668 break;
669 }
670 case 'u': { // UTF-16 sequence.
671 // UTF units are of the form \uXXXX.
672 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
673 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
674 return false;
675 }
676
677 // Skip the 'u'.
678 NextChar();
679
680 std::string utf8_units;
681 if (!DecodeUTF16(&utf8_units)) {
682 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
683 return false;
684 }
685
686 string.AppendString(utf8_units);
687 break;
688 }
689 case '"':
690 string.Append('"');
691 break;
692 case '\\':
693 string.Append('\\');
694 break;
695 case '/':
696 string.Append('/');
697 break;
698 case 'b':
699 string.Append('\b');
700 break;
701 case 'f':
702 string.Append('\f');
703 break;
704 case 'n':
705 string.Append('\n');
706 break;
707 case 'r':
708 string.Append('\r');
709 break;
710 case 't':
711 string.Append('\t');
712 break;
713 case 'v': // Not listed as valid escape sequence in the RFC.
714 string.Append('\v');
715 break;
716 // All other escape squences are illegal.
717 default:
718 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
719 return false;
720 }
721 } else if (next_char == '"') {
722 --index_; // Rewind by one because of CBU8_NEXT.
723 out->Swap(&string);
724 return true;
725 } else {
726 if (next_char < kExtendedASCIIStart)
727 string.Append(static_cast<char>(next_char));
728 else
729 DecodeUTF8(next_char, &string);
730 }
731 }
732
733 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
734 return false;
735 }
736
737 // Entry is at the first X in \uXXXX.
DecodeUTF16(std::string * dest_string)738 bool JSONParser::DecodeUTF16(std::string* dest_string) {
739 if (!CanConsume(4))
740 return false;
741
742 // This is a 32-bit field because the shift operations in the
743 // conversion process below cause MSVC to error about "data loss."
744 // This only stores UTF-16 code units, though.
745 // Consume the UTF-16 code unit, which may be a high surrogate.
746 int code_unit16_high = 0;
747 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
748 return false;
749
750 // Only add 3, not 4, because at the end of this iteration, the parser has
751 // finished working with the last digit of the UTF sequence, meaning that
752 // the next iteration will advance to the next byte.
753 NextNChars(3);
754
755 // Used to convert the UTF-16 code units to a code point and then to a UTF-8
756 // code unit sequence.
757 char code_unit8[8] = { 0 };
758 size_t offset = 0;
759
760 // If this is a high surrogate, consume the next code unit to get the
761 // low surrogate.
762 if (CBU16_IS_SURROGATE(code_unit16_high)) {
763 // Make sure this is the high surrogate. If not, it's an encoding
764 // error.
765 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
766 return false;
767
768 // Make sure that the token has more characters to consume the
769 // lower surrogate.
770 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
771 return false;
772 if (*NextChar() != '\\' || *NextChar() != 'u')
773 return false;
774
775 NextChar(); // Read past 'u'.
776 int code_unit16_low = 0;
777 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
778 return false;
779
780 NextNChars(3);
781
782 if (!CBU16_IS_TRAIL(code_unit16_low)) {
783 return false;
784 }
785
786 uint32_t code_point =
787 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
788 if (!IsValidCharacter(code_point))
789 return false;
790
791 offset = 0;
792 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
793 } else {
794 // Not a surrogate.
795 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
796 if (!IsValidCharacter(code_unit16_high))
797 return false;
798
799 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
800 }
801
802 dest_string->append(code_unit8);
803 return true;
804 }
805
DecodeUTF8(const int32_t & point,StringBuilder * dest)806 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
807 DCHECK(IsValidCharacter(point));
808
809 // Anything outside of the basic ASCII plane will need to be decoded from
810 // int32_t to a multi-byte sequence.
811 if (point < kExtendedASCIIStart) {
812 dest->Append(static_cast<char>(point));
813 } else {
814 char utf8_units[4] = { 0 };
815 int offset = 0;
816 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
817 dest->Convert();
818 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
819 // zero terminated at this point. |offset| contains the correct length.
820 dest->AppendString(std::string(utf8_units, offset));
821 }
822 }
823
ConsumeNumber()824 Value* JSONParser::ConsumeNumber() {
825 const char* num_start = pos_;
826 const int start_index = index_;
827 int end_index = start_index;
828
829 if (*pos_ == '-')
830 NextChar();
831
832 if (!ReadInt(false)) {
833 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
834 return NULL;
835 }
836 end_index = index_;
837
838 // The optional fraction part.
839 if (*pos_ == '.') {
840 if (!CanConsume(1)) {
841 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
842 return NULL;
843 }
844 NextChar();
845 if (!ReadInt(true)) {
846 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
847 return NULL;
848 }
849 end_index = index_;
850 }
851
852 // Optional exponent part.
853 if (*pos_ == 'e' || *pos_ == 'E') {
854 NextChar();
855 if (*pos_ == '-' || *pos_ == '+')
856 NextChar();
857 if (!ReadInt(true)) {
858 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
859 return NULL;
860 }
861 end_index = index_;
862 }
863
864 // ReadInt is greedy because numbers have no easily detectable sentinel,
865 // so save off where the parser should be on exit (see Consume invariant at
866 // the top of the header), then make sure the next token is one which is
867 // valid.
868 const char* exit_pos = pos_ - 1;
869 int exit_index = index_ - 1;
870
871 switch (GetNextToken()) {
872 case T_OBJECT_END:
873 case T_ARRAY_END:
874 case T_LIST_SEPARATOR:
875 case T_END_OF_INPUT:
876 break;
877 default:
878 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
879 return NULL;
880 }
881
882 pos_ = exit_pos;
883 index_ = exit_index;
884
885 StringPiece num_string(num_start, end_index - start_index);
886
887 int num_int;
888 if (StringToInt(num_string, &num_int))
889 return new FundamentalValue(num_int);
890
891 double num_double;
892 if (StringToDouble(num_string.as_string(), &num_double) &&
893 std::isfinite(num_double)) {
894 return new FundamentalValue(num_double);
895 }
896
897 return NULL;
898 }
899
ReadInt(bool allow_leading_zeros)900 bool JSONParser::ReadInt(bool allow_leading_zeros) {
901 char first = *pos_;
902 int len = 0;
903
904 char c = first;
905 while (CanConsume(1) && IsAsciiDigit(c)) {
906 c = *NextChar();
907 ++len;
908 }
909
910 if (len == 0)
911 return false;
912
913 if (!allow_leading_zeros && len > 1 && first == '0')
914 return false;
915
916 return true;
917 }
918
ConsumeLiteral()919 Value* JSONParser::ConsumeLiteral() {
920 switch (*pos_) {
921 case 't': {
922 const char kTrueLiteral[] = "true";
923 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
924 if (!CanConsume(kTrueLen - 1) ||
925 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
926 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
927 return NULL;
928 }
929 NextNChars(kTrueLen - 1);
930 return new FundamentalValue(true);
931 }
932 case 'f': {
933 const char kFalseLiteral[] = "false";
934 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
935 if (!CanConsume(kFalseLen - 1) ||
936 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
937 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
938 return NULL;
939 }
940 NextNChars(kFalseLen - 1);
941 return new FundamentalValue(false);
942 }
943 case 'n': {
944 const char kNullLiteral[] = "null";
945 const int kNullLen = static_cast<int>(strlen(kNullLiteral));
946 if (!CanConsume(kNullLen - 1) ||
947 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
948 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
949 return NULL;
950 }
951 NextNChars(kNullLen - 1);
952 return Value::CreateNullValue().release();
953 }
954 default:
955 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
956 return NULL;
957 }
958 }
959
960 // static
StringsAreEqual(const char * one,const char * two,size_t len)961 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
962 return strncmp(one, two, len) == 0;
963 }
964
ReportError(JSONReader::JsonParseError code,int column_adjust)965 void JSONParser::ReportError(JSONReader::JsonParseError code,
966 int column_adjust) {
967 error_code_ = code;
968 error_line_ = line_number_;
969 error_column_ = index_ - index_last_line_ + column_adjust;
970 }
971
972 // static
FormatErrorMessage(int line,int column,const std::string & description)973 std::string JSONParser::FormatErrorMessage(int line, int column,
974 const std::string& description) {
975 if (line || column) {
976 return StringPrintf("Line: %i, column: %i, %s",
977 line, column, description.c_str());
978 }
979 return description;
980 }
981
982 } // namespace internal
983 } // namespace base
984