1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include <cmath>
8
9 #include "base/logging.h"
10 #include "base/macros.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/strings/utf_string_conversion_utils.h"
17 #include "base/third_party/icu/icu_utf.h"
18 #include "base/values.h"
19
20 namespace base {
21 namespace internal {
22
23 namespace {
24
// Nesting limit for dictionaries and lists: StackMarker::IsTooDeep() reports
// true once the tracked depth reaches this value.
const int kStackMaxDepth = 100;

// First code point that is not 7-bit ASCII. Code points below this value are
// appended to string tokens as a single char; code points at or above it are
// encoded as multi-byte UTF-8 by DecodeUTF8().
const int32_t kExtendedASCIIStart = 0x80;
28
29 // This and the class below are used to own the JSON input string for when
30 // string tokens are stored as StringPiece instead of std::string. This
31 // optimization avoids about 2/3rds of string memory copies. The constructor
32 // takes ownership of the input string. The real root value is Swap()ed into
33 // the new instance.
34 class DictionaryHiddenRootValue : public DictionaryValue {
35 public:
DictionaryHiddenRootValue(std::string * json,Value * root)36 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
37 DCHECK(root->IsType(Value::TYPE_DICTIONARY));
38 DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
39 }
40
Swap(DictionaryValue * other)41 void Swap(DictionaryValue* other) override {
42 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
43
44 // First deep copy to convert JSONStringValue to std::string and swap that
45 // copy with |other|, which contains the new contents of |this|.
46 scoped_ptr<DictionaryValue> copy(DeepCopy());
47 copy->Swap(other);
48
49 // Then erase the contents of the current dictionary and swap in the
50 // new contents, originally from |other|.
51 Clear();
52 json_.reset();
53 DictionaryValue::Swap(copy.get());
54 }
55
56 // Not overriding DictionaryValue::Remove because it just calls through to
57 // the method below.
58
RemoveWithoutPathExpansion(const std::string & key,scoped_ptr<Value> * out)59 bool RemoveWithoutPathExpansion(const std::string& key,
60 scoped_ptr<Value>* out) override {
61 // If the caller won't take ownership of the removed value, just call up.
62 if (!out)
63 return DictionaryValue::RemoveWithoutPathExpansion(key, out);
64
65 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
66
67 // Otherwise, remove the value while its still "owned" by this and copy it
68 // to convert any JSONStringValues to std::string.
69 scoped_ptr<Value> out_owned;
70 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
71 return false;
72
73 out->reset(out_owned->DeepCopy());
74
75 return true;
76 }
77
78 private:
79 scoped_ptr<std::string> json_;
80
81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
82 };
83
84 class ListHiddenRootValue : public ListValue {
85 public:
ListHiddenRootValue(std::string * json,Value * root)86 ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
87 DCHECK(root->IsType(Value::TYPE_LIST));
88 ListValue::Swap(static_cast<ListValue*>(root));
89 }
90
Swap(ListValue * other)91 void Swap(ListValue* other) override {
92 DVLOG(1) << "Swap()ing a ListValue inefficiently.";
93
94 // First deep copy to convert JSONStringValue to std::string and swap that
95 // copy with |other|, which contains the new contents of |this|.
96 scoped_ptr<ListValue> copy(DeepCopy());
97 copy->Swap(other);
98
99 // Then erase the contents of the current list and swap in the new contents,
100 // originally from |other|.
101 Clear();
102 json_.reset();
103 ListValue::Swap(copy.get());
104 }
105
Remove(size_t index,scoped_ptr<Value> * out)106 bool Remove(size_t index, scoped_ptr<Value>* out) override {
107 // If the caller won't take ownership of the removed value, just call up.
108 if (!out)
109 return ListValue::Remove(index, out);
110
111 DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
112
113 // Otherwise, remove the value while its still "owned" by this and copy it
114 // to convert any JSONStringValues to std::string.
115 scoped_ptr<Value> out_owned;
116 if (!ListValue::Remove(index, &out_owned))
117 return false;
118
119 out->reset(out_owned->DeepCopy());
120
121 return true;
122 }
123
124 private:
125 scoped_ptr<std::string> json_;
126
127 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
128 };
129
130 // A variant on StringValue that uses StringPiece instead of copying the string
131 // into the Value. This can only be stored in a child of hidden root (above),
132 // otherwise the referenced string will not be guaranteed to outlive it.
133 class JSONStringValue : public Value {
134 public:
JSONStringValue(const StringPiece & piece)135 explicit JSONStringValue(const StringPiece& piece)
136 : Value(TYPE_STRING),
137 string_piece_(piece) {
138 }
139
140 // Overridden from Value:
GetAsString(std::string * out_value) const141 bool GetAsString(std::string* out_value) const override {
142 string_piece_.CopyToString(out_value);
143 return true;
144 }
DeepCopy() const145 Value* DeepCopy() const override {
146 return new StringValue(string_piece_.as_string());
147 }
Equals(const Value * other) const148 bool Equals(const Value* other) const override {
149 std::string other_string;
150 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
151 StringPiece(other_string) == string_piece_;
152 }
153
154 private:
155 // The location in the original input stream.
156 StringPiece string_piece_;
157
158 DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
159 };
160
161 // Simple class that checks for maximum recursion/"stack overflow."
162 class StackMarker {
163 public:
StackMarker(int * depth)164 explicit StackMarker(int* depth) : depth_(depth) {
165 ++(*depth_);
166 DCHECK_LE(*depth_, kStackMaxDepth);
167 }
~StackMarker()168 ~StackMarker() {
169 --(*depth_);
170 }
171
IsTooDeep() const172 bool IsTooDeep() const {
173 return *depth_ >= kStackMaxDepth;
174 }
175
176 private:
177 int* const depth_;
178
179 DISALLOW_COPY_AND_ASSIGN(StackMarker);
180 };
181
182 } // namespace
183
JSONParser(int options)184 JSONParser::JSONParser(int options)
185 : options_(options),
186 start_pos_(NULL),
187 pos_(NULL),
188 end_pos_(NULL),
189 index_(0),
190 stack_depth_(0),
191 line_number_(0),
192 index_last_line_(0),
193 error_code_(JSONReader::JSON_NO_ERROR),
194 error_line_(0),
195 error_column_(0) {
196 }
197
~JSONParser()198 JSONParser::~JSONParser() {
199 }
200
Parse(const StringPiece & input)201 Value* JSONParser::Parse(const StringPiece& input) {
202 scoped_ptr<std::string> input_copy;
203 // If the children of a JSON root can be detached, then hidden roots cannot
204 // be used, so do not bother copying the input because StringPiece will not
205 // be used anywhere.
206 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
207 input_copy.reset(new std::string(input.as_string()));
208 start_pos_ = input_copy->data();
209 } else {
210 start_pos_ = input.data();
211 }
212 pos_ = start_pos_;
213 end_pos_ = start_pos_ + input.length();
214 index_ = 0;
215 line_number_ = 1;
216 index_last_line_ = 0;
217
218 error_code_ = JSONReader::JSON_NO_ERROR;
219 error_line_ = 0;
220 error_column_ = 0;
221
222 // When the input JSON string starts with a UTF-8 Byte-Order-Mark
223 // <0xEF 0xBB 0xBF>, advance the start position to avoid the
224 // ParseNextToken function mis-treating a Unicode BOM as an invalid
225 // character and returning NULL.
226 if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
227 static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
228 static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
229 NextNChars(3);
230 }
231
232 // Parse the first and any nested tokens.
233 scoped_ptr<Value> root(ParseNextToken());
234 if (!root.get())
235 return NULL;
236
237 // Make sure the input stream is at an end.
238 if (GetNextToken() != T_END_OF_INPUT) {
239 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
240 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
241 return NULL;
242 }
243 }
244
245 // Dictionaries and lists can contain JSONStringValues, so wrap them in a
246 // hidden root.
247 if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
248 if (root->IsType(Value::TYPE_DICTIONARY)) {
249 return new DictionaryHiddenRootValue(input_copy.release(), root.get());
250 } else if (root->IsType(Value::TYPE_LIST)) {
251 return new ListHiddenRootValue(input_copy.release(), root.get());
252 } else if (root->IsType(Value::TYPE_STRING)) {
253 // A string type could be a JSONStringValue, but because there's no
254 // corresponding HiddenRootValue, the memory will be lost. Deep copy to
255 // preserve it.
256 return root->DeepCopy();
257 }
258 }
259
260 // All other values can be returned directly.
261 return root.release();
262 }
263
error_code() const264 JSONReader::JsonParseError JSONParser::error_code() const {
265 return error_code_;
266 }
267
GetErrorMessage() const268 std::string JSONParser::GetErrorMessage() const {
269 return FormatErrorMessage(error_line_, error_column_,
270 JSONReader::ErrorCodeToString(error_code_));
271 }
272
error_line() const273 int JSONParser::error_line() const {
274 return error_line_;
275 }
276
error_column() const277 int JSONParser::error_column() const {
278 return error_column_;
279 }
280
281 // StringBuilder ///////////////////////////////////////////////////////////////
282
StringBuilder()283 JSONParser::StringBuilder::StringBuilder()
284 : pos_(NULL),
285 length_(0),
286 string_(NULL) {
287 }
288
StringBuilder(const char * pos)289 JSONParser::StringBuilder::StringBuilder(const char* pos)
290 : pos_(pos),
291 length_(0),
292 string_(NULL) {
293 }
294
Swap(StringBuilder * other)295 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
296 std::swap(other->string_, string_);
297 std::swap(other->pos_, pos_);
298 std::swap(other->length_, length_);
299 }
300
~StringBuilder()301 JSONParser::StringBuilder::~StringBuilder() {
302 delete string_;
303 }
304
Append(const char & c)305 void JSONParser::StringBuilder::Append(const char& c) {
306 DCHECK_GE(c, 0);
307 DCHECK_LT(c, 128);
308
309 if (string_)
310 string_->push_back(c);
311 else
312 ++length_;
313 }
314
AppendString(const std::string & str)315 void JSONParser::StringBuilder::AppendString(const std::string& str) {
316 DCHECK(string_);
317 string_->append(str);
318 }
319
Convert()320 void JSONParser::StringBuilder::Convert() {
321 if (string_)
322 return;
323 string_ = new std::string(pos_, length_);
324 }
325
CanBeStringPiece() const326 bool JSONParser::StringBuilder::CanBeStringPiece() const {
327 return !string_;
328 }
329
AsStringPiece()330 StringPiece JSONParser::StringBuilder::AsStringPiece() {
331 if (string_)
332 return StringPiece();
333 return StringPiece(pos_, length_);
334 }
335
AsString()336 const std::string& JSONParser::StringBuilder::AsString() {
337 if (!string_)
338 Convert();
339 return *string_;
340 }
341
342 // JSONParser private //////////////////////////////////////////////////////////
343
CanConsume(int length)344 inline bool JSONParser::CanConsume(int length) {
345 return pos_ + length <= end_pos_;
346 }
347
NextChar()348 const char* JSONParser::NextChar() {
349 DCHECK(CanConsume(1));
350 ++index_;
351 ++pos_;
352 return pos_;
353 }
354
NextNChars(int n)355 void JSONParser::NextNChars(int n) {
356 DCHECK(CanConsume(n));
357 index_ += n;
358 pos_ += n;
359 }
360
GetNextToken()361 JSONParser::Token JSONParser::GetNextToken() {
362 EatWhitespaceAndComments();
363 if (!CanConsume(1))
364 return T_END_OF_INPUT;
365
366 switch (*pos_) {
367 case '{':
368 return T_OBJECT_BEGIN;
369 case '}':
370 return T_OBJECT_END;
371 case '[':
372 return T_ARRAY_BEGIN;
373 case ']':
374 return T_ARRAY_END;
375 case '"':
376 return T_STRING;
377 case '0':
378 case '1':
379 case '2':
380 case '3':
381 case '4':
382 case '5':
383 case '6':
384 case '7':
385 case '8':
386 case '9':
387 case '-':
388 return T_NUMBER;
389 case 't':
390 return T_BOOL_TRUE;
391 case 'f':
392 return T_BOOL_FALSE;
393 case 'n':
394 return T_NULL;
395 case ',':
396 return T_LIST_SEPARATOR;
397 case ':':
398 return T_OBJECT_PAIR_SEPARATOR;
399 default:
400 return T_INVALID_TOKEN;
401 }
402 }
403
EatWhitespaceAndComments()404 void JSONParser::EatWhitespaceAndComments() {
405 while (pos_ < end_pos_) {
406 switch (*pos_) {
407 case '\r':
408 case '\n':
409 index_last_line_ = index_;
410 // Don't increment line_number_ twice for "\r\n".
411 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
412 ++line_number_;
413 // Fall through.
414 case ' ':
415 case '\t':
416 NextChar();
417 break;
418 case '/':
419 if (!EatComment())
420 return;
421 break;
422 default:
423 return;
424 }
425 }
426 }
427
EatComment()428 bool JSONParser::EatComment() {
429 if (*pos_ != '/' || !CanConsume(1))
430 return false;
431
432 char next_char = *NextChar();
433 if (next_char == '/') {
434 // Single line comment, read to newline.
435 while (CanConsume(1)) {
436 next_char = *NextChar();
437 if (next_char == '\n' || next_char == '\r')
438 return true;
439 }
440 } else if (next_char == '*') {
441 char previous_char = '\0';
442 // Block comment, read until end marker.
443 while (CanConsume(1)) {
444 next_char = *NextChar();
445 if (previous_char == '*' && next_char == '/') {
446 // EatWhitespaceAndComments will inspect pos_, which will still be on
447 // the last / of the comment, so advance once more (which may also be
448 // end of input).
449 NextChar();
450 return true;
451 }
452 previous_char = next_char;
453 }
454
455 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
456 }
457
458 return false;
459 }
460
ParseNextToken()461 Value* JSONParser::ParseNextToken() {
462 return ParseToken(GetNextToken());
463 }
464
ParseToken(Token token)465 Value* JSONParser::ParseToken(Token token) {
466 switch (token) {
467 case T_OBJECT_BEGIN:
468 return ConsumeDictionary();
469 case T_ARRAY_BEGIN:
470 return ConsumeList();
471 case T_STRING:
472 return ConsumeString();
473 case T_NUMBER:
474 return ConsumeNumber();
475 case T_BOOL_TRUE:
476 case T_BOOL_FALSE:
477 case T_NULL:
478 return ConsumeLiteral();
479 default:
480 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
481 return NULL;
482 }
483 }
484
ConsumeDictionary()485 Value* JSONParser::ConsumeDictionary() {
486 if (*pos_ != '{') {
487 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
488 return NULL;
489 }
490
491 StackMarker depth_check(&stack_depth_);
492 if (depth_check.IsTooDeep()) {
493 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
494 return NULL;
495 }
496
497 scoped_ptr<DictionaryValue> dict(new DictionaryValue);
498
499 NextChar();
500 Token token = GetNextToken();
501 while (token != T_OBJECT_END) {
502 if (token != T_STRING) {
503 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
504 return NULL;
505 }
506
507 // First consume the key.
508 StringBuilder key;
509 if (!ConsumeStringRaw(&key)) {
510 return NULL;
511 }
512
513 // Read the separator.
514 NextChar();
515 token = GetNextToken();
516 if (token != T_OBJECT_PAIR_SEPARATOR) {
517 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
518 return NULL;
519 }
520
521 // The next token is the value. Ownership transfers to |dict|.
522 NextChar();
523 Value* value = ParseNextToken();
524 if (!value) {
525 // ReportError from deeper level.
526 return NULL;
527 }
528
529 dict->SetWithoutPathExpansion(key.AsString(), value);
530
531 NextChar();
532 token = GetNextToken();
533 if (token == T_LIST_SEPARATOR) {
534 NextChar();
535 token = GetNextToken();
536 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
537 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
538 return NULL;
539 }
540 } else if (token != T_OBJECT_END) {
541 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
542 return NULL;
543 }
544 }
545
546 return dict.release();
547 }
548
ConsumeList()549 Value* JSONParser::ConsumeList() {
550 if (*pos_ != '[') {
551 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
552 return NULL;
553 }
554
555 StackMarker depth_check(&stack_depth_);
556 if (depth_check.IsTooDeep()) {
557 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
558 return NULL;
559 }
560
561 scoped_ptr<ListValue> list(new ListValue);
562
563 NextChar();
564 Token token = GetNextToken();
565 while (token != T_ARRAY_END) {
566 Value* item = ParseToken(token);
567 if (!item) {
568 // ReportError from deeper level.
569 return NULL;
570 }
571
572 list->Append(item);
573
574 NextChar();
575 token = GetNextToken();
576 if (token == T_LIST_SEPARATOR) {
577 NextChar();
578 token = GetNextToken();
579 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
580 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
581 return NULL;
582 }
583 } else if (token != T_ARRAY_END) {
584 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
585 return NULL;
586 }
587 }
588
589 return list.release();
590 }
591
ConsumeString()592 Value* JSONParser::ConsumeString() {
593 StringBuilder string;
594 if (!ConsumeStringRaw(&string))
595 return NULL;
596
597 // Create the Value representation, using a hidden root, if configured
598 // to do so, and if the string can be represented by StringPiece.
599 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
600 return new JSONStringValue(string.AsStringPiece());
601 } else {
602 if (string.CanBeStringPiece())
603 string.Convert();
604 return new StringValue(string.AsString());
605 }
606 }
607
ConsumeStringRaw(StringBuilder * out)608 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
609 if (*pos_ != '"') {
610 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
611 return false;
612 }
613
614 // StringBuilder will internally build a StringPiece unless a UTF-16
615 // conversion occurs, at which point it will perform a copy into a
616 // std::string.
617 StringBuilder string(NextChar());
618
619 int length = end_pos_ - start_pos_;
620 int32_t next_char = 0;
621
622 while (CanConsume(1)) {
623 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement.
624 CBU8_NEXT(start_pos_, index_, length, next_char);
625 if (next_char < 0 || !IsValidCharacter(next_char)) {
626 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
627 return false;
628 }
629
630 // If this character is an escape sequence...
631 if (next_char == '\\') {
632 // The input string will be adjusted (either by combining the two
633 // characters of an encoded escape sequence, or with a UTF conversion),
634 // so using StringPiece isn't possible -- force a conversion.
635 string.Convert();
636
637 if (!CanConsume(1)) {
638 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
639 return false;
640 }
641
642 switch (*NextChar()) {
643 // Allowed esape sequences:
644 case 'x': { // UTF-8 sequence.
645 // UTF-8 \x escape sequences are not allowed in the spec, but they
646 // are supported here for backwards-compatiblity with the old parser.
647 if (!CanConsume(2)) {
648 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
649 return false;
650 }
651
652 int hex_digit = 0;
653 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
654 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
655 return false;
656 }
657 NextChar();
658
659 if (hex_digit < kExtendedASCIIStart)
660 string.Append(static_cast<char>(hex_digit));
661 else
662 DecodeUTF8(hex_digit, &string);
663 break;
664 }
665 case 'u': { // UTF-16 sequence.
666 // UTF units are of the form \uXXXX.
667 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits.
668 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
669 return false;
670 }
671
672 // Skip the 'u'.
673 NextChar();
674
675 std::string utf8_units;
676 if (!DecodeUTF16(&utf8_units)) {
677 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
678 return false;
679 }
680
681 string.AppendString(utf8_units);
682 break;
683 }
684 case '"':
685 string.Append('"');
686 break;
687 case '\\':
688 string.Append('\\');
689 break;
690 case '/':
691 string.Append('/');
692 break;
693 case 'b':
694 string.Append('\b');
695 break;
696 case 'f':
697 string.Append('\f');
698 break;
699 case 'n':
700 string.Append('\n');
701 break;
702 case 'r':
703 string.Append('\r');
704 break;
705 case 't':
706 string.Append('\t');
707 break;
708 case 'v': // Not listed as valid escape sequence in the RFC.
709 string.Append('\v');
710 break;
711 // All other escape squences are illegal.
712 default:
713 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
714 return false;
715 }
716 } else if (next_char == '"') {
717 --index_; // Rewind by one because of CBU8_NEXT.
718 out->Swap(&string);
719 return true;
720 } else {
721 if (next_char < kExtendedASCIIStart)
722 string.Append(static_cast<char>(next_char));
723 else
724 DecodeUTF8(next_char, &string);
725 }
726 }
727
728 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
729 return false;
730 }
731
732 // Entry is at the first X in \uXXXX.
DecodeUTF16(std::string * dest_string)733 bool JSONParser::DecodeUTF16(std::string* dest_string) {
734 if (!CanConsume(4))
735 return false;
736
737 // This is a 32-bit field because the shift operations in the
738 // conversion process below cause MSVC to error about "data loss."
739 // This only stores UTF-16 code units, though.
740 // Consume the UTF-16 code unit, which may be a high surrogate.
741 int code_unit16_high = 0;
742 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
743 return false;
744
745 // Only add 3, not 4, because at the end of this iteration, the parser has
746 // finished working with the last digit of the UTF sequence, meaning that
747 // the next iteration will advance to the next byte.
748 NextNChars(3);
749
750 // Used to convert the UTF-16 code units to a code point and then to a UTF-8
751 // code unit sequence.
752 char code_unit8[8] = { 0 };
753 size_t offset = 0;
754
755 // If this is a high surrogate, consume the next code unit to get the
756 // low surrogate.
757 if (CBU16_IS_SURROGATE(code_unit16_high)) {
758 // Make sure this is the high surrogate. If not, it's an encoding
759 // error.
760 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
761 return false;
762
763 // Make sure that the token has more characters to consume the
764 // lower surrogate.
765 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits.
766 return false;
767 if (*NextChar() != '\\' || *NextChar() != 'u')
768 return false;
769
770 NextChar(); // Read past 'u'.
771 int code_unit16_low = 0;
772 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
773 return false;
774
775 NextNChars(3);
776
777 if (!CBU16_IS_TRAIL(code_unit16_low)) {
778 return false;
779 }
780
781 uint32_t code_point =
782 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
783 if (!IsValidCharacter(code_point))
784 return false;
785
786 offset = 0;
787 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
788 } else {
789 // Not a surrogate.
790 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
791 if (!IsValidCharacter(code_unit16_high))
792 return false;
793
794 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
795 }
796
797 dest_string->append(code_unit8);
798 return true;
799 }
800
DecodeUTF8(const int32_t & point,StringBuilder * dest)801 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
802 DCHECK(IsValidCharacter(point));
803
804 // Anything outside of the basic ASCII plane will need to be decoded from
805 // int32_t to a multi-byte sequence.
806 if (point < kExtendedASCIIStart) {
807 dest->Append(static_cast<char>(point));
808 } else {
809 char utf8_units[4] = { 0 };
810 int offset = 0;
811 CBU8_APPEND_UNSAFE(utf8_units, offset, point);
812 dest->Convert();
813 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
814 // zero terminated at this point. |offset| contains the correct length.
815 dest->AppendString(std::string(utf8_units, offset));
816 }
817 }
818
ConsumeNumber()819 Value* JSONParser::ConsumeNumber() {
820 const char* num_start = pos_;
821 const int start_index = index_;
822 int end_index = start_index;
823
824 if (*pos_ == '-')
825 NextChar();
826
827 if (!ReadInt(false)) {
828 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
829 return NULL;
830 }
831 end_index = index_;
832
833 // The optional fraction part.
834 if (*pos_ == '.') {
835 if (!CanConsume(1)) {
836 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
837 return NULL;
838 }
839 NextChar();
840 if (!ReadInt(true)) {
841 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
842 return NULL;
843 }
844 end_index = index_;
845 }
846
847 // Optional exponent part.
848 if (*pos_ == 'e' || *pos_ == 'E') {
849 NextChar();
850 if (*pos_ == '-' || *pos_ == '+')
851 NextChar();
852 if (!ReadInt(true)) {
853 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
854 return NULL;
855 }
856 end_index = index_;
857 }
858
859 // ReadInt is greedy because numbers have no easily detectable sentinel,
860 // so save off where the parser should be on exit (see Consume invariant at
861 // the top of the header), then make sure the next token is one which is
862 // valid.
863 const char* exit_pos = pos_ - 1;
864 int exit_index = index_ - 1;
865
866 switch (GetNextToken()) {
867 case T_OBJECT_END:
868 case T_ARRAY_END:
869 case T_LIST_SEPARATOR:
870 case T_END_OF_INPUT:
871 break;
872 default:
873 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
874 return NULL;
875 }
876
877 pos_ = exit_pos;
878 index_ = exit_index;
879
880 StringPiece num_string(num_start, end_index - start_index);
881
882 int num_int;
883 if (StringToInt(num_string, &num_int))
884 return new FundamentalValue(num_int);
885
886 double num_double;
887 if (StringToDouble(num_string.as_string(), &num_double) &&
888 std::isfinite(num_double)) {
889 return new FundamentalValue(num_double);
890 }
891
892 return NULL;
893 }
894
ReadInt(bool allow_leading_zeros)895 bool JSONParser::ReadInt(bool allow_leading_zeros) {
896 char first = *pos_;
897 int len = 0;
898
899 char c = first;
900 while (CanConsume(1) && std::isdigit(c)) {
901 c = *NextChar();
902 ++len;
903 }
904
905 if (len == 0)
906 return false;
907
908 if (!allow_leading_zeros && len > 1 && first == '0')
909 return false;
910
911 return true;
912 }
913
ConsumeLiteral()914 Value* JSONParser::ConsumeLiteral() {
915 switch (*pos_) {
916 case 't': {
917 const char kTrueLiteral[] = "true";
918 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
919 if (!CanConsume(kTrueLen - 1) ||
920 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
921 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
922 return NULL;
923 }
924 NextNChars(kTrueLen - 1);
925 return new FundamentalValue(true);
926 }
927 case 'f': {
928 const char kFalseLiteral[] = "false";
929 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
930 if (!CanConsume(kFalseLen - 1) ||
931 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
932 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
933 return NULL;
934 }
935 NextNChars(kFalseLen - 1);
936 return new FundamentalValue(false);
937 }
938 case 'n': {
939 const char kNullLiteral[] = "null";
940 const int kNullLen = static_cast<int>(strlen(kNullLiteral));
941 if (!CanConsume(kNullLen - 1) ||
942 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
943 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
944 return NULL;
945 }
946 NextNChars(kNullLen - 1);
947 return Value::CreateNullValue().release();
948 }
949 default:
950 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
951 return NULL;
952 }
953 }
954
955 // static
StringsAreEqual(const char * one,const char * two,size_t len)956 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
957 return strncmp(one, two, len) == 0;
958 }
959
ReportError(JSONReader::JsonParseError code,int column_adjust)960 void JSONParser::ReportError(JSONReader::JsonParseError code,
961 int column_adjust) {
962 error_code_ = code;
963 error_line_ = line_number_;
964 error_column_ = index_ - index_last_line_ + column_adjust;
965 }
966
967 // static
FormatErrorMessage(int line,int column,const std::string & description)968 std::string JSONParser::FormatErrorMessage(int line, int column,
969 const std::string& description) {
970 if (line || column) {
971 return StringPrintf("Line: %i, column: %i, %s",
972 line, column, description.c_str());
973 }
974 return description;
975 }
976
977 } // namespace internal
978 } // namespace base
979