1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17
18 /*! \file reader.h */
19
20 #include "rapidjson.h"
21 #include "encodings.h"
22 #include "internal/meta.h"
23 #include "internal/stack.h"
24 #include "internal/strtod.h"
25
26 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
27 #include <intrin.h>
28 #pragma intrinsic(_BitScanForward)
29 #endif
30 #ifdef RAPIDJSON_SSE42
31 #include <nmmintrin.h>
32 #elif defined(RAPIDJSON_SSE2)
33 #include <emmintrin.h>
34 #endif
35
36 #ifdef _MSC_VER
37 RAPIDJSON_DIAG_PUSH
38 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
39 RAPIDJSON_DIAG_OFF(4702) // unreachable code
40 #endif
41
42 #ifdef __GNUC__
43 RAPIDJSON_DIAG_PUSH
44 RAPIDJSON_DIAG_OFF(effc++)
45 #endif
46
//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing. Used as the "value" of a return statement in functions returning void.
#define RAPIDJSON_NOTHING /* deliberately empty */

// Returns `value` from the enclosing function when HasParseError() reports an error.
// HasParseError() must be callable at the point of expansion.
// A definition supplied before this header is included takes precedence.
#if !defined(RAPIDJSON_PARSE_ERROR_EARLY_RETURN)
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (HasParseError()) { \
        return value; \
    } \
    RAPIDJSON_MULTILINEMACRO_END
#endif

// Same check for functions that return void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
//!@endcond
58
59 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
60 \ingroup RAPIDJSON_ERRORS
61 \brief Macro to indicate a parse error.
62 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
63 \param offset position of the error in JSON input (\c size_t)
64
    This macro can be used as a customization point for the internal
    error handling mechanism of RapidJSON.
67
68 A common usage model is to throw an exception instead of requiring the
69 caller to explicitly check the \ref rapidjson::GenericReader::Parse's
70 return value:
71
72 \code
73 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
74 throw ParseException(parseErrorCode, #parseErrorCode, offset)
75
76 #include <stdexcept> // std::runtime_error
77 #include "rapidjson/error/error.h" // rapidjson::ParseResult
78
79 struct ParseException : std::runtime_error, rapidjson::ParseResult {
80 ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
81 : std::runtime_error(msg), ParseResult(code, offset) {}
82 };
83
84 #include "rapidjson/reader.h"
85 \endcode
86
87 \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
88 */
// Default definition: store the error through SetParseError(). Not used when the
// includer has already defined RAPIDJSON_PARSE_ERROR_NORETURN.
#if !defined(RAPIDJSON_PARSE_ERROR_NORETURN)
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
96
97 /*! \def RAPIDJSON_PARSE_ERROR
98 \ingroup RAPIDJSON_ERRORS
99 \brief (Internal) macro to indicate and handle a parse error.
100 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
101 \param offset position of the error in JSON input (\c size_t)
102
103 Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
104
105 \see RAPIDJSON_PARSE_ERROR_NORETURN
106 \hideinitializer
107 */
// Default definition: report the error, then leave the enclosing (void) function
// if an error is now recorded. Can be overridden before including this header.
#if !defined(RAPIDJSON_PARSE_ERROR)
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
115
116 #include "error/error.h" // ParseErrorCode, ParseResult
117
118 RAPIDJSON_NAMESPACE_BEGIN
119
120 ///////////////////////////////////////////////////////////////////////////////
121 // ParseFlag
122
123 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
124 \ingroup RAPIDJSON_CONFIG
125 \brief User-defined kParseDefaultFlags definition.
126
127 User can define this as any \c ParseFlag combinations.
128 */
// Use "no flags" as the default unless the includer has chosen something else.
#if !defined(RAPIDJSON_PARSE_DEFAULT_FLAGS)
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif
132
133 //! Combination of parseFlags
134 /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
135 */
136 enum ParseFlag {
137 kParseNoFlags = 0, //!< No flags are set.
138 kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
139 kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
140 kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing.
141 kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
142 kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower).
143 kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
144 };
145
146 ///////////////////////////////////////////////////////////////////////////////
147 // Handler
148
149 /*! \class rapidjson::Handler
150 \brief Concept for receiving events from GenericReader upon parsing.
151 The functions return true if no error occurs. If they return false,
152 the event publisher should terminate the process.
153 \code
154 concept Handler {
155 typename Ch;
156
157 bool Null();
158 bool Bool(bool b);
159 bool Int(int i);
160 bool Uint(unsigned i);
161 bool Int64(int64_t i);
162 bool Uint64(uint64_t i);
163 bool Double(double d);
164 bool String(const Ch* str, SizeType length, bool copy);
165 bool StartObject();
166 bool Key(const Ch* str, SizeType length, bool copy);
167 bool EndObject(SizeType memberCount);
168 bool StartArray();
169 bool EndArray(SizeType elementCount);
170 };
171 \endcode
172 */
173 ///////////////////////////////////////////////////////////////////////////////
174 // BaseReaderHandler
175
176 //! Default implementation of Handler.
177 /*! This can be used as base class of any reader handler.
178 \note implements Handler concept
179 */
180 template<typename Encoding = UTF8<>, typename Derived = void>
181 struct BaseReaderHandler {
182 typedef typename Encoding::Ch Ch;
183
184 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
185
DefaultBaseReaderHandler186 bool Default() { return true; }
NullBaseReaderHandler187 bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler188 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler189 bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler190 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler191 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler192 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler193 bool Double(double) { return static_cast<Override&>(*this).Default(); }
StringBaseReaderHandler194 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler195 bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler196 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler197 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler198 bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler199 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
200 };
201
202 ///////////////////////////////////////////////////////////////////////////////
203 // StreamLocalCopy
204
205 namespace internal {
206
207 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
208 class StreamLocalCopy;
209
210 //! Do copy optimization.
211 template<typename Stream>
212 class StreamLocalCopy<Stream, 1> {
213 public:
StreamLocalCopy(Stream & original)214 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()215 ~StreamLocalCopy() { original_ = s; }
216
217 Stream s;
218
219 private:
220 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
221
222 Stream& original_;
223 };
224
225 //! Keep reference.
226 template<typename Stream>
227 class StreamLocalCopy<Stream, 0> {
228 public:
StreamLocalCopy(Stream & original)229 StreamLocalCopy(Stream& original) : s(original) {}
230
231 Stream& s;
232
233 private:
234 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
235 };
236
237 } // namespace internal
238
239 ///////////////////////////////////////////////////////////////////////////////
240 // SkipWhitespace
241
242 //! Skip the JSON white spaces in a stream.
243 /*! \param is A input stream for skipping white spaces.
244 \note This function has SSE2/SSE4.2 specialization.
245 */
246 template<typename InputStream>
SkipWhitespace(InputStream & is)247 void SkipWhitespace(InputStream& is) {
248 internal::StreamLocalCopy<InputStream> copy(is);
249 InputStream& s(copy.s);
250
251 while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
252 s.Take();
253 }
254
255 #ifdef RAPIDJSON_SSE42
256 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)257 inline const char *SkipWhitespace_SIMD(const char* p) {
258 // Fast return for single non-whitespace
259 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
260 ++p;
261 else
262 return p;
263
264 // 16-byte align to the next boundary
265 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
266 while (p != nextAligned)
267 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
268 ++p;
269 else
270 return p;
271
272 // The rest of string using SIMD
273 static const char whitespace[16] = " \n\r\t";
274 const __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]);
275
276 for (;; p += 16) {
277 const __m128i s = _mm_load_si128((const __m128i *)p);
278 const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
279 if (r != 0) { // some of characters is non-whitespace
280 #ifdef _MSC_VER // Find the index of first non-whitespace
281 unsigned long offset;
282 _BitScanForward(&offset, r);
283 return p + offset;
284 #else
285 return p + __builtin_ffs(r) - 1;
286 #endif
287 }
288 }
289 }
290
291 #elif defined(RAPIDJSON_SSE2)
292
293 //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)294 inline const char *SkipWhitespace_SIMD(const char* p) {
295 // Fast return for single non-whitespace
296 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
297 ++p;
298 else
299 return p;
300
301 // 16-byte align to the next boundary
302 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
303 while (p != nextAligned)
304 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
305 ++p;
306 else
307 return p;
308
309 // The rest of string
310 static const char whitespaces[4][17] = {
311 " ",
312 "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
313 "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
314 "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
315
316 const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
317 const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
318 const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
319 const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
320
321 for (;; p += 16) {
322 const __m128i s = _mm_load_si128((const __m128i *)p);
323 __m128i x = _mm_cmpeq_epi8(s, w0);
324 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
325 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
326 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
327 unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
328 if (r != 0) { // some of characters may be non-whitespace
329 #ifdef _MSC_VER // Find the index of first non-whitespace
330 unsigned long offset;
331 _BitScanForward(&offset, r);
332 return p + offset;
333 #else
334 return p + __builtin_ffs(r) - 1;
335 #endif
336 }
337 }
338 }
339
340 #endif // RAPIDJSON_SSE2
341
342 #ifdef RAPIDJSON_SIMD
343 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)344 template<> inline void SkipWhitespace(InsituStringStream& is) {
345 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
346 }
347
348 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)349 template<> inline void SkipWhitespace(StringStream& is) {
350 is.src_ = SkipWhitespace_SIMD(is.src_);
351 }
352 #endif // RAPIDJSON_SIMD
353
354 ///////////////////////////////////////////////////////////////////////////////
355 // GenericReader
356
357 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
/*! GenericReader parses JSON text from a stream, and sends events synchronously to an
    object implementing the Handler concept.
360
361 It needs to allocate a stack for storing a single decoded string during
362 non-destructive parsing.
363
364 For in-situ parsing, the decoded string is directly written to the source
365 text string, no temporary buffer is required.
366
    A GenericReader object can be reused for parsing multiple JSON texts.
368
369 \tparam SourceEncoding Encoding of the input stream.
370 \tparam TargetEncoding Encoding of the parse output.
371 \tparam StackAllocator Allocator type for stack.
372 */
373 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
374 class GenericReader {
375 public:
376 typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
377
378 //! Constructor.
379 /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
380 \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
381 */
stack_(stackAllocator,stackCapacity)382 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
383
384 //! Parse JSON text.
385 /*! \tparam parseFlags Combination of \ref ParseFlag.
386 \tparam InputStream Type of input stream, implementing Stream concept.
387 \tparam Handler Type of handler, implementing Handler concept.
388 \param is Input stream to be parsed.
389 \param handler The handler to receive events.
390 \return Whether the parsing is successful.
391 */
392 template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)393 ParseResult Parse(InputStream& is, Handler& handler) {
394 if (parseFlags & kParseIterativeFlag)
395 return IterativeParse<parseFlags>(is, handler);
396
397 parseResult_.Clear();
398
399 ClearStackOnExit scope(*this);
400
401 SkipWhitespace(is);
402
403 if (is.Peek() == '\0') {
404 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
405 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
406 }
407 else {
408 ParseValue<parseFlags>(is, handler);
409 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
410
411 if (!(parseFlags & kParseStopWhenDoneFlag)) {
412 SkipWhitespace(is);
413
414 if (is.Peek() != '\0') {
415 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
416 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
417 }
418 }
419 }
420
421 return parseResult_;
422 }
423
424 //! Parse JSON text (with \ref kParseDefaultFlags)
425 /*! \tparam InputStream Type of input stream, implementing Stream concept
426 \tparam Handler Type of handler, implementing Handler concept.
427 \param is Input stream to be parsed.
428 \param handler The handler to receive events.
429 \return Whether the parsing is successful.
430 */
431 template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)432 ParseResult Parse(InputStream& is, Handler& handler) {
433 return Parse<kParseDefaultFlags>(is, handler);
434 }
435
436 //! Whether a parse error has occured in the last parsing.
HasParseError()437 bool HasParseError() const { return parseResult_.IsError(); }
438
439 //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()440 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
441
442 //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()443 size_t GetErrorOffset() const { return parseResult_.Offset(); }
444
445 protected:
SetParseError(ParseErrorCode code,size_t offset)446 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
447
448 private:
449 // Prohibit copy constructor & assignment operator.
450 GenericReader(const GenericReader&);
451 GenericReader& operator=(const GenericReader&);
452
ClearStack()453 void ClearStack() { stack_.Clear(); }
454
455 // clear stack on any exit from ParseStream, e.g. due to exception
456 struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit457 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit458 ~ClearStackOnExit() { r_.ClearStack(); }
459 private:
460 GenericReader& r_;
461 ClearStackOnExit(const ClearStackOnExit&);
462 ClearStackOnExit& operator=(const ClearStackOnExit&);
463 };
464
465 // Parse object: { string : value, ... }
466 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)467 void ParseObject(InputStream& is, Handler& handler) {
468 RAPIDJSON_ASSERT(is.Peek() == '{');
469 is.Take(); // Skip '{'
470
471 if (!handler.StartObject())
472 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
473
474 SkipWhitespace(is);
475
476 if (is.Peek() == '}') {
477 is.Take();
478 if (!handler.EndObject(0)) // empty object
479 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
480 return;
481 }
482
483 for (SizeType memberCount = 0;;) {
484 if (is.Peek() != '"')
485 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
486
487 ParseString<parseFlags>(is, handler, true);
488 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
489
490 SkipWhitespace(is);
491
492 if (is.Take() != ':')
493 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
494
495 SkipWhitespace(is);
496
497 ParseValue<parseFlags>(is, handler);
498 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
499
500 SkipWhitespace(is);
501
502 ++memberCount;
503
504 switch (is.Take()) {
505 case ',': SkipWhitespace(is); break;
506 case '}':
507 if (!handler.EndObject(memberCount))
508 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
509 return;
510 default: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
511 }
512 }
513 }
514
515 // Parse array: [ value, ... ]
516 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)517 void ParseArray(InputStream& is, Handler& handler) {
518 RAPIDJSON_ASSERT(is.Peek() == '[');
519 is.Take(); // Skip '['
520
521 if (!handler.StartArray())
522 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
523
524 SkipWhitespace(is);
525
526 if (is.Peek() == ']') {
527 is.Take();
528 if (!handler.EndArray(0)) // empty array
529 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
530 return;
531 }
532
533 for (SizeType elementCount = 0;;) {
534 ParseValue<parseFlags>(is, handler);
535 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
536
537 ++elementCount;
538 SkipWhitespace(is);
539
540 switch (is.Take()) {
541 case ',': SkipWhitespace(is); break;
542 case ']':
543 if (!handler.EndArray(elementCount))
544 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
545 return;
546 default: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
547 }
548 }
549 }
550
551 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)552 void ParseNull(InputStream& is, Handler& handler) {
553 RAPIDJSON_ASSERT(is.Peek() == 'n');
554 is.Take();
555
556 if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') {
557 if (!handler.Null())
558 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
559 }
560 else
561 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
562 }
563
564 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)565 void ParseTrue(InputStream& is, Handler& handler) {
566 RAPIDJSON_ASSERT(is.Peek() == 't');
567 is.Take();
568
569 if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') {
570 if (!handler.Bool(true))
571 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
572 }
573 else
574 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
575 }
576
577 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)578 void ParseFalse(InputStream& is, Handler& handler) {
579 RAPIDJSON_ASSERT(is.Peek() == 'f');
580 is.Take();
581
582 if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') {
583 if (!handler.Bool(false))
584 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
585 }
586 else
587 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
588 }
589
590 // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
591 template<typename InputStream>
ParseHex4(InputStream & is)592 unsigned ParseHex4(InputStream& is) {
593 unsigned codepoint = 0;
594 for (int i = 0; i < 4; i++) {
595 Ch c = is.Take();
596 codepoint <<= 4;
597 codepoint += static_cast<unsigned>(c);
598 if (c >= '0' && c <= '9')
599 codepoint -= '0';
600 else if (c >= 'A' && c <= 'F')
601 codepoint -= 'A' - 10;
602 else if (c >= 'a' && c <= 'f')
603 codepoint -= 'a' - 10;
604 else {
605 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1);
606 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
607 }
608 }
609 return codepoint;
610 }
611
612 template <typename CharType>
613 class StackStream {
614 public:
615 typedef CharType Ch;
616
StackStream(internal::Stack<StackAllocator> & stack)617 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)618 RAPIDJSON_FORCEINLINE void Put(Ch c) {
619 *stack_.template Push<Ch>() = c;
620 ++length_;
621 }
Length()622 size_t Length() const { return length_; }
Pop()623 Ch* Pop() {
624 return stack_.template Pop<Ch>(length_);
625 }
626
627 private:
628 StackStream(const StackStream&);
629 StackStream& operator=(const StackStream&);
630
631 internal::Stack<StackAllocator>& stack_;
632 SizeType length_;
633 };
634
635 // Parse string and generate String event. Different code paths for kParseInsituFlag.
636 template<unsigned parseFlags, typename InputStream, typename Handler>
637 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
638 internal::StreamLocalCopy<InputStream> copy(is);
639 InputStream& s(copy.s);
640
641 bool success = false;
642 if (parseFlags & kParseInsituFlag) {
643 typename InputStream::Ch *head = s.PutBegin();
644 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
645 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
646 size_t length = s.PutEnd(head) - 1;
647 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
648 const typename TargetEncoding::Ch* const str = (typename TargetEncoding::Ch*)head;
649 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
650 }
651 else {
652 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
653 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
654 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
655 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
656 const typename TargetEncoding::Ch* const str = stackStream.Pop();
657 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
658 }
659 if (!success)
660 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
661 }
662
663 // Parse string to an output is
664 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
665 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)666 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
667 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
668 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
669 static const char escape[256] = {
670 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
671 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
672 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
673 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
675 };
676 #undef Z16
677 //!@endcond
678
679 RAPIDJSON_ASSERT(is.Peek() == '\"');
680 is.Take(); // Skip '\"'
681
682 for (;;) {
683 Ch c = is.Peek();
684 if (c == '\\') { // Escape
685 is.Take();
686 Ch e = is.Take();
687 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
688 os.Put(escape[(unsigned char)e]);
689 }
690 else if (e == 'u') { // Unicode
691 unsigned codepoint = ParseHex4(is);
692 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
693 if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
694 // Handle UTF-16 surrogate pair
695 if (is.Take() != '\\' || is.Take() != 'u')
696 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
697 unsigned codepoint2 = ParseHex4(is);
698 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
699 if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)
700 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
701 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
702 }
703 TEncoding::Encode(os, codepoint);
704 }
705 else
706 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
707 }
708 else if (c == '"') { // Closing double quote
709 is.Take();
710 os.Put('\0'); // null-terminate the string
711 return;
712 }
713 else if (c == '\0')
714 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1);
715 else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
716 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
717 else {
718 if (parseFlags & kParseValidateEncodingFlag ?
719 !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
720 !Transcoder<SEncoding, TEncoding>::Transcode(is, os))
721 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
722 }
723 }
724 }
725
726 template<typename InputStream, bool backup>
727 class NumberStream;
728
729 template<typename InputStream>
730 class NumberStream<InputStream, false> {
731 public:
NumberStream(GenericReader & reader,InputStream & s)732 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
~NumberStream()733 ~NumberStream() {}
734
Peek()735 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()736 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()737 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Tell()738 size_t Tell() { return is.Tell(); }
Length()739 size_t Length() { return 0; }
Pop()740 const char* Pop() { return 0; }
741
742 protected:
743 NumberStream& operator=(const NumberStream&);
744
745 InputStream& is;
746 };
747
748 template<typename InputStream>
749 class NumberStream<InputStream, true> : public NumberStream<InputStream, false> {
750 typedef NumberStream<InputStream, false> Base;
751 public:
NumberStream(GenericReader & reader,InputStream & is)752 NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {}
~NumberStream()753 ~NumberStream() {}
754
TakePush()755 RAPIDJSON_FORCEINLINE Ch TakePush() {
756 stackStream.Put((char)Base::is.Peek());
757 return Base::is.Take();
758 }
759
Length()760 size_t Length() { return stackStream.Length(); }
761
Pop()762 const char* Pop() {
763 stackStream.Put('\0');
764 return stackStream.Pop();
765 }
766
767 private:
768 StackStream<char> stackStream;
769 };
770
771 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNumber(InputStream & is,Handler & handler)772 void ParseNumber(InputStream& is, Handler& handler) {
773 internal::StreamLocalCopy<InputStream> copy(is);
774 NumberStream<InputStream, (parseFlags & kParseFullPrecisionFlag) != 0> s(*this, copy.s);
775
776 // Parse minus
777 bool minus = false;
778 if (s.Peek() == '-') {
779 minus = true;
780 s.Take();
781 }
782
783 // Parse int: zero / ( digit1-9 *DIGIT )
784 unsigned i = 0;
785 uint64_t i64 = 0;
786 bool use64bit = false;
787 int significandDigit = 0;
788 if (s.Peek() == '0') {
789 i = 0;
790 s.TakePush();
791 }
792 else if (s.Peek() >= '1' && s.Peek() <= '9') {
793 i = static_cast<unsigned>(s.TakePush() - '0');
794
795 if (minus)
796 while (s.Peek() >= '0' && s.Peek() <= '9') {
797 if (i >= 214748364) { // 2^31 = 2147483648
798 if (i != 214748364 || s.Peek() > '8') {
799 i64 = i;
800 use64bit = true;
801 break;
802 }
803 }
804 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
805 significandDigit++;
806 }
807 else
808 while (s.Peek() >= '0' && s.Peek() <= '9') {
809 if (i >= 429496729) { // 2^32 - 1 = 4294967295
810 if (i != 429496729 || s.Peek() > '5') {
811 i64 = i;
812 use64bit = true;
813 break;
814 }
815 }
816 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
817 significandDigit++;
818 }
819 }
820 else
821 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
822
823 // Parse 64bit int
824 bool useDouble = false;
825 double d = 0.0;
826 if (use64bit) {
827 if (minus)
828 while (s.Peek() >= '0' && s.Peek() <= '9') {
829 if (i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC)) // 2^63 = 9223372036854775808
830 if (i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8') {
831 d = i64;
832 useDouble = true;
833 break;
834 }
835 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
836 significandDigit++;
837 }
838 else
839 while (s.Peek() >= '0' && s.Peek() <= '9') {
840 if (i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999)) // 2^64 - 1 = 18446744073709551615
841 if (i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5') {
842 d = i64;
843 useDouble = true;
844 break;
845 }
846 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
847 significandDigit++;
848 }
849 }
850
851 // Force double for big integer
852 if (useDouble) {
853 while (s.Peek() >= '0' && s.Peek() <= '9') {
854 if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0
855 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
856 d = d * 10 + (s.TakePush() - '0');
857 }
858 }
859
860 // Parse frac = decimal-point 1*DIGIT
861 int expFrac = 0;
862 size_t decimalPosition;
863 if (s.Peek() == '.') {
864 s.Take();
865 decimalPosition = s.Length();
866
867 if (!(s.Peek() >= '0' && s.Peek() <= '9'))
868 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
869
870 if (!useDouble) {
871 #if RAPIDJSON_64BIT
872 // Use i64 to store significand in 64-bit architecture
873 if (!use64bit)
874 i64 = i;
875
876 while (s.Peek() >= '0' && s.Peek() <= '9') {
877 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
878 break;
879 else {
880 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
881 --expFrac;
882 if (i64 != 0)
883 significandDigit++;
884 }
885 }
886
887 d = (double)i64;
888 #else
889 // Use double to store significand in 32-bit architecture
890 d = use64bit ? (double)i64 : (double)i;
891 #endif
892 useDouble = true;
893 }
894
895 while (s.Peek() >= '0' && s.Peek() <= '9') {
896 if (significandDigit < 17) {
897 d = d * 10.0 + (s.TakePush() - '0');
898 --expFrac;
899 if (d > 0.0)
900 significandDigit++;
901 }
902 else
903 s.TakePush();
904 }
905 }
906 else
907 decimalPosition = s.Length(); // decimal position at the end of integer.
908
909 // Parse exp = e [ minus / plus ] 1*DIGIT
910 int exp = 0;
911 if (s.Peek() == 'e' || s.Peek() == 'E') {
912 if (!useDouble) {
913 d = use64bit ? i64 : i;
914 useDouble = true;
915 }
916 s.Take();
917
918 bool expMinus = false;
919 if (s.Peek() == '+')
920 s.Take();
921 else if (s.Peek() == '-') {
922 s.Take();
923 expMinus = true;
924 }
925
926 if (s.Peek() >= '0' && s.Peek() <= '9') {
927 exp = s.Take() - '0';
928 if (expMinus) {
929 while (s.Peek() >= '0' && s.Peek() <= '9') {
930 exp = exp * 10 + (s.Take() - '0');
931 if (exp >= 214748364) { // Issue #313: prevent overflow exponent
932 while (s.Peek() >= '0' && s.Peek() <= '9') // Consume the rest of exponent
933 s.Take();
934 }
935 }
936 }
937 else { // positive exp
938 int maxExp = 308 - expFrac;
939 while (s.Peek() >= '0' && s.Peek() <= '9') {
940 exp = exp * 10 + (s.Take() - '0');
941 if (exp > maxExp)
942 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
943 }
944 }
945 }
946 else
947 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
948
949 if (expMinus)
950 exp = -exp;
951 }
952
953 // Finish parsing, call event according to the type of number.
954 bool cont = true;
955 size_t length = s.Length();
956 const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
957
958 if (useDouble) {
959 int p = exp + expFrac;
960 if (parseFlags & kParseFullPrecisionFlag)
961 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
962 else
963 d = internal::StrtodNormalPrecision(d, p);
964
965 cont = handler.Double(minus ? -d : d);
966 }
967 else {
968 if (use64bit) {
969 if (minus)
970 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
971 else
972 cont = handler.Uint64(i64);
973 }
974 else {
975 if (minus)
976 cont = handler.Int(static_cast<int32_t>(~i + 1));
977 else
978 cont = handler.Uint(i);
979 }
980 }
981 if (!cont)
982 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
983 }
984
985 // Parse any JSON value
986 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)987 void ParseValue(InputStream& is, Handler& handler) {
988 switch (is.Peek()) {
989 case 'n': ParseNull <parseFlags>(is, handler); break;
990 case 't': ParseTrue <parseFlags>(is, handler); break;
991 case 'f': ParseFalse <parseFlags>(is, handler); break;
992 case '"': ParseString<parseFlags>(is, handler); break;
993 case '{': ParseObject<parseFlags>(is, handler); break;
994 case '[': ParseArray <parseFlags>(is, handler); break;
995 default : ParseNumber<parseFlags>(is, handler);
996 }
997 }
998
999 // Iterative Parsing
1000
1001 // States
enum IterativeParsingState {
    // The numeric values are used as row indices of the transition table in Predict(),
    // so they are spelled out explicitly and must stay dense and in this order.
    IterativeParsingStartState = 0,
    IterativeParsingFinishState = 1,
    IterativeParsingErrorState = 2,

    // Object states
    IterativeParsingObjectInitialState = 3,
    IterativeParsingMemberKeyState = 4,
    IterativeParsingKeyValueDelimiterState = 5,
    IterativeParsingMemberValueState = 6,
    IterativeParsingMemberDelimiterState = 7,
    IterativeParsingObjectFinishState = 8,

    // Array states
    IterativeParsingArrayInitialState = 9,
    IterativeParsingElementState = 10,
    IterativeParsingElementDelimiterState = 11,
    IterativeParsingArrayFinishState = 12,

    // Single value state
    IterativeParsingValueState = 13,

    // Number of states above; first dimension of the transition table.
    cIterativeParsingStateCount = 14
};
1026
1027 // Tokens
enum Token {
    // The numeric values are used as column indices of the transition table in Predict(),
    // so they are spelled out explicitly and must stay dense and in this order.
    LeftBracketToken = 0,
    RightBracketToken = 1,

    LeftCurlyBracketToken = 2,
    RightCurlyBracketToken = 3,

    CommaToken = 4,
    ColonToken = 5,

    StringToken = 6,
    FalseToken = 7,
    TrueToken = 8,
    NullToken = 9,
    NumberToken = 10,

    // Number of tokens above; second dimension of the transition table.
    kTokenCount = 11
};
1046
Tokenize(Ch c)1047 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1048
1049 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1050 #define N NumberToken
1051 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1052 // Maps from ASCII to Token
1053 static const unsigned char tokenMap[256] = {
1054 N16, // 00~0F
1055 N16, // 10~1F
1056 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1057 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1058 N16, // 40~4F
1059 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1060 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1061 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1062 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1063 };
1064 #undef N
1065 #undef N16
1066 //!@endcond
1067
1068 if (sizeof(Ch) == 1 || unsigned(c) < 256)
1069 return (Token)tokenMap[(unsigned char)c];
1070 else
1071 return NumberToken;
1072 }
1073
Predict(IterativeParsingState state,Token token)1074 RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
1075 // current state x one lookahead token -> new state
1076 static const char G[cIterativeParsingStateCount][kTokenCount] = {
1077 // Start
1078 {
1079 IterativeParsingArrayInitialState, // Left bracket
1080 IterativeParsingErrorState, // Right bracket
1081 IterativeParsingObjectInitialState, // Left curly bracket
1082 IterativeParsingErrorState, // Right curly bracket
1083 IterativeParsingErrorState, // Comma
1084 IterativeParsingErrorState, // Colon
1085 IterativeParsingValueState, // String
1086 IterativeParsingValueState, // False
1087 IterativeParsingValueState, // True
1088 IterativeParsingValueState, // Null
1089 IterativeParsingValueState // Number
1090 },
1091 // Finish(sink state)
1092 {
1093 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1094 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1095 IterativeParsingErrorState
1096 },
1097 // Error(sink state)
1098 {
1099 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1100 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1101 IterativeParsingErrorState
1102 },
1103 // ObjectInitial
1104 {
1105 IterativeParsingErrorState, // Left bracket
1106 IterativeParsingErrorState, // Right bracket
1107 IterativeParsingErrorState, // Left curly bracket
1108 IterativeParsingObjectFinishState, // Right curly bracket
1109 IterativeParsingErrorState, // Comma
1110 IterativeParsingErrorState, // Colon
1111 IterativeParsingMemberKeyState, // String
1112 IterativeParsingErrorState, // False
1113 IterativeParsingErrorState, // True
1114 IterativeParsingErrorState, // Null
1115 IterativeParsingErrorState // Number
1116 },
1117 // MemberKey
1118 {
1119 IterativeParsingErrorState, // Left bracket
1120 IterativeParsingErrorState, // Right bracket
1121 IterativeParsingErrorState, // Left curly bracket
1122 IterativeParsingErrorState, // Right curly bracket
1123 IterativeParsingErrorState, // Comma
1124 IterativeParsingKeyValueDelimiterState, // Colon
1125 IterativeParsingErrorState, // String
1126 IterativeParsingErrorState, // False
1127 IterativeParsingErrorState, // True
1128 IterativeParsingErrorState, // Null
1129 IterativeParsingErrorState // Number
1130 },
1131 // KeyValueDelimiter
1132 {
1133 IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1134 IterativeParsingErrorState, // Right bracket
1135 IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1136 IterativeParsingErrorState, // Right curly bracket
1137 IterativeParsingErrorState, // Comma
1138 IterativeParsingErrorState, // Colon
1139 IterativeParsingMemberValueState, // String
1140 IterativeParsingMemberValueState, // False
1141 IterativeParsingMemberValueState, // True
1142 IterativeParsingMemberValueState, // Null
1143 IterativeParsingMemberValueState // Number
1144 },
1145 // MemberValue
1146 {
1147 IterativeParsingErrorState, // Left bracket
1148 IterativeParsingErrorState, // Right bracket
1149 IterativeParsingErrorState, // Left curly bracket
1150 IterativeParsingObjectFinishState, // Right curly bracket
1151 IterativeParsingMemberDelimiterState, // Comma
1152 IterativeParsingErrorState, // Colon
1153 IterativeParsingErrorState, // String
1154 IterativeParsingErrorState, // False
1155 IterativeParsingErrorState, // True
1156 IterativeParsingErrorState, // Null
1157 IterativeParsingErrorState // Number
1158 },
1159 // MemberDelimiter
1160 {
1161 IterativeParsingErrorState, // Left bracket
1162 IterativeParsingErrorState, // Right bracket
1163 IterativeParsingErrorState, // Left curly bracket
1164 IterativeParsingErrorState, // Right curly bracket
1165 IterativeParsingErrorState, // Comma
1166 IterativeParsingErrorState, // Colon
1167 IterativeParsingMemberKeyState, // String
1168 IterativeParsingErrorState, // False
1169 IterativeParsingErrorState, // True
1170 IterativeParsingErrorState, // Null
1171 IterativeParsingErrorState // Number
1172 },
1173 // ObjectFinish(sink state)
1174 {
1175 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1176 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1177 IterativeParsingErrorState
1178 },
1179 // ArrayInitial
1180 {
1181 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1182 IterativeParsingArrayFinishState, // Right bracket
1183 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1184 IterativeParsingErrorState, // Right curly bracket
1185 IterativeParsingErrorState, // Comma
1186 IterativeParsingErrorState, // Colon
1187 IterativeParsingElementState, // String
1188 IterativeParsingElementState, // False
1189 IterativeParsingElementState, // True
1190 IterativeParsingElementState, // Null
1191 IterativeParsingElementState // Number
1192 },
1193 // Element
1194 {
1195 IterativeParsingErrorState, // Left bracket
1196 IterativeParsingArrayFinishState, // Right bracket
1197 IterativeParsingErrorState, // Left curly bracket
1198 IterativeParsingErrorState, // Right curly bracket
1199 IterativeParsingElementDelimiterState, // Comma
1200 IterativeParsingErrorState, // Colon
1201 IterativeParsingErrorState, // String
1202 IterativeParsingErrorState, // False
1203 IterativeParsingErrorState, // True
1204 IterativeParsingErrorState, // Null
1205 IterativeParsingErrorState // Number
1206 },
1207 // ElementDelimiter
1208 {
1209 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1210 IterativeParsingErrorState, // Right bracket
1211 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1212 IterativeParsingErrorState, // Right curly bracket
1213 IterativeParsingErrorState, // Comma
1214 IterativeParsingErrorState, // Colon
1215 IterativeParsingElementState, // String
1216 IterativeParsingElementState, // False
1217 IterativeParsingElementState, // True
1218 IterativeParsingElementState, // Null
1219 IterativeParsingElementState // Number
1220 },
1221 // ArrayFinish(sink state)
1222 {
1223 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1224 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1225 IterativeParsingErrorState
1226 },
1227 // Single Value (sink state)
1228 {
1229 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1230 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1231 IterativeParsingErrorState
1232 }
1233 }; // End of G
1234
1235 return (IterativeParsingState)G[state][token];
1236 }
1237
1238 // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1239 // May return a new state on state pop.
1240 template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1241 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1242 (void)token;
1243
1244 switch (dst) {
1245 case IterativeParsingErrorState:
1246 return dst;
1247
1248 case IterativeParsingObjectInitialState:
1249 case IterativeParsingArrayInitialState:
1250 {
1251 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1252 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1253 IterativeParsingState n = src;
1254 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1255 n = IterativeParsingElementState;
1256 else if (src == IterativeParsingKeyValueDelimiterState)
1257 n = IterativeParsingMemberValueState;
1258 // Push current state.
1259 *stack_.template Push<SizeType>(1) = n;
1260 // Initialize and push the member/element count.
1261 *stack_.template Push<SizeType>(1) = 0;
1262 // Call handler
1263 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1264 // On handler short circuits the parsing.
1265 if (!hr) {
1266 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1267 return IterativeParsingErrorState;
1268 }
1269 else {
1270 is.Take();
1271 return dst;
1272 }
1273 }
1274
1275 case IterativeParsingMemberKeyState:
1276 ParseString<parseFlags>(is, handler, true);
1277 if (HasParseError())
1278 return IterativeParsingErrorState;
1279 else
1280 return dst;
1281
1282 case IterativeParsingKeyValueDelimiterState:
1283 RAPIDJSON_ASSERT(token == ColonToken);
1284 is.Take();
1285 return dst;
1286
1287 case IterativeParsingMemberValueState:
1288 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1289 ParseValue<parseFlags>(is, handler);
1290 if (HasParseError()) {
1291 return IterativeParsingErrorState;
1292 }
1293 return dst;
1294
1295 case IterativeParsingElementState:
1296 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1297 ParseValue<parseFlags>(is, handler);
1298 if (HasParseError()) {
1299 return IterativeParsingErrorState;
1300 }
1301 return dst;
1302
1303 case IterativeParsingMemberDelimiterState:
1304 case IterativeParsingElementDelimiterState:
1305 is.Take();
1306 // Update member/element count.
1307 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1308 return dst;
1309
1310 case IterativeParsingObjectFinishState:
1311 {
1312 // Get member count.
1313 SizeType c = *stack_.template Pop<SizeType>(1);
1314 // If the object is not empty, count the last member.
1315 if (src == IterativeParsingMemberValueState)
1316 ++c;
1317 // Restore the state.
1318 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1319 // Transit to Finish state if this is the topmost scope.
1320 if (n == IterativeParsingStartState)
1321 n = IterativeParsingFinishState;
1322 // Call handler
1323 bool hr = handler.EndObject(c);
1324 // On handler short circuits the parsing.
1325 if (!hr) {
1326 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1327 return IterativeParsingErrorState;
1328 }
1329 else {
1330 is.Take();
1331 return n;
1332 }
1333 }
1334
1335 case IterativeParsingArrayFinishState:
1336 {
1337 // Get element count.
1338 SizeType c = *stack_.template Pop<SizeType>(1);
1339 // If the array is not empty, count the last element.
1340 if (src == IterativeParsingElementState)
1341 ++c;
1342 // Restore the state.
1343 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1344 // Transit to Finish state if this is the topmost scope.
1345 if (n == IterativeParsingStartState)
1346 n = IterativeParsingFinishState;
1347 // Call handler
1348 bool hr = handler.EndArray(c);
1349 // On handler short circuits the parsing.
1350 if (!hr) {
1351 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1352 return IterativeParsingErrorState;
1353 }
1354 else {
1355 is.Take();
1356 return n;
1357 }
1358 }
1359
1360 default:
1361 // This branch is for IterativeParsingValueState actually.
1362 // Use `default:` rather than
1363 // `case IterativeParsingValueState:` is for code coverage.
1364
1365 // The IterativeParsingStartState is not enumerated in this switch-case.
1366 // It is impossible for that case. And it can be caught by following assertion.
1367
1368 // The IterativeParsingFinishState is not enumerated in this switch-case either.
1369 // It is a "derivative" state which cannot triggered from Predict() directly.
1370 // Therefore it cannot happen here. And it can be caught by following assertion.
1371 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1372
1373 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1374 ParseValue<parseFlags>(is, handler);
1375 if (HasParseError()) {
1376 return IterativeParsingErrorState;
1377 }
1378 return IterativeParsingFinishState;
1379 }
1380 }
1381
1382 template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)1383 void HandleError(IterativeParsingState src, InputStream& is) {
1384 if (HasParseError()) {
1385 // Error flag has been set.
1386 return;
1387 }
1388
1389 switch (src) {
1390 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1391 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1392 case IterativeParsingObjectInitialState:
1393 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1394 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1395 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1396 case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1397 default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
1398 }
1399 }
1400
1401 template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)1402 ParseResult IterativeParse(InputStream& is, Handler& handler) {
1403 parseResult_.Clear();
1404 ClearStackOnExit scope(*this);
1405 IterativeParsingState state = IterativeParsingStartState;
1406
1407 SkipWhitespace(is);
1408 while (is.Peek() != '\0') {
1409 Token t = Tokenize(is.Peek());
1410 IterativeParsingState n = Predict(state, t);
1411 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1412
1413 if (d == IterativeParsingErrorState) {
1414 HandleError(state, is);
1415 break;
1416 }
1417
1418 state = d;
1419
1420 // Do not further consume streams if a root JSON has been parsed.
1421 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1422 break;
1423
1424 SkipWhitespace(is);
1425 }
1426
1427 // Handle the end of file.
1428 if (state != IterativeParsingFinishState)
1429 HandleError(state, is);
1430
1431 return parseResult_;
1432 }
1433
static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing. The iterative parser also pushes its per-container frames (saved state and member/element count) here.
ParseResult parseResult_; //!< Result of the current parse; cleared at the start of IterativeParse() and returned by it.
1437 }; // class GenericReader
1438
1439 //! Reader with UTF8 encoding and default allocator.
1440 typedef GenericReader<UTF8<>, UTF8<> > Reader;
1441
1442 RAPIDJSON_NAMESPACE_END
1443
1444 #ifdef __GNUC__
1445 RAPIDJSON_DIAG_POP
1446 #endif
1447
1448 #ifdef _MSC_VER
1449 RAPIDJSON_DIAG_POP
1450 #endif
1451
1452 #endif // RAPIDJSON_READER_H_
1453