// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_JSON_JSON_PARSER_H_
#define BASE_JSON_JSON_PARSER_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>

#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/json/json_reader.h"
#include "base/macros.h"
#include "base/strings/string_piece.h"

namespace base {

class Value;

namespace internal {

class JSONParserTest;

// The implementation behind the JSONReader interface. This class is not meant
// to be used directly; it encapsulates logic that need not be exposed publicly.
//
// This parser guarantees O(n) time through the input string. It also optimizes
// base::StringValue by using StringPiece where possible when returning Value
// objects by using "hidden roots," discussed in the implementation.
//
// Iteration happens on the byte level, with the functions CanConsume and
// NextChar. The conversion from byte to JSON token happens without advancing
// the parser in GetNextToken/ParseToken; that is, tokenization operates on
// the current parser position without advancing.
//
// Built on top of these are a family of Consume functions that iterate
// internally. Invariant: on entry of a Consume function, the parser is wound
// to the first byte of a valid JSON token. On exit, it is on the last byte
// of a token, such that the next iteration of the parser will be at the byte
// immediately following the token, which would likely be the first byte of the
// next token.
class BASE_EXPORT JSONParser {
 public:
  // Creates a parser. |options| holds the base::JSONParserOptions that control
  // parsing (see |options_|).
  explicit JSONParser(int options);
  ~JSONParser();

  // Parses the input string according to the set options and returns the
  // result as a Value.
  // Wrap this in base::FooValue::From() to check the Value is of type Foo and
  // convert to a FooValue at the same time.
  // NOTE(review): presumably returns null on failure, with the error
  // accessors below describing the problem -- confirm against the
  // implementation.
  std::unique_ptr<Value> Parse(StringPiece input);

  // Returns the error code.
  JSONReader::JsonParseError error_code() const;

  // Returns the human-friendly error message.
  std::string GetErrorMessage() const;

  // Returns the error line number if parse error happened. Otherwise always
  // returns 0.
  int error_line() const;

  // Returns the error column number if parse error happened. Otherwise always
  // returns 0.
  int error_column() const;

 private:
  // The kinds of token that GetNextToken() can report for the current parser
  // position. Where a token is a fixed piece of text, that text is shown in
  // the trailing comment.
  enum Token {
    T_OBJECT_BEGIN,           // {
    T_OBJECT_END,             // }
    T_ARRAY_BEGIN,            // [
    T_ARRAY_END,              // ]
    T_STRING,
    T_NUMBER,
    T_BOOL_TRUE,              // true
    T_BOOL_FALSE,             // false
    T_NULL,                   // null
    T_LIST_SEPARATOR,         // ,
    T_OBJECT_PAIR_SEPARATOR,  // :
    T_END_OF_INPUT,
    T_INVALID_TOKEN,
  };

  // A helper class used for parsing strings. One optimization performed is to
  // create base::Value with a StringPiece to avoid unnecessary std::string
  // copies. This is not possible if the input string needs to be decoded from
  // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
  // This class centralizes that logic.
  class StringBuilder {
   public:
    // Empty constructor. Used for creating a builder with which to Swap().
    StringBuilder();

    // |pos| is the beginning of an input string, excluding the |"|.
    explicit StringBuilder(const char* pos);

    ~StringBuilder();

    // Swaps the contents of |other| with this.
    void Swap(StringBuilder* other);

    // Either increases the |length_| of the string or copies the character if
    // the StringBuilder has been converted. |c| must be in the basic ASCII
    // plane; all other characters need to be in UTF-8 units, appended with
    // AppendString below.
    void Append(const char& c);

    // Appends a string to the std::string. The builder must have been
    // Convert()ed before this is used.
    void AppendString(const std::string& str);

    // Converts the builder from its default StringPiece to a full std::string,
    // performing a copy. Once a builder is converted, it cannot be made a
    // StringPiece again.
    void Convert();

    // Returns whether the builder can be converted to a StringPiece.
    bool CanBeStringPiece() const;

    // Returns the StringPiece representation. Returns an empty piece if it
    // cannot be converted.
    StringPiece AsStringPiece();

    // Returns the builder as a std::string.
    const std::string& AsString();

   private:
    // The beginning of the input string.
    const char* pos_;

    // Number of bytes in |pos_| that make up the string being built.
    size_t length_;

    // The copied string representation. NULL until Convert() is called.
    // Strong (i.e. owned by this builder); held as a raw pointer because
    // std::unique_ptr<T> has too much of an overhead here.
    std::string* string_;
  };

  // Quick check that the stream has capacity to consume |length| more bytes.
  bool CanConsume(int length);

  // The basic way to consume a single character in the stream. Consumes one
  // byte of the input stream and returns a pointer to the rest of it.
  const char* NextChar();

  // Performs the equivalent of NextChar N times.
  void NextNChars(int n);

  // Skips over whitespace and comments to find the next token in the stream.
  // This does not advance the parser for non-whitespace or comment chars.
  Token GetNextToken();

  // Consumes whitespace characters and comments until the next character that
  // is neither is encountered.
  void EatWhitespaceAndComments();
  // Helper function that consumes a comment, assuming that the parser is
  // currently wound to a '/'.
  // NOTE(review): the meaning of the return value is not stated in this
  // header; presumably true when a comment was consumed -- confirm in the
  // implementation.
  bool EatComment();

  // Calls GetNextToken() and then ParseToken(). Caller owns the result.
  Value* ParseNextToken();

  // Takes a token that represents the start of a Value ("a structural token"
  // in RFC terms) and consumes it, returning the result as an object the
  // caller owns.
  Value* ParseToken(Token token);

  // Assuming that the parser is currently wound to '{', this parses a JSON
  // object into a DictionaryValue.
  Value* ConsumeDictionary();

  // Assuming that the parser is wound to '[', this parses a JSON list into a
  // ListValue.
  Value* ConsumeList();

  // Calls through ConsumeStringRaw and wraps it in a value.
  Value* ConsumeString();

  // Assuming that the parser is wound to a double quote, this parses a string,
  // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
  // success and Swap()s the result into |out|. Returns false on failure with
  // error information set.
  bool ConsumeStringRaw(StringBuilder* out);
  // Helper function for ConsumeStringRaw() that consumes the next four or 10
  // bytes (parser is wound to the first character of a HEX sequence, with the
  // potential for consuming another \uXXXX for a surrogate). Returns true on
  // success and places the UTF8 code units in |dest_string|, and false on
  // failure.
  bool DecodeUTF16(std::string* dest_string);
  // Helper function for ConsumeStringRaw() that takes a single code point,
  // decodes it into UTF-8 units, and appends it to the given builder. The
  // point must be valid.
  void DecodeUTF8(const int32_t& point, StringBuilder* dest);

  // Assuming that the parser is wound to the start of a valid JSON number,
  // this parses and converts it to either an int or double value.
  Value* ConsumeNumber();
  // Helper that reads characters that are ints. Returns true if a number was
  // read and false on error.
  // NOTE(review): |allow_leading_zeros| presumably controls whether leading
  // zeros are accepted, going by its name -- confirm in the implementation.
  bool ReadInt(bool allow_leading_zeros);

  // Consumes the literal values of |true|, |false|, and |null|, assuming the
  // parser is wound to the first character of any of those.
  Value* ConsumeLiteral();

  // Compares two string buffers of a given length.
  static bool StringsAreEqual(const char* left, const char* right, size_t len);

  // Sets the error information to |code| at the current column, based on
  // |index_| and |index_last_line_|, with an optional positive/negative
  // adjustment by |column_adjust|.
  void ReportError(JSONReader::JsonParseError code, int column_adjust);

  // Given the line and column number of an error, formats one of the error
  // message constants from json_reader.h for human display.
  static std::string FormatErrorMessage(int line, int column,
                                        const std::string& description);

  // base::JSONParserOptions that control parsing.
  const int options_;

  // Pointer to the start of the input data.
  const char* start_pos_;

  // Pointer to the current position in the input data. Equivalent to
  // |start_pos_ + index_|.
  const char* pos_;

  // Pointer to the last character of the input data.
  const char* end_pos_;

  // The index in the input stream to which the parser is wound.
  int index_;

  // The number of times the parser has recursed (current stack depth).
  int stack_depth_;

  // The line number that the parser is at currently.
  int line_number_;

  // The last value of |index_| on the previous line.
  int index_last_line_;

  // Error information, exposed through error_code(), error_line() and
  // error_column().
  JSONReader::JsonParseError error_code_;
  int error_line_;
  int error_column_;

  // Give the unit tests access to the private members declared above.
  friend class JSONParserTest;
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);

  // A JSONParser is neither copyable nor assignable.
  DISALLOW_COPY_AND_ASSIGN(JSONParser);
};

}  // namespace internal
}  // namespace base

#endif  // BASE_JSON_JSON_PARSER_H_