1 //! When serializing or deserializing JSON goes wrong.
2 
3 use crate::io;
4 use crate::lib::str::FromStr;
5 use crate::lib::*;
6 use serde::{de, ser};
7 
/// This type represents all possible errors that can occur when serializing or
/// deserializing JSON data.
///
/// Use `classify` (or the `is_io` / `is_syntax` / `is_data` / `is_eof`
/// shorthands) to find out what kind of failure occurred, and `line` /
/// `column` to find out where in the input it was detected.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
16 
/// Alias for a `Result` with the error type `serde_json::Error`.
///
/// `Result<T>` is shorthand for `result::Result<T, Error>`.
pub type Result<T> = result::Result<T, Error>;
19 
20 impl Error {
21     /// One-based line number at which the error was detected.
22     ///
23     /// Characters in the first line of the input (before the first newline
24     /// character) are in line 1.
line(&self) -> usize25     pub fn line(&self) -> usize {
26         self.err.line
27     }
28 
29     /// One-based column number at which the error was detected.
30     ///
31     /// The first character in the input and any characters immediately
32     /// following a newline character are in column 1.
33     ///
34     /// Note that errors may occur in column 0, for example if a read from an IO
35     /// stream fails immediately following a previously read newline character.
column(&self) -> usize36     pub fn column(&self) -> usize {
37         self.err.column
38     }
39 
40     /// Categorizes the cause of this error.
41     ///
42     /// - `Category::Io` - failure to read or write bytes on an IO stream
43     /// - `Category::Syntax` - input that is not syntactically valid JSON
44     /// - `Category::Data` - input data that is semantically incorrect
45     /// - `Category::Eof` - unexpected end of the input data
classify(&self) -> Category46     pub fn classify(&self) -> Category {
47         match self.err.code {
48             ErrorCode::Message(_) => Category::Data,
49             ErrorCode::Io(_) => Category::Io,
50             ErrorCode::EofWhileParsingList
51             | ErrorCode::EofWhileParsingObject
52             | ErrorCode::EofWhileParsingString
53             | ErrorCode::EofWhileParsingValue => Category::Eof,
54             ErrorCode::ExpectedColon
55             | ErrorCode::ExpectedListCommaOrEnd
56             | ErrorCode::ExpectedObjectCommaOrEnd
57             | ErrorCode::ExpectedSomeIdent
58             | ErrorCode::ExpectedSomeValue
59             | ErrorCode::InvalidEscape
60             | ErrorCode::InvalidNumber
61             | ErrorCode::NumberOutOfRange
62             | ErrorCode::InvalidUnicodeCodePoint
63             | ErrorCode::ControlCharacterWhileParsingString
64             | ErrorCode::KeyMustBeAString
65             | ErrorCode::LoneLeadingSurrogateInHexEscape
66             | ErrorCode::TrailingComma
67             | ErrorCode::TrailingCharacters
68             | ErrorCode::UnexpectedEndOfHexEscape
69             | ErrorCode::RecursionLimitExceeded => Category::Syntax,
70         }
71     }
72 
73     /// Returns true if this error was caused by a failure to read or write
74     /// bytes on an IO stream.
is_io(&self) -> bool75     pub fn is_io(&self) -> bool {
76         self.classify() == Category::Io
77     }
78 
79     /// Returns true if this error was caused by input that was not
80     /// syntactically valid JSON.
is_syntax(&self) -> bool81     pub fn is_syntax(&self) -> bool {
82         self.classify() == Category::Syntax
83     }
84 
85     /// Returns true if this error was caused by input data that was
86     /// semantically incorrect.
87     ///
88     /// For example, JSON containing a number is semantically incorrect when the
89     /// type being deserialized into holds a String.
is_data(&self) -> bool90     pub fn is_data(&self) -> bool {
91         self.classify() == Category::Data
92     }
93 
94     /// Returns true if this error was caused by prematurely reaching the end of
95     /// the input data.
96     ///
97     /// Callers that process streaming input may be interested in retrying the
98     /// deserialization once more data is available.
is_eof(&self) -> bool99     pub fn is_eof(&self) -> bool {
100         self.classify() == Category::Eof
101     }
102 }
103 
/// Categorizes the cause of a `serde_json::Error`.
///
/// This is the value returned by `Error::classify`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Category {
    /// The error was caused by a failure to read or write bytes on an IO
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid JSON.
    Syntax,

    /// The error was caused by input data that was semantically incorrect.
    ///
    /// For example, JSON containing a number is semantically incorrect when the
    /// type being deserialized into holds a String.
    Data,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
126 
#[cfg(feature = "std")]
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
    /// Convert a `serde_json::Error` into an `io::Error`.
    ///
    /// An error that wraps an underlying IO failure is unwrapped back into
    /// that original `io::Error`.
    /// JSON syntax and data errors are turned into `InvalidData` IO errors.
    /// EOF errors are turned into `UnexpectedEof` IO errors.
    ///
    /// ```
    /// use std::io;
    ///
    /// enum MyError {
    ///     Io(io::Error),
    ///     Json(serde_json::Error),
    /// }
    ///
    /// impl From<serde_json::Error> for MyError {
    ///     fn from(err: serde_json::Error) -> MyError {
    ///         use serde_json::error::Category;
    ///         match err.classify() {
    ///             Category::Io => {
    ///                 MyError::Io(err.into())
    ///             }
    ///             Category::Syntax | Category::Data | Category::Eof => {
    ///                 MyError::Json(err)
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    fn from(j: Error) -> Self {
        // An IO-backed error already owns an `io::Error`; hand it back as is.
        if let ErrorCode::Io(err) = j.err.code {
            return err;
        }

        // Everything else is wrapped, keeping the JSON error as the payload.
        let kind = match j.classify() {
            // `classify` only yields `Io` for `ErrorCode::Io`, handled above.
            Category::Io => unreachable!(),
            Category::Syntax | Category::Data => io::ErrorKind::InvalidData,
            Category::Eof => io::ErrorKind::UnexpectedEof,
        };
        io::Error::new(kind, j)
    }
}
169 
/// Boxed payload of an `Error`: what went wrong and where it was detected.
struct ErrorImpl {
    /// The specific failure that occurred.
    code: ErrorCode,
    /// Line at which the error was detected. A value of `0` means no position
    /// has been recorded; the `Display` impl then prints the code alone.
    line: usize,
    /// Column at which the error was detected.
    column: usize,
}
175 
/// The specific reason an `Error` was raised.
///
/// `Error::classify` maps every variant onto a public `Category`, and the
/// `Display` impl for this type supplies the human-readable message.
pub(crate) enum ErrorCode {
    /// Free-form message, such as one produced through `de::Error::custom` or
    /// `ser::Error::custom`. Classified as `Category::Data`.
    Message(Box<str>),

    /// Some IO error occurred while serializing or deserializing.
    Io(io::Error),

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing a JSON value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    /// Expected this character to be either a `','` or a `']'`.
    ExpectedListCommaOrEnd,

    /// Expected this character to be either a `','` or a `'}'`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start a JSON value.
    ExpectedSomeValue,

    /// Invalid hex escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Number is bigger than the maximum value of its type.
    NumberOutOfRange,

    /// Invalid unicode code point.
    InvalidUnicodeCodePoint,

    /// Control character found while parsing a string.
    ControlCharacterWhileParsingString,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Lone leading surrogate in hex escape.
    LoneLeadingSurrogateInHexEscape,

    /// JSON has a comma after the last value in an array or map.
    TrailingComma,

    /// JSON has non-whitespace trailing characters after the value.
    TrailingCharacters,

    /// Unexpected end of hex escape.
    UnexpectedEndOfHexEscape,

    /// Encountered nesting of JSON maps and arrays more than 128 layers deep.
    RecursionLimitExceeded,
}
243 
244 impl Error {
245     #[cold]
syntax(code: ErrorCode, line: usize, column: usize) -> Self246     pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self {
247         Error {
248             err: Box::new(ErrorImpl { code, line, column }),
249         }
250     }
251 
252     // Not public API. Should be pub(crate).
253     //
254     // Update `eager_json` crate when this function changes.
255     #[doc(hidden)]
256     #[cold]
io(error: io::Error) -> Self257     pub fn io(error: io::Error) -> Self {
258         Error {
259             err: Box::new(ErrorImpl {
260                 code: ErrorCode::Io(error),
261                 line: 0,
262                 column: 0,
263             }),
264         }
265     }
266 
267     #[cold]
fix_position<F>(self, f: F) -> Self where F: FnOnce(ErrorCode) -> Error,268     pub(crate) fn fix_position<F>(self, f: F) -> Self
269     where
270         F: FnOnce(ErrorCode) -> Error,
271     {
272         if self.err.line == 0 {
273             f(self.err.code)
274         } else {
275             self
276         }
277     }
278 }
279 
280 impl Display for ErrorCode {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result281     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
282         match *self {
283             ErrorCode::Message(ref msg) => f.write_str(msg),
284             ErrorCode::Io(ref err) => Display::fmt(err, f),
285             ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list"),
286             ErrorCode::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
287             ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string"),
288             ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value"),
289             ErrorCode::ExpectedColon => f.write_str("expected `:`"),
290             ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"),
291             ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"),
292             ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"),
293             ErrorCode::ExpectedSomeValue => f.write_str("expected value"),
294             ErrorCode::InvalidEscape => f.write_str("invalid escape"),
295             ErrorCode::InvalidNumber => f.write_str("invalid number"),
296             ErrorCode::NumberOutOfRange => f.write_str("number out of range"),
297             ErrorCode::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"),
298             ErrorCode::ControlCharacterWhileParsingString => {
299                 f.write_str("control character (\\u0000-\\u001F) found while parsing a string")
300             }
301             ErrorCode::KeyMustBeAString => f.write_str("key must be a string"),
302             ErrorCode::LoneLeadingSurrogateInHexEscape => {
303                 f.write_str("lone leading surrogate in hex escape")
304             }
305             ErrorCode::TrailingComma => f.write_str("trailing comma"),
306             ErrorCode::TrailingCharacters => f.write_str("trailing characters"),
307             ErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"),
308             ErrorCode::RecursionLimitExceeded => f.write_str("recursion limit exceeded"),
309         }
310     }
311 }
312 
313 impl serde::de::StdError for Error {
314     #[cfg(feature = "std")]
source(&self) -> Option<&(dyn error::Error + 'static)>315     fn source(&self) -> Option<&(dyn error::Error + 'static)> {
316         match self.err.code {
317             ErrorCode::Io(ref err) => Some(err),
318             _ => None,
319         }
320     }
321 }
322 
323 impl Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result324     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
325         Display::fmt(&*self.err, f)
326     }
327 }
328 
329 impl Display for ErrorImpl {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result330     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
331         if self.line == 0 {
332             Display::fmt(&self.code, f)
333         } else {
334             write!(
335                 f,
336                 "{} at line {} column {}",
337                 self.code, self.line, self.column
338             )
339         }
340     }
341 }
342 
343 // Remove two layers of verbosity from the debug representation. Humans often
344 // end up seeing this representation because it is what unwrap() shows.
345 impl Debug for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result346     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
347         write!(
348             f,
349             "Error({:?}, line: {}, column: {})",
350             self.err.code.to_string(),
351             self.err.line,
352             self.err.column
353         )
354     }
355 }
356 
357 impl de::Error for Error {
358     #[cold]
custom<T: Display>(msg: T) -> Error359     fn custom<T: Display>(msg: T) -> Error {
360         make_error(msg.to_string())
361     }
362 
363     #[cold]
invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self364     fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
365         if let de::Unexpected::Unit = unexp {
366             Error::custom(format_args!("invalid type: null, expected {}", exp))
367         } else {
368             Error::custom(format_args!("invalid type: {}, expected {}", unexp, exp))
369         }
370     }
371 }
372 
373 impl ser::Error for Error {
374     #[cold]
custom<T: Display>(msg: T) -> Error375     fn custom<T: Display>(msg: T) -> Error {
376         make_error(msg.to_string())
377     }
378 }
379 
380 // Parse our own error message that looks like "{} at line {} column {}" to work
381 // around erased-serde round-tripping the error through de::Error::custom.
make_error(mut msg: String) -> Error382 fn make_error(mut msg: String) -> Error {
383     let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
384     Error {
385         err: Box::new(ErrorImpl {
386             code: ErrorCode::Message(msg.into_boxed_str()),
387             line,
388             column,
389         }),
390     }
391 }
392 
parse_line_col(msg: &mut String) -> Option<(usize, usize)>393 fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
394     let start_of_suffix = match msg.rfind(" at line ") {
395         Some(index) => index,
396         None => return None,
397     };
398 
399     // Find start and end of line number.
400     let start_of_line = start_of_suffix + " at line ".len();
401     let mut end_of_line = start_of_line;
402     while starts_with_digit(&msg[end_of_line..]) {
403         end_of_line += 1;
404     }
405 
406     if !msg[end_of_line..].starts_with(" column ") {
407         return None;
408     }
409 
410     // Find start and end of column number.
411     let start_of_column = end_of_line + " column ".len();
412     let mut end_of_column = start_of_column;
413     while starts_with_digit(&msg[end_of_column..]) {
414         end_of_column += 1;
415     }
416 
417     if end_of_column < msg.len() {
418         return None;
419     }
420 
421     // Parse numbers.
422     let line = match usize::from_str(&msg[start_of_line..end_of_line]) {
423         Ok(line) => line,
424         Err(_) => return None,
425     };
426     let column = match usize::from_str(&msg[start_of_column..end_of_column]) {
427         Ok(column) => column,
428         Err(_) => return None,
429     };
430 
431     msg.truncate(start_of_suffix);
432     Some((line, column))
433 }
434 
/// Returns true when the first byte of `slice` is an ASCII digit (`0`-`9`).
///
/// An empty slice yields false, as does a slice that starts with a non-ASCII
/// digit character.
fn starts_with_digit(slice: &str) -> bool {
    slice.as_bytes().first().map_or(false, u8::is_ascii_digit)
}
441