1 // Copyright 2015-2016 Brian Smith.
2 //
3 // Permission to use, copy, modify, and/or distribute this software for any
4 // purpose with or without fee is hereby granted, provided that the above
5 // copyright notice and this permission notice appear in all copies.
6 //
7 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
8 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
10 // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 
15 //! untrusted.rs: Safe, fast, zero-panic, zero-crashing, zero-allocation
16 //! parsing of untrusted inputs in Rust.
17 //!
18 //! <code>git clone https://github.com/briansmith/untrusted</code>
19 //!
20 //! untrusted.rs goes beyond Rust's normal safety guarantees by  also
21 //! guaranteeing that parsing will be panic-free, as long as
22 //! `untrusted::Input::as_slice_less_safe()` is not used. It avoids copying
23 //! data and heap allocation and strives to prevent common pitfalls such as
24 //! accidentally parsing input bytes multiple times. In order to meet these
25 //! goals, untrusted.rs is limited in functionality such that it works best for
26 //! input languages with a small fixed amount of lookahead such as ASN.1, TLS,
27 //! TCP/IP, and many other networking, IPC, and related protocols. Languages
28 //! that require more lookahead and/or backtracking require some significant
29 //! contortions to parse using this framework. It would not be realistic to use
30 //! it for parsing programming language code, for example.
31 //!
32 //! The overall pattern for using untrusted.rs is:
33 //!
34 //! 1. Write a recursive-descent-style parser for the input language, where the
35 //!    input data is given as a `&mut untrusted::Reader` parameter to each
36 //!    function. Each function should have a return type of `Result<V, E>` for
37 //!    some value type `V` and some error type `E`, either or both of which may
38 //!    be `()`. Functions for parsing the lowest-level language constructs
39 //!    should be defined. Those lowest-level functions will parse their inputs
40 //!    using `::read_byte()`, `Reader::peek()`, and similar functions.
41 //!    Higher-level language constructs are then parsed by calling the
42 //!    lower-level functions in sequence.
43 //!
44 //! 2. Wrap the top-most functions of your recursive-descent parser in
45 //!    functions that take their input data as an `untrusted::Input`. The
46 //!    wrapper functions should call the `Input`'s `read_all` (or a variant
47 //!    thereof) method. The wrapper functions are the only ones that should be
48 //!    exposed outside the parser's module.
49 //!
50 //! 3. After receiving the input data to parse, wrap it in an `untrusted::Input`
51 //!    using `untrusted::Input::from()` as early as possible. Pass the
52 //!    `untrusted::Input` to the wrapper functions when they need to be parsed.
53 //!
54 //! In general parsers built using `untrusted::Reader` do not need to explicitly
55 //! check for end-of-input unless they are parsing optional constructs, because
56 //! `Reader::read_byte()` will return `Err(EndOfInput)` on end-of-input.
57 //! Similarly, parsers using `untrusted::Reader` generally don't need to check
58 //! for extra junk at the end of the input as long as the parser's API uses the
59 //! pattern described above, as `read_all` and its variants automatically check
60 //! for trailing junk. `Reader::skip_to_end()` must be used when any remaining
61 //! unread input should be ignored without triggering an error.
62 //!
63 //! untrusted.rs works best when all processing of the input data is done
64 //! through the `untrusted::Input` and `untrusted::Reader` types. In
65 //! particular, avoid trying to parse input data using functions that take
66 //! byte slices. However, when you need to access a part of the input data as
67 //! a slice to use a function that isn't written using untrusted.rs,
68 //! `Input::as_slice_less_safe()` can be used.
69 //!
70 //! It is recommend to use `use untrusted;` and then `untrusted::Input`,
71 //! `untrusted::Reader`, etc., instead of using `use untrusted::*`. Qualifying
72 //! the names with `untrusted` helps remind the reader of the code that it is
73 //! dealing with *untrusted* input.
74 //!
75 //! # Examples
76 //!
77 //! [*ring*](https://github.com/briansmith/ring)'s parser for the subset of
78 //! ASN.1 DER it needs to understand,
79 //! [`ring::der`](https://github.com/briansmith/ring/blob/master/src/der.rs),
80 //! is built on top of untrusted.rs. *ring* also uses untrusted.rs to parse ECC
81 //! public keys, RSA PKCS#1 1.5 padding, and for all other parsing it does.
82 //!
83 //! All of [webpki](https://github.com/briansmith/webpki)'s parsing of X.509
84 //! certificates (also ASN.1 DER) is done using untrusted.rs.
85 
86 #![doc(html_root_url = "https://briansmith.org/rustdoc/")]
87 // `#[derive(...)]` uses `#[allow(unused_qualifications)]` internally.
88 #![deny(unused_qualifications)]
89 #![forbid(
90     anonymous_parameters,
91     box_pointers,
92     missing_docs,
93     trivial_casts,
94     trivial_numeric_casts,
95     unsafe_code,
96     unstable_features,
97     unused_extern_crates,
98     unused_import_braces,
99     unused_results,
100     variant_size_differences,
101     warnings
102 )]
103 // ANDROID: This lint is included in warnings and thus forbidden, but this version of the library
104 // is not compatible with it.
105 #![allow(non_autolinks)]
106 #![no_std]
107 
108 // ANDROID: Unconditionally use std to allow building as a dylib.
109 extern crate std;
110 
111 /// A wrapper around `&'a [u8]` that helps in writing panic-free code.
112 ///
113 /// No methods of `Input` will ever panic.
114 #[derive(Clone, Copy, Debug, Eq)]
115 pub struct Input<'a> {
116     value: no_panic::Slice<'a>,
117 }
118 
119 impl<'a> Input<'a> {
120     /// Construct a new `Input` for the given input `bytes`.
from(bytes: &'a [u8]) -> Self121     pub const fn from(bytes: &'a [u8]) -> Self {
122         // This limit is important for avoiding integer overflow. In particular,
123         // `Reader` assumes that an `i + 1 > i` if `input.value.get(i)` does
124         // not return `None`. According to the Rust language reference, the
125         // maximum object size is `core::isize::MAX`, and in practice it is
126         // impossible to create an object of size `core::usize::MAX` or larger.
127         Self {
128             value: no_panic::Slice::new(bytes),
129         }
130     }
131 
132     /// Returns `true` if the input is empty and false otherwise.
133     #[inline]
is_empty(&self) -> bool134     pub fn is_empty(&self) -> bool { self.value.is_empty() }
135 
136     /// Returns the length of the `Input`.
137     #[inline]
len(&self) -> usize138     pub fn len(&self) -> usize { self.value.len() }
139 
140     /// Calls `read` with the given input as a `Reader`, ensuring that `read`
141     /// consumed the entire input. If `read` does not consume the entire input,
142     /// `incomplete_read` is returned.
read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E> where F: FnOnce(&mut Reader<'a>) -> Result<R, E>,143     pub fn read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E>
144     where
145         F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
146     {
147         let mut input = Reader::new(*self);
148         let result = read(&mut input)?;
149         if input.at_end() {
150             Ok(result)
151         } else {
152             Err(incomplete_read)
153         }
154     }
155 
156     /// Access the input as a slice so it can be processed by functions that
157     /// are not written using the Input/Reader framework.
158     #[inline]
as_slice_less_safe(&self) -> &'a [u8]159     pub fn as_slice_less_safe(&self) -> &'a [u8] { self.value.as_slice_less_safe() }
160 }
161 
162 impl<'a> From<&'a [u8]> for Input<'a> {
163     #[inline]
from(value: &'a [u8]) -> Self164     fn from(value: &'a [u8]) -> Self { Self { value: no_panic::Slice::new(value)} }
165 }
166 
167 // #[derive(PartialEq)] would result in lifetime bounds that are
168 // unnecessarily restrictive; see
169 // https://github.com/rust-lang/rust/issues/26925.
170 impl PartialEq<Input<'_>> for Input<'_> {
171     #[inline]
eq(&self, other: &Input) -> bool172     fn eq(&self, other: &Input) -> bool {
173         self.as_slice_less_safe() == other.as_slice_less_safe()
174     }
175 }
176 
177 impl PartialEq<[u8]> for Input<'_> {
178     #[inline]
eq(&self, other: &[u8]) -> bool179     fn eq(&self, other: &[u8]) -> bool { self.as_slice_less_safe() == other }
180 }
181 
182 impl PartialEq<Input<'_>> for [u8] {
183     #[inline]
eq(&self, other: &Input) -> bool184     fn eq(&self, other: &Input) -> bool { other.as_slice_less_safe() == self }
185 }
186 
187 /// Calls `read` with the given input as a `Reader`, ensuring that `read`
188 /// consumed the entire input. When `input` is `None`, `read` will be
189 /// called with `None`.
read_all_optional<'a, F, R, E>( input: Option<Input<'a>>, incomplete_read: E, read: F, ) -> Result<R, E> where F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,190 pub fn read_all_optional<'a, F, R, E>(
191     input: Option<Input<'a>>, incomplete_read: E, read: F,
192 ) -> Result<R, E>
193 where
194     F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,
195 {
196     match input {
197         Some(input) => {
198             let mut input = Reader::new(input);
199             let result = read(Some(&mut input))?;
200             if input.at_end() {
201                 Ok(result)
202             } else {
203                 Err(incomplete_read)
204             }
205         },
206         None => read(None),
207     }
208 }
209 
210 /// A read-only, forward-only* cursor into the data in an `Input`.
211 ///
212 /// Using `Reader` to parse input helps to ensure that no byte of the input
213 /// will be accidentally processed more than once. Using `Reader` in
214 /// conjunction with `read_all` and `read_all_optional` helps ensure that no
215 /// byte of the input is accidentally left unprocessed. The methods of `Reader`
216 /// never panic, so `Reader` also assists the writing of panic-free code.
217 ///
218 /// \* `Reader` is not strictly forward-only because of the method
219 /// `get_input_between_marks`, which is provided mainly to support calculating
220 /// digests over parsed data.
221 #[derive(Debug)]
222 pub struct Reader<'a> {
223     input: no_panic::Slice<'a>,
224     i: usize,
225 }
226 
227 /// An index into the already-parsed input of a `Reader`.
228 pub struct Mark {
229     i: usize,
230 }
231 
232 impl<'a> Reader<'a> {
233     /// Construct a new Reader for the given input. Use `read_all` or
234     /// `read_all_optional` instead of `Reader::new` whenever possible.
235     #[inline]
new(input: Input<'a>) -> Self236     pub fn new(input: Input<'a>) -> Self {
237         Self {
238             input: input.value,
239             i: 0,
240         }
241     }
242 
243     /// Returns `true` if the reader is at the end of the input, and `false`
244     /// otherwise.
245     #[inline]
at_end(&self) -> bool246     pub fn at_end(&self) -> bool { self.i == self.input.len() }
247 
248     /// Returns an `Input` for already-parsed input that has had its boundaries
249     /// marked using `mark`.
250     #[inline]
get_input_between_marks( &self, mark1: Mark, mark2: Mark, ) -> Result<Input<'a>, EndOfInput>251     pub fn get_input_between_marks(
252         &self, mark1: Mark, mark2: Mark,
253     ) -> Result<Input<'a>, EndOfInput> {
254         self.input
255             .subslice(mark1.i..mark2.i)
256             .map(|subslice| Input { value: subslice })
257             .ok_or(EndOfInput)
258     }
259 
260     /// Return the current position of the `Reader` for future use in a call
261     /// to `get_input_between_marks`.
262     #[inline]
mark(&self) -> Mark263     pub fn mark(&self) -> Mark { Mark { i: self.i } }
264 
265     /// Returns `true` if there is at least one more byte in the input and that
266     /// byte is equal to `b`, and false otherwise.
267     #[inline]
peek(&self, b: u8) -> bool268     pub fn peek(&self, b: u8) -> bool {
269         match self.input.get(self.i) {
270             Some(actual_b) => b == *actual_b,
271             None => false,
272         }
273     }
274 
275     /// Reads the next input byte.
276     ///
277     /// Returns `Ok(b)` where `b` is the next input byte, or `Err(EndOfInput)`
278     /// if the `Reader` is at the end of the input.
279     #[inline]
read_byte(&mut self) -> Result<u8, EndOfInput>280     pub fn read_byte(&mut self) -> Result<u8, EndOfInput> {
281         match self.input.get(self.i) {
282             Some(b) => {
283                 self.i += 1; // safe from overflow; see Input::from().
284                 Ok(*b)
285             },
286             None => Err(EndOfInput),
287         }
288     }
289 
290     /// Skips `num_bytes` of the input, returning the skipped input as an
291     /// `Input`.
292     ///
293     /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining,
294     /// and `Err(EndOfInput)` otherwise.
295     #[inline]
read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput>296     pub fn read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput> {
297         let new_i = self.i.checked_add(num_bytes).ok_or(EndOfInput)?;
298         let ret = self
299             .input
300             .subslice(self.i..new_i)
301             .map(|subslice| Input { value: subslice })
302             .ok_or(EndOfInput)?;
303         self.i = new_i;
304         Ok(ret)
305     }
306 
307     /// Skips the reader to the end of the input, returning the skipped input
308     /// as an `Input`.
309     #[inline]
read_bytes_to_end(&mut self) -> Input<'a>310     pub fn read_bytes_to_end(&mut self) -> Input<'a> {
311         let to_skip = self.input.len() - self.i;
312         self.read_bytes(to_skip).unwrap()
313     }
314 
315     /// Calls `read()` with the given input as a `Reader`. On success, returns a
316     /// pair `(bytes_read, r)` where `bytes_read` is what `read()` consumed and
317     /// `r` is `read()`'s return value.
read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E> where F: FnOnce(&mut Reader<'a>) -> Result<R, E>,318     pub fn read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E>
319     where
320         F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
321     {
322         let start = self.i;
323         let r = read(self)?;
324         let bytes_read = Input {
325             value: self.input.subslice(start..self.i).unwrap()
326         };
327         Ok((bytes_read, r))
328     }
329 
330     /// Skips `num_bytes` of the input.
331     ///
332     /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining,
333     /// and `Err(EndOfInput)` otherwise.
334     #[inline]
skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput>335     pub fn skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput> {
336         self.read_bytes(num_bytes).map(|_| ())
337     }
338 
339     /// Skips the reader to the end of the input.
340     #[inline]
skip_to_end(&mut self) -> ()341     pub fn skip_to_end(&mut self) -> () { let _ = self.read_bytes_to_end(); }
342 }
343 
344 /// The error type used to indicate the end of the input was reached before the
345 /// operation could be completed.
346 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
347 pub struct EndOfInput;
348 
349 mod no_panic {
350     use core;
351 
352     /// A wrapper around a slice that exposes no functions that can panic.
353     #[derive(Clone, Copy, Debug, Eq, PartialEq)]
354     pub struct Slice<'a> {
355         bytes: &'a [u8],
356     }
357 
358     impl<'a> Slice<'a> {
359         #[inline]
new(bytes: &'a [u8]) -> Self360         pub const fn new(bytes: &'a [u8]) -> Self { Self { bytes } }
361 
362         #[inline]
get(&self, i: usize) -> Option<&u8>363         pub fn get(&self, i: usize) -> Option<&u8> { self.bytes.get(i) }
364 
365         #[inline]
subslice(&self, r: core::ops::Range<usize>) -> Option<Self>366         pub fn subslice(&self, r: core::ops::Range<usize>) -> Option<Self> {
367             self.bytes.get(r).map(|bytes| Self { bytes })
368         }
369 
370         #[inline]
is_empty(&self) -> bool371         pub fn is_empty(&self) -> bool { self.bytes.is_empty() }
372 
373         #[inline]
len(&self) -> usize374         pub fn len(&self) -> usize { self.bytes.len() }
375 
376         #[inline]
as_slice_less_safe(&self) -> &'a [u8]377         pub fn as_slice_less_safe(&self) -> &'a [u8] { self.bytes }
378     }
379 
380 } // mod no_panic
381