// This is a part of Chrono.
// See README.md and LICENSE.txt for details.

/*!
 * Various scanning routines for the parser.
 */
7 
8 #![allow(deprecated)]
9 
10 use super::{ParseResult, INVALID, OUT_OF_RANGE, TOO_SHORT};
11 use Weekday;
12 
/// Compares `s` against `pattern` byte by byte, ignoring ASCII case in `s`.
///
/// Only `s` is case-folded: each ASCII uppercase byte of `s` is mapped to lower case before
/// the comparison, while `pattern` is used verbatim and therefore must already be lower case.
/// Returns `true` only when both slices have the same byte length and every folded byte of `s`
/// equals the corresponding byte of `pattern`.
fn equals(s: &str, pattern: &str) -> bool {
    // ASCII upper and lower case letters differ by exactly 32.
    let fold = |b: u8| if b'A' <= b && b <= b'Z' { b + 32 } else { b };

    s.len() == pattern.len() && s.bytes().zip(pattern.bytes()).all(|(x, y)| fold(x) == y)
}
30 
31 /// Tries to parse the non-negative number from `min` to `max` digits.
32 ///
33 /// The absence of digits at all is an unconditional error.
34 /// More than `max` digits are consumed up to the first `max` digits.
35 /// Any number that does not fit in `i64` is an error.
36 #[inline]
number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)>37 pub fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
38     assert!(min <= max);
39 
40     // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
41     // the first non-numeric byte, which may be another ascii character or beginning of multi-byte
42     // UTF-8 character.
43     let bytes = s.as_bytes();
44     if bytes.len() < min {
45         return Err(TOO_SHORT);
46     }
47 
48     let mut n = 0i64;
49     for (i, c) in bytes.iter().take(max).cloned().enumerate() {
50         // cloned() = copied()
51         if c < b'0' || b'9' < c {
52             if i < min {
53                 return Err(INVALID);
54             } else {
55                 return Ok((&s[i..], n));
56             }
57         }
58 
59         n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
60             Some(n) => n,
61             None => return Err(OUT_OF_RANGE),
62         };
63     }
64 
65     Ok((&s[::core::cmp::min(max, bytes.len())..], n))
66 }
67 
68 /// Tries to consume at least one digits as a fractional second.
69 /// Returns the number of whole nanoseconds (0--999,999,999).
nanosecond(s: &str) -> ParseResult<(&str, i64)>70 pub fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
71     // record the number of digits consumed for later scaling.
72     let origlen = s.len();
73     let (s, v) = number(s, 1, 9)?;
74     let consumed = origlen - s.len();
75 
76     // scale the number accordingly.
77     static SCALE: [i64; 10] =
78         [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
79     let v = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
80 
81     // if there are more than 9 digits, skip next digits.
82     let s = s.trim_left_matches(|c: char| '0' <= c && c <= '9');
83 
84     Ok((s, v))
85 }
86 
87 /// Tries to consume a fixed number of digits as a fractional second.
88 /// Returns the number of whole nanoseconds (0--999,999,999).
nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)>89 pub fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
90     // record the number of digits consumed for later scaling.
91     let (s, v) = number(s, digits, digits)?;
92 
93     // scale the number accordingly.
94     static SCALE: [i64; 10] =
95         [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
96     let v = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
97 
98     Ok((s, v))
99 }
100 
101 /// Tries to parse the month index (0 through 11) with the first three ASCII letters.
short_month0(s: &str) -> ParseResult<(&str, u8)>102 pub fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
103     if s.len() < 3 {
104         return Err(TOO_SHORT);
105     }
106     let buf = s.as_bytes();
107     let month0 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
108         (b'j', b'a', b'n') => 0,
109         (b'f', b'e', b'b') => 1,
110         (b'm', b'a', b'r') => 2,
111         (b'a', b'p', b'r') => 3,
112         (b'm', b'a', b'y') => 4,
113         (b'j', b'u', b'n') => 5,
114         (b'j', b'u', b'l') => 6,
115         (b'a', b'u', b'g') => 7,
116         (b's', b'e', b'p') => 8,
117         (b'o', b'c', b't') => 9,
118         (b'n', b'o', b'v') => 10,
119         (b'd', b'e', b'c') => 11,
120         _ => return Err(INVALID),
121     };
122     Ok((&s[3..], month0))
123 }
124 
125 /// Tries to parse the weekday with the first three ASCII letters.
short_weekday(s: &str) -> ParseResult<(&str, Weekday)>126 pub fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
127     if s.len() < 3 {
128         return Err(TOO_SHORT);
129     }
130     let buf = s.as_bytes();
131     let weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
132         (b'm', b'o', b'n') => Weekday::Mon,
133         (b't', b'u', b'e') => Weekday::Tue,
134         (b'w', b'e', b'd') => Weekday::Wed,
135         (b't', b'h', b'u') => Weekday::Thu,
136         (b'f', b'r', b'i') => Weekday::Fri,
137         (b's', b'a', b't') => Weekday::Sat,
138         (b's', b'u', b'n') => Weekday::Sun,
139         _ => return Err(INVALID),
140     };
141     Ok((&s[3..], weekday))
142 }
143 
144 /// Tries to parse the month index (0 through 11) with short or long month names.
145 /// It prefers long month names to short month names when both are possible.
short_or_long_month0(s: &str) -> ParseResult<(&str, u8)>146 pub fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
147     // lowercased month names, minus first three chars
148     static LONG_MONTH_SUFFIXES: [&'static str; 12] =
149         ["uary", "ruary", "ch", "il", "", "e", "y", "ust", "tember", "ober", "ember", "ember"];
150 
151     let (mut s, month0) = short_month0(s)?;
152 
153     // tries to consume the suffix if possible
154     let suffix = LONG_MONTH_SUFFIXES[month0 as usize];
155     if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) {
156         s = &s[suffix.len()..];
157     }
158 
159     Ok((s, month0))
160 }
161 
162 /// Tries to parse the weekday with short or long weekday names.
163 /// It prefers long weekday names to short weekday names when both are possible.
short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)>164 pub fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
165     // lowercased weekday names, minus first three chars
166     static LONG_WEEKDAY_SUFFIXES: [&'static str; 7] =
167         ["day", "sday", "nesday", "rsday", "day", "urday", "day"];
168 
169     let (mut s, weekday) = short_weekday(s)?;
170 
171     // tries to consume the suffix if possible
172     let suffix = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
173     if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) {
174         s = &s[suffix.len()..];
175     }
176 
177     Ok((s, weekday))
178 }
179 
180 /// Tries to consume exactly one given character.
char(s: &str, c1: u8) -> ParseResult<&str>181 pub fn char(s: &str, c1: u8) -> ParseResult<&str> {
182     match s.as_bytes().first() {
183         Some(&c) if c == c1 => Ok(&s[1..]),
184         Some(_) => Err(INVALID),
185         None => Err(TOO_SHORT),
186     }
187 }
188 
189 /// Tries to consume one or more whitespace.
space(s: &str) -> ParseResult<&str>190 pub fn space(s: &str) -> ParseResult<&str> {
191     let s_ = s.trim_left();
192     if s_.len() < s.len() {
193         Ok(s_)
194     } else if s.is_empty() {
195         Err(TOO_SHORT)
196     } else {
197         Err(INVALID)
198     }
199 }
200 
201 /// Consumes any number (including zero) of colon or spaces.
colon_or_space(s: &str) -> ParseResult<&str>202 pub fn colon_or_space(s: &str) -> ParseResult<&str> {
203     Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace()))
204 }
205 
206 /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible.
207 ///
208 /// The additional `colon` may be used to parse a mandatory or optional `:`
209 /// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>,210 pub fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)>
211 where
212     F: FnMut(&str) -> ParseResult<&str>,
213 {
214     timezone_offset_internal(s, consume_colon, false)
215 }
216 
timezone_offset_internal<F>( mut s: &str, mut consume_colon: F, allow_missing_minutes: bool, ) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>,217 fn timezone_offset_internal<F>(
218     mut s: &str,
219     mut consume_colon: F,
220     allow_missing_minutes: bool,
221 ) -> ParseResult<(&str, i32)>
222 where
223     F: FnMut(&str) -> ParseResult<&str>,
224 {
225     fn digits(s: &str) -> ParseResult<(u8, u8)> {
226         let b = s.as_bytes();
227         if b.len() < 2 {
228             Err(TOO_SHORT)
229         } else {
230             Ok((b[0], b[1]))
231         }
232     }
233     let negative = match s.as_bytes().first() {
234         Some(&b'+') => false,
235         Some(&b'-') => true,
236         Some(_) => return Err(INVALID),
237         None => return Err(TOO_SHORT),
238     };
239     s = &s[1..];
240 
241     // hours (00--99)
242     let hours = match digits(s)? {
243         (h1 @ b'0'...b'9', h2 @ b'0'...b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
244         _ => return Err(INVALID),
245     };
246     s = &s[2..];
247 
248     // colons (and possibly other separators)
249     s = consume_colon(s)?;
250 
251     // minutes (00--59)
252     // if the next two items are digits then we have to add minutes
253     let minutes = if let Ok(ds) = digits(s) {
254         match ds {
255             (m1 @ b'0'...b'5', m2 @ b'0'...b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
256             (b'6'...b'9', b'0'...b'9') => return Err(OUT_OF_RANGE),
257             _ => return Err(INVALID),
258         }
259     } else if allow_missing_minutes {
260         0
261     } else {
262         return Err(TOO_SHORT);
263     };
264     s = match s.len() {
265         len if len >= 2 => &s[2..],
266         len if len == 0 => s,
267         _ => return Err(TOO_SHORT),
268     };
269 
270     let seconds = hours * 3600 + minutes * 60;
271     Ok((s, if negative { -seconds } else { seconds }))
272 }
273 
274 /// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as `+00:00`.
timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>,275 pub fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
276 where
277     F: FnMut(&str) -> ParseResult<&str>,
278 {
279     let bytes = s.as_bytes();
280     match bytes.first() {
281         Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
282         Some(&b'u') | Some(&b'U') => {
283             if bytes.len() >= 3 {
284                 let (b, c) = (bytes[1], bytes[2]);
285                 match (b | 32, c | 32) {
286                     (b't', b'c') => Ok((&s[3..], 0)),
287                     _ => Err(INVALID),
288                 }
289             } else {
290                 Err(INVALID)
291             }
292         }
293         _ => timezone_offset(s, colon),
294     }
295 }
296 
297 /// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as
298 /// `+00:00`, and allows missing minutes entirely.
timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str>,299 pub fn timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
300 where
301     F: FnMut(&str) -> ParseResult<&str>,
302 {
303     match s.as_bytes().first() {
304         Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
305         _ => timezone_offset_internal(s, colon, true),
306     }
307 }
308 
309 /// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
310 /// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>311 pub fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> {
312     // tries to parse legacy time zone names
313     let upto = s
314         .as_bytes()
315         .iter()
316         .position(|&c| match c {
317             b'a'...b'z' | b'A'...b'Z' => false,
318             _ => true,
319         })
320         .unwrap_or_else(|| s.len());
321     if upto > 0 {
322         let name = &s[..upto];
323         let s = &s[upto..];
324         let offset_hours = |o| Ok((s, Some(o * 3600)));
325         if equals(name, "gmt") || equals(name, "ut") {
326             offset_hours(0)
327         } else if equals(name, "edt") {
328             offset_hours(-4)
329         } else if equals(name, "est") || equals(name, "cdt") {
330             offset_hours(-5)
331         } else if equals(name, "cst") || equals(name, "mdt") {
332             offset_hours(-6)
333         } else if equals(name, "mst") || equals(name, "pdt") {
334             offset_hours(-7)
335         } else if equals(name, "pst") {
336             offset_hours(-8)
337         } else {
338             Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000
339         }
340     } else {
341         let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
342         Ok((s_, Some(offset)))
343     }
344 }
345 
346 /// Tries to consume everyting until next whitespace-like symbol.
347 /// Does not provide any offset information from the consumed data.
timezone_name_skip(s: &str) -> ParseResult<(&str, ())>348 pub fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> {
349     Ok((s.trim_left_matches(|c: char| !c.is_whitespace()), ()))
350 }
351