1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_DATEPARSER_INL_H_
6 #define V8_DATEPARSER_INL_H_
7 
8 #include "src/char-predicates-inl.h"
9 #include "src/dateparser.h"
10 #include "src/unicode-cache-inl.h"
11 
12 namespace v8 {
13 namespace internal {
14 
15 template <typename Char>
Parse(Isolate * isolate,Vector<Char> str,FixedArray * out)16 bool DateParser::Parse(Isolate* isolate, Vector<Char> str, FixedArray* out) {
17   UnicodeCache* unicode_cache = isolate->unicode_cache();
18   DCHECK(out->length() >= OUTPUT_SIZE);
19   InputReader<Char> in(unicode_cache, str);
20   DateStringTokenizer<Char> scanner(&in);
21   TimeZoneComposer tz;
22   TimeComposer time;
23   DayComposer day;
24 
25   // Specification:
26   // Accept ES5 ISO 8601 date-time-strings or legacy dates compatible
27   // with Safari.
28   // ES5 ISO 8601 dates:
29   //   [('-'|'+')yy]yyyy[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)hh:mm]]
30   //   where yyyy is in the range 0000..9999 and
31   //         +/-yyyyyy is in the range -999999..+999999 -
32   //           but -000000 is invalid (year zero must be positive),
33   //         MM is in the range 01..12,
34   //         DD is in the range 01..31,
35   //         MM and DD defaults to 01 if missing,,
36   //         HH is generally in the range 00..23, but can be 24 if mm, ss
37   //           and sss are zero (or missing), representing midnight at the
38   //           end of a day,
39   //         mm and ss are in the range 00..59,
40   //         sss is in the range 000..999,
41   //         hh is in the range 00..23,
42   //         mm, ss, and sss default to 00 if missing, and
43   //         timezone defaults to Z if missing
44   //           (following Safari, ISO actually demands local time).
45   //  Extensions:
46   //   We also allow sss to have more or less than three digits (but at
47   //   least one).
48   //   We allow hh:mm to be specified as hhmm.
49   // Legacy dates:
50   //  Any unrecognized word before the first number is ignored.
51   //  Parenthesized text is ignored.
52   //  An unsigned number followed by ':' is a time value, and is
53   //  added to the TimeComposer. A number followed by '::' adds a second
54   //  zero as well. A number followed by '.' is also a time and must be
55   //  followed by milliseconds.
56   //  Any other number is a date component and is added to DayComposer.
57   //  A month name (or really: any word having the same first three letters
58   //  as a month name) is recorded as a named month in the Day composer.
59   //  A word recognizable as a time-zone is recorded as such, as is
60   //  '(+|-)(hhmm|hh:)'.
61   //  Legacy dates don't allow extra signs ('+' or '-') or umatched ')'
62   //  after a number has been read (before the first number, any garbage
63   //  is allowed).
64   // Intersection of the two:
65   //  A string that matches both formats (e.g. 1970-01-01) will be
66   //  parsed as an ES5 date-time string - which means it will default
67   //  to UTC time-zone. That's unavoidable if following the ES5
68   //  specification.
69   //  After a valid "T" has been read while scanning an ES5 datetime string,
70   //  the input can no longer be a valid legacy date, since the "T" is a
71   //  garbage string after a number has been read.
72 
73   // First try getting as far as possible with as ES5 Date Time String.
74   DateToken next_unhandled_token = ParseES5DateTime(&scanner, &day, &time, &tz);
75   if (next_unhandled_token.IsInvalid()) return false;
76   bool has_read_number = !day.IsEmpty();
77   // If there's anything left, continue with the legacy parser.
78   bool legacy_parser = false;
79   for (DateToken token = next_unhandled_token;
80        !token.IsEndOfInput();
81        token = scanner.Next()) {
82     if (token.IsNumber()) {
83       legacy_parser = true;
84       has_read_number = true;
85       int n = token.number();
86       if (scanner.SkipSymbol(':')) {
87         if (scanner.SkipSymbol(':')) {
88           // n + "::"
89           if (!time.IsEmpty()) return false;
90           time.Add(n);
91           time.Add(0);
92         } else {
93           // n + ":"
94           if (!time.Add(n)) return false;
95           if (scanner.Peek().IsSymbol('.')) scanner.Next();
96         }
97       } else if (scanner.SkipSymbol('.') && time.IsExpecting(n)) {
98         time.Add(n);
99         if (!scanner.Peek().IsNumber()) return false;
100         int n = ReadMilliseconds(scanner.Next());
101         if (n < 0) return false;
102         time.AddFinal(n);
103       } else if (tz.IsExpecting(n)) {
104         tz.SetAbsoluteMinute(n);
105       } else if (time.IsExpecting(n)) {
106         time.AddFinal(n);
107         // Require end, white space, "Z", "+" or "-" immediately after
108         // finalizing time.
109         DateToken peek = scanner.Peek();
110         if (!peek.IsEndOfInput() &&
111             !peek.IsWhiteSpace() &&
112             !peek.IsKeywordZ() &&
113             !peek.IsAsciiSign()) return false;
114       } else {
115         if (!day.Add(n)) return false;
116         scanner.SkipSymbol('-');
117       }
118     } else if (token.IsKeyword()) {
119       legacy_parser = true;
120       // Parse a "word" (sequence of chars. >= 'A').
121       KeywordType type = token.keyword_type();
122       int value = token.keyword_value();
123       if (type == AM_PM && !time.IsEmpty()) {
124         time.SetHourOffset(value);
125       } else if (type == MONTH_NAME) {
126         day.SetNamedMonth(value);
127         scanner.SkipSymbol('-');
128       } else if (type == TIME_ZONE_NAME && has_read_number) {
129         tz.Set(value);
130       } else {
131         // Garbage words are illegal if a number has been read.
132         if (has_read_number) return false;
133         // The first number has to be separated from garbage words by
134         // whitespace or other separators.
135         if (scanner.Peek().IsNumber()) return false;
136       }
137     } else if (token.IsAsciiSign() && (tz.IsUTC() || !time.IsEmpty())) {
138       legacy_parser = true;
139       // Parse UTC offset (only after UTC or time).
140       tz.SetSign(token.ascii_sign());
141       // The following number may be empty.
142       int n = 0;
143       int length = 0;
144       if (scanner.Peek().IsNumber()) {
145         DateToken token = scanner.Next();
146         length = token.length();
147         n = token.number();
148       }
149       has_read_number = true;
150 
151       if (scanner.Peek().IsSymbol(':')) {
152         tz.SetAbsoluteHour(n);
153         // TODO(littledan): Use minutes as part of timezone?
154         tz.SetAbsoluteMinute(kNone);
155       } else if (length == 2 || length == 1) {
156         // Handle time zones like GMT-8
157         tz.SetAbsoluteHour(n);
158         tz.SetAbsoluteMinute(0);
159       } else if (length == 4 || length == 3) {
160         // Looks like the hhmm format
161         tz.SetAbsoluteHour(n / 100);
162         tz.SetAbsoluteMinute(n % 100);
163       } else {
164         // No need to accept time zones like GMT-12345
165         return false;
166       }
167     } else if ((token.IsAsciiSign() || token.IsSymbol(')')) &&
168                has_read_number) {
169       // Extra sign or ')' is illegal if a number has been read.
170       return false;
171     } else {
172       // Ignore other characters and whitespace.
173     }
174   }
175 
176   bool success = day.Write(out) && time.Write(out) && tz.Write(out);
177 
178   if (legacy_parser && success) {
179     isolate->CountUsage(v8::Isolate::kLegacyDateParser);
180   }
181 
182   return success;
183 }
184 
185 
186 template<typename CharType>
Scan()187 DateParser::DateToken DateParser::DateStringTokenizer<CharType>::Scan() {
188   int pre_pos = in_->position();
189   if (in_->IsEnd()) return DateToken::EndOfInput();
190   if (in_->IsAsciiDigit()) {
191     int n = in_->ReadUnsignedNumeral();
192     int length = in_->position() - pre_pos;
193     return DateToken::Number(n, length);
194   }
195   if (in_->Skip(':')) return DateToken::Symbol(':');
196   if (in_->Skip('-')) return DateToken::Symbol('-');
197   if (in_->Skip('+')) return DateToken::Symbol('+');
198   if (in_->Skip('.')) return DateToken::Symbol('.');
199   if (in_->Skip(')')) return DateToken::Symbol(')');
200   if (in_->IsAsciiAlphaOrAbove()) {
201     DCHECK_EQ(KeywordTable::kPrefixLength, 3);
202     uint32_t buffer[3] = {0, 0, 0};
203     int length = in_->ReadWord(buffer, 3);
204     int index = KeywordTable::Lookup(buffer, length);
205     return DateToken::Keyword(KeywordTable::GetType(index),
206                               KeywordTable::GetValue(index),
207                               length);
208   }
209   if (in_->SkipWhiteSpace()) {
210     return DateToken::WhiteSpace(in_->position() - pre_pos);
211   }
212   if (in_->SkipParentheses()) {
213     return DateToken::Unknown();
214   }
215   in_->Next();
216   return DateToken::Unknown();
217 }
218 
219 
220 template <typename Char>
SkipWhiteSpace()221 bool DateParser::InputReader<Char>::SkipWhiteSpace() {
222   if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
223     Next();
224     return true;
225   }
226   return false;
227 }
228 
229 
230 template <typename Char>
SkipParentheses()231 bool DateParser::InputReader<Char>::SkipParentheses() {
232   if (ch_ != '(') return false;
233   int balance = 0;
234   do {
235     if (ch_ == ')') --balance;
236     else if (ch_ == '(') ++balance;
237     Next();
238   } while (balance > 0 && ch_);
239   return true;
240 }
241 
242 
243 template <typename Char>
ParseES5DateTime(DateStringTokenizer<Char> * scanner,DayComposer * day,TimeComposer * time,TimeZoneComposer * tz)244 DateParser::DateToken DateParser::ParseES5DateTime(
245     DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
246     TimeZoneComposer* tz) {
247   DCHECK(day->IsEmpty());
248   DCHECK(time->IsEmpty());
249   DCHECK(tz->IsEmpty());
250 
251   // Parse mandatory date string: [('-'|'+')yy]yyyy[':'MM[':'DD]]
252   if (scanner->Peek().IsAsciiSign()) {
253     // Keep the sign token, so we can pass it back to the legacy
254     // parser if we don't use it.
255     DateToken sign_token = scanner->Next();
256     if (!scanner->Peek().IsFixedLengthNumber(6)) return sign_token;
257     int sign = sign_token.ascii_sign();
258     int year = scanner->Next().number();
259     if (sign < 0 && year == 0) return sign_token;
260     day->Add(sign * year);
261   } else if (scanner->Peek().IsFixedLengthNumber(4)) {
262     day->Add(scanner->Next().number());
263   } else {
264     return scanner->Next();
265   }
266   if (scanner->SkipSymbol('-')) {
267     if (!scanner->Peek().IsFixedLengthNumber(2) ||
268         !DayComposer::IsMonth(scanner->Peek().number())) return scanner->Next();
269     day->Add(scanner->Next().number());
270     if (scanner->SkipSymbol('-')) {
271       if (!scanner->Peek().IsFixedLengthNumber(2) ||
272           !DayComposer::IsDay(scanner->Peek().number())) return scanner->Next();
273       day->Add(scanner->Next().number());
274     }
275   }
276   // Check for optional time string: 'T'HH':'mm[':'ss['.'sss]]Z
277   if (!scanner->Peek().IsKeywordType(TIME_SEPARATOR)) {
278     if (!scanner->Peek().IsEndOfInput()) return scanner->Next();
279   } else {
280     // ES5 Date Time String time part is present.
281     scanner->Next();
282     if (!scanner->Peek().IsFixedLengthNumber(2) ||
283         !Between(scanner->Peek().number(), 0, 24)) {
284       return DateToken::Invalid();
285     }
286     // Allow 24:00[:00[.000]], but no other time starting with 24.
287     bool hour_is_24 = (scanner->Peek().number() == 24);
288     time->Add(scanner->Next().number());
289     if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
290     if (!scanner->Peek().IsFixedLengthNumber(2) ||
291         !TimeComposer::IsMinute(scanner->Peek().number()) ||
292         (hour_is_24 && scanner->Peek().number() > 0)) {
293       return DateToken::Invalid();
294     }
295     time->Add(scanner->Next().number());
296     if (scanner->SkipSymbol(':')) {
297       if (!scanner->Peek().IsFixedLengthNumber(2) ||
298           !TimeComposer::IsSecond(scanner->Peek().number()) ||
299           (hour_is_24 && scanner->Peek().number() > 0)) {
300         return DateToken::Invalid();
301       }
302       time->Add(scanner->Next().number());
303       if (scanner->SkipSymbol('.')) {
304         if (!scanner->Peek().IsNumber() ||
305             (hour_is_24 && scanner->Peek().number() > 0)) {
306           return DateToken::Invalid();
307         }
308         // Allow more or less than the mandated three digits.
309         time->Add(ReadMilliseconds(scanner->Next()));
310       }
311     }
312     // Check for optional timezone designation: 'Z' | ('+'|'-')hh':'mm
313     if (scanner->Peek().IsKeywordZ()) {
314       scanner->Next();
315       tz->Set(0);
316     } else if (scanner->Peek().IsSymbol('+') ||
317                scanner->Peek().IsSymbol('-')) {
318       tz->SetSign(scanner->Next().symbol() == '+' ? 1 : -1);
319       if (scanner->Peek().IsFixedLengthNumber(4)) {
320         // hhmm extension syntax.
321         int hourmin = scanner->Next().number();
322         int hour = hourmin / 100;
323         int min = hourmin % 100;
324         if (!TimeComposer::IsHour(hour) || !TimeComposer::IsMinute(min)) {
325           return DateToken::Invalid();
326         }
327         tz->SetAbsoluteHour(hour);
328         tz->SetAbsoluteMinute(min);
329       } else {
330         // hh:mm standard syntax.
331         if (!scanner->Peek().IsFixedLengthNumber(2) ||
332             !TimeComposer::IsHour(scanner->Peek().number())) {
333           return DateToken::Invalid();
334         }
335         tz->SetAbsoluteHour(scanner->Next().number());
336         if (!scanner->SkipSymbol(':')) return DateToken::Invalid();
337         if (!scanner->Peek().IsFixedLengthNumber(2) ||
338             !TimeComposer::IsMinute(scanner->Peek().number())) {
339           return DateToken::Invalid();
340         }
341         tz->SetAbsoluteMinute(scanner->Next().number());
342       }
343     }
344     if (!scanner->Peek().IsEndOfInput()) return DateToken::Invalid();
345   }
346   // Successfully parsed ES5 Date Time String.
347   // ES#sec-date-time-string-format Date Time String Format
348   // "When the time zone offset is absent, date-only forms are interpreted
349   //  as a UTC time and date-time forms are interpreted as a local time."
350   if (tz->IsEmpty() && time->IsEmpty()) {
351     tz->Set(0);
352   }
353   day->set_iso_date();
354   return DateToken::EndOfInput();
355 }
356 
357 
358 }  // namespace internal
359 }  // namespace v8
360 
361 #endif  // V8_DATEPARSER_INL_H_
362