1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_DATEPARSER_H_
6 #define V8_DATEPARSER_H_
7 
8 #include "src/allocation.h"
9 #include "src/char-predicates.h"
10 #include "src/unicode-cache.h"
11 
12 namespace v8 {
13 namespace internal {
14 
15 class DateParser : public AllStatic {
16  public:
  // Parse the string as a date. If parsing succeeds, return true after
  // filling out the output array as follows (all integers are Smis):
  // [0]: year
  // [1]: month (0 = Jan, 1 = Feb, ...)
  // [2]: day
  // [3]: hour
  // [4]: minute
  // [5]: second
  // [6]: millisecond
  // [7]: UTC offset in seconds, or null value if no timezone specified
  // If parsing fails, return false (content of output array is not defined).
  //
  // The slot numbers above are the values of the YEAR .. UTC_OFFSET
  // enumerators declared right after this function. Char is the character
  // type of the input vector; the function is only declared here and is
  // defined out of line.
  template <typename Char>
  static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray* output);
30 
31   enum {
32     YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
33   };
34 
35  private:
36   // Range testing
Between(int x,int lo,int hi)37   static inline bool Between(int x, int lo, int hi) {
38     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
39   }
40 
  // Indicates a missing value. The composer classes in this header use it
  // as the initial "not set yet" state of their fields.
  static const int kNone = kMaxInt;

  // Maximal number of digits used to build the value of a numeral.
  // Remaining digits are ignored. With nine digits the largest value that
  // can be accumulated is 999999999.
  static const int kMaxSignificantDigits = 9;
47 
48   // InputReader provides basic string parsing and character classification.
49   template <typename Char>
50   class InputReader BASE_EMBEDDED {
51    public:
InputReader(UnicodeCache * unicode_cache,Vector<Char> s)52     InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
53         : index_(0),
54           buffer_(s),
55           unicode_cache_(unicode_cache) {
56       Next();
57     }
58 
position()59     int position() { return index_; }
60 
61     // Advance to the next character of the string.
Next()62     void Next() {
63       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
64       index_++;
65     }
66 
67     // Read a string of digits as an unsigned number. Cap value at
68     // kMaxSignificantDigits, but skip remaining digits if the numeral
69     // is longer.
ReadUnsignedNumeral()70     int ReadUnsignedNumeral() {
71       int n = 0;
72       int i = 0;
73       while (IsAsciiDigit()) {
74         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
75         i++;
76         Next();
77       }
78       return n;
79     }
80 
81     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
82     // lower-case prefix, and pad any remainder of the buffer with zeroes.
83     // Return word length.
ReadWord(uint32_t * prefix,int prefix_size)84     int ReadWord(uint32_t* prefix, int prefix_size) {
85       int len;
86       for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
87         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
88       }
89       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
90       return len;
91     }
92 
93     // The skip methods return whether they actually skipped something.
Skip(uint32_t c)94     bool Skip(uint32_t c) {
95       if (ch_ == c) {
96         Next();
97         return true;
98       }
99       return false;
100     }
101 
102     inline bool SkipWhiteSpace();
103     inline bool SkipParentheses();
104 
105     // Character testing/classification. Non-ASCII digits are not supported.
Is(uint32_t c)106     bool Is(uint32_t c) const { return ch_ == c; }
IsEnd()107     bool IsEnd() const { return ch_ == 0; }
IsAsciiDigit()108     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
IsAsciiAlphaOrAbove()109     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
IsAsciiSign()110     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
111 
112     // Return 1 for '+' and -1 for '-'.
GetAsciiSignValue()113     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
114 
115    private:
116     int index_;
117     Vector<Char> buffer_;
118     uint32_t ch_;
119     UnicodeCache* unicode_cache_;
120   };
121 
122   enum KeywordType {
123       INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
124   };
125 
126   struct DateToken {
127    public:
IsInvalidDateToken128     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
IsUnknownDateToken129     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
IsNumberDateToken130     bool IsNumber() { return tag_ == kNumberTag; }
IsSymbolDateToken131     bool IsSymbol() { return tag_ == kSymbolTag; }
IsWhiteSpaceDateToken132     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
IsEndOfInputDateToken133     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
IsKeywordDateToken134     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
135 
lengthDateToken136     int length() { return length_; }
137 
numberDateToken138     int number() {
139       DCHECK(IsNumber());
140       return value_;
141     }
keyword_typeDateToken142     KeywordType keyword_type() {
143       DCHECK(IsKeyword());
144       return static_cast<KeywordType>(tag_);
145     }
keyword_valueDateToken146     int keyword_value() {
147       DCHECK(IsKeyword());
148       return value_;
149     }
symbolDateToken150     char symbol() {
151       DCHECK(IsSymbol());
152       return static_cast<char>(value_);
153     }
IsSymbolDateToken154     bool IsSymbol(char symbol) {
155       return IsSymbol() && this->symbol() == symbol;
156     }
IsKeywordTypeDateToken157     bool IsKeywordType(KeywordType tag) {
158       return tag_ == tag;
159     }
IsFixedLengthNumberDateToken160     bool IsFixedLengthNumber(int length) {
161       return IsNumber() && length_ == length;
162     }
IsAsciiSignDateToken163     bool IsAsciiSign() {
164       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
165     }
ascii_signDateToken166     int ascii_sign() {
167       DCHECK(IsAsciiSign());
168       return 44 - value_;
169     }
IsKeywordZDateToken170     bool IsKeywordZ() {
171       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
172     }
IsUnknownDateToken173     bool IsUnknown(int character) {
174       return IsUnknown() && value_ == character;
175     }
176     // Factory functions.
KeywordDateToken177     static DateToken Keyword(KeywordType tag, int value, int length) {
178       return DateToken(tag, length, value);
179     }
NumberDateToken180     static DateToken Number(int value, int length) {
181       return DateToken(kNumberTag, length, value);
182     }
SymbolDateToken183     static DateToken Symbol(char symbol) {
184       return DateToken(kSymbolTag, 1, symbol);
185     }
EndOfInputDateToken186     static DateToken EndOfInput() {
187       return DateToken(kEndOfInputTag, 0, -1);
188     }
WhiteSpaceDateToken189     static DateToken WhiteSpace(int length) {
190       return DateToken(kWhiteSpaceTag, length, -1);
191     }
UnknownDateToken192     static DateToken Unknown() {
193       return DateToken(kUnknownTokenTag, 1, -1);
194     }
InvalidDateToken195     static DateToken Invalid() {
196       return DateToken(kInvalidTokenTag, 0, -1);
197     }
198 
199    private:
200     enum TagType {
201       kInvalidTokenTag = -6,
202       kUnknownTokenTag = -5,
203       kWhiteSpaceTag = -4,
204       kNumberTag = -3,
205       kSymbolTag = -2,
206       kEndOfInputTag = -1,
207       kKeywordTagStart = 0
208     };
DateTokenDateToken209     DateToken(int tag, int length, int value)
210         : tag_(tag),
211           length_(length),
212           value_(value) { }
213 
214     int tag_;
215     int length_;  // Number of characters.
216     int value_;
217   };
218 
219   template <typename Char>
220   class DateStringTokenizer {
221    public:
DateStringTokenizer(InputReader<Char> * in)222     explicit DateStringTokenizer(InputReader<Char>* in)
223         : in_(in), next_(Scan()) { }
Next()224     DateToken Next() {
225       DateToken result = next_;
226       next_ = Scan();
227       return result;
228     }
229 
Peek()230     DateToken Peek() {
231       return next_;
232     }
SkipSymbol(char symbol)233     bool SkipSymbol(char symbol) {
234       if (next_.IsSymbol(symbol)) {
235         next_ = Scan();
236         return true;
237       }
238       return false;
239     }
240 
241    private:
242     DateToken Scan();
243 
244     InputReader<Char>* in_;
245     DateToken next_;
246   };
247 
  // Derives a millisecond value from the given token; defined out of line.
  // NOTE(review): judging by the parameter name the token is expected to be
  // a number token - confirm against the definition.
  static int ReadMilliseconds(DateToken number);
249 
250   // KeywordTable maps names of months, time zones, am/pm to numbers.
251   class KeywordTable : public AllStatic {
252    public:
253     // Look up a word in the keyword table and return an index.
254     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
255     // and 'len' is the word length.
256     static int Lookup(const uint32_t* pre, int len);
257     // Get the type of the keyword at index i.
GetType(int i)258     static KeywordType GetType(int i) {
259       return static_cast<KeywordType>(array[i][kTypeOffset]);
260     }
261     // Get the value of the keyword at index i.
GetValue(int i)262     static int GetValue(int i) { return array[i][kValueOffset]; }
263 
264     static const int kPrefixLength = 3;
265     static const int kTypeOffset = kPrefixLength;
266     static const int kValueOffset = kTypeOffset + 1;
267     static const int kEntrySize = kValueOffset + 1;
268     static const int8_t array[][kEntrySize];
269   };
270 
271   class TimeZoneComposer BASE_EMBEDDED {
272    public:
TimeZoneComposer()273     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
Set(int offset_in_hours)274     void Set(int offset_in_hours) {
275       sign_ = offset_in_hours < 0 ? -1 : 1;
276       hour_ = offset_in_hours * sign_;
277       minute_ = 0;
278     }
SetSign(int sign)279     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
SetAbsoluteHour(int hour)280     void SetAbsoluteHour(int hour) { hour_ = hour; }
SetAbsoluteMinute(int minute)281     void SetAbsoluteMinute(int minute) { minute_ = minute; }
IsExpecting(int n)282     bool IsExpecting(int n) const {
283       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
284     }
IsUTC()285     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
286     bool Write(FixedArray* output);
IsEmpty()287     bool IsEmpty() { return hour_ == kNone; }
288    private:
289     int sign_;
290     int hour_;
291     int minute_;
292   };
293 
294   class TimeComposer BASE_EMBEDDED {
295    public:
TimeComposer()296     TimeComposer() : index_(0), hour_offset_(kNone) {}
IsEmpty()297     bool IsEmpty() const { return index_ == 0; }
IsExpecting(int n)298     bool IsExpecting(int n) const {
299       return (index_ == 1 && IsMinute(n)) ||
300              (index_ == 2 && IsSecond(n)) ||
301              (index_ == 3 && IsMillisecond(n));
302     }
Add(int n)303     bool Add(int n) {
304       return index_ < kSize ? (comp_[index_++] = n, true) : false;
305     }
AddFinal(int n)306     bool AddFinal(int n) {
307       if (!Add(n)) return false;
308       while (index_ < kSize) comp_[index_++] = 0;
309       return true;
310     }
SetHourOffset(int n)311     void SetHourOffset(int n) { hour_offset_ = n; }
312     bool Write(FixedArray* output);
313 
IsMinute(int x)314     static bool IsMinute(int x) { return Between(x, 0, 59); }
IsHour(int x)315     static bool IsHour(int x) { return Between(x, 0, 23); }
IsSecond(int x)316     static bool IsSecond(int x) { return Between(x, 0, 59); }
317 
318    private:
IsHour12(int x)319     static bool IsHour12(int x) { return Between(x, 0, 12); }
IsMillisecond(int x)320     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
321 
322     static const int kSize = 4;
323     int comp_[kSize];
324     int index_;
325     int hour_offset_;
326   };
327 
328   class DayComposer BASE_EMBEDDED {
329    public:
DayComposer()330     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
IsEmpty()331     bool IsEmpty() const { return index_ == 0; }
Add(int n)332     bool Add(int n) {
333       if (index_ < kSize) {
334         comp_[index_] = n;
335         index_++;
336         return true;
337       }
338       return false;
339     }
SetNamedMonth(int n)340     void SetNamedMonth(int n) { named_month_ = n; }
341     bool Write(FixedArray* output);
set_iso_date()342     void set_iso_date() { is_iso_date_ = true; }
IsMonth(int x)343     static bool IsMonth(int x) { return Between(x, 1, 12); }
IsDay(int x)344     static bool IsDay(int x) { return Between(x, 1, 31); }
345 
346    private:
347     static const int kSize = 3;
348     int comp_[kSize];
349     int index_;
350     int named_month_;
351     // If set, ensures that data is always parsed in year-month-date order.
352     bool is_iso_date_;
353   };
354 
  // Tries to parse an ES5 Date Time String. Returns the next token
  // to continue with in the legacy date string parser. If parsing is
  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
  // returns DateToken::Invalid(). Otherwise parsing continues in the
  // legacy parser.
  //
  // Tokens are read from 'scanner'. NOTE(review): 'day', 'time' and 'tz'
  // presumably receive the date, time and time zone components that were
  // recognized - confirm against the out-of-line definition.
  template <typename Char>
  static DateParser::DateToken ParseES5DateTime(
      DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
      TimeZoneComposer* tz);
364 };
365 
366 
367 }  // namespace internal
368 }  // namespace v8
369 
370 #endif  // V8_DATEPARSER_H_
371