1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_DATEPARSER_H_
6 #define V8_DATEPARSER_H_
7 
8 #include "src/allocation.h"
9 #include "src/char-predicates-inl.h"
10 
11 namespace v8 {
12 namespace internal {
13 
14 class DateParser : public AllStatic {
15  public:
16   // Parse the string as a date. If parsing succeeds, return true after
17   // filling out the output array as follows (all integers are Smis):
18   // [0]: year
19   // [1]: month (0 = Jan, 1 = Feb, ...)
20   // [2]: day
21   // [3]: hour
22   // [4]: minute
23   // [5]: second
24   // [6]: millisecond
25   // [7]: UTC offset in seconds, or null value if no timezone specified
26   // If parsing fails, return false (content of output array is not defined).
27   template <typename Char>
28   static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
29 
30   enum {
31     YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
32   };
33 
34  private:
35   // Range testing
Between(int x,int lo,int hi)36   static inline bool Between(int x, int lo, int hi) {
37     return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
38   }
39 
  // Indicates a missing value. Equal to kMaxInt; the composer classes below
  // initialize their not-yet-parsed fields to it.
  static const int kNone = kMaxInt;

  // Maximal number of digits used to build the value of a numeral.
  // Remaining digits are ignored: InputReader::ReadUnsignedNumeral still
  // consumes them, but they do not contribute to the returned value.
  static const int kMaxSignificantDigits = 9;
46 
47   // InputReader provides basic string parsing and character classification.
48   template <typename Char>
49   class InputReader BASE_EMBEDDED {
50    public:
InputReader(UnicodeCache * unicode_cache,Vector<Char> s)51     InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
52         : index_(0),
53           buffer_(s),
54           unicode_cache_(unicode_cache) {
55       Next();
56     }
57 
position()58     int position() { return index_; }
59 
60     // Advance to the next character of the string.
Next()61     void Next() {
62       ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
63       index_++;
64     }
65 
66     // Read a string of digits as an unsigned number. Cap value at
67     // kMaxSignificantDigits, but skip remaining digits if the numeral
68     // is longer.
ReadUnsignedNumeral()69     int ReadUnsignedNumeral() {
70       int n = 0;
71       int i = 0;
72       while (IsAsciiDigit()) {
73         if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
74         i++;
75         Next();
76       }
77       return n;
78     }
79 
80     // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
81     // lower-case prefix, and pad any remainder of the buffer with zeroes.
82     // Return word length.
ReadWord(uint32_t * prefix,int prefix_size)83     int ReadWord(uint32_t* prefix, int prefix_size) {
84       int len;
85       for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
86         if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
87       }
88       for (int i = len; i < prefix_size; i++) prefix[i] = 0;
89       return len;
90     }
91 
92     // The skip methods return whether they actually skipped something.
Skip(uint32_t c)93     bool Skip(uint32_t c) {
94       if (ch_ == c) {
95         Next();
96         return true;
97       }
98       return false;
99     }
100 
SkipWhiteSpace()101     bool SkipWhiteSpace() {
102       if (unicode_cache_->IsWhiteSpaceOrLineTerminator(ch_)) {
103         Next();
104         return true;
105       }
106       return false;
107     }
108 
SkipParentheses()109     bool SkipParentheses() {
110       if (ch_ != '(') return false;
111       int balance = 0;
112       do {
113         if (ch_ == ')') --balance;
114         else if (ch_ == '(') ++balance;
115         Next();
116       } while (balance > 0 && ch_);
117       return true;
118     }
119 
120     // Character testing/classification. Non-ASCII digits are not supported.
Is(uint32_t c)121     bool Is(uint32_t c) const { return ch_ == c; }
IsEnd()122     bool IsEnd() const { return ch_ == 0; }
IsAsciiDigit()123     bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
IsAsciiAlphaOrAbove()124     bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
IsAsciiSign()125     bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
126 
127     // Return 1 for '+' and -1 for '-'.
GetAsciiSignValue()128     int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
129 
130    private:
131     int index_;
132     Vector<Char> buffer_;
133     uint32_t ch_;
134     UnicodeCache* unicode_cache_;
135   };
136 
137   enum KeywordType {
138       INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
139   };
140 
141   struct DateToken {
142    public:
IsInvalidDateToken143     bool IsInvalid() { return tag_ == kInvalidTokenTag; }
IsUnknownDateToken144     bool IsUnknown() { return tag_ == kUnknownTokenTag; }
IsNumberDateToken145     bool IsNumber() { return tag_ == kNumberTag; }
IsSymbolDateToken146     bool IsSymbol() { return tag_ == kSymbolTag; }
IsWhiteSpaceDateToken147     bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
IsEndOfInputDateToken148     bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
IsKeywordDateToken149     bool IsKeyword() { return tag_ >= kKeywordTagStart; }
150 
lengthDateToken151     int length() { return length_; }
152 
numberDateToken153     int number() {
154       DCHECK(IsNumber());
155       return value_;
156     }
keyword_typeDateToken157     KeywordType keyword_type() {
158       DCHECK(IsKeyword());
159       return static_cast<KeywordType>(tag_);
160     }
keyword_valueDateToken161     int keyword_value() {
162       DCHECK(IsKeyword());
163       return value_;
164     }
symbolDateToken165     char symbol() {
166       DCHECK(IsSymbol());
167       return static_cast<char>(value_);
168     }
IsSymbolDateToken169     bool IsSymbol(char symbol) {
170       return IsSymbol() && this->symbol() == symbol;
171     }
IsKeywordTypeDateToken172     bool IsKeywordType(KeywordType tag) {
173       return tag_ == tag;
174     }
IsFixedLengthNumberDateToken175     bool IsFixedLengthNumber(int length) {
176       return IsNumber() && length_ == length;
177     }
IsAsciiSignDateToken178     bool IsAsciiSign() {
179       return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
180     }
ascii_signDateToken181     int ascii_sign() {
182       DCHECK(IsAsciiSign());
183       return 44 - value_;
184     }
IsKeywordZDateToken185     bool IsKeywordZ() {
186       return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
187     }
IsUnknownDateToken188     bool IsUnknown(int character) {
189       return IsUnknown() && value_ == character;
190     }
191     // Factory functions.
KeywordDateToken192     static DateToken Keyword(KeywordType tag, int value, int length) {
193       return DateToken(tag, length, value);
194     }
NumberDateToken195     static DateToken Number(int value, int length) {
196       return DateToken(kNumberTag, length, value);
197     }
SymbolDateToken198     static DateToken Symbol(char symbol) {
199       return DateToken(kSymbolTag, 1, symbol);
200     }
EndOfInputDateToken201     static DateToken EndOfInput() {
202       return DateToken(kEndOfInputTag, 0, -1);
203     }
WhiteSpaceDateToken204     static DateToken WhiteSpace(int length) {
205       return DateToken(kWhiteSpaceTag, length, -1);
206     }
UnknownDateToken207     static DateToken Unknown() {
208       return DateToken(kUnknownTokenTag, 1, -1);
209     }
InvalidDateToken210     static DateToken Invalid() {
211       return DateToken(kInvalidTokenTag, 0, -1);
212     }
213 
214    private:
215     enum TagType {
216       kInvalidTokenTag = -6,
217       kUnknownTokenTag = -5,
218       kWhiteSpaceTag = -4,
219       kNumberTag = -3,
220       kSymbolTag = -2,
221       kEndOfInputTag = -1,
222       kKeywordTagStart = 0
223     };
DateTokenDateToken224     DateToken(int tag, int length, int value)
225         : tag_(tag),
226           length_(length),
227           value_(value) { }
228 
229     int tag_;
230     int length_;  // Number of characters.
231     int value_;
232   };
233 
234   template <typename Char>
235   class DateStringTokenizer {
236    public:
DateStringTokenizer(InputReader<Char> * in)237     explicit DateStringTokenizer(InputReader<Char>* in)
238         : in_(in), next_(Scan()) { }
Next()239     DateToken Next() {
240       DateToken result = next_;
241       next_ = Scan();
242       return result;
243     }
244 
Peek()245     DateToken Peek() {
246       return next_;
247     }
SkipSymbol(char symbol)248     bool SkipSymbol(char symbol) {
249       if (next_.IsSymbol(symbol)) {
250         next_ = Scan();
251         return true;
252       }
253       return false;
254     }
255 
256    private:
257     DateToken Scan();
258 
259     InputReader<Char>* in_;
260     DateToken next_;
261   };
262 
  // Defined out of line.
  // NOTE(review): judging by the name and signature, this converts a number
  // token into a millisecond count — confirm against the definition.
  static int ReadMilliseconds(DateToken number);
264 
265   // KeywordTable maps names of months, time zones, am/pm to numbers.
266   class KeywordTable : public AllStatic {
267    public:
268     // Look up a word in the keyword table and return an index.
269     // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
270     // and 'len' is the word length.
271     static int Lookup(const uint32_t* pre, int len);
272     // Get the type of the keyword at index i.
GetType(int i)273     static KeywordType GetType(int i) {
274       return static_cast<KeywordType>(array[i][kTypeOffset]);
275     }
276     // Get the value of the keyword at index i.
GetValue(int i)277     static int GetValue(int i) { return array[i][kValueOffset]; }
278 
279     static const int kPrefixLength = 3;
280     static const int kTypeOffset = kPrefixLength;
281     static const int kValueOffset = kTypeOffset + 1;
282     static const int kEntrySize = kValueOffset + 1;
283     static const int8_t array[][kEntrySize];
284   };
285 
286   class TimeZoneComposer BASE_EMBEDDED {
287    public:
TimeZoneComposer()288     TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
Set(int offset_in_hours)289     void Set(int offset_in_hours) {
290       sign_ = offset_in_hours < 0 ? -1 : 1;
291       hour_ = offset_in_hours * sign_;
292       minute_ = 0;
293     }
SetSign(int sign)294     void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
SetAbsoluteHour(int hour)295     void SetAbsoluteHour(int hour) { hour_ = hour; }
SetAbsoluteMinute(int minute)296     void SetAbsoluteMinute(int minute) { minute_ = minute; }
IsExpecting(int n)297     bool IsExpecting(int n) const {
298       return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
299     }
IsUTC()300     bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
301     bool Write(FixedArray* output);
IsEmpty()302     bool IsEmpty() { return hour_ == kNone; }
303    private:
304     int sign_;
305     int hour_;
306     int minute_;
307   };
308 
309   class TimeComposer BASE_EMBEDDED {
310    public:
TimeComposer()311     TimeComposer() : index_(0), hour_offset_(kNone) {}
IsEmpty()312     bool IsEmpty() const { return index_ == 0; }
IsExpecting(int n)313     bool IsExpecting(int n) const {
314       return (index_ == 1 && IsMinute(n)) ||
315              (index_ == 2 && IsSecond(n)) ||
316              (index_ == 3 && IsMillisecond(n));
317     }
Add(int n)318     bool Add(int n) {
319       return index_ < kSize ? (comp_[index_++] = n, true) : false;
320     }
AddFinal(int n)321     bool AddFinal(int n) {
322       if (!Add(n)) return false;
323       while (index_ < kSize) comp_[index_++] = 0;
324       return true;
325     }
SetHourOffset(int n)326     void SetHourOffset(int n) { hour_offset_ = n; }
327     bool Write(FixedArray* output);
328 
IsMinute(int x)329     static bool IsMinute(int x) { return Between(x, 0, 59); }
IsHour(int x)330     static bool IsHour(int x) { return Between(x, 0, 23); }
IsSecond(int x)331     static bool IsSecond(int x) { return Between(x, 0, 59); }
332 
333    private:
IsHour12(int x)334     static bool IsHour12(int x) { return Between(x, 0, 12); }
IsMillisecond(int x)335     static bool IsMillisecond(int x) { return Between(x, 0, 999); }
336 
337     static const int kSize = 4;
338     int comp_[kSize];
339     int index_;
340     int hour_offset_;
341   };
342 
343   class DayComposer BASE_EMBEDDED {
344    public:
DayComposer()345     DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
IsEmpty()346     bool IsEmpty() const { return index_ == 0; }
Add(int n)347     bool Add(int n) {
348       if (index_ < kSize) {
349         comp_[index_] = n;
350         index_++;
351         return true;
352       }
353       return false;
354     }
SetNamedMonth(int n)355     void SetNamedMonth(int n) { named_month_ = n; }
356     bool Write(FixedArray* output);
set_iso_date()357     void set_iso_date() { is_iso_date_ = true; }
IsMonth(int x)358     static bool IsMonth(int x) { return Between(x, 1, 12); }
IsDay(int x)359     static bool IsDay(int x) { return Between(x, 1, 31); }
360 
361    private:
362     static const int kSize = 3;
363     int comp_[kSize];
364     int index_;
365     int named_month_;
366     // If set, ensures that data is always parsed in year-month-date order.
367     bool is_iso_date_;
368   };
369 
370   // Tries to parse an ES5 Date Time String. Returns the next token
371   // to continue with in the legacy date string parser. If parsing is
372   // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
373   // returns DateToken::Invalid(). Otherwise parsing continues in the
374   // legacy parser.
375   template <typename Char>
376   static DateParser::DateToken ParseES5DateTime(
377       DateStringTokenizer<Char>* scanner,
378       DayComposer* day,
379       TimeComposer* time,
380       TimeZoneComposer* tz);
381 };
382 
383 
384 } }  // namespace v8::internal
385 
386 #endif  // V8_DATEPARSER_H_
387