// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4  
5  #ifndef V8_DATEPARSER_H_
6  #define V8_DATEPARSER_H_
7  
8  #include "src/allocation.h"
9  #include "src/char-predicates.h"
10  #include "src/unicode-cache.h"
11  
12  namespace v8 {
13  namespace internal {
14  
15  class DateParser : public AllStatic {
16   public:
17    // Parse the string as a date. If parsing succeeds, return true after
18    // filling out the output array as follows (all integers are Smis):
19    // [0]: year
20    // [1]: month (0 = Jan, 1 = Feb, ...)
21    // [2]: day
22    // [3]: hour
23    // [4]: minute
24    // [5]: second
25    // [6]: millisecond
26    // [7]: UTC offset in seconds, or null value if no timezone specified
27    // If parsing fails, return false (content of output array is not defined).
28    template <typename Char>
29    static bool Parse(Isolate* isolate, Vector<Char> str, FixedArray* output);
30  
// Field positions of the parse result. The enumerator values (0..7) follow
// the slot numbering documented for Parse()'s output array; OUTPUT_SIZE is
// the number of slots.
enum {
  YEAR,
  MONTH,
  DAY,
  HOUR,
  MINUTE,
  SECOND,
  MILLISECOND,
  UTC_OFFSET,
  OUTPUT_SIZE
};
34  
35   private:
// Range testing: returns true iff lo <= x <= hi. Requires lo <= hi.
// Both differences are computed on unsigned values, so one comparison covers
// the lower and the upper bound (a value below lo wraps around to a huge
// offset) and extreme arguments cannot cause signed integer overflow.
static inline bool Between(int x, int lo, int hi) {
  const unsigned offset = static_cast<unsigned>(x) - static_cast<unsigned>(lo);
  const unsigned width = static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
  return offset <= width;
}
40  
41    // Indicates a missing value.
42    static const int kNone = kMaxInt;
43  
// Maximal number of digits used to build the value of a numeral.
// Remaining digits are ignored.
static const int kMaxSignificantDigits = 9;
47  
48    // InputReader provides basic string parsing and character classification.
49    template <typename Char>
50    class InputReader BASE_EMBEDDED {
51     public:
InputReader(UnicodeCache * unicode_cache,Vector<Char> s)52      InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
53          : index_(0),
54            buffer_(s),
55            unicode_cache_(unicode_cache) {
56        Next();
57      }
58  
position()59      int position() { return index_; }
60  
61      // Advance to the next character of the string.
Next()62      void Next() {
63        ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
64        index_++;
65      }
66  
67      // Read a string of digits as an unsigned number. Cap value at
68      // kMaxSignificantDigits, but skip remaining digits if the numeral
69      // is longer.
ReadUnsignedNumeral()70      int ReadUnsignedNumeral() {
71        int n = 0;
72        int i = 0;
73        while (IsAsciiDigit()) {
74          if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
75          i++;
76          Next();
77        }
78        return n;
79      }
80  
81      // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
82      // lower-case prefix, and pad any remainder of the buffer with zeroes.
83      // Return word length.
ReadWord(uint32_t * prefix,int prefix_size)84      int ReadWord(uint32_t* prefix, int prefix_size) {
85        int len;
86        for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
87          if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
88        }
89        for (int i = len; i < prefix_size; i++) prefix[i] = 0;
90        return len;
91      }
92  
93      // The skip methods return whether they actually skipped something.
Skip(uint32_t c)94      bool Skip(uint32_t c) {
95        if (ch_ == c) {
96          Next();
97          return true;
98        }
99        return false;
100      }
101  
102      inline bool SkipWhiteSpace();
103      inline bool SkipParentheses();
104  
105      // Character testing/classification. Non-ASCII digits are not supported.
Is(uint32_t c)106      bool Is(uint32_t c) const { return ch_ == c; }
IsEnd()107      bool IsEnd() const { return ch_ == 0; }
IsAsciiDigit()108      bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
IsAsciiAlphaOrAbove()109      bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
IsAsciiSign()110      bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
111  
112      // Return 1 for '+' and -1 for '-'.
GetAsciiSignValue()113      int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
114  
115     private:
116      int index_;
117      Vector<Char> buffer_;
118      uint32_t ch_;
119      UnicodeCache* unicode_cache_;
120    };
121  
// Categories of keywords. DateToken stores these values directly as its tag
// and treats every tag >= 0 as a keyword, so the enumerators must stay
// non-negative.
enum KeywordType {
  INVALID,
  MONTH_NAME,
  TIME_ZONE_NAME,
  TIME_SEPARATOR,
  AM_PM
};
125  
126    struct DateToken {
127     public:
IsInvalidDateToken128      bool IsInvalid() { return tag_ == kInvalidTokenTag; }
IsUnknownDateToken129      bool IsUnknown() { return tag_ == kUnknownTokenTag; }
IsNumberDateToken130      bool IsNumber() { return tag_ == kNumberTag; }
IsSymbolDateToken131      bool IsSymbol() { return tag_ == kSymbolTag; }
IsWhiteSpaceDateToken132      bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
IsEndOfInputDateToken133      bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
IsKeywordDateToken134      bool IsKeyword() { return tag_ >= kKeywordTagStart; }
135  
lengthDateToken136      int length() { return length_; }
137  
numberDateToken138      int number() {
139        DCHECK(IsNumber());
140        return value_;
141      }
keyword_typeDateToken142      KeywordType keyword_type() {
143        DCHECK(IsKeyword());
144        return static_cast<KeywordType>(tag_);
145      }
keyword_valueDateToken146      int keyword_value() {
147        DCHECK(IsKeyword());
148        return value_;
149      }
symbolDateToken150      char symbol() {
151        DCHECK(IsSymbol());
152        return static_cast<char>(value_);
153      }
IsSymbolDateToken154      bool IsSymbol(char symbol) {
155        return IsSymbol() && this->symbol() == symbol;
156      }
IsKeywordTypeDateToken157      bool IsKeywordType(KeywordType tag) {
158        return tag_ == tag;
159      }
IsFixedLengthNumberDateToken160      bool IsFixedLengthNumber(int length) {
161        return IsNumber() && length_ == length;
162      }
IsAsciiSignDateToken163      bool IsAsciiSign() {
164        return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
165      }
ascii_signDateToken166      int ascii_sign() {
167        DCHECK(IsAsciiSign());
168        return 44 - value_;
169      }
IsKeywordZDateToken170      bool IsKeywordZ() {
171        return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
172      }
IsUnknownDateToken173      bool IsUnknown(int character) {
174        return IsUnknown() && value_ == character;
175      }
176      // Factory functions.
KeywordDateToken177      static DateToken Keyword(KeywordType tag, int value, int length) {
178        return DateToken(tag, length, value);
179      }
NumberDateToken180      static DateToken Number(int value, int length) {
181        return DateToken(kNumberTag, length, value);
182      }
SymbolDateToken183      static DateToken Symbol(char symbol) {
184        return DateToken(kSymbolTag, 1, symbol);
185      }
EndOfInputDateToken186      static DateToken EndOfInput() {
187        return DateToken(kEndOfInputTag, 0, -1);
188      }
WhiteSpaceDateToken189      static DateToken WhiteSpace(int length) {
190        return DateToken(kWhiteSpaceTag, length, -1);
191      }
UnknownDateToken192      static DateToken Unknown() {
193        return DateToken(kUnknownTokenTag, 1, -1);
194      }
InvalidDateToken195      static DateToken Invalid() {
196        return DateToken(kInvalidTokenTag, 0, -1);
197      }
198  
199     private:
200      enum TagType {
201        kInvalidTokenTag = -6,
202        kUnknownTokenTag = -5,
203        kWhiteSpaceTag = -4,
204        kNumberTag = -3,
205        kSymbolTag = -2,
206        kEndOfInputTag = -1,
207        kKeywordTagStart = 0
208      };
DateTokenDateToken209      DateToken(int tag, int length, int value)
210          : tag_(tag),
211            length_(length),
212            value_(value) { }
213  
214      int tag_;
215      int length_;  // Number of characters.
216      int value_;
217    };
218  
219    template <typename Char>
220    class DateStringTokenizer {
221     public:
DateStringTokenizer(InputReader<Char> * in)222      explicit DateStringTokenizer(InputReader<Char>* in)
223          : in_(in), next_(Scan()) { }
Next()224      DateToken Next() {
225        DateToken result = next_;
226        next_ = Scan();
227        return result;
228      }
229  
Peek()230      DateToken Peek() {
231        return next_;
232      }
SkipSymbol(char symbol)233      bool SkipSymbol(char symbol) {
234        if (next_.IsSymbol(symbol)) {
235          next_ = Scan();
236          return true;
237        }
238        return false;
239      }
240  
241     private:
242      DateToken Scan();
243  
244      InputReader<Char>* in_;
245      DateToken next_;
246    };
247  
248    static int ReadMilliseconds(DateToken number);
249  
250    // KeywordTable maps names of months, time zones, am/pm to numbers.
251    class KeywordTable : public AllStatic {
252     public:
253      // Look up a word in the keyword table and return an index.
254      // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
255      // and 'len' is the word length.
256      static int Lookup(const uint32_t* pre, int len);
257      // Get the type of the keyword at index i.
GetType(int i)258      static KeywordType GetType(int i) {
259        return static_cast<KeywordType>(array[i][kTypeOffset]);
260      }
261      // Get the value of the keyword at index i.
GetValue(int i)262      static int GetValue(int i) { return array[i][kValueOffset]; }
263  
264      static const int kPrefixLength = 3;
265      static const int kTypeOffset = kPrefixLength;
266      static const int kValueOffset = kTypeOffset + 1;
267      static const int kEntrySize = kValueOffset + 1;
268      static const int8_t array[][kEntrySize];
269    };
270  
271    class TimeZoneComposer BASE_EMBEDDED {
272     public:
TimeZoneComposer()273      TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
Set(int offset_in_hours)274      void Set(int offset_in_hours) {
275        sign_ = offset_in_hours < 0 ? -1 : 1;
276        hour_ = offset_in_hours * sign_;
277        minute_ = 0;
278      }
SetSign(int sign)279      void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
SetAbsoluteHour(int hour)280      void SetAbsoluteHour(int hour) { hour_ = hour; }
SetAbsoluteMinute(int minute)281      void SetAbsoluteMinute(int minute) { minute_ = minute; }
IsExpecting(int n)282      bool IsExpecting(int n) const {
283        return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
284      }
IsUTC()285      bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
286      bool Write(FixedArray* output);
IsEmpty()287      bool IsEmpty() { return hour_ == kNone; }
288     private:
289      int sign_;
290      int hour_;
291      int minute_;
292    };
293  
294    class TimeComposer BASE_EMBEDDED {
295     public:
TimeComposer()296      TimeComposer() : index_(0), hour_offset_(kNone) {}
IsEmpty()297      bool IsEmpty() const { return index_ == 0; }
IsExpecting(int n)298      bool IsExpecting(int n) const {
299        return (index_ == 1 && IsMinute(n)) ||
300               (index_ == 2 && IsSecond(n)) ||
301               (index_ == 3 && IsMillisecond(n));
302      }
Add(int n)303      bool Add(int n) {
304        return index_ < kSize ? (comp_[index_++] = n, true) : false;
305      }
AddFinal(int n)306      bool AddFinal(int n) {
307        if (!Add(n)) return false;
308        while (index_ < kSize) comp_[index_++] = 0;
309        return true;
310      }
SetHourOffset(int n)311      void SetHourOffset(int n) { hour_offset_ = n; }
312      bool Write(FixedArray* output);
313  
IsMinute(int x)314      static bool IsMinute(int x) { return Between(x, 0, 59); }
IsHour(int x)315      static bool IsHour(int x) { return Between(x, 0, 23); }
IsSecond(int x)316      static bool IsSecond(int x) { return Between(x, 0, 59); }
317  
318     private:
IsHour12(int x)319      static bool IsHour12(int x) { return Between(x, 0, 12); }
IsMillisecond(int x)320      static bool IsMillisecond(int x) { return Between(x, 0, 999); }
321  
322      static const int kSize = 4;
323      int comp_[kSize];
324      int index_;
325      int hour_offset_;
326    };
327  
328    class DayComposer BASE_EMBEDDED {
329     public:
DayComposer()330      DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
IsEmpty()331      bool IsEmpty() const { return index_ == 0; }
Add(int n)332      bool Add(int n) {
333        if (index_ < kSize) {
334          comp_[index_] = n;
335          index_++;
336          return true;
337        }
338        return false;
339      }
SetNamedMonth(int n)340      void SetNamedMonth(int n) { named_month_ = n; }
341      bool Write(FixedArray* output);
set_iso_date()342      void set_iso_date() { is_iso_date_ = true; }
IsMonth(int x)343      static bool IsMonth(int x) { return Between(x, 1, 12); }
IsDay(int x)344      static bool IsDay(int x) { return Between(x, 1, 31); }
345  
346     private:
347      static const int kSize = 3;
348      int comp_[kSize];
349      int index_;
350      int named_month_;
351      // If set, ensures that data is always parsed in year-month-date order.
352      bool is_iso_date_;
353    };
354  
355    // Tries to parse an ES5 Date Time String. Returns the next token
356    // to continue with in the legacy date string parser. If parsing is
357    // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
358    // returns DateToken::Invalid(). Otherwise parsing continues in the
359    // legacy parser.
360    template <typename Char>
361    static DateParser::DateToken ParseES5DateTime(
362        DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
363        TimeZoneComposer* tz);
364  };
365  
366  
367  }  // namespace internal
368  }  // namespace v8
369  
370  #endif  // V8_DATEPARSER_H_
371