1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Features shared by parsing and pre-parsing scanners.
6 
7 #ifndef V8_PARSING_SCANNER_H_
8 #define V8_PARSING_SCANNER_H_
9 
10 #include "src/allocation.h"
11 #include "src/base/logging.h"
12 #include "src/char-predicates.h"
13 #include "src/globals.h"
14 #include "src/hashmap.h"
15 #include "src/list.h"
16 #include "src/parsing/token.h"
17 #include "src/unicode.h"
18 #include "src/unicode-decoder.h"
19 #include "src/utils.h"
20 
21 namespace v8 {
22 namespace internal {
23 
24 
25 class AstRawString;
26 class AstValueFactory;
27 class ParserRecorder;
28 class UnicodeCache;
29 
30 
31 // ---------------------------------------------------------------------
32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
33 // A code unit is a 16 bit value representing either a 16 bit code point
34 // or one part of a surrogate pair that make a single 21 bit code point.
35 
36 class Utf16CharacterStream {
37  public:
Utf16CharacterStream()38   Utf16CharacterStream() : pos_(0) { }
~Utf16CharacterStream()39   virtual ~Utf16CharacterStream() { }
40 
41   // Returns and advances past the next UTF-16 code unit in the input
42   // stream. If there are no more code units, it returns a negative
43   // value.
Advance()44   inline uc32 Advance() {
45     if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
46       pos_++;
47       return static_cast<uc32>(*(buffer_cursor_++));
48     }
49     // Note: currently the following increment is necessary to avoid a
50     // parser problem! The scanner treats the final kEndOfInput as
51     // a code unit with a position, and does math relative to that
52     // position.
53     pos_++;
54 
55     return kEndOfInput;
56   }
57 
58   // Return the current position in the code unit stream.
59   // Starts at zero.
pos()60   inline size_t pos() const { return pos_; }
61 
62   // Skips forward past the next code_unit_count UTF-16 code units
63   // in the input, or until the end of input if that comes sooner.
64   // Returns the number of code units actually skipped. If less
65   // than code_unit_count,
SeekForward(size_t code_unit_count)66   inline size_t SeekForward(size_t code_unit_count) {
67     size_t buffered_chars = buffer_end_ - buffer_cursor_;
68     if (code_unit_count <= buffered_chars) {
69       buffer_cursor_ += code_unit_count;
70       pos_ += code_unit_count;
71       return code_unit_count;
72     }
73     return SlowSeekForward(code_unit_count);
74   }
75 
76   // Pushes back the most recently read UTF-16 code unit (or negative
77   // value if at end of input), i.e., the value returned by the most recent
78   // call to Advance.
79   // Must not be used right after calling SeekForward.
80   virtual void PushBack(int32_t code_unit) = 0;
81 
82   virtual bool SetBookmark();
83   virtual void ResetToBookmark();
84 
85  protected:
86   static const uc32 kEndOfInput = -1;
87 
88   // Ensures that the buffer_cursor_ points to the code_unit at
89   // position pos_ of the input, if possible. If the position
90   // is at or after the end of the input, return false. If there
91   // are more code_units available, return true.
92   virtual bool ReadBlock() = 0;
93   virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
94 
95   const uint16_t* buffer_cursor_;
96   const uint16_t* buffer_end_;
97   size_t pos_;
98 };
99 
100 
101 // ---------------------------------------------------------------------
102 // DuplicateFinder discovers duplicate symbols.
103 
104 class DuplicateFinder {
105  public:
DuplicateFinder(UnicodeCache * constants)106   explicit DuplicateFinder(UnicodeCache* constants)
107       : unicode_constants_(constants),
108         backing_store_(16),
109         map_(&Match) { }
110 
111   int AddOneByteSymbol(Vector<const uint8_t> key, int value);
112   int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
113   // Add a a number literal by converting it (if necessary)
114   // to the string that ToString(ToNumber(literal)) would generate.
115   // and then adding that string with AddOneByteSymbol.
116   // This string is the actual value used as key in an object literal,
117   // and the one that must be different from the other keys.
118   int AddNumber(Vector<const uint8_t> key, int value);
119 
120  private:
121   int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
122   // Backs up the key and its length in the backing store.
123   // The backup is stored with a base 127 encoding of the
124   // length (plus a bit saying whether the string is one byte),
125   // followed by the bytes of the key.
126   uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
127 
128   // Compare two encoded keys (both pointing into the backing store)
129   // for having the same base-127 encoded lengths and representation.
130   // and then having the same 'length' bytes following.
131   static bool Match(void* first, void* second);
132   // Creates a hash from a sequence of bytes.
133   static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
134   // Checks whether a string containing a JS number is its canonical
135   // form.
136   static bool IsNumberCanonical(Vector<const uint8_t> key);
137 
138   // Size of buffer. Sufficient for using it to call DoubleToCString in
139   // from conversions.h.
140   static const int kBufferSize = 100;
141 
142   UnicodeCache* unicode_constants_;
143   // Backing store used to store strings used as hashmap keys.
144   SequenceCollector<unsigned char> backing_store_;
145   HashMap map_;
146   // Buffer used for string->number->canonical string conversions.
147   char number_buffer_[kBufferSize];
148 };
149 
150 
151 // ----------------------------------------------------------------------------
152 // LiteralBuffer -  Collector of chars of literals.
153 
154 class LiteralBuffer {
155  public:
LiteralBuffer()156   LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
157 
~LiteralBuffer()158   ~LiteralBuffer() { backing_store_.Dispose(); }
159 
INLINE(void AddChar (uint32_t code_unit))160   INLINE(void AddChar(uint32_t code_unit)) {
161     if (position_ >= backing_store_.length()) ExpandBuffer();
162     if (is_one_byte_) {
163       if (code_unit <= unibrow::Latin1::kMaxChar) {
164         backing_store_[position_] = static_cast<byte>(code_unit);
165         position_ += kOneByteSize;
166         return;
167       }
168       ConvertToTwoByte();
169     }
170     if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
171       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
172       position_ += kUC16Size;
173     } else {
174       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
175           unibrow::Utf16::LeadSurrogate(code_unit);
176       position_ += kUC16Size;
177       if (position_ >= backing_store_.length()) ExpandBuffer();
178       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
179           unibrow::Utf16::TrailSurrogate(code_unit);
180       position_ += kUC16Size;
181     }
182   }
183 
is_one_byte()184   bool is_one_byte() const { return is_one_byte_; }
185 
is_contextual_keyword(Vector<const char> keyword)186   bool is_contextual_keyword(Vector<const char> keyword) const {
187     return is_one_byte() && keyword.length() == position_ &&
188         (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
189   }
190 
two_byte_literal()191   Vector<const uint16_t> two_byte_literal() const {
192     DCHECK(!is_one_byte_);
193     DCHECK((position_ & 0x1) == 0);
194     return Vector<const uint16_t>(
195         reinterpret_cast<const uint16_t*>(backing_store_.start()),
196         position_ >> 1);
197   }
198 
one_byte_literal()199   Vector<const uint8_t> one_byte_literal() const {
200     DCHECK(is_one_byte_);
201     return Vector<const uint8_t>(
202         reinterpret_cast<const uint8_t*>(backing_store_.start()),
203         position_);
204   }
205 
length()206   int length() const {
207     return is_one_byte_ ? position_ : (position_ >> 1);
208   }
209 
ReduceLength(int delta)210   void ReduceLength(int delta) {
211     position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
212   }
213 
Reset()214   void Reset() {
215     position_ = 0;
216     is_one_byte_ = true;
217   }
218 
219   Handle<String> Internalize(Isolate* isolate) const;
220 
CopyFrom(const LiteralBuffer * other)221   void CopyFrom(const LiteralBuffer* other) {
222     if (other == nullptr) {
223       Reset();
224     } else {
225       is_one_byte_ = other->is_one_byte_;
226       position_ = other->position_;
227       backing_store_.Dispose();
228       backing_store_ = other->backing_store_.Clone();
229     }
230   }
231 
232  private:
233   static const int kInitialCapacity = 16;
234   static const int kGrowthFactory = 4;
235   static const int kMinConversionSlack = 256;
236   static const int kMaxGrowth = 1 * MB;
NewCapacity(int min_capacity)237   inline int NewCapacity(int min_capacity) {
238     int capacity = Max(min_capacity, backing_store_.length());
239     int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
240     return new_capacity;
241   }
242 
ExpandBuffer()243   void ExpandBuffer() {
244     Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
245     MemCopy(new_store.start(), backing_store_.start(), position_);
246     backing_store_.Dispose();
247     backing_store_ = new_store;
248   }
249 
ConvertToTwoByte()250   void ConvertToTwoByte() {
251     DCHECK(is_one_byte_);
252     Vector<byte> new_store;
253     int new_content_size = position_ * kUC16Size;
254     if (new_content_size >= backing_store_.length()) {
255       // Ensure room for all currently read code units as UC16 as well
256       // as the code unit about to be stored.
257       new_store = Vector<byte>::New(NewCapacity(new_content_size));
258     } else {
259       new_store = backing_store_;
260     }
261     uint8_t* src = backing_store_.start();
262     uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
263     for (int i = position_ - 1; i >= 0; i--) {
264       dst[i] = src[i];
265     }
266     if (new_store.start() != backing_store_.start()) {
267       backing_store_.Dispose();
268       backing_store_ = new_store;
269     }
270     position_ = new_content_size;
271     is_one_byte_ = false;
272   }
273 
274   bool is_one_byte_;
275   int position_;
276   Vector<byte> backing_store_;
277 
278   DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
279 };
280 
281 
282 // ----------------------------------------------------------------------------
283 // JavaScript Scanner.
284 
285 class Scanner {
286  public:
287   // Scoped helper for literal recording. Automatically drops the literal
288   // if aborting the scanning before it's complete.
289   class LiteralScope {
290    public:
LiteralScope(Scanner * self)291     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
292       scanner_->StartLiteral();
293     }
~LiteralScope()294      ~LiteralScope() {
295        if (!complete_) scanner_->DropLiteral();
296      }
Complete()297     void Complete() {
298       complete_ = true;
299     }
300 
301    private:
302     Scanner* scanner_;
303     bool complete_;
304   };
305 
306   // Scoped helper for a re-settable bookmark.
307   class BookmarkScope {
308    public:
BookmarkScope(Scanner * scanner)309     explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {
310       DCHECK_NOT_NULL(scanner_);
311     }
~BookmarkScope()312     ~BookmarkScope() { scanner_->DropBookmark(); }
313 
Set()314     bool Set() { return scanner_->SetBookmark(); }
Reset()315     void Reset() { scanner_->ResetToBookmark(); }
HasBeenSet()316     bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }
HasBeenReset()317     bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }
318 
319    private:
320     Scanner* scanner_;
321 
322     DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
323   };
324 
325   // Representation of an interval of source positions.
326   struct Location {
LocationLocation327     Location(int b, int e) : beg_pos(b), end_pos(e) { }
LocationLocation328     Location() : beg_pos(0), end_pos(0) { }
329 
IsValidLocation330     bool IsValid() const {
331       return beg_pos >= 0 && end_pos >= beg_pos;
332     }
333 
invalidLocation334     static Location invalid() { return Location(-1, -1); }
335 
336     int beg_pos;
337     int end_pos;
338   };
339 
340   // -1 is outside of the range of any real source code.
341   static const int kNoOctalLocation = -1;
342 
343   explicit Scanner(UnicodeCache* scanner_contants);
344 
345   void Initialize(Utf16CharacterStream* source);
346 
347   // Returns the next token and advances input.
348   Token::Value Next();
349   // Returns the token following peek()
350   Token::Value PeekAhead();
351   // Returns the current token again.
current_token()352   Token::Value current_token() { return current_.token; }
353   // Returns the location information for the current token
354   // (the token last returned by Next()).
location()355   Location location() const { return current_.location; }
356 
357   // Similar functions for the upcoming token.
358 
359   // One token look-ahead (past the token returned by Next()).
peek()360   Token::Value peek() const { return next_.token; }
361 
peek_location()362   Location peek_location() const { return next_.location; }
363 
literal_contains_escapes()364   bool literal_contains_escapes() const {
365     return LiteralContainsEscapes(current_);
366   }
next_literal_contains_escapes()367   bool next_literal_contains_escapes() const {
368     return LiteralContainsEscapes(next_);
369   }
is_literal_contextual_keyword(Vector<const char> keyword)370   bool is_literal_contextual_keyword(Vector<const char> keyword) {
371     DCHECK_NOT_NULL(current_.literal_chars);
372     return current_.literal_chars->is_contextual_keyword(keyword);
373   }
is_next_contextual_keyword(Vector<const char> keyword)374   bool is_next_contextual_keyword(Vector<const char> keyword) {
375     DCHECK_NOT_NULL(next_.literal_chars);
376     return next_.literal_chars->is_contextual_keyword(keyword);
377   }
378 
379   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
380   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
381   const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
382 
383   double DoubleValue();
384   bool ContainsDot();
385   bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
386     if (is_literal_one_byte() &&
387         literal_length() == length &&
388         (allow_escapes || !literal_contains_escapes())) {
389       const char* token =
390           reinterpret_cast<const char*>(literal_one_byte_string().start());
391       return !strncmp(token, data, length);
392     }
393     return false;
394   }
UnescapedLiteralMatches(const char * data,int length)395   inline bool UnescapedLiteralMatches(const char* data, int length) {
396     return LiteralMatches(data, length, false);
397   }
398 
IsGetOrSet(bool * is_get,bool * is_set)399   void IsGetOrSet(bool* is_get, bool* is_set) {
400     if (is_literal_one_byte() &&
401         literal_length() == 3 &&
402         !literal_contains_escapes()) {
403       const char* token =
404           reinterpret_cast<const char*>(literal_one_byte_string().start());
405       *is_get = strncmp(token, "get", 3) == 0;
406       *is_set = !*is_get && strncmp(token, "set", 3) == 0;
407     }
408   }
409 
410   int FindSymbol(DuplicateFinder* finder, int value);
411 
unicode_cache()412   UnicodeCache* unicode_cache() { return unicode_cache_; }
413 
414   // Returns the location of the last seen octal literal.
octal_position()415   Location octal_position() const { return octal_pos_; }
clear_octal_position()416   void clear_octal_position() { octal_pos_ = Location::invalid(); }
417 
418   // Returns the value of the last smi that was scanned.
smi_value()419   int smi_value() const { return current_.smi_value_; }
420 
421   // Seek forward to the given position.  This operation does not
422   // work in general, for instance when there are pushed back
423   // characters, but works for seeking forward until simple delimiter
424   // tokens, which is what it is used for.
425   void SeekForward(int pos);
426 
427   // Returns true if there was a line terminator before the peek'ed token,
428   // possibly inside a multi-line comment.
HasAnyLineTerminatorBeforeNext()429   bool HasAnyLineTerminatorBeforeNext() const {
430     return has_line_terminator_before_next_ ||
431            has_multiline_comment_before_next_;
432   }
433 
434   // Scans the input as a regular expression pattern, previous
435   // character(s) must be /(=). Returns true if a pattern is scanned.
436   bool ScanRegExpPattern(bool seen_equal);
437   // Scans the input as regular expression flags. Returns the flags on success.
438   Maybe<RegExp::Flags> ScanRegExpFlags();
439 
440   // Scans the input as a template literal
441   Token::Value ScanTemplateStart();
442   Token::Value ScanTemplateContinuation();
443 
source_url()444   const LiteralBuffer* source_url() const { return &source_url_; }
source_mapping_url()445   const LiteralBuffer* source_mapping_url() const {
446     return &source_mapping_url_;
447   }
448 
449   bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
450 
451  private:
  // The current and look-ahead token.
  // One descriptor holds everything the scanner records about a token.
  struct TokenDesc {
    // The token kind.
    Token::Value token;
    // Source interval covered by the token.
    Location location;
    // Buffer holding the token's literal, or NULL when the token has no
    // literal (see StartLiteral() / DropLiteral()).
    LiteralBuffer* literal_chars;
    // Buffer holding the raw form of the literal, or NULL (see
    // StartRawLiteral() / DropLiteral()).
    LiteralBuffer* raw_literal_chars;
    // Value reported by Scanner::smi_value() for this token.
    int smi_value_;
  };
460 
461   static const int kCharacterLookaheadBufferSize = 1;
462 
463   // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
464   template <bool capture_raw>
465   uc32 ScanOctalEscape(uc32 c, int length);
466 
467   // Call this after setting source_ to the input.
Init()468   void Init() {
469     // Set c0_ (one character ahead)
470     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
471     Advance();
472     // Initialize current_ to not refer to a literal.
473     current_.literal_chars = NULL;
474     current_.raw_literal_chars = NULL;
475     next_next_.token = Token::UNINITIALIZED;
476   }
477 
478   // Support BookmarkScope functionality.
479   bool SetBookmark();
480   void ResetToBookmark();
481   bool BookmarkHasBeenSet();
482   bool BookmarkHasBeenReset();
483   void DropBookmark();
484   static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
485 
486   // Literal buffer support
StartLiteral()487   inline void StartLiteral() {
488     LiteralBuffer* free_buffer =
489         (current_.literal_chars == &literal_buffer0_)
490             ? &literal_buffer1_
491             : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
492                                                             : &literal_buffer0_;
493     free_buffer->Reset();
494     next_.literal_chars = free_buffer;
495   }
496 
StartRawLiteral()497   inline void StartRawLiteral() {
498     LiteralBuffer* free_buffer =
499         (current_.raw_literal_chars == &raw_literal_buffer0_)
500             ? &raw_literal_buffer1_
501             : (current_.raw_literal_chars == &raw_literal_buffer1_)
502                   ? &raw_literal_buffer2_
503                   : &raw_literal_buffer0_;
504     free_buffer->Reset();
505     next_.raw_literal_chars = free_buffer;
506   }
507 
INLINE(void AddLiteralChar (uc32 c))508   INLINE(void AddLiteralChar(uc32 c)) {
509     DCHECK_NOT_NULL(next_.literal_chars);
510     next_.literal_chars->AddChar(c);
511   }
512 
INLINE(void AddRawLiteralChar (uc32 c))513   INLINE(void AddRawLiteralChar(uc32 c)) {
514     DCHECK_NOT_NULL(next_.raw_literal_chars);
515     next_.raw_literal_chars->AddChar(c);
516   }
517 
INLINE(void ReduceRawLiteralLength (int delta))518   INLINE(void ReduceRawLiteralLength(int delta)) {
519     DCHECK_NOT_NULL(next_.raw_literal_chars);
520     next_.raw_literal_chars->ReduceLength(delta);
521   }
522 
523   // Stops scanning of a literal and drop the collected characters,
524   // e.g., due to an encountered error.
DropLiteral()525   inline void DropLiteral() {
526     next_.literal_chars = NULL;
527     next_.raw_literal_chars = NULL;
528   }
529 
AddLiteralCharAdvance()530   inline void AddLiteralCharAdvance() {
531     AddLiteralChar(c0_);
532     Advance();
533   }
534 
535   // Low-level scanning support.
536   template <bool capture_raw = false, bool check_surrogate = true>
Advance()537   void Advance() {
538     if (capture_raw) {
539       AddRawLiteralChar(c0_);
540     }
541     c0_ = source_->Advance();
542     if (check_surrogate) HandleLeadSurrogate();
543   }
544 
HandleLeadSurrogate()545   void HandleLeadSurrogate() {
546     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
547       uc32 c1 = source_->Advance();
548       if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
549         source_->PushBack(c1);
550       } else {
551         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
552       }
553     }
554   }
555 
PushBack(uc32 ch)556   void PushBack(uc32 ch) {
557     if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
558       source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
559       source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
560     } else {
561       source_->PushBack(c0_);
562     }
563     c0_ = ch;
564   }
565 
Select(Token::Value tok)566   inline Token::Value Select(Token::Value tok) {
567     Advance();
568     return tok;
569   }
570 
Select(uc32 next,Token::Value then,Token::Value else_)571   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
572     Advance();
573     if (c0_ == next) {
574       Advance();
575       return then;
576     } else {
577       return else_;
578     }
579   }
580 
581   // Returns the literal string, if any, for the current token (the
582   // token last returned by Next()). The string is 0-terminated.
583   // Literal strings are collected for identifiers, strings, numbers as well
584   // as for template literals. For template literals we also collect the raw
585   // form.
586   // These functions only give the correct result if the literal was scanned
587   // when a LiteralScope object is alive.
literal_one_byte_string()588   Vector<const uint8_t> literal_one_byte_string() {
589     DCHECK_NOT_NULL(current_.literal_chars);
590     return current_.literal_chars->one_byte_literal();
591   }
literal_two_byte_string()592   Vector<const uint16_t> literal_two_byte_string() {
593     DCHECK_NOT_NULL(current_.literal_chars);
594     return current_.literal_chars->two_byte_literal();
595   }
is_literal_one_byte()596   bool is_literal_one_byte() {
597     DCHECK_NOT_NULL(current_.literal_chars);
598     return current_.literal_chars->is_one_byte();
599   }
literal_length()600   int literal_length() const {
601     DCHECK_NOT_NULL(current_.literal_chars);
602     return current_.literal_chars->length();
603   }
604   // Returns the literal string for the next token (the token that
605   // would be returned if Next() were called).
next_literal_one_byte_string()606   Vector<const uint8_t> next_literal_one_byte_string() {
607     DCHECK_NOT_NULL(next_.literal_chars);
608     return next_.literal_chars->one_byte_literal();
609   }
next_literal_two_byte_string()610   Vector<const uint16_t> next_literal_two_byte_string() {
611     DCHECK_NOT_NULL(next_.literal_chars);
612     return next_.literal_chars->two_byte_literal();
613   }
is_next_literal_one_byte()614   bool is_next_literal_one_byte() {
615     DCHECK_NOT_NULL(next_.literal_chars);
616     return next_.literal_chars->is_one_byte();
617   }
raw_literal_one_byte_string()618   Vector<const uint8_t> raw_literal_one_byte_string() {
619     DCHECK_NOT_NULL(current_.raw_literal_chars);
620     return current_.raw_literal_chars->one_byte_literal();
621   }
raw_literal_two_byte_string()622   Vector<const uint16_t> raw_literal_two_byte_string() {
623     DCHECK_NOT_NULL(current_.raw_literal_chars);
624     return current_.raw_literal_chars->two_byte_literal();
625   }
is_raw_literal_one_byte()626   bool is_raw_literal_one_byte() {
627     DCHECK_NOT_NULL(current_.raw_literal_chars);
628     return current_.raw_literal_chars->is_one_byte();
629   }
630 
631   template <bool capture_raw>
632   uc32 ScanHexNumber(int expected_length);
633   // Scan a number of any length but not bigger than max_value. For example, the
634   // number can be 000000001, so it's very long in characters but its value is
635   // small.
636   template <bool capture_raw>
637   uc32 ScanUnlimitedLengthHexNumber(int max_value);
638 
639   // Scans a single JavaScript token.
640   void Scan();
641 
642   bool SkipWhiteSpace();
643   Token::Value SkipSingleLineComment();
644   Token::Value SkipSourceURLComment();
645   void TryToParseSourceURLComment();
646   Token::Value SkipMultiLineComment();
647   // Scans a possible HTML comment -- begins with '<!'.
648   Token::Value ScanHtmlComment();
649 
650   void ScanDecimalDigits();
651   Token::Value ScanNumber(bool seen_period);
652   Token::Value ScanIdentifierOrKeyword();
653   Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
654 
655   Token::Value ScanString();
656 
657   // Scans an escape-sequence which is part of a string and adds the
658   // decoded character to the current literal. Returns true if a pattern
659   // is scanned.
660   template <bool capture_raw, bool in_template_literal>
661   bool ScanEscape();
662 
663   // Decodes a Unicode escape-sequence which is part of an identifier.
664   // If the escape sequence cannot be decoded the result is kBadChar.
665   uc32 ScanIdentifierUnicodeEscape();
666   // Helper for the above functions.
667   template <bool capture_raw>
668   uc32 ScanUnicodeEscape();
669 
670   Token::Value ScanTemplateSpan();
671 
672   // Return the current source position.
source_pos()673   int source_pos() {
674     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
675   }
676 
LiteralContainsEscapes(const TokenDesc & token)677   static bool LiteralContainsEscapes(const TokenDesc& token) {
678     Location location = token.location;
679     int source_length = (location.end_pos - location.beg_pos);
680     if (token.token == Token::STRING) {
681       // Subtract delimiters.
682       source_length -= 2;
683     }
684     return token.literal_chars->length() != source_length;
685   }
686 
687   UnicodeCache* unicode_cache_;
688 
689   // Buffers collecting literal strings, numbers, etc.
690   LiteralBuffer literal_buffer0_;
691   LiteralBuffer literal_buffer1_;
692   LiteralBuffer literal_buffer2_;
693 
694   // Values parsed from magic comments.
695   LiteralBuffer source_url_;
696   LiteralBuffer source_mapping_url_;
697 
698   // Buffer to store raw string values
699   LiteralBuffer raw_literal_buffer0_;
700   LiteralBuffer raw_literal_buffer1_;
701   LiteralBuffer raw_literal_buffer2_;
702 
703   TokenDesc current_;    // desc for current token (as returned by Next())
704   TokenDesc next_;       // desc for next token (one token look-ahead)
705   TokenDesc next_next_;  // desc for the token after next (after PeekAhead())
706 
707   // Variables for Scanner::BookmarkScope and the *Bookmark implementation.
708   // These variables contain the scanner state when a bookmark is set.
709   //
710   // We will use bookmark_c0_ as a 'control' variable, where:
711   // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.
712   // - bookmark_c0_ == -1: No bookmark has been set.
713   // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).
714   //
715   // Which state is being bookmarked? The parser state is distributed over
716   // several variables, roughly like this:
717   //   ...    1234        +       5678 ..... [character stream]
718   //       [current_] [next_] c0_ |      [scanner state]
719   // So when the scanner is logically at the beginning of an expression
720   // like "1234 + 4567", then:
721   // - current_ contains "1234"
722   // - next_ contains "+"
723   // - c0_ contains ' ' (the space between "+" and "5678"),
724   // - the source_ character stream points to the beginning of "5678".
725   // To be able to restore this state, we will keep copies of current_, next_,
726   // and c0_; we'll ask the stream to bookmark itself, and we'll copy the
727   // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.
728   static const uc32 kNoBookmark = -1;
729   static const uc32 kBookmarkWasApplied = -2;
730   uc32 bookmark_c0_;
731   TokenDesc bookmark_current_;
732   TokenDesc bookmark_next_;
733   LiteralBuffer bookmark_current_literal_;
734   LiteralBuffer bookmark_current_raw_literal_;
735   LiteralBuffer bookmark_next_literal_;
736   LiteralBuffer bookmark_next_raw_literal_;
737 
738   // Input stream. Must be initialized to an Utf16CharacterStream.
739   Utf16CharacterStream* source_;
740 
741 
742   // Start position of the octal literal last scanned.
743   Location octal_pos_;
744 
745   // One Unicode character look-ahead; c0_ < 0 at the end of the input.
746   uc32 c0_;
747 
748   // Whether there is a line terminator whitespace character after
749   // the current token, and  before the next. Does not count newlines
750   // inside multiline comments.
751   bool has_line_terminator_before_next_;
752   // Whether there is a multi-line comment that contains a
753   // line-terminator after the current token, and before the next.
754   bool has_multiline_comment_before_next_;
755 };
756 
757 }  // namespace internal
758 }  // namespace v8
759 
760 #endif  // V8_PARSING_SCANNER_H_
761