1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Features shared by parsing and pre-parsing scanners. 6 7 #ifndef V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_ 9 10 #include "src/allocation.h" 11 #include "src/base/logging.h" 12 #include "src/char-predicates.h" 13 #include "src/globals.h" 14 #include "src/hashmap.h" 15 #include "src/list.h" 16 #include "src/parsing/token.h" 17 #include "src/unicode.h" 18 #include "src/unicode-decoder.h" 19 #include "src/utils.h" 20 21 namespace v8 { 22 namespace internal { 23 24 25 class AstRawString; 26 class AstValueFactory; 27 class ParserRecorder; 28 class UnicodeCache; 29 30 31 // --------------------------------------------------------------------- 32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 33 // A code unit is a 16 bit value representing either a 16 bit code point 34 // or one part of a surrogate pair that make a single 21 bit code point. 35 36 class Utf16CharacterStream { 37 public: Utf16CharacterStream()38 Utf16CharacterStream() : pos_(0) { } ~Utf16CharacterStream()39 virtual ~Utf16CharacterStream() { } 40 41 // Returns and advances past the next UTF-16 code unit in the input 42 // stream. If there are no more code units, it returns a negative 43 // value. Advance()44 inline uc32 Advance() { 45 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 46 pos_++; 47 return static_cast<uc32>(*(buffer_cursor_++)); 48 } 49 // Note: currently the following increment is necessary to avoid a 50 // parser problem! The scanner treats the final kEndOfInput as 51 // a code unit with a position, and does math relative to that 52 // position. 53 pos_++; 54 55 return kEndOfInput; 56 } 57 58 // Return the current position in the code unit stream. 59 // Starts at zero. pos()60 inline size_t pos() const { return pos_; } 61 62 // Skips forward past the next code_unit_count UTF-16 code units 63 // in the input, or until the end of input if that comes sooner. 64 // Returns the number of code units actually skipped. If less 65 // than code_unit_count, SeekForward(size_t code_unit_count)66 inline size_t SeekForward(size_t code_unit_count) { 67 size_t buffered_chars = buffer_end_ - buffer_cursor_; 68 if (code_unit_count <= buffered_chars) { 69 buffer_cursor_ += code_unit_count; 70 pos_ += code_unit_count; 71 return code_unit_count; 72 } 73 return SlowSeekForward(code_unit_count); 74 } 75 76 // Pushes back the most recently read UTF-16 code unit (or negative 77 // value if at end of input), i.e., the value returned by the most recent 78 // call to Advance. 79 // Must not be used right after calling SeekForward. 80 virtual void PushBack(int32_t code_unit) = 0; 81 82 virtual bool SetBookmark(); 83 virtual void ResetToBookmark(); 84 85 protected: 86 static const uc32 kEndOfInput = -1; 87 88 // Ensures that the buffer_cursor_ points to the code_unit at 89 // position pos_ of the input, if possible. If the position 90 // is at or after the end of the input, return false. If there 91 // are more code_units available, return true. 92 virtual bool ReadBlock() = 0; 93 virtual size_t SlowSeekForward(size_t code_unit_count) = 0; 94 95 const uint16_t* buffer_cursor_; 96 const uint16_t* buffer_end_; 97 size_t pos_; 98 }; 99 100 101 // --------------------------------------------------------------------- 102 // DuplicateFinder discovers duplicate symbols. 103 104 class DuplicateFinder { 105 public: DuplicateFinder(UnicodeCache * constants)106 explicit DuplicateFinder(UnicodeCache* constants) 107 : unicode_constants_(constants), 108 backing_store_(16), 109 map_(&Match) { } 110 111 int AddOneByteSymbol(Vector<const uint8_t> key, int value); 112 int AddTwoByteSymbol(Vector<const uint16_t> key, int value); 113 // Add a a number literal by converting it (if necessary) 114 // to the string that ToString(ToNumber(literal)) would generate. 115 // and then adding that string with AddOneByteSymbol. 116 // This string is the actual value used as key in an object literal, 117 // and the one that must be different from the other keys. 118 int AddNumber(Vector<const uint8_t> key, int value); 119 120 private: 121 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value); 122 // Backs up the key and its length in the backing store. 123 // The backup is stored with a base 127 encoding of the 124 // length (plus a bit saying whether the string is one byte), 125 // followed by the bytes of the key. 126 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte); 127 128 // Compare two encoded keys (both pointing into the backing store) 129 // for having the same base-127 encoded lengths and representation. 130 // and then having the same 'length' bytes following. 131 static bool Match(void* first, void* second); 132 // Creates a hash from a sequence of bytes. 133 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte); 134 // Checks whether a string containing a JS number is its canonical 135 // form. 136 static bool IsNumberCanonical(Vector<const uint8_t> key); 137 138 // Size of buffer. Sufficient for using it to call DoubleToCString in 139 // from conversions.h. 140 static const int kBufferSize = 100; 141 142 UnicodeCache* unicode_constants_; 143 // Backing store used to store strings used as hashmap keys. 144 SequenceCollector<unsigned char> backing_store_; 145 HashMap map_; 146 // Buffer used for string->number->canonical string conversions. 147 char number_buffer_[kBufferSize]; 148 }; 149 150 151 // ---------------------------------------------------------------------------- 152 // LiteralBuffer - Collector of chars of literals. 153 154 class LiteralBuffer { 155 public: LiteralBuffer()156 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } 157 ~LiteralBuffer()158 ~LiteralBuffer() { backing_store_.Dispose(); } 159 INLINE(void AddChar (uint32_t code_unit))160 INLINE(void AddChar(uint32_t code_unit)) { 161 if (position_ >= backing_store_.length()) ExpandBuffer(); 162 if (is_one_byte_) { 163 if (code_unit <= unibrow::Latin1::kMaxChar) { 164 backing_store_[position_] = static_cast<byte>(code_unit); 165 position_ += kOneByteSize; 166 return; 167 } 168 ConvertToTwoByte(); 169 } 170 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { 171 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; 172 position_ += kUC16Size; 173 } else { 174 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = 175 unibrow::Utf16::LeadSurrogate(code_unit); 176 position_ += kUC16Size; 177 if (position_ >= backing_store_.length()) ExpandBuffer(); 178 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = 179 unibrow::Utf16::TrailSurrogate(code_unit); 180 position_ += kUC16Size; 181 } 182 } 183 is_one_byte()184 bool is_one_byte() const { return is_one_byte_; } 185 is_contextual_keyword(Vector<const char> keyword)186 bool is_contextual_keyword(Vector<const char> keyword) const { 187 return is_one_byte() && keyword.length() == position_ && 188 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); 189 } 190 two_byte_literal()191 Vector<const uint16_t> two_byte_literal() const { 192 DCHECK(!is_one_byte_); 193 DCHECK((position_ & 0x1) == 0); 194 return Vector<const uint16_t>( 195 reinterpret_cast<const uint16_t*>(backing_store_.start()), 196 position_ >> 1); 197 } 198 one_byte_literal()199 Vector<const uint8_t> one_byte_literal() const { 200 DCHECK(is_one_byte_); 201 return Vector<const uint8_t>( 202 reinterpret_cast<const uint8_t*>(backing_store_.start()), 203 position_); 204 } 205 length()206 int length() const { 207 return is_one_byte_ ? position_ : (position_ >> 1); 208 } 209 ReduceLength(int delta)210 void ReduceLength(int delta) { 211 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); 212 } 213 Reset()214 void Reset() { 215 position_ = 0; 216 is_one_byte_ = true; 217 } 218 219 Handle<String> Internalize(Isolate* isolate) const; 220 CopyFrom(const LiteralBuffer * other)221 void CopyFrom(const LiteralBuffer* other) { 222 if (other == nullptr) { 223 Reset(); 224 } else { 225 is_one_byte_ = other->is_one_byte_; 226 position_ = other->position_; 227 backing_store_.Dispose(); 228 backing_store_ = other->backing_store_.Clone(); 229 } 230 } 231 232 private: 233 static const int kInitialCapacity = 16; 234 static const int kGrowthFactory = 4; 235 static const int kMinConversionSlack = 256; 236 static const int kMaxGrowth = 1 * MB; NewCapacity(int min_capacity)237 inline int NewCapacity(int min_capacity) { 238 int capacity = Max(min_capacity, backing_store_.length()); 239 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); 240 return new_capacity; 241 } 242 ExpandBuffer()243 void ExpandBuffer() { 244 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); 245 MemCopy(new_store.start(), backing_store_.start(), position_); 246 backing_store_.Dispose(); 247 backing_store_ = new_store; 248 } 249 ConvertToTwoByte()250 void ConvertToTwoByte() { 251 DCHECK(is_one_byte_); 252 Vector<byte> new_store; 253 int new_content_size = position_ * kUC16Size; 254 if (new_content_size >= backing_store_.length()) { 255 // Ensure room for all currently read code units as UC16 as well 256 // as the code unit about to be stored. 257 new_store = Vector<byte>::New(NewCapacity(new_content_size)); 258 } else { 259 new_store = backing_store_; 260 } 261 uint8_t* src = backing_store_.start(); 262 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); 263 for (int i = position_ - 1; i >= 0; i--) { 264 dst[i] = src[i]; 265 } 266 if (new_store.start() != backing_store_.start()) { 267 backing_store_.Dispose(); 268 backing_store_ = new_store; 269 } 270 position_ = new_content_size; 271 is_one_byte_ = false; 272 } 273 274 bool is_one_byte_; 275 int position_; 276 Vector<byte> backing_store_; 277 278 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); 279 }; 280 281 282 // ---------------------------------------------------------------------------- 283 // JavaScript Scanner. 284 285 class Scanner { 286 public: 287 // Scoped helper for literal recording. Automatically drops the literal 288 // if aborting the scanning before it's complete. 289 class LiteralScope { 290 public: LiteralScope(Scanner * self)291 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { 292 scanner_->StartLiteral(); 293 } ~LiteralScope()294 ~LiteralScope() { 295 if (!complete_) scanner_->DropLiteral(); 296 } Complete()297 void Complete() { 298 complete_ = true; 299 } 300 301 private: 302 Scanner* scanner_; 303 bool complete_; 304 }; 305 306 // Scoped helper for a re-settable bookmark. 307 class BookmarkScope { 308 public: BookmarkScope(Scanner * scanner)309 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { 310 DCHECK_NOT_NULL(scanner_); 311 } ~BookmarkScope()312 ~BookmarkScope() { scanner_->DropBookmark(); } 313 Set()314 bool Set() { return scanner_->SetBookmark(); } Reset()315 void Reset() { scanner_->ResetToBookmark(); } HasBeenSet()316 bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); } HasBeenReset()317 bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); } 318 319 private: 320 Scanner* scanner_; 321 322 DISALLOW_COPY_AND_ASSIGN(BookmarkScope); 323 }; 324 325 // Representation of an interval of source positions. 326 struct Location { LocationLocation327 Location(int b, int e) : beg_pos(b), end_pos(e) { } LocationLocation328 Location() : beg_pos(0), end_pos(0) { } 329 IsValidLocation330 bool IsValid() const { 331 return beg_pos >= 0 && end_pos >= beg_pos; 332 } 333 invalidLocation334 static Location invalid() { return Location(-1, -1); } 335 336 int beg_pos; 337 int end_pos; 338 }; 339 340 // -1 is outside of the range of any real source code. 341 static const int kNoOctalLocation = -1; 342 343 explicit Scanner(UnicodeCache* scanner_contants); 344 345 void Initialize(Utf16CharacterStream* source); 346 347 // Returns the next token and advances input. 348 Token::Value Next(); 349 // Returns the token following peek() 350 Token::Value PeekAhead(); 351 // Returns the current token again. current_token()352 Token::Value current_token() { return current_.token; } 353 // Returns the location information for the current token 354 // (the token last returned by Next()). location()355 Location location() const { return current_.location; } 356 357 // Similar functions for the upcoming token. 358 359 // One token look-ahead (past the token returned by Next()). peek()360 Token::Value peek() const { return next_.token; } 361 peek_location()362 Location peek_location() const { return next_.location; } 363 literal_contains_escapes()364 bool literal_contains_escapes() const { 365 return LiteralContainsEscapes(current_); 366 } next_literal_contains_escapes()367 bool next_literal_contains_escapes() const { 368 return LiteralContainsEscapes(next_); 369 } is_literal_contextual_keyword(Vector<const char> keyword)370 bool is_literal_contextual_keyword(Vector<const char> keyword) { 371 DCHECK_NOT_NULL(current_.literal_chars); 372 return current_.literal_chars->is_contextual_keyword(keyword); 373 } is_next_contextual_keyword(Vector<const char> keyword)374 bool is_next_contextual_keyword(Vector<const char> keyword) { 375 DCHECK_NOT_NULL(next_.literal_chars); 376 return next_.literal_chars->is_contextual_keyword(keyword); 377 } 378 379 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); 380 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); 381 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); 382 383 double DoubleValue(); 384 bool ContainsDot(); 385 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { 386 if (is_literal_one_byte() && 387 literal_length() == length && 388 (allow_escapes || !literal_contains_escapes())) { 389 const char* token = 390 reinterpret_cast<const char*>(literal_one_byte_string().start()); 391 return !strncmp(token, data, length); 392 } 393 return false; 394 } UnescapedLiteralMatches(const char * data,int length)395 inline bool UnescapedLiteralMatches(const char* data, int length) { 396 return LiteralMatches(data, length, false); 397 } 398 IsGetOrSet(bool * is_get,bool * is_set)399 void IsGetOrSet(bool* is_get, bool* is_set) { 400 if (is_literal_one_byte() && 401 literal_length() == 3 && 402 !literal_contains_escapes()) { 403 const char* token = 404 reinterpret_cast<const char*>(literal_one_byte_string().start()); 405 *is_get = strncmp(token, "get", 3) == 0; 406 *is_set = !*is_get && strncmp(token, "set", 3) == 0; 407 } 408 } 409 410 int FindSymbol(DuplicateFinder* finder, int value); 411 unicode_cache()412 UnicodeCache* unicode_cache() { return unicode_cache_; } 413 414 // Returns the location of the last seen octal literal. octal_position()415 Location octal_position() const { return octal_pos_; } clear_octal_position()416 void clear_octal_position() { octal_pos_ = Location::invalid(); } 417 418 // Returns the value of the last smi that was scanned. smi_value()419 int smi_value() const { return current_.smi_value_; } 420 421 // Seek forward to the given position. This operation does not 422 // work in general, for instance when there are pushed back 423 // characters, but works for seeking forward until simple delimiter 424 // tokens, which is what it is used for. 425 void SeekForward(int pos); 426 427 // Returns true if there was a line terminator before the peek'ed token, 428 // possibly inside a multi-line comment. HasAnyLineTerminatorBeforeNext()429 bool HasAnyLineTerminatorBeforeNext() const { 430 return has_line_terminator_before_next_ || 431 has_multiline_comment_before_next_; 432 } 433 434 // Scans the input as a regular expression pattern, previous 435 // character(s) must be /(=). Returns true if a pattern is scanned. 436 bool ScanRegExpPattern(bool seen_equal); 437 // Scans the input as regular expression flags. Returns the flags on success. 438 Maybe<RegExp::Flags> ScanRegExpFlags(); 439 440 // Scans the input as a template literal 441 Token::Value ScanTemplateStart(); 442 Token::Value ScanTemplateContinuation(); 443 source_url()444 const LiteralBuffer* source_url() const { return &source_url_; } source_mapping_url()445 const LiteralBuffer* source_mapping_url() const { 446 return &source_mapping_url_; 447 } 448 449 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; 450 451 private: 452 // The current and look-ahead token. 453 struct TokenDesc { 454 Token::Value token; 455 Location location; 456 LiteralBuffer* literal_chars; 457 LiteralBuffer* raw_literal_chars; 458 int smi_value_; 459 }; 460 461 static const int kCharacterLookaheadBufferSize = 1; 462 463 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. 464 template <bool capture_raw> 465 uc32 ScanOctalEscape(uc32 c, int length); 466 467 // Call this after setting source_ to the input. Init()468 void Init() { 469 // Set c0_ (one character ahead) 470 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); 471 Advance(); 472 // Initialize current_ to not refer to a literal. 473 current_.literal_chars = NULL; 474 current_.raw_literal_chars = NULL; 475 next_next_.token = Token::UNINITIALIZED; 476 } 477 478 // Support BookmarkScope functionality. 479 bool SetBookmark(); 480 void ResetToBookmark(); 481 bool BookmarkHasBeenSet(); 482 bool BookmarkHasBeenReset(); 483 void DropBookmark(); 484 static void CopyTokenDesc(TokenDesc* to, TokenDesc* from); 485 486 // Literal buffer support StartLiteral()487 inline void StartLiteral() { 488 LiteralBuffer* free_buffer = 489 (current_.literal_chars == &literal_buffer0_) 490 ? &literal_buffer1_ 491 : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_ 492 : &literal_buffer0_; 493 free_buffer->Reset(); 494 next_.literal_chars = free_buffer; 495 } 496 StartRawLiteral()497 inline void StartRawLiteral() { 498 LiteralBuffer* free_buffer = 499 (current_.raw_literal_chars == &raw_literal_buffer0_) 500 ? &raw_literal_buffer1_ 501 : (current_.raw_literal_chars == &raw_literal_buffer1_) 502 ? &raw_literal_buffer2_ 503 : &raw_literal_buffer0_; 504 free_buffer->Reset(); 505 next_.raw_literal_chars = free_buffer; 506 } 507 INLINE(void AddLiteralChar (uc32 c))508 INLINE(void AddLiteralChar(uc32 c)) { 509 DCHECK_NOT_NULL(next_.literal_chars); 510 next_.literal_chars->AddChar(c); 511 } 512 INLINE(void AddRawLiteralChar (uc32 c))513 INLINE(void AddRawLiteralChar(uc32 c)) { 514 DCHECK_NOT_NULL(next_.raw_literal_chars); 515 next_.raw_literal_chars->AddChar(c); 516 } 517 INLINE(void ReduceRawLiteralLength (int delta))518 INLINE(void ReduceRawLiteralLength(int delta)) { 519 DCHECK_NOT_NULL(next_.raw_literal_chars); 520 next_.raw_literal_chars->ReduceLength(delta); 521 } 522 523 // Stops scanning of a literal and drop the collected characters, 524 // e.g., due to an encountered error. DropLiteral()525 inline void DropLiteral() { 526 next_.literal_chars = NULL; 527 next_.raw_literal_chars = NULL; 528 } 529 AddLiteralCharAdvance()530 inline void AddLiteralCharAdvance() { 531 AddLiteralChar(c0_); 532 Advance(); 533 } 534 535 // Low-level scanning support. 536 template <bool capture_raw = false, bool check_surrogate = true> Advance()537 void Advance() { 538 if (capture_raw) { 539 AddRawLiteralChar(c0_); 540 } 541 c0_ = source_->Advance(); 542 if (check_surrogate) HandleLeadSurrogate(); 543 } 544 HandleLeadSurrogate()545 void HandleLeadSurrogate() { 546 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { 547 uc32 c1 = source_->Advance(); 548 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { 549 source_->PushBack(c1); 550 } else { 551 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); 552 } 553 } 554 } 555 PushBack(uc32 ch)556 void PushBack(uc32 ch) { 557 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { 558 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); 559 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); 560 } else { 561 source_->PushBack(c0_); 562 } 563 c0_ = ch; 564 } 565 Select(Token::Value tok)566 inline Token::Value Select(Token::Value tok) { 567 Advance(); 568 return tok; 569 } 570 Select(uc32 next,Token::Value then,Token::Value else_)571 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 572 Advance(); 573 if (c0_ == next) { 574 Advance(); 575 return then; 576 } else { 577 return else_; 578 } 579 } 580 581 // Returns the literal string, if any, for the current token (the 582 // token last returned by Next()). The string is 0-terminated. 583 // Literal strings are collected for identifiers, strings, numbers as well 584 // as for template literals. For template literals we also collect the raw 585 // form. 586 // These functions only give the correct result if the literal was scanned 587 // when a LiteralScope object is alive. literal_one_byte_string()588 Vector<const uint8_t> literal_one_byte_string() { 589 DCHECK_NOT_NULL(current_.literal_chars); 590 return current_.literal_chars->one_byte_literal(); 591 } literal_two_byte_string()592 Vector<const uint16_t> literal_two_byte_string() { 593 DCHECK_NOT_NULL(current_.literal_chars); 594 return current_.literal_chars->two_byte_literal(); 595 } is_literal_one_byte()596 bool is_literal_one_byte() { 597 DCHECK_NOT_NULL(current_.literal_chars); 598 return current_.literal_chars->is_one_byte(); 599 } literal_length()600 int literal_length() const { 601 DCHECK_NOT_NULL(current_.literal_chars); 602 return current_.literal_chars->length(); 603 } 604 // Returns the literal string for the next token (the token that 605 // would be returned if Next() were called). next_literal_one_byte_string()606 Vector<const uint8_t> next_literal_one_byte_string() { 607 DCHECK_NOT_NULL(next_.literal_chars); 608 return next_.literal_chars->one_byte_literal(); 609 } next_literal_two_byte_string()610 Vector<const uint16_t> next_literal_two_byte_string() { 611 DCHECK_NOT_NULL(next_.literal_chars); 612 return next_.literal_chars->two_byte_literal(); 613 } is_next_literal_one_byte()614 bool is_next_literal_one_byte() { 615 DCHECK_NOT_NULL(next_.literal_chars); 616 return next_.literal_chars->is_one_byte(); 617 } raw_literal_one_byte_string()618 Vector<const uint8_t> raw_literal_one_byte_string() { 619 DCHECK_NOT_NULL(current_.raw_literal_chars); 620 return current_.raw_literal_chars->one_byte_literal(); 621 } raw_literal_two_byte_string()622 Vector<const uint16_t> raw_literal_two_byte_string() { 623 DCHECK_NOT_NULL(current_.raw_literal_chars); 624 return current_.raw_literal_chars->two_byte_literal(); 625 } is_raw_literal_one_byte()626 bool is_raw_literal_one_byte() { 627 DCHECK_NOT_NULL(current_.raw_literal_chars); 628 return current_.raw_literal_chars->is_one_byte(); 629 } 630 631 template <bool capture_raw> 632 uc32 ScanHexNumber(int expected_length); 633 // Scan a number of any length but not bigger than max_value. For example, the 634 // number can be 000000001, so it's very long in characters but its value is 635 // small. 636 template <bool capture_raw> 637 uc32 ScanUnlimitedLengthHexNumber(int max_value); 638 639 // Scans a single JavaScript token. 640 void Scan(); 641 642 bool SkipWhiteSpace(); 643 Token::Value SkipSingleLineComment(); 644 Token::Value SkipSourceURLComment(); 645 void TryToParseSourceURLComment(); 646 Token::Value SkipMultiLineComment(); 647 // Scans a possible HTML comment -- begins with '<!'. 648 Token::Value ScanHtmlComment(); 649 650 void ScanDecimalDigits(); 651 Token::Value ScanNumber(bool seen_period); 652 Token::Value ScanIdentifierOrKeyword(); 653 Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped); 654 655 Token::Value ScanString(); 656 657 // Scans an escape-sequence which is part of a string and adds the 658 // decoded character to the current literal. Returns true if a pattern 659 // is scanned. 660 template <bool capture_raw, bool in_template_literal> 661 bool ScanEscape(); 662 663 // Decodes a Unicode escape-sequence which is part of an identifier. 664 // If the escape sequence cannot be decoded the result is kBadChar. 665 uc32 ScanIdentifierUnicodeEscape(); 666 // Helper for the above functions. 667 template <bool capture_raw> 668 uc32 ScanUnicodeEscape(); 669 670 Token::Value ScanTemplateSpan(); 671 672 // Return the current source position. source_pos()673 int source_pos() { 674 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize; 675 } 676 LiteralContainsEscapes(const TokenDesc & token)677 static bool LiteralContainsEscapes(const TokenDesc& token) { 678 Location location = token.location; 679 int source_length = (location.end_pos - location.beg_pos); 680 if (token.token == Token::STRING) { 681 // Subtract delimiters. 682 source_length -= 2; 683 } 684 return token.literal_chars->length() != source_length; 685 } 686 687 UnicodeCache* unicode_cache_; 688 689 // Buffers collecting literal strings, numbers, etc. 690 LiteralBuffer literal_buffer0_; 691 LiteralBuffer literal_buffer1_; 692 LiteralBuffer literal_buffer2_; 693 694 // Values parsed from magic comments. 695 LiteralBuffer source_url_; 696 LiteralBuffer source_mapping_url_; 697 698 // Buffer to store raw string values 699 LiteralBuffer raw_literal_buffer0_; 700 LiteralBuffer raw_literal_buffer1_; 701 LiteralBuffer raw_literal_buffer2_; 702 703 TokenDesc current_; // desc for current token (as returned by Next()) 704 TokenDesc next_; // desc for next token (one token look-ahead) 705 TokenDesc next_next_; // desc for the token after next (after PeakAhead()) 706 707 // Variables for Scanner::BookmarkScope and the *Bookmark implementation. 708 // These variables contain the scanner state when a bookmark is set. 709 // 710 // We will use bookmark_c0_ as a 'control' variable, where: 711 // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_. 712 // - bookmark_c0_ == -1: No bookmark has been set. 713 // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark). 714 // 715 // Which state is being bookmarked? The parser state is distributed over 716 // several variables, roughly like this: 717 // ... 1234 + 5678 ..... [character stream] 718 // [current_] [next_] c0_ | [scanner state] 719 // So when the scanner is logically at the beginning of an expression 720 // like "1234 + 4567", then: 721 // - current_ contains "1234" 722 // - next_ contains "+" 723 // - c0_ contains ' ' (the space between "+" and "5678", 724 // - the source_ character stream points to the beginning of "5678". 725 // To be able to restore this state, we will keep copies of current_, next_, 726 // and c0_; we'll ask the stream to bookmark itself, and we'll copy the 727 // contents of current_'s and next_'s literal buffers to bookmark_*_literal_. 728 static const uc32 kNoBookmark = -1; 729 static const uc32 kBookmarkWasApplied = -2; 730 uc32 bookmark_c0_; 731 TokenDesc bookmark_current_; 732 TokenDesc bookmark_next_; 733 LiteralBuffer bookmark_current_literal_; 734 LiteralBuffer bookmark_current_raw_literal_; 735 LiteralBuffer bookmark_next_literal_; 736 LiteralBuffer bookmark_next_raw_literal_; 737 738 // Input stream. Must be initialized to an Utf16CharacterStream. 739 Utf16CharacterStream* source_; 740 741 742 // Start position of the octal literal last scanned. 743 Location octal_pos_; 744 745 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 746 uc32 c0_; 747 748 // Whether there is a line terminator whitespace character after 749 // the current token, and before the next. Does not count newlines 750 // inside multiline comments. 751 bool has_line_terminator_before_next_; 752 // Whether there is a multi-line comment that contains a 753 // line-terminator after the current token, and before the next. 754 bool has_multiline_comment_before_next_; 755 }; 756 757 } // namespace internal 758 } // namespace v8 759 760 #endif // V8_PARSING_SCANNER_H_ 761