1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
6 #define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
7 
8 #include "src/handles.h"
9 #include "src/parsing/scanner.h"
10 #include "src/vector.h"
11 
12 namespace v8 {
13 namespace internal {
14 
15 // Forward declarations.
16 class ExternalTwoByteString;
17 
18 // A buffered character stream based on a random access character
19 // source (ReadBlock can be called with pos_ pointing to any position,
20 // even positions before the current).
21 class BufferedUtf16CharacterStream: public Utf16CharacterStream {
22  public:
23   BufferedUtf16CharacterStream();
24   ~BufferedUtf16CharacterStream() override;
25 
26   void PushBack(uc32 character) override;
27 
28  protected:
29   static const size_t kBufferSize = 512;
30   static const size_t kPushBackStepSize = 16;
31 
32   size_t SlowSeekForward(size_t delta) override;
33   bool ReadBlock() override;
34   virtual void SlowPushBack(uc16 character);
35 
36   virtual size_t BufferSeekForward(size_t delta) = 0;
37   virtual size_t FillBuffer(size_t position) = 0;
38 
39   const uc16* pushback_limit_;
40   uc16 buffer_[kBufferSize];
41 };
42 
43 
44 // Generic string stream.
45 class GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
46  public:
47   GenericStringUtf16CharacterStream(Handle<String> data, size_t start_position,
48                                     size_t end_position);
49   ~GenericStringUtf16CharacterStream() override;
50 
51   bool SetBookmark() override;
52   void ResetToBookmark() override;
53 
54  protected:
55   static const size_t kNoBookmark = -1;
56 
57   size_t BufferSeekForward(size_t delta) override;
58   size_t FillBuffer(size_t position) override;
59 
60   Handle<String> string_;
61   size_t length_;
62   size_t bookmark_;
63 };
64 
65 
66 // Utf16 stream based on a literal UTF-8 string.
67 class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
68  public:
69   Utf8ToUtf16CharacterStream(const byte* data, size_t length);
70   ~Utf8ToUtf16CharacterStream() override;
71 
72   static size_t CopyChars(uint16_t* dest, size_t length, const byte* src,
73                           size_t* src_pos, size_t src_length);
74 
75  protected:
76   size_t BufferSeekForward(size_t delta) override;
77   size_t FillBuffer(size_t char_position) override;
78   void SetRawPosition(size_t char_position);
79 
80   const byte* raw_data_;
81   size_t raw_data_length_;  // Measured in bytes, not characters.
82   size_t raw_data_pos_;
83   // The character position of the character at raw_data[raw_data_pos_].
84   // Not necessarily the same as pos_.
85   size_t raw_character_position_;
86 };
87 
88 
89 // ExternalStreamingStream is a wrapper around an ExternalSourceStream (see
90 // include/v8.h) subclass implemented by the embedder.
91 class ExternalStreamingStream : public BufferedUtf16CharacterStream {
92  public:
ExternalStreamingStream(ScriptCompiler::ExternalSourceStream * source_stream,v8::ScriptCompiler::StreamedSource::Encoding encoding)93   ExternalStreamingStream(ScriptCompiler::ExternalSourceStream* source_stream,
94                           v8::ScriptCompiler::StreamedSource::Encoding encoding)
95       : source_stream_(source_stream),
96         encoding_(encoding),
97         current_data_(NULL),
98         current_data_offset_(0),
99         current_data_length_(0),
100         utf8_split_char_buffer_length_(0),
101         bookmark_(0),
102         bookmark_data_is_from_current_data_(false),
103         bookmark_data_offset_(0),
104         bookmark_utf8_split_char_buffer_length_(0) {}
105 
~ExternalStreamingStream()106   ~ExternalStreamingStream() override {
107     delete[] current_data_;
108     bookmark_buffer_.Dispose();
109     bookmark_data_.Dispose();
110   }
111 
BufferSeekForward(size_t delta)112   size_t BufferSeekForward(size_t delta) override {
113     // We never need to seek forward when streaming scripts. We only seek
114     // forward when we want to parse a function whose location we already know,
115     // and when streaming, we don't know the locations of anything we haven't
116     // seen yet.
117     UNREACHABLE();
118     return 0;
119   }
120 
121   size_t FillBuffer(size_t position) override;
122 
123   bool SetBookmark() override;
124   void ResetToBookmark() override;
125 
126  private:
127   void HandleUtf8SplitCharacters(size_t* data_in_buffer);
128   void FlushCurrent();
129 
130   ScriptCompiler::ExternalSourceStream* source_stream_;
131   v8::ScriptCompiler::StreamedSource::Encoding encoding_;
132   const uint8_t* current_data_;
133   size_t current_data_offset_;
134   size_t current_data_length_;
135   // For converting UTF-8 characters which are split across two data chunks.
136   uint8_t utf8_split_char_buffer_[4];
137   size_t utf8_split_char_buffer_length_;
138 
139   // Bookmark support. See comments in ExternalStreamingStream::SetBookmark
140   // for additional details.
141   size_t bookmark_;
142   Vector<uint16_t> bookmark_buffer_;
143   Vector<uint8_t> bookmark_data_;
144   bool bookmark_data_is_from_current_data_;
145   size_t bookmark_data_offset_;
146   uint8_t bookmark_utf8_split_char_buffer_[4];
147   size_t bookmark_utf8_split_char_buffer_length_;
148 };
149 
150 
151 // UTF16 buffer to read characters from an external string.
152 class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
153  public:
154   ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
155                                             int start_position,
156                                             int end_position);
157   ~ExternalTwoByteStringUtf16CharacterStream() override;
158 
PushBack(uc32 character)159   void PushBack(uc32 character) override {
160     DCHECK(buffer_cursor_ > raw_data_);
161     buffer_cursor_--;
162     pos_--;
163   }
164 
165   bool SetBookmark() override;
166   void ResetToBookmark() override;
167 
168  protected:
SlowSeekForward(size_t delta)169   size_t SlowSeekForward(size_t delta) override {
170     // Fast case always handles seeking.
171     return 0;
172   }
ReadBlock()173   bool ReadBlock() override {
174     // Entire string is read at start.
175     return false;
176   }
177   Handle<ExternalTwoByteString> source_;
178   const uc16* raw_data_;  // Pointer to the actual array of characters.
179 
180  private:
181   static const size_t kNoBookmark = -1;
182 
183   size_t bookmark_;
184 };
185 
186 }  // namespace internal
187 }  // namespace v8
188 
189 #endif  // V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
190