1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_SCANNER_CHARACTER_STREAMS_H_
6 #define V8_SCANNER_CHARACTER_STREAMS_H_
7 
8 #include "src/scanner.h"
9 
10 namespace v8 {
11 namespace internal {
12 
13 // A buffered character stream based on a random access character
14 // source (ReadBlock can be called with pos_ pointing to any position,
15 // even positions before the current).
16 class BufferedUtf16CharacterStream: public Utf16CharacterStream {
17  public:
18   BufferedUtf16CharacterStream();
19   virtual ~BufferedUtf16CharacterStream();
20 
21   virtual void PushBack(uc32 character);
22 
23  protected:
24   static const unsigned kBufferSize = 512;
25   static const unsigned kPushBackStepSize = 16;
26 
27   virtual unsigned SlowSeekForward(unsigned delta);
28   virtual bool ReadBlock();
29   virtual void SlowPushBack(uc16 character);
30 
31   virtual unsigned BufferSeekForward(unsigned delta) = 0;
32   virtual unsigned FillBuffer(unsigned position) = 0;
33 
34   const uc16* pushback_limit_;
35   uc16 buffer_[kBufferSize];
36 };
37 
38 
39 // Generic string stream.
40 class GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
41  public:
42   GenericStringUtf16CharacterStream(Handle<String> data,
43                                     unsigned start_position,
44                                     unsigned end_position);
45   virtual ~GenericStringUtf16CharacterStream();
46 
47  protected:
48   virtual unsigned BufferSeekForward(unsigned delta);
49   virtual unsigned FillBuffer(unsigned position);
50 
51   Handle<String> string_;
52   unsigned length_;
53 };
54 
55 
56 // Utf16 stream based on a literal UTF-8 string.
57 class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
58  public:
59   Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
60   virtual ~Utf8ToUtf16CharacterStream();
61 
62   static unsigned CopyChars(uint16_t* dest, unsigned length, const byte* src,
63                             unsigned* src_pos, unsigned src_length);
64 
65  protected:
66   virtual unsigned BufferSeekForward(unsigned delta);
67   virtual unsigned FillBuffer(unsigned char_position);
68   void SetRawPosition(unsigned char_position);
69 
70   const byte* raw_data_;
71   unsigned raw_data_length_;  // Measured in bytes, not characters.
72   unsigned raw_data_pos_;
73   // The character position of the character at raw_data[raw_data_pos_].
74   // Not necessarily the same as pos_.
75   unsigned raw_character_position_;
76 };
77 
78 
79 // ExternalStreamingStream is a wrapper around an ExternalSourceStream (see
80 // include/v8.h) subclass implemented by the embedder.
81 class ExternalStreamingStream : public BufferedUtf16CharacterStream {
82  public:
ExternalStreamingStream(ScriptCompiler::ExternalSourceStream * source_stream,v8::ScriptCompiler::StreamedSource::Encoding encoding)83   ExternalStreamingStream(ScriptCompiler::ExternalSourceStream* source_stream,
84                           v8::ScriptCompiler::StreamedSource::Encoding encoding)
85       : source_stream_(source_stream),
86         encoding_(encoding),
87         current_data_(NULL),
88         current_data_offset_(0),
89         current_data_length_(0),
90         utf8_split_char_buffer_length_(0) {}
91 
~ExternalStreamingStream()92   virtual ~ExternalStreamingStream() { delete[] current_data_; }
93 
BufferSeekForward(unsigned delta)94   virtual unsigned BufferSeekForward(unsigned delta) OVERRIDE {
95     // We never need to seek forward when streaming scripts. We only seek
96     // forward when we want to parse a function whose location we already know,
97     // and when streaming, we don't know the locations of anything we haven't
98     // seen yet.
99     UNREACHABLE();
100     return 0;
101   }
102 
103   virtual unsigned FillBuffer(unsigned position);
104 
105  private:
106   void HandleUtf8SplitCharacters(unsigned* data_in_buffer);
107 
108   ScriptCompiler::ExternalSourceStream* source_stream_;
109   v8::ScriptCompiler::StreamedSource::Encoding encoding_;
110   const uint8_t* current_data_;
111   unsigned current_data_offset_;
112   unsigned current_data_length_;
113   // For converting UTF-8 characters which are split across two data chunks.
114   uint8_t utf8_split_char_buffer_[4];
115   unsigned utf8_split_char_buffer_length_;
116 };
117 
118 
119 // UTF16 buffer to read characters from an external string.
120 class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
121  public:
122   ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
123                                             int start_position,
124                                             int end_position);
125   virtual ~ExternalTwoByteStringUtf16CharacterStream();
126 
PushBack(uc32 character)127   virtual void PushBack(uc32 character) {
128     DCHECK(buffer_cursor_ > raw_data_);
129     buffer_cursor_--;
130     pos_--;
131   }
132 
133  protected:
SlowSeekForward(unsigned delta)134   virtual unsigned SlowSeekForward(unsigned delta) {
135     // Fast case always handles seeking.
136     return 0;
137   }
ReadBlock()138   virtual bool ReadBlock() {
139     // Entire string is read at start.
140     return false;
141   }
142   Handle<ExternalTwoByteString> source_;
143   const uc16* raw_data_;  // Pointer to the actual array of characters.
144 };
145 
146 } }  // namespace v8::internal
147 
148 #endif  // V8_SCANNER_CHARACTER_STREAMS_H_
149