1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_STRING_BUILDER_INL_H_
6 #define V8_STRING_BUILDER_INL_H_
7 
8 #include "src/assert-scope.h"
9 #include "src/handles-inl.h"
10 #include "src/heap/factory.h"
11 #include "src/isolate.h"
12 #include "src/objects.h"
13 #include "src/objects/fixed-array.h"
14 #include "src/objects/string-inl.h"
15 #include "src/utils.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 const int kStringBuilderConcatHelperLengthBits = 11;
21 const int kStringBuilderConcatHelperPositionBits = 19;
22 
23 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
24     StringBuilderSubstringLength;
25 typedef BitField<int, kStringBuilderConcatHelperLengthBits,
26                  kStringBuilderConcatHelperPositionBits>
27     StringBuilderSubstringPosition;
28 
29 template <typename sinkchar>
30 void StringBuilderConcatHelper(String* special, sinkchar* sink,
31                                FixedArray* fixed_array, int array_length);
32 
33 // Returns the result length of the concatenation.
34 // On illegal argument, -1 is returned.
35 int StringBuilderConcatLength(int special_length, FixedArray* fixed_array,
36                               int array_length, bool* one_byte);
37 
38 class FixedArrayBuilder {
39  public:
40   explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
41   explicit FixedArrayBuilder(Handle<FixedArray> backing_store);
42 
43   bool HasCapacity(int elements);
44   void EnsureCapacity(Isolate* isolate, int elements);
45 
46   void Add(Object* value);
47   void Add(Smi* value);
48 
array()49   Handle<FixedArray> array() { return array_; }
50 
length()51   int length() { return length_; }
52 
53   int capacity();
54 
55   Handle<JSArray> ToJSArray(Handle<JSArray> target_array);
56 
57  private:
58   Handle<FixedArray> array_;
59   int length_;
60   bool has_non_smi_elements_;
61 };
62 
63 class ReplacementStringBuilder {
64  public:
65   ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66                            int estimated_part_count);
67 
AddSubjectSlice(FixedArrayBuilder * builder,int from,int to)68   static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
69                                      int to) {
70     DCHECK_GE(from, 0);
71     int length = to - from;
72     DCHECK_GT(length, 0);
73     if (StringBuilderSubstringLength::is_valid(length) &&
74         StringBuilderSubstringPosition::is_valid(from)) {
75       int encoded_slice = StringBuilderSubstringLength::encode(length) |
76                           StringBuilderSubstringPosition::encode(from);
77       builder->Add(Smi::FromInt(encoded_slice));
78     } else {
79       // Otherwise encode as two smis.
80       builder->Add(Smi::FromInt(-length));
81       builder->Add(Smi::FromInt(from));
82     }
83   }
84 
85   void EnsureCapacity(int elements);
86 
AddSubjectSlice(int from,int to)87   void AddSubjectSlice(int from, int to) {
88     AddSubjectSlice(&array_builder_, from, to);
89     IncrementCharacterCount(to - from);
90   }
91 
92   void AddString(Handle<String> string);
93 
94   MaybeHandle<String> ToString();
95 
IncrementCharacterCount(int by)96   void IncrementCharacterCount(int by) {
97     if (character_count_ > String::kMaxLength - by) {
98       STATIC_ASSERT(String::kMaxLength < kMaxInt);
99       character_count_ = kMaxInt;
100     } else {
101       character_count_ += by;
102     }
103   }
104 
105  private:
106   void AddElement(Object* element);
107 
108   Heap* heap_;
109   FixedArrayBuilder array_builder_;
110   Handle<String> subject_;
111   int character_count_;
112   bool is_one_byte_;
113 };
114 
115 class IncrementalStringBuilder {
116  public:
117   explicit IncrementalStringBuilder(Isolate* isolate);
118 
CurrentEncoding()119   V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
120 
121   template <typename SrcChar, typename DestChar>
122   V8_INLINE void Append(SrcChar c);
123 
AppendCharacter(uint8_t c)124   V8_INLINE void AppendCharacter(uint8_t c) {
125     if (encoding_ == String::ONE_BYTE_ENCODING) {
126       Append<uint8_t, uint8_t>(c);
127     } else {
128       Append<uint8_t, uc16>(c);
129     }
130   }
131 
AppendCString(const char * s)132   V8_INLINE void AppendCString(const char* s) {
133     const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
134     if (encoding_ == String::ONE_BYTE_ENCODING) {
135       while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
136     } else {
137       while (*u != '\0') Append<uint8_t, uc16>(*(u++));
138     }
139   }
140 
AppendCString(const uc16 * s)141   V8_INLINE void AppendCString(const uc16* s) {
142     if (encoding_ == String::ONE_BYTE_ENCODING) {
143       while (*s != '\0') Append<uc16, uint8_t>(*(s++));
144     } else {
145       while (*s != '\0') Append<uc16, uc16>(*(s++));
146     }
147   }
148 
CurrentPartCanFit(int length)149   V8_INLINE bool CurrentPartCanFit(int length) {
150     return part_length_ - current_index_ > length;
151   }
152 
153   // We make a rough estimate to find out if the current string can be
154   // serialized without allocating a new string part. The worst case length of
155   // an escaped character is 6. Shifting the remaining string length right by 3
156   // is a more pessimistic estimate, but faster to calculate.
EscapedLengthIfCurrentPartFits(int length)157   V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
158     if (length > kMaxPartLength) return 0;
159     STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
160     // This shift will not overflow because length is already less than the
161     // maximum part length.
162     int worst_case_length = length << 3;
163     return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
164   }
165 
166   void AppendString(Handle<String> string);
167 
168   MaybeHandle<String> Finish();
169 
HasOverflowed()170   V8_INLINE bool HasOverflowed() const { return overflowed_; }
171 
172   int Length() const;
173 
174   // Change encoding to two-byte.
ChangeEncoding()175   void ChangeEncoding() {
176     DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
177     ShrinkCurrentPart();
178     encoding_ = String::TWO_BYTE_ENCODING;
179     Extend();
180   }
181 
182   template <typename DestChar>
183   class NoExtend {
184    public:
NoExtend(Handle<String> string,int offset)185     explicit NoExtend(Handle<String> string, int offset) {
186       DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
187       if (sizeof(DestChar) == 1) {
188         start_ = reinterpret_cast<DestChar*>(
189             Handle<SeqOneByteString>::cast(string)->GetChars() + offset);
190       } else {
191         start_ = reinterpret_cast<DestChar*>(
192             Handle<SeqTwoByteString>::cast(string)->GetChars() + offset);
193       }
194       cursor_ = start_;
195     }
196 
Append(DestChar c)197     V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
AppendCString(const char * s)198     V8_INLINE void AppendCString(const char* s) {
199       const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
200       while (*u != '\0') Append(*(u++));
201     }
202 
written()203     int written() { return static_cast<int>(cursor_ - start_); }
204 
205    private:
206     DestChar* start_;
207     DestChar* cursor_;
208     DisallowHeapAllocation no_gc_;
209   };
210 
211   template <typename DestChar>
212   class NoExtendString : public NoExtend<DestChar> {
213    public:
NoExtendString(Handle<String> string,int required_length)214     NoExtendString(Handle<String> string, int required_length)
215         : NoExtend<DestChar>(string, 0), string_(string) {
216       DCHECK(string->length() >= required_length);
217     }
218 
Finalize()219     Handle<String> Finalize() {
220       Handle<SeqString> string = Handle<SeqString>::cast(string_);
221       int length = NoExtend<DestChar>::written();
222       Handle<String> result = SeqString::Truncate(string, length);
223       string_ = Handle<String>();
224       return result;
225     }
226 
227    private:
228     Handle<String> string_;
229   };
230 
231   template <typename DestChar>
232   class NoExtendBuilder : public NoExtend<DestChar> {
233    public:
NoExtendBuilder(IncrementalStringBuilder * builder,int required_length)234     NoExtendBuilder(IncrementalStringBuilder* builder, int required_length)
235         : NoExtend<DestChar>(builder->current_part(), builder->current_index_),
236           builder_(builder) {
237       DCHECK(builder->CurrentPartCanFit(required_length));
238     }
239 
~NoExtendBuilder()240     ~NoExtendBuilder() {
241       builder_->current_index_ += NoExtend<DestChar>::written();
242     }
243 
244    private:
245     IncrementalStringBuilder* builder_;
246   };
247 
248  private:
factory()249   Factory* factory() { return isolate_->factory(); }
250 
accumulator()251   V8_INLINE Handle<String> accumulator() { return accumulator_; }
252 
set_accumulator(Handle<String> string)253   V8_INLINE void set_accumulator(Handle<String> string) {
254     *accumulator_.location() = *string;
255   }
256 
current_part()257   V8_INLINE Handle<String> current_part() { return current_part_; }
258 
set_current_part(Handle<String> string)259   V8_INLINE void set_current_part(Handle<String> string) {
260     *current_part_.location() = *string;
261   }
262 
263   // Add the current part to the accumulator.
264   void Accumulate(Handle<String> new_part);
265 
266   // Finish the current part and allocate a new part.
267   void Extend();
268 
269   // Shrink current part to the right size.
ShrinkCurrentPart()270   void ShrinkCurrentPart() {
271     DCHECK(current_index_ < part_length_);
272     set_current_part(SeqString::Truncate(
273         Handle<SeqString>::cast(current_part()), current_index_));
274   }
275 
276   static const int kInitialPartLength = 32;
277   static const int kMaxPartLength = 16 * 1024;
278   static const int kPartLengthGrowthFactor = 2;
279 
280   Isolate* isolate_;
281   String::Encoding encoding_;
282   bool overflowed_;
283   int part_length_;
284   int current_index_;
285   Handle<String> accumulator_;
286   Handle<String> current_part_;
287 };
288 
289 template <typename SrcChar, typename DestChar>
Append(SrcChar c)290 void IncrementalStringBuilder::Append(SrcChar c) {
291   DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
292   if (sizeof(DestChar) == 1) {
293     DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
294     SeqOneByteString::cast(*current_part_)
295         ->SeqOneByteStringSet(current_index_++, c);
296   } else {
297     DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
298     SeqTwoByteString::cast(*current_part_)
299         ->SeqTwoByteStringSet(current_index_++, c);
300   }
301   if (current_index_ == part_length_) Extend();
302 }
303 }  // namespace internal
304 }  // namespace v8
305 
306 #endif  // V8_STRING_BUILDER_INL_H_
307