1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/v8.h"
6 
7 #include "src/assembler.h"
8 #include "src/ast.h"
9 #include "src/regexp-macro-assembler.h"
10 #include "src/regexp-stack.h"
11 #include "src/simulator.h"
12 
13 namespace v8 {
14 namespace internal {
15 
RegExpMacroAssembler(Zone * zone)16 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
17   : slow_safe_compiler_(false),
18     global_mode_(NOT_GLOBAL),
19     zone_(zone) {
20 }
21 
22 
~RegExpMacroAssembler()23 RegExpMacroAssembler::~RegExpMacroAssembler() {
24 }
25 
26 
27 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
28 
NativeRegExpMacroAssembler(Zone * zone)29 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
30     : RegExpMacroAssembler(zone) {
31 }
32 
33 
~NativeRegExpMacroAssembler()34 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
35 }
36 
37 
CanReadUnaligned()38 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
39   return FLAG_enable_unaligned_accesses && !slow_safe();
40 }
41 
StringCharacterPosition(String * subject,int start_index)42 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
43     String* subject,
44     int start_index) {
45   // Not just flat, but ultra flat.
46   DCHECK(subject->IsExternalString() || subject->IsSeqString());
47   DCHECK(start_index >= 0);
48   DCHECK(start_index <= subject->length());
49   if (subject->IsOneByteRepresentation()) {
50     const byte* address;
51     if (StringShape(subject).IsExternal()) {
52       const uint8_t* data = ExternalOneByteString::cast(subject)->GetChars();
53       address = reinterpret_cast<const byte*>(data);
54     } else {
55       DCHECK(subject->IsSeqOneByteString());
56       const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
57       address = reinterpret_cast<const byte*>(data);
58     }
59     return address + start_index;
60   }
61   const uc16* data;
62   if (StringShape(subject).IsExternal()) {
63     data = ExternalTwoByteString::cast(subject)->GetChars();
64   } else {
65     DCHECK(subject->IsSeqTwoByteString());
66     data = SeqTwoByteString::cast(subject)->GetChars();
67   }
68   return reinterpret_cast<const byte*>(data + start_index);
69 }
70 
71 
Match(Handle<Code> regexp_code,Handle<String> subject,int * offsets_vector,int offsets_vector_length,int previous_index,Isolate * isolate)72 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
73     Handle<Code> regexp_code,
74     Handle<String> subject,
75     int* offsets_vector,
76     int offsets_vector_length,
77     int previous_index,
78     Isolate* isolate) {
79 
80   DCHECK(subject->IsFlat());
81   DCHECK(previous_index >= 0);
82   DCHECK(previous_index <= subject->length());
83 
84   // No allocations before calling the regexp, but we can't use
85   // DisallowHeapAllocation, since regexps might be preempted, and another
86   // thread might do allocation anyway.
87 
88   String* subject_ptr = *subject;
89   // Character offsets into string.
90   int start_offset = previous_index;
91   int char_length = subject_ptr->length() - start_offset;
92   int slice_offset = 0;
93 
94   // The string has been flattened, so if it is a cons string it contains the
95   // full string in the first part.
96   if (StringShape(subject_ptr).IsCons()) {
97     DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
98     subject_ptr = ConsString::cast(subject_ptr)->first();
99   } else if (StringShape(subject_ptr).IsSliced()) {
100     SlicedString* slice = SlicedString::cast(subject_ptr);
101     subject_ptr = slice->parent();
102     slice_offset = slice->offset();
103   }
104   // Ensure that an underlying string has the same representation.
105   bool is_one_byte = subject_ptr->IsOneByteRepresentation();
106   DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
107   // String is now either Sequential or External
108   int char_size_shift = is_one_byte ? 0 : 1;
109 
110   const byte* input_start =
111       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
112   int byte_length = char_length << char_size_shift;
113   const byte* input_end = input_start + byte_length;
114   Result res = Execute(*regexp_code,
115                        *subject,
116                        start_offset,
117                        input_start,
118                        input_end,
119                        offsets_vector,
120                        offsets_vector_length,
121                        isolate);
122   return res;
123 }
124 
125 
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * output,int output_size,Isolate * isolate)126 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
127     Code* code,
128     String* input,  // This needs to be the unpacked (sliced, cons) string.
129     int start_offset,
130     const byte* input_start,
131     const byte* input_end,
132     int* output,
133     int output_size,
134     Isolate* isolate) {
135   // Ensure that the minimum stack has been allocated.
136   RegExpStackScope stack_scope(isolate);
137   Address stack_base = stack_scope.stack()->stack_base();
138 
139   int direct_call = 0;
140   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
141                                           input,
142                                           start_offset,
143                                           input_start,
144                                           input_end,
145                                           output,
146                                           output_size,
147                                           stack_base,
148                                           direct_call,
149                                           isolate);
150   DCHECK(result >= RETRY);
151 
152   if (result == EXCEPTION && !isolate->has_pending_exception()) {
153     // We detected a stack overflow (on the backtrack stack) in RegExp code,
154     // but haven't created the exception yet.
155     isolate->StackOverflow();
156   }
157   return static_cast<Result>(result);
158 }
159 
160 
161 const byte NativeRegExpMacroAssembler::word_character_map[] = {
162     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
163     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
164     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
165     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
166 
167     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
168     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
169     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
170     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
171 
172     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
173     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
174     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
175     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
176 
177     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
178     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
179     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
180     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
181     // Latin-1 range
182     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
183     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
184     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
185     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
186 
187     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
188     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
189     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191 
192     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 
197     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 };
202 
203 
CaseInsensitiveCompareUC16(Address byte_offset1,Address byte_offset2,size_t byte_length,Isolate * isolate)204 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
205     Address byte_offset1,
206     Address byte_offset2,
207     size_t byte_length,
208     Isolate* isolate) {
209   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
210       isolate->regexp_macro_assembler_canonicalize();
211   // This function is not allowed to cause a garbage collection.
212   // A GC might move the calling generated code and invalidate the
213   // return address on the stack.
214   DCHECK(byte_length % 2 == 0);
215   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
216   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
217   size_t length = byte_length >> 1;
218 
219   for (size_t i = 0; i < length; i++) {
220     unibrow::uchar c1 = substring1[i];
221     unibrow::uchar c2 = substring2[i];
222     if (c1 != c2) {
223       unibrow::uchar s1[1] = { c1 };
224       canonicalize->get(c1, '\0', s1);
225       if (s1[0] != c2) {
226         unibrow::uchar s2[1] = { c2 };
227         canonicalize->get(c2, '\0', s2);
228         if (s1[0] != s2[0]) {
229           return 0;
230         }
231       }
232     }
233   }
234   return 1;
235 }
236 
237 
GrowStack(Address stack_pointer,Address * stack_base,Isolate * isolate)238 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
239                                               Address* stack_base,
240                                               Isolate* isolate) {
241   RegExpStack* regexp_stack = isolate->regexp_stack();
242   size_t size = regexp_stack->stack_capacity();
243   Address old_stack_base = regexp_stack->stack_base();
244   DCHECK(old_stack_base == *stack_base);
245   DCHECK(stack_pointer <= old_stack_base);
246   DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
247   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
248   if (new_stack_base == NULL) {
249     return NULL;
250   }
251   *stack_base = new_stack_base;
252   intptr_t stack_content_size = old_stack_base - stack_pointer;
253   return new_stack_base - stack_content_size;
254 }
255 
256 #endif  // V8_INTERPRETED_REGEXP
257 
258 } }  // namespace v8::internal
259