1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/v8.h"
6
7 #if V8_TARGET_ARCH_ARM64
8
9 #include "src/code-stubs.h"
10 #include "src/cpu-profiler.h"
11 #include "src/log.h"
12 #include "src/macro-assembler.h"
13 #include "src/regexp-macro-assembler.h"
14 #include "src/regexp-stack.h"
15 #include "src/unicode.h"
16
17 #include "src/arm64/regexp-macro-assembler-arm64.h"
18
19 namespace v8 {
20 namespace internal {
21
22 #ifndef V8_INTERPRETED_REGEXP
23 /*
24 * This assembler uses the following register assignment convention:
 *  - w19     : Used to temporarily store a value before a call to C code.
 *              See CheckNotBackReferenceIgnoreCase.
27 * - x20 : Pointer to the current code object (Code*),
28 * it includes the heap object tag.
29 * - w21 : Current position in input, as negative offset from
30 * the end of the string. Please notice that this is
31 * the byte offset, not the character offset!
32 * - w22 : Currently loaded character. Must be loaded using
33 * LoadCurrentCharacter before using any of the dispatch methods.
34 * - x23 : Points to tip of backtrack stack.
35 * - w24 : Position of the first character minus one: non_position_value.
36 * Used to initialize capture registers.
37 * - x25 : Address at the end of the input string: input_end.
38 * Points to byte after last character in input.
39 * - x26 : Address at the start of the input string: input_start.
40 * - w27 : Where to start in the input string.
41 * - x28 : Output array pointer.
42 * - x29/fp : Frame pointer. Used to access arguments, local variables and
43 * RegExp registers.
44 * - x16/x17 : IP registers, used by assembler. Very volatile.
45 * - csp : Points to tip of C stack.
46 *
47 * - x0-x7 : Used as a cache to store 32 bit capture registers. These
48 * registers need to be retained every time a call to C code
49 * is done.
50 *
51 * The remaining registers are free for computations.
52 * Each call to a public method should retain this convention.
53 *
54 * The stack will have the following structure:
55 *
 *  Location    Name               Description
 *              (as referred to in
 *              the code)
 *
 *  - fp[104]   isolate            Address of the current isolate.
 *  - fp[96]    return_address     Secondary link/return address
 *                                 used by an exit frame if this is a
 *                                 native call.
 *  ^^^ csp when called ^^^
 *  - fp[88]    lr                 Return from the RegExp code.
 *  - fp[80]    r29                Old frame pointer (CalleeSaved).
 *  - fp[0..72] r19-r28            Backup of CalleeSaved registers.
 *  - fp[-8]    direct_call        1 => Direct call from JavaScript code.
 *                                 0 => Call through the runtime system.
 *  - fp[-16]   stack_base         High end of the memory area to use as
 *                                 the backtracking stack.
 *  - fp[-24]   output_size        Output may fit multiple sets of matches.
 *  - fp[-32]   input              Handle containing the input string.
 *  - fp[-40]   success_counter
 *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
 *  - fp[-44]   register N         Capture registers initialized with
 *  - fp[-48]   register N + 1     non_position_value.
 *              ...                The first kNumCachedRegisters (N) registers
 *              ...                are cached in x0 to x7.
 *              ...                Only positions must be stored in the first
 *  -           ...                num_saved_registers_ registers.
 *  -           ...
 *  -           register N + num_registers - 1
 *  ^^^^^^^^^ csp ^^^^^^^^^
85 *
86 * The first num_saved_registers_ registers are initialized to point to
87 * "character -1" in the string (i.e., char_size() bytes before the first
88 * character of the string). The remaining registers start out as garbage.
89 *
90 * The data up to the return address must be placed there by the calling
91 * code and the remaining arguments are passed in registers, e.g. by calling the
92 * code entry as cast to a function with the signature:
93 * int (*match)(String* input,
94 * int start_offset,
95 * Address input_start,
96 * Address input_end,
97 * int* output,
98 * int output_size,
99 * Address stack_base,
100 * bool direct_call = false,
101 * Address secondary_return_address, // Only used by native call.
102 * Isolate* isolate)
103 * The call is performed by NativeRegExpMacroAssembler::Execute()
104 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
105 * in arm64/simulator-arm64.h.
106 * When calling as a non-direct call (i.e., from C++ code), the return address
107 * area is overwritten with the LR register by the RegExp code. When doing a
108 * direct call from generated code, the return address is placed there by
109 * the calling code, as in a normal exit frame.
110 */
111
// Shorthand used throughout this file: a statement written `__ Foo(...)`
// goes through ACCESS_MASM(masm_), i.e. it is issued on masm_, the
// MacroAssembler owned by this object. See ACCESS_MASM for the exact
// expansion.
#define __ ACCESS_MASM(masm_)
113
RegExpMacroAssemblerARM64(Mode mode,int registers_to_save,Zone * zone)114 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(
115 Mode mode,
116 int registers_to_save,
117 Zone* zone)
118 : NativeRegExpMacroAssembler(zone),
119 masm_(new MacroAssembler(zone->isolate(), NULL, kRegExpCodeSize)),
120 mode_(mode),
121 num_registers_(registers_to_save),
122 num_saved_registers_(registers_to_save),
123 entry_label_(),
124 start_label_(),
125 success_label_(),
126 backtrack_label_(),
127 exit_label_() {
128 __ SetStackPointer(csp);
129 DCHECK_EQ(0, registers_to_save % 2);
130 // We can cache at most 16 W registers in x0-x7.
131 STATIC_ASSERT(kNumCachedRegisters <= 16);
132 STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
133 __ B(&entry_label_); // We'll write the entry code later.
134 __ Bind(&start_label_); // And then continue from here.
135 }
136
137
~RegExpMacroAssemblerARM64()138 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
139 delete masm_;
140 // Unuse labels in case we throw away the assembler without calling GetCode.
141 entry_label_.Unuse();
142 start_label_.Unuse();
143 success_label_.Unuse();
144 backtrack_label_.Unuse();
145 exit_label_.Unuse();
146 check_preempt_label_.Unuse();
147 stack_overflow_label_.Unuse();
148 }
149
stack_limit_slack()150 int RegExpMacroAssemblerARM64::stack_limit_slack() {
151 return RegExpStack::kStackLimitSlack;
152 }
153
154
AdvanceCurrentPosition(int by)155 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
156 if (by != 0) {
157 __ Add(current_input_offset(),
158 current_input_offset(), by * char_size());
159 }
160 }
161
162
AdvanceRegister(int reg,int by)163 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
164 DCHECK((reg >= 0) && (reg < num_registers_));
165 if (by != 0) {
166 Register to_advance;
167 RegisterState register_state = GetRegisterState(reg);
168 switch (register_state) {
169 case STACKED:
170 __ Ldr(w10, register_location(reg));
171 __ Add(w10, w10, by);
172 __ Str(w10, register_location(reg));
173 break;
174 case CACHED_LSW:
175 to_advance = GetCachedRegister(reg);
176 __ Add(to_advance, to_advance, by);
177 break;
178 case CACHED_MSW:
179 to_advance = GetCachedRegister(reg);
180 __ Add(to_advance, to_advance,
181 static_cast<int64_t>(by) << kWRegSizeInBits);
182 break;
183 default:
184 UNREACHABLE();
185 break;
186 }
187 }
188 }
189
190
Backtrack()191 void RegExpMacroAssemblerARM64::Backtrack() {
192 CheckPreemption();
193 Pop(w10);
194 __ Add(x10, code_pointer(), Operand(w10, UXTW));
195 __ Br(x10);
196 }
197
198
Bind(Label * label)199 void RegExpMacroAssemblerARM64::Bind(Label* label) {
200 __ Bind(label);
201 }
202
203
CheckCharacter(uint32_t c,Label * on_equal)204 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
205 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
206 }
207
208
CheckCharacterGT(uc16 limit,Label * on_greater)209 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
210 Label* on_greater) {
211 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
212 }
213
214
CheckAtStart(Label * on_at_start)215 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
216 Label not_at_start;
217 // Did we start the match at the start of the input string?
218 CompareAndBranchOrBacktrack(start_offset(), 0, ne, ¬_at_start);
219 // If we did, are we still at the start of the input string?
220 __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
221 __ Cmp(x10, input_start());
222 BranchOrBacktrack(eq, on_at_start);
223 __ Bind(¬_at_start);
224 }
225
226
CheckNotAtStart(Label * on_not_at_start)227 void RegExpMacroAssemblerARM64::CheckNotAtStart(Label* on_not_at_start) {
228 // Did we start the match at the start of the input string?
229 CompareAndBranchOrBacktrack(start_offset(), 0, ne, on_not_at_start);
230 // If we did, are we still at the start of the input string?
231 __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
232 __ Cmp(x10, input_start());
233 BranchOrBacktrack(ne, on_not_at_start);
234 }
235
236
CheckCharacterLT(uc16 limit,Label * on_less)237 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
238 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
239 }
240
241
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)242 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
243 int cp_offset,
244 Label* on_failure,
245 bool check_end_of_string) {
246 // This method is only ever called from the cctests.
247
248 if (check_end_of_string) {
249 // Is last character of required match inside string.
250 CheckPosition(cp_offset + str.length() - 1, on_failure);
251 }
252
253 Register characters_address = x11;
254
255 __ Add(characters_address,
256 input_end(),
257 Operand(current_input_offset(), SXTW));
258 if (cp_offset != 0) {
259 __ Add(characters_address, characters_address, cp_offset * char_size());
260 }
261
262 for (int i = 0; i < str.length(); i++) {
263 if (mode_ == LATIN1) {
264 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
265 DCHECK(str[i] <= String::kMaxOneByteCharCode);
266 } else {
267 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
268 }
269 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
270 }
271 }
272
273
CheckGreedyLoop(Label * on_equal)274 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
275 __ Ldr(w10, MemOperand(backtrack_stackpointer()));
276 __ Cmp(current_input_offset(), w10);
277 __ Cset(x11, eq);
278 __ Add(backtrack_stackpointer(),
279 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
280 BranchOrBacktrack(eq, on_equal);
281 }
282
CheckNotBackReferenceIgnoreCase(int start_reg,Label * on_no_match)283 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
284 int start_reg,
285 Label* on_no_match) {
286 Label fallthrough;
287
288 Register capture_start_offset = w10;
289 // Save the capture length in a callee-saved register so it will
290 // be preserved if we call a C helper.
291 Register capture_length = w19;
292 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
293
294 // Find length of back-referenced capture.
295 DCHECK((start_reg % 2) == 0);
296 if (start_reg < kNumCachedRegisters) {
297 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
298 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
299 } else {
300 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
301 }
302 __ Sub(capture_length, w11, capture_start_offset); // Length to check.
303 // Succeed on empty capture (including no capture).
304 __ Cbz(capture_length, &fallthrough);
305
306 // Check that there are enough characters left in the input.
307 __ Cmn(capture_length, current_input_offset());
308 BranchOrBacktrack(gt, on_no_match);
309
310 if (mode_ == LATIN1) {
311 Label success;
312 Label fail;
313 Label loop_check;
314
315 Register capture_start_address = x12;
316 Register capture_end_addresss = x13;
317 Register current_position_address = x14;
318
319 __ Add(capture_start_address,
320 input_end(),
321 Operand(capture_start_offset, SXTW));
322 __ Add(capture_end_addresss,
323 capture_start_address,
324 Operand(capture_length, SXTW));
325 __ Add(current_position_address,
326 input_end(),
327 Operand(current_input_offset(), SXTW));
328
329 Label loop;
330 __ Bind(&loop);
331 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
332 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
333 __ Cmp(w10, w11);
334 __ B(eq, &loop_check);
335
336 // Mismatch, try case-insensitive match (converting letters to lower-case).
337 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
338 __ Orr(w11, w11, 0x20); // Also convert input character.
339 __ Cmp(w11, w10);
340 __ B(ne, &fail);
341 __ Sub(w10, w10, 'a');
342 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
343 __ B(ls, &loop_check); // In range 'a'-'z'.
344 // Latin-1: Check for values in range [224,254] but not 247.
345 __ Sub(w10, w10, 224 - 'a');
346 __ Cmp(w10, 254 - 224);
347 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
348 __ B(eq, &fail); // Weren't Latin-1 letters.
349
350 __ Bind(&loop_check);
351 __ Cmp(capture_start_address, capture_end_addresss);
352 __ B(lt, &loop);
353 __ B(&success);
354
355 __ Bind(&fail);
356 BranchOrBacktrack(al, on_no_match);
357
358 __ Bind(&success);
359 // Compute new value of character position after the matched part.
360 __ Sub(current_input_offset().X(), current_position_address, input_end());
361 if (masm_->emit_debug_code()) {
362 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
363 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
364 // The current input offset should be <= 0, and fit in a W register.
365 __ Check(le, kOffsetOutOfRange);
366 }
367 } else {
368 DCHECK(mode_ == UC16);
369 int argument_count = 4;
370
371 // The cached registers need to be retained.
372 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
373 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
374 __ PushCPURegList(cached_registers);
375
376 // Put arguments into arguments registers.
377 // Parameters are
378 // x0: Address byte_offset1 - Address captured substring's start.
379 // x1: Address byte_offset2 - Address of current character position.
380 // w2: size_t byte_length - length of capture in bytes(!)
381 // x3: Isolate* isolate
382
383 // Address of start of capture.
384 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
385 // Length of capture.
386 __ Mov(w2, capture_length);
387 // Address of current input position.
388 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
389 // Isolate.
390 __ Mov(x3, ExternalReference::isolate_address(isolate()));
391
392 {
393 AllowExternalCallThatCantCauseGC scope(masm_);
394 ExternalReference function =
395 ExternalReference::re_case_insensitive_compare_uc16(isolate());
396 __ CallCFunction(function, argument_count);
397 }
398
399 // Check if function returned non-zero for success or zero for failure.
400 // x0 is one of the registers used as a cache so it must be tested before
401 // the cache is restored.
402 __ Cmp(x0, 0);
403 __ PopCPURegList(cached_registers);
404 BranchOrBacktrack(eq, on_no_match);
405
406 // On success, increment position by length of capture.
407 __ Add(current_input_offset(), current_input_offset(), capture_length);
408 }
409
410 __ Bind(&fallthrough);
411 }
412
CheckNotBackReference(int start_reg,Label * on_no_match)413 void RegExpMacroAssemblerARM64::CheckNotBackReference(
414 int start_reg,
415 Label* on_no_match) {
416 Label fallthrough;
417
418 Register capture_start_address = x12;
419 Register capture_end_address = x13;
420 Register current_position_address = x14;
421 Register capture_length = w15;
422
423 // Find length of back-referenced capture.
424 DCHECK((start_reg % 2) == 0);
425 if (start_reg < kNumCachedRegisters) {
426 __ Mov(x10, GetCachedRegister(start_reg));
427 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
428 } else {
429 __ Ldp(w11, w10, capture_location(start_reg, x10));
430 }
431 __ Sub(capture_length, w11, w10); // Length to check.
432 // Succeed on empty capture (including no capture).
433 __ Cbz(capture_length, &fallthrough);
434
435 // Check that there are enough characters left in the input.
436 __ Cmn(capture_length, current_input_offset());
437 BranchOrBacktrack(gt, on_no_match);
438
439 // Compute pointers to match string and capture string
440 __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
441 __ Add(capture_end_address,
442 capture_start_address,
443 Operand(capture_length, SXTW));
444 __ Add(current_position_address,
445 input_end(),
446 Operand(current_input_offset(), SXTW));
447
448 Label loop;
449 __ Bind(&loop);
450 if (mode_ == LATIN1) {
451 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
452 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
453 } else {
454 DCHECK(mode_ == UC16);
455 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
456 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
457 }
458 __ Cmp(w10, w11);
459 BranchOrBacktrack(ne, on_no_match);
460 __ Cmp(capture_start_address, capture_end_address);
461 __ B(lt, &loop);
462
463 // Move current character position to position after match.
464 __ Sub(current_input_offset().X(), current_position_address, input_end());
465 if (masm_->emit_debug_code()) {
466 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
467 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
468 // The current input offset should be <= 0, and fit in a W register.
469 __ Check(le, kOffsetOutOfRange);
470 }
471 __ Bind(&fallthrough);
472 }
473
474
CheckNotCharacter(unsigned c,Label * on_not_equal)475 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
476 Label* on_not_equal) {
477 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
478 }
479
480
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)481 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
482 uint32_t mask,
483 Label* on_equal) {
484 __ And(w10, current_character(), mask);
485 CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
486 }
487
488
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)489 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
490 unsigned mask,
491 Label* on_not_equal) {
492 __ And(w10, current_character(), mask);
493 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
494 }
495
496
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)497 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
498 uc16 c,
499 uc16 minus,
500 uc16 mask,
501 Label* on_not_equal) {
502 DCHECK(minus < String::kMaxUtf16CodeUnit);
503 __ Sub(w10, current_character(), minus);
504 __ And(w10, w10, mask);
505 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
506 }
507
508
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)509 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
510 uc16 from,
511 uc16 to,
512 Label* on_in_range) {
513 __ Sub(w10, current_character(), from);
514 // Unsigned lower-or-same condition.
515 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
516 }
517
518
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)519 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
520 uc16 from,
521 uc16 to,
522 Label* on_not_in_range) {
523 __ Sub(w10, current_character(), from);
524 // Unsigned higher condition.
525 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
526 }
527
528
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)529 void RegExpMacroAssemblerARM64::CheckBitInTable(
530 Handle<ByteArray> table,
531 Label* on_bit_set) {
532 __ Mov(x11, Operand(table));
533 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
534 __ And(w10, current_character(), kTableMask);
535 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
536 } else {
537 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
538 }
539 __ Ldrb(w11, MemOperand(x11, w10, UXTW));
540 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
541 }
542
543
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)544 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
545 Label* on_no_match) {
546 // Range checks (c in min..max) are generally implemented by an unsigned
547 // (c - min) <= (max - min) check
548 switch (type) {
549 case 's':
550 // Match space-characters
551 if (mode_ == LATIN1) {
552 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
553 Label success;
554 // Check for ' ' or 0x00a0.
555 __ Cmp(current_character(), ' ');
556 __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
557 __ B(eq, &success);
558 // Check range 0x09..0x0d.
559 __ Sub(w10, current_character(), '\t');
560 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
561 __ Bind(&success);
562 return true;
563 }
564 return false;
565 case 'S':
566 // The emitted code for generic character classes is good enough.
567 return false;
568 case 'd':
569 // Match ASCII digits ('0'..'9').
570 __ Sub(w10, current_character(), '0');
571 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
572 return true;
573 case 'D':
574 // Match ASCII non-digits.
575 __ Sub(w10, current_character(), '0');
576 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
577 return true;
578 case '.': {
579 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
580 // Here we emit the conditional branch only once at the end to make branch
581 // prediction more efficient, even though we could branch out of here
582 // as soon as a character matches.
583 __ Cmp(current_character(), 0x0a);
584 __ Ccmp(current_character(), 0x0d, ZFlag, ne);
585 if (mode_ == UC16) {
586 __ Sub(w10, current_character(), 0x2028);
587 // If the Z flag was set we clear the flags to force a branch.
588 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
589 // ls -> !((C==1) && (Z==0))
590 BranchOrBacktrack(ls, on_no_match);
591 } else {
592 BranchOrBacktrack(eq, on_no_match);
593 }
594 return true;
595 }
596 case 'n': {
597 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
598 // We have to check all 4 newline characters before emitting
599 // the conditional branch.
600 __ Cmp(current_character(), 0x0a);
601 __ Ccmp(current_character(), 0x0d, ZFlag, ne);
602 if (mode_ == UC16) {
603 __ Sub(w10, current_character(), 0x2028);
604 // If the Z flag was set we clear the flags to force a fall-through.
605 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
606 // hi -> (C==1) && (Z==0)
607 BranchOrBacktrack(hi, on_no_match);
608 } else {
609 BranchOrBacktrack(ne, on_no_match);
610 }
611 return true;
612 }
613 case 'w': {
614 if (mode_ != LATIN1) {
615 // Table is 256 entries, so all Latin1 characters can be tested.
616 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
617 }
618 ExternalReference map = ExternalReference::re_word_character_map();
619 __ Mov(x10, map);
620 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
621 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
622 return true;
623 }
624 case 'W': {
625 Label done;
626 if (mode_ != LATIN1) {
627 // Table is 256 entries, so all Latin1 characters can be tested.
628 __ Cmp(current_character(), 'z');
629 __ B(hi, &done);
630 }
631 ExternalReference map = ExternalReference::re_word_character_map();
632 __ Mov(x10, map);
633 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
634 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
635 __ Bind(&done);
636 return true;
637 }
638 case '*':
639 // Match any character.
640 return true;
641 // No custom implementation (yet): s(UC16), S(UC16).
642 default:
643 return false;
644 }
645 }
646
647
Fail()648 void RegExpMacroAssemblerARM64::Fail() {
649 __ Mov(w0, FAILURE);
650 __ B(&exit_label_);
651 }
652
653
GetCode(Handle<String> source)654 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
655 Label return_w0;
656 // Finalize code - write the entry point code now we know how many
657 // registers we need.
658
659 // Entry code:
660 __ Bind(&entry_label_);
661
662 // Arguments on entry:
663 // x0: String* input
664 // x1: int start_offset
665 // x2: byte* input_start
666 // x3: byte* input_end
667 // x4: int* output array
668 // x5: int output array size
669 // x6: Address stack_base
670 // x7: int direct_call
671
672 // The stack pointer should be csp on entry.
673 // csp[8]: address of the current isolate
674 // csp[0]: secondary link/return address used by native call
675
676 // Tell the system that we have a stack frame. Because the type is MANUAL, no
677 // code is generated.
678 FrameScope scope(masm_, StackFrame::MANUAL);
679
680 // Push registers on the stack, only push the argument registers that we need.
681 CPURegList argument_registers(x0, x5, x6, x7);
682
683 CPURegList registers_to_retain = kCalleeSaved;
684 DCHECK(kCalleeSaved.Count() == 11);
685 registers_to_retain.Combine(lr);
686
687 DCHECK(csp.Is(__ StackPointer()));
688 __ PushCPURegList(registers_to_retain);
689 __ PushCPURegList(argument_registers);
690
691 // Set frame pointer in place.
692 __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
693
694 // Initialize callee-saved registers.
695 __ Mov(start_offset(), w1);
696 __ Mov(input_start(), x2);
697 __ Mov(input_end(), x3);
698 __ Mov(output_array(), x4);
699
700 // Set the number of registers we will need to allocate, that is:
701 // - success_counter (X register)
702 // - (num_registers_ - kNumCachedRegisters) (W registers)
703 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
704 // Do not allocate registers on the stack if they can all be cached.
705 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
706 // Make room for the success_counter.
707 num_wreg_to_allocate += 2;
708
709 // Make sure the stack alignment will be respected.
710 int alignment = masm_->ActivationFrameAlignment();
711 DCHECK_EQ(alignment % 16, 0);
712 int align_mask = (alignment / kWRegSize) - 1;
713 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
714
715 // Check if we have space on the stack.
716 Label stack_limit_hit;
717 Label stack_ok;
718
719 ExternalReference stack_limit =
720 ExternalReference::address_of_stack_limit(isolate());
721 __ Mov(x10, stack_limit);
722 __ Ldr(x10, MemOperand(x10));
723 __ Subs(x10, csp, x10);
724
725 // Handle it if the stack pointer is already below the stack limit.
726 __ B(ls, &stack_limit_hit);
727
728 // Check if there is room for the variable number of registers above
729 // the stack limit.
730 __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
731 __ B(hs, &stack_ok);
732
733 // Exit with OutOfMemory exception. There is not enough space on the stack
734 // for our working registers.
735 __ Mov(w0, EXCEPTION);
736 __ B(&return_w0);
737
738 __ Bind(&stack_limit_hit);
739 CallCheckStackGuardState(x10);
740 // If returned value is non-zero, we exit with the returned value as result.
741 __ Cbnz(w0, &return_w0);
742
743 __ Bind(&stack_ok);
744
745 // Allocate space on stack.
746 __ Claim(num_wreg_to_allocate, kWRegSize);
747
748 // Initialize success_counter with 0.
749 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
750
751 // Find negative length (offset of start relative to end).
752 __ Sub(x10, input_start(), input_end());
753 if (masm_->emit_debug_code()) {
754 // Check that the input string length is < 2^30.
755 __ Neg(x11, x10);
756 __ Cmp(x11, (1<<30) - 1);
757 __ Check(ls, kInputStringTooLong);
758 }
759 __ Mov(current_input_offset(), w10);
760
761 // The non-position value is used as a clearing value for the
762 // capture registers, it corresponds to the position of the first character
763 // minus one.
764 __ Sub(non_position_value(), current_input_offset(), char_size());
765 __ Sub(non_position_value(), non_position_value(),
766 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
767 // We can store this value twice in an X register for initializing
768 // on-stack registers later.
769 __ Orr(twice_non_position_value(),
770 non_position_value().X(),
771 Operand(non_position_value().X(), LSL, kWRegSizeInBits));
772
773 // Initialize code pointer register.
774 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
775
776 Label load_char_start_regexp, start_regexp;
777 // Load newline if index is at start, previous character otherwise.
778 __ Cbnz(start_offset(), &load_char_start_regexp);
779 __ Mov(current_character(), '\n');
780 __ B(&start_regexp);
781
782 // Global regexp restarts matching here.
783 __ Bind(&load_char_start_regexp);
784 // Load previous char as initial value of current character register.
785 LoadCurrentCharacterUnchecked(-1, 1);
786 __ Bind(&start_regexp);
787 // Initialize on-stack registers.
788 if (num_saved_registers_ > 0) {
789 ClearRegisters(0, num_saved_registers_ - 1);
790 }
791
792 // Initialize backtrack stack pointer.
793 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
794
795 // Execute
796 __ B(&start_label_);
797
798 if (backtrack_label_.is_linked()) {
799 __ Bind(&backtrack_label_);
800 Backtrack();
801 }
802
803 if (success_label_.is_linked()) {
804 Register first_capture_start = w15;
805
806 // Save captures when successful.
807 __ Bind(&success_label_);
808
809 if (num_saved_registers_ > 0) {
810 // V8 expects the output to be an int32_t array.
811 Register capture_start = w12;
812 Register capture_end = w13;
813 Register input_length = w14;
814
815 // Copy captures to output.
816
817 // Get string length.
818 __ Sub(x10, input_end(), input_start());
819 if (masm_->emit_debug_code()) {
820 // Check that the input string length is < 2^30.
821 __ Cmp(x10, (1<<30) - 1);
822 __ Check(ls, kInputStringTooLong);
823 }
824 // input_start has a start_offset offset on entry. We need to include
825 // it when computing the length of the whole string.
826 if (mode_ == UC16) {
827 __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
828 } else {
829 __ Add(input_length, start_offset(), w10);
830 }
831
832 // Copy the results to the output array from the cached registers first.
833 for (int i = 0;
834 (i < num_saved_registers_) && (i < kNumCachedRegisters);
835 i += 2) {
836 __ Mov(capture_start.X(), GetCachedRegister(i));
837 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
838 if ((i == 0) && global_with_zero_length_check()) {
839 // Keep capture start for the zero-length check later.
840 __ Mov(first_capture_start, capture_start);
841 }
842 // Offsets need to be relative to the start of the string.
843 if (mode_ == UC16) {
844 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
845 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
846 } else {
847 __ Add(capture_start, input_length, capture_start);
848 __ Add(capture_end, input_length, capture_end);
849 }
850 // The output pointer advances for a possible global match.
851 __ Stp(capture_start,
852 capture_end,
853 MemOperand(output_array(), kPointerSize, PostIndex));
854 }
855
856 // Only carry on if there are more than kNumCachedRegisters capture
857 // registers.
858 int num_registers_left_on_stack =
859 num_saved_registers_ - kNumCachedRegisters;
860 if (num_registers_left_on_stack > 0) {
861 Register base = x10;
862 // There are always an even number of capture registers. A couple of
863 // registers determine one match with two offsets.
864 DCHECK_EQ(0, num_registers_left_on_stack % 2);
865 __ Add(base, frame_pointer(), kFirstCaptureOnStack);
866
867 // We can unroll the loop here, we should not unroll for less than 2
868 // registers.
869 STATIC_ASSERT(kNumRegistersToUnroll > 2);
870 if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
871 for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
872 __ Ldp(capture_end,
873 capture_start,
874 MemOperand(base, -kPointerSize, PostIndex));
875 if ((i == 0) && global_with_zero_length_check()) {
876 // Keep capture start for the zero-length check later.
877 __ Mov(first_capture_start, capture_start);
878 }
879 // Offsets need to be relative to the start of the string.
880 if (mode_ == UC16) {
881 __ Add(capture_start,
882 input_length,
883 Operand(capture_start, ASR, 1));
884 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
885 } else {
886 __ Add(capture_start, input_length, capture_start);
887 __ Add(capture_end, input_length, capture_end);
888 }
889 // The output pointer advances for a possible global match.
890 __ Stp(capture_start,
891 capture_end,
892 MemOperand(output_array(), kPointerSize, PostIndex));
893 }
894 } else {
895 Label loop, start;
896 __ Mov(x11, num_registers_left_on_stack);
897
898 __ Ldp(capture_end,
899 capture_start,
900 MemOperand(base, -kPointerSize, PostIndex));
901 if (global_with_zero_length_check()) {
902 __ Mov(first_capture_start, capture_start);
903 }
904 __ B(&start);
905
906 __ Bind(&loop);
907 __ Ldp(capture_end,
908 capture_start,
909 MemOperand(base, -kPointerSize, PostIndex));
910 __ Bind(&start);
911 if (mode_ == UC16) {
912 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
913 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
914 } else {
915 __ Add(capture_start, input_length, capture_start);
916 __ Add(capture_end, input_length, capture_end);
917 }
918 // The output pointer advances for a possible global match.
919 __ Stp(capture_start,
920 capture_end,
921 MemOperand(output_array(), kPointerSize, PostIndex));
922 __ Sub(x11, x11, 2);
923 __ Cbnz(x11, &loop);
924 }
925 }
926 }
927
928 if (global()) {
929 Register success_counter = w0;
930 Register output_size = x10;
931 // Restart matching if the regular expression is flagged as global.
932
933 // Increment success counter.
934 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
935 __ Add(success_counter, success_counter, 1);
936 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
937
938 // Capture results have been stored, so the number of remaining global
939 // output registers is reduced by the number of stored captures.
940 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
941 __ Sub(output_size, output_size, num_saved_registers_);
942 // Check whether we have enough room for another set of capture results.
943 __ Cmp(output_size, num_saved_registers_);
944 __ B(lt, &return_w0);
945
946 // The output pointer is already set to the next field in the output
947 // array.
948 // Update output size on the frame before we restart matching.
949 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
950
951 if (global_with_zero_length_check()) {
952 // Special case for zero-length matches.
953 __ Cmp(current_input_offset(), first_capture_start);
954 // Not a zero-length match, restart.
955 __ B(ne, &load_char_start_regexp);
956 // Offset from the end is zero if we already reached the end.
957 __ Cbz(current_input_offset(), &return_w0);
958 // Advance current position after a zero-length match.
959 __ Add(current_input_offset(),
960 current_input_offset(),
961 Operand((mode_ == UC16) ? 2 : 1));
962 }
963
964 __ B(&load_char_start_regexp);
965 } else {
966 __ Mov(w0, SUCCESS);
967 }
968 }
969
970 if (exit_label_.is_linked()) {
971 // Exit and return w0
972 __ Bind(&exit_label_);
973 if (global()) {
974 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
975 }
976 }
977
978 __ Bind(&return_w0);
979
980 // Set stack pointer back to first register to retain
981 DCHECK(csp.Is(__ StackPointer()));
982 __ Mov(csp, fp);
983 __ AssertStackConsistency();
984
985 // Restore registers.
986 __ PopCPURegList(registers_to_retain);
987
988 __ Ret();
989
990 Label exit_with_exception;
991 // Registers x0 to x7 are used to store the first captures, they need to be
992 // retained over calls to C++ code.
993 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
994 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
995
996 if (check_preempt_label_.is_linked()) {
997 __ Bind(&check_preempt_label_);
998 SaveLinkRegister();
999 // The cached registers need to be retained.
1000 __ PushCPURegList(cached_registers);
1001 CallCheckStackGuardState(x10);
1002 // Returning from the regexp code restores the stack (csp <- fp)
1003 // so we don't need to drop the link register from it before exiting.
1004 __ Cbnz(w0, &return_w0);
1005 // Reset the cached registers.
1006 __ PopCPURegList(cached_registers);
1007 RestoreLinkRegister();
1008 __ Ret();
1009 }
1010
1011 if (stack_overflow_label_.is_linked()) {
1012 __ Bind(&stack_overflow_label_);
1013 SaveLinkRegister();
1014 // The cached registers need to be retained.
1015 __ PushCPURegList(cached_registers);
1016 // Call GrowStack(backtrack_stackpointer(), &stack_base)
1017 __ Mov(x2, ExternalReference::isolate_address(isolate()));
1018 __ Add(x1, frame_pointer(), kStackBase);
1019 __ Mov(x0, backtrack_stackpointer());
1020 ExternalReference grow_stack =
1021 ExternalReference::re_grow_stack(isolate());
1022 __ CallCFunction(grow_stack, 3);
1023 // If return NULL, we have failed to grow the stack, and
1024 // must exit with a stack-overflow exception.
1025 // Returning from the regexp code restores the stack (csp <- fp)
1026 // so we don't need to drop the link register from it before exiting.
1027 __ Cbz(w0, &exit_with_exception);
1028 // Otherwise use return value as new stack pointer.
1029 __ Mov(backtrack_stackpointer(), x0);
1030 // Reset the cached registers.
1031 __ PopCPURegList(cached_registers);
1032 RestoreLinkRegister();
1033 __ Ret();
1034 }
1035
1036 if (exit_with_exception.is_linked()) {
1037 __ Bind(&exit_with_exception);
1038 __ Mov(w0, EXCEPTION);
1039 __ B(&return_w0);
1040 }
1041
1042 CodeDesc code_desc;
1043 masm_->GetCode(&code_desc);
1044 Handle<Code> code = isolate()->factory()->NewCode(
1045 code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
1046 PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source));
1047 return Handle<HeapObject>::cast(code);
1048 }
1049
1050
GoTo(Label * to)1051 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1052 BranchOrBacktrack(al, to);
1053 }
1054
IfRegisterGE(int reg,int comparand,Label * if_ge)1055 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1056 Label* if_ge) {
1057 Register to_compare = GetRegister(reg, w10);
1058 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1059 }
1060
1061
IfRegisterLT(int reg,int comparand,Label * if_lt)1062 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1063 Label* if_lt) {
1064 Register to_compare = GetRegister(reg, w10);
1065 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1066 }
1067
1068
IfRegisterEqPos(int reg,Label * if_eq)1069 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1070 Register to_compare = GetRegister(reg, w10);
1071 __ Cmp(to_compare, current_input_offset());
1072 BranchOrBacktrack(eq, if_eq);
1073 }
1074
1075 RegExpMacroAssembler::IrregexpImplementation
Implementation()1076 RegExpMacroAssemblerARM64::Implementation() {
1077 return kARM64Implementation;
1078 }
1079
1080
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1081 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1082 Label* on_end_of_input,
1083 bool check_bounds,
1084 int characters) {
1085 // TODO(pielan): Make sure long strings are caught before this, and not
1086 // just asserted in debug mode.
1087 DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
1088 // Be sane! (And ensure that an int32_t can be used to index the string)
1089 DCHECK(cp_offset < (1<<30));
1090 if (check_bounds) {
1091 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1092 }
1093 LoadCurrentCharacterUnchecked(cp_offset, characters);
1094 }
1095
1096
PopCurrentPosition()1097 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1098 Pop(current_input_offset());
1099 }
1100
1101
PopRegister(int register_index)1102 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1103 Pop(w10);
1104 StoreRegister(register_index, w10);
1105 }
1106
1107
PushBacktrack(Label * label)1108 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1109 if (label->is_bound()) {
1110 int target = label->pos();
1111 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1112 } else {
1113 __ Adr(x10, label, MacroAssembler::kAdrFar);
1114 __ Sub(x10, x10, code_pointer());
1115 if (masm_->emit_debug_code()) {
1116 __ Cmp(x10, kWRegMask);
1117 // The code offset has to fit in a W register.
1118 __ Check(ls, kOffsetOutOfRange);
1119 }
1120 }
1121 Push(w10);
1122 CheckStackLimit();
1123 }
1124
1125
PushCurrentPosition()1126 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1127 Push(current_input_offset());
1128 }
1129
1130
PushRegister(int register_index,StackCheckFlag check_stack_limit)1131 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1132 StackCheckFlag check_stack_limit) {
1133 Register to_push = GetRegister(register_index, w10);
1134 Push(to_push);
1135 if (check_stack_limit) CheckStackLimit();
1136 }
1137
1138
ReadCurrentPositionFromRegister(int reg)1139 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1140 Register cached_register;
1141 RegisterState register_state = GetRegisterState(reg);
1142 switch (register_state) {
1143 case STACKED:
1144 __ Ldr(current_input_offset(), register_location(reg));
1145 break;
1146 case CACHED_LSW:
1147 cached_register = GetCachedRegister(reg);
1148 __ Mov(current_input_offset(), cached_register.W());
1149 break;
1150 case CACHED_MSW:
1151 cached_register = GetCachedRegister(reg);
1152 __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
1153 break;
1154 default:
1155 UNREACHABLE();
1156 break;
1157 }
1158 }
1159
1160
ReadStackPointerFromRegister(int reg)1161 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1162 Register read_from = GetRegister(reg, w10);
1163 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1164 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1165 }
1166
1167
SetCurrentPositionFromEnd(int by)1168 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1169 Label after_position;
1170 __ Cmp(current_input_offset(), -by * char_size());
1171 __ B(ge, &after_position);
1172 __ Mov(current_input_offset(), -by * char_size());
1173 // On RegExp code entry (where this operation is used), the character before
1174 // the current position is expected to be already loaded.
1175 // We have advanced the position, so it's safe to read backwards.
1176 LoadCurrentCharacterUnchecked(-1, 1);
1177 __ Bind(&after_position);
1178 }
1179
1180
SetRegister(int register_index,int to)1181 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1182 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1183 Register set_to = wzr;
1184 if (to != 0) {
1185 set_to = w10;
1186 __ Mov(set_to, to);
1187 }
1188 StoreRegister(register_index, set_to);
1189 }
1190
1191
Succeed()1192 bool RegExpMacroAssemblerARM64::Succeed() {
1193 __ B(&success_label_);
1194 return global();
1195 }
1196
1197
WriteCurrentPositionToRegister(int reg,int cp_offset)1198 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1199 int cp_offset) {
1200 Register position = current_input_offset();
1201 if (cp_offset != 0) {
1202 position = w10;
1203 __ Add(position, current_input_offset(), cp_offset * char_size());
1204 }
1205 StoreRegister(reg, position);
1206 }
1207
1208
ClearRegisters(int reg_from,int reg_to)1209 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1210 DCHECK(reg_from <= reg_to);
1211 int num_registers = reg_to - reg_from + 1;
1212
1213 // If the first capture register is cached in a hardware register but not
1214 // aligned on a 64-bit one, we need to clear the first one specifically.
1215 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1216 StoreRegister(reg_from, non_position_value());
1217 num_registers--;
1218 reg_from++;
1219 }
1220
1221 // Clear cached registers in pairs as far as possible.
1222 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1223 DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1224 __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1225 reg_from += 2;
1226 num_registers -= 2;
1227 }
1228
1229 if ((num_registers % 2) == 1) {
1230 StoreRegister(reg_from, non_position_value());
1231 num_registers--;
1232 reg_from++;
1233 }
1234
1235 if (num_registers > 0) {
1236 // If there are some remaining registers, they are stored on the stack.
1237 DCHECK(reg_from >= kNumCachedRegisters);
1238
1239 // Move down the indexes of the registers on stack to get the correct offset
1240 // in memory.
1241 reg_from -= kNumCachedRegisters;
1242 reg_to -= kNumCachedRegisters;
1243 // We should not unroll the loop for less than 2 registers.
1244 STATIC_ASSERT(kNumRegistersToUnroll > 2);
1245 // We position the base pointer to (reg_from + 1).
1246 int base_offset = kFirstRegisterOnStack -
1247 kWRegSize - (kWRegSize * reg_from);
1248 if (num_registers > kNumRegistersToUnroll) {
1249 Register base = x10;
1250 __ Add(base, frame_pointer(), base_offset);
1251
1252 Label loop;
1253 __ Mov(x11, num_registers);
1254 __ Bind(&loop);
1255 __ Str(twice_non_position_value(),
1256 MemOperand(base, -kPointerSize, PostIndex));
1257 __ Sub(x11, x11, 2);
1258 __ Cbnz(x11, &loop);
1259 } else {
1260 for (int i = reg_from; i <= reg_to; i += 2) {
1261 __ Str(twice_non_position_value(),
1262 MemOperand(frame_pointer(), base_offset));
1263 base_offset -= kWRegSize * 2;
1264 }
1265 }
1266 }
1267 }
1268
1269
WriteStackPointerToRegister(int reg)1270 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1271 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1272 __ Sub(x10, backtrack_stackpointer(), x10);
1273 if (masm_->emit_debug_code()) {
1274 __ Cmp(x10, Operand(w10, SXTW));
1275 // The stack offset needs to fit in a W register.
1276 __ Check(eq, kOffsetOutOfRange);
1277 }
1278 StoreRegister(reg, w10);
1279 }
1280
1281
1282 // Helper function for reading a value out of a stack frame.
1283 template <typename T>
frame_entry(Address re_frame,int frame_offset)1284 static T& frame_entry(Address re_frame, int frame_offset) {
1285 return *reinterpret_cast<T*>(re_frame + frame_offset);
1286 }
1287
1288
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_offset,const byte ** input_start,const byte ** input_end)1289 int RegExpMacroAssemblerARM64::CheckStackGuardState(Address* return_address,
1290 Code* re_code,
1291 Address re_frame,
1292 int start_offset,
1293 const byte** input_start,
1294 const byte** input_end) {
1295 Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
1296 StackLimitCheck check(isolate);
1297 if (check.JsHasOverflowed()) {
1298 isolate->StackOverflow();
1299 return EXCEPTION;
1300 }
1301
1302 // If not real stack overflow the stack guard was used to interrupt
1303 // execution for another purpose.
1304
1305 // If this is a direct call from JavaScript retry the RegExp forcing the call
1306 // through the runtime system. Currently the direct call cannot handle a GC.
1307 if (frame_entry<int>(re_frame, kDirectCall) == 1) {
1308 return RETRY;
1309 }
1310
1311 // Prepare for possible GC.
1312 HandleScope handles(isolate);
1313 Handle<Code> code_handle(re_code);
1314
1315 Handle<String> subject(frame_entry<String*>(re_frame, kInput));
1316
1317 // Current string.
1318 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
1319
1320 DCHECK(re_code->instruction_start() <= *return_address);
1321 DCHECK(*return_address <=
1322 re_code->instruction_start() + re_code->instruction_size());
1323
1324 Object* result = isolate->stack_guard()->HandleInterrupts();
1325
1326 if (*code_handle != re_code) { // Return address no longer valid
1327 int delta = code_handle->address() - re_code->address();
1328 // Overwrite the return address on the stack.
1329 *return_address += delta;
1330 }
1331
1332 if (result->IsException()) {
1333 return EXCEPTION;
1334 }
1335
1336 Handle<String> subject_tmp = subject;
1337 int slice_offset = 0;
1338
1339 // Extract the underlying string and the slice offset.
1340 if (StringShape(*subject_tmp).IsCons()) {
1341 subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
1342 } else if (StringShape(*subject_tmp).IsSliced()) {
1343 SlicedString* slice = SlicedString::cast(*subject_tmp);
1344 subject_tmp = Handle<String>(slice->parent());
1345 slice_offset = slice->offset();
1346 }
1347
1348 // String might have changed.
1349 if (subject_tmp->IsOneByteRepresentation() != is_one_byte) {
1350 // If we changed between an Latin1 and an UC16 string, the specialized
1351 // code cannot be used, and we need to restart regexp matching from
1352 // scratch (including, potentially, compiling a new version of the code).
1353 return RETRY;
1354 }
1355
1356 // Otherwise, the content of the string might have moved. It must still
1357 // be a sequential or external string with the same content.
1358 // Update the start and end pointers in the stack frame to the current
1359 // location (whether it has actually moved or not).
1360 DCHECK(StringShape(*subject_tmp).IsSequential() ||
1361 StringShape(*subject_tmp).IsExternal());
1362
1363 // The original start address of the characters to match.
1364 const byte* start_address = *input_start;
1365
1366 // Find the current start address of the same character at the current string
1367 // position.
1368 const byte* new_address = StringCharacterPosition(*subject_tmp,
1369 start_offset + slice_offset);
1370
1371 if (start_address != new_address) {
1372 // If there is a difference, update the object pointer and start and end
1373 // addresses in the RegExp stack frame to match the new value.
1374 const byte* end_address = *input_end;
1375 int byte_length = static_cast<int>(end_address - start_address);
1376 frame_entry<const String*>(re_frame, kInput) = *subject;
1377 *input_start = new_address;
1378 *input_end = new_address + byte_length;
1379 } else if (frame_entry<const String*>(re_frame, kInput) != *subject) {
1380 // Subject string might have been a ConsString that underwent
1381 // short-circuiting during GC. That will not change start_address but
1382 // will change pointer inside the subject handle.
1383 frame_entry<const String*>(re_frame, kInput) = *subject;
1384 }
1385
1386 return 0;
1387 }
1388
1389
CheckPosition(int cp_offset,Label * on_outside_input)1390 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1391 Label* on_outside_input) {
1392 CompareAndBranchOrBacktrack(current_input_offset(),
1393 -cp_offset * char_size(),
1394 ge,
1395 on_outside_input);
1396 }
1397
1398
CanReadUnaligned()1399 bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
1400 // TODO(pielan): See whether or not we should disable unaligned accesses.
1401 return !slow_safe();
1402 }
1403
1404
1405 // Private methods:
1406
CallCheckStackGuardState(Register scratch)1407 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1408 // Allocate space on the stack to store the return address. The
1409 // CheckStackGuardState C++ function will override it if the code
1410 // moved. Allocate extra space for 2 arguments passed by pointers.
1411 // AAPCS64 requires the stack to be 16 byte aligned.
1412 int alignment = masm_->ActivationFrameAlignment();
1413 DCHECK_EQ(alignment % 16, 0);
1414 int align_mask = (alignment / kXRegSize) - 1;
1415 int xreg_to_claim = (3 + align_mask) & ~align_mask;
1416
1417 DCHECK(csp.Is(__ StackPointer()));
1418 __ Claim(xreg_to_claim);
1419
1420 // CheckStackGuardState needs the end and start addresses of the input string.
1421 __ Poke(input_end(), 2 * kPointerSize);
1422 __ Add(x5, csp, 2 * kPointerSize);
1423 __ Poke(input_start(), kPointerSize);
1424 __ Add(x4, csp, kPointerSize);
1425
1426 __ Mov(w3, start_offset());
1427 // RegExp code frame pointer.
1428 __ Mov(x2, frame_pointer());
1429 // Code* of self.
1430 __ Mov(x1, Operand(masm_->CodeObject()));
1431
1432 // We need to pass a pointer to the return address as first argument.
1433 // The DirectCEntry stub will place the return address on the stack before
1434 // calling so the stack pointer will point to it.
1435 __ Mov(x0, csp);
1436
1437 ExternalReference check_stack_guard_state =
1438 ExternalReference::re_check_stack_guard_state(isolate());
1439 __ Mov(scratch, check_stack_guard_state);
1440 DirectCEntryStub stub(isolate());
1441 stub.GenerateCall(masm_, scratch);
1442
1443 // The input string may have been moved in memory, we need to reload it.
1444 __ Peek(input_start(), kPointerSize);
1445 __ Peek(input_end(), 2 * kPointerSize);
1446
1447 DCHECK(csp.Is(__ StackPointer()));
1448 __ Drop(xreg_to_claim);
1449
1450 // Reload the Code pointer.
1451 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1452 }
1453
BranchOrBacktrack(Condition condition,Label * to)1454 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1455 Label* to) {
1456 if (condition == al) { // Unconditional.
1457 if (to == NULL) {
1458 Backtrack();
1459 return;
1460 }
1461 __ B(to);
1462 return;
1463 }
1464 if (to == NULL) {
1465 to = &backtrack_label_;
1466 }
1467 __ B(condition, to);
1468 }
1469
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1470 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1471 int immediate,
1472 Condition condition,
1473 Label* to) {
1474 if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1475 if (to == NULL) {
1476 to = &backtrack_label_;
1477 }
1478 if (condition == eq) {
1479 __ Cbz(reg, to);
1480 } else {
1481 __ Cbnz(reg, to);
1482 }
1483 } else {
1484 __ Cmp(reg, immediate);
1485 BranchOrBacktrack(condition, to);
1486 }
1487 }
1488
1489
CheckPreemption()1490 void RegExpMacroAssemblerARM64::CheckPreemption() {
1491 // Check for preemption.
1492 ExternalReference stack_limit =
1493 ExternalReference::address_of_stack_limit(isolate());
1494 __ Mov(x10, stack_limit);
1495 __ Ldr(x10, MemOperand(x10));
1496 DCHECK(csp.Is(__ StackPointer()));
1497 __ Cmp(csp, x10);
1498 CallIf(&check_preempt_label_, ls);
1499 }
1500
1501
CheckStackLimit()1502 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1503 ExternalReference stack_limit =
1504 ExternalReference::address_of_regexp_stack_limit(isolate());
1505 __ Mov(x10, stack_limit);
1506 __ Ldr(x10, MemOperand(x10));
1507 __ Cmp(backtrack_stackpointer(), x10);
1508 CallIf(&stack_overflow_label_, ls);
1509 }
1510
1511
Push(Register source)1512 void RegExpMacroAssemblerARM64::Push(Register source) {
1513 DCHECK(source.Is32Bits());
1514 DCHECK(!source.is(backtrack_stackpointer()));
1515 __ Str(source,
1516 MemOperand(backtrack_stackpointer(),
1517 -static_cast<int>(kWRegSize),
1518 PreIndex));
1519 }
1520
1521
Pop(Register target)1522 void RegExpMacroAssemblerARM64::Pop(Register target) {
1523 DCHECK(target.Is32Bits());
1524 DCHECK(!target.is(backtrack_stackpointer()));
1525 __ Ldr(target,
1526 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1527 }
1528
1529
GetCachedRegister(int register_index)1530 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1531 DCHECK(register_index < kNumCachedRegisters);
1532 return Register::Create(register_index / 2, kXRegSizeInBits);
1533 }
1534
1535
GetRegister(int register_index,Register maybe_result)1536 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1537 Register maybe_result) {
1538 DCHECK(maybe_result.Is32Bits());
1539 DCHECK(register_index >= 0);
1540 if (num_registers_ <= register_index) {
1541 num_registers_ = register_index + 1;
1542 }
1543 Register result;
1544 RegisterState register_state = GetRegisterState(register_index);
1545 switch (register_state) {
1546 case STACKED:
1547 __ Ldr(maybe_result, register_location(register_index));
1548 result = maybe_result;
1549 break;
1550 case CACHED_LSW:
1551 result = GetCachedRegister(register_index).W();
1552 break;
1553 case CACHED_MSW:
1554 __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1555 kWRegSizeInBits);
1556 result = maybe_result;
1557 break;
1558 default:
1559 UNREACHABLE();
1560 break;
1561 }
1562 DCHECK(result.Is32Bits());
1563 return result;
1564 }
1565
1566
StoreRegister(int register_index,Register source)1567 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1568 Register source) {
1569 DCHECK(source.Is32Bits());
1570 DCHECK(register_index >= 0);
1571 if (num_registers_ <= register_index) {
1572 num_registers_ = register_index + 1;
1573 }
1574
1575 Register cached_register;
1576 RegisterState register_state = GetRegisterState(register_index);
1577 switch (register_state) {
1578 case STACKED:
1579 __ Str(source, register_location(register_index));
1580 break;
1581 case CACHED_LSW:
1582 cached_register = GetCachedRegister(register_index);
1583 if (!source.Is(cached_register.W())) {
1584 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1585 }
1586 break;
1587 case CACHED_MSW:
1588 cached_register = GetCachedRegister(register_index);
1589 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1590 break;
1591 default:
1592 UNREACHABLE();
1593 break;
1594 }
1595 }
1596
1597
CallIf(Label * to,Condition condition)1598 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1599 Label skip_call;
1600 if (condition != al) __ B(&skip_call, NegateCondition(condition));
1601 __ Bl(to);
1602 __ Bind(&skip_call);
1603 }
1604
1605
RestoreLinkRegister()1606 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1607 DCHECK(csp.Is(__ StackPointer()));
1608 __ Pop(lr, xzr);
1609 __ Add(lr, lr, Operand(masm_->CodeObject()));
1610 }
1611
1612
SaveLinkRegister()1613 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1614 DCHECK(csp.Is(__ StackPointer()));
1615 __ Sub(lr, lr, Operand(masm_->CodeObject()));
1616 __ Push(xzr, lr);
1617 }
1618
1619
register_location(int register_index)1620 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1621 DCHECK(register_index < (1<<30));
1622 DCHECK(register_index >= kNumCachedRegisters);
1623 if (num_registers_ <= register_index) {
1624 num_registers_ = register_index + 1;
1625 }
1626 register_index -= kNumCachedRegisters;
1627 int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1628 return MemOperand(frame_pointer(), offset);
1629 }
1630
capture_location(int register_index,Register scratch)1631 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1632 Register scratch) {
1633 DCHECK(register_index < (1<<30));
1634 DCHECK(register_index < num_saved_registers_);
1635 DCHECK(register_index >= kNumCachedRegisters);
1636 DCHECK_EQ(register_index % 2, 0);
1637 register_index -= kNumCachedRegisters;
1638 int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1639 // capture_location is used with Stp instructions to load/store 2 registers.
1640 // The immediate field in the encoding is limited to 7 bits (signed).
1641 if (is_int7(offset)) {
1642 return MemOperand(frame_pointer(), offset);
1643 } else {
1644 __ Add(scratch, frame_pointer(), offset);
1645 return MemOperand(scratch);
1646 }
1647 }
1648
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1649 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1650 int characters) {
1651 Register offset = current_input_offset();
1652
1653 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1654 // and the operating system running on the target allow it.
1655 // If unaligned load/stores are not supported then this function must only
1656 // be used to load a single character at a time.
1657
1658 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1659 // disable it.
1660 // TODO(pielan): See whether or not we should disable unaligned accesses.
1661 if (!CanReadUnaligned()) {
1662 DCHECK(characters == 1);
1663 }
1664
1665 if (cp_offset != 0) {
1666 if (masm_->emit_debug_code()) {
1667 __ Mov(x10, cp_offset * char_size());
1668 __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1669 __ Cmp(x10, Operand(w10, SXTW));
1670 // The offset needs to fit in a W register.
1671 __ Check(eq, kOffsetOutOfRange);
1672 } else {
1673 __ Add(w10, current_input_offset(), cp_offset * char_size());
1674 }
1675 offset = w10;
1676 }
1677
1678 if (mode_ == LATIN1) {
1679 if (characters == 4) {
1680 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1681 } else if (characters == 2) {
1682 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1683 } else {
1684 DCHECK(characters == 1);
1685 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1686 }
1687 } else {
1688 DCHECK(mode_ == UC16);
1689 if (characters == 2) {
1690 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1691 } else {
1692 DCHECK(characters == 1);
1693 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1694 }
1695 }
1696 }
1697
1698 #endif // V8_INTERPRETED_REGEXP
1699
1700 }} // namespace v8::internal
1701
1702 #endif // V8_TARGET_ARCH_ARM64
1703