1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/v8.h"
6 
7 #if V8_TARGET_ARCH_ARM64
8 
9 #include "src/code-stubs.h"
10 #include "src/cpu-profiler.h"
11 #include "src/log.h"
12 #include "src/macro-assembler.h"
13 #include "src/regexp-macro-assembler.h"
14 #include "src/regexp-stack.h"
15 #include "src/unicode.h"
16 
17 #include "src/arm64/regexp-macro-assembler-arm64.h"
18 
19 namespace v8 {
20 namespace internal {
21 
22 #ifndef V8_INTERPRETED_REGEXP
23 /*
24  * This assembler uses the following register assignment convention:
25  * - w19     : Used to temporarily store a value before a call to C code.
26  *             See CheckNotBackReferenceIgnoreCase.
27  * - x20     : Pointer to the current code object (Code*),
28  *             it includes the heap object tag.
29  * - w21     : Current position in input, as negative offset from
30  *             the end of the string. Please notice that this is
31  *             the byte offset, not the character offset!
32  * - w22     : Currently loaded character. Must be loaded using
33  *             LoadCurrentCharacter before using any of the dispatch methods.
34  * - x23     : Points to tip of backtrack stack.
35  * - w24     : Position of the first character minus one: non_position_value.
36  *             Used to initialize capture registers.
37  * - x25     : Address at the end of the input string: input_end.
38  *             Points to byte after last character in input.
39  * - x26     : Address at the start of the input string: input_start.
40  * - w27     : Where to start in the input string.
41  * - x28     : Output array pointer.
42  * - x29/fp  : Frame pointer. Used to access arguments, local variables and
43  *             RegExp registers.
44  * - x16/x17 : IP registers, used by assembler. Very volatile.
45  * - csp     : Points to tip of C stack.
46  *
47  * - x0-x7   : Used as a cache to store 32 bit capture registers. These
48  *             registers need to be retained every time a call to C code
49  *             is done.
50  *
51  * The remaining registers are free for computations.
52  * Each call to a public method should retain this convention.
53  *
54  * The stack will have the following structure:
55  *
56  *  Location    Name               Description
57  *              (as referred to in
58  *              the code)
59  *
60  *  - fp[104]   isolate            Address of the current isolate.
61  *  - fp[96]    return_address     Secondary link/return address
62  *                                 used by an exit frame if this is a
63  *                                 native call.
64  *  ^^^ csp when called ^^^
65  *  - fp[88]    lr                 Return from the RegExp code.
66  *  - fp[80]    x29                Old frame pointer (CalleeSaved).
67  *  - fp[0..72] x19-x28            Backup of CalleeSaved registers.
68  *  - fp[-8]    direct_call        1 => Direct call from JavaScript code.
69  *                                 0 => Call through the runtime system.
70  *  - fp[-16]   stack_base         High end of the memory area to use as
71  *                                 the backtracking stack.
72  *  - fp[-24]   output_size        Output may fit multiple sets of matches.
73  *  - fp[-32]   input              Handle containing the input string.
74  *  - fp[-40]   success_counter
75  *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
76  *  - fp[-44]   register N         Capture registers initialized with
77  *  - fp[-48]   register N + 1     non_position_value.
78  *              ...                The first kNumCachedRegisters (N) registers
79  *              ...                are cached in x0 to x7.
80  *              ...                Only positions must be stored in the first
81  *  -           ...                num_saved_registers_ registers.
82  *  -           ...
83  *  -           register N + num_registers - 1
84  *  ^^^^^^^^^ csp ^^^^^^^^^
85  *
86  * The first num_saved_registers_ registers are initialized to point to
87  * "character -1" in the string (i.e., char_size() bytes before the first
88  * character of the string). The remaining registers start out as garbage.
89  *
90  * The data up to the return address must be placed there by the calling
91  * code and the remaining arguments are passed in registers, e.g. by calling the
92  * code entry as cast to a function with the signature:
93  * int (*match)(String* input,
94  *              int start_offset,
95  *              Address input_start,
96  *              Address input_end,
97  *              int* output,
98  *              int output_size,
99  *              Address stack_base,
100  *              bool direct_call = false,
101  *              Address secondary_return_address,  // Only used by native call.
102  *              Isolate* isolate)
103  * The call is performed by NativeRegExpMacroAssembler::Execute()
104  * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
105  * in arm64/simulator-arm64.h.
106  * When calling as a non-direct call (i.e., from C++ code), the return address
107  * area is overwritten with the LR register by the RegExp code. When doing a
108  * direct call from generated code, the return address is placed there by
109  * the calling code, as in a normal exit frame.
110  */
111 
112 #define __ ACCESS_MASM(masm_)
113 
RegExpMacroAssemblerARM64(Mode mode,int registers_to_save,Zone * zone)114 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(
115     Mode mode,
116     int registers_to_save,
117     Zone* zone)
118     : NativeRegExpMacroAssembler(zone),
119       masm_(new MacroAssembler(zone->isolate(), NULL, kRegExpCodeSize)),
120       mode_(mode),
121       num_registers_(registers_to_save),
122       num_saved_registers_(registers_to_save),
123       entry_label_(),
124       start_label_(),
125       success_label_(),
126       backtrack_label_(),
127       exit_label_() {
128   __ SetStackPointer(csp);
129   DCHECK_EQ(0, registers_to_save % 2);
130   // We can cache at most 16 W registers in x0-x7.
131   STATIC_ASSERT(kNumCachedRegisters <= 16);
132   STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
133   __ B(&entry_label_);   // We'll write the entry code later.
134   __ Bind(&start_label_);  // And then continue from here.
135 }
136 
137 
~RegExpMacroAssemblerARM64()138 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
139   delete masm_;
140   // Unuse labels in case we throw away the assembler without calling GetCode.
141   entry_label_.Unuse();
142   start_label_.Unuse();
143   success_label_.Unuse();
144   backtrack_label_.Unuse();
145   exit_label_.Unuse();
146   check_preempt_label_.Unuse();
147   stack_overflow_label_.Unuse();
148 }
149 
stack_limit_slack()150 int RegExpMacroAssemblerARM64::stack_limit_slack()  {
151   return RegExpStack::kStackLimitSlack;
152 }
153 
154 
AdvanceCurrentPosition(int by)155 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
156   if (by != 0) {
157     __ Add(current_input_offset(),
158            current_input_offset(), by * char_size());
159   }
160 }
161 
162 
AdvanceRegister(int reg,int by)163 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
164   DCHECK((reg >= 0) && (reg < num_registers_));
165   if (by != 0) {
166     Register to_advance;
167     RegisterState register_state = GetRegisterState(reg);
168     switch (register_state) {
169       case STACKED:
170         __ Ldr(w10, register_location(reg));
171         __ Add(w10, w10, by);
172         __ Str(w10, register_location(reg));
173         break;
174       case CACHED_LSW:
175         to_advance = GetCachedRegister(reg);
176         __ Add(to_advance, to_advance, by);
177         break;
178       case CACHED_MSW:
179         to_advance = GetCachedRegister(reg);
180         __ Add(to_advance, to_advance,
181                static_cast<int64_t>(by) << kWRegSizeInBits);
182         break;
183       default:
184         UNREACHABLE();
185         break;
186     }
187   }
188 }
189 
190 
Backtrack()191 void RegExpMacroAssemblerARM64::Backtrack() {
192   CheckPreemption();
193   Pop(w10);
194   __ Add(x10, code_pointer(), Operand(w10, UXTW));
195   __ Br(x10);
196 }
197 
198 
Bind(Label * label)199 void RegExpMacroAssemblerARM64::Bind(Label* label) {
200   __ Bind(label);
201 }
202 
203 
CheckCharacter(uint32_t c,Label * on_equal)204 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
205   CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
206 }
207 
208 
CheckCharacterGT(uc16 limit,Label * on_greater)209 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
210                                                  Label* on_greater) {
211   CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
212 }
213 
214 
CheckAtStart(Label * on_at_start)215 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
216   Label not_at_start;
217   // Did we start the match at the start of the input string?
218   CompareAndBranchOrBacktrack(start_offset(), 0, ne, &not_at_start);
219   // If we did, are we still at the start of the input string?
220   __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
221   __ Cmp(x10, input_start());
222   BranchOrBacktrack(eq, on_at_start);
223   __ Bind(&not_at_start);
224 }
225 
226 
CheckNotAtStart(Label * on_not_at_start)227 void RegExpMacroAssemblerARM64::CheckNotAtStart(Label* on_not_at_start) {
228   // Did we start the match at the start of the input string?
229   CompareAndBranchOrBacktrack(start_offset(), 0, ne, on_not_at_start);
230   // If we did, are we still at the start of the input string?
231   __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
232   __ Cmp(x10, input_start());
233   BranchOrBacktrack(ne, on_not_at_start);
234 }
235 
236 
CheckCharacterLT(uc16 limit,Label * on_less)237 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
238   CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
239 }
240 
241 
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)242 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
243                                               int cp_offset,
244                                               Label* on_failure,
245                                               bool check_end_of_string) {
246   // This method is only ever called from the cctests.
247 
248   if (check_end_of_string) {
249     // Is last character of required match inside string.
250     CheckPosition(cp_offset + str.length() - 1, on_failure);
251   }
252 
253   Register characters_address = x11;
254 
255   __ Add(characters_address,
256          input_end(),
257          Operand(current_input_offset(), SXTW));
258   if (cp_offset != 0) {
259     __ Add(characters_address, characters_address, cp_offset * char_size());
260   }
261 
262   for (int i = 0; i < str.length(); i++) {
263     if (mode_ == LATIN1) {
264       __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
265       DCHECK(str[i] <= String::kMaxOneByteCharCode);
266     } else {
267       __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
268     }
269     CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
270   }
271 }
272 
273 
CheckGreedyLoop(Label * on_equal)274 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
275   __ Ldr(w10, MemOperand(backtrack_stackpointer()));
276   __ Cmp(current_input_offset(), w10);
277   __ Cset(x11, eq);
278   __ Add(backtrack_stackpointer(),
279          backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
280   BranchOrBacktrack(eq, on_equal);
281 }
282 
CheckNotBackReferenceIgnoreCase(int start_reg,Label * on_no_match)283 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
284     int start_reg,
285     Label* on_no_match) {
286   Label fallthrough;
287 
288   Register capture_start_offset = w10;
289   // Save the capture length in a callee-saved register so it will
290   // be preserved if we call a C helper.
291   Register capture_length = w19;
292   DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
293 
294   // Find length of back-referenced capture.
295   DCHECK((start_reg % 2) == 0);
296   if (start_reg < kNumCachedRegisters) {
297     __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
298     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
299   } else {
300     __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
301   }
302   __ Sub(capture_length, w11, capture_start_offset);  // Length to check.
303   // Succeed on empty capture (including no capture).
304   __ Cbz(capture_length, &fallthrough);
305 
306   // Check that there are enough characters left in the input.
307   __ Cmn(capture_length, current_input_offset());
308   BranchOrBacktrack(gt, on_no_match);
309 
310   if (mode_ == LATIN1) {
311     Label success;
312     Label fail;
313     Label loop_check;
314 
315     Register capture_start_address = x12;
316     Register capture_end_addresss = x13;
317     Register current_position_address = x14;
318 
319     __ Add(capture_start_address,
320            input_end(),
321            Operand(capture_start_offset, SXTW));
322     __ Add(capture_end_addresss,
323            capture_start_address,
324            Operand(capture_length, SXTW));
325     __ Add(current_position_address,
326            input_end(),
327            Operand(current_input_offset(), SXTW));
328 
329     Label loop;
330     __ Bind(&loop);
331     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
332     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
333     __ Cmp(w10, w11);
334     __ B(eq, &loop_check);
335 
336     // Mismatch, try case-insensitive match (converting letters to lower-case).
337     __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
338     __ Orr(w11, w11, 0x20);  // Also convert input character.
339     __ Cmp(w11, w10);
340     __ B(ne, &fail);
341     __ Sub(w10, w10, 'a');
342     __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
343     __ B(ls, &loop_check);  // In range 'a'-'z'.
344     // Latin-1: Check for values in range [224,254] but not 247.
345     __ Sub(w10, w10, 224 - 'a');
346     __ Cmp(w10, 254 - 224);
347     __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
348     __ B(eq, &fail);  // Weren't Latin-1 letters.
349 
350     __ Bind(&loop_check);
351     __ Cmp(capture_start_address, capture_end_addresss);
352     __ B(lt, &loop);
353     __ B(&success);
354 
355     __ Bind(&fail);
356     BranchOrBacktrack(al, on_no_match);
357 
358     __ Bind(&success);
359     // Compute new value of character position after the matched part.
360     __ Sub(current_input_offset().X(), current_position_address, input_end());
361     if (masm_->emit_debug_code()) {
362       __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
363       __ Ccmp(current_input_offset(), 0, NoFlag, eq);
364       // The current input offset should be <= 0, and fit in a W register.
365       __ Check(le, kOffsetOutOfRange);
366     }
367   } else {
368     DCHECK(mode_ == UC16);
369     int argument_count = 4;
370 
371     // The cached registers need to be retained.
372     CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
373     DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
374     __ PushCPURegList(cached_registers);
375 
376     // Put arguments into arguments registers.
377     // Parameters are
378     //   x0: Address byte_offset1 - Address captured substring's start.
379     //   x1: Address byte_offset2 - Address of current character position.
380     //   w2: size_t byte_length - length of capture in bytes(!)
381     //   x3: Isolate* isolate
382 
383     // Address of start of capture.
384     __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
385     // Length of capture.
386     __ Mov(w2, capture_length);
387     // Address of current input position.
388     __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
389     // Isolate.
390     __ Mov(x3, ExternalReference::isolate_address(isolate()));
391 
392     {
393       AllowExternalCallThatCantCauseGC scope(masm_);
394       ExternalReference function =
395           ExternalReference::re_case_insensitive_compare_uc16(isolate());
396       __ CallCFunction(function, argument_count);
397     }
398 
399     // Check if function returned non-zero for success or zero for failure.
400     // x0 is one of the registers used as a cache so it must be tested before
401     // the cache is restored.
402     __ Cmp(x0, 0);
403     __ PopCPURegList(cached_registers);
404     BranchOrBacktrack(eq, on_no_match);
405 
406     // On success, increment position by length of capture.
407     __ Add(current_input_offset(), current_input_offset(), capture_length);
408   }
409 
410   __ Bind(&fallthrough);
411 }
412 
CheckNotBackReference(int start_reg,Label * on_no_match)413 void RegExpMacroAssemblerARM64::CheckNotBackReference(
414     int start_reg,
415     Label* on_no_match) {
416   Label fallthrough;
417 
418   Register capture_start_address = x12;
419   Register capture_end_address = x13;
420   Register current_position_address = x14;
421   Register capture_length = w15;
422 
423   // Find length of back-referenced capture.
424   DCHECK((start_reg % 2) == 0);
425   if (start_reg < kNumCachedRegisters) {
426     __ Mov(x10, GetCachedRegister(start_reg));
427     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
428   } else {
429     __ Ldp(w11, w10, capture_location(start_reg, x10));
430   }
431   __ Sub(capture_length, w11, w10);  // Length to check.
432   // Succeed on empty capture (including no capture).
433   __ Cbz(capture_length, &fallthrough);
434 
435   // Check that there are enough characters left in the input.
436   __ Cmn(capture_length, current_input_offset());
437   BranchOrBacktrack(gt, on_no_match);
438 
439   // Compute pointers to match string and capture string
440   __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
441   __ Add(capture_end_address,
442          capture_start_address,
443          Operand(capture_length, SXTW));
444   __ Add(current_position_address,
445          input_end(),
446          Operand(current_input_offset(), SXTW));
447 
448   Label loop;
449   __ Bind(&loop);
450   if (mode_ == LATIN1) {
451     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
452     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
453   } else {
454     DCHECK(mode_ == UC16);
455     __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
456     __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
457   }
458   __ Cmp(w10, w11);
459   BranchOrBacktrack(ne, on_no_match);
460   __ Cmp(capture_start_address, capture_end_address);
461   __ B(lt, &loop);
462 
463   // Move current character position to position after match.
464   __ Sub(current_input_offset().X(), current_position_address, input_end());
465   if (masm_->emit_debug_code()) {
466     __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
467     __ Ccmp(current_input_offset(), 0, NoFlag, eq);
468     // The current input offset should be <= 0, and fit in a W register.
469     __ Check(le, kOffsetOutOfRange);
470   }
471   __ Bind(&fallthrough);
472 }
473 
474 
CheckNotCharacter(unsigned c,Label * on_not_equal)475 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
476                                                   Label* on_not_equal) {
477   CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
478 }
479 
480 
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)481 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
482                                                        uint32_t mask,
483                                                        Label* on_equal) {
484   __ And(w10, current_character(), mask);
485   CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
486 }
487 
488 
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)489 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
490                                                           unsigned mask,
491                                                           Label* on_not_equal) {
492   __ And(w10, current_character(), mask);
493   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
494 }
495 
496 
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)497 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
498     uc16 c,
499     uc16 minus,
500     uc16 mask,
501     Label* on_not_equal) {
502   DCHECK(minus < String::kMaxUtf16CodeUnit);
503   __ Sub(w10, current_character(), minus);
504   __ And(w10, w10, mask);
505   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
506 }
507 
508 
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)509 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
510     uc16 from,
511     uc16 to,
512     Label* on_in_range) {
513   __ Sub(w10, current_character(), from);
514   // Unsigned lower-or-same condition.
515   CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
516 }
517 
518 
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)519 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
520     uc16 from,
521     uc16 to,
522     Label* on_not_in_range) {
523   __ Sub(w10, current_character(), from);
524   // Unsigned higher condition.
525   CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
526 }
527 
528 
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)529 void RegExpMacroAssemblerARM64::CheckBitInTable(
530     Handle<ByteArray> table,
531     Label* on_bit_set) {
532   __ Mov(x11, Operand(table));
533   if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
534     __ And(w10, current_character(), kTableMask);
535     __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
536   } else {
537     __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
538   }
539   __ Ldrb(w11, MemOperand(x11, w10, UXTW));
540   CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
541 }
542 
543 
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)544 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
545                                                            Label* on_no_match) {
546   // Range checks (c in min..max) are generally implemented by an unsigned
547   // (c - min) <= (max - min) check
548   switch (type) {
549   case 's':
550     // Match space-characters
551     if (mode_ == LATIN1) {
552       // One byte space characters are '\t'..'\r', ' ' and \u00a0.
553       Label success;
554       // Check for ' ' or 0x00a0.
555       __ Cmp(current_character(), ' ');
556       __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
557       __ B(eq, &success);
558       // Check range 0x09..0x0d.
559       __ Sub(w10, current_character(), '\t');
560       CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
561       __ Bind(&success);
562       return true;
563     }
564     return false;
565   case 'S':
566     // The emitted code for generic character classes is good enough.
567     return false;
568   case 'd':
569     // Match ASCII digits ('0'..'9').
570     __ Sub(w10, current_character(), '0');
571     CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
572     return true;
573   case 'D':
574     // Match ASCII non-digits.
575     __ Sub(w10, current_character(), '0');
576     CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
577     return true;
578   case '.': {
579     // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
580     // Here we emit the conditional branch only once at the end to make branch
581     // prediction more efficient, even though we could branch out of here
582     // as soon as a character matches.
583     __ Cmp(current_character(), 0x0a);
584     __ Ccmp(current_character(), 0x0d, ZFlag, ne);
585     if (mode_ == UC16) {
586       __ Sub(w10, current_character(), 0x2028);
587       // If the Z flag was set we clear the flags to force a branch.
588       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
589       // ls -> !((C==1) && (Z==0))
590       BranchOrBacktrack(ls, on_no_match);
591     } else {
592       BranchOrBacktrack(eq, on_no_match);
593     }
594     return true;
595   }
596   case 'n': {
597     // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
598     // We have to check all 4 newline characters before emitting
599     // the conditional branch.
600     __ Cmp(current_character(), 0x0a);
601     __ Ccmp(current_character(), 0x0d, ZFlag, ne);
602     if (mode_ == UC16) {
603       __ Sub(w10, current_character(), 0x2028);
604       // If the Z flag was set we clear the flags to force a fall-through.
605       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
606       // hi -> (C==1) && (Z==0)
607       BranchOrBacktrack(hi, on_no_match);
608     } else {
609       BranchOrBacktrack(ne, on_no_match);
610     }
611     return true;
612   }
613   case 'w': {
614     if (mode_ != LATIN1) {
615       // Table is 256 entries, so all Latin1 characters can be tested.
616       CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
617     }
618     ExternalReference map = ExternalReference::re_word_character_map();
619     __ Mov(x10, map);
620     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
621     CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
622     return true;
623   }
624   case 'W': {
625     Label done;
626     if (mode_ != LATIN1) {
627       // Table is 256 entries, so all Latin1 characters can be tested.
628       __ Cmp(current_character(), 'z');
629       __ B(hi, &done);
630     }
631     ExternalReference map = ExternalReference::re_word_character_map();
632     __ Mov(x10, map);
633     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
634     CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
635     __ Bind(&done);
636     return true;
637   }
638   case '*':
639     // Match any character.
640     return true;
641   // No custom implementation (yet): s(UC16), S(UC16).
642   default:
643     return false;
644   }
645 }
646 
647 
Fail()648 void RegExpMacroAssemblerARM64::Fail() {
649   __ Mov(w0, FAILURE);
650   __ B(&exit_label_);
651 }
652 
653 
GetCode(Handle<String> source)654 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
655   Label return_w0;
656   // Finalize code - write the entry point code now we know how many
657   // registers we need.
658 
659   // Entry code:
660   __ Bind(&entry_label_);
661 
662   // Arguments on entry:
663   // x0:  String*  input
664   // x1:  int      start_offset
665   // x2:  byte*    input_start
666   // x3:  byte*    input_end
667   // x4:  int*     output array
668   // x5:  int      output array size
669   // x6:  Address  stack_base
670   // x7:  int      direct_call
671 
672   // The stack pointer should be csp on entry.
673   //  csp[8]:  address of the current isolate
674   //  csp[0]:  secondary link/return address used by native call
675 
676   // Tell the system that we have a stack frame.  Because the type is MANUAL, no
677   // code is generated.
678   FrameScope scope(masm_, StackFrame::MANUAL);
679 
680   // Push registers on the stack, only push the argument registers that we need.
681   CPURegList argument_registers(x0, x5, x6, x7);
682 
683   CPURegList registers_to_retain = kCalleeSaved;
684   DCHECK(kCalleeSaved.Count() == 11);
685   registers_to_retain.Combine(lr);
686 
687   DCHECK(csp.Is(__ StackPointer()));
688   __ PushCPURegList(registers_to_retain);
689   __ PushCPURegList(argument_registers);
690 
691   // Set frame pointer in place.
692   __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
693 
694   // Initialize callee-saved registers.
695   __ Mov(start_offset(), w1);
696   __ Mov(input_start(), x2);
697   __ Mov(input_end(), x3);
698   __ Mov(output_array(), x4);
699 
700   // Set the number of registers we will need to allocate, that is:
701   //   - success_counter (X register)
702   //   - (num_registers_ - kNumCachedRegisters) (W registers)
703   int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
704   // Do not allocate registers on the stack if they can all be cached.
705   if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
706   // Make room for the success_counter.
707   num_wreg_to_allocate += 2;
708 
709   // Make sure the stack alignment will be respected.
710   int alignment = masm_->ActivationFrameAlignment();
711   DCHECK_EQ(alignment % 16, 0);
712   int align_mask = (alignment / kWRegSize) - 1;
713   num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
714 
715   // Check if we have space on the stack.
716   Label stack_limit_hit;
717   Label stack_ok;
718 
719   ExternalReference stack_limit =
720       ExternalReference::address_of_stack_limit(isolate());
721   __ Mov(x10, stack_limit);
722   __ Ldr(x10, MemOperand(x10));
723   __ Subs(x10, csp, x10);
724 
725   // Handle it if the stack pointer is already below the stack limit.
726   __ B(ls, &stack_limit_hit);
727 
728   // Check if there is room for the variable number of registers above
729   // the stack limit.
730   __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
731   __ B(hs, &stack_ok);
732 
733   // Exit with OutOfMemory exception. There is not enough space on the stack
734   // for our working registers.
735   __ Mov(w0, EXCEPTION);
736   __ B(&return_w0);
737 
738   __ Bind(&stack_limit_hit);
739   CallCheckStackGuardState(x10);
740   // If returned value is non-zero, we exit with the returned value as result.
741   __ Cbnz(w0, &return_w0);
742 
743   __ Bind(&stack_ok);
744 
745   // Allocate space on stack.
746   __ Claim(num_wreg_to_allocate, kWRegSize);
747 
748   // Initialize success_counter with 0.
749   __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
750 
751   // Find negative length (offset of start relative to end).
752   __ Sub(x10, input_start(), input_end());
753   if (masm_->emit_debug_code()) {
754     // Check that the input string length is < 2^30.
755     __ Neg(x11, x10);
756     __ Cmp(x11, (1<<30) - 1);
757     __ Check(ls, kInputStringTooLong);
758   }
759   __ Mov(current_input_offset(), w10);
760 
761   // The non-position value is used as a clearing value for the
762   // capture registers, it corresponds to the position of the first character
763   // minus one.
764   __ Sub(non_position_value(), current_input_offset(), char_size());
765   __ Sub(non_position_value(), non_position_value(),
766          Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
767   // We can store this value twice in an X register for initializing
768   // on-stack registers later.
769   __ Orr(twice_non_position_value(),
770          non_position_value().X(),
771          Operand(non_position_value().X(), LSL, kWRegSizeInBits));
772 
773   // Initialize code pointer register.
774   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
775 
776   Label load_char_start_regexp, start_regexp;
777   // Load newline if index is at start, previous character otherwise.
778   __ Cbnz(start_offset(), &load_char_start_regexp);
779   __ Mov(current_character(), '\n');
780   __ B(&start_regexp);
781 
782   // Global regexp restarts matching here.
783   __ Bind(&load_char_start_regexp);
784   // Load previous char as initial value of current character register.
785   LoadCurrentCharacterUnchecked(-1, 1);
786   __ Bind(&start_regexp);
787   // Initialize on-stack registers.
788   if (num_saved_registers_ > 0) {
789     ClearRegisters(0, num_saved_registers_ - 1);
790   }
791 
792   // Initialize backtrack stack pointer.
793   __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
794 
795   // Execute
796   __ B(&start_label_);
797 
798   if (backtrack_label_.is_linked()) {
799     __ Bind(&backtrack_label_);
800     Backtrack();
801   }
802 
803   if (success_label_.is_linked()) {
804     Register first_capture_start = w15;
805 
806     // Save captures when successful.
807     __ Bind(&success_label_);
808 
809     if (num_saved_registers_ > 0) {
810       // V8 expects the output to be an int32_t array.
811       Register capture_start = w12;
812       Register capture_end = w13;
813       Register input_length = w14;
814 
815       // Copy captures to output.
816 
817       // Get string length.
818       __ Sub(x10, input_end(), input_start());
819       if (masm_->emit_debug_code()) {
820         // Check that the input string length is < 2^30.
821         __ Cmp(x10, (1<<30) - 1);
822         __ Check(ls, kInputStringTooLong);
823       }
824       // input_start has a start_offset offset on entry. We need to include
825       // it when computing the length of the whole string.
826       if (mode_ == UC16) {
827         __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
828       } else {
829         __ Add(input_length, start_offset(), w10);
830       }
831 
832       // Copy the results to the output array from the cached registers first.
833       for (int i = 0;
834            (i < num_saved_registers_) && (i < kNumCachedRegisters);
835            i += 2) {
836         __ Mov(capture_start.X(), GetCachedRegister(i));
837         __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
838         if ((i == 0) && global_with_zero_length_check()) {
839           // Keep capture start for the zero-length check later.
840           __ Mov(first_capture_start, capture_start);
841         }
842         // Offsets need to be relative to the start of the string.
843         if (mode_ == UC16) {
844           __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
845           __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
846         } else {
847           __ Add(capture_start, input_length, capture_start);
848           __ Add(capture_end, input_length, capture_end);
849         }
850         // The output pointer advances for a possible global match.
851         __ Stp(capture_start,
852                capture_end,
853                MemOperand(output_array(), kPointerSize, PostIndex));
854       }
855 
856       // Only carry on if there are more than kNumCachedRegisters capture
857       // registers.
858       int num_registers_left_on_stack =
859           num_saved_registers_ - kNumCachedRegisters;
860       if (num_registers_left_on_stack > 0) {
861         Register base = x10;
862         // There are always an even number of capture registers. A couple of
863         // registers determine one match with two offsets.
864         DCHECK_EQ(0, num_registers_left_on_stack % 2);
865         __ Add(base, frame_pointer(), kFirstCaptureOnStack);
866 
867         // We can unroll the loop here, we should not unroll for less than 2
868         // registers.
869         STATIC_ASSERT(kNumRegistersToUnroll > 2);
870         if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
871           for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
872             __ Ldp(capture_end,
873                    capture_start,
874                    MemOperand(base, -kPointerSize, PostIndex));
875             if ((i == 0) && global_with_zero_length_check()) {
876               // Keep capture start for the zero-length check later.
877               __ Mov(first_capture_start, capture_start);
878             }
879             // Offsets need to be relative to the start of the string.
880             if (mode_ == UC16) {
881               __ Add(capture_start,
882                      input_length,
883                      Operand(capture_start, ASR, 1));
884               __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
885             } else {
886               __ Add(capture_start, input_length, capture_start);
887               __ Add(capture_end, input_length, capture_end);
888             }
889             // The output pointer advances for a possible global match.
890             __ Stp(capture_start,
891                    capture_end,
892                    MemOperand(output_array(), kPointerSize, PostIndex));
893           }
894         } else {
895           Label loop, start;
896           __ Mov(x11, num_registers_left_on_stack);
897 
898           __ Ldp(capture_end,
899                  capture_start,
900                  MemOperand(base, -kPointerSize, PostIndex));
901           if (global_with_zero_length_check()) {
902             __ Mov(first_capture_start, capture_start);
903           }
904           __ B(&start);
905 
906           __ Bind(&loop);
907           __ Ldp(capture_end,
908                  capture_start,
909                  MemOperand(base, -kPointerSize, PostIndex));
910           __ Bind(&start);
911           if (mode_ == UC16) {
912             __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
913             __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
914           } else {
915             __ Add(capture_start, input_length, capture_start);
916             __ Add(capture_end, input_length, capture_end);
917           }
918           // The output pointer advances for a possible global match.
919           __ Stp(capture_start,
920                  capture_end,
921                  MemOperand(output_array(), kPointerSize, PostIndex));
922           __ Sub(x11, x11, 2);
923           __ Cbnz(x11, &loop);
924         }
925       }
926     }
927 
928     if (global()) {
929       Register success_counter = w0;
930       Register output_size = x10;
931       // Restart matching if the regular expression is flagged as global.
932 
933       // Increment success counter.
934       __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
935       __ Add(success_counter, success_counter, 1);
936       __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
937 
938       // Capture results have been stored, so the number of remaining global
939       // output registers is reduced by the number of stored captures.
940       __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
941       __ Sub(output_size, output_size, num_saved_registers_);
942       // Check whether we have enough room for another set of capture results.
943       __ Cmp(output_size, num_saved_registers_);
944       __ B(lt, &return_w0);
945 
946       // The output pointer is already set to the next field in the output
947       // array.
948       // Update output size on the frame before we restart matching.
949       __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
950 
951       if (global_with_zero_length_check()) {
952         // Special case for zero-length matches.
953         __ Cmp(current_input_offset(), first_capture_start);
954         // Not a zero-length match, restart.
955         __ B(ne, &load_char_start_regexp);
956         // Offset from the end is zero if we already reached the end.
957         __ Cbz(current_input_offset(), &return_w0);
958         // Advance current position after a zero-length match.
959         __ Add(current_input_offset(),
960                current_input_offset(),
961                Operand((mode_ == UC16) ? 2 : 1));
962       }
963 
964       __ B(&load_char_start_regexp);
965     } else {
966       __ Mov(w0, SUCCESS);
967     }
968   }
969 
970   if (exit_label_.is_linked()) {
971     // Exit and return w0
972     __ Bind(&exit_label_);
973     if (global()) {
974       __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
975     }
976   }
977 
978   __ Bind(&return_w0);
979 
980   // Set stack pointer back to first register to retain
981   DCHECK(csp.Is(__ StackPointer()));
982   __ Mov(csp, fp);
983   __ AssertStackConsistency();
984 
985   // Restore registers.
986   __ PopCPURegList(registers_to_retain);
987 
988   __ Ret();
989 
990   Label exit_with_exception;
991   // Registers x0 to x7 are used to store the first captures, they need to be
992   // retained over calls to C++ code.
993   CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
994   DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
995 
996   if (check_preempt_label_.is_linked()) {
997     __ Bind(&check_preempt_label_);
998     SaveLinkRegister();
999     // The cached registers need to be retained.
1000     __ PushCPURegList(cached_registers);
1001     CallCheckStackGuardState(x10);
1002     // Returning from the regexp code restores the stack (csp <- fp)
1003     // so we don't need to drop the link register from it before exiting.
1004     __ Cbnz(w0, &return_w0);
1005     // Reset the cached registers.
1006     __ PopCPURegList(cached_registers);
1007     RestoreLinkRegister();
1008     __ Ret();
1009   }
1010 
1011   if (stack_overflow_label_.is_linked()) {
1012     __ Bind(&stack_overflow_label_);
1013     SaveLinkRegister();
1014     // The cached registers need to be retained.
1015     __ PushCPURegList(cached_registers);
1016     // Call GrowStack(backtrack_stackpointer(), &stack_base)
1017     __ Mov(x2, ExternalReference::isolate_address(isolate()));
1018     __ Add(x1, frame_pointer(), kStackBase);
1019     __ Mov(x0, backtrack_stackpointer());
1020     ExternalReference grow_stack =
1021         ExternalReference::re_grow_stack(isolate());
1022     __ CallCFunction(grow_stack, 3);
1023     // If return NULL, we have failed to grow the stack, and
1024     // must exit with a stack-overflow exception.
1025     // Returning from the regexp code restores the stack (csp <- fp)
1026     // so we don't need to drop the link register from it before exiting.
1027     __ Cbz(w0, &exit_with_exception);
1028     // Otherwise use return value as new stack pointer.
1029     __ Mov(backtrack_stackpointer(), x0);
1030     // Reset the cached registers.
1031     __ PopCPURegList(cached_registers);
1032     RestoreLinkRegister();
1033     __ Ret();
1034   }
1035 
1036   if (exit_with_exception.is_linked()) {
1037     __ Bind(&exit_with_exception);
1038     __ Mov(w0, EXCEPTION);
1039     __ B(&return_w0);
1040   }
1041 
1042   CodeDesc code_desc;
1043   masm_->GetCode(&code_desc);
1044   Handle<Code> code = isolate()->factory()->NewCode(
1045       code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
1046   PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source));
1047   return Handle<HeapObject>::cast(code);
1048 }
1049 
1050 
GoTo(Label * to)1051 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1052   BranchOrBacktrack(al, to);
1053 }
1054 
IfRegisterGE(int reg,int comparand,Label * if_ge)1055 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1056                                              Label* if_ge) {
1057   Register to_compare = GetRegister(reg, w10);
1058   CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1059 }
1060 
1061 
IfRegisterLT(int reg,int comparand,Label * if_lt)1062 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1063                                              Label* if_lt) {
1064   Register to_compare = GetRegister(reg, w10);
1065   CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1066 }
1067 
1068 
IfRegisterEqPos(int reg,Label * if_eq)1069 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1070   Register to_compare = GetRegister(reg, w10);
1071   __ Cmp(to_compare, current_input_offset());
1072   BranchOrBacktrack(eq, if_eq);
1073 }
1074 
1075 RegExpMacroAssembler::IrregexpImplementation
Implementation()1076     RegExpMacroAssemblerARM64::Implementation() {
1077   return kARM64Implementation;
1078 }
1079 
1080 
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1081 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1082                                                      Label* on_end_of_input,
1083                                                      bool check_bounds,
1084                                                      int characters) {
1085   // TODO(pielan): Make sure long strings are caught before this, and not
1086   // just asserted in debug mode.
1087   DCHECK(cp_offset >= -1);      // ^ and \b can look behind one character.
1088   // Be sane! (And ensure that an int32_t can be used to index the string)
1089   DCHECK(cp_offset < (1<<30));
1090   if (check_bounds) {
1091     CheckPosition(cp_offset + characters - 1, on_end_of_input);
1092   }
1093   LoadCurrentCharacterUnchecked(cp_offset, characters);
1094 }
1095 
1096 
PopCurrentPosition()1097 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1098   Pop(current_input_offset());
1099 }
1100 
1101 
PopRegister(int register_index)1102 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1103   Pop(w10);
1104   StoreRegister(register_index, w10);
1105 }
1106 
1107 
PushBacktrack(Label * label)1108 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1109   if (label->is_bound()) {
1110     int target = label->pos();
1111     __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1112   } else {
1113     __ Adr(x10, label, MacroAssembler::kAdrFar);
1114     __ Sub(x10, x10, code_pointer());
1115     if (masm_->emit_debug_code()) {
1116       __ Cmp(x10, kWRegMask);
1117       // The code offset has to fit in a W register.
1118       __ Check(ls, kOffsetOutOfRange);
1119     }
1120   }
1121   Push(w10);
1122   CheckStackLimit();
1123 }
1124 
1125 
PushCurrentPosition()1126 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1127   Push(current_input_offset());
1128 }
1129 
1130 
PushRegister(int register_index,StackCheckFlag check_stack_limit)1131 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1132                                              StackCheckFlag check_stack_limit) {
1133   Register to_push = GetRegister(register_index, w10);
1134   Push(to_push);
1135   if (check_stack_limit) CheckStackLimit();
1136 }
1137 
1138 
ReadCurrentPositionFromRegister(int reg)1139 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1140   Register cached_register;
1141   RegisterState register_state = GetRegisterState(reg);
1142   switch (register_state) {
1143     case STACKED:
1144       __ Ldr(current_input_offset(), register_location(reg));
1145       break;
1146     case CACHED_LSW:
1147       cached_register = GetCachedRegister(reg);
1148       __ Mov(current_input_offset(), cached_register.W());
1149       break;
1150     case CACHED_MSW:
1151       cached_register = GetCachedRegister(reg);
1152       __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
1153       break;
1154     default:
1155       UNREACHABLE();
1156       break;
1157   }
1158 }
1159 
1160 
ReadStackPointerFromRegister(int reg)1161 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1162   Register read_from = GetRegister(reg, w10);
1163   __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1164   __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1165 }
1166 
1167 
SetCurrentPositionFromEnd(int by)1168 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1169   Label after_position;
1170   __ Cmp(current_input_offset(), -by * char_size());
1171   __ B(ge, &after_position);
1172   __ Mov(current_input_offset(), -by * char_size());
1173   // On RegExp code entry (where this operation is used), the character before
1174   // the current position is expected to be already loaded.
1175   // We have advanced the position, so it's safe to read backwards.
1176   LoadCurrentCharacterUnchecked(-1, 1);
1177   __ Bind(&after_position);
1178 }
1179 
1180 
SetRegister(int register_index,int to)1181 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1182   DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1183   Register set_to = wzr;
1184   if (to != 0) {
1185     set_to = w10;
1186     __ Mov(set_to, to);
1187   }
1188   StoreRegister(register_index, set_to);
1189 }
1190 
1191 
Succeed()1192 bool RegExpMacroAssemblerARM64::Succeed() {
1193   __ B(&success_label_);
1194   return global();
1195 }
1196 
1197 
WriteCurrentPositionToRegister(int reg,int cp_offset)1198 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1199                                                                int cp_offset) {
1200   Register position = current_input_offset();
1201   if (cp_offset != 0) {
1202     position = w10;
1203     __ Add(position, current_input_offset(), cp_offset * char_size());
1204   }
1205   StoreRegister(reg, position);
1206 }
1207 
1208 
ClearRegisters(int reg_from,int reg_to)1209 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1210   DCHECK(reg_from <= reg_to);
1211   int num_registers = reg_to - reg_from + 1;
1212 
1213   // If the first capture register is cached in a hardware register but not
1214   // aligned on a 64-bit one, we need to clear the first one specifically.
1215   if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1216     StoreRegister(reg_from, non_position_value());
1217     num_registers--;
1218     reg_from++;
1219   }
1220 
1221   // Clear cached registers in pairs as far as possible.
1222   while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1223     DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1224     __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1225     reg_from += 2;
1226     num_registers -= 2;
1227   }
1228 
1229   if ((num_registers % 2) == 1) {
1230     StoreRegister(reg_from, non_position_value());
1231     num_registers--;
1232     reg_from++;
1233   }
1234 
1235   if (num_registers > 0) {
1236     // If there are some remaining registers, they are stored on the stack.
1237     DCHECK(reg_from >= kNumCachedRegisters);
1238 
1239     // Move down the indexes of the registers on stack to get the correct offset
1240     // in memory.
1241     reg_from -= kNumCachedRegisters;
1242     reg_to -= kNumCachedRegisters;
1243     // We should not unroll the loop for less than 2 registers.
1244     STATIC_ASSERT(kNumRegistersToUnroll > 2);
1245     // We position the base pointer to (reg_from + 1).
1246     int base_offset = kFirstRegisterOnStack -
1247         kWRegSize - (kWRegSize * reg_from);
1248     if (num_registers > kNumRegistersToUnroll) {
1249       Register base = x10;
1250       __ Add(base, frame_pointer(), base_offset);
1251 
1252       Label loop;
1253       __ Mov(x11, num_registers);
1254       __ Bind(&loop);
1255       __ Str(twice_non_position_value(),
1256              MemOperand(base, -kPointerSize, PostIndex));
1257       __ Sub(x11, x11, 2);
1258       __ Cbnz(x11, &loop);
1259     } else {
1260       for (int i = reg_from; i <= reg_to; i += 2) {
1261         __ Str(twice_non_position_value(),
1262                MemOperand(frame_pointer(), base_offset));
1263         base_offset -= kWRegSize * 2;
1264       }
1265     }
1266   }
1267 }
1268 
1269 
WriteStackPointerToRegister(int reg)1270 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1271   __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1272   __ Sub(x10, backtrack_stackpointer(), x10);
1273   if (masm_->emit_debug_code()) {
1274     __ Cmp(x10, Operand(w10, SXTW));
1275     // The stack offset needs to fit in a W register.
1276     __ Check(eq, kOffsetOutOfRange);
1277   }
1278   StoreRegister(reg, w10);
1279 }
1280 
1281 
1282 // Helper function for reading a value out of a stack frame.
1283 template <typename T>
frame_entry(Address re_frame,int frame_offset)1284 static T& frame_entry(Address re_frame, int frame_offset) {
1285   return *reinterpret_cast<T*>(re_frame + frame_offset);
1286 }
1287 
1288 
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_offset,const byte ** input_start,const byte ** input_end)1289 int RegExpMacroAssemblerARM64::CheckStackGuardState(Address* return_address,
1290                                                   Code* re_code,
1291                                                   Address re_frame,
1292                                                   int start_offset,
1293                                                   const byte** input_start,
1294                                                   const byte** input_end) {
1295   Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
1296   StackLimitCheck check(isolate);
1297   if (check.JsHasOverflowed()) {
1298     isolate->StackOverflow();
1299     return EXCEPTION;
1300   }
1301 
1302   // If not real stack overflow the stack guard was used to interrupt
1303   // execution for another purpose.
1304 
1305   // If this is a direct call from JavaScript retry the RegExp forcing the call
1306   // through the runtime system. Currently the direct call cannot handle a GC.
1307   if (frame_entry<int>(re_frame, kDirectCall) == 1) {
1308     return RETRY;
1309   }
1310 
1311   // Prepare for possible GC.
1312   HandleScope handles(isolate);
1313   Handle<Code> code_handle(re_code);
1314 
1315   Handle<String> subject(frame_entry<String*>(re_frame, kInput));
1316 
1317   // Current string.
1318   bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
1319 
1320   DCHECK(re_code->instruction_start() <= *return_address);
1321   DCHECK(*return_address <=
1322       re_code->instruction_start() + re_code->instruction_size());
1323 
1324   Object* result = isolate->stack_guard()->HandleInterrupts();
1325 
1326   if (*code_handle != re_code) {  // Return address no longer valid
1327     int delta = code_handle->address() - re_code->address();
1328     // Overwrite the return address on the stack.
1329     *return_address += delta;
1330   }
1331 
1332   if (result->IsException()) {
1333     return EXCEPTION;
1334   }
1335 
1336   Handle<String> subject_tmp = subject;
1337   int slice_offset = 0;
1338 
1339   // Extract the underlying string and the slice offset.
1340   if (StringShape(*subject_tmp).IsCons()) {
1341     subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
1342   } else if (StringShape(*subject_tmp).IsSliced()) {
1343     SlicedString* slice = SlicedString::cast(*subject_tmp);
1344     subject_tmp = Handle<String>(slice->parent());
1345     slice_offset = slice->offset();
1346   }
1347 
1348   // String might have changed.
1349   if (subject_tmp->IsOneByteRepresentation() != is_one_byte) {
1350     // If we changed between an Latin1 and an UC16 string, the specialized
1351     // code cannot be used, and we need to restart regexp matching from
1352     // scratch (including, potentially, compiling a new version of the code).
1353     return RETRY;
1354   }
1355 
1356   // Otherwise, the content of the string might have moved. It must still
1357   // be a sequential or external string with the same content.
1358   // Update the start and end pointers in the stack frame to the current
1359   // location (whether it has actually moved or not).
1360   DCHECK(StringShape(*subject_tmp).IsSequential() ||
1361          StringShape(*subject_tmp).IsExternal());
1362 
1363   // The original start address of the characters to match.
1364   const byte* start_address = *input_start;
1365 
1366   // Find the current start address of the same character at the current string
1367   // position.
1368   const byte* new_address = StringCharacterPosition(*subject_tmp,
1369       start_offset + slice_offset);
1370 
1371   if (start_address != new_address) {
1372     // If there is a difference, update the object pointer and start and end
1373     // addresses in the RegExp stack frame to match the new value.
1374     const byte* end_address = *input_end;
1375     int byte_length = static_cast<int>(end_address - start_address);
1376     frame_entry<const String*>(re_frame, kInput) = *subject;
1377     *input_start = new_address;
1378     *input_end = new_address + byte_length;
1379   } else if (frame_entry<const String*>(re_frame, kInput) != *subject) {
1380     // Subject string might have been a ConsString that underwent
1381     // short-circuiting during GC. That will not change start_address but
1382     // will change pointer inside the subject handle.
1383     frame_entry<const String*>(re_frame, kInput) = *subject;
1384   }
1385 
1386   return 0;
1387 }
1388 
1389 
CheckPosition(int cp_offset,Label * on_outside_input)1390 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1391                                               Label* on_outside_input) {
1392   CompareAndBranchOrBacktrack(current_input_offset(),
1393                               -cp_offset * char_size(),
1394                               ge,
1395                               on_outside_input);
1396 }
1397 
1398 
CanReadUnaligned()1399 bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
1400   // TODO(pielan): See whether or not we should disable unaligned accesses.
1401   return !slow_safe();
1402 }
1403 
1404 
1405 // Private methods:
1406 
CallCheckStackGuardState(Register scratch)1407 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1408   // Allocate space on the stack to store the return address. The
1409   // CheckStackGuardState C++ function will override it if the code
1410   // moved. Allocate extra space for 2 arguments passed by pointers.
1411   // AAPCS64 requires the stack to be 16 byte aligned.
1412   int alignment = masm_->ActivationFrameAlignment();
1413   DCHECK_EQ(alignment % 16, 0);
1414   int align_mask = (alignment / kXRegSize) - 1;
1415   int xreg_to_claim = (3 + align_mask) & ~align_mask;
1416 
1417   DCHECK(csp.Is(__ StackPointer()));
1418   __ Claim(xreg_to_claim);
1419 
1420   // CheckStackGuardState needs the end and start addresses of the input string.
1421   __ Poke(input_end(), 2 * kPointerSize);
1422   __ Add(x5, csp, 2 * kPointerSize);
1423   __ Poke(input_start(), kPointerSize);
1424   __ Add(x4, csp, kPointerSize);
1425 
1426   __ Mov(w3, start_offset());
1427   // RegExp code frame pointer.
1428   __ Mov(x2, frame_pointer());
1429   // Code* of self.
1430   __ Mov(x1, Operand(masm_->CodeObject()));
1431 
1432   // We need to pass a pointer to the return address as first argument.
1433   // The DirectCEntry stub will place the return address on the stack before
1434   // calling so the stack pointer will point to it.
1435   __ Mov(x0, csp);
1436 
1437   ExternalReference check_stack_guard_state =
1438       ExternalReference::re_check_stack_guard_state(isolate());
1439   __ Mov(scratch, check_stack_guard_state);
1440   DirectCEntryStub stub(isolate());
1441   stub.GenerateCall(masm_, scratch);
1442 
1443   // The input string may have been moved in memory, we need to reload it.
1444   __ Peek(input_start(), kPointerSize);
1445   __ Peek(input_end(), 2 * kPointerSize);
1446 
1447   DCHECK(csp.Is(__ StackPointer()));
1448   __ Drop(xreg_to_claim);
1449 
1450   // Reload the Code pointer.
1451   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1452 }
1453 
BranchOrBacktrack(Condition condition,Label * to)1454 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1455                                                   Label* to) {
1456   if (condition == al) {  // Unconditional.
1457     if (to == NULL) {
1458       Backtrack();
1459       return;
1460     }
1461     __ B(to);
1462     return;
1463   }
1464   if (to == NULL) {
1465     to = &backtrack_label_;
1466   }
1467   __ B(condition, to);
1468 }
1469 
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1470 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1471                                                             int immediate,
1472                                                             Condition condition,
1473                                                             Label* to) {
1474   if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1475     if (to == NULL) {
1476       to = &backtrack_label_;
1477     }
1478     if (condition == eq) {
1479       __ Cbz(reg, to);
1480     } else {
1481       __ Cbnz(reg, to);
1482     }
1483   } else {
1484     __ Cmp(reg, immediate);
1485     BranchOrBacktrack(condition, to);
1486   }
1487 }
1488 
1489 
CheckPreemption()1490 void RegExpMacroAssemblerARM64::CheckPreemption() {
1491   // Check for preemption.
1492   ExternalReference stack_limit =
1493       ExternalReference::address_of_stack_limit(isolate());
1494   __ Mov(x10, stack_limit);
1495   __ Ldr(x10, MemOperand(x10));
1496   DCHECK(csp.Is(__ StackPointer()));
1497   __ Cmp(csp, x10);
1498   CallIf(&check_preempt_label_, ls);
1499 }
1500 
1501 
CheckStackLimit()1502 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1503   ExternalReference stack_limit =
1504       ExternalReference::address_of_regexp_stack_limit(isolate());
1505   __ Mov(x10, stack_limit);
1506   __ Ldr(x10, MemOperand(x10));
1507   __ Cmp(backtrack_stackpointer(), x10);
1508   CallIf(&stack_overflow_label_, ls);
1509 }
1510 
1511 
Push(Register source)1512 void RegExpMacroAssemblerARM64::Push(Register source) {
1513   DCHECK(source.Is32Bits());
1514   DCHECK(!source.is(backtrack_stackpointer()));
1515   __ Str(source,
1516          MemOperand(backtrack_stackpointer(),
1517                     -static_cast<int>(kWRegSize),
1518                     PreIndex));
1519 }
1520 
1521 
Pop(Register target)1522 void RegExpMacroAssemblerARM64::Pop(Register target) {
1523   DCHECK(target.Is32Bits());
1524   DCHECK(!target.is(backtrack_stackpointer()));
1525   __ Ldr(target,
1526          MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1527 }
1528 
1529 
GetCachedRegister(int register_index)1530 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1531   DCHECK(register_index < kNumCachedRegisters);
1532   return Register::Create(register_index / 2, kXRegSizeInBits);
1533 }
1534 
1535 
GetRegister(int register_index,Register maybe_result)1536 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1537                                                 Register maybe_result) {
1538   DCHECK(maybe_result.Is32Bits());
1539   DCHECK(register_index >= 0);
1540   if (num_registers_ <= register_index) {
1541     num_registers_ = register_index + 1;
1542   }
1543   Register result;
1544   RegisterState register_state = GetRegisterState(register_index);
1545   switch (register_state) {
1546     case STACKED:
1547       __ Ldr(maybe_result, register_location(register_index));
1548       result = maybe_result;
1549       break;
1550     case CACHED_LSW:
1551       result = GetCachedRegister(register_index).W();
1552       break;
1553     case CACHED_MSW:
1554       __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1555              kWRegSizeInBits);
1556       result = maybe_result;
1557       break;
1558     default:
1559       UNREACHABLE();
1560       break;
1561   }
1562   DCHECK(result.Is32Bits());
1563   return result;
1564 }
1565 
1566 
StoreRegister(int register_index,Register source)1567 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1568                                               Register source) {
1569   DCHECK(source.Is32Bits());
1570   DCHECK(register_index >= 0);
1571   if (num_registers_ <= register_index) {
1572     num_registers_ = register_index + 1;
1573   }
1574 
1575   Register cached_register;
1576   RegisterState register_state = GetRegisterState(register_index);
1577   switch (register_state) {
1578     case STACKED:
1579       __ Str(source, register_location(register_index));
1580       break;
1581     case CACHED_LSW:
1582       cached_register = GetCachedRegister(register_index);
1583       if (!source.Is(cached_register.W())) {
1584         __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1585       }
1586       break;
1587     case CACHED_MSW:
1588       cached_register = GetCachedRegister(register_index);
1589       __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1590       break;
1591     default:
1592       UNREACHABLE();
1593       break;
1594   }
1595 }
1596 
1597 
CallIf(Label * to,Condition condition)1598 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1599   Label skip_call;
1600   if (condition != al) __ B(&skip_call, NegateCondition(condition));
1601   __ Bl(to);
1602   __ Bind(&skip_call);
1603 }
1604 
1605 
RestoreLinkRegister()1606 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1607   DCHECK(csp.Is(__ StackPointer()));
1608   __ Pop(lr, xzr);
1609   __ Add(lr, lr, Operand(masm_->CodeObject()));
1610 }
1611 
1612 
SaveLinkRegister()1613 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1614   DCHECK(csp.Is(__ StackPointer()));
1615   __ Sub(lr, lr, Operand(masm_->CodeObject()));
1616   __ Push(xzr, lr);
1617 }
1618 
1619 
register_location(int register_index)1620 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1621   DCHECK(register_index < (1<<30));
1622   DCHECK(register_index >= kNumCachedRegisters);
1623   if (num_registers_ <= register_index) {
1624     num_registers_ = register_index + 1;
1625   }
1626   register_index -= kNumCachedRegisters;
1627   int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1628   return MemOperand(frame_pointer(), offset);
1629 }
1630 
capture_location(int register_index,Register scratch)1631 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1632                                                      Register scratch) {
1633   DCHECK(register_index < (1<<30));
1634   DCHECK(register_index < num_saved_registers_);
1635   DCHECK(register_index >= kNumCachedRegisters);
1636   DCHECK_EQ(register_index % 2, 0);
1637   register_index -= kNumCachedRegisters;
1638   int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1639   // capture_location is used with Stp instructions to load/store 2 registers.
1640   // The immediate field in the encoding is limited to 7 bits (signed).
1641   if (is_int7(offset)) {
1642     return MemOperand(frame_pointer(), offset);
1643   } else {
1644     __ Add(scratch, frame_pointer(), offset);
1645     return MemOperand(scratch);
1646   }
1647 }
1648 
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1649 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1650                                                               int characters) {
1651   Register offset = current_input_offset();
1652 
1653   // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1654   // and the operating system running on the target allow it.
1655   // If unaligned load/stores are not supported then this function must only
1656   // be used to load a single character at a time.
1657 
1658   // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1659   // disable it.
1660   // TODO(pielan): See whether or not we should disable unaligned accesses.
1661   if (!CanReadUnaligned()) {
1662     DCHECK(characters == 1);
1663   }
1664 
1665   if (cp_offset != 0) {
1666     if (masm_->emit_debug_code()) {
1667       __ Mov(x10, cp_offset * char_size());
1668       __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1669       __ Cmp(x10, Operand(w10, SXTW));
1670       // The offset needs to fit in a W register.
1671       __ Check(eq, kOffsetOutOfRange);
1672     } else {
1673       __ Add(w10, current_input_offset(), cp_offset * char_size());
1674     }
1675     offset = w10;
1676   }
1677 
1678   if (mode_ == LATIN1) {
1679     if (characters == 4) {
1680       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1681     } else if (characters == 2) {
1682       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1683     } else {
1684       DCHECK(characters == 1);
1685       __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1686     }
1687   } else {
1688     DCHECK(mode_ == UC16);
1689     if (characters == 2) {
1690       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1691     } else {
1692       DCHECK(characters == 1);
1693       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1694     }
1695   }
1696 }
1697 
1698 #endif  // V8_INTERPRETED_REGEXP
1699 
1700 }}  // namespace v8::internal
1701 
1702 #endif  // V8_TARGET_ARCH_ARM64
1703