1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM64
6 
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8 
9 #include "src/arm64/macro-assembler-arm64-inl.h"
10 #include "src/code-stubs.h"
11 #include "src/log.h"
12 #include "src/macro-assembler.h"
13 #include "src/objects-inl.h"
14 #include "src/regexp/regexp-macro-assembler.h"
15 #include "src/regexp/regexp-stack.h"
16 #include "src/unicode.h"
17 
18 namespace v8 {
19 namespace internal {
20 
21 #ifndef V8_INTERPRETED_REGEXP
22 /*
23  * This assembler uses the following register assignment convention:
 * - w19     : Used to temporarily store a value before a call to C code.
25  *             See CheckNotBackReferenceIgnoreCase.
26  * - x20     : Pointer to the current code object (Code*),
27  *             it includes the heap object tag.
28  * - w21     : Current position in input, as negative offset from
29  *             the end of the string. Please notice that this is
30  *             the byte offset, not the character offset!
31  * - w22     : Currently loaded character. Must be loaded using
32  *             LoadCurrentCharacter before using any of the dispatch methods.
33  * - x23     : Points to tip of backtrack stack.
34  * - w24     : Position of the first character minus one: non_position_value.
35  *             Used to initialize capture registers.
36  * - x25     : Address at the end of the input string: input_end.
37  *             Points to byte after last character in input.
38  * - x26     : Address at the start of the input string: input_start.
39  * - w27     : Where to start in the input string.
40  * - x28     : Output array pointer.
41  * - x29/fp  : Frame pointer. Used to access arguments, local variables and
42  *             RegExp registers.
43  * - x16/x17 : IP registers, used by assembler. Very volatile.
44  * - sp      : Points to tip of C stack.
45  *
46  * - x0-x7   : Used as a cache to store 32 bit capture registers. These
47  *             registers need to be retained every time a call to C code
48  *             is done.
49  *
50  * The remaining registers are free for computations.
51  * Each call to a public method should retain this convention.
52  *
53  * The stack will have the following structure:
54  *
55  *  Location    Name               Description
56  *              (as referred to in
57  *              the code)
58  *
59  *  - fp[96]   isolate            Address of the current isolate.
60  *  ^^^ sp when called ^^^
61  *  - fp[88]    lr                 Return from the RegExp code.
62  *  - fp[80]    r29                Old frame pointer (CalleeSaved).
63  *  - fp[0..72] r19-r28            Backup of CalleeSaved registers.
64  *  - fp[-8]    direct_call        1 => Direct call from JavaScript code.
65  *                                 0 => Call through the runtime system.
66  *  - fp[-16]   stack_base         High end of the memory area to use as
67  *                                 the backtracking stack.
68  *  - fp[-24]   output_size        Output may fit multiple sets of matches.
69  *  - fp[-32]   input              Handle containing the input string.
70  *  - fp[-40]   success_counter
71  *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
72  *  - fp[-44]   register N         Capture registers initialized with
73  *  - fp[-48]   register N + 1     non_position_value.
74  *              ...                The first kNumCachedRegisters (N) registers
75  *              ...                are cached in x0 to x7.
76  *              ...                Only positions must be stored in the first
77  *  -           ...                num_saved_registers_ registers.
78  *  -           ...
79  *  -           register N + num_registers - 1
80  *  ^^^^^^^^^ sp ^^^^^^^^^
81  *
82  * The first num_saved_registers_ registers are initialized to point to
83  * "character -1" in the string (i.e., char_size() bytes before the first
84  * character of the string). The remaining registers start out as garbage.
85  *
86  * The data up to the return address must be placed there by the calling
87  * code and the remaining arguments are passed in registers, e.g. by calling the
88  * code entry as cast to a function with the signature:
89  * int (*match)(String* input_string,
90  *              int start_index,
91  *              Address start,
92  *              Address end,
93  *              int* capture_output_array,
94  *              int num_capture_registers,
95  *              byte* stack_area_base,
96  *              bool direct_call = false,
97  *              Isolate* isolate);
98  * The call is performed by NativeRegExpMacroAssembler::Execute()
99  * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
100  */
101 
102 #define __ ACCESS_MASM(masm_)
103 
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)104 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
105                                                      Zone* zone, Mode mode,
106                                                      int registers_to_save)
107     : NativeRegExpMacroAssembler(isolate, zone),
108       masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
109                                CodeObjectRequired::kYes)),
110       mode_(mode),
111       num_registers_(registers_to_save),
112       num_saved_registers_(registers_to_save),
113       entry_label_(),
114       start_label_(),
115       success_label_(),
116       backtrack_label_(),
117       exit_label_() {
118   DCHECK_EQ(0, registers_to_save % 2);
119   // We can cache at most 16 W registers in x0-x7.
120   STATIC_ASSERT(kNumCachedRegisters <= 16);
121   STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
122   __ B(&entry_label_);   // We'll write the entry code later.
123   __ Bind(&start_label_);  // And then continue from here.
124 }
125 
126 
~RegExpMacroAssemblerARM64()127 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
128   delete masm_;
129   // Unuse labels in case we throw away the assembler without calling GetCode.
130   entry_label_.Unuse();
131   start_label_.Unuse();
132   success_label_.Unuse();
133   backtrack_label_.Unuse();
134   exit_label_.Unuse();
135   check_preempt_label_.Unuse();
136   stack_overflow_label_.Unuse();
137 }
138 
stack_limit_slack()139 int RegExpMacroAssemblerARM64::stack_limit_slack()  {
140   return RegExpStack::kStackLimitSlack;
141 }
142 
143 
AdvanceCurrentPosition(int by)144 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
145   if (by != 0) {
146     __ Add(current_input_offset(),
147            current_input_offset(), by * char_size());
148   }
149 }
150 
151 
AdvanceRegister(int reg,int by)152 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
153   DCHECK((reg >= 0) && (reg < num_registers_));
154   if (by != 0) {
155     RegisterState register_state = GetRegisterState(reg);
156     switch (register_state) {
157       case STACKED:
158         __ Ldr(w10, register_location(reg));
159         __ Add(w10, w10, by);
160         __ Str(w10, register_location(reg));
161         break;
162       case CACHED_LSW: {
163         Register to_advance = GetCachedRegister(reg);
164         __ Add(to_advance, to_advance, by);
165         break;
166       }
167       case CACHED_MSW: {
168         Register to_advance = GetCachedRegister(reg);
169         __ Add(to_advance, to_advance,
170                static_cast<int64_t>(by) << kWRegSizeInBits);
171         break;
172       }
173       default:
174         UNREACHABLE();
175         break;
176     }
177   }
178 }
179 
180 
Backtrack()181 void RegExpMacroAssemblerARM64::Backtrack() {
182   CheckPreemption();
183   Pop(w10);
184   __ Add(x10, code_pointer(), Operand(w10, UXTW));
185   __ Br(x10);
186 }
187 
188 
Bind(Label * label)189 void RegExpMacroAssemblerARM64::Bind(Label* label) {
190   __ Bind(label);
191 }
192 
193 
CheckCharacter(uint32_t c,Label * on_equal)194 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
195   CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
196 }
197 
198 
CheckCharacterGT(uc16 limit,Label * on_greater)199 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
200                                                  Label* on_greater) {
201   CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
202 }
203 
204 
CheckAtStart(Label * on_at_start)205 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
206   __ Add(w10, current_input_offset(), Operand(-char_size()));
207   __ Cmp(w10, string_start_minus_one());
208   BranchOrBacktrack(eq, on_at_start);
209 }
210 
211 
CheckNotAtStart(int cp_offset,Label * on_not_at_start)212 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
213                                                 Label* on_not_at_start) {
214   __ Add(w10, current_input_offset(),
215          Operand(-char_size() + cp_offset * char_size()));
216   __ Cmp(w10, string_start_minus_one());
217   BranchOrBacktrack(ne, on_not_at_start);
218 }
219 
220 
CheckCharacterLT(uc16 limit,Label * on_less)221 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
222   CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
223 }
224 
225 
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)226 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
227                                               int cp_offset,
228                                               Label* on_failure,
229                                               bool check_end_of_string) {
230   // This method is only ever called from the cctests.
231 
232   if (check_end_of_string) {
233     // Is last character of required match inside string.
234     CheckPosition(cp_offset + str.length() - 1, on_failure);
235   }
236 
237   Register characters_address = x11;
238 
239   __ Add(characters_address,
240          input_end(),
241          Operand(current_input_offset(), SXTW));
242   if (cp_offset != 0) {
243     __ Add(characters_address, characters_address, cp_offset * char_size());
244   }
245 
246   for (int i = 0; i < str.length(); i++) {
247     if (mode_ == LATIN1) {
248       __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
249       DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
250     } else {
251       __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
252     }
253     CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
254   }
255 }
256 
257 
CheckGreedyLoop(Label * on_equal)258 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
259   __ Ldr(w10, MemOperand(backtrack_stackpointer()));
260   __ Cmp(current_input_offset(), w10);
261   __ Cset(x11, eq);
262   __ Add(backtrack_stackpointer(),
263          backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
264   BranchOrBacktrack(eq, on_equal);
265 }
266 
267 
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)268 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
269     int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
270   Label fallthrough;
271 
272   Register capture_start_offset = w10;
273   // Save the capture length in a callee-saved register so it will
274   // be preserved if we call a C helper.
275   Register capture_length = w19;
276   DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
277 
278   // Find length of back-referenced capture.
279   DCHECK_EQ(0, start_reg % 2);
280   if (start_reg < kNumCachedRegisters) {
281     __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
282     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
283   } else {
284     __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
285   }
286   __ Sub(capture_length, w11, capture_start_offset);  // Length to check.
287 
288   // At this point, the capture registers are either both set or both cleared.
289   // If the capture length is zero, then the capture is either empty or cleared.
290   // Fall through in both cases.
291   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
292 
293   // Check that there are enough characters left in the input.
294   if (read_backward) {
295     __ Add(w12, string_start_minus_one(), capture_length);
296     __ Cmp(current_input_offset(), w12);
297     BranchOrBacktrack(le, on_no_match);
298   } else {
299     __ Cmn(capture_length, current_input_offset());
300     BranchOrBacktrack(gt, on_no_match);
301   }
302 
303   if (mode_ == LATIN1) {
304     Label success;
305     Label fail;
306     Label loop_check;
307 
308     Register capture_start_address = x12;
309     Register capture_end_addresss = x13;
310     Register current_position_address = x14;
311 
312     __ Add(capture_start_address,
313            input_end(),
314            Operand(capture_start_offset, SXTW));
315     __ Add(capture_end_addresss,
316            capture_start_address,
317            Operand(capture_length, SXTW));
318     __ Add(current_position_address,
319            input_end(),
320            Operand(current_input_offset(), SXTW));
321     if (read_backward) {
322       // Offset by length when matching backwards.
323       __ Sub(current_position_address, current_position_address,
324              Operand(capture_length, SXTW));
325     }
326 
327     Label loop;
328     __ Bind(&loop);
329     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
330     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
331     __ Cmp(w10, w11);
332     __ B(eq, &loop_check);
333 
334     // Mismatch, try case-insensitive match (converting letters to lower-case).
335     __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
336     __ Orr(w11, w11, 0x20);  // Also convert input character.
337     __ Cmp(w11, w10);
338     __ B(ne, &fail);
339     __ Sub(w10, w10, 'a');
340     __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
341     __ B(ls, &loop_check);  // In range 'a'-'z'.
342     // Latin-1: Check for values in range [224,254] but not 247.
343     __ Sub(w10, w10, 224 - 'a');
344     __ Cmp(w10, 254 - 224);
345     __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
346     __ B(eq, &fail);  // Weren't Latin-1 letters.
347 
348     __ Bind(&loop_check);
349     __ Cmp(capture_start_address, capture_end_addresss);
350     __ B(lt, &loop);
351     __ B(&success);
352 
353     __ Bind(&fail);
354     BranchOrBacktrack(al, on_no_match);
355 
356     __ Bind(&success);
357     // Compute new value of character position after the matched part.
358     __ Sub(current_input_offset().X(), current_position_address, input_end());
359     if (read_backward) {
360       __ Sub(current_input_offset().X(), current_input_offset().X(),
361              Operand(capture_length, SXTW));
362     }
363     if (masm_->emit_debug_code()) {
364       __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
365       __ Ccmp(current_input_offset(), 0, NoFlag, eq);
366       // The current input offset should be <= 0, and fit in a W register.
367       __ Check(le, AbortReason::kOffsetOutOfRange);
368     }
369   } else {
370     DCHECK(mode_ == UC16);
371     int argument_count = 4;
372 
373     // The cached registers need to be retained.
374     CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
375     DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
376     __ PushCPURegList(cached_registers);
377 
378     // Put arguments into arguments registers.
379     // Parameters are
380     //   x0: Address byte_offset1 - Address captured substring's start.
381     //   x1: Address byte_offset2 - Address of current character position.
382     //   w2: size_t byte_length - length of capture in bytes(!)
383     //   x3: Isolate* isolate or 0 if unicode flag
384 
385     // Address of start of capture.
386     __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
387     // Length of capture.
388     __ Mov(w2, capture_length);
389     // Address of current input position.
390     __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
391     if (read_backward) {
392       __ Sub(x1, x1, Operand(capture_length, SXTW));
393     }
394     // Isolate.
395 #ifdef V8_INTL_SUPPORT
396     if (unicode) {
397       __ Mov(x3, Operand(0));
398     } else  // NOLINT
399 #endif      // V8_INTL_SUPPORT
400     {
401       __ Mov(x3, ExternalReference::isolate_address(isolate()));
402     }
403 
404     {
405       AllowExternalCallThatCantCauseGC scope(masm_);
406       ExternalReference function =
407           ExternalReference::re_case_insensitive_compare_uc16(isolate());
408       __ CallCFunction(function, argument_count);
409     }
410 
411     // Check if function returned non-zero for success or zero for failure.
412     // x0 is one of the registers used as a cache so it must be tested before
413     // the cache is restored.
414     __ Cmp(x0, 0);
415     __ PopCPURegList(cached_registers);
416     BranchOrBacktrack(eq, on_no_match);
417 
418     // On success, advance position by length of capture.
419     if (read_backward) {
420       __ Sub(current_input_offset(), current_input_offset(), capture_length);
421     } else {
422       __ Add(current_input_offset(), current_input_offset(), capture_length);
423     }
424   }
425 
426   __ Bind(&fallthrough);
427 }
428 
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)429 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
430                                                       bool read_backward,
431                                                       Label* on_no_match) {
432   Label fallthrough;
433 
434   Register capture_start_address = x12;
435   Register capture_end_address = x13;
436   Register current_position_address = x14;
437   Register capture_length = w15;
438 
439   // Find length of back-referenced capture.
440   DCHECK_EQ(0, start_reg % 2);
441   if (start_reg < kNumCachedRegisters) {
442     __ Mov(x10, GetCachedRegister(start_reg));
443     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
444   } else {
445     __ Ldp(w11, w10, capture_location(start_reg, x10));
446   }
447   __ Sub(capture_length, w11, w10);  // Length to check.
448 
449   // At this point, the capture registers are either both set or both cleared.
450   // If the capture length is zero, then the capture is either empty or cleared.
451   // Fall through in both cases.
452   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
453 
454   // Check that there are enough characters left in the input.
455   if (read_backward) {
456     __ Add(w12, string_start_minus_one(), capture_length);
457     __ Cmp(current_input_offset(), w12);
458     BranchOrBacktrack(le, on_no_match);
459   } else {
460     __ Cmn(capture_length, current_input_offset());
461     BranchOrBacktrack(gt, on_no_match);
462   }
463 
464   // Compute pointers to match string and capture string
465   __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
466   __ Add(capture_end_address,
467          capture_start_address,
468          Operand(capture_length, SXTW));
469   __ Add(current_position_address,
470          input_end(),
471          Operand(current_input_offset(), SXTW));
472   if (read_backward) {
473     // Offset by length when matching backwards.
474     __ Sub(current_position_address, current_position_address,
475            Operand(capture_length, SXTW));
476   }
477 
478   Label loop;
479   __ Bind(&loop);
480   if (mode_ == LATIN1) {
481     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
482     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
483   } else {
484     DCHECK(mode_ == UC16);
485     __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
486     __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
487   }
488   __ Cmp(w10, w11);
489   BranchOrBacktrack(ne, on_no_match);
490   __ Cmp(capture_start_address, capture_end_address);
491   __ B(lt, &loop);
492 
493   // Move current character position to position after match.
494   __ Sub(current_input_offset().X(), current_position_address, input_end());
495   if (read_backward) {
496     __ Sub(current_input_offset().X(), current_input_offset().X(),
497            Operand(capture_length, SXTW));
498   }
499 
500   if (masm_->emit_debug_code()) {
501     __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
502     __ Ccmp(current_input_offset(), 0, NoFlag, eq);
503     // The current input offset should be <= 0, and fit in a W register.
504     __ Check(le, AbortReason::kOffsetOutOfRange);
505   }
506   __ Bind(&fallthrough);
507 }
508 
509 
CheckNotCharacter(unsigned c,Label * on_not_equal)510 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
511                                                   Label* on_not_equal) {
512   CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
513 }
514 
515 
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)516 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
517                                                        uint32_t mask,
518                                                        Label* on_equal) {
519   __ And(w10, current_character(), mask);
520   CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
521 }
522 
523 
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)524 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
525                                                           unsigned mask,
526                                                           Label* on_not_equal) {
527   __ And(w10, current_character(), mask);
528   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
529 }
530 
531 
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)532 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
533     uc16 c,
534     uc16 minus,
535     uc16 mask,
536     Label* on_not_equal) {
537   DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
538   __ Sub(w10, current_character(), minus);
539   __ And(w10, w10, mask);
540   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
541 }
542 
543 
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)544 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
545     uc16 from,
546     uc16 to,
547     Label* on_in_range) {
548   __ Sub(w10, current_character(), from);
549   // Unsigned lower-or-same condition.
550   CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
551 }
552 
553 
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)554 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
555     uc16 from,
556     uc16 to,
557     Label* on_not_in_range) {
558   __ Sub(w10, current_character(), from);
559   // Unsigned higher condition.
560   CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
561 }
562 
563 
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)564 void RegExpMacroAssemblerARM64::CheckBitInTable(
565     Handle<ByteArray> table,
566     Label* on_bit_set) {
567   __ Mov(x11, Operand(table));
568   if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
569     __ And(w10, current_character(), kTableMask);
570     __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
571   } else {
572     __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
573   }
574   __ Ldrb(w11, MemOperand(x11, w10, UXTW));
575   CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
576 }
577 
578 
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)579 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
580                                                            Label* on_no_match) {
581   // Range checks (c in min..max) are generally implemented by an unsigned
582   // (c - min) <= (max - min) check
583   switch (type) {
584   case 's':
585     // Match space-characters
586     if (mode_ == LATIN1) {
587       // One byte space characters are '\t'..'\r', ' ' and \u00a0.
588       Label success;
589       // Check for ' ' or 0x00A0.
590       __ Cmp(current_character(), ' ');
591       __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
592       __ B(eq, &success);
593       // Check range 0x09..0x0D.
594       __ Sub(w10, current_character(), '\t');
595       CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
596       __ Bind(&success);
597       return true;
598     }
599     return false;
600   case 'S':
601     // The emitted code for generic character classes is good enough.
602     return false;
603   case 'd':
604     // Match ASCII digits ('0'..'9').
605     __ Sub(w10, current_character(), '0');
606     CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
607     return true;
608   case 'D':
609     // Match ASCII non-digits.
610     __ Sub(w10, current_character(), '0');
611     CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
612     return true;
613   case '.': {
614     // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
615     // Here we emit the conditional branch only once at the end to make branch
616     // prediction more efficient, even though we could branch out of here
617     // as soon as a character matches.
618     __ Cmp(current_character(), 0x0A);
619     __ Ccmp(current_character(), 0x0D, ZFlag, ne);
620     if (mode_ == UC16) {
621       __ Sub(w10, current_character(), 0x2028);
622       // If the Z flag was set we clear the flags to force a branch.
623       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
624       // ls -> !((C==1) && (Z==0))
625       BranchOrBacktrack(ls, on_no_match);
626     } else {
627       BranchOrBacktrack(eq, on_no_match);
628     }
629     return true;
630   }
631   case 'n': {
632     // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
633     // We have to check all 4 newline characters before emitting
634     // the conditional branch.
635     __ Cmp(current_character(), 0x0A);
636     __ Ccmp(current_character(), 0x0D, ZFlag, ne);
637     if (mode_ == UC16) {
638       __ Sub(w10, current_character(), 0x2028);
639       // If the Z flag was set we clear the flags to force a fall-through.
640       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
641       // hi -> (C==1) && (Z==0)
642       BranchOrBacktrack(hi, on_no_match);
643     } else {
644       BranchOrBacktrack(ne, on_no_match);
645     }
646     return true;
647   }
648   case 'w': {
649     if (mode_ != LATIN1) {
650       // Table is 256 entries, so all Latin1 characters can be tested.
651       CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
652     }
653     ExternalReference map = ExternalReference::re_word_character_map(isolate());
654     __ Mov(x10, map);
655     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
656     CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
657     return true;
658   }
659   case 'W': {
660     Label done;
661     if (mode_ != LATIN1) {
662       // Table is 256 entries, so all Latin1 characters can be tested.
663       __ Cmp(current_character(), 'z');
664       __ B(hi, &done);
665     }
666     ExternalReference map = ExternalReference::re_word_character_map(isolate());
667     __ Mov(x10, map);
668     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
669     CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
670     __ Bind(&done);
671     return true;
672   }
673   case '*':
674     // Match any character.
675     return true;
676   // No custom implementation (yet): s(UC16), S(UC16).
677   default:
678     return false;
679   }
680 }
681 
682 
Fail()683 void RegExpMacroAssemblerARM64::Fail() {
684   __ Mov(w0, FAILURE);
685   __ B(&exit_label_);
686 }
687 
688 
GetCode(Handle<String> source)689 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
690   Label return_w0;
691   // Finalize code - write the entry point code now we know how many
692   // registers we need.
693 
694   // Entry code:
695   __ Bind(&entry_label_);
696 
697   // Arguments on entry:
698   // x0:  String*  input
699   // x1:  int      start_offset
700   // x2:  byte*    input_start
701   // x3:  byte*    input_end
702   // x4:  int*     output array
703   // x5:  int      output array size
704   // x6:  Address  stack_base
705   // x7:  int      direct_call
706 
707   //  sp[8]:  address of the current isolate
708   //  sp[0]:  secondary link/return address used by native call
709 
710   // Tell the system that we have a stack frame.  Because the type is MANUAL, no
711   // code is generated.
712   FrameScope scope(masm_, StackFrame::MANUAL);
713 
714   // Push registers on the stack, only push the argument registers that we need.
715   CPURegList argument_registers(x0, x5, x6, x7);
716 
717   CPURegList registers_to_retain = kCalleeSaved;
718   DCHECK_EQ(11, kCalleeSaved.Count());
719   registers_to_retain.Combine(lr);
720 
721   __ PushCPURegList(registers_to_retain);
722   __ PushCPURegList(argument_registers);
723 
724   // Set frame pointer in place.
725   __ Add(frame_pointer(), sp, argument_registers.Count() * kPointerSize);
726 
727   // Initialize callee-saved registers.
728   __ Mov(start_offset(), w1);
729   __ Mov(input_start(), x2);
730   __ Mov(input_end(), x3);
731   __ Mov(output_array(), x4);
732 
733   // Set the number of registers we will need to allocate, that is:
734   //   - success_counter (X register)
735   //   - (num_registers_ - kNumCachedRegisters) (W registers)
736   int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
737   // Do not allocate registers on the stack if they can all be cached.
738   if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
739   // Make room for the success_counter.
740   num_wreg_to_allocate += 2;
741 
742   // Make sure the stack alignment will be respected.
743   int alignment = masm_->ActivationFrameAlignment();
744   DCHECK_EQ(alignment % 16, 0);
745   int align_mask = (alignment / kWRegSize) - 1;
746   num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
747 
748   // Check if we have space on the stack.
749   Label stack_limit_hit;
750   Label stack_ok;
751 
752   ExternalReference stack_limit =
753       ExternalReference::address_of_stack_limit(isolate());
754   __ Mov(x10, stack_limit);
755   __ Ldr(x10, MemOperand(x10));
756   __ Subs(x10, sp, x10);
757 
758   // Handle it if the stack pointer is already below the stack limit.
759   __ B(ls, &stack_limit_hit);
760 
761   // Check if there is room for the variable number of registers above
762   // the stack limit.
763   __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
764   __ B(hs, &stack_ok);
765 
766   // Exit with OutOfMemory exception. There is not enough space on the stack
767   // for our working registers.
768   __ Mov(w0, EXCEPTION);
769   __ B(&return_w0);
770 
771   __ Bind(&stack_limit_hit);
772   CallCheckStackGuardState(x10);
773   // If returned value is non-zero, we exit with the returned value as result.
774   __ Cbnz(w0, &return_w0);
775 
776   __ Bind(&stack_ok);
777 
778   // Allocate space on stack.
779   __ Claim(num_wreg_to_allocate, kWRegSize);
780 
781   // Initialize success_counter with 0.
782   __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
783 
784   // Find negative length (offset of start relative to end).
785   __ Sub(x10, input_start(), input_end());
786   if (masm_->emit_debug_code()) {
787     // Check that the size of the input string chars is in range.
788     __ Neg(x11, x10);
789     __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
790     __ Check(ls, AbortReason::kInputStringTooLong);
791   }
792   __ Mov(current_input_offset(), w10);
793 
794   // The non-position value is used as a clearing value for the
795   // capture registers, it corresponds to the position of the first character
796   // minus one.
797   __ Sub(string_start_minus_one(), current_input_offset(), char_size());
798   __ Sub(string_start_minus_one(), string_start_minus_one(),
799          Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
800   // We can store this value twice in an X register for initializing
801   // on-stack registers later.
802   __ Orr(twice_non_position_value(), string_start_minus_one().X(),
803          Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
804 
805   // Initialize code pointer register.
806   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
807 
808   Label load_char_start_regexp, start_regexp;
809   // Load newline if index is at start, previous character otherwise.
810   __ Cbnz(start_offset(), &load_char_start_regexp);
811   __ Mov(current_character(), '\n');
812   __ B(&start_regexp);
813 
814   // Global regexp restarts matching here.
815   __ Bind(&load_char_start_regexp);
816   // Load previous char as initial value of current character register.
817   LoadCurrentCharacterUnchecked(-1, 1);
818   __ Bind(&start_regexp);
819   // Initialize on-stack registers.
820   if (num_saved_registers_ > 0) {
821     ClearRegisters(0, num_saved_registers_ - 1);
822   }
823 
824   // Initialize backtrack stack pointer.
825   __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
826 
827   // Execute
828   __ B(&start_label_);
829 
830   if (backtrack_label_.is_linked()) {
831     __ Bind(&backtrack_label_);
832     Backtrack();
833   }
834 
835   if (success_label_.is_linked()) {
836     Register first_capture_start = w15;
837 
838     // Save captures when successful.
839     __ Bind(&success_label_);
840 
841     if (num_saved_registers_ > 0) {
842       // V8 expects the output to be an int32_t array.
843       Register capture_start = w12;
844       Register capture_end = w13;
845       Register input_length = w14;
846 
847       // Copy captures to output.
848 
849       // Get string length.
850       __ Sub(x10, input_end(), input_start());
851       if (masm_->emit_debug_code()) {
852         // Check that the size of the input string chars is in range.
853         __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
854         __ Check(ls, AbortReason::kInputStringTooLong);
855       }
856       // input_start has a start_offset offset on entry. We need to include
857       // it when computing the length of the whole string.
858       if (mode_ == UC16) {
859         __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
860       } else {
861         __ Add(input_length, start_offset(), w10);
862       }
863 
864       // Copy the results to the output array from the cached registers first.
865       for (int i = 0;
866            (i < num_saved_registers_) && (i < kNumCachedRegisters);
867            i += 2) {
868         __ Mov(capture_start.X(), GetCachedRegister(i));
869         __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
870         if ((i == 0) && global_with_zero_length_check()) {
871           // Keep capture start for the zero-length check later.
872           __ Mov(first_capture_start, capture_start);
873         }
874         // Offsets need to be relative to the start of the string.
875         if (mode_ == UC16) {
876           __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
877           __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
878         } else {
879           __ Add(capture_start, input_length, capture_start);
880           __ Add(capture_end, input_length, capture_end);
881         }
882         // The output pointer advances for a possible global match.
883         __ Stp(capture_start,
884                capture_end,
885                MemOperand(output_array(), kPointerSize, PostIndex));
886       }
887 
888       // Only carry on if there are more than kNumCachedRegisters capture
889       // registers.
890       int num_registers_left_on_stack =
891           num_saved_registers_ - kNumCachedRegisters;
892       if (num_registers_left_on_stack > 0) {
893         Register base = x10;
894         // There are always an even number of capture registers. A couple of
895         // registers determine one match with two offsets.
896         DCHECK_EQ(0, num_registers_left_on_stack % 2);
897         __ Add(base, frame_pointer(), kFirstCaptureOnStack);
898 
899         // We can unroll the loop here, we should not unroll for less than 2
900         // registers.
901         STATIC_ASSERT(kNumRegistersToUnroll > 2);
902         if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
903           for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
904             __ Ldp(capture_end,
905                    capture_start,
906                    MemOperand(base, -kPointerSize, PostIndex));
907             if ((i == 0) && global_with_zero_length_check()) {
908               // Keep capture start for the zero-length check later.
909               __ Mov(first_capture_start, capture_start);
910             }
911             // Offsets need to be relative to the start of the string.
912             if (mode_ == UC16) {
913               __ Add(capture_start,
914                      input_length,
915                      Operand(capture_start, ASR, 1));
916               __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
917             } else {
918               __ Add(capture_start, input_length, capture_start);
919               __ Add(capture_end, input_length, capture_end);
920             }
921             // The output pointer advances for a possible global match.
922             __ Stp(capture_start,
923                    capture_end,
924                    MemOperand(output_array(), kPointerSize, PostIndex));
925           }
926         } else {
927           Label loop, start;
928           __ Mov(x11, num_registers_left_on_stack);
929 
930           __ Ldp(capture_end,
931                  capture_start,
932                  MemOperand(base, -kPointerSize, PostIndex));
933           if (global_with_zero_length_check()) {
934             __ Mov(first_capture_start, capture_start);
935           }
936           __ B(&start);
937 
938           __ Bind(&loop);
939           __ Ldp(capture_end,
940                  capture_start,
941                  MemOperand(base, -kPointerSize, PostIndex));
942           __ Bind(&start);
943           if (mode_ == UC16) {
944             __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
945             __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
946           } else {
947             __ Add(capture_start, input_length, capture_start);
948             __ Add(capture_end, input_length, capture_end);
949           }
950           // The output pointer advances for a possible global match.
951           __ Stp(capture_start,
952                  capture_end,
953                  MemOperand(output_array(), kPointerSize, PostIndex));
954           __ Sub(x11, x11, 2);
955           __ Cbnz(x11, &loop);
956         }
957       }
958     }
959 
960     if (global()) {
961       Register success_counter = w0;
962       Register output_size = x10;
963       // Restart matching if the regular expression is flagged as global.
964 
965       // Increment success counter.
966       __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
967       __ Add(success_counter, success_counter, 1);
968       __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
969 
970       // Capture results have been stored, so the number of remaining global
971       // output registers is reduced by the number of stored captures.
972       __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
973       __ Sub(output_size, output_size, num_saved_registers_);
974       // Check whether we have enough room for another set of capture results.
975       __ Cmp(output_size, num_saved_registers_);
976       __ B(lt, &return_w0);
977 
978       // The output pointer is already set to the next field in the output
979       // array.
980       // Update output size on the frame before we restart matching.
981       __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
982 
983       if (global_with_zero_length_check()) {
984         // Special case for zero-length matches.
985         __ Cmp(current_input_offset(), first_capture_start);
986         // Not a zero-length match, restart.
987         __ B(ne, &load_char_start_regexp);
988         // Offset from the end is zero if we already reached the end.
989         __ Cbz(current_input_offset(), &return_w0);
990         // Advance current position after a zero-length match.
991         Label advance;
992         __ bind(&advance);
993         __ Add(current_input_offset(),
994                current_input_offset(),
995                Operand((mode_ == UC16) ? 2 : 1));
996         if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
997       }
998 
999       __ B(&load_char_start_regexp);
1000     } else {
1001       __ Mov(w0, SUCCESS);
1002     }
1003   }
1004 
1005   if (exit_label_.is_linked()) {
1006     // Exit and return w0
1007     __ Bind(&exit_label_);
1008     if (global()) {
1009       __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1010     }
1011   }
1012 
1013   __ Bind(&return_w0);
1014 
1015   // Set stack pointer back to first register to retain
1016   __ Mov(sp, fp);
1017 
1018   // Restore registers.
1019   __ PopCPURegList(registers_to_retain);
1020 
1021   __ Ret();
1022 
1023   Label exit_with_exception;
1024   // Registers x0 to x7 are used to store the first captures, they need to be
1025   // retained over calls to C++ code.
1026   CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1027   DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
1028 
1029   if (check_preempt_label_.is_linked()) {
1030     __ Bind(&check_preempt_label_);
1031     SaveLinkRegister();
1032     // The cached registers need to be retained.
1033     __ PushCPURegList(cached_registers);
1034     CallCheckStackGuardState(x10);
1035     // Returning from the regexp code restores the stack (sp <- fp)
1036     // so we don't need to drop the link register from it before exiting.
1037     __ Cbnz(w0, &return_w0);
1038     // Reset the cached registers.
1039     __ PopCPURegList(cached_registers);
1040     RestoreLinkRegister();
1041     __ Ret();
1042   }
1043 
1044   if (stack_overflow_label_.is_linked()) {
1045     __ Bind(&stack_overflow_label_);
1046     SaveLinkRegister();
1047     // The cached registers need to be retained.
1048     __ PushCPURegList(cached_registers);
1049     // Call GrowStack(backtrack_stackpointer(), &stack_base)
1050     __ Mov(x2, ExternalReference::isolate_address(isolate()));
1051     __ Add(x1, frame_pointer(), kStackBase);
1052     __ Mov(x0, backtrack_stackpointer());
1053     ExternalReference grow_stack =
1054         ExternalReference::re_grow_stack(isolate());
1055     __ CallCFunction(grow_stack, 3);
1056     // If return nullptr, we have failed to grow the stack, and
1057     // must exit with a stack-overflow exception.
1058     // Returning from the regexp code restores the stack (sp <- fp)
1059     // so we don't need to drop the link register from it before exiting.
1060     __ Cbz(w0, &exit_with_exception);
1061     // Otherwise use return value as new stack pointer.
1062     __ Mov(backtrack_stackpointer(), x0);
1063     // Reset the cached registers.
1064     __ PopCPURegList(cached_registers);
1065     RestoreLinkRegister();
1066     __ Ret();
1067   }
1068 
1069   if (exit_with_exception.is_linked()) {
1070     __ Bind(&exit_with_exception);
1071     __ Mov(w0, EXCEPTION);
1072     __ B(&return_w0);
1073   }
1074 
1075   CodeDesc code_desc;
1076   masm_->GetCode(isolate(), &code_desc);
1077   Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
1078                                                     masm_->CodeObject());
1079   PROFILE(masm_->isolate(),
1080           RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
1081   return Handle<HeapObject>::cast(code);
1082 }
1083 
1084 
GoTo(Label * to)1085 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1086   BranchOrBacktrack(al, to);
1087 }
1088 
IfRegisterGE(int reg,int comparand,Label * if_ge)1089 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1090                                              Label* if_ge) {
1091   Register to_compare = GetRegister(reg, w10);
1092   CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1093 }
1094 
1095 
IfRegisterLT(int reg,int comparand,Label * if_lt)1096 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1097                                              Label* if_lt) {
1098   Register to_compare = GetRegister(reg, w10);
1099   CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1100 }
1101 
1102 
IfRegisterEqPos(int reg,Label * if_eq)1103 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1104   Register to_compare = GetRegister(reg, w10);
1105   __ Cmp(to_compare, current_input_offset());
1106   BranchOrBacktrack(eq, if_eq);
1107 }
1108 
1109 RegExpMacroAssembler::IrregexpImplementation
Implementation()1110     RegExpMacroAssemblerARM64::Implementation() {
1111   return kARM64Implementation;
1112 }
1113 
1114 
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1115 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1116                                                      Label* on_end_of_input,
1117                                                      bool check_bounds,
1118                                                      int characters) {
1119   // TODO(pielan): Make sure long strings are caught before this, and not
1120   // just asserted in debug mode.
1121   // Be sane! (And ensure that an int32_t can be used to index the string)
1122   DCHECK(cp_offset < (1<<30));
1123   if (check_bounds) {
1124     if (cp_offset >= 0) {
1125       CheckPosition(cp_offset + characters - 1, on_end_of_input);
1126     } else {
1127       CheckPosition(cp_offset, on_end_of_input);
1128     }
1129   }
1130   LoadCurrentCharacterUnchecked(cp_offset, characters);
1131 }
1132 
1133 
PopCurrentPosition()1134 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1135   Pop(current_input_offset());
1136 }
1137 
1138 
PopRegister(int register_index)1139 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1140   Pop(w10);
1141   StoreRegister(register_index, w10);
1142 }
1143 
1144 
PushBacktrack(Label * label)1145 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1146   if (label->is_bound()) {
1147     int target = label->pos();
1148     __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1149   } else {
1150     __ Adr(x10, label, MacroAssembler::kAdrFar);
1151     __ Sub(x10, x10, code_pointer());
1152     if (masm_->emit_debug_code()) {
1153       __ Cmp(x10, kWRegMask);
1154       // The code offset has to fit in a W register.
1155       __ Check(ls, AbortReason::kOffsetOutOfRange);
1156     }
1157   }
1158   Push(w10);
1159   CheckStackLimit();
1160 }
1161 
1162 
PushCurrentPosition()1163 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1164   Push(current_input_offset());
1165 }
1166 
1167 
PushRegister(int register_index,StackCheckFlag check_stack_limit)1168 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1169                                              StackCheckFlag check_stack_limit) {
1170   Register to_push = GetRegister(register_index, w10);
1171   Push(to_push);
1172   if (check_stack_limit) CheckStackLimit();
1173 }
1174 
1175 
ReadCurrentPositionFromRegister(int reg)1176 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1177   RegisterState register_state = GetRegisterState(reg);
1178   switch (register_state) {
1179     case STACKED:
1180       __ Ldr(current_input_offset(), register_location(reg));
1181       break;
1182     case CACHED_LSW:
1183       __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1184       break;
1185     case CACHED_MSW:
1186       __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1187              kWRegSizeInBits);
1188       break;
1189     default:
1190       UNREACHABLE();
1191       break;
1192   }
1193 }
1194 
1195 
ReadStackPointerFromRegister(int reg)1196 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1197   Register read_from = GetRegister(reg, w10);
1198   __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1199   __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1200 }
1201 
1202 
SetCurrentPositionFromEnd(int by)1203 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1204   Label after_position;
1205   __ Cmp(current_input_offset(), -by * char_size());
1206   __ B(ge, &after_position);
1207   __ Mov(current_input_offset(), -by * char_size());
1208   // On RegExp code entry (where this operation is used), the character before
1209   // the current position is expected to be already loaded.
1210   // We have advanced the position, so it's safe to read backwards.
1211   LoadCurrentCharacterUnchecked(-1, 1);
1212   __ Bind(&after_position);
1213 }
1214 
1215 
SetRegister(int register_index,int to)1216 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1217   DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1218   Register set_to = wzr;
1219   if (to != 0) {
1220     set_to = w10;
1221     __ Mov(set_to, to);
1222   }
1223   StoreRegister(register_index, set_to);
1224 }
1225 
1226 
Succeed()1227 bool RegExpMacroAssemblerARM64::Succeed() {
1228   __ B(&success_label_);
1229   return global();
1230 }
1231 
1232 
WriteCurrentPositionToRegister(int reg,int cp_offset)1233 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1234                                                                int cp_offset) {
1235   Register position = current_input_offset();
1236   if (cp_offset != 0) {
1237     position = w10;
1238     __ Add(position, current_input_offset(), cp_offset * char_size());
1239   }
1240   StoreRegister(reg, position);
1241 }
1242 
1243 
// Resets every regexp register in the inclusive range [reg_from, reg_to] to
// the value held in string_start_minus_one().
//
// Regexp registers are 32 bits wide. The first kNumCachedRegisters of them
// are packed two per 64-bit hardware register, the others live in the frame.
// Registers are therefore cleared two at a time wherever possible by writing
// twice_non_position_value(); a register that cannot be paired is cleared
// individually through StoreRegister.
void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
  DCHECK(reg_from <= reg_to);
  int num_registers = reg_to - reg_from + 1;

  // If the first capture register is cached in a hardware register but not
  // aligned on a 64-bit one, we need to clear the first one specifically.
  if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  // Clear cached registers in pairs as far as possible.
  while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
    DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
    __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
    reg_from += 2;
    num_registers -= 2;
  }

  // With an odd number of registers left, clear one on its own so that the
  // remainder can be handled with 64-bit stores.
  if ((num_registers % 2) == 1) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  if (num_registers > 0) {
    // If there are some remaining registers, they are stored on the stack.
    DCHECK_LE(kNumCachedRegisters, reg_from);

    // Move down the indexes of the registers on stack to get the correct offset
    // in memory.
    reg_from -= kNumCachedRegisters;
    reg_to -= kNumCachedRegisters;
    // We should not unroll the loop for less than 2 registers.
    STATIC_ASSERT(kNumRegistersToUnroll > 2);
    // We position the base pointer to (reg_from + 1).
    int base_offset = kFirstRegisterOnStack -
        kWRegSize - (kWRegSize * reg_from);
    if (num_registers > kNumRegistersToUnroll) {
      // Too many registers to unroll: emit a run-time loop. x10 walks down
      // the frame and x11 counts the registers still to clear; each 64-bit
      // store clears two of them.
      Register base = x10;
      __ Add(base, frame_pointer(), base_offset);

      Label loop;
      __ Mov(x11, num_registers);
      __ Bind(&loop);
      __ Str(twice_non_position_value(),
             MemOperand(base, -kPointerSize, PostIndex));
      __ Sub(x11, x11, 2);
      __ Cbnz(x11, &loop);
    } else {
      // Few registers: emit one 64-bit store per pair, addressed directly
      // from the frame pointer.
      for (int i = reg_from; i <= reg_to; i += 2) {
        __ Str(twice_non_position_value(),
               MemOperand(frame_pointer(), base_offset));
        base_offset -= kWRegSize * 2;
      }
    }
  }
}
1303 
1304 
WriteStackPointerToRegister(int reg)1305 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1306   __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1307   __ Sub(x10, backtrack_stackpointer(), x10);
1308   if (masm_->emit_debug_code()) {
1309     __ Cmp(x10, Operand(w10, SXTW));
1310     // The stack offset needs to fit in a W register.
1311     __ Check(eq, AbortReason::kOffsetOutOfRange);
1312   }
1313   StoreRegister(reg, w10);
1314 }
1315 
1316 
1317 // Helper function for reading a value out of a stack frame.
1318 template <typename T>
frame_entry(Address re_frame,int frame_offset)1319 static T& frame_entry(Address re_frame, int frame_offset) {
1320   return *reinterpret_cast<T*>(re_frame + frame_offset);
1321 }
1322 
1323 
1324 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1325 static T* frame_entry_address(Address re_frame, int frame_offset) {
1326   return reinterpret_cast<T*>(re_frame + frame_offset);
1327 }
1328 
1329 
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1330 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1331     Address* return_address, Code* re_code, Address re_frame, int start_index,
1332     const byte** input_start, const byte** input_end) {
1333   return NativeRegExpMacroAssembler::CheckStackGuardState(
1334       frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1335       frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1336       frame_entry_address<String*>(re_frame, kInput), input_start, input_end);
1337 }
1338 
1339 
CheckPosition(int cp_offset,Label * on_outside_input)1340 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1341                                               Label* on_outside_input) {
1342   if (cp_offset >= 0) {
1343     CompareAndBranchOrBacktrack(current_input_offset(),
1344                                 -cp_offset * char_size(), ge, on_outside_input);
1345   } else {
1346     __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1347     __ Cmp(w12, string_start_minus_one());
1348     BranchOrBacktrack(le, on_outside_input);
1349   }
1350 }
1351 
1352 
1353 // Private methods:
1354 
// Emits a call to the C++ stack guard check through a DirectCEntryStub.
// |scratch| receives the address of the function to call.
//
// Three stack slots are used for the call: the slot at sp receives the
// return address, and the two slots above it hold input_start() and
// input_end() so that they can be passed by pointer and reloaded afterwards.
// The code pointer register is reloaded as well before returning.
// NOTE(review): the arguments set up in x0-x5 line up with the parameter
// order of RegExpMacroAssemblerARM64::CheckStackGuardState; presumably that
// is what re_check_stack_guard_state refers to - confirm.
void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
  // Allocate space on the stack to store the return address. The
  // CheckStackGuardState C++ function will override it if the code
  // moved. Allocate extra space for 2 arguments passed by pointers.
  // AAPCS64 requires the stack to be 16 byte aligned.
  int alignment = masm_->ActivationFrameAlignment();
  DCHECK_EQ(alignment % 16, 0);
  // Round the three required slots up to the alignment, counted in X
  // registers.
  int align_mask = (alignment / kXRegSize) - 1;
  int xreg_to_claim = (3 + align_mask) & ~align_mask;

  __ Claim(xreg_to_claim);

  // CheckStackGuardState needs the end and start addresses of the input string.
  // They are stored on the stack and x5/x4 point at the stored copies.
  __ Poke(input_end(), 2 * kPointerSize);
  __ Add(x5, sp, 2 * kPointerSize);
  __ Poke(input_start(), kPointerSize);
  __ Add(x4, sp, kPointerSize);

  __ Mov(w3, start_offset());
  // RegExp code frame pointer.
  __ Mov(x2, frame_pointer());
  // Code* of self.
  __ Mov(x1, Operand(masm_->CodeObject()));

  // We need to pass a pointer to the return address as first argument.
  // The DirectCEntry stub will place the return address on the stack before
  // calling so the stack pointer will point to it.
  __ Mov(x0, sp);

  ExternalReference check_stack_guard_state =
      ExternalReference::re_check_stack_guard_state(isolate());
  __ Mov(scratch, check_stack_guard_state);
  DirectCEntryStub stub(isolate());
  stub.GenerateCall(masm_, scratch);

  // The input string may have been moved in memory, we need to reload it.
  __ Peek(input_start(), kPointerSize);
  __ Peek(input_end(), 2 * kPointerSize);

  // Release the slots claimed above.
  __ Drop(xreg_to_claim);

  // Reload the Code pointer.
  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
}
1399 
BranchOrBacktrack(Condition condition,Label * to)1400 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1401                                                   Label* to) {
1402   if (condition == al) {  // Unconditional.
1403     if (to == nullptr) {
1404       Backtrack();
1405       return;
1406     }
1407     __ B(to);
1408     return;
1409   }
1410   if (to == nullptr) {
1411     to = &backtrack_label_;
1412   }
1413   __ B(condition, to);
1414 }
1415 
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1416 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1417                                                             int immediate,
1418                                                             Condition condition,
1419                                                             Label* to) {
1420   if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1421     if (to == nullptr) {
1422       to = &backtrack_label_;
1423     }
1424     if (condition == eq) {
1425       __ Cbz(reg, to);
1426     } else {
1427       __ Cbnz(reg, to);
1428     }
1429   } else {
1430     __ Cmp(reg, immediate);
1431     BranchOrBacktrack(condition, to);
1432   }
1433 }
1434 
1435 
CheckPreemption()1436 void RegExpMacroAssemblerARM64::CheckPreemption() {
1437   // Check for preemption.
1438   ExternalReference stack_limit =
1439       ExternalReference::address_of_stack_limit(isolate());
1440   __ Mov(x10, stack_limit);
1441   __ Ldr(x10, MemOperand(x10));
1442   __ Cmp(sp, x10);
1443   CallIf(&check_preempt_label_, ls);
1444 }
1445 
1446 
CheckStackLimit()1447 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1448   ExternalReference stack_limit =
1449       ExternalReference::address_of_regexp_stack_limit(isolate());
1450   __ Mov(x10, stack_limit);
1451   __ Ldr(x10, MemOperand(x10));
1452   __ Cmp(backtrack_stackpointer(), x10);
1453   CallIf(&stack_overflow_label_, ls);
1454 }
1455 
1456 
Push(Register source)1457 void RegExpMacroAssemblerARM64::Push(Register source) {
1458   DCHECK(source.Is32Bits());
1459   DCHECK(!source.is(backtrack_stackpointer()));
1460   __ Str(source,
1461          MemOperand(backtrack_stackpointer(),
1462                     -static_cast<int>(kWRegSize),
1463                     PreIndex));
1464 }
1465 
1466 
Pop(Register target)1467 void RegExpMacroAssemblerARM64::Pop(Register target) {
1468   DCHECK(target.Is32Bits());
1469   DCHECK(!target.is(backtrack_stackpointer()));
1470   __ Ldr(target,
1471          MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1472 }
1473 
1474 
GetCachedRegister(int register_index)1475 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1476   DCHECK_GT(kNumCachedRegisters, register_index);
1477   return Register::Create(register_index / 2, kXRegSizeInBits);
1478 }
1479 
1480 
GetRegister(int register_index,Register maybe_result)1481 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1482                                                 Register maybe_result) {
1483   DCHECK(maybe_result.Is32Bits());
1484   DCHECK_LE(0, register_index);
1485   if (num_registers_ <= register_index) {
1486     num_registers_ = register_index + 1;
1487   }
1488   Register result = NoReg;
1489   RegisterState register_state = GetRegisterState(register_index);
1490   switch (register_state) {
1491     case STACKED:
1492       __ Ldr(maybe_result, register_location(register_index));
1493       result = maybe_result;
1494       break;
1495     case CACHED_LSW:
1496       result = GetCachedRegister(register_index).W();
1497       break;
1498     case CACHED_MSW:
1499       __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1500              kWRegSizeInBits);
1501       result = maybe_result;
1502       break;
1503     default:
1504       UNREACHABLE();
1505       break;
1506   }
1507   DCHECK(result.Is32Bits());
1508   return result;
1509 }
1510 
1511 
StoreRegister(int register_index,Register source)1512 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1513                                               Register source) {
1514   DCHECK(source.Is32Bits());
1515   DCHECK_LE(0, register_index);
1516   if (num_registers_ <= register_index) {
1517     num_registers_ = register_index + 1;
1518   }
1519 
1520   RegisterState register_state = GetRegisterState(register_index);
1521   switch (register_state) {
1522     case STACKED:
1523       __ Str(source, register_location(register_index));
1524       break;
1525     case CACHED_LSW: {
1526       Register cached_register = GetCachedRegister(register_index);
1527       if (!source.Is(cached_register.W())) {
1528         __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1529       }
1530       break;
1531     }
1532     case CACHED_MSW: {
1533       Register cached_register = GetCachedRegister(register_index);
1534       __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1535       break;
1536     }
1537     default:
1538       UNREACHABLE();
1539       break;
1540   }
1541 }
1542 
1543 
CallIf(Label * to,Condition condition)1544 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1545   Label skip_call;
1546   if (condition != al) __ B(&skip_call, NegateCondition(condition));
1547   __ Bl(to);
1548   __ Bind(&skip_call);
1549 }
1550 
1551 
RestoreLinkRegister()1552 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1553   __ Pop(lr, xzr);
1554   __ Add(lr, lr, Operand(masm_->CodeObject()));
1555 }
1556 
1557 
SaveLinkRegister()1558 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1559   __ Sub(lr, lr, Operand(masm_->CodeObject()));
1560   __ Push(xzr, lr);
1561 }
1562 
1563 
register_location(int register_index)1564 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1565   DCHECK(register_index < (1<<30));
1566   DCHECK_LE(kNumCachedRegisters, register_index);
1567   if (num_registers_ <= register_index) {
1568     num_registers_ = register_index + 1;
1569   }
1570   register_index -= kNumCachedRegisters;
1571   int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1572   return MemOperand(frame_pointer(), offset);
1573 }
1574 
capture_location(int register_index,Register scratch)1575 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1576                                                      Register scratch) {
1577   DCHECK(register_index < (1<<30));
1578   DCHECK(register_index < num_saved_registers_);
1579   DCHECK_LE(kNumCachedRegisters, register_index);
1580   DCHECK_EQ(register_index % 2, 0);
1581   register_index -= kNumCachedRegisters;
1582   int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1583   // capture_location is used with Stp instructions to load/store 2 registers.
1584   // The immediate field in the encoding is limited to 7 bits (signed).
1585   if (is_int7(offset)) {
1586     return MemOperand(frame_pointer(), offset);
1587   } else {
1588     __ Add(scratch, frame_pointer(), offset);
1589     return MemOperand(scratch);
1590   }
1591 }
1592 
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1593 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1594                                                               int characters) {
1595   Register offset = current_input_offset();
1596 
1597   // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1598   // and the operating system running on the target allow it.
1599   // If unaligned load/stores are not supported then this function must only
1600   // be used to load a single character at a time.
1601 
1602   // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1603   // disable it.
1604   // TODO(pielan): See whether or not we should disable unaligned accesses.
1605   if (!CanReadUnaligned()) {
1606     DCHECK_EQ(1, characters);
1607   }
1608 
1609   if (cp_offset != 0) {
1610     if (masm_->emit_debug_code()) {
1611       __ Mov(x10, cp_offset * char_size());
1612       __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1613       __ Cmp(x10, Operand(w10, SXTW));
1614       // The offset needs to fit in a W register.
1615       __ Check(eq, AbortReason::kOffsetOutOfRange);
1616     } else {
1617       __ Add(w10, current_input_offset(), cp_offset * char_size());
1618     }
1619     offset = w10;
1620   }
1621 
1622   if (mode_ == LATIN1) {
1623     if (characters == 4) {
1624       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1625     } else if (characters == 2) {
1626       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1627     } else {
1628       DCHECK_EQ(1, characters);
1629       __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1630     }
1631   } else {
1632     DCHECK(mode_ == UC16);
1633     if (characters == 2) {
1634       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1635     } else {
1636       DCHECK_EQ(1, characters);
1637       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1638     }
1639   }
1640 }
1641 
1642 #endif  // V8_INTERPRETED_REGEXP
1643 
1644 }  // namespace internal
1645 }  // namespace v8
1646 
1647 #undef __
1648 
1649 #endif  // V8_TARGET_ARCH_ARM64
1650