1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
6 #define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
7 
8 #include "src/assembler.h"
9 #include "src/regexp/regexp-ast.h"
10 
11 namespace v8 {
12 namespace internal {
13 
14 struct DisjunctDecisionRow {
15   RegExpCharacterClass cc;
16   Label* on_match;
17 };
18 
19 
20 class RegExpMacroAssembler {
21  public:
22   // The implementation must be able to handle at least:
23   static const int kMaxRegister = (1 << 16) - 1;
24   static const int kMaxCPOffset = (1 << 15) - 1;
25   static const int kMinCPOffset = -(1 << 15);
26 
27   static const int kTableSizeBits = 7;
28   static const int kTableSize = 1 << kTableSizeBits;
29   static const int kTableMask = kTableSize - 1;
30 
31   enum IrregexpImplementation {
32     kIA32Implementation,
33     kARMImplementation,
34     kARM64Implementation,
35     kMIPSImplementation,
36     kPPCImplementation,
37     kX64Implementation,
38     kX87Implementation,
39     kBytecodeImplementation
40   };
41 
42   enum StackCheckFlag {
43     kNoStackLimitCheck = false,
44     kCheckStackLimit = true
45   };
46 
47   RegExpMacroAssembler(Isolate* isolate, Zone* zone);
48   virtual ~RegExpMacroAssembler();
49   // This function is called when code generation is aborted, so that
50   // the assembler could clean up internal data structures.
AbortedCodeGeneration()51   virtual void AbortedCodeGeneration() {}
52   // The maximal number of pushes between stack checks. Users must supply
53   // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
54   // at least once for every stack_limit() pushes that are executed.
55   virtual int stack_limit_slack() = 0;
56   virtual bool CanReadUnaligned() = 0;
57   virtual void AdvanceCurrentPosition(int by) = 0;  // Signed cp change.
58   virtual void AdvanceRegister(int reg, int by) = 0;  // r[reg] += by.
59   // Continues execution from the position pushed on the top of the backtrack
60   // stack by an earlier PushBacktrack(Label*).
61   virtual void Backtrack() = 0;
62   virtual void Bind(Label* label) = 0;
63   virtual void CheckAtStart(Label* on_at_start) = 0;
64   // Dispatch after looking the current character up in a 2-bits-per-entry
65   // map.  The destinations vector has up to 4 labels.
66   virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
67   // Bitwise and the current character with the given constant and then
68   // check for a match with c.
69   virtual void CheckCharacterAfterAnd(unsigned c,
70                                       unsigned and_with,
71                                       Label* on_equal) = 0;
72   virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
73   virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
74   virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
75   virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
76   virtual void CheckNotBackReference(int start_reg, bool read_backward,
77                                      Label* on_no_match) = 0;
78   virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
79                                                bool read_backward,
80                                                Label* on_no_match) = 0;
81   // Check the current character for a match with a literal character.  If we
82   // fail to match then goto the on_failure label.  End of input always
83   // matches.  If the label is NULL then we should pop a backtrack address off
84   // the stack and go to that.
85   virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
86   virtual void CheckNotCharacterAfterAnd(unsigned c,
87                                          unsigned and_with,
88                                          Label* on_not_equal) = 0;
89   // Subtract a constant from the current character, then and with the given
90   // constant and then check for a match with c.
91   virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
92                                               uc16 minus,
93                                               uc16 and_with,
94                                               Label* on_not_equal) = 0;
95   virtual void CheckCharacterInRange(uc16 from,
96                                      uc16 to,  // Both inclusive.
97                                      Label* on_in_range) = 0;
98   virtual void CheckCharacterNotInRange(uc16 from,
99                                         uc16 to,  // Both inclusive.
100                                         Label* on_not_in_range) = 0;
101 
102   // The current character (modulus the kTableSize) is looked up in the byte
103   // array, and if the found byte is non-zero, we jump to the on_bit_set label.
104   virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
105 
106   // Checks whether the given offset from the current position is before
107   // the end of the string.  May overwrite the current character.
108   virtual void CheckPosition(int cp_offset, Label* on_outside_input) = 0;
109   // Check whether a standard/default character class matches the current
110   // character. Returns false if the type of special character class does
111   // not have custom support.
112   // May clobber the current loaded character.
113   virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) = 0;
114   virtual void Fail() = 0;
115   virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
116   virtual void GoTo(Label* label) = 0;
117   // Check whether a register is >= a given constant and go to a label if it
118   // is.  Backtracks instead if the label is NULL.
119   virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
120   // Check whether a register is < a given constant and go to a label if it is.
121   // Backtracks instead if the label is NULL.
122   virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
123   // Check whether a register is == to the current position and go to a
124   // label if it is.
125   virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
126   virtual IrregexpImplementation Implementation() = 0;
127   virtual void LoadCurrentCharacter(int cp_offset,
128                                     Label* on_end_of_input,
129                                     bool check_bounds = true,
130                                     int characters = 1) = 0;
131   virtual void PopCurrentPosition() = 0;
132   virtual void PopRegister(int register_index) = 0;
133   // Pushes the label on the backtrack stack, so that a following Backtrack
134   // will go to this label. Always checks the backtrack stack limit.
135   virtual void PushBacktrack(Label* label) = 0;
136   virtual void PushCurrentPosition() = 0;
137   virtual void PushRegister(int register_index,
138                             StackCheckFlag check_stack_limit) = 0;
139   virtual void ReadCurrentPositionFromRegister(int reg) = 0;
140   virtual void ReadStackPointerFromRegister(int reg) = 0;
141   virtual void SetCurrentPositionFromEnd(int by) = 0;
142   virtual void SetRegister(int register_index, int to) = 0;
143   // Return whether the matching (with a global regexp) will be restarted.
144   virtual bool Succeed() = 0;
145   virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
146   virtual void ClearRegisters(int reg_from, int reg_to) = 0;
147   virtual void WriteStackPointerToRegister(int reg) = 0;
148 
149   // Controls the generation of large inlined constants in the code.
set_slow_safe(bool ssc)150   void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
slow_safe()151   bool slow_safe() { return slow_safe_compiler_; }
152 
153   enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
154   // Set whether the regular expression has the global flag.  Exiting due to
155   // a failure in a global regexp may still mean success overall.
set_global_mode(GlobalMode mode)156   inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
global()157   inline bool global() { return global_mode_ != NOT_GLOBAL; }
global_with_zero_length_check()158   inline bool global_with_zero_length_check() {
159     return global_mode_ == GLOBAL;
160   }
161 
isolate()162   Isolate* isolate() const { return isolate_; }
zone()163   Zone* zone() const { return zone_; }
164 
165  private:
166   bool slow_safe_compiler_;
167   bool global_mode_;
168   Isolate* isolate_;
169   Zone* zone_;
170 };
171 
172 
173 #ifndef V8_INTERPRETED_REGEXP  // Avoid compiling unused code.
174 
175 class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
176  public:
177   // Type of input string to generate code for.
178   enum Mode { LATIN1 = 1, UC16 = 2 };
179 
180   // Result of calling generated native RegExp code.
181   // RETRY: Something significant changed during execution, and the matching
182   //        should be retried from scratch.
183   // EXCEPTION: Something failed during execution. If no exception has been
184   //        thrown, it's an internal out-of-memory, and the caller should
185   //        throw the exception.
186   // FAILURE: Matching failed.
187   // SUCCESS: Matching succeeded, and the output array has been filled with
188   //        capture positions.
189   enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
190 
191   NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone);
192   virtual ~NativeRegExpMacroAssembler();
193   virtual bool CanReadUnaligned();
194 
195   static Result Match(Handle<Code> regexp,
196                       Handle<String> subject,
197                       int* offsets_vector,
198                       int offsets_vector_length,
199                       int previous_index,
200                       Isolate* isolate);
201 
202   // Compares two-byte strings case insensitively.
203   // Called from generated RegExp code.
204   static int CaseInsensitiveCompareUC16(Address byte_offset1,
205                                         Address byte_offset2,
206                                         size_t byte_length,
207                                         Isolate* isolate);
208 
209   // Called from RegExp if the backtrack stack limit is hit.
210   // Tries to expand the stack. Returns the new stack-pointer if
211   // successful, and updates the stack_top address, or returns 0 if unable
212   // to grow the stack.
213   // This function must not trigger a garbage collection.
214   static Address GrowStack(Address stack_pointer, Address* stack_top,
215                            Isolate* isolate);
216 
217   static const byte* StringCharacterPosition(String* subject, int start_index);
218 
219   static int CheckStackGuardState(Isolate* isolate, int start_index,
220                                   bool is_direct_call, Address* return_address,
221                                   Code* re_code, String** subject,
222                                   const byte** input_start,
223                                   const byte** input_end);
224 
225   // Byte map of one byte characters with a 0xff if the character is a word
226   // character (digit, letter or underscore) and 0x00 otherwise.
227   // Used by generated RegExp code.
228   static const byte word_character_map[256];
229 
word_character_map_address()230   static Address word_character_map_address() {
231     return const_cast<Address>(&word_character_map[0]);
232   }
233 
234   static Result Execute(Code* code,
235                         String* input,
236                         int start_offset,
237                         const byte* input_start,
238                         const byte* input_end,
239                         int* output,
240                         int output_size,
241                         Isolate* isolate);
242 };
243 
244 #endif  // V8_INTERPRETED_REGEXP
245 
246 }  // namespace internal
247 }  // namespace v8
248 
249 #endif  // V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
250