/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "codegen_arm64.h"

#include "arm64_lir.h"
#include "art_method.h"
#include "base/logging.h"
#include "dex/mir_graph.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/object_array-inl.h"
#include "utils/dex_cache_arrays_layout-inl.h"

namespace art {

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each entry, we load the key and displacement as a pair using ldp.
 * The test loop will look something like:
 *
 *   adr   r_base, <table>
 *   ldr   r_val, [rA64_SP, v_reg_off]
 *   mov   r_idx, #table_size
 * loop:
 *   cbz   r_idx, quit
 *   ldp   r_key, r_disp, [r_base], #8
 *   sub   r_idx, #1
 *   cmp   r_val, r_key
 *   b.ne  loop
 *   adr   r_base, #0        ; This is the instruction from which we compute displacements
 *   add   r_base, r_disp
 *   br    r_base
 * quit:
 */
void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
  // Add the table to the list - we'll process it later.
  SwitchTable* tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->switch_mir = mir;
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  switch_tables_.push_back(tab_rec);

  // Get the switch value.
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTempWide();
  // Allocate key and disp temps.
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Materialize a pointer to the switch table.
  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx.
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);

  // Entry of loop.
  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);

  // Load next key/disp.
  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);

  // Go to next case, if key does not match.
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  OpCondBranch(kCondNe, loop_entry);

  // Key does match: branch to case label.
  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
  NewLIR1(kA64Br1x, r_base.GetReg());

  // Loop exit label.
  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
  branch_out->target = loop_exit;
}


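/*
 * The large packed switch emitted below looks roughly like this (register names are
 * illustrative, taken from the temps allocated in the code):
 *
 *   adr   table_base, <table>
 *   sub   key_reg, r_val, #low_key        ; only if low_key != 0
 *   cmp   key_reg, #(size - 1)
 *   b.hi  done                            ; unsigned compare also catches key < 0
 *   ldr   w_disp, [table_base, x_key, lsl #2]
 *   adr   branch_reg, #0                  ; anchor from which displacements are computed
 *   add   branch_reg, branch_reg, w_disp, sxtw
 *   br    branch_reg
 * done:
 */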
void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
  // Add the table to the list - we'll process it later.
  SwitchTable* tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->switch_mir = mir;
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  switch_tables_.push_back(tab_rec);

  // Get the switch value.
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTempWide();
  // Materialize a pointer to the switch table.
  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
  RegStorage key_reg;
  // Remove the bias, if necessary.
  if (low_key == 0) {
    key_reg = rl_src.reg;
  } else {
    key_reg = AllocTemp();
    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
  }
  // Bounds check: if the key is < 0 or >= size, branch past the switch
  // (a single unsigned compare against size - 1 covers both cases).
  OpRegImm(kOpCmp, key_reg, size - 1);
  LIR* branch_over = OpCondBranch(kCondHi, nullptr);

  // Load the displacement from the switch table.
  RegStorage disp_reg = AllocTemp();
  LoadBaseIndexed(table_base, As64BitReg(key_reg), disp_reg, 2, k32);

  // Get base branch address.
  RegStorage branch_reg = AllocTempWide();
  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
  NewLIR1(kA64Br1x, branch_reg.GetReg());

  // branch_over target here.
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
 */
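/*
 * The inline fast path emitted below looks roughly like:
 *
 *   ldr   w1, [xSELF, #thin_lock_thread_id]
 *   add   x2, x0, #monitor_offset
 *   ldxr  w3, [x2]
 *   and   w2, w3, #~read_barrier_bits      ; must be 0, i.e. the object is unlocked
 *   ; w2 != 0  -> slow path (artLockObjectFromCode)
 *   orr   w1, w1, w3                       ; preserve the read barrier bits
 *   stxr  w3, w1, [x2]
 *   ; w3 != 0 (store failed) -> slow path
 */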
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // x2    = address of lock word
  // w3    = lock word / store failure
  // TUNING: How much performance do we gain by inlining this, given that we have
  // already flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetReg(kArg0, kRef)
  LockCallTemps();  // Prepare for explicit register usage.
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null check fails, it is handled by the slow path to reduce exception-related metadata.
    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, nullptr);
    }
  }
  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
  NewLIR2(kA64Ldxr2rX, rw3, rx2);
  MarkPossibleNullPointerException(opt_flags);
  // Zero out the read barrier bits.
  OpRegRegImm(kOpAnd, rs_w2, rs_w3, LockWord::kReadBarrierStateMaskShiftedToggled);
  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w2, 0, nullptr);
  // w3 is zero except for the rb bits here. Copy the read barrier bits into w1.
  OpRegRegReg(kOpOr, rs_w1, rs_w1, rs_w3);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, nullptr);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  not_unlocked_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artLockObjectFromCode(obj);
  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_xLR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  lock_success_branch->target = success_target;
  GenMemBarrier(kLoadAny);
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc. Note that, on the non-read-barrier path, the code below doesn't need
 * ldxr/stxr, as the thread holds the lock and can only give away ownership if it is suspended.
 */
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1    = thin lock thread id
  // w2    = lock word
  // TUNING: How much performance do we gain by inlining this, given that we have
  // already flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj.
  LockCallTemps();  // Prepare for explicit register usage.
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null check fails, it is handled by the slow path to reduce exception-related metadata.
    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, nullptr);
    }
  }
  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  if (!kUseReadBarrier) {
    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
  } else {
    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
    NewLIR2(kA64Ldxr2rX, rw2, rx3);
  }
  MarkPossibleNullPointerException(opt_flags);
  // Zero out the read barrier bits.
  OpRegRegImm(kOpAnd, rs_w3, rs_w2, LockWord::kReadBarrierStateMaskShiftedToggled);
  // Zero out everything except the read barrier bits.
  OpRegRegImm(kOpAnd, rs_w2, rs_w2, LockWord::kReadBarrierStateMaskShifted);
  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w3, rs_w1, nullptr);
  GenMemBarrier(kAnyStore);
  LIR* unlock_success_branch;
  if (!kUseReadBarrier) {
    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
    unlock_success_branch = OpUnconditionalBranch(nullptr);
  } else {
    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
    NewLIR3(kA64Stxr3wrX, rw1, rw2, rx3);
    unlock_success_branch = OpCmpImmBranch(kCondEq, rs_w1, 0, nullptr);
  }
  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  slow_unlock_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artUnlockObjectFromCode(obj);
  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_xLR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  unlock_success_branch->target = success_target;
}

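// Load the pending exception from the Thread into rl_dest and clear the exception slot
// by storing xzr back to it.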
void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
  LoadRefDisp(rs_xSELF, ex_offset, rl_result.reg, kNotVolatile);
  StoreRefDisp(rs_xSELF, ex_offset, rs_xzr, kNotVolatile);
  StoreValue(rl_dest, rl_result);
}

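// Dirty the GC card covering tgt_addr_reg: store a byte of the card table base at
// card_table_base + (addr >> kCardShift).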
void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTempWide();
  RegStorage reg_card_no = AllocTempWide();  // Needs to be wide as addr is ref=64b.
  LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
  StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base),
                   0, kUnsignedByte);
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

static dwarf::Reg DwarfCoreReg(int num) {
  return dwarf::Reg::Arm64Core(num);
}

void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // Empty stack.

  /*
   * On entry, x0 to x7 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.
   * Reserve the scratch registers xIP0 & xIP1 for temporaries as well.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  LockTemp(rs_x2);
  LockTemp(rs_x3);
  LockTemp(rs_x4);
  LockTemp(rs_x5);
  LockTemp(rs_x6);
  LockTemp(rs_x7);
  LockTemp(rs_xIP0);
  LockTemp(rs_xIP1);

  /* TUNING:
   * Use AllocTemp() and reuse LR if possible, to give us freedom in adjusting the number
   * of temp registers.
   */

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
    !FrameNeedsStackCheck(frame_size_, kArm64);

  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64);
  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
  bool generate_explicit_stack_overflow_check = large_frame ||
    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
  const int spill_count = num_core_spills_ + num_fp_spills_;
  const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
  const int frame_size_without_spills = frame_size_ - spill_size;

  if (!skip_overflow_check) {
    if (generate_explicit_stack_overflow_check) {
      // Load stack limit.
      LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
    } else {
      // Implicit stack overflow check.
      // Generate a load from [sp, #-framesize].  If this is in the stack
      // redzone we will get a segmentation fault.

      // TODO: If the frame size is small enough, is it possible to make this a pre-indexed load,
      //       so that we can avoid the following "sub sp" when spilling?
      OpRegRegImm(kOpSub, rs_x8, rs_sp, GetStackOverflowReservedBytes(kArm64));
      Load32Disp(rs_x8, 0, rs_wzr);
      MarkPossibleStackOverflowException();
    }
  }

  int spilled_already = 0;
  if (spill_size > 0) {
    spilled_already = SpillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);
    DCHECK(spill_size == spilled_already || frame_size_ == spilled_already);
  }

  if (spilled_already != frame_size_) {
    OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
    cfi_.AdjustCFAOffset(frame_size_without_spills);
  }

  if (!skip_overflow_check) {
    if (generate_explicit_stack_overflow_check) {
      class StackOverflowSlowPath : public LIRSlowPath {
       public:
        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
            : LIRSlowPath(m2l, branch),
              sp_displace_(sp_displace) {
        }
        void Compile() OVERRIDE {
          m2l_->ResetRegPool();
          m2l_->ResetDefTracking();
          GenerateTargetLabel(kPseudoThrowTarget);
          // Unwinds stack.
          m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
          m2l_->cfi().AdjustCFAOffset(-sp_displace_);
          m2l_->ClobberCallerSave();
          ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
          m2l_->LockTemp(rs_xIP0);
          m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
          m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
          m2l_->FreeTemp(rs_xIP0);
          m2l_->cfi().AdjustCFAOffset(sp_displace_);
        }

       private:
        const size_t sp_displace_;
      };

      LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr);
      AddSlowPath(new (arena_) StackOverflowSlowPath(this, branch, frame_size_));
    }
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(rs_x0);
  FreeTemp(rs_x1);
  FreeTemp(rs_x2);
  FreeTemp(rs_x3);
  FreeTemp(rs_x4);
  FreeTemp(rs_x5);
  FreeTemp(rs_x6);
  FreeTemp(rs_x7);
  FreeTemp(rs_xIP0);
  FreeTemp(rs_xIP1);
}

void Arm64Mir2Lir::GenExitSequence() {
  cfi_.RememberState();
  /*
   * In the exit path, x0/x1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);

  // Finally return.
  NewLIR0(kA64Ret);
  // The CFI should be restored for any code that follows the exit block.
  cfi_.RestoreState();
  cfi_.DefCFAOffset(frame_size_);
}

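// Exit for special-cased methods, which set up no stack frame: nothing to unwind, just return.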
void Arm64Mir2Lir::GenSpecialExitSequence() {
  NewLIR0(kA64Ret);
}

void Arm64Mir2Lir::GenSpecialEntryForSuspend() {
  // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr.
  core_spill_mask_ = (1u << rs_xLR.GetRegNum());
  num_core_spills_ = 1u;
  fp_spill_mask_ = 0u;
  num_fp_spills_ = 0u;
  frame_size_ = 16u;
  core_vmap_table_.clear();
  fp_vmap_table_.clear();
  NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
  cfi_.AdjustCFAOffset(frame_size_);
  // Do not generate CFI for scratch register x0.
  cfi_.RelOffset(DwarfCoreReg(rxLR), 8);
}

void Arm64Mir2Lir::GenSpecialExitForSuspend() {
  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
  NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
  cfi_.AdjustCFAOffset(-frame_size_);
  cfi_.Restore(DwarfCoreReg(rxLR));
}

static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
  // When compiling the boot image, emit relative calls anywhere; otherwise only when the
  // target is in the same dex file.
  return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
}

/*
 * Bit of a hack here - in the absence of a real scheduling pass,
 * emit the next instruction in static & direct invoke sequences.
 */
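/*
 * The switch below acts as a small state machine driven by 'state', which the invoke
 * lowering code advances one step at a time. Roughly: state 0 materializes the Method*
 * in kArg0, state 1 loads dex_cache_resolved_methods_ and/or a known direct code pointer,
 * state 2 loads the target method*, and state 3 loads its quick entrypoint into kInvokeTgt;
 * -1 is returned once there is nothing left to emit. The string-init and fully-direct
 * cases use shorter sequences.
 */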
int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                                      int state, const MethodReference& target_method,
                                      uint32_t unused_idx ATTRIBUTE_UNUSED,
                                      uintptr_t direct_code, uintptr_t direct_method,
                                      InvokeType type) {
  Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
  if (info->string_init_offset != 0) {
    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
    switch (state) {
    case 0: {  // Grab target method* from thread pointer.
      cg->LoadWordDisp(rs_xSELF, info->string_init_offset, arg0_ref);
      break;
    }
    case 1:  // Grab the code from the method*.
      if (direct_code == 0) {
        // kInvokeTgt := arg0_ref->entrypoint
        cg->LoadWordDisp(arg0_ref,
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
      }
      break;
    default:
      return -1;
    }
  } else if (direct_code != 0 && direct_method != 0) {
    switch (state) {
    case 0:  // Get the current Method* [sets kArg0]
      if (direct_code != static_cast<uintptr_t>(-1)) {
        cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
      } else if (Arm64UseRelativeCall(cu, target_method)) {
        // Defer to linker patch.
      } else {
        cg->LoadCodeAddress(target_method, type, kInvokeTgt);
      }
      if (direct_method != static_cast<uintptr_t>(-1)) {
        cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
      } else {
        cg->LoadMethodAddress(target_method, type, kArg0);
      }
      break;
    default:
      return -1;
    }
  } else {
    bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
    RegStorage arg0_ref = cg->TargetPtrReg(kArg0);
    switch (state) {
    case 0:  // Get the current Method* [sets kArg0]
      // TUNING: we can save a reg copy if Method* has been promoted.
      if (!use_pc_rel) {
        cg->LoadCurrMethodDirect(arg0_ref);
        break;
      }
      ++state;
      FALLTHROUGH_INTENDED;
    case 1:  // Get method->dex_cache_resolved_methods_
      if (!use_pc_rel) {
        cg->LoadRefDisp(arg0_ref,
                        ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
                        arg0_ref,
                        kNotVolatile);
      }
      // Set up direct code if known.
      if (direct_code != 0) {
        if (direct_code != static_cast<uintptr_t>(-1)) {
          cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
        } else if (Arm64UseRelativeCall(cu, target_method)) {
          // Defer to linker patch.
        } else {
          CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
        }
      }
      if (!use_pc_rel || direct_code != 0) {
        break;
      }
      ++state;
      FALLTHROUGH_INTENDED;
    case 2:  // Grab target method*
      CHECK_EQ(cu->dex_file, target_method.dex_file);
      if (!use_pc_rel) {
        cg->LoadWordDisp(arg0_ref,
                         mirror::Array::DataOffset(kArm64PointerSize).Uint32Value() +
                         target_method.dex_method_index * kArm64PointerSize, arg0_ref);
      } else {
        size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
        cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref, true);
      }
      break;
    case 3:  // Grab the code from the method*.
      if (direct_code == 0) {
        // kInvokeTgt := arg0_ref->entrypoint
        cg->LoadWordDisp(arg0_ref,
                         ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
      }
      break;
    default:
      return -1;
    }
  }
  return state + 1;
}

NextCallInsn Arm64Mir2Lir::GetNextSDCallInsn() {
  return Arm64NextSDCallInsn;
}

LIR* Arm64Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  // For ARM64, just generate a relative BL instruction that will be filled in at 'link time'.
  // If the target turns out to be too far, the linker will generate a thunk for dispatch.
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;

  // Generate the call instruction and save index, dex_file, and type.
  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
  // as a placeholder for the offset.
  LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
                     target_method_idx, WrapPointer(target_dex_file), type);
  AppendLIR(call);
  call_method_insns_.push_back(call);
  return call;
}

LIR* Arm64Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
  LIR* call_insn;
  if (method_info.FastPath() && Arm64UseRelativeCall(cu_, method_info.GetTargetMethod()) &&
      (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
      method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
    call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
  } else {
    call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
  }
  return call_insn;
}

}  // namespace art