1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "jni_macro_assembler_arm_vixl.h"
18 
19 #include <iostream>
20 #include <type_traits>
21 
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "indirect_reference_table.h"
24 #include "jni/jni_env_ext.h"
25 #include "jni/local_reference_table.h"
26 #include "lock_word.h"
27 #include "thread.h"
28 
29 using namespace vixl::aarch32;  // NOLINT(build/namespaces)
30 namespace vixl32 = vixl::aarch32;
31 
32 using vixl::ExactAssemblyScope;
33 
34 namespace art HIDDEN {
35 namespace arm {
36 
37 #ifdef ___
38 #error "ARM Assembler macro already defined."
39 #else
40 #define ___   asm_.GetVIXLAssembler()->
41 #endif
42 
// The AAPCS requires 8-byte alignment. This is not as strict as the Managed ABI stack alignment.
static constexpr size_t kAapcsStackAlignment = 8u;
// The managed-ABI `kStackAlignment` must be the stricter (larger) of the two.
static_assert(kAapcsStackAlignment < kStackAlignment);

// STRD immediate can encode any 4-byte aligned offset smaller than this cutoff.
static constexpr size_t kStrdOffsetCutoff = 1024u;

// ADD sp, imm can encode 4-byte aligned immediate smaller than this cutoff.
static constexpr size_t kAddSpImmCutoff = 1024u;
52 
AsVIXLRegister(ArmManagedRegister reg)53 vixl::aarch32::Register AsVIXLRegister(ArmManagedRegister reg) {
54   CHECK(reg.IsCoreRegister());
55   return vixl::aarch32::Register(reg.RegId());
56 }
57 
AsVIXLSRegister(ArmManagedRegister reg)58 static inline vixl::aarch32::SRegister AsVIXLSRegister(ArmManagedRegister reg) {
59   CHECK(reg.IsSRegister());
60   return vixl::aarch32::SRegister(reg.RegId() - kNumberOfCoreRegIds);
61 }
62 
AsVIXLDRegister(ArmManagedRegister reg)63 static inline vixl::aarch32::DRegister AsVIXLDRegister(ArmManagedRegister reg) {
64   CHECK(reg.IsDRegister());
65   return vixl::aarch32::DRegister(reg.RegId() - kNumberOfCoreRegIds - kNumberOfSRegIds);
66 }
67 
AsVIXLRegisterPairLow(ArmManagedRegister reg)68 static inline vixl::aarch32::Register AsVIXLRegisterPairLow(ArmManagedRegister reg) {
69   return vixl::aarch32::Register(reg.AsRegisterPairLow());
70 }
71 
AsVIXLRegisterPairHigh(ArmManagedRegister reg)72 static inline vixl::aarch32::Register AsVIXLRegisterPairHigh(ArmManagedRegister reg) {
73   return vixl::aarch32::Register(reg.AsRegisterPairHigh());
74 }
75 
// Finalize the generated code; delegates to the underlying assembler.
void ArmVIXLJNIMacroAssembler::FinalizeCode() {
  asm_.FinalizeCode();
}
79 
80 static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
81 
// Emit the JNI stub frame entry: push callee-save core and FP registers,
// grow the stack to `frame_size`, and optionally spill the ArtMethod* (in
// `method_reg`, which must be r0) to the bottom stack slot. CFI is kept in
// sync with every SP adjustment.
void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
                                          ManagedRegister method_reg,
                                          ArrayRef<const ManagedRegister> callee_save_regs) {
  // If we're creating an actual frame with the method, enforce managed stack alignment,
  // otherwise only the native stack alignment.
  if (method_reg.IsNoRegister()) {
    CHECK_ALIGNED_PARAM(frame_size, kAapcsStackAlignment);
  } else {
    CHECK_ALIGNED_PARAM(frame_size, kStackAlignment);
  }

  // Push callee saves and link register.
  // Split the callee saves into a core-register bit mask and an FP (S-register) bit mask.
  RegList core_spill_mask = 0;
  uint32_t fp_spill_mask = 0;
  for (const ManagedRegister& reg : callee_save_regs) {
    if (reg.AsArm().IsCoreRegister()) {
      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
    } else {
      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
    }
  }
  if (core_spill_mask == (1u << lr.GetCode()) &&
      fp_spill_mask == 0u &&
      frame_size == 2 * kFramePointerSize &&
      !method_reg.IsRegister()) {
    // Special case: Only LR to push and one word to skip. Do this with a single
    // 16-bit PUSH instruction by arbitrarily pushing r3 (without CFI for r3).
    core_spill_mask |= 1u << r3.GetCode();
    ___ Push(RegisterList(core_spill_mask));
    cfi().AdjustCFAOffset(2 * kFramePointerSize);
    cfi().RelOffset(DWARFReg(lr), kFramePointerSize);
  } else if (core_spill_mask != 0u) {
    ___ Push(RegisterList(core_spill_mask));
    // CFA moves by one word per pushed register; record each register's save slot.
    cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
    cfi().RelOffsetForMany(DWARFReg(r0), 0, core_spill_mask, kFramePointerSize);
  }
  if (fp_spill_mask != 0) {
    uint32_t first = CTZ(fp_spill_mask);

    // Check that list is contiguous.
    DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));

    // VPUSH takes a contiguous S-register list: first register and count.
    ___ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask)));
    cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
    cfi().RelOffsetForMany(DWARFReg(s0), 0, fp_spill_mask, kFramePointerSize);
  }

  // Increase frame to required size.
  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
  // Must at least have space for Method* if we're going to spill it.
  CHECK_GE(frame_size, (pushed_values + (method_reg.IsRegister() ? 1u : 0u)) * kFramePointerSize);
  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.

  if (method_reg.IsRegister()) {
    // Write out Method*.
    CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
    asm_.StoreToOffset(kStoreWord, r0, sp, 0);
  }
}
141 
RemoveFrame(size_t frame_size,ArrayRef<const ManagedRegister> callee_save_regs,bool may_suspend)142 void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
143                                            ArrayRef<const ManagedRegister> callee_save_regs,
144                                            bool may_suspend) {
145   CHECK_ALIGNED(frame_size, kAapcsStackAlignment);
146 
147   // Compute callee saves to pop.
148   RegList core_spill_mask = 0u;
149   uint32_t fp_spill_mask = 0u;
150   for (const ManagedRegister& reg : callee_save_regs) {
151     if (reg.AsArm().IsCoreRegister()) {
152       core_spill_mask |= 1u << reg.AsArm().AsCoreRegister();
153     } else {
154       fp_spill_mask |= 1u << reg.AsArm().AsSRegister();
155     }
156   }
157 
158   // Pop LR to PC unless we need to emit some read barrier code just before returning.
159   bool emit_code_before_return =
160       kReserveMarkingRegister &&
161       (may_suspend || (kIsDebugBuild && emit_run_time_checks_in_debug_mode_));
162   if ((core_spill_mask & (1u << lr.GetCode())) != 0u && !emit_code_before_return) {
163     DCHECK_EQ(core_spill_mask & (1u << pc.GetCode()), 0u);
164     core_spill_mask ^= (1u << lr.GetCode()) | (1u << pc.GetCode());
165   }
166 
167   // If there are no FP registers to pop and we pop PC, we can avoid emitting any CFI.
168   if (fp_spill_mask == 0u && (core_spill_mask & (1u << pc.GetCode())) != 0u) {
169     if (frame_size == POPCOUNT(core_spill_mask) * kFramePointerSize) {
170       // Just pop all registers and avoid CFI.
171       ___ Pop(RegisterList(core_spill_mask));
172       return;
173     } else if (frame_size == 8u && core_spill_mask == (1u << pc.GetCode())) {
174       // Special case: One word to ignore and one to pop to PC. We are free to clobber the
175       // caller-save register r3 on return, so use a 16-bit POP instruction and avoid CFI.
176       ___ Pop(RegisterList((1u << r3.GetCode()) | (1u << pc.GetCode())));
177       return;
178     }
179   }
180 
181   // We shall need to adjust CFI and restore it after the frame exit sequence.
182   cfi().RememberState();
183 
184   // Decrease frame to start of callee saves.
185   size_t pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
186   CHECK_GE(frame_size, pop_values * kFramePointerSize);
187   DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
188 
189   // Pop FP callee saves.
190   if (fp_spill_mask != 0u) {
191     uint32_t first = CTZ(fp_spill_mask);
192     // Check that list is contiguous.
193      DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));
194 
195     ___ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask)));
196     cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
197     cfi().RestoreMany(DWARFReg(s0), fp_spill_mask);
198   }
199 
200   // Pop core callee saves.
201   if (core_spill_mask != 0u) {
202     ___ Pop(RegisterList(core_spill_mask));
203     if ((core_spill_mask & (1u << pc.GetCode())) == 0u) {
204       cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(core_spill_mask));
205       cfi().RestoreMany(DWARFReg(r0), core_spill_mask);
206     }
207   }
208 
209   // Emit marking register refresh even with all GCs as we are still using the
210   // register due to nterp's dependency.
211   if (kReserveMarkingRegister) {
212     if (may_suspend) {
213       // The method may be suspended; refresh the Marking Register.
214       ___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
215     } else {
216       // The method shall not be suspended; no need to refresh the Marking Register.
217 
218       // The Marking Register is a callee-save register, and thus has been
219       // preserved by native code following the AAPCS calling convention.
220 
221       // The following condition is a compile-time one, so it does not have a run-time cost.
222       if (kIsDebugBuild) {
223         // The following condition is a run-time one; it is executed after the
224         // previous compile-time test, to avoid penalizing non-debug builds.
225         if (emit_run_time_checks_in_debug_mode_) {
226           // Emit a run-time check verifying that the Marking Register is up-to-date.
227           UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
228           vixl32::Register temp = temps.Acquire();
229           // Ensure we are not clobbering a callee-save register that was restored before.
230           DCHECK_EQ(core_spill_mask & (1 << temp.GetCode()), 0)
231               << "core_spill_mask hould not contain scratch register R" << temp.GetCode();
232           asm_.GenerateMarkingRegisterCheck(temp);
233         }
234       }
235     }
236   }
237 
238   // Return to LR.
239   if ((core_spill_mask & (1u << pc.GetCode())) == 0u) {
240     ___ Bx(vixl32::lr);
241   }
242 
243   // The CFI should be restored for any code that follows the exit block.
244   cfi().RestoreState();
245   cfi().DefCFAOffset(frame_size);
246 }
247 
248 
IncreaseFrameSize(size_t adjust)249 void ArmVIXLJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
250   if (adjust != 0u) {
251     asm_.AddConstant(sp, -adjust);
252     cfi().AdjustCFAOffset(adjust);
253   }
254 }
255 
DecreaseFrameSize(size_t adjust)256 void ArmVIXLJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
257   if (adjust != 0u) {
258     asm_.AddConstant(sp, adjust);
259     cfi().AdjustCFAOffset(-adjust);
260   }
261 }
262 
CoreRegisterWithSize(ManagedRegister src,size_t size)263 ManagedRegister ArmVIXLJNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
264   DCHECK(src.AsArm().IsCoreRegister());
265   DCHECK_EQ(size, 4u);
266   return src;
267 }
268 
Store(FrameOffset dest,ManagedRegister m_src,size_t size)269 void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, size_t size) {
270   Store(ArmManagedRegister::FromCoreRegister(SP), MemberOffset(dest.Int32Value()), m_src, size);
271 }
272 
// Store `m_src` to `[m_base + offs]`, dispatching on the kind of source
// register: no-register (size 0), core register (word), register pair
// (double word via STRD), S-register (single-precision) or D-register
// (double-precision). `size` must match the register kind.
void ArmVIXLJNIMacroAssembler::Store(ManagedRegister m_base,
                                     MemberOffset offs,
                                     ManagedRegister m_src,
                                     size_t size) {
  ArmManagedRegister base = m_base.AsArm();
  ArmManagedRegister src = m_src.AsArm();
  if (src.IsNoRegister()) {
    CHECK_EQ(0u, size);
  } else if (src.IsCoreRegister()) {
    CHECK_EQ(4u, size);
    // Keep the source register out of the scratch pool while emitting the store.
    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    temps.Exclude(AsVIXLRegister(src));
    asm_.StoreToOffset(kStoreWord, AsVIXLRegister(src), AsVIXLRegister(base), offs.Int32Value());
  } else if (src.IsRegisterPair()) {
    CHECK_EQ(8u, size);
    ___ Strd(AsVIXLRegisterPairLow(src),
             AsVIXLRegisterPairHigh(src),
             MemOperand(AsVIXLRegister(base), offs.Int32Value()));
  } else if (src.IsSRegister()) {
    CHECK_EQ(4u, size);
    asm_.StoreSToOffset(AsVIXLSRegister(src), AsVIXLRegister(base), offs.Int32Value());
  } else {
    CHECK_EQ(8u, size);
    CHECK(src.IsDRegister()) << src;
    asm_.StoreDToOffset(AsVIXLDRegister(src), AsVIXLRegister(base), offs.Int32Value());
  }
}
300 
StoreRawPtr(FrameOffset dest,ManagedRegister msrc)301 void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
302   vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
303   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
304   temps.Exclude(src);
305   asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
306 }
307 
Load(ManagedRegister m_dst,FrameOffset src,size_t size)308 void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
309   return Load(m_dst.AsArm(), sp, src.Int32Value(), size);
310 }
311 
Load(ManagedRegister m_dst,ManagedRegister m_base,MemberOffset offs,size_t size)312 void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst,
313                                     ManagedRegister m_base,
314                                     MemberOffset offs,
315                                     size_t size) {
316   return Load(m_dst.AsArm(), AsVIXLRegister(m_base.AsArm()), offs.Int32Value(), size);
317 }
318 
319 
LoadRawPtrFromThread(ManagedRegister mdest,ThreadOffset32 offs)320 void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
321   vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm());
322   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
323   temps.Exclude(dest);
324   asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value());
325 }
326 
StoreStackPointerToThread(ThreadOffset32 thr_offs,bool tag_sp)327 void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) {
328   if (tag_sp) {
329     UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
330     vixl32::Register reg = temps.Acquire();
331     ___ Orr(reg, sp, 0x2);
332     asm_.StoreToOffset(kStoreWord, reg, tr, thr_offs.Int32Value());
333   } else {
334     asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
335   }
336 }
337 
// Not supported on arm32: callers must not request sign extension here.
void ArmVIXLJNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg,
                                          [[maybe_unused]] size_t size) {
  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
}
342 
// Not supported on arm32: callers must not request zero extension here.
void ArmVIXLJNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg,
                                          [[maybe_unused]] size_t size) {
  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
}
347 
IsCoreRegisterOrPair(ArmManagedRegister reg)348 static inline bool IsCoreRegisterOrPair(ArmManagedRegister reg) {
349   return reg.IsCoreRegister() || reg.IsRegisterPair();
350 }
351 
NoSpillGap(const ArgumentLocation & loc1,const ArgumentLocation & loc2)352 static inline bool NoSpillGap(const ArgumentLocation& loc1, const ArgumentLocation& loc2) {
353   DCHECK(!loc1.IsRegister());
354   DCHECK(!loc2.IsRegister());
355   uint32_t loc1_offset = loc1.GetFrameOffset().Uint32Value();
356   uint32_t loc2_offset = loc2.GetFrameOffset().Uint32Value();
357   return loc1_offset + loc1.GetSize() == loc2_offset;
358 }
359 
GetSRegisterNumber(ArmManagedRegister reg)360 static inline uint32_t GetSRegisterNumber(ArmManagedRegister reg) {
361   if (reg.IsSRegister()) {
362     return static_cast<uint32_t>(reg.AsSRegister());
363   } else {
364     DCHECK(reg.IsDRegister());
365     return 2u * static_cast<uint32_t>(reg.AsDRegister());
366   }
367 }
368 
369 // Get the number of locations to spill together.
// Get the number of locations to spill together.
// Starting at `start`, count how many consecutive register arguments can be
// stored with one multi-register store: destinations must be contiguous stack
// slots and sources must all be core registers/pairs, or all be contiguous
// FP registers starting right after the previous source's S-registers.
static inline size_t GetSpillChunkSize(ArrayRef<ArgumentLocation> dests,
                                       ArrayRef<ArgumentLocation> srcs,
                                       size_t start) {
  DCHECK_LT(start, dests.size());
  DCHECK_ALIGNED(dests[start].GetFrameOffset().Uint32Value(), 4u);
  const ArgumentLocation& first_src = srcs[start];
  DCHECK(first_src.IsRegister());
  ArmManagedRegister first_src_reg = first_src.GetRegister().AsArm();
  size_t end = start + 1u;
  if (IsCoreRegisterOrPair(first_src_reg)) {
    // Extend the chunk while destinations stay contiguous and sources
    // remain core registers or pairs.
    while (end != dests.size() &&
           NoSpillGap(dests[end - 1u], dests[end]) &&
           srcs[end].IsRegister() &&
           IsCoreRegisterOrPair(srcs[end].GetRegister().AsArm())) {
      ++end;
    }
  } else {
    DCHECK(first_src_reg.IsSRegister() || first_src_reg.IsDRegister());
    // Track the next expected S-register number so the FP sources form a
    // contiguous register range (required for VSTM).
    uint32_t next_sreg = GetSRegisterNumber(first_src_reg) + first_src.GetSize() / kSRegSizeInBytes;
    while (end != dests.size() &&
           NoSpillGap(dests[end - 1u], dests[end]) &&
           srcs[end].IsRegister() &&
           !IsCoreRegisterOrPair(srcs[end].GetRegister().AsArm()) &&
           GetSRegisterNumber(srcs[end].GetRegister().AsArm()) == next_sreg) {
      next_sreg += srcs[end].GetSize() / kSRegSizeInBytes;
      ++end;
    }
  }
  return end - start;
}
400 
GetCoreRegisterMask(ArmManagedRegister reg)401 static inline uint32_t GetCoreRegisterMask(ArmManagedRegister reg) {
402   if (reg.IsCoreRegister()) {
403     return 1u << static_cast<size_t>(reg.AsCoreRegister());
404   } else {
405     DCHECK(reg.IsRegisterPair());
406     DCHECK_LT(reg.AsRegisterPairLow(), reg.AsRegisterPairHigh());
407     return (1u << static_cast<size_t>(reg.AsRegisterPairLow())) |
408            (1u << static_cast<size_t>(reg.AsRegisterPairHigh()));
409   }
410 }
411 
GetCoreRegisterMask(ArrayRef<ArgumentLocation> srcs)412 static inline uint32_t GetCoreRegisterMask(ArrayRef<ArgumentLocation> srcs) {
413   uint32_t mask = 0u;
414   for (const ArgumentLocation& loc : srcs) {
415     DCHECK(loc.IsRegister());
416     mask |= GetCoreRegisterMask(loc.GetRegister().AsArm());
417   }
418   return mask;
419 }
420 
UseStrdForChunk(ArrayRef<ArgumentLocation> srcs,size_t start,size_t length)421 static inline bool UseStrdForChunk(ArrayRef<ArgumentLocation> srcs, size_t start, size_t length) {
422   DCHECK_GE(length, 2u);
423   DCHECK(srcs[start].IsRegister());
424   DCHECK(srcs[start + 1u].IsRegister());
425   // The destination may not be 8B aligned (but it is 4B aligned).
426   // Allow arbitrary destination offset, macro assembler will use a temp if needed.
427   // Note: T32 allows unrelated registers in STRD. (A32 does not.)
428   return length == 2u &&
429          srcs[start].GetRegister().AsArm().IsCoreRegister() &&
430          srcs[start + 1u].GetRegister().AsArm().IsCoreRegister();
431 }
432 
UseVstrForChunk(ArrayRef<ArgumentLocation> srcs,size_t start,size_t length)433 static inline bool UseVstrForChunk(ArrayRef<ArgumentLocation> srcs, size_t start, size_t length) {
434   DCHECK_GE(length, 2u);
435   DCHECK(srcs[start].IsRegister());
436   DCHECK(srcs[start + 1u].IsRegister());
437   // The destination may not be 8B aligned (but it is 4B aligned).
438   // Allow arbitrary destination offset, macro assembler will use a temp if needed.
439   return length == 2u &&
440          srcs[start].GetRegister().AsArm().IsSRegister() &&
441          srcs[start + 1u].GetRegister().AsArm().IsSRegister() &&
442          IsAligned<2u>(static_cast<size_t>(srcs[start].GetRegister().AsArm().AsSRegister()));
443 }
444 
MoveArguments(ArrayRef<ArgumentLocation> dests,ArrayRef<ArgumentLocation> srcs,ArrayRef<FrameOffset> refs)445 void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
446                                              ArrayRef<ArgumentLocation> srcs,
447                                              ArrayRef<FrameOffset> refs) {
448   size_t arg_count = dests.size();
449   DCHECK_EQ(arg_count, srcs.size());
450   DCHECK_EQ(arg_count, refs.size());
451 
452   // Convert reference registers to `jobject` values.
453   // TODO: Delay this for references that are copied to another register.
454   for (size_t i = 0; i != arg_count; ++i) {
455     if (refs[i] != kInvalidReferenceOffset && srcs[i].IsRegister()) {
456       // Note: We can clobber `srcs[i]` here as the register cannot hold more than one argument.
457       ManagedRegister src_i_reg = srcs[i].GetRegister();
458       CreateJObject(src_i_reg, refs[i], src_i_reg, /*null_allowed=*/ i != 0u);
459     }
460   }
461 
462   // Native ABI is soft-float, so all destinations should be core registers or stack offsets.
463   // And register locations should be first, followed by stack locations.
464   auto is_register = [](const ArgumentLocation& loc) { return loc.IsRegister(); };
465   DCHECK(std::is_partitioned(dests.begin(), dests.end(), is_register));
466   size_t num_reg_dests =
467       std::distance(dests.begin(), std::partition_point(dests.begin(), dests.end(), is_register));
468 
469   // Collect registers to move. No need to record FP regs as destinations are only core regs.
470   uint32_t src_regs = 0u;
471   uint32_t dest_regs = 0u;
472   uint32_t same_regs = 0u;
473   for (size_t i = 0; i != num_reg_dests; ++i) {
474     const ArgumentLocation& src = srcs[i];
475     const ArgumentLocation& dest = dests[i];
476     DCHECK(dest.IsRegister() && IsCoreRegisterOrPair(dest.GetRegister().AsArm()));
477     if (src.IsRegister() && IsCoreRegisterOrPair(src.GetRegister().AsArm())) {
478       if (src.GetRegister().Equals(dest.GetRegister())) {
479         same_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
480         continue;
481       }
482       src_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
483     }
484     dest_regs |= GetCoreRegisterMask(dest.GetRegister().AsArm());
485   }
486 
487   // Spill register arguments to stack slots.
488   for (size_t i = num_reg_dests; i != arg_count; ) {
489     const ArgumentLocation& src = srcs[i];
490     if (!src.IsRegister()) {
491       ++i;
492       continue;
493     }
494     const ArgumentLocation& dest = dests[i];
495     DCHECK_EQ(src.GetSize(), dest.GetSize());  // Even for references.
496     DCHECK(!dest.IsRegister());
497     uint32_t frame_offset = dest.GetFrameOffset().Uint32Value();
498     size_t chunk_size = GetSpillChunkSize(dests, srcs, i);
499     DCHECK_NE(chunk_size, 0u);
500     if (chunk_size == 1u) {
501       Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
502     } else if (UseStrdForChunk(srcs, i, chunk_size)) {
503       ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
504                AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
505                MemOperand(sp, frame_offset));
506     } else if (UseVstrForChunk(srcs, i, chunk_size)) {
507       size_t sreg = GetSRegisterNumber(src.GetRegister().AsArm());
508       DCHECK_ALIGNED(sreg, 2u);
509       ___ Vstr(vixl32::DRegister(sreg / 2u), MemOperand(sp, frame_offset));
510     } else {
511       UseScratchRegisterScope temps2(asm_.GetVIXLAssembler());
512       vixl32::Register base_reg;
513       if (frame_offset == 0u) {
514         base_reg = sp;
515       } else {
516         base_reg = temps2.Acquire();
517         ___ Add(base_reg, sp, frame_offset);
518       }
519 
520       ArmManagedRegister src_reg = src.GetRegister().AsArm();
521       if (IsCoreRegisterOrPair(src_reg)) {
522         uint32_t core_reg_mask = GetCoreRegisterMask(srcs.SubArray(i, chunk_size));
523         ___ Stm(base_reg, NO_WRITE_BACK, RegisterList(core_reg_mask));
524       } else {
525         uint32_t start_sreg = GetSRegisterNumber(src_reg);
526         const ArgumentLocation& last_dest = dests[i + chunk_size - 1u];
527         uint32_t total_size =
528             last_dest.GetFrameOffset().Uint32Value() + last_dest.GetSize() - frame_offset;
529         if (IsAligned<2u>(start_sreg) &&
530             IsAligned<kDRegSizeInBytes>(frame_offset) &&
531             IsAligned<kDRegSizeInBytes>(total_size)) {
532           uint32_t dreg_count = total_size / kDRegSizeInBytes;
533           DRegisterList dreg_list(vixl32::DRegister(start_sreg / 2u), dreg_count);
534           ___ Vstm(F64, base_reg, NO_WRITE_BACK, dreg_list);
535         } else {
536           uint32_t sreg_count = total_size / kSRegSizeInBytes;
537           SRegisterList sreg_list(vixl32::SRegister(start_sreg), sreg_count);
538           ___ Vstm(F32, base_reg, NO_WRITE_BACK, sreg_list);
539         }
540       }
541     }
542     i += chunk_size;
543   }
544 
545   // Copy incoming stack arguments to outgoing stack arguments.
546   // Registers r0-r3 are argument registers for both managed and native ABI and r4
547   // is a scratch register in managed ABI but also a hidden argument register for
548   // @CriticalNative call. We can use these registers as temporaries for copying
549   // stack arguments as long as they do not currently hold live values.
550   // TODO: Use the callee-save scratch registers instead to avoid using calling
551   // convention knowledge in the assembler. This would require reordering the
552   // argument move with pushing the IRT frame where those registers are used.
553   uint32_t copy_temp_regs = ((1u << 5) - 1u) & ~(same_regs | src_regs);
554   if ((dest_regs & (1u << R4)) != 0) {
555     // For @CriticalNative, R4 shall hold the hidden argument but it is available
556     // for use as a temporary at this point. However, it may be the only available
557     // register, so we shall use IP as the second temporary if needed.
558     // We do not need to worry about `CreateJObject` for @CriticalNative.
559     DCHECK_NE(copy_temp_regs, 0u);
560     DCHECK(std::all_of(refs.begin(),
561                        refs.end(),
562                        [](FrameOffset r) { return r == kInvalidReferenceOffset; }));
563   } else {
564     // For normal native and @FastNative, R4 and at least one of R0-R3 should be
565     // available because there are only 3 destination registers R1-R3 where the
566     // source registers can be moved. The R0 shall be filled by the `JNIEnv*`
567     // argument later. We need to keep IP available for `CreateJObject()`.
568     DCHECK_GE(POPCOUNT(copy_temp_regs), 2);
569   }
570   vixl32::Register copy_temp1 = vixl32::Register(LeastSignificantBit(copy_temp_regs));
571   copy_temp_regs ^= 1u << copy_temp1.GetCode();
572   vixl32::Register copy_xtemp = (copy_temp_regs != 0u)
573       ? vixl32::Register(LeastSignificantBit(copy_temp_regs))
574       : vixl32::Register();
575   for (size_t i = num_reg_dests; i != arg_count; ++i) {
576     if (srcs[i].IsRegister()) {
577       continue;
578     }
579     FrameOffset src_offset = srcs[i].GetFrameOffset();
580     DCHECK_ALIGNED(src_offset.Uint32Value(), 4u);
581     FrameOffset dest_offset = dests[i].GetFrameOffset();
582     DCHECK_ALIGNED(dest_offset.Uint32Value(), 4u);
583     // Look for opportunities to move 2 words at a time with LDRD/STRD
584     // when the source types are word-sized.
585     if (srcs[i].GetSize() == 4u &&
586         i + 1u != arg_count &&
587         !srcs[i + 1u].IsRegister() &&
588         srcs[i + 1u].GetSize() == 4u &&
589         NoSpillGap(srcs[i], srcs[i + 1u]) &&
590         NoSpillGap(dests[i], dests[i + 1u]) &&
591         dest_offset.Uint32Value() < kStrdOffsetCutoff) {
592       UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
593       vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
594       ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
595       if (refs[i] != kInvalidReferenceOffset) {
596         ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
597             enum_cast<Register>(copy_temp1.GetCode()));
598         CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
599       }
600       if (refs[i + 1u] != kInvalidReferenceOffset) {
601         ArmManagedRegister m_copy_temp2 = ArmManagedRegister::FromCoreRegister(
602             enum_cast<Register>(copy_temp2.GetCode()));
603         CreateJObject(m_copy_temp2, refs[i + 1u], m_copy_temp2, /*null_allowed=*/ true);
604       }
605       ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
606       ++i;
607     } else if (dests[i].GetSize() == 8u && dest_offset.Uint32Value() < kStrdOffsetCutoff) {
608       UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
609       vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
610       ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
611       ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
612     } else if (refs[i] != kInvalidReferenceOffset) {
613       // Do not use the `CreateJObject()` overload for stack target as it generates
614       // worse code than explicitly using a low register temporary.
615       ___ Ldr(copy_temp1, MemOperand(sp, src_offset.Uint32Value()));
616       ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
617           enum_cast<Register>(copy_temp1.GetCode()));
618       CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
619       ___ Str(copy_temp1, MemOperand(sp, dest_offset.Uint32Value()));
620     } else {
621       Copy(dest_offset, src_offset, dests[i].GetSize());
622     }
623   }
624 
625   // Fill destination registers from source core registers.
626   // There should be no cycles, so this algorithm should make progress.
627   while (src_regs != 0u) {
628     uint32_t old_src_regs = src_regs;
629     for (size_t i = 0; i != num_reg_dests; ++i) {
630       DCHECK(dests[i].IsRegister() && IsCoreRegisterOrPair(dests[i].GetRegister().AsArm()));
631       if (!srcs[i].IsRegister() || !IsCoreRegisterOrPair(srcs[i].GetRegister().AsArm())) {
632         continue;
633       }
634       uint32_t dest_reg_mask = GetCoreRegisterMask(dests[i].GetRegister().AsArm());
635       if ((dest_reg_mask & dest_regs) == 0u) {
636         continue;  // Equals source, or already filled in one of previous iterations.
637       }
638       // There are no partial overlaps of 8-byte arguments, otherwise we would have to
639       // tweak this check; Move() can deal with partial overlap for historical reasons.
640       if ((dest_reg_mask & src_regs) != 0u) {
641         continue;  // Cannot clobber this register yet.
642       }
643       Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
644       uint32_t src_reg_mask = GetCoreRegisterMask(srcs[i].GetRegister().AsArm());
645       DCHECK_EQ(src_regs & src_reg_mask, src_reg_mask);
646       src_regs &= ~src_reg_mask;  // Allow clobbering the source register or pair.
647       dest_regs &= ~dest_reg_mask;  // Destination register or pair was filled.
648     }
649     CHECK_NE(old_src_regs, src_regs);
650     DCHECK_EQ(0u, src_regs & ~old_src_regs);
651   }
652 
653   // Now fill destination registers from FP registers or stack slots, looking for
654   // opportunities to use LDRD/VMOV to fill 2 registers with one instruction.
655   for (size_t i = 0, j; i != num_reg_dests; i = j) {
656     j = i + 1u;
657     DCHECK(dests[i].IsRegister());
658     ArmManagedRegister dest_reg = dests[i].GetRegister().AsArm();
659     DCHECK(IsCoreRegisterOrPair(dest_reg));
660     if (srcs[i].IsRegister() && IsCoreRegisterOrPair(srcs[i].GetRegister().AsArm())) {
661       DCHECK_EQ(GetCoreRegisterMask(dests[i].GetRegister().AsArm()) & dest_regs, 0u);
662       continue;  // Equals destination or moved above.
663     }
664     DCHECK_NE(GetCoreRegisterMask(dest_reg) & dest_regs, 0u);
665     if (dests[i].GetSize() == 4u) {
666       // Find next register to load.
667       while (j != num_reg_dests &&
668              (srcs[j].IsRegister() && IsCoreRegisterOrPair(srcs[j].GetRegister().AsArm()))) {
669         DCHECK_EQ(GetCoreRegisterMask(dests[j].GetRegister().AsArm()) & dest_regs, 0u);
670         ++j;  // Equals destination or moved above.
671       }
672       if (j != num_reg_dests && dests[j].GetSize() == 4u) {
673         if (!srcs[i].IsRegister() && !srcs[j].IsRegister() && NoSpillGap(srcs[i], srcs[j])) {
674           ___ Ldrd(AsVIXLRegister(dests[i].GetRegister().AsArm()),
675                    AsVIXLRegister(dests[j].GetRegister().AsArm()),
676                    MemOperand(sp, srcs[i].GetFrameOffset().Uint32Value()));
677           if (refs[i] != kInvalidReferenceOffset) {
678             DCHECK_EQ(refs[i], srcs[i].GetFrameOffset());
679             CreateJObject(dest_reg, refs[i], dest_reg, /*null_allowed=*/ i != 0u);
680           }
681           if (refs[j] != kInvalidReferenceOffset) {
682             DCHECK_EQ(refs[j], srcs[j].GetFrameOffset());
683             ManagedRegister dest_j_reg = dests[j].GetRegister();
684             CreateJObject(dest_j_reg, refs[j], dest_j_reg, /*null_allowed=*/ true);
685           }
686           ++j;
687           continue;
688         }
689         if (srcs[i].IsRegister() && srcs[j].IsRegister()) {
690           uint32_t first_sreg = GetSRegisterNumber(srcs[i].GetRegister().AsArm());
691           if (IsAligned<2u>(first_sreg) &&
692               first_sreg + 1u == GetSRegisterNumber(srcs[j].GetRegister().AsArm())) {
693             ___ Vmov(AsVIXLRegister(dest_reg),
694                      AsVIXLRegister(dests[j].GetRegister().AsArm()),
695                      vixl32::DRegister(first_sreg / 2u));
696             ++j;
697             continue;
698           }
699         }
700       }
701     }
702     if (srcs[i].IsRegister()) {
703       Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
704     } else if (refs[i] != kInvalidReferenceOffset) {
705       CreateJObject(dest_reg, refs[i], ManagedRegister::NoRegister(), /*null_allowed=*/ i != 0u);
706     } else {
707       Load(dest_reg, srcs[i].GetFrameOffset(), dests[i].GetSize());
708     }
709   }
710 }
711 
// Move a value between registers of any kind (core, S, D, or core register pair).
// No-op when `dst` equals `src`. `size` is unused: the register kinds themselves
// determine the width of the move. Debug builds verify that the destination is
// not one of the assembler's scratch registers.
void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst,
                                    ManagedRegister msrc,
                                    [[maybe_unused]] size_t size) {
  ArmManagedRegister dst = mdst.AsArm();
  if (kIsDebugBuild) {
    // Check that the destination is not a scratch register.
    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    if (dst.IsCoreRegister()) {
      CHECK(!temps.IsAvailable(AsVIXLRegister(dst)));
    } else if (dst.IsDRegister()) {
      CHECK(!temps.IsAvailable(AsVIXLDRegister(dst)));
    } else if (dst.IsSRegister()) {
      CHECK(!temps.IsAvailable(AsVIXLSRegister(dst)));
    } else {
      CHECK(dst.IsRegisterPair()) << dst;
      CHECK(!temps.IsAvailable(AsVIXLRegisterPairLow(dst)));
      CHECK(!temps.IsAvailable(AsVIXLRegisterPairHigh(dst)));
    }
  }
  ArmManagedRegister src = msrc.AsArm();
  if (!dst.Equals(src)) {
    if (dst.IsCoreRegister()) {
      if (src.IsCoreRegister()) {
        ___ Mov(AsVIXLRegister(dst), AsVIXLRegister(src));
      } else {
        // Only an S register may feed a core register: VMOV Rn, Sn.
        CHECK(src.IsSRegister()) << src;
        ___ Vmov(AsVIXLRegister(dst), AsVIXLSRegister(src));
      }
    } else if (dst.IsDRegister()) {
      if (src.IsDRegister()) {
        ___ Vmov(F64, AsVIXLDRegister(dst), AsVIXLDRegister(src));
      } else {
        // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi})
        CHECK(src.IsRegisterPair()) << src;
        ___ Vmov(AsVIXLDRegister(dst), AsVIXLRegisterPairLow(src), AsVIXLRegisterPairHigh(src));
      }
    } else if (dst.IsSRegister()) {
      if (src.IsSRegister()) {
        ___ Vmov(F32, AsVIXLSRegister(dst), AsVIXLSRegister(src));
      } else {
        // VMOV Sn, Rn  (Sn = Rn)
        CHECK(src.IsCoreRegister()) << src;
        ___ Vmov(AsVIXLSRegister(dst), AsVIXLRegister(src));
      }
    } else {
      CHECK(dst.IsRegisterPair()) << dst;
      if (src.IsRegisterPair()) {
        // Ensure that the first move doesn't clobber the input of the second.
        if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
          ___ Mov(AsVIXLRegisterPairLow(dst),  AsVIXLRegisterPairLow(src));
          ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src));
        } else {
          // dst's low register would overwrite src's high half; move high first.
          ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src));
          ___ Mov(AsVIXLRegisterPairLow(dst),  AsVIXLRegisterPairLow(src));
        }
      } else {
        // VMOV Rlo, Rhi, Dn ({Rlo, Rhi} = Dn).
        CHECK(src.IsDRegister()) << src;
        ___ Vmov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairHigh(dst), AsVIXLDRegister(src));
      }
    }
  }
}
774 
Move(ManagedRegister mdst,size_t value)775 void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, size_t value) {
776   ArmManagedRegister dst = mdst.AsArm();
777   ___ Mov(AsVIXLRegister(dst), static_cast<uint32_t>(value));
778 }
779 
Copy(FrameOffset dest,FrameOffset src,size_t size)780 void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
781   DCHECK(size == 4 || size == 8) << size;
782   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
783   vixl32::Register scratch = temps.Acquire();
784   if (size == 4) {
785     asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
786     asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
787   } else if (size == 8) {
788     asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
789     asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
790     asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value() + 4);
791     asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
792   }
793 }
794 
// Construct a jobject argument in `out_reg`: when `null_allowed`, a null
// reference yields a null jobject, otherwise the jobject is the address of the
// spill slot (SP + spilled_reference_offset). `min_reg`, if valid, already
// holds the reference value; otherwise it is reloaded from the spill slot.
void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
                                             FrameOffset spilled_reference_offset,
                                             ManagedRegister min_reg,
                                             bool null_allowed) {
  vixl::aarch32::Register out_reg = AsVIXLRegister(mout_reg.AsArm());
  vixl::aarch32::Register in_reg =
      min_reg.AsArm().IsNoRegister() ? vixl::aarch32::Register() : AsVIXLRegister(min_reg.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(out_reg);
  if (null_allowed) {
    // Null values get a jobject value null. Otherwise, the jobject is
    // the address of the spilled reference.
    // e.g. out_reg = (handle == 0) ? 0 : (SP+spilled_reference_offset)
    if (!in_reg.IsValid()) {
      // No register holds the reference; reload it from the spill slot.
      asm_.LoadFromOffset(kLoadWord, out_reg, sp, spilled_reference_offset.Int32Value());
      in_reg = out_reg;
    }

    if (out_reg.IsLow() && spilled_reference_offset.Uint32Value() < kAddSpImmCutoff) {
      // There is a 16-bit "ADD Rd, SP, <imm>" instruction we can use in IT-block.
      if (out_reg.Is(in_reg)) {
        ___ Cmp(in_reg, 0);
      } else {
        // MOVS copies the value and sets the Z flag for the IT block below.
        ___ Movs(out_reg, in_reg);
      }
      // Emit the IT block with exact encodings; the scope forbids VIXL from
      // widening or reordering these two 16-bit instructions.
      ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes);
      ___ it(ne);
      ___ add(ne, Narrow, out_reg, sp, spilled_reference_offset.Int32Value());
    } else {
      // General case: materialize the slot address first, then conditionally
      // select either the address or the (null) input into `out_reg`.
      vixl32::Register addr_reg = out_reg.Is(in_reg) ? temps.Acquire() : out_reg;
      vixl32::Register cond_mov_src_reg = out_reg.Is(in_reg) ? addr_reg : in_reg;
      vixl32::Condition cond = out_reg.Is(in_reg) ? ne : eq;
      ___ Add(addr_reg, sp, spilled_reference_offset.Int32Value());
      ___ Cmp(in_reg, 0);
      ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes);
      ___ it(cond);
      ___ mov(cond, Narrow, out_reg, cond_mov_src_reg);
    }
  } else {
    // Null is not allowed here, so unconditionally compute SP + offset.
    asm_.AddConstant(out_reg, sp, spilled_reference_offset.Int32Value());
  }
}
839 
// Decode a JNI reference in `reg` to a raw object pointer: global and weak
// global references branch to `slow_path`; null branches to `resume` with the
// null preserved; otherwise the kind bits are cleared and the object pointer
// is loaded from the resulting address.
void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister mreg,
                                                                 JNIMacroLabel* slow_path,
                                                                 JNIMacroLabel* resume) {
  constexpr uint32_t kGlobalOrWeakGlobalMask =
      dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetGlobalOrWeakGlobalMask());
  constexpr uint32_t kIndirectRefKindMask =
      dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetIndirectRefKindMask());
  vixl32::Register reg = AsVIXLRegister(mreg.AsArm());
  // Global or weak global references need runtime help; take the slow path.
  ___ Tst(reg, kGlobalOrWeakGlobalMask);
  ___ B(ne, ArmVIXLJNIMacroLabel::Cast(slow_path)->AsArm());
  // BICS clears the kind bits and sets flags, so null is detectable below.
  ___ Bics(reg, reg, kIndirectRefKindMask);
  ___ B(eq, ArmVIXLJNIMacroLabel::Cast(resume)->AsArm());  // Skip load for null.
  ___ Ldr(reg, MemOperand(reg));
}
854 
VerifyObject(ManagedRegister src,bool could_be_null)855 void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister src,
856                                             [[maybe_unused]] bool could_be_null) {
857   // TODO: not validating references.
858 }
859 
VerifyObject(FrameOffset src,bool could_be_null)860 void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src,
861                                             [[maybe_unused]] bool could_be_null) {
862   // TODO: not validating references.
863 }
864 
Jump(ManagedRegister mbase,Offset offset)865 void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
866   vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
867   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
868   vixl32::Register scratch = temps.Acquire();
869   asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
870   ___ Bx(scratch);
871 }
872 
Call(ManagedRegister mbase,Offset offset)873 void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
874   vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
875   asm_.LoadFromOffset(kLoadWord, lr, base, offset.Int32Value());
876   ___ Blx(lr);
877   // TODO: place reference map on call.
878 }
879 
CallFromThread(ThreadOffset32 offset)880 void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
881   // Call *(TR + offset)
882   asm_.LoadFromOffset(kLoadWord, lr, tr, offset.Int32Value());
883   ___ Blx(lr);
884   // TODO: place reference map on call
885 }
886 
GetCurrentThread(ManagedRegister dest)887 void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
888   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
889   temps.Exclude(AsVIXLRegister(dest.AsArm()));
890   ___ Mov(AsVIXLRegister(dest.AsArm()), tr);
891 }
892 
GetCurrentThread(FrameOffset dest_offset)893 void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset) {
894   asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
895 }
896 
// Transition the thread state from Runnable to Native with a release CAS on
// the thread flags word, then clear the held mutator lock entry. Branches to
// `label` (slow path) if any thread flags are set. Requires two caller-provided
// scratch registers for the LDREX/STREX loop.
void ArmVIXLJNIMacroAssembler::TryToTransitionFromRunnableToNative(
    JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
  constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
  constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
  constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
  constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
      Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);

  DCHECK_GE(scratch_regs.size(), 2u);
  vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
  vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());

  // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
  vixl32::Label retry;
  ___ Bind(&retry);
  ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
  ___ Mov(scratch2, kNativeStateValue);
  // If any flags are set, go to the slow path.
  // (Equality holds only for the Runnable state with all flag bits clear.)
  ___ Cmp(scratch, kRunnableStateValue);
  ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
  ___ Dmb(DmbOptions::ISH);  // Memory barrier "any-store" for the "release" operation.
  ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
  // STREX writes 0 to `scratch` on success; otherwise retry the CAS.
  ___ Cmp(scratch, 0);
  ___ B(ne, &retry);

  // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`; `scratch` holds 0 at this point.
  ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
}
925 
// Transition the thread state from Native to Runnable with an acquire CAS on
// the thread flags word, then record the mutator lock as held. Branches to
// `label` (slow path) if any flags are set or the state is not Native. The
// scratch registers must not overlap `return_reg`, which holds the native
// call's result and is preserved.
void ArmVIXLJNIMacroAssembler::TryToTransitionFromNativeToRunnable(
    JNIMacroLabel* label,
    ArrayRef<const ManagedRegister> scratch_regs,
    ManagedRegister return_reg) {
  constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
  constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
  constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
  constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
      Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);
  constexpr ThreadOffset32 thread_mutator_lock_offset =
      Thread::MutatorLockOffset<kArmPointerSize>();

  // There must be at least two scratch registers.
  DCHECK_GE(scratch_regs.size(), 2u);
  DCHECK(!scratch_regs[0].AsArm().Overlaps(return_reg.AsArm()));
  vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
  DCHECK(!scratch_regs[1].AsArm().Overlaps(return_reg.AsArm()));
  vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());

  // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags.
  vixl32::Label retry;
  ___ Bind(&retry);
  ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
  // If any flags are set, or the state is not Native, go to the slow path.
  // (While the thread can theoretically transition between different Suspended states,
  // it would be very unexpected to see a state other than Native at this point.)
  // EORS leaves 0 in `scratch2` exactly when the word equals kNativeStateValue.
  ___ Eors(scratch2, scratch, kNativeStateValue);
  ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
  // On the fast path `scratch2` is 0, which is exactly kRunnableStateValue.
  static_assert(kRunnableStateValue == 0u);
  ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
  // STREX writes 0 to `scratch` on success; otherwise retry the CAS.
  ___ Cmp(scratch, 0);
  ___ B(ne, &retry);
  ___ Dmb(DmbOptions::ISH);  // Memory barrier "load-any" for the "acquire" operation.

  // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock.
  ___ Ldr(scratch, MemOperand(tr, thread_mutator_lock_offset.Int32Value()));
  ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
}
964 
SuspendCheck(JNIMacroLabel * label)965 void ArmVIXLJNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
966   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
967   vixl32::Register scratch = temps.Acquire();
968   asm_.LoadFromOffset(kLoadWord,
969                       scratch,
970                       tr,
971                       Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
972 
973   ___ Tst(scratch, Thread::SuspendOrCheckpointRequestFlags());
974   ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
975   // TODO: think about using CBNZ here.
976 }
977 
ExceptionPoll(JNIMacroLabel * label)978 void ArmVIXLJNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
979   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
980   vixl32::Register scratch = temps.Acquire();
981   asm_.LoadFromOffset(kLoadWord,
982                       scratch,
983                       tr,
984                       Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
985 
986   ___ Cmp(scratch, 0);
987   ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
988   // TODO: think about using CBNZ here.
989 }
990 
DeliverPendingException()991 void ArmVIXLJNIMacroAssembler::DeliverPendingException() {
992   // Pass exception object as argument.
993   // Don't care about preserving r0 as this won't return.
994   // Note: The scratch register from `ExceptionPoll()` may have been clobbered.
995   asm_.LoadFromOffset(kLoadWord,
996                       r0,
997                       tr,
998                       Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
999   ___ Ldr(lr,
1000           MemOperand(tr,
1001               QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value()));
1002   ___ Blx(lr);
1003 }
1004 
CreateLabel()1005 std::unique_ptr<JNIMacroLabel> ArmVIXLJNIMacroAssembler::CreateLabel() {
1006   return std::unique_ptr<JNIMacroLabel>(new (asm_.GetAllocator()) ArmVIXLJNIMacroLabel());
1007 }
1008 
Jump(JNIMacroLabel * label)1009 void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label) {
1010   CHECK(label != nullptr);
1011   ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1012 }
1013 
// Branch to `label` depending on whether GC marking is active. With Baker read
// barriers the marking register (MR) is tested directly; otherwise the 4-byte
// `Thread::is_gc_marking` field is loaded from the thread.
void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
  CHECK(label != nullptr);

  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register test_reg;
  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
  if (kUseBakerReadBarrier) {
    // TestGcMarking() is used in the JNI stub entry when the marking register is up to date.
    if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
      // Debug builds verify MR against the thread-local value before using it.
      vixl32::Register temp = temps.Acquire();
      asm_.GenerateMarkingRegisterCheck(temp);
    }
    test_reg = mr;
  } else {
    // No marking register; load the flag from the thread instead.
    test_reg = temps.Acquire();
    ___ Ldr(test_reg, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
  }
  switch (cond) {
    case JNIMacroUnaryCondition::kZero:
      ___ CompareAndBranchIfZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
      break;
    case JNIMacroUnaryCondition::kNotZero:
      ___ CompareAndBranchIfNonZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
      break;
  }
}
1040 
TestMarkBit(ManagedRegister mref,JNIMacroLabel * label,JNIMacroUnaryCondition cond)1041 void ArmVIXLJNIMacroAssembler::TestMarkBit(ManagedRegister mref,
1042                                            JNIMacroLabel* label,
1043                                            JNIMacroUnaryCondition cond) {
1044   DCHECK(kUseBakerReadBarrier);
1045   vixl32::Register ref = AsVIXLRegister(mref.AsArm());
1046   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
1047   vixl32::Register scratch = temps.Acquire();
1048   ___ Ldr(scratch, MemOperand(ref, mirror::Object::MonitorOffset().SizeValue()));
1049   static_assert(LockWord::kMarkBitStateSize == 1u);
1050   ___ Tst(scratch, LockWord::kMarkBitStateMaskShifted);
1051   switch (cond) {
1052     case JNIMacroUnaryCondition::kZero:
1053       ___ B(eq, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1054       break;
1055     case JNIMacroUnaryCondition::kNotZero:
1056       ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1057       break;
1058   }
1059 }
1060 
TestByteAndJumpIfNotZero(uintptr_t address,JNIMacroLabel * label)1061 void ArmVIXLJNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
1062   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
1063   vixl32::Register scratch = temps.Acquire();
1064   ___ Mov(scratch, static_cast<uint32_t>(address));
1065   ___ Ldrb(scratch, MemOperand(scratch, 0));
1066   ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1067 }
1068 
Bind(JNIMacroLabel * label)1069 void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) {
1070   CHECK(label != nullptr);
1071   ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1072 }
1073 
// Load `size` bytes from `base + offset` into `dest`. Supported combinations:
// no register with size 0 (no-op), core register with size 1 (LDRB) or 4 (LDR),
// register pair with size 8 (two LDRs), S register (VLDR, 4 bytes), and
// D register (VLDR, 8 bytes).
void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest,
                                    vixl32::Register base,
                                    int32_t offset,
                                    size_t size) {
  if (dest.IsNoRegister()) {
    CHECK_EQ(0u, size) << dest;
  } else if (dest.IsCoreRegister()) {
    vixl::aarch32::Register dst = AsVIXLRegister(dest);
    // SP is never a valid load destination here.
    CHECK(!dst.Is(sp)) << dest;

    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    temps.Exclude(dst);

    if (size == 1u) {
      ___ Ldrb(dst, MemOperand(base, offset));
    } else {
      CHECK_EQ(4u, size) << dest;
      ___ Ldr(dst, MemOperand(base, offset));
    }
  } else if (dest.IsRegisterPair()) {
    CHECK_EQ(8u, size) << dest;
    // TODO: Use LDRD to improve stubs for @CriticalNative methods with parameters
    // (long, long, ...). A single 32-bit LDRD is presumably faster than two 16-bit LDRs.
    ___ Ldr(AsVIXLRegisterPairLow(dest),  MemOperand(base, offset));
    ___ Ldr(AsVIXLRegisterPairHigh(dest), MemOperand(base, offset + 4));
  } else if (dest.IsSRegister()) {
    ___ Vldr(AsVIXLSRegister(dest), MemOperand(base, offset));
  } else {
    CHECK(dest.IsDRegister()) << dest;
    ___ Vldr(AsVIXLDRegister(dest), MemOperand(base, offset));
  }
}
1106 
LoadLocalReferenceTableStates(ManagedRegister jni_env_reg,ManagedRegister previous_state_reg,ManagedRegister current_state_reg)1107 void ArmVIXLJNIMacroAssembler::LoadLocalReferenceTableStates(ManagedRegister jni_env_reg,
1108                                                              ManagedRegister previous_state_reg,
1109                                                              ManagedRegister current_state_reg) {
1110   constexpr size_t kLRTSegmentStateSize = sizeof(jni::LRTSegmentState);
1111   DCHECK_EQ(kLRTSegmentStateSize, kRegSizeInBytes);
1112   const MemberOffset previous_state_offset = JNIEnvExt::LrtPreviousStateOffset(kArmPointerSize);
1113   const MemberOffset current_state_offset = JNIEnvExt::LrtSegmentStateOffset(kArmPointerSize);
1114   DCHECK_EQ(previous_state_offset.SizeValue() + kLRTSegmentStateSize,
1115             current_state_offset.SizeValue());
1116 
1117   ___ Ldrd(AsVIXLRegister(previous_state_reg.AsArm()),
1118            AsVIXLRegister(current_state_reg.AsArm()),
1119            MemOperand(AsVIXLRegister(jni_env_reg.AsArm()), previous_state_offset.Int32Value()));
1120 }
1121 
StoreLocalReferenceTableStates(ManagedRegister jni_env_reg,ManagedRegister previous_state_reg,ManagedRegister current_state_reg)1122 void ArmVIXLJNIMacroAssembler::StoreLocalReferenceTableStates(ManagedRegister jni_env_reg,
1123                                                               ManagedRegister previous_state_reg,
1124                                                               ManagedRegister current_state_reg) {
1125   constexpr size_t kLRTSegmentStateSize = sizeof(jni::LRTSegmentState);
1126   DCHECK_EQ(kLRTSegmentStateSize, kRegSizeInBytes);
1127   const MemberOffset previous_state_offset = JNIEnvExt::LrtPreviousStateOffset(kArmPointerSize);
1128   const MemberOffset current_state_offset = JNIEnvExt::LrtSegmentStateOffset(kArmPointerSize);
1129   DCHECK_EQ(previous_state_offset.SizeValue() + kLRTSegmentStateSize,
1130             current_state_offset.SizeValue());
1131 
1132   ___ Strd(AsVIXLRegister(previous_state_reg.AsArm()),
1133            AsVIXLRegister(current_state_reg.AsArm()),
1134            MemOperand(AsVIXLRegister(jni_env_reg.AsArm()), previous_state_offset.Int32Value()));
1135 }
1136 
1137 }  // namespace arm
1138 }  // namespace art
1139