1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "jni_macro_assembler_arm_vixl.h"
18
19 #include <iostream>
20 #include <type_traits>
21
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "indirect_reference_table.h"
24 #include "jni/jni_env_ext.h"
25 #include "jni/local_reference_table.h"
26 #include "lock_word.h"
27 #include "thread.h"
28
29 using namespace vixl::aarch32; // NOLINT(build/namespaces)
30 namespace vixl32 = vixl::aarch32;
31
32 using vixl::ExactAssemblyScope;
33
34 namespace art HIDDEN {
35 namespace arm {
36
37 #ifdef ___
38 #error "ARM Assembler macro already defined."
39 #else
40 #define ___ asm_.GetVIXLAssembler()->
41 #endif
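// For illustration: `___ Mov(r0, r1);` expands to `asm_.GetVIXLAssembler()->Mov(r0, r1);`,
// i.e. the macro forwards directly to the underlying VIXL macro assembler.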
42
43 // The AAPCS requires 8-byte alignment. This is not as strict as the Managed ABI stack alignment.
44 static constexpr size_t kAapcsStackAlignment = 8u;
45 static_assert(kAapcsStackAlignment < kStackAlignment);
46
47 // STRD immediate can encode any 4-byte aligned offset smaller than this cutoff.
48 static constexpr size_t kStrdOffsetCutoff = 1024u;
49
50 // ADD sp, imm can encode 4-byte aligned immediate smaller than this cutoff.
51 static constexpr size_t kAddSpImmCutoff = 1024u;
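// Encoding sketch (for illustration): both limits stem from 8-bit immediates scaled by 4, so an
// offset of 1020 still fits the short STRD / "ADD <Rd>, SP, #imm" forms while 1024 does not.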
52
53 vixl::aarch32::Register AsVIXLRegister(ArmManagedRegister reg) {
54 CHECK(reg.IsCoreRegister());
55 return vixl::aarch32::Register(reg.RegId());
56 }
57
58 static inline vixl::aarch32::SRegister AsVIXLSRegister(ArmManagedRegister reg) {
59 CHECK(reg.IsSRegister());
60 return vixl::aarch32::SRegister(reg.RegId() - kNumberOfCoreRegIds);
61 }
62
63 static inline vixl::aarch32::DRegister AsVIXLDRegister(ArmManagedRegister reg) {
64 CHECK(reg.IsDRegister());
65 return vixl::aarch32::DRegister(reg.RegId() - kNumberOfCoreRegIds - kNumberOfSRegIds);
66 }
67
68 static inline vixl::aarch32::Register AsVIXLRegisterPairLow(ArmManagedRegister reg) {
69 return vixl::aarch32::Register(reg.AsRegisterPairLow());
70 }
71
72 static inline vixl::aarch32::Register AsVIXLRegisterPairHigh(ArmManagedRegister reg) {
73 return vixl::aarch32::Register(reg.AsRegisterPairHigh());
74 }
75
76 void ArmVIXLJNIMacroAssembler::FinalizeCode() {
77 asm_.FinalizeCode();
78 }
79
80 static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
81
82 void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
83 ManagedRegister method_reg,
84 ArrayRef<const ManagedRegister> callee_save_regs) {
85 // If we're creating an actual frame with the method, enforce managed stack alignment,
86 // otherwise only the native stack alignment.
87 if (method_reg.IsNoRegister()) {
88 CHECK_ALIGNED_PARAM(frame_size, kAapcsStackAlignment);
89 } else {
90 CHECK_ALIGNED_PARAM(frame_size, kStackAlignment);
91 }
92
93 // Push callee saves and link register.
94 RegList core_spill_mask = 0;
95 uint32_t fp_spill_mask = 0;
96 for (const ManagedRegister& reg : callee_save_regs) {
97 if (reg.AsArm().IsCoreRegister()) {
98 core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
99 } else {
100 fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
101 }
102 }
103 if (core_spill_mask == (1u << lr.GetCode()) &&
104 fp_spill_mask == 0u &&
105 frame_size == 2 * kFramePointerSize &&
106 !method_reg.IsRegister()) {
107 // Special case: Only LR to push and one word to skip. Do this with a single
108 // 16-bit PUSH instruction by arbitrarily pushing r3 (without CFI for r3).
109 core_spill_mask |= 1u << r3.GetCode();
110 ___ Push(RegisterList(core_spill_mask));
111 cfi().AdjustCFAOffset(2 * kFramePointerSize);
112 cfi().RelOffset(DWARFReg(lr), kFramePointerSize);
113 } else if (core_spill_mask != 0u) {
114 ___ Push(RegisterList(core_spill_mask));
115 cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
116 cfi().RelOffsetForMany(DWARFReg(r0), 0, core_spill_mask, kFramePointerSize);
117 }
118 if (fp_spill_mask != 0) {
119 uint32_t first = CTZ(fp_spill_mask);
120
121 // Check that list is contiguous.
122 DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));
123
124 ___ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask)));
125 cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
126 cfi().RelOffsetForMany(DWARFReg(s0), 0, fp_spill_mask, kFramePointerSize);
127 }
128
129 // Increase frame to required size.
130 int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
131 // Must at least have space for Method* if we're going to spill it.
132 CHECK_GE(frame_size, (pushed_values + (method_reg.IsRegister() ? 1u : 0u)) * kFramePointerSize);
133 IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well.
134
135 if (method_reg.IsRegister()) {
136 // Write out Method*.
137 CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
138 asm_.StoreToOffset(kStoreWord, r0, sp, 0);
139 }
140 }
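// Rough illustration of the emitted prologue (not verbatim) for callee saves {r5, r6, r7, lr},
// frame_size == 32 and the method in r0:
//   push {r5, r6, r7, lr}   // 16 bytes of callee saves
//   sub  sp, sp, #16        // grow the frame to the requested size
//   str  r0, [sp]           // spill the ArtMethod* at the bottom of the frame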
141
142 void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
143 ArrayRef<const ManagedRegister> callee_save_regs,
144 bool may_suspend) {
145 CHECK_ALIGNED(frame_size, kAapcsStackAlignment);
146
147 // Compute callee saves to pop.
148 RegList core_spill_mask = 0u;
149 uint32_t fp_spill_mask = 0u;
150 for (const ManagedRegister& reg : callee_save_regs) {
151 if (reg.AsArm().IsCoreRegister()) {
152 core_spill_mask |= 1u << reg.AsArm().AsCoreRegister();
153 } else {
154 fp_spill_mask |= 1u << reg.AsArm().AsSRegister();
155 }
156 }
157
158 // Pop LR to PC unless we need to emit some read barrier code just before returning.
159 bool emit_code_before_return =
160 kReserveMarkingRegister &&
161 (may_suspend || (kIsDebugBuild && emit_run_time_checks_in_debug_mode_));
162 if ((core_spill_mask & (1u << lr.GetCode())) != 0u && !emit_code_before_return) {
163 DCHECK_EQ(core_spill_mask & (1u << pc.GetCode()), 0u);
164 core_spill_mask ^= (1u << lr.GetCode()) | (1u << pc.GetCode());
165 }
166
167 // If there are no FP registers to pop and we pop PC, we can avoid emitting any CFI.
168 if (fp_spill_mask == 0u && (core_spill_mask & (1u << pc.GetCode())) != 0u) {
169 if (frame_size == POPCOUNT(core_spill_mask) * kFramePointerSize) {
170 // Just pop all registers and avoid CFI.
171 ___ Pop(RegisterList(core_spill_mask));
172 return;
173 } else if (frame_size == 8u && core_spill_mask == (1u << pc.GetCode())) {
174 // Special case: One word to ignore and one to pop to PC. We are free to clobber the
175 // caller-save register r3 on return, so use a 16-bit POP instruction and avoid CFI.
176 ___ Pop(RegisterList((1u << r3.GetCode()) | (1u << pc.GetCode())));
177 return;
178 }
179 }
180
181 // We shall need to adjust CFI and restore it after the frame exit sequence.
182 cfi().RememberState();
183
184 // Decrease frame to start of callee saves.
185 size_t pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
186 CHECK_GE(frame_size, pop_values * kFramePointerSize);
187 DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well.
188
189 // Pop FP callee saves.
190 if (fp_spill_mask != 0u) {
191 uint32_t first = CTZ(fp_spill_mask);
192 // Check that list is contiguous.
193 DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));
194
195 ___ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask)));
196 cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
197 cfi().RestoreMany(DWARFReg(s0), fp_spill_mask);
198 }
199
200 // Pop core callee saves.
201 if (core_spill_mask != 0u) {
202 ___ Pop(RegisterList(core_spill_mask));
203 if ((core_spill_mask & (1u << pc.GetCode())) == 0u) {
204 cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(core_spill_mask));
205 cfi().RestoreMany(DWARFReg(r0), core_spill_mask);
206 }
207 }
208
209 // Emit marking register refresh even with all GCs as we are still using the
210 // register due to nterp's dependency.
211 if (kReserveMarkingRegister) {
212 if (may_suspend) {
213 // The method may be suspended; refresh the Marking Register.
214 ___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
215 } else {
216 // The method shall not be suspended; no need to refresh the Marking Register.
217
218 // The Marking Register is a callee-save register, and thus has been
219 // preserved by native code following the AAPCS calling convention.
220
221 // The following condition is a compile-time one, so it does not have a run-time cost.
222 if (kIsDebugBuild) {
223 // The following condition is a run-time one; it is executed after the
224 // previous compile-time test, to avoid penalizing non-debug builds.
225 if (emit_run_time_checks_in_debug_mode_) {
226 // Emit a run-time check verifying that the Marking Register is up-to-date.
227 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
228 vixl32::Register temp = temps.Acquire();
229 // Ensure we are not clobbering a callee-save register that was restored before.
230 DCHECK_EQ(core_spill_mask & (1 << temp.GetCode()), 0)
231 << "core_spill_mask should not contain scratch register R" << temp.GetCode();
232 asm_.GenerateMarkingRegisterCheck(temp);
233 }
234 }
235 }
236 }
237
238 // Return to LR.
239 if ((core_spill_mask & (1u << pc.GetCode())) == 0u) {
240 ___ Bx(vixl32::lr);
241 }
242
243 // The CFI should be restored for any code that follows the exit block.
244 cfi().RestoreState();
245 cfi().DefCFAOffset(frame_size);
246 }
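// Rough illustration of the matching epilogue for the prologue example above (no FP spills,
// may_suspend == false, no marking register refresh needed):
//   add  sp, sp, #16        // drop the local area
//   pop  {r5, r6, r7, pc}   // restore callee saves and return (LR popped directly to PC)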
247
248
249 void ArmVIXLJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
250 if (adjust != 0u) {
251 asm_.AddConstant(sp, -adjust);
252 cfi().AdjustCFAOffset(adjust);
253 }
254 }
255
256 void ArmVIXLJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
257 if (adjust != 0u) {
258 asm_.AddConstant(sp, adjust);
259 cfi().AdjustCFAOffset(-adjust);
260 }
261 }
262
263 ManagedRegister ArmVIXLJNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
264 DCHECK(src.AsArm().IsCoreRegister());
265 DCHECK_EQ(size, 4u);
266 return src;
267 }
268
269 void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, size_t size) {
270 Store(ArmManagedRegister::FromCoreRegister(SP), MemberOffset(dest.Int32Value()), m_src, size);
271 }
272
273 void ArmVIXLJNIMacroAssembler::Store(ManagedRegister m_base,
274 MemberOffset offs,
275 ManagedRegister m_src,
276 size_t size) {
277 ArmManagedRegister base = m_base.AsArm();
278 ArmManagedRegister src = m_src.AsArm();
279 if (src.IsNoRegister()) {
280 CHECK_EQ(0u, size);
281 } else if (src.IsCoreRegister()) {
282 CHECK_EQ(4u, size);
283 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
284 temps.Exclude(AsVIXLRegister(src));
285 asm_.StoreToOffset(kStoreWord, AsVIXLRegister(src), AsVIXLRegister(base), offs.Int32Value());
286 } else if (src.IsRegisterPair()) {
287 CHECK_EQ(8u, size);
288 ___ Strd(AsVIXLRegisterPairLow(src),
289 AsVIXLRegisterPairHigh(src),
290 MemOperand(AsVIXLRegister(base), offs.Int32Value()));
291 } else if (src.IsSRegister()) {
292 CHECK_EQ(4u, size);
293 asm_.StoreSToOffset(AsVIXLSRegister(src), AsVIXLRegister(base), offs.Int32Value());
294 } else {
295 CHECK_EQ(8u, size);
296 CHECK(src.IsDRegister()) << src;
297 asm_.StoreDToOffset(AsVIXLDRegister(src), AsVIXLRegister(base), offs.Int32Value());
298 }
299 }
300
301 void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
302 vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
303 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
304 temps.Exclude(src);
305 asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
306 }
307
308 void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
309 return Load(m_dst.AsArm(), sp, src.Int32Value(), size);
310 }
311
312 void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst,
313 ManagedRegister m_base,
314 MemberOffset offs,
315 size_t size) {
316 return Load(m_dst.AsArm(), AsVIXLRegister(m_base.AsArm()), offs.Int32Value(), size);
317 }
318
319
320 void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
321 vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm());
322 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
323 temps.Exclude(dest);
324 asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value());
325 }
326
327 void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) {
328 if (tag_sp) {
329 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
330 vixl32::Register reg = temps.Acquire();
331 ___ Orr(reg, sp, 0x2);
332 asm_.StoreToOffset(kStoreWord, reg, tr, thr_offs.Int32Value());
333 } else {
334 asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
335 }
336 }
337
338 void ArmVIXLJNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg,
339 [[maybe_unused]] size_t size) {
340 UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
341 }
342
343 void ArmVIXLJNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg,
344 [[maybe_unused]] size_t size) {
345 UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
346 }
347
348 static inline bool IsCoreRegisterOrPair(ArmManagedRegister reg) {
349 return reg.IsCoreRegister() || reg.IsRegisterPair();
350 }
351
352 static inline bool NoSpillGap(const ArgumentLocation& loc1, const ArgumentLocation& loc2) {
353 DCHECK(!loc1.IsRegister());
354 DCHECK(!loc2.IsRegister());
355 uint32_t loc1_offset = loc1.GetFrameOffset().Uint32Value();
356 uint32_t loc2_offset = loc2.GetFrameOffset().Uint32Value();
357 return loc1_offset + loc1.GetSize() == loc2_offset;
358 }
359
360 static inline uint32_t GetSRegisterNumber(ArmManagedRegister reg) {
361 if (reg.IsSRegister()) {
362 return static_cast<uint32_t>(reg.AsSRegister());
363 } else {
364 DCHECK(reg.IsDRegister());
365 return 2u * static_cast<uint32_t>(reg.AsDRegister());
366 }
367 }
368
369 // Get the number of locations to spill together.
370 static inline size_t GetSpillChunkSize(ArrayRef<ArgumentLocation> dests,
371 ArrayRef<ArgumentLocation> srcs,
372 size_t start) {
373 DCHECK_LT(start, dests.size());
374 DCHECK_ALIGNED(dests[start].GetFrameOffset().Uint32Value(), 4u);
375 const ArgumentLocation& first_src = srcs[start];
376 DCHECK(first_src.IsRegister());
377 ArmManagedRegister first_src_reg = first_src.GetRegister().AsArm();
378 size_t end = start + 1u;
379 if (IsCoreRegisterOrPair(first_src_reg)) {
380 while (end != dests.size() &&
381 NoSpillGap(dests[end - 1u], dests[end]) &&
382 srcs[end].IsRegister() &&
383 IsCoreRegisterOrPair(srcs[end].GetRegister().AsArm())) {
384 ++end;
385 }
386 } else {
387 DCHECK(first_src_reg.IsSRegister() || first_src_reg.IsDRegister());
388 uint32_t next_sreg = GetSRegisterNumber(first_src_reg) + first_src.GetSize() / kSRegSizeInBytes;
389 while (end != dests.size() &&
390 NoSpillGap(dests[end - 1u], dests[end]) &&
391 srcs[end].IsRegister() &&
392 !IsCoreRegisterOrPair(srcs[end].GetRegister().AsArm()) &&
393 GetSRegisterNumber(srcs[end].GetRegister().AsArm()) == next_sreg) {
394 next_sreg += srcs[end].GetSize() / kSRegSizeInBytes;
395 ++end;
396 }
397 }
398 return end - start;
399 }
400
401 static inline uint32_t GetCoreRegisterMask(ArmManagedRegister reg) {
402 if (reg.IsCoreRegister()) {
403 return 1u << static_cast<size_t>(reg.AsCoreRegister());
404 } else {
405 DCHECK(reg.IsRegisterPair());
406 DCHECK_LT(reg.AsRegisterPairLow(), reg.AsRegisterPairHigh());
407 return (1u << static_cast<size_t>(reg.AsRegisterPairLow())) |
408 (1u << static_cast<size_t>(reg.AsRegisterPairHigh()));
409 }
410 }
411
412 static inline uint32_t GetCoreRegisterMask(ArrayRef<ArgumentLocation> srcs) {
413 uint32_t mask = 0u;
414 for (const ArgumentLocation& loc : srcs) {
415 DCHECK(loc.IsRegister());
416 mask |= GetCoreRegisterMask(loc.GetRegister().AsArm());
417 }
418 return mask;
419 }
420
421 static inline bool UseStrdForChunk(ArrayRef<ArgumentLocation> srcs, size_t start, size_t length) {
422 DCHECK_GE(length, 2u);
423 DCHECK(srcs[start].IsRegister());
424 DCHECK(srcs[start + 1u].IsRegister());
425 // The destination may not be 8B aligned (but it is 4B aligned).
426 // Allow arbitrary destination offset, macro assembler will use a temp if needed.
427 // Note: T32 allows unrelated registers in STRD. (A32 does not.)
428 return length == 2u &&
429 srcs[start].GetRegister().AsArm().IsCoreRegister() &&
430 srcs[start + 1u].GetRegister().AsArm().IsCoreRegister();
431 }
432
433 static inline bool UseVstrForChunk(ArrayRef<ArgumentLocation> srcs, size_t start, size_t length) {
434 DCHECK_GE(length, 2u);
435 DCHECK(srcs[start].IsRegister());
436 DCHECK(srcs[start + 1u].IsRegister());
437 // The destination may not be 8B aligned (but it is 4B aligned).
438 // Allow arbitrary destination offset, macro assembler will use a temp if needed.
439 return length == 2u &&
440 srcs[start].GetRegister().AsArm().IsSRegister() &&
441 srcs[start + 1u].GetRegister().AsArm().IsSRegister() &&
442 IsAligned<2u>(static_cast<size_t>(srcs[start].GetRegister().AsArm().AsSRegister()));
443 }
444
445 void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
446 ArrayRef<ArgumentLocation> srcs,
447 ArrayRef<FrameOffset> refs) {
448 size_t arg_count = dests.size();
449 DCHECK_EQ(arg_count, srcs.size());
450 DCHECK_EQ(arg_count, refs.size());
451
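// Overall flow (summary): references already in registers are converted to jobjects first; then
// register arguments destined for stack slots are spilled (using STM/VSTM/STRD/VSTR where
// profitable); then stack-to-stack arguments are copied; finally the destination core registers
// are filled from core registers (resolving dependencies), FP registers or stack slots.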
452 // Convert reference registers to `jobject` values.
453 // TODO: Delay this for references that are copied to another register.
454 for (size_t i = 0; i != arg_count; ++i) {
455 if (refs[i] != kInvalidReferenceOffset && srcs[i].IsRegister()) {
456 // Note: We can clobber `srcs[i]` here as the register cannot hold more than one argument.
457 ManagedRegister src_i_reg = srcs[i].GetRegister();
458 CreateJObject(src_i_reg, refs[i], src_i_reg, /*null_allowed=*/ i != 0u);
459 }
460 }
461
462 // Native ABI is soft-float, so all destinations should be core registers or stack offsets.
463 // And register locations should be first, followed by stack locations.
464 auto is_register = [](const ArgumentLocation& loc) { return loc.IsRegister(); };
465 DCHECK(std::is_partitioned(dests.begin(), dests.end(), is_register));
466 size_t num_reg_dests =
467 std::distance(dests.begin(), std::partition_point(dests.begin(), dests.end(), is_register));
468
469 // Collect registers to move. No need to record FP regs as destinations are only core regs.
470 uint32_t src_regs = 0u;
471 uint32_t dest_regs = 0u;
472 uint32_t same_regs = 0u;
473 for (size_t i = 0; i != num_reg_dests; ++i) {
474 const ArgumentLocation& src = srcs[i];
475 const ArgumentLocation& dest = dests[i];
476 DCHECK(dest.IsRegister() && IsCoreRegisterOrPair(dest.GetRegister().AsArm()));
477 if (src.IsRegister() && IsCoreRegisterOrPair(src.GetRegister().AsArm())) {
478 if (src.GetRegister().Equals(dest.GetRegister())) {
479 same_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
480 continue;
481 }
482 src_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
483 }
484 dest_regs |= GetCoreRegisterMask(dest.GetRegister().AsArm());
485 }
486
487 // Spill register arguments to stack slots.
488 for (size_t i = num_reg_dests; i != arg_count; ) {
489 const ArgumentLocation& src = srcs[i];
490 if (!src.IsRegister()) {
491 ++i;
492 continue;
493 }
494 const ArgumentLocation& dest = dests[i];
495 DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
496 DCHECK(!dest.IsRegister());
497 uint32_t frame_offset = dest.GetFrameOffset().Uint32Value();
498 size_t chunk_size = GetSpillChunkSize(dests, srcs, i);
499 DCHECK_NE(chunk_size, 0u);
500 if (chunk_size == 1u) {
501 Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
502 } else if (UseStrdForChunk(srcs, i, chunk_size)) {
503 ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
504 AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
505 MemOperand(sp, frame_offset));
506 } else if (UseVstrForChunk(srcs, i, chunk_size)) {
507 size_t sreg = GetSRegisterNumber(src.GetRegister().AsArm());
508 DCHECK_ALIGNED(sreg, 2u);
509 ___ Vstr(vixl32::DRegister(sreg / 2u), MemOperand(sp, frame_offset));
510 } else {
511 UseScratchRegisterScope temps2(asm_.GetVIXLAssembler());
512 vixl32::Register base_reg;
513 if (frame_offset == 0u) {
514 base_reg = sp;
515 } else {
516 base_reg = temps2.Acquire();
517 ___ Add(base_reg, sp, frame_offset);
518 }
519
520 ArmManagedRegister src_reg = src.GetRegister().AsArm();
521 if (IsCoreRegisterOrPair(src_reg)) {
522 uint32_t core_reg_mask = GetCoreRegisterMask(srcs.SubArray(i, chunk_size));
523 ___ Stm(base_reg, NO_WRITE_BACK, RegisterList(core_reg_mask));
524 } else {
525 uint32_t start_sreg = GetSRegisterNumber(src_reg);
526 const ArgumentLocation& last_dest = dests[i + chunk_size - 1u];
527 uint32_t total_size =
528 last_dest.GetFrameOffset().Uint32Value() + last_dest.GetSize() - frame_offset;
529 if (IsAligned<2u>(start_sreg) &&
530 IsAligned<kDRegSizeInBytes>(frame_offset) &&
531 IsAligned<kDRegSizeInBytes>(total_size)) {
532 uint32_t dreg_count = total_size / kDRegSizeInBytes;
533 DRegisterList dreg_list(vixl32::DRegister(start_sreg / 2u), dreg_count);
534 ___ Vstm(F64, base_reg, NO_WRITE_BACK, dreg_list);
535 } else {
536 uint32_t sreg_count = total_size / kSRegSizeInBytes;
537 SRegisterList sreg_list(vixl32::SRegister(start_sreg), sreg_count);
538 ___ Vstm(F32, base_reg, NO_WRITE_BACK, sreg_list);
539 }
540 }
541 }
542 i += chunk_size;
543 }
544
545 // Copy incoming stack arguments to outgoing stack arguments.
546 // Registers r0-r3 are argument registers for both managed and native ABI and r4
547 // is a scratch register in managed ABI but also a hidden argument register for
548 // @CriticalNative call. We can use these registers as temporaries for copying
549 // stack arguments as long as they do not currently hold live values.
550 // TODO: Use the callee-save scratch registers instead to avoid using calling
551 // convention knowledge in the assembler. This would require reordering the
552 // argument move with pushing the IRT frame where those registers are used.
553 uint32_t copy_temp_regs = ((1u << 5) - 1u) & ~(same_regs | src_regs);
554 if ((dest_regs & (1u << R4)) != 0) {
555 // For @CriticalNative, R4 shall hold the hidden argument but it is available
556 // for use as a temporary at this point. However, it may be the only available
557 // register, so we shall use IP as the second temporary if needed.
558 // We do not need to worry about `CreateJObject` for @CriticalNative.
559 DCHECK_NE(copy_temp_regs, 0u);
560 DCHECK(std::all_of(refs.begin(),
561 refs.end(),
562 [](FrameOffset r) { return r == kInvalidReferenceOffset; }));
563 } else {
564 // For normal native and @FastNative, R4 and at least one of R0-R3 should be
565 // available because there are only 3 destination registers R1-R3 where the
566 // source registers can be moved. The R0 shall be filled by the `JNIEnv*`
567 // argument later. We need to keep IP available for `CreateJObject()`.
568 DCHECK_GE(POPCOUNT(copy_temp_regs), 2);
569 }
570 vixl32::Register copy_temp1 = vixl32::Register(LeastSignificantBit(copy_temp_regs));
571 copy_temp_regs ^= 1u << copy_temp1.GetCode();
572 vixl32::Register copy_xtemp = (copy_temp_regs != 0u)
573 ? vixl32::Register(LeastSignificantBit(copy_temp_regs))
574 : vixl32::Register();
575 for (size_t i = num_reg_dests; i != arg_count; ++i) {
576 if (srcs[i].IsRegister()) {
577 continue;
578 }
579 FrameOffset src_offset = srcs[i].GetFrameOffset();
580 DCHECK_ALIGNED(src_offset.Uint32Value(), 4u);
581 FrameOffset dest_offset = dests[i].GetFrameOffset();
582 DCHECK_ALIGNED(dest_offset.Uint32Value(), 4u);
583 // Look for opportunities to move 2 words at a time with LDRD/STRD
584 // when the source types are word-sized.
585 if (srcs[i].GetSize() == 4u &&
586 i + 1u != arg_count &&
587 !srcs[i + 1u].IsRegister() &&
588 srcs[i + 1u].GetSize() == 4u &&
589 NoSpillGap(srcs[i], srcs[i + 1u]) &&
590 NoSpillGap(dests[i], dests[i + 1u]) &&
591 dest_offset.Uint32Value() < kStrdOffsetCutoff) {
592 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
593 vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
594 ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
595 if (refs[i] != kInvalidReferenceOffset) {
596 ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
597 enum_cast<Register>(copy_temp1.GetCode()));
598 CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
599 }
600 if (refs[i + 1u] != kInvalidReferenceOffset) {
601 ArmManagedRegister m_copy_temp2 = ArmManagedRegister::FromCoreRegister(
602 enum_cast<Register>(copy_temp2.GetCode()));
603 CreateJObject(m_copy_temp2, refs[i + 1u], m_copy_temp2, /*null_allowed=*/ true);
604 }
605 ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
606 ++i;
607 } else if (dests[i].GetSize() == 8u && dest_offset.Uint32Value() < kStrdOffsetCutoff) {
608 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
609 vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
610 ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
611 ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
612 } else if (refs[i] != kInvalidReferenceOffset) {
613 // Do not use the `CreateJObject()` overload for stack target as it generates
614 // worse code than explicitly using a low register temporary.
615 ___ Ldr(copy_temp1, MemOperand(sp, src_offset.Uint32Value()));
616 ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
617 enum_cast<Register>(copy_temp1.GetCode()));
618 CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
619 ___ Str(copy_temp1, MemOperand(sp, dest_offset.Uint32Value()));
620 } else {
621 Copy(dest_offset, src_offset, dests[i].GetSize());
622 }
623 }
624
625 // Fill destination registers from source core registers.
626 // There should be no cycles, so this algorithm should make progress.
627 while (src_regs != 0u) {
628 uint32_t old_src_regs = src_regs;
629 for (size_t i = 0; i != num_reg_dests; ++i) {
630 DCHECK(dests[i].IsRegister() && IsCoreRegisterOrPair(dests[i].GetRegister().AsArm()));
631 if (!srcs[i].IsRegister() || !IsCoreRegisterOrPair(srcs[i].GetRegister().AsArm())) {
632 continue;
633 }
634 uint32_t dest_reg_mask = GetCoreRegisterMask(dests[i].GetRegister().AsArm());
635 if ((dest_reg_mask & dest_regs) == 0u) {
636 continue; // Equals source, or already filled in one of previous iterations.
637 }
638 // There are no partial overlaps of 8-byte arguments, otherwise we would have to
639 // tweak this check; Move() can deal with partial overlap for historical reasons.
640 if ((dest_reg_mask & src_regs) != 0u) {
641 continue; // Cannot clobber this register yet.
642 }
643 Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
644 uint32_t src_reg_mask = GetCoreRegisterMask(srcs[i].GetRegister().AsArm());
645 DCHECK_EQ(src_regs & src_reg_mask, src_reg_mask);
646 src_regs &= ~src_reg_mask; // Allow clobbering the source register or pair.
647 dest_regs &= ~dest_reg_mask; // Destination register or pair was filled.
648 }
649 CHECK_NE(old_src_regs, src_regs);
650 DCHECK_EQ(0u, src_regs & ~old_src_regs);
651 }
652
653 // Now fill destination registers from FP registers or stack slots, looking for
654 // opportunities to use LDRD/VMOV to fill 2 registers with one instruction.
655 for (size_t i = 0, j; i != num_reg_dests; i = j) {
656 j = i + 1u;
657 DCHECK(dests[i].IsRegister());
658 ArmManagedRegister dest_reg = dests[i].GetRegister().AsArm();
659 DCHECK(IsCoreRegisterOrPair(dest_reg));
660 if (srcs[i].IsRegister() && IsCoreRegisterOrPair(srcs[i].GetRegister().AsArm())) {
661 DCHECK_EQ(GetCoreRegisterMask(dests[i].GetRegister().AsArm()) & dest_regs, 0u);
662 continue; // Equals destination or moved above.
663 }
664 DCHECK_NE(GetCoreRegisterMask(dest_reg) & dest_regs, 0u);
665 if (dests[i].GetSize() == 4u) {
666 // Find next register to load.
667 while (j != num_reg_dests &&
668 (srcs[j].IsRegister() && IsCoreRegisterOrPair(srcs[j].GetRegister().AsArm()))) {
669 DCHECK_EQ(GetCoreRegisterMask(dests[j].GetRegister().AsArm()) & dest_regs, 0u);
670 ++j; // Equals destination or moved above.
671 }
672 if (j != num_reg_dests && dests[j].GetSize() == 4u) {
673 if (!srcs[i].IsRegister() && !srcs[j].IsRegister() && NoSpillGap(srcs[i], srcs[j])) {
674 ___ Ldrd(AsVIXLRegister(dests[i].GetRegister().AsArm()),
675 AsVIXLRegister(dests[j].GetRegister().AsArm()),
676 MemOperand(sp, srcs[i].GetFrameOffset().Uint32Value()));
677 if (refs[i] != kInvalidReferenceOffset) {
678 DCHECK_EQ(refs[i], srcs[i].GetFrameOffset());
679 CreateJObject(dest_reg, refs[i], dest_reg, /*null_allowed=*/ i != 0u);
680 }
681 if (refs[j] != kInvalidReferenceOffset) {
682 DCHECK_EQ(refs[j], srcs[j].GetFrameOffset());
683 ManagedRegister dest_j_reg = dests[j].GetRegister();
684 CreateJObject(dest_j_reg, refs[j], dest_j_reg, /*null_allowed=*/ true);
685 }
686 ++j;
687 continue;
688 }
689 if (srcs[i].IsRegister() && srcs[j].IsRegister()) {
690 uint32_t first_sreg = GetSRegisterNumber(srcs[i].GetRegister().AsArm());
691 if (IsAligned<2u>(first_sreg) &&
692 first_sreg + 1u == GetSRegisterNumber(srcs[j].GetRegister().AsArm())) {
693 ___ Vmov(AsVIXLRegister(dest_reg),
694 AsVIXLRegister(dests[j].GetRegister().AsArm()),
695 vixl32::DRegister(first_sreg / 2u));
696 ++j;
697 continue;
698 }
699 }
700 }
701 }
702 if (srcs[i].IsRegister()) {
703 Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
704 } else if (refs[i] != kInvalidReferenceOffset) {
705 CreateJObject(dest_reg, refs[i], ManagedRegister::NoRegister(), /*null_allowed=*/ i != 0u);
706 } else {
707 Load(dest_reg, srcs[i].GetFrameOffset(), dests[i].GetSize());
708 }
709 }
710 }
711
712 void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst,
713 ManagedRegister msrc,
714 [[maybe_unused]] size_t size) {
715 ArmManagedRegister dst = mdst.AsArm();
716 if (kIsDebugBuild) {
717 // Check that the destination is not a scratch register.
718 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
719 if (dst.IsCoreRegister()) {
720 CHECK(!temps.IsAvailable(AsVIXLRegister(dst)));
721 } else if (dst.IsDRegister()) {
722 CHECK(!temps.IsAvailable(AsVIXLDRegister(dst)));
723 } else if (dst.IsSRegister()) {
724 CHECK(!temps.IsAvailable(AsVIXLSRegister(dst)));
725 } else {
726 CHECK(dst.IsRegisterPair()) << dst;
727 CHECK(!temps.IsAvailable(AsVIXLRegisterPairLow(dst)));
728 CHECK(!temps.IsAvailable(AsVIXLRegisterPairHigh(dst)));
729 }
730 }
731 ArmManagedRegister src = msrc.AsArm();
732 if (!dst.Equals(src)) {
733 if (dst.IsCoreRegister()) {
734 if (src.IsCoreRegister()) {
735 ___ Mov(AsVIXLRegister(dst), AsVIXLRegister(src));
736 } else {
737 CHECK(src.IsSRegister()) << src;
738 ___ Vmov(AsVIXLRegister(dst), AsVIXLSRegister(src));
739 }
740 } else if (dst.IsDRegister()) {
741 if (src.IsDRegister()) {
742 ___ Vmov(F64, AsVIXLDRegister(dst), AsVIXLDRegister(src));
743 } else {
744 // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi})
745 CHECK(src.IsRegisterPair()) << src;
746 ___ Vmov(AsVIXLDRegister(dst), AsVIXLRegisterPairLow(src), AsVIXLRegisterPairHigh(src));
747 }
748 } else if (dst.IsSRegister()) {
749 if (src.IsSRegister()) {
750 ___ Vmov(F32, AsVIXLSRegister(dst), AsVIXLSRegister(src));
751 } else {
752 // VMOV Sn, Rn (Sn = Rn)
753 CHECK(src.IsCoreRegister()) << src;
754 ___ Vmov(AsVIXLSRegister(dst), AsVIXLRegister(src));
755 }
756 } else {
757 CHECK(dst.IsRegisterPair()) << dst;
758 if (src.IsRegisterPair()) {
759 // Ensure that the first move doesn't clobber the input of the second.
760 if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
761 ___ Mov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairLow(src));
762 ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src));
763 } else {
764 ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src));
765 ___ Mov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairLow(src));
766 }
767 } else {
768 CHECK(src.IsDRegister()) << src;
769 ___ Vmov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairHigh(dst), AsVIXLDRegister(src));
770 }
771 }
772 }
773 }
774
775 void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, size_t value) {
776 ArmManagedRegister dst = mdst.AsArm();
777 ___ Mov(AsVIXLRegister(dst), static_cast<uint32_t>(value));
778 }
779
780 void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
781 DCHECK(size == 4 || size == 8) << size;
782 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
783 vixl32::Register scratch = temps.Acquire();
784 if (size == 4) {
785 asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
786 asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
787 } else if (size == 8) {
788 asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
789 asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
790 asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value() + 4);
791 asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
792 }
793 }
794
795 void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
796 FrameOffset spilled_reference_offset,
797 ManagedRegister min_reg,
798 bool null_allowed) {
799 vixl::aarch32::Register out_reg = AsVIXLRegister(mout_reg.AsArm());
800 vixl::aarch32::Register in_reg =
801 min_reg.AsArm().IsNoRegister() ? vixl::aarch32::Register() : AsVIXLRegister(min_reg.AsArm());
802 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
803 temps.Exclude(out_reg);
804 if (null_allowed) {
805 // A null reference yields a null jobject. Otherwise, the jobject is
806 // the address of the spilled reference.
807 // e.g. out_reg = (handle == 0) ? 0 : (SP+spilled_reference_offset)
808 if (!in_reg.IsValid()) {
809 asm_.LoadFromOffset(kLoadWord, out_reg, sp, spilled_reference_offset.Int32Value());
810 in_reg = out_reg;
811 }
812
813 if (out_reg.IsLow() && spilled_reference_offset.Uint32Value() < kAddSpImmCutoff) {
814 // There is a 16-bit "ADD Rd, SP, <imm>" instruction we can use in an IT block.
815 if (out_reg.Is(in_reg)) {
816 ___ Cmp(in_reg, 0);
817 } else {
818 ___ Movs(out_reg, in_reg);
819 }
820 ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
821 2 * vixl32::k16BitT32InstructionSizeInBytes);
822 ___ it(ne);
823 ___ add(ne, Narrow, out_reg, sp, spilled_reference_offset.Int32Value());
824 } else {
825 vixl32::Register addr_reg = out_reg.Is(in_reg) ? temps.Acquire() : out_reg;
826 vixl32::Register cond_mov_src_reg = out_reg.Is(in_reg) ? addr_reg : in_reg;
827 vixl32::Condition cond = out_reg.Is(in_reg) ? ne : eq;
828 ___ Add(addr_reg, sp, spilled_reference_offset.Int32Value());
829 ___ Cmp(in_reg, 0);
830 ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
831 2 * vixl32::k16BitT32InstructionSizeInBytes);
832 ___ it(cond);
833 ___ mov(cond, Narrow, out_reg, cond_mov_src_reg);
834 }
835 } else {
836 asm_.AddConstant(out_reg, sp, spilled_reference_offset.Int32Value());
837 }
838 }
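// For the common `null_allowed` case with a low output register the generated sequence is,
// roughly:
//   cmp   <in>, #0                      // or `movs <out>, <in>` when out != in
//   it    ne
//   addne <out>, sp, #<spilled_reference_offset>
// so a null reference produces a null jobject and a non-null one the address of the spill slot.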
839
840 void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister mreg,
841 JNIMacroLabel* slow_path,
842 JNIMacroLabel* resume) {
843 constexpr uint32_t kGlobalOrWeakGlobalMask =
844 dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetGlobalOrWeakGlobalMask());
845 constexpr uint32_t kIndirectRefKindMask =
846 dchecked_integral_cast<uint32_t>(IndirectReferenceTable::GetIndirectRefKindMask());
847 vixl32::Register reg = AsVIXLRegister(mreg.AsArm());
848 ___ Tst(reg, kGlobalOrWeakGlobalMask);
849 ___ B(ne, ArmVIXLJNIMacroLabel::Cast(slow_path)->AsArm());
850 ___ Bics(reg, reg, kIndirectRefKindMask);
851 ___ B(eq, ArmVIXLJNIMacroLabel::Cast(resume)->AsArm()); // Skip load for null.
852 ___ Ldr(reg, MemOperand(reg));
853 }
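// Fast-path sketch: global and weak global references (kind bits tested above) are left to the
// slow path, a null reference stays null, and for other references the kind bits are cleared to
// recover the address of the slot holding the object, which is then loaded.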
854
855 void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister src,
856 [[maybe_unused]] bool could_be_null) {
857 // TODO: not validating references.
858 }
859
860 void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src,
861 [[maybe_unused]] bool could_be_null) {
862 // TODO: not validating references.
863 }
864
865 void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
866 vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
867 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
868 vixl32::Register scratch = temps.Acquire();
869 asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
870 ___ Bx(scratch);
871 }
872
873 void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
874 vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
875 asm_.LoadFromOffset(kLoadWord, lr, base, offset.Int32Value());
876 ___ Blx(lr);
877 // TODO: place reference map on call.
878 }
879
880 void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
881 // Call *(TR + offset)
882 asm_.LoadFromOffset(kLoadWord, lr, tr, offset.Int32Value());
883 ___ Blx(lr);
884 // TODO: place reference map on call
885 }
886
887 void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
888 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
889 temps.Exclude(AsVIXLRegister(dest.AsArm()));
890 ___ Mov(AsVIXLRegister(dest.AsArm()), tr);
891 }
892
893 void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset) {
894 asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
895 }
896
897 void ArmVIXLJNIMacroAssembler::TryToTransitionFromRunnableToNative(
898 JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
899 constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
900 constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
901 constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
902 constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
903 Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);
904
905 DCHECK_GE(scratch_regs.size(), 2u);
906 vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
907 vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());
908
909 // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
910 vixl32::Label retry;
911 ___ Bind(&retry);
912 ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
913 ___ Mov(scratch2, kNativeStateValue);
914 // If any flags are set, go to the slow path.
915 ___ Cmp(scratch, kRunnableStateValue);
916 ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
917 ___ Dmb(DmbOptions::ISH); // Memory barrier "any-store" for the "release" operation.
918 ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
919 ___ Cmp(scratch, 0);
920 ___ B(ne, &retry);
921
922 // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`; `scratch` holds 0 at this point.
923 ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
924 }
925
926 void ArmVIXLJNIMacroAssembler::TryToTransitionFromNativeToRunnable(
927 JNIMacroLabel* label,
928 ArrayRef<const ManagedRegister> scratch_regs,
929 ManagedRegister return_reg) {
930 constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
931 constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
932 constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
933 constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
934 Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);
935 constexpr ThreadOffset32 thread_mutator_lock_offset =
936 Thread::MutatorLockOffset<kArmPointerSize>();
937
938 // There must be at least two scratch registers.
939 DCHECK_GE(scratch_regs.size(), 2u);
940 DCHECK(!scratch_regs[0].AsArm().Overlaps(return_reg.AsArm()));
941 vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
942 DCHECK(!scratch_regs[1].AsArm().Overlaps(return_reg.AsArm()));
943 vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());
944
945 // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags.
946 vixl32::Label retry;
947 ___ Bind(&retry);
948 ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
949 // If any flags are set, or the state is not Native, go to the slow path.
950 // (While the thread can theoretically transition between different Suspended states,
951 // it would be very unexpected to see a state other than Native at this point.)
952 ___ Eors(scratch2, scratch, kNativeStateValue);
953 ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
954 static_assert(kRunnableStateValue == 0u);
955 ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
956 ___ Cmp(scratch, 0);
957 ___ B(ne, &retry);
958 ___ Dmb(DmbOptions::ISH); // Memory barrier "load-any" for the "acquire" operation.
959
960 // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock.
961 ___ Ldr(scratch, MemOperand(tr, thread_mutator_lock_offset.Int32Value()));
962 ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
963 }
964
965 void ArmVIXLJNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
966 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
967 vixl32::Register scratch = temps.Acquire();
968 asm_.LoadFromOffset(kLoadWord,
969 scratch,
970 tr,
971 Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
972
973 ___ Tst(scratch, Thread::SuspendOrCheckpointRequestFlags());
974 ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
975 // TODO: think about using CBNZ here.
976 }
977
978 void ArmVIXLJNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
979 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
980 vixl32::Register scratch = temps.Acquire();
981 asm_.LoadFromOffset(kLoadWord,
982 scratch,
983 tr,
984 Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
985
986 ___ Cmp(scratch, 0);
987 ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
988 // TODO: think about using CBNZ here.
989 }
990
991 void ArmVIXLJNIMacroAssembler::DeliverPendingException() {
992 // Pass exception object as argument.
993 // Don't care about preserving r0 as this won't return.
994 // Note: The scratch register from `ExceptionPoll()` may have been clobbered.
995 asm_.LoadFromOffset(kLoadWord,
996 r0,
997 tr,
998 Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
999 ___ Ldr(lr,
1000 MemOperand(tr,
1001 QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value()));
1002 ___ Blx(lr);
1003 }
1004
1005 std::unique_ptr<JNIMacroLabel> ArmVIXLJNIMacroAssembler::CreateLabel() {
1006 return std::unique_ptr<JNIMacroLabel>(new (asm_.GetAllocator()) ArmVIXLJNIMacroLabel());
1007 }
1008
1009 void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label) {
1010 CHECK(label != nullptr);
1011 ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1012 }
1013
1014 void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
1015 CHECK(label != nullptr);
1016
1017 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
1018 vixl32::Register test_reg;
1019 DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
1020 if (kUseBakerReadBarrier) {
1021 // TestGcMarking() is used in the JNI stub entry when the marking register is up to date.
1022 if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
1023 vixl32::Register temp = temps.Acquire();
1024 asm_.GenerateMarkingRegisterCheck(temp);
1025 }
1026 test_reg = mr;
1027 } else {
1028 test_reg = temps.Acquire();
1029 ___ Ldr(test_reg, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
1030 }
1031 switch (cond) {
1032 case JNIMacroUnaryCondition::kZero:
1033 ___ CompareAndBranchIfZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1034 break;
1035 case JNIMacroUnaryCondition::kNotZero:
1036 ___ CompareAndBranchIfNonZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1037 break;
1038 }
1039 }
1040
1041 void ArmVIXLJNIMacroAssembler::TestMarkBit(ManagedRegister mref,
1042 JNIMacroLabel* label,
1043 JNIMacroUnaryCondition cond) {
1044 DCHECK(kUseBakerReadBarrier);
1045 vixl32::Register ref = AsVIXLRegister(mref.AsArm());
1046 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
1047 vixl32::Register scratch = temps.Acquire();
1048 ___ Ldr(scratch, MemOperand(ref, mirror::Object::MonitorOffset().SizeValue()));
1049 static_assert(LockWord::kMarkBitStateSize == 1u);
1050 ___ Tst(scratch, LockWord::kMarkBitStateMaskShifted);
1051 switch (cond) {
1052 case JNIMacroUnaryCondition::kZero:
1053 ___ B(eq, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1054 break;
1055 case JNIMacroUnaryCondition::kNotZero:
1056 ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1057 break;
1058 }
1059 }
1060
1061 void ArmVIXLJNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
1062 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
1063 vixl32::Register scratch = temps.Acquire();
1064 ___ Mov(scratch, static_cast<uint32_t>(address));
1065 ___ Ldrb(scratch, MemOperand(scratch, 0));
1066 ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1067 }
1068
1069 void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) {
1070 CHECK(label != nullptr);
1071 ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
1072 }
1073
1074 void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest,
1075 vixl32::Register base,
1076 int32_t offset,
1077 size_t size) {
1078 if (dest.IsNoRegister()) {
1079 CHECK_EQ(0u, size) << dest;
1080 } else if (dest.IsCoreRegister()) {
1081 vixl::aarch32::Register dst = AsVIXLRegister(dest);
1082 CHECK(!dst.Is(sp)) << dest;
1083
1084 UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
1085 temps.Exclude(dst);
1086
1087 if (size == 1u) {
1088 ___ Ldrb(dst, MemOperand(base, offset));
1089 } else {
1090 CHECK_EQ(4u, size) << dest;
1091 ___ Ldr(dst, MemOperand(base, offset));
1092 }
1093 } else if (dest.IsRegisterPair()) {
1094 CHECK_EQ(8u, size) << dest;
1095 // TODO: Use LDRD to improve stubs for @CriticalNative methods with parameters
1096 // (long, long, ...). A single 32-bit LDRD is presumably faster than two 16-bit LDRs.
1097 ___ Ldr(AsVIXLRegisterPairLow(dest), MemOperand(base, offset));
1098 ___ Ldr(AsVIXLRegisterPairHigh(dest), MemOperand(base, offset + 4));
1099 } else if (dest.IsSRegister()) {
1100 ___ Vldr(AsVIXLSRegister(dest), MemOperand(base, offset));
1101 } else {
1102 CHECK(dest.IsDRegister()) << dest;
1103 ___ Vldr(AsVIXLDRegister(dest), MemOperand(base, offset));
1104 }
1105 }
1106
1107 void ArmVIXLJNIMacroAssembler::LoadLocalReferenceTableStates(ManagedRegister jni_env_reg,
1108 ManagedRegister previous_state_reg,
1109 ManagedRegister current_state_reg) {
1110 constexpr size_t kLRTSegmentStateSize = sizeof(jni::LRTSegmentState);
1111 DCHECK_EQ(kLRTSegmentStateSize, kRegSizeInBytes);
1112 const MemberOffset previous_state_offset = JNIEnvExt::LrtPreviousStateOffset(kArmPointerSize);
1113 const MemberOffset current_state_offset = JNIEnvExt::LrtSegmentStateOffset(kArmPointerSize);
1114 DCHECK_EQ(previous_state_offset.SizeValue() + kLRTSegmentStateSize,
1115 current_state_offset.SizeValue());
1116
1117 ___ Ldrd(AsVIXLRegister(previous_state_reg.AsArm()),
1118 AsVIXLRegister(current_state_reg.AsArm()),
1119 MemOperand(AsVIXLRegister(jni_env_reg.AsArm()), previous_state_offset.Int32Value()));
1120 }
1121
1122 void ArmVIXLJNIMacroAssembler::StoreLocalReferenceTableStates(ManagedRegister jni_env_reg,
1123 ManagedRegister previous_state_reg,
1124 ManagedRegister current_state_reg) {
1125 constexpr size_t kLRTSegmentStateSize = sizeof(jni::LRTSegmentState);
1126 DCHECK_EQ(kLRTSegmentStateSize, kRegSizeInBytes);
1127 const MemberOffset previous_state_offset = JNIEnvExt::LrtPreviousStateOffset(kArmPointerSize);
1128 const MemberOffset current_state_offset = JNIEnvExt::LrtSegmentStateOffset(kArmPointerSize);
1129 DCHECK_EQ(previous_state_offset.SizeValue() + kLRTSegmentStateSize,
1130 current_state_offset.SizeValue());
1131
1132 ___ Strd(AsVIXLRegister(previous_state_reg.AsArm()),
1133 AsVIXLRegister(current_state_reg.AsArm()),
1134 MemOperand(AsVIXLRegister(jni_env_reg.AsArm()), previous_state_offset.Int32Value()));
1135 }
1136
1137 } // namespace arm
1138 } // namespace art
1139