/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___ vixl_masm_.
#endif

extern const vixl32::Register tr(TR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

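// Heap reference poisoning: when kPoisonHeapReferences is enabled, references are
// kept in poisoned (negated) form, so poisoning and unpoisoning are the same
// operation (reg = -reg). The Maybe* variants below are no-ops when poisoning is
// disabled.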
void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

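// Illustrative example for LoadImmediate below: 0xFFFFFF00 is not a valid modified
// immediate, but its bitwise complement 0xFF is, so the code emits `mvn rd, #0xff`
// rather than a movw/movt pair.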
void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

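// Note on ADD/SUB below: in T32 the ADDW/SUBW encodings accept an arbitrary 12-bit
// immediate but cannot set the condition flags, while the 32-bit flag-setting forms
// are restricted to modified immediates, hence the set_cc check.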
bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // Immediates of up to 12 bits can be encoded directly if we don't need to set
      // condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support immediates of up to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

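// Splits `offset` into a part the load/store addressing mode can encode directly
// (offset_for_load_store) and a remainder to be added to the base register first
// (add_to_base). For example, with allowed_offset_bits == 0xfff and offset == 0x10f0,
// the split is add_to_base = 0x1000 (a valid modified immediate) and
// offset_for_load_store = 0x0f0.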
bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

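// Adds the part of `offset` that the addressing mode cannot encode (possibly all of
// it) to `base`, placing the result in `temp`, and returns the residual offset that
// the load/store instruction itself should encode; the caller then uses `temp` as
// the new base.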
int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

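// In T32, VLDR/VSTR and LDRD/STRD take an 8-bit immediate scaled by 4 (a multiple of
// 4 in [-1020, 1020]); the IsAbsoluteUint<10> && IsAligned<4> checks below express
// exactly that range.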
// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldStoreOffsetThumb() is true for the given type and offset.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // MemOperand used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as a secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldLoadOffsetThumb() is true for the given type and offset.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlining AdjustLoadStoreOffset() here lets us pull a few more tricks, such as
    // reusing `dest` as the temporary register.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use `dest` for the adjusted base. If it's a low register, we may end up using
      // a 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use `dest` for the adjusted base. If it's a low register, we may end up using
      // a 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and
// ArmVIXLAssembler::LoadRegisterList where this generates smaller code.
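// For short lists, sp-relative Str/Ldr with a small offset often have 16-bit
// encodings, so per-register accesses are no larger than materializing a base
// register and emitting a single Stm/Ldm.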
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): Think about using Adds, which updates flags, where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): Implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the plain assembler interface; macro-assembler
// instructions are not permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address, so the
    //   target cannot be far.
    // - Only low registers (i.e. R0-R7) can be encoded.
    // - Only forward branches (unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

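// The same Cbnz limitations apply as for Cbz in CompareAndBranchIfZero above.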
void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

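// The ExactAssemblyScope below reserves a single 16-bit instruction slot and checks
// (via CodeBufferCheckScope::kMaximumSize) that the hand-emitted `b` does not exceed
// it.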
void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use 16-bit T2 encoding of B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use a 16-bit encoding of the B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding, callers
  // can provide a hint to this function, i.e. pass is_far_target=false.
  // By default this function uses 'EncodingSizeType::Best', which generates the
  // 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art