/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <iostream>
#include <type_traits>

#include "assembler_arm_vixl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___   vixl_masm_.
#endif

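// Thread register: holds the pointer to the current Thread in managed code.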
extern const vixl32::Register tr(TR);

void ArmVIXLAssembler::FinalizeCode() {
  vixl_masm_.FinalizeCode();
}

size_t ArmVIXLAssembler::CodeSize() const {
  return vixl_masm_.GetSizeOfCodeGenerated();
}

const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const {
  return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>();
}

void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) {
  // Copy the instructions from the buffer.
  MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize());
  region.CopyFrom(0, from);
}

void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) {
  // reg = -reg.
  ___ Rsb(reg, reg, 0);
}

void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(reg);
  }
}

void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(reg);
  }
}

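// Loads a 32-bit immediate into `rd`. If `value` is not encodable as a modified
// immediate but its bitwise complement is, an MVN is emitted instead of a MOV.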
void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
  // TODO(VIXL): Implement this optimization in VIXL.
  if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
    ___ Mvn(rd, ~value);
  } else {
    ___ Mov(rd, value);
  }
}

bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
  return vixl_masm_.IsModifiedImmediate(immediate);
}

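// Returns whether `immediate` can be encoded as an operand of `opcode`,
// taking into account whether the condition flags need to be set.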
bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) {
  switch (opcode) {
    case ADD:
    case SUB:
      // Less than (or equal to) 12 bits can be done if we don't need to set condition codes.
      if (IsUint<12>(immediate) && set_cc != kCcSet) {
        return true;
      }
      return ShifterOperandCanAlwaysHold(immediate);

    case MOV:
      // TODO: Support less than or equal to 12 bits.
      return ShifterOperandCanAlwaysHold(immediate);

    case MVN:
    default:
      return ShifterOperandCanAlwaysHold(immediate);
  }
}

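// Attempts to split `offset` into a part that fits in the instruction's offset
// field (`offset_for_load_store`) and a remainder (`add_to_base`) to be added
// to the base register first.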
bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                               int32_t offset,
                                               /*out*/ int32_t* add_to_base,
                                               /*out*/ int32_t* offset_for_load_store) {
  int32_t other_bits = offset & ~allowed_offset_bits;
  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
    *add_to_base = offset & ~allowed_offset_bits;
    *offset_for_load_store = offset & allowed_offset_bits;
    return true;
  }
  return false;
}

int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                                vixl32::Register temp,
                                                vixl32::Register base,
                                                int32_t offset) {
  DCHECK_NE(offset & ~allowed_offset_bits, 0);
  int32_t add_to_base, offset_for_load;
  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
    ___ Add(temp, base, add_to_base);
    return offset_for_load;
  } else {
    ___ Mov(temp, offset);
    ___ Add(temp, temp, base);
    return 0;
  }
}

// TODO(VIXL): Implement this in VIXL.
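// Returns a mask of the offset bits that the load instructions for `type` can
// encode directly.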
int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kLoadSWord:
    case kLoadDWord:
    case kLoadWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
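// Returns a mask of the offset bits that the store instructions for `type` can
// encode directly.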
int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      // We can encode imm12 offset.
      return 0xfff;
    case kStoreSWord:
    case kStoreDWord:
    case kStoreWordPair:
      // We can encode imm8:'00' offset.
      return 0xff << 2;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
  switch (type) {
    case kLoadSignedByte:
    case kLoadSignedHalfword:
    case kLoadUnsignedHalfword:
    case kLoadUnsignedByte:
    case kLoadWord:
      return IsAbsoluteUint<12>(offset);
    case kLoadSWord:
    case kLoadDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kLoadWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// TODO(VIXL): Implement this in VIXL.
static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
  switch (type) {
    case kStoreHalfword:
    case kStoreByte:
    case kStoreWord:
      return IsAbsoluteUint<12>(offset);
    case kStoreSWord:
    case kStoreDWord:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);  // VFP addressing mode.
    case kStoreWordPair:
      return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset);
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldStoreOffsetThumb(type, offset) is true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::StoreToOffset(StoreOperandType type,
                                     vixl32::Register reg,
                                     vixl32::Register base,
                                     int32_t offset) {
  vixl32::Register tmp_reg;
  UseScratchRegisterScope temps(&vixl_masm_);

  if (!CanHoldStoreOffsetThumb(type, offset)) {
    CHECK_NE(base.GetCode(), kIpCode);
    if ((reg.GetCode() != kIpCode) &&
        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
        ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
      tmp_reg = temps.Acquire();
    } else {
      // Be careful not to use ip twice (for `reg` (or `reg` + 1 in
      // the case of a word-pair store) and `base`) to build the
      // Address object used by the store instruction(s) below.
      // Instead, save R5 on the stack (or R6 if R5 is already used by
      // `base`), use it as a secondary temporary register, and restore
      // it after the store instruction has been emitted.
      tmp_reg = (base.GetCode() != 5) ? r5 : r6;
      ___ Push(tmp_reg);
      if (base.GetCode() == kSpCode) {
        offset += kRegisterSize;
      }
    }
    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
    // and in the "unsplittable" path get rid of the "add" by using the store indexed instead.
    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset);
    base = tmp_reg;
  }
  DCHECK(CanHoldStoreOffsetThumb(type, offset));
  switch (type) {
    case kStoreByte:
      ___ Strb(reg, MemOperand(base, offset));
      break;
    case kStoreHalfword:
      ___ Strh(reg, MemOperand(base, offset));
      break;
    case kStoreWord:
      ___ Str(reg, MemOperand(base, offset));
      break;
    case kStoreWordPair:
      ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
  if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) {
    CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg;
    ___ Pop(tmp_reg);
  }
}

// Implementation note: this method must emit at most one instruction when
// CanHoldLoadOffsetThumb(type, offset) is true.
// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL.
void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type,
                                      vixl32::Register dest,
                                      vixl32::Register base,
                                      int32_t offset) {
  if (!CanHoldLoadOffsetThumb(type, offset)) {
    CHECK(!base.Is(ip));
    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
    DCHECK_NE(offset & ~allowed_offset_bits, 0);
    int32_t add_to_base, offset_for_load;
    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
      // Use `dest` for the adjusted base. If it's a low register, we may end up with a 16-bit load.
      AddConstant(dest, base, add_to_base);
      base = dest;
      offset = offset_for_load;
    } else {
      UseScratchRegisterScope temps(&vixl_masm_);
      vixl32::Register temp = (dest.Is(base)) ? temps.Acquire() : dest;
      LoadImmediate(temp, offset);
      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
      // Use `dest` for the adjusted base. If it's a low register, we may end up with a 16-bit load.
      ___ Add(dest, dest, (dest.Is(base)) ? temp : base);
      base = dest;
      offset = 0;
    }
  }

  DCHECK(CanHoldLoadOffsetThumb(type, offset));
  switch (type) {
    case kLoadSignedByte:
      ___ Ldrsb(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedByte:
      ___ Ldrb(dest, MemOperand(base, offset));
      break;
    case kLoadSignedHalfword:
      ___ Ldrsh(dest, MemOperand(base, offset));
      break;
    case kLoadUnsignedHalfword:
      ___ Ldrh(dest, MemOperand(base, offset));
      break;
    case kLoadWord:
      CHECK(!dest.IsSP());
      ___ Ldr(dest, MemOperand(base, offset));
      break;
    case kLoadWordPair:
      ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset));
      break;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source,
                                      vixl32::Register base,
                                      int32_t offset) {
  ___ Vstr(source, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg,
                                       vixl32::Register base,
                                       int32_t offset) {
  ___ Vldr(reg, MemOperand(base, offset));
}

// Prefer individual Str/Ldr over Add + Stm/Ldm in ArmVIXLAssembler::StoreRegisterList
// and ArmVIXLAssembler::LoadRegisterList when that generates smaller code.
static constexpr int kRegListThreshold = 4;

void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
  if (number_of_regs != 0) {
    if (number_of_regs > kRegListThreshold) {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      vixl32::Register base = sp;
      if (stack_offset != 0) {
        base = temps.Acquire();
        ___ Add(base, sp, Operand::From(stack_offset));
      }
      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
    } else {
      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
        stack_offset += kRegSizeInBytes;
      }
    }
  }
}

void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
  AddConstant(rd, rd, value);
}

// TODO(VIXL): Think about using ADDS, which updates flags, where possible.
void ArmVIXLAssembler::AddConstant(vixl32::Register rd,
                                   vixl32::Register rn,
                                   int32_t value) {
  DCHECK(vixl_masm_.OutsideITBlock());
  // TODO(VIXL): implement this optimization in VIXL.
  if (value == 0) {
    if (!rd.Is(rn)) {
      ___ Mov(rd, rn);
    }
    return;
  }
  ___ Add(rd, rn, value);
}

// Inside an IT block we must use the assembler directly; macro-assembler
// instructions are not permitted.
void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd,
                                       vixl32::Register rn,
                                       int32_t value,
                                       vixl32::Condition cond) {
  DCHECK(vixl_masm_.InITBlock());
  if (value == 0) {
    ___ mov(cond, rd, rn);
  } else {
    ___ add(cond, rd, rn, value);
  }
}

void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn,
                                                   vixl32::Label* label,
                                                   bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    // In T32, the Cbz/Cbnz instructions have the following limitations:
    // - There are only 7 bits (i:imm5:0) to encode the branch target address (it cannot be far).
    // - Only low registers (i.e. R0-R7) can be encoded.
    // - Only forward branches (to unbound labels) are supported.
    Cbz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(eq, label, is_far_target);
}

void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
                                                      vixl32::Label* label,
                                                      bool is_far_target) {
  if (!is_far_target && rn.IsLow() && !label->IsBound()) {
    Cbnz(rn, label);
    return;
  }
  Cmp(rn, 0);
  B(ne, label, is_far_target);
}

void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
  if (!label->IsBound()) {
    // Try to use the 16-bit T2 encoding of the B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(al, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  MacroAssembler::B(label);
}

void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
  if (!label->IsBound() && !is_far_target) {
    // Try to use the 16-bit T2 encoding of the B instruction.
    DCHECK(OutsideITBlock());
    ExactAssemblyScope guard(this,
                             k16BitT32InstructionSizeInBytes,
                             CodeBufferCheckScope::kMaximumSize);
    b(cond, Narrow, label);
    AddBranchLabel(label);
    return;
  }
  // To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
  // callers can provide a hint to this function, i.e. is_far_target=false.
  // By default this function uses 'EncodingSizeType::Best', which generates a 32-bit T3 encoding.
  MacroAssembler::B(cond, label);
}

}  // namespace arm
}  // namespace art