1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "linker/arm64/relative_patcher_arm64.h"
18 
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "art_method.h"
22 #include "base/bit_utils.h"
23 #include "base/malloc_arena_pool.h"
24 #include "compiled_method-inl.h"
25 #include "driver/compiler_driver.h"
26 #include "entrypoints/quick/quick_entrypoints_enum.h"
27 #include "heap_poisoning.h"
28 #include "linker/linker_patch.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/object.h"
32 #include "oat.h"
33 #include "oat_quick_method_header.h"
34 #include "read_barrier.h"
35 #include "stream/output_stream.h"
36 #include "utils/arm64/assembler_arm64.h"
37 
38 namespace art {
39 namespace linker {
40 
41 namespace {
42 
// Reach of a method call (BL), measured from the patch location, i.e. from the ARM64 PC
// pointing to the BL. The BL displacement is a signed 28-bit value whose two lowest bits
// are zero, so the reachable range is [-2^27, 2^27 - 4]. Both bounds are stored as
// magnitudes; the positive bound is one instruction (4 bytes) short of the negative one.
constexpr uint32_t kMaxMethodCallNegativeDisplacement = uint32_t{1} << 27;
constexpr uint32_t kMaxMethodCallPositiveDisplacement = kMaxMethodCallNegativeDisplacement - 4u;
48 
// Reach of a conditional branch, measured from the patch location, i.e. from the ARM64 PC
// pointing to the B.cond. The displacement is a signed 21-bit value whose two lowest bits
// are zero, so the reachable range is [-2^20, 2^20 - 4]. Both bounds are stored as
// magnitudes. These limits are the ones reported for Baker read barrier thunks, whose
// patched branch is a CBNZ checked against the same 21-bit signed range.
constexpr uint32_t kMaxBcondNegativeDisplacement = uint32_t{1} << 20;
constexpr uint32_t kMaxBcondPositiveDisplacement = kMaxBcondNegativeDisplacement - 4u;
54 
// Size in bytes of one ADRP thunk for erratum 843419: two 4-byte instructions
// (the relocated ADRP followed by a B back to the method code).
constexpr uint32_t kAdrpThunkSize = 2u * 4u;
57 
IsAdrpPatch(const LinkerPatch & patch)58 inline bool IsAdrpPatch(const LinkerPatch& patch) {
59   switch (patch.GetType()) {
60     case LinkerPatch::Type::kCallRelative:
61     case LinkerPatch::Type::kBakerReadBarrierBranch:
62       return false;
63     case LinkerPatch::Type::kIntrinsicReference:
64     case LinkerPatch::Type::kDataBimgRelRo:
65     case LinkerPatch::Type::kMethodRelative:
66     case LinkerPatch::Type::kMethodBssEntry:
67     case LinkerPatch::Type::kTypeRelative:
68     case LinkerPatch::Type::kTypeBssEntry:
69     case LinkerPatch::Type::kStringRelative:
70     case LinkerPatch::Type::kStringBssEntry:
71       return patch.LiteralOffset() == patch.PcInsnOffset();
72   }
73 }
74 
MaxExtraSpace(size_t num_adrp,size_t code_size)75 inline uint32_t MaxExtraSpace(size_t num_adrp, size_t code_size) {
76   if (num_adrp == 0u) {
77     return 0u;
78   }
79   uint32_t alignment_bytes =
80       CompiledMethod::AlignCode(code_size, InstructionSet::kArm64) - code_size;
81   return kAdrpThunkSize * num_adrp + alignment_bytes;
82 }
83 
84 }  // anonymous namespace
85 
Arm64RelativePatcher(RelativePatcherThunkProvider * thunk_provider,RelativePatcherTargetProvider * target_provider,const Arm64InstructionSetFeatures * features)86 Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherThunkProvider* thunk_provider,
87                                            RelativePatcherTargetProvider* target_provider,
88                                            const Arm64InstructionSetFeatures* features)
89     : ArmBaseRelativePatcher(thunk_provider, target_provider, InstructionSet::kArm64),
90       fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()),
91       reserved_adrp_thunks_(0u),
92       processed_adrp_thunks_(0u) {
93   if (fix_cortex_a53_843419_) {
94     adrp_thunk_locations_.reserve(16u);
95     current_method_thunks_.reserve(16u * kAdrpThunkSize);
96   }
97 }
98 
ReserveSpace(uint32_t offset,const CompiledMethod * compiled_method,MethodReference method_ref)99 uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset,
100                                             const CompiledMethod* compiled_method,
101                                             MethodReference method_ref) {
102   if (!fix_cortex_a53_843419_) {
103     DCHECK(adrp_thunk_locations_.empty());
104     return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u);
105   }
106 
107   // Add thunks for previous method if any.
108   if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) {
109     size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_;
110     offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) +
111              kAdrpThunkSize * num_adrp_thunks;
112     reserved_adrp_thunks_ = adrp_thunk_locations_.size();
113   }
114 
115   // Count the number of ADRP insns as the upper bound on the number of thunks needed
116   // and use it to reserve space for other linker patches.
117   size_t num_adrp = 0u;
118   DCHECK(compiled_method != nullptr);
119   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
120     if (IsAdrpPatch(patch)) {
121       ++num_adrp;
122     }
123   }
124   ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
125   uint32_t max_extra_space = MaxExtraSpace(num_adrp, code.size());
126   offset = ReserveSpaceInternal(offset, compiled_method, method_ref, max_extra_space);
127   if (num_adrp == 0u) {
128     return offset;
129   }
130 
131   // Now that we have the actual offset where the code will be placed, locate the ADRP insns
132   // that actually require the thunk.
133   uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
134   uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
135   DCHECK(compiled_method != nullptr);
136   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
137     if (IsAdrpPatch(patch)) {
138       uint32_t patch_offset = quick_code_offset + patch.LiteralOffset();
139       if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) {
140         adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset);
141         thunk_offset += kAdrpThunkSize;
142       }
143     }
144   }
145   return offset;
146 }
147 
ReserveSpaceEnd(uint32_t offset)148 uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) {
149   if (!fix_cortex_a53_843419_) {
150     DCHECK(adrp_thunk_locations_.empty());
151   } else {
152     // Add thunks for the last method if any.
153     if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) {
154       size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_;
155       offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) +
156                kAdrpThunkSize * num_adrp_thunks;
157       reserved_adrp_thunks_ = adrp_thunk_locations_.size();
158     }
159   }
160   return ArmBaseRelativePatcher::ReserveSpaceEnd(offset);
161 }
162 
WriteThunks(OutputStream * out,uint32_t offset)163 uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) {
164   if (fix_cortex_a53_843419_) {
165     if (!current_method_thunks_.empty()) {
166       uint32_t aligned_offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64);
167       if (kIsDebugBuild) {
168         CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize);
169         size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
170         CHECK_LE(num_thunks, processed_adrp_thunks_);
171         for (size_t i = 0u; i != num_thunks; ++i) {
172           const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i];
173           CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize);
174         }
175       }
176       uint32_t aligned_code_delta = aligned_offset - offset;
177       if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
178         return 0u;
179       }
180       if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) {
181         return 0u;
182       }
183       offset = aligned_offset + current_method_thunks_.size();
184       current_method_thunks_.clear();
185     }
186   }
187   return ArmBaseRelativePatcher::WriteThunks(out, offset);
188 }
189 
PatchCall(std::vector<uint8_t> * code,uint32_t literal_offset,uint32_t patch_offset,uint32_t target_offset)190 void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code,
191                                      uint32_t literal_offset,
192                                      uint32_t patch_offset, uint32_t
193                                      target_offset) {
194   DCHECK_LE(literal_offset + 4u, code->size());
195   DCHECK_EQ(literal_offset & 3u, 0u);
196   DCHECK_EQ(patch_offset & 3u, 0u);
197   DCHECK_EQ(target_offset & 3u, 0u);
198   uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u);
199   DCHECK_EQ(displacement & 3u, 0u);
200   DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u);  // 28-bit signed.
201   uint32_t insn = (displacement & 0x0fffffffu) >> 2;
202   insn |= 0x94000000;  // BL
203 
204   // Check that we're just overwriting an existing BL.
205   DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u);
206   // Write the new BL.
207   SetInsn(code, literal_offset, insn);
208 }
209 
PatchPcRelativeReference(std::vector<uint8_t> * code,const LinkerPatch & patch,uint32_t patch_offset,uint32_t target_offset)210 void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
211                                                     const LinkerPatch& patch,
212                                                     uint32_t patch_offset,
213                                                     uint32_t target_offset) {
214   DCHECK_EQ(patch_offset & 3u, 0u);
215   DCHECK_EQ(target_offset & 3u, 0u);
216   uint32_t literal_offset = patch.LiteralOffset();
217   uint32_t insn = GetInsn(code, literal_offset);
218   uint32_t pc_insn_offset = patch.PcInsnOffset();
219   uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu);
220   bool wide = (insn & 0x40000000) != 0;
221   uint32_t shift = wide ? 3u : 2u;
222   if (literal_offset == pc_insn_offset) {
223     // Check it's an ADRP with imm == 0 (unset).
224     DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u)
225         << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn;
226     if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() &&
227         adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) {
228       DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code),
229                                      literal_offset, patch_offset));
230       uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second;
231       uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu);
232       uint32_t adrp = PatchAdrp(insn, adrp_disp);
233 
234       uint32_t out_disp = thunk_offset - patch_offset;
235       DCHECK_EQ(out_disp & 3u, 0u);
236       DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u);  // 28-bit signed.
237       insn = (out_disp & 0x0fffffffu) >> shift;
238       insn |= 0x14000000;  // B <thunk>
239 
240       uint32_t back_disp = -out_disp;
241       DCHECK_EQ(back_disp & 3u, 0u);
242       DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u);  // 28-bit signed.
243       uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
244       b_back |= 0x14000000;  // B <back>
245       size_t thunks_code_offset = current_method_thunks_.size();
246       current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
247       SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
248       SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
249       static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
250 
251       processed_adrp_thunks_ += 1u;
252     } else {
253       insn = PatchAdrp(insn, disp);
254     }
255     // Write the new ADRP (or B to the erratum 843419 thunk).
256     SetInsn(code, literal_offset, insn);
257   } else {
258     if ((insn & 0xfffffc00) == 0x91000000) {
259       // ADD immediate, 64-bit with imm12 == 0 (unset).
260       if (!kEmitCompilerReadBarrier) {
261         DCHECK(patch.GetType() == LinkerPatch::Type::kIntrinsicReference ||
262                patch.GetType() == LinkerPatch::Type::kMethodRelative ||
263                patch.GetType() == LinkerPatch::Type::kTypeRelative ||
264                patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
265       } else {
266         // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry.
267         DCHECK(patch.GetType() == LinkerPatch::Type::kIntrinsicReference ||
268                patch.GetType() == LinkerPatch::Type::kMethodRelative ||
269                patch.GetType() == LinkerPatch::Type::kTypeRelative ||
270                patch.GetType() == LinkerPatch::Type::kStringRelative ||
271                patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
272                patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
273       }
274       shift = 0u;  // No shift for ADD.
275     } else {
276       // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
277       DCHECK(patch.GetType() == LinkerPatch::Type::kDataBimgRelRo ||
278              patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
279              patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
280              patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
281       DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
282     }
283     if (kIsDebugBuild) {
284       uint32_t adrp = GetInsn(code, pc_insn_offset);
285       if ((adrp & 0x9f000000u) != 0x90000000u) {
286         CHECK(fix_cortex_a53_843419_);
287         CHECK_EQ(adrp & 0xfc000000u, 0x14000000u);  // B <thunk>
288         CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize);
289         size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
290         CHECK_LE(num_thunks, processed_adrp_thunks_);
291         uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
292         for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
293           CHECK_NE(i, processed_adrp_thunks_);
294           if (adrp_thunk_locations_[i].first == b_offset) {
295             size_t idx = num_thunks - (processed_adrp_thunks_ - i);
296             adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
297             break;
298           }
299         }
300       }
301       CHECK_EQ(adrp & 0x9f00001fu,                    // Check that pc_insn_offset points
302                0x90000000 | ((insn >> 5) & 0x1fu));   // to ADRP with matching register.
303     }
304     uint32_t imm12 = (disp & 0xfffu) >> shift;
305     insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
306     SetInsn(code, literal_offset, insn);
307   }
308 }
309 
PatchBakerReadBarrierBranch(std::vector<uint8_t> * code,const LinkerPatch & patch,uint32_t patch_offset)310 void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
311                                                        const LinkerPatch& patch,
312                                                        uint32_t patch_offset) {
313   DCHECK_ALIGNED(patch_offset, 4u);
314   uint32_t literal_offset = patch.LiteralOffset();
315   DCHECK_ALIGNED(literal_offset, 4u);
316   DCHECK_LT(literal_offset, code->size());
317   uint32_t insn = GetInsn(code, literal_offset);
318   DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000);  // CBNZ Xt, +0 (unpatched)
319   ThunkKey key = GetBakerThunkKey(patch);
320   uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
321   DCHECK_ALIGNED(target_offset, 4u);
322   uint32_t disp = target_offset - patch_offset;
323   DCHECK((disp >> 20) == 0u || (disp >> 20) == 4095u);  // 21-bit signed.
324   insn |= (disp << (5 - 2)) & 0x00ffffe0u;              // Shift bits 2-20 to 5-23.
325   SetInsn(code, literal_offset, insn);
326 }
327 
MaxPositiveDisplacement(const ThunkKey & key)328 uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
329   switch (key.GetType()) {
330     case ThunkType::kMethodCall:
331       return kMaxMethodCallPositiveDisplacement;
332     case ThunkType::kBakerReadBarrier:
333       return kMaxBcondPositiveDisplacement;
334   }
335 }
336 
MaxNegativeDisplacement(const ThunkKey & key)337 uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
338   switch (key.GetType()) {
339     case ThunkType::kMethodCall:
340       return kMaxMethodCallNegativeDisplacement;
341     case ThunkType::kBakerReadBarrier:
342       return kMaxBcondNegativeDisplacement;
343   }
344 }
345 
PatchAdrp(uint32_t adrp,uint32_t disp)346 uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
347   return (adrp & 0x9f00001fu) |  // Clear offset bits, keep ADRP with destination reg.
348       // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
349       ((disp & 0x00003000u) << (29 - 12)) |
350       // The next 16 bits are encoded in bits 5-22.
351       ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
352       // Since the target_offset is based on the beginning of the oat file and the
353       // image space precedes the oat file, the target_offset into image space will
354       // be negative yet passed as uint32_t. Therefore we limit the displacement
355       // to +-2GiB (rather than the maximim +-4GiB) and determine the sign bit from
356       // the highest bit of the displacement. This is encoded in bit 23.
357       ((disp & 0x80000000u) >> (31 - 23));
358 }
359 
NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,uint32_t literal_offset,uint32_t patch_offset)360 bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,
361                                                    uint32_t literal_offset,
362                                                    uint32_t patch_offset) {
363   DCHECK_EQ(patch_offset & 0x3u, 0u);
364   if ((patch_offset & 0xff8) == 0xff8) {  // ...ff8 or ...ffc
365     uint32_t adrp = GetInsn(code, literal_offset);
366     DCHECK_EQ(adrp & 0x9f000000, 0x90000000);
367     uint32_t next_offset = patch_offset + 4u;
368     uint32_t next_insn = GetInsn(code, literal_offset + 4u);
369 
370     // Below we avoid patching sequences where the adrp is followed by a load which can easily
371     // be proved to be aligned.
372 
373     // First check if the next insn is the LDR using the result of the ADRP.
374     // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg.
375     if ((next_insn & 0xffc00000) == 0xb9400000 &&
376         (((next_insn >> 5) ^ adrp) & 0x1f) == 0) {
377       return false;
378     }
379 
380     // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result
381     // of the ADRP for an ADD immediate, check for that as well. We generalize a bit
382     // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination
383     // or stores the result to a different register.
384     if ((next_insn & 0x1f000000) == 0x11000000 &&
385         ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
386       return false;
387     }
388 
389     // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing.
390     if ((next_insn & 0xff000000) == 0x18000000) {
391       return false;
392     }
393 
394     // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8.
395     if ((next_insn & 0xff000000) == 0x58000000) {
396       bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0;
397       return !is_aligned_load;
398     }
399 
400     // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is
401     // guaranteed to be 128-bits aligned and <pimm> is multiple of the load size.
402     if ((next_insn & 0xbfc003e0) == 0xb94003e0) {
403       return false;
404     }
405     return true;
406   }
407   return false;
408 }
409 
SetInsn(std::vector<uint8_t> * code,uint32_t offset,uint32_t value)410 void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
411   DCHECK_LE(offset + 4u, code->size());
412   DCHECK_EQ(offset & 3u, 0u);
413   uint8_t* addr = &(*code)[offset];
414   addr[0] = (value >> 0) & 0xff;
415   addr[1] = (value >> 8) & 0xff;
416   addr[2] = (value >> 16) & 0xff;
417   addr[3] = (value >> 24) & 0xff;
418 }
419 
GetInsn(ArrayRef<const uint8_t> code,uint32_t offset)420 uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) {
421   DCHECK_LE(offset + 4u, code.size());
422   DCHECK_EQ(offset & 3u, 0u);
423   const uint8_t* addr = &code[offset];
424   return
425       (static_cast<uint32_t>(addr[0]) << 0) +
426       (static_cast<uint32_t>(addr[1]) << 8) +
427       (static_cast<uint32_t>(addr[2]) << 16)+
428       (static_cast<uint32_t>(addr[3]) << 24);
429 }
430 
431 template <typename Alloc>
GetInsn(std::vector<uint8_t,Alloc> * code,uint32_t offset)432 uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) {
433   return GetInsn(ArrayRef<const uint8_t>(*code), offset);
434 }
435 
436 }  // namespace linker
437 }  // namespace art
438