1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
28 #include <sys/auxv.h>
29 #define VIXL_USE_LINUX_HWCAP 1
30 #endif
31 
32 #include "../utils-vixl.h"
33 
34 #include "cpu-aarch64.h"
35 
36 namespace vixl {
37 namespace aarch64 {
38 
39 
// Definitions of the bit-field descriptors used to decode each AArch64 ID
// register. The matching GetCPUFeatures() implementations below read these
// fields through IDRegister::Get() and translate their values into
// CPUFeatures.
//
// NOTE(review): the integer argument is presumably the position of the field's
// least-significant bit within the 64-bit register value, and fields without
// an explicit type presumably default to Field::kUnsigned -- confirm against
// the Field constructor in cpu-aarch64.h.

// AA64PFR0 fields. kFP and kAdvSIMD are declared signed; GetCPUFeatures()
// treats any non-negative value of these as "implemented".
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

// AA64PFR1 fields.
const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);

// AA64ISAR0 fields.
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

// AA64ISAR1 fields.
const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

// AA64MMFR1 fields.
const IDRegister::Field AA64MMFR1::kLO(16);

// AA64MMFR2 fields.
const IDRegister::Field AA64MMFR2::kAT(32);

// AA64ZFR0 fields.
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);
89 
GetCPUFeatures() const90 CPUFeatures AA64PFR0::GetCPUFeatures() const {
91   CPUFeatures f;
92   if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
93   if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
94   if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
95   if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
96   if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
97   if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
98   if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
99   if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
100   if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
101   if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
102   return f;
103 }
104 
GetCPUFeatures() const105 CPUFeatures AA64PFR1::GetCPUFeatures() const {
106   CPUFeatures f;
107   if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
108   if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
109   if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
110   if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
111   if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
112   return f;
113 }
114 
GetCPUFeatures() const115 CPUFeatures AA64ISAR0::GetCPUFeatures() const {
116   CPUFeatures f;
117   if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
118   if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
119   if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
120   if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
121   if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
122   if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
123   if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
124   if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
125   if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
126   if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
127   if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
128   if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
129   if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
130   if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
131   if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
132   if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
133   return f;
134 }
135 
GetCPUFeatures() const136 CPUFeatures AA64ISAR1::GetCPUFeatures() const {
137   CPUFeatures f;
138   if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
139   if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
140   if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
141   if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
142   if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
143   if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
144   if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
145   if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
146   if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
147   if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
148   if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
149   if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
150 
151   // Only one of these fields should be non-zero, but they have the same
152   // encodings, so merge the logic.
153   int apx = std::max(Get(kAPI), Get(kAPA));
154   if (apx >= 1) {
155     f.Combine(CPUFeatures::kPAuth);
156     // APA (rather than API) indicates QARMA.
157     if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
158     if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
159     if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
160     if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
161     if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
162   }
163 
164   if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
165   if (Get(kGPA) >= 1) {
166     f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
167   }
168   return f;
169 }
170 
GetCPUFeatures() const171 CPUFeatures AA64MMFR1::GetCPUFeatures() const {
172   CPUFeatures f;
173   if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
174   return f;
175 }
176 
GetCPUFeatures() const177 CPUFeatures AA64MMFR2::GetCPUFeatures() const {
178   CPUFeatures f;
179   if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
180   return f;
181 }
182 
GetCPUFeatures() const183 CPUFeatures AA64ZFR0::GetCPUFeatures() const {
184   // This register is only available with SVE, but reads-as-zero in its absence,
185   // so it's always safe to read it.
186   CPUFeatures f;
187   if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
188   if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
189   if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
190   if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
191   return f;
192 }
193 
Get(IDRegister::Field field) const194 int IDRegister::Get(IDRegister::Field field) const {
195   int msb = field.GetMsb();
196   int lsb = field.GetLsb();
197   VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
198                      (sizeof(int) * kBitsPerByte));
199   switch (field.GetType()) {
200     case Field::kSigned:
201       return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
202     case Field::kUnsigned:
203       return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
204   }
205   VIXL_UNREACHABLE();
206   return 0;
207 }
208 
InferCPUFeaturesFromIDRegisters()209 CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
210   CPUFeatures f;
211 #define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
212   f.Combine(Read##NAME().GetCPUFeatures());
213   VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
214 #undef VIXL_COMBINE_ID_REG
215   return f;
216 }
217 
InferCPUFeaturesFromOS(CPUFeatures::QueryIDRegistersOption option)218 CPUFeatures CPU::InferCPUFeaturesFromOS(
219     CPUFeatures::QueryIDRegistersOption option) {
220   CPUFeatures features;
221 
222 #if VIXL_USE_LINUX_HWCAP
223   // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
224   // than explicit bits, but explicit bits allow us to identify features that
225   // the toolchain doesn't know about.
226   static const CPUFeatures::Feature kFeatureBits[] =
227       {// Bits 0-7
228        CPUFeatures::kFP,
229        CPUFeatures::kNEON,
230        CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
231        CPUFeatures::kAES,
232        CPUFeatures::kPmull1Q,
233        CPUFeatures::kSHA1,
234        CPUFeatures::kSHA2,
235        CPUFeatures::kCRC32,
236        // Bits 8-15
237        CPUFeatures::kAtomics,
238        CPUFeatures::kFPHalf,
239        CPUFeatures::kNEONHalf,
240        CPUFeatures::kIDRegisterEmulation,
241        CPUFeatures::kRDM,
242        CPUFeatures::kJSCVT,
243        CPUFeatures::kFcma,
244        CPUFeatures::kRCpc,
245        // Bits 16-23
246        CPUFeatures::kDCPoP,
247        CPUFeatures::kSHA3,
248        CPUFeatures::kSM3,
249        CPUFeatures::kSM4,
250        CPUFeatures::kDotProduct,
251        CPUFeatures::kSHA512,
252        CPUFeatures::kSVE,
253        CPUFeatures::kFHM,
254        // Bits 24-31
255        CPUFeatures::kDIT,
256        CPUFeatures::kUSCAT,
257        CPUFeatures::kRCpcImm,
258        CPUFeatures::kFlagM,
259        CPUFeatures::kSSBSControl,
260        CPUFeatures::kSB,
261        CPUFeatures::kPAuth,
262        CPUFeatures::kPAuthGeneric,
263        // Bits 32-39
264        CPUFeatures::kDCCVADP,
265        CPUFeatures::kNone,  // "sve2"
266        CPUFeatures::kNone,  // "sveaes"
267        CPUFeatures::kNone,  // "svepmull"
268        CPUFeatures::kNone,  // "svebitperm"
269        CPUFeatures::kNone,  // "svesha3"
270        CPUFeatures::kNone,  // "svesm4"
271        CPUFeatures::kFrintToFixedSizedInt,
272        // Bits 40-47
273        CPUFeatures::kSVEI8MM,
274        CPUFeatures::kSVEF32MM,
275        CPUFeatures::kSVEF64MM,
276        CPUFeatures::kSVEBF16,
277        CPUFeatures::kI8MM,
278        CPUFeatures::kBF16,
279        CPUFeatures::kDGH,
280        CPUFeatures::kRNG,
281        // Bits 48+
282        CPUFeatures::kBTI};
283 
284   uint64_t hwcap_low32 = getauxval(AT_HWCAP);
285   uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
286   VIXL_ASSERT(IsUint32(hwcap_low32));
287   VIXL_ASSERT(IsUint32(hwcap_high32));
288   uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32);
289 
290   VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64);
291   for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
292     if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
293   }
294 #endif  // VIXL_USE_LINUX_HWCAP
295 
296   if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
297       (features.Has(CPUFeatures::kIDRegisterEmulation))) {
298     features.Combine(InferCPUFeaturesFromIDRegisters());
299   }
300   return features;
301 }
302 
303 
304 #ifdef __aarch64__
305 #define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
306   NAME CPU::Read##NAME() {                     \
307     uint64_t value = 0;                        \
308     __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
309     return NAME(value);                        \
310   }
311 #else  // __aarch64__
312 #define VIXL_READ_ID_REG(NAME, MRS_ARG) \
313   NAME CPU::Read##NAME() {              \
314     VIXL_UNREACHABLE();                 \
315     return NAME(0);                     \
316   }
317 #endif  // __aarch64__
318 
319 VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
320 
321 #undef VIXL_READ_ID_REG
322 
323 
// Cache line sizes used by EnsureIAndDCacheCoherency(). They are initialised
// to the smallest possible cache line size and are overwritten with the values
// derived from the cache type register when CPU::SetUp() runs.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;
327 
328 
329 // Currently computes I and D cache line size.
SetUp()330 void CPU::SetUp() {
331   uint32_t cache_type_register = GetCacheType();
332 
333   // The cache type register holds information about the caches, including I
334   // D caches line size.
335   static const int kDCacheLineSizeShift = 16;
336   static const int kICacheLineSizeShift = 0;
337   static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
338   static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
339 
340   // The cache type register holds the size of the I and D caches in words as
341   // a power of two.
342   uint32_t dcache_line_size_power_of_two =
343       (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
344   uint32_t icache_line_size_power_of_two =
345       (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
346 
347   dcache_line_size_ = 4 << dcache_line_size_power_of_two;
348   icache_line_size_ = 4 << icache_line_size_power_of_two;
349 }
350 
351 
GetCacheType()352 uint32_t CPU::GetCacheType() {
353 #ifdef __aarch64__
354   uint64_t cache_type_register;
355   // Copy the content of the cache type register to a core register.
356   __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
357                        : [ctr] "=r"(cache_type_register));
358   VIXL_ASSERT(IsUint32(cache_type_register));
359   return static_cast<uint32_t>(cache_type_register);
360 #else
361   // This will lead to a cache with 1 byte long lines, which is fine since
362   // neither EnsureIAndDCacheCoherency nor the simulator will need this
363   // information.
364   return 0;
365 #endif
366 }
367 
368 
369 // Query the SVE vector length. This requires CPUFeatures::kSVE.
ReadSVEVectorLengthInBits()370 int CPU::ReadSVEVectorLengthInBits() {
371 #ifdef __aarch64__
372   uint64_t vl;
373   // To support compilers that don't understand `rdvl`, encode the value
374   // directly and move it manually.
375   __asm__(
376       "   .word 0x04bf5100\n"  // rdvl x0, #8
377       "   mov %[vl], x0\n"
378       : [vl] "=r"(vl)
379       :
380       : "x0");
381   VIXL_ASSERT(vl <= INT_MAX);
382   return static_cast<int>(vl);
383 #else
384   VIXL_UNREACHABLE();
385   return 0;
386 #endif
387 }
388 
389 
EnsureIAndDCacheCoherency(void * address,size_t length)390 void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
391 #ifdef __aarch64__
392   // Implement the cache synchronisation for all targets where AArch64 is the
393   // host, even if we're building the simulator for an AAarch64 host. This
394   // allows for cases where the user wants to simulate code as well as run it
395   // natively.
396 
397   if (length == 0) {
398     return;
399   }
400 
401   // The code below assumes user space cache operations are allowed.
402 
403   // Work out the line sizes for each cache, and use them to determine the
404   // start addresses.
405   uintptr_t start = reinterpret_cast<uintptr_t>(address);
406   uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
407   uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
408   uintptr_t dline = start & ~(dsize - 1);
409   uintptr_t iline = start & ~(isize - 1);
410 
411   // Cache line sizes are always a power of 2.
412   VIXL_ASSERT(IsPowerOf2(dsize));
413   VIXL_ASSERT(IsPowerOf2(isize));
414   uintptr_t end = start + length;
415 
416   do {
417     __asm__ __volatile__(
418         // Clean each line of the D cache containing the target data.
419         //
420         // dc       : Data Cache maintenance
421         //     c    : Clean
422         //      va  : by (Virtual) Address
423         //        u : to the point of Unification
424         // The point of unification for a processor is the point by which the
425         // instruction and data caches are guaranteed to see the same copy of a
426         // memory location. See ARM DDI 0406B page B2-12 for more information.
427         "   dc    cvau, %[dline]\n"
428         :
429         : [dline] "r"(dline)
430         // This code does not write to memory, but the "memory" dependency
431         // prevents GCC from reordering the code.
432         : "memory");
433     dline += dsize;
434   } while (dline < end);
435 
436   __asm__ __volatile__(
437       // Make sure that the data cache operations (above) complete before the
438       // instruction cache operations (below).
439       //
440       // dsb      : Data Synchronisation Barrier
441       //      ish : Inner SHareable domain
442       //
443       // The point of unification for an Inner Shareable shareability domain is
444       // the point by which the instruction and data caches of all the
445       // processors
446       // in that Inner Shareable shareability domain are guaranteed to see the
447       // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
448       // information.
449       "   dsb   ish\n"
450       :
451       :
452       : "memory");
453 
454   do {
455     __asm__ __volatile__(
456         // Invalidate each line of the I cache containing the target data.
457         //
458         // ic      : Instruction Cache maintenance
459         //    i    : Invalidate
460         //     va  : by Address
461         //       u : to the point of Unification
462         "   ic   ivau, %[iline]\n"
463         :
464         : [iline] "r"(iline)
465         : "memory");
466     iline += isize;
467   } while (iline < end);
468 
469   __asm__ __volatile__(
470       // Make sure that the instruction cache operations (above) take effect
471       // before the isb (below).
472       "   dsb  ish\n"
473 
474       // Ensure that any instructions already in the pipeline are discarded and
475       // reloaded from the new data.
476       // isb : Instruction Synchronisation Barrier
477       "   isb\n"
478       :
479       :
480       : "memory");
481 #else
482   // If the host isn't AArch64, we must be using the simulator, so this function
483   // doesn't have to do anything.
484   USE(address, length);
485 #endif
486 }
487 
488 }  // namespace aarch64
489 }  // namespace vixl
490