1 // Copyright 2018, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_CPU_FEATURES_H
28 #define VIXL_CPU_FEATURES_H
29 
30 #include <bitset>
31 #include <ostream>
32 
33 #include "globals-vixl.h"
34 
35 namespace vixl {
36 
37 
38 // VIXL aims to handle and detect all architectural features that are likely to
39 // influence code-generation decisions at EL0 (user-space).
40 //
41 // - There may be multiple VIXL feature flags for a given architectural
//   extension. This occurs where the extension allows components to be
43 //   implemented independently, or where kernel support is needed, and is likely
44 //   to be fragmented.
45 //
46 //   For example, Pointer Authentication (kPAuth*) has a separate feature flag
47 //   for access to PACGA, and to indicate that the QARMA algorithm is
48 //   implemented.
49 //
50 // - Conversely, some extensions have configuration options that do not affect
51 //   EL0, so these are presented as a single VIXL feature.
52 //
53 //   For example, the RAS extension (kRAS) has several variants, but the only
54 //   feature relevant to VIXL is the addition of the ESB instruction so we only
55 //   need a single flag.
56 //
57 // - VIXL offers separate flags for separate features even if they're
58 //   architecturally linked.
59 //
60 //   For example, the architecture requires kFPHalf and kNEONHalf to be equal,
61 //   but they have separate hardware ID register fields so VIXL presents them as
62 //   separate features.
63 //
64 // - VIXL can detect every feature for which it can generate code.
65 //
66 // - VIXL can detect some features for which it cannot generate code.
67 //
68 // The CPUFeatures::Feature enum — derived from the macro list below — is
69 // frequently extended. New features may be added to the list at any point, and
70 // no assumptions should be made about the numerical values assigned to each
71 // enum constant. The symbolic names can be considered to be stable.
72 //
73 // The debug descriptions are used only for debug output. The 'cpuinfo' strings
74 // are informative; VIXL does not use /proc/cpuinfo for feature detection.
75 
76 // clang-format off
// X-macro listing every feature known to VIXL. `V` is invoked once per feature
// as V(constant name, debug description, Linux 'cpuinfo' string); the cpuinfo
// argument is NULL where no string is given for the feature.
#define VIXL_CPU_FEATURE_LIST(V)                                               \
  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_*  */ \
  /* registers, so that the detailed feature registers can be read          */ \
  /* directly.                                                              */ \
                                                                               \
  /* Constant name        Debug description         Linux 'cpuinfo' string. */ \
  V(kIDRegisterEmulation, "ID register emulation",  "cpuid")                   \
                                                                               \
  V(kFP,                  "FP",                     "fp")                      \
  V(kNEON,                "NEON",                   "asimd")                   \
  V(kCRC32,               "CRC32",                  "crc32")                   \
  V(kDGH,                 "DGH",                    "dgh")                     \
  /* Speculation control features.                                          */ \
  V(kCSV2,                "CSV2",                   NULL)                      \
  V(kSCXTNUM,             "SCXTNUM",                NULL)                      \
  V(kCSV3,                "CSV3",                   NULL)                      \
  V(kSB,                  "SB",                     "sb")                      \
  V(kSPECRES,             "SPECRES",                NULL)                      \
  V(kSSBS,                "SSBS",                   NULL)                      \
  V(kSSBSControl,         "SSBS (PSTATE control)",  "ssbs")                    \
  /* Cryptographic support instructions.                                    */ \
  V(kAES,                 "AES",                    "aes")                     \
  V(kSHA1,                "SHA1",                   "sha1")                    \
  V(kSHA2,                "SHA2",                   "sha2")                    \
  /* A form of PMULL{2} with a 128-bit (1Q) result.                         */ \
  V(kPmull1Q,             "Pmull1Q",                "pmull")                   \
  /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc.              */ \
  V(kAtomics,             "Atomics",                "atomics")                 \
  /* Limited ordering regions: LDLAR, STLLR and their variants.             */ \
  V(kLORegions,           "LORegions",              NULL)                      \
  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH.        */ \
  V(kRDM,                 "RDM",                    "asimdrdm")                \
  /* Scalable Vector Extension.                                             */ \
  V(kSVE,                 "SVE",                    "sve")                     \
  V(kSVEF64MM,            "SVE F64MM",              "svef64mm")                \
  V(kSVEF32MM,            "SVE F32MM",              "svef32mm")                \
  V(kSVEI8MM,             "SVE I8MM",               "svei8mm")                 \
  V(kSVEBF16,             "SVE BFloat16",           "svebf16")                 \
  /* SDOT and UDOT support (in NEON).                                       */ \
  V(kDotProduct,          "DotProduct",             "asimddp")                 \
  /* Int8 matrix multiplication (in NEON).                                  */ \
  V(kI8MM,                "NEON I8MM",              "i8mm")                    \
  /* Half-precision (FP16) support for FP and NEON, respectively.           */ \
  V(kFPHalf,              "FPHalf",                 "fphp")                    \
  V(kNEONHalf,            "NEONHalf",               "asimdhp")                 \
  /* BFloat16 support (in both FP and NEON.)                                */ \
  V(kBF16,                "FP/NEON BFloat 16",      "bf16")                    \
  /* The RAS extension, including the ESB instruction.                      */ \
  V(kRAS,                 "RAS",                    NULL)                      \
  /* Data cache clean to the point of persistence: DC CVAP.                 */ \
  V(kDCPoP,               "DCPoP",                  "dcpop")                   \
  /* Data cache clean to the point of deep persistence: DC CVADP.           */ \
  V(kDCCVADP,             "DCCVADP",                "dcpodp")                  \
  /* Cryptographic support instructions.                                    */ \
  V(kSHA3,                "SHA3",                   "sha3")                    \
  V(kSHA512,              "SHA512",                 "sha512")                  \
  V(kSM3,                 "SM3",                    "sm3")                     \
  V(kSM4,                 "SM4",                    "sm4")                     \
  /* Pointer authentication for addresses.                                  */ \
  V(kPAuth,               "PAuth",                  "paca")                    \
  /* Pointer authentication for addresses uses QARMA.                       */ \
  V(kPAuthQARMA,          "PAuthQARMA",             NULL)                      \
  /* Generic authentication (using the PACGA instruction).                  */ \
  V(kPAuthGeneric,        "PAuthGeneric",           "pacg")                    \
  /* Generic authentication uses QARMA.                                     */ \
  V(kPAuthGenericQARMA,   "PAuthGenericQARMA",      NULL)                      \
  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS.        */ \
  V(kJSCVT,               "JSCVT",                  "jscvt")                   \
  /* Complex number support for NEON: FCMLA and FCADD.                      */ \
  V(kFcma,                "Fcma",                   "fcma")                    \
  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
  V(kRCpc,                "RCpc",                   "lrcpc")                   \
  V(kRCpcImm,             "RCpc (imm)",             "ilrcpc")                  \
  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF.               */ \
  V(kFlagM,               "FlagM",                  "flagm")                   \
  /* Unaligned single-copy atomicity.                                       */ \
  V(kUSCAT,               "USCAT",                  "uscat")                   \
  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}.         */ \
  V(kFHM,                 "FHM",                    "asimdfhm")                \
  /* Data-independent timing (for selected instructions).                   */ \
  V(kDIT,                 "DIT",                    "dit")                     \
  /* Branch target identification.                                          */ \
  V(kBTI,                 "BTI",                    "bti")                     \
  /* Flag manipulation instructions: {AX,XA}FLAG                            */ \
  V(kAXFlag,              "AXFlag",                 "flagm2")                  \
  /* Random number generation extension.                                    */ \
  V(kRNG,                 "RNG",                    "rng")                     \
  /* Floating-point round to {32,64}-bit integer.                           */ \
  V(kFrintToFixedSizedInt,"Frint (bounded)",        "frint")                   \
  /* Memory Tagging Extension.                                              */ \
  V(kMTEInstructions,     "MTE (EL0 instructions)", NULL)                      \
  V(kMTE,                 "MTE",                    NULL)                      \
  /* PAuth extensions.                                                      */ \
  V(kPAuthEnhancedPAC,    "PAuth EnhancedPAC",      NULL)                      \
  V(kPAuthEnhancedPAC2,   "PAuth EnhancedPAC2",     NULL)                      \
  V(kPAuthFPAC,           "PAuth FPAC",             NULL)                      \
  V(kPAuthFPACCombined,   "PAuth FPACCombined",     NULL)
174 // clang-format on
175 
176 
177 class CPUFeaturesConstIterator;
178 
179 // A representation of the set of features known to be supported by the target
180 // device. Each feature is represented by a simple boolean flag.
181 //
182 //   - When the Assembler is asked to assemble an instruction, it asserts (in
183 //     debug mode) that the necessary features are available.
184 //
185 //   - TODO: The MacroAssembler relies on the Assembler's assertions, but in
186 //     some cases it may be useful for macros to generate a fall-back sequence
187 //     in case features are not available.
188 //
189 //   - The Simulator assumes by default that all features are available, but it
190 //     is possible to configure it to fail if the simulated code uses features
191 //     that are not enabled.
192 //
193 //     The Simulator also offers pseudo-instructions to allow features to be
194 //     enabled and disabled dynamically. This is useful when you want to ensure
195 //     that some features are constrained to certain areas of code.
196 //
197 //   - The base Disassembler knows nothing about CPU features, but the
198 //     PrintDisassembler can be configured to annotate its output with warnings
199 //     about unavailable features. The Simulator uses this feature when
200 //     instruction trace is enabled.
201 //
202 //   - The Decoder-based components -- the Simulator and PrintDisassembler --
203 //     rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
204 //     features actually encountered so that a large block of code can be
205 //     examined (either directly or through simulation), and the required
206 //     features analysed later.
207 //
208 // Expected usage:
209 //
210 //     // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
//     // compatibility with older versions of VIXL.
212 //     MacroAssembler masm;
213 //
214 //     // Generate code only for the current CPU.
215 //     masm.SetCPUFeatures(CPUFeatures::InferFromOS());
216 //
217 //     // Turn off feature checking entirely.
218 //     masm.SetCPUFeatures(CPUFeatures::All());
219 //
220 // Feature set manipulation:
221 //
222 //     CPUFeatures f;  // The default constructor gives an empty set.
223 //     // Individual features can be added (or removed).
//     f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kAES);
225 //     f.Remove(CPUFeatures::kNEON);
226 //
227 //     // Some helpers exist for extensions that provide several features.
228 //     f.Remove(CPUFeatures::All());
229 //     f.Combine(CPUFeatures::AArch64LegacyBaseline());
230 //
231 //     // Chained construction is also possible.
232 //     CPUFeatures g =
233 //         f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
234 //
235 //     // Features can be queried. Where multiple features are given, they are
236 //     // combined with logical AND.
237 //     if (h.Has(CPUFeatures::kNEON)) { ... }
238 //     if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
239 //     if (h.Has(g)) { ... }
240 //     // If the empty set is requested, the result is always 'true'.
241 //     VIXL_ASSERT(h.Has(CPUFeatures()));
242 //
243 //     // For debug and reporting purposes, features can be enumerated (or
244 //     // printed directly):
245 //     std::cout << CPUFeatures::kNEON;  // Prints something like "NEON".
246 //     std::cout << f;  // Prints something like "FP, NEON, CRC32".
247 class CPUFeatures {
248  public:
249   // clang-format off
250   // Individual features.
251   // These should be treated as opaque tokens. User code should not rely on
252   // specific numeric values or ordering.
253   enum Feature {
254     // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
255     // this class supports.
256 
257     kNone = -1,
258 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
259     VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
260 #undef VIXL_DECLARE_FEATURE
261     kNumberOfFeatures
262   };
263   // clang-format on
264 
265   // By default, construct with no features enabled.
CPUFeatures()266   CPUFeatures() : features_{} {}
267 
268   // Construct with some features already enabled.
269   template <typename T, typename... U>
CPUFeatures(T first,U...others)270   CPUFeatures(T first, U... others) : features_{} {
271     Combine(first, others...);
272   }
273 
274   // Construct with all features enabled. This can be used to disable feature
275   // checking: `Has(...)` returns true regardless of the argument.
276   static CPUFeatures All();
277 
278   // Construct an empty CPUFeatures. This is equivalent to the default
279   // constructor, but is provided for symmetry and convenience.
None()280   static CPUFeatures None() { return CPUFeatures(); }
281 
282   // The presence of these features was assumed by version of VIXL before this
283   // API was added, so using this set by default ensures API compatibility.
AArch64LegacyBaseline()284   static CPUFeatures AArch64LegacyBaseline() {
285     return CPUFeatures(kFP, kNEON, kCRC32);
286   }
287 
288   // Construct a new CPUFeatures object using ID registers. This assumes that
289   // kIDRegisterEmulation is present.
290   static CPUFeatures InferFromIDRegisters();
291 
292   enum QueryIDRegistersOption {
293     kDontQueryIDRegisters,
294     kQueryIDRegistersIfAvailable
295   };
296 
297   // Construct a new CPUFeatures object based on what the OS reports.
298   static CPUFeatures InferFromOS(
299       QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
300 
301   // Combine another CPUFeatures object into this one. Features that already
302   // exist in this set are left unchanged.
303   void Combine(const CPUFeatures& other);
304 
305   // Combine a specific feature into this set. If it already exists in the set,
306   // the set is left unchanged.
307   void Combine(Feature feature);
308 
309   // Combine multiple features (or feature sets) into this set.
310   template <typename T, typename... U>
Combine(T first,U...others)311   void Combine(T first, U... others) {
312     Combine(first);
313     Combine(others...);
314   }
315 
316   // Remove features in another CPUFeatures object from this one.
317   void Remove(const CPUFeatures& other);
318 
319   // Remove a specific feature from this set. This has no effect if the feature
320   // doesn't exist in the set.
321   void Remove(Feature feature0);
322 
323   // Remove multiple features (or feature sets) from this set.
324   template <typename T, typename... U>
Remove(T first,U...others)325   void Remove(T first, U... others) {
326     Remove(first);
327     Remove(others...);
328   }
329 
330   // Chaining helpers for convenient construction by combining other CPUFeatures
331   // or individual Features.
332   template <typename... T>
With(T...others)333   CPUFeatures With(T... others) const {
334     CPUFeatures f(*this);
335     f.Combine(others...);
336     return f;
337   }
338 
339   template <typename... T>
Without(T...others)340   CPUFeatures Without(T... others) const {
341     CPUFeatures f(*this);
342     f.Remove(others...);
343     return f;
344   }
345 
346   // Test whether the `other` feature set is equal to or a subset of this one.
347   bool Has(const CPUFeatures& other) const;
348 
349   // Test whether a single feature exists in this set.
350   // Note that `Has(kNone)` always returns true.
351   bool Has(Feature feature) const;
352 
353   // Test whether all of the specified features exist in this set.
354   template <typename T, typename... U>
Has(T first,U...others)355   bool Has(T first, U... others) const {
356     return Has(first) && Has(others...);
357   }
358 
359   // Return the number of enabled features.
360   size_t Count() const;
HasNoFeatures()361   bool HasNoFeatures() const { return Count() == 0; }
362 
363   // Check for equivalence.
364   bool operator==(const CPUFeatures& other) const {
365     return Has(other) && other.Has(*this);
366   }
367   bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
368 
369   typedef CPUFeaturesConstIterator const_iterator;
370 
371   const_iterator begin() const;
372   const_iterator end() const;
373 
374  private:
375   // Each bit represents a feature. This set will be extended as needed.
376   std::bitset<kNumberOfFeatures> features_;
377 
378   friend std::ostream& operator<<(std::ostream& os,
379                                   const vixl::CPUFeatures& features);
380 };
381 
382 std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
383 std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
384 
385 // This is not a proper C++ iterator type, but it simulates enough of
386 // ForwardIterator that simple loops can be written.
387 class CPUFeaturesConstIterator {
388  public:
389   CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
390                            CPUFeatures::Feature start = CPUFeatures::kNone)
cpu_features_(cpu_features)391       : cpu_features_(cpu_features), feature_(start) {
392     VIXL_ASSERT(IsValid());
393   }
394 
395   bool operator==(const CPUFeaturesConstIterator& other) const;
396   bool operator!=(const CPUFeaturesConstIterator& other) const {
397     return !(*this == other);
398   }
399   CPUFeaturesConstIterator& operator++();
400   CPUFeaturesConstIterator operator++(int);
401 
402   CPUFeatures::Feature operator*() const {
403     VIXL_ASSERT(IsValid());
404     return feature_;
405   }
406 
407   // For proper support of C++'s simplest "Iterator" concept, this class would
408   // have to define member types (such as CPUFeaturesIterator::pointer) to make
409   // it appear as if it iterates over Feature objects in memory. That is, we'd
410   // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
411   // This is at least partially possible -- the std::vector<bool> specialisation
412   // does something similar -- but it doesn't seem worthwhile for a
413   // special-purpose debug helper, so they are omitted here.
414  private:
415   const CPUFeatures* cpu_features_;
416   CPUFeatures::Feature feature_;
417 
IsValid()418   bool IsValid() const {
419     if (cpu_features_ == NULL) {
420       return feature_ == CPUFeatures::kNone;
421     }
422     return cpu_features_->Has(feature_);
423   }
424 };
425 
426 // A convenience scope for temporarily modifying a CPU features object. This
427 // allows features to be enabled for short sequences.
428 //
429 // Expected usage:
430 //
431 //  {
432 //    CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
433 //    // This scope can now use CRC32, as well as anything else that was enabled
434 //    // before the scope.
435 //
436 //    ...
437 //
438 //    // At the end of the scope, the original CPU features are restored.
439 //  }
440 class CPUFeaturesScope {
441  public:
442   // Start a CPUFeaturesScope on any object that implements
443   // `CPUFeatures* GetCPUFeatures()`.
444   template <typename T>
CPUFeaturesScope(T * cpu_features_wrapper)445   explicit CPUFeaturesScope(T* cpu_features_wrapper)
446       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
447         old_features_(*cpu_features_) {}
448 
449   // Start a CPUFeaturesScope on any object that implements
450   // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
451   template <typename T, typename U, typename... V>
CPUFeaturesScope(T * cpu_features_wrapper,U first,V...features)452   CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
453       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
454         old_features_(*cpu_features_) {
455     cpu_features_->Combine(first, features...);
456   }
457 
~CPUFeaturesScope()458   ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
459 
460   // For advanced usage, the CPUFeatures object can be accessed directly.
461   // The scope will restore the original state when it ends.
462 
GetCPUFeatures()463   CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
464 
SetCPUFeatures(const CPUFeatures & cpu_features)465   void SetCPUFeatures(const CPUFeatures& cpu_features) {
466     *cpu_features_ = cpu_features;
467   }
468 
469  private:
470   CPUFeatures* const cpu_features_;
471   const CPUFeatures old_features_;
472 };
473 
474 
475 }  // namespace vixl
476 
477 #endif  // VIXL_CPU_FEATURES_H
478