1 //===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library for x86 and
12 //  __aarch64_have_lse_atomics for AArch64.
13 //
14 //===----------------------------------------------------------------------===//
15 
// Some compilers (e.g. MSVC) do not provide __has_attribute. Fall back to
// "no attribute is supported" so the checks below remain well-formed
// preprocessor expressions.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif

// CONSTRUCTOR_ATTRIBUTE marks a function to be run automatically during
// program initialization. When init priorities are available, priority 101 is
// requested so that the function runs before constructors that do not set a
// priority. The priority is an attribute argument and therefore has to be
// written in parentheses.
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
25 
26 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
27      defined(_M_X64)) &&                                                       \
28     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
29 
30 #include <assert.h>
31 
32 #define bool int
33 #define true 1
34 #define false 0
35 
36 #ifdef _MSC_VER
37 #include <intrin.h>
38 #endif
39 
40 #ifndef __has_attribute
41 #define __has_attribute(attr) 0
42 #endif
43 
// Values compared against the EBX word returned by CPUID leaf 0 to recognize
// the CPU vendor. Each constant is the four characters noted beside it, packed
// least-significant byte first.
enum VendorSignatures {
  SIG_INTEL = 0x756E6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
48 
// Vendor identifiers stored in __cpu_model.__cpu_vendor. Zero is deliberately
// not a member: it is the "not initialized yet" state of that field.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX // One past the last valid vendor.
};
55 
// CPU family identifiers stored in __cpu_model.__cpu_type. The numeric values
// are spelled out because they are position-dependent: new entries must be
// appended, never inserted.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  AMDFAM19H = 15,
  CPU_TYPE_MAX // One past the last valid type.
};
74 
// CPU microarchitecture identifiers stored in __cpu_model.__cpu_subtype. The
// numeric values are spelled out because they are position-dependent: new
// entries must be appended, never inserted.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  INTEL_COREI7_TIGERLAKE = 22,
  INTEL_COREI7_COOPERLAKE = 23,
  INTEL_COREI7_SAPPHIRERAPIDS = 24,
  INTEL_COREI7_ALDERLAKE = 25,
  AMDFAM19H_ZNVER3 = 26,
  CPU_SUBTYPE_MAX // One past the last valid subtype.
};
104 
// Bit indices into the feature bitset. Index N lives in word N / 32 at bit
// N % 32; word 0 is published as __cpu_model.__cpu_features[0] and word 1 as
// __cpu_features2. The numeric values are spelled out because they are
// position-dependent: new entries must be appended, never inserted.
enum ProcessorFeatures {
  // Word 0.
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,
  // Word 1.
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36,
  FEATURE_AVX512VP2INTERSECT = 37,
  CPU_FEATURE_MAX // Number of features; sizes the bitset.
};
146 
// Reports whether the CPUID instruction may be executed.
//
// The i386 check was copied from clang's cpuid.h (__get_cpuid_max) and was
// motivated by bug reports for OpenSSL crashing on CPUs without CPUID support.
// On i386 with a GNU-compatible compiler, the inline assembly flips bit
// 0x00200000 of EFLAGS, reads EFLAGS back, and reports support only if the
// value actually changed. In every other configuration CPUID is assumed to be
// available and the function returns true unconditionally.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int __cpuid_supported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
}
178 
179 // This code is copied from lib/Support/Host.cpp.
180 // Changes to either file should be mirrored in the other.
181 
/// getX86CpuIDAndInfo - Run CPUID for leaf \p value and store the resulting
/// EAX, EBX, ECX and EDX through the four output pointers. Returns false when
/// the instruction was executed, and true when this build configuration has no
/// way to run it (in which case the outputs are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so rbx is parked in rsi
  // around the instruction; the final xchg restores rbx and leaves cpuid's
  // ebx result in rsi, which is the register bound to *rEBX.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__GNUC__) || defined(__clang__)
  // GNU-compatible compiler, but neither __x86_64__ nor __i386__ is defined.
  return true;
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
219 
/// getX86CpuIDAndInfoEx - Run CPUID for leaf \p value with ECX preloaded with
/// \p subleaf, and store the resulting EAX, EBX, ECX and EDX through the four
/// output pointers. Returns false when the instruction was executed, and true
/// when this build configuration has no way to run it (in which case the
/// outputs are left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so rbx is parked in rsi
  // around the instruction; the final xchg restores rbx and leaves cpuid's
  // ebx result in rsi, which is the register bound to *rEBX.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__GNUC__) || defined(__clang__)
  // GNU-compatible compiler, but neither __x86_64__ nor __i386__ is defined.
  return true;
#elif defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
258 
// Read extended control register 0 (XCR0), which is used to detect features
// such as AVX. The low 32 bits are stored to *rEAX and the high 32 bits to
// *rEDX. Returns false when the register was read and true when this build
// configuration has no way to read it (the outputs are then left untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic because older
  // assemblers do not support the instruction and there is no easy way to
  // conditionally compile based on the assembler used. ECX = 0 selects XCR0.
  unsigned Lo, Hi;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Lo), "=d"(Hi) : "c"(0));
  *rEAX = Lo;
  *rEDX = Hi;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Xcr0;
  *rEDX = Xcr0 >> 32;
  return false;
#else
  return true;
#endif
}
276 
// Decode the CPU family and model from the EAX value returned by CPUID leaf 1
// and store them through Family and Model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  // The extended family ID is only added when the family ID is F.
  const int UsesExtFamily = BaseFamily == 0xf;
  // The extended model ID supplies the high nibble when the family ID is 6
  // or F.
  const int UsesExtModel = BaseFamily == 6 || UsesExtFamily;

  *Family = UsesExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
289 
290 static const char *
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)291 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
292                                 const unsigned *Features,
293                                 unsigned *Type, unsigned *Subtype) {
294 #define testFeature(F)                                                         \
295   (Features[F / 32] & (1 << (F % 32))) != 0
296 
297   // We select CPU strings to match the code in Host.cpp, but we don't use them
298   // in compiler-rt.
299   const char *CPU = 0;
300 
301   switch (Family) {
302   case 6:
303     switch (Model) {
304     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
305                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
306                // mobile processor, Intel Core 2 Extreme processor, Intel
307                // Pentium Dual-Core processor, Intel Xeon processor, model
308                // 0Fh. All processors are manufactured using the 65 nm process.
309     case 0x16: // Intel Celeron processor model 16h. All processors are
310                // manufactured using the 65 nm process
311       CPU = "core2";
312       *Type = INTEL_CORE2;
313       break;
314     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
315                // 17h. All processors are manufactured using the 45 nm process.
316                //
317                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
318     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
319                // the 45 nm process.
320       CPU = "penryn";
321       *Type = INTEL_CORE2;
322       break;
323     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
324                // processors are manufactured using the 45 nm process.
325     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
326                // As found in a Summer 2010 model iMac.
327     case 0x1f:
328     case 0x2e:              // Nehalem EX
329       CPU = "nehalem";
330       *Type = INTEL_COREI7;
331       *Subtype = INTEL_COREI7_NEHALEM;
332       break;
333     case 0x25: // Intel Core i7, laptop version.
334     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
335                // processors are manufactured using the 32 nm process.
336     case 0x2f: // Westmere EX
337       CPU = "westmere";
338       *Type = INTEL_COREI7;
339       *Subtype = INTEL_COREI7_WESTMERE;
340       break;
341     case 0x2a: // Intel Core i7 processor. All processors are manufactured
342                // using the 32 nm process.
343     case 0x2d:
344       CPU = "sandybridge";
345       *Type = INTEL_COREI7;
346       *Subtype = INTEL_COREI7_SANDYBRIDGE;
347       break;
348     case 0x3a:
349     case 0x3e:              // Ivy Bridge EP
350       CPU = "ivybridge";
351       *Type = INTEL_COREI7;
352       *Subtype = INTEL_COREI7_IVYBRIDGE;
353       break;
354 
355     // Haswell:
356     case 0x3c:
357     case 0x3f:
358     case 0x45:
359     case 0x46:
360       CPU = "haswell";
361       *Type = INTEL_COREI7;
362       *Subtype = INTEL_COREI7_HASWELL;
363       break;
364 
365     // Broadwell:
366     case 0x3d:
367     case 0x47:
368     case 0x4f:
369     case 0x56:
370       CPU = "broadwell";
371       *Type = INTEL_COREI7;
372       *Subtype = INTEL_COREI7_BROADWELL;
373       break;
374 
375     // Skylake:
376     case 0x4e:              // Skylake mobile
377     case 0x5e:              // Skylake desktop
378     case 0x8e:              // Kaby Lake mobile
379     case 0x9e:              // Kaby Lake desktop
380     case 0xa5:              // Comet Lake-H/S
381     case 0xa6:              // Comet Lake-U
382       CPU = "skylake";
383       *Type = INTEL_COREI7;
384       *Subtype = INTEL_COREI7_SKYLAKE;
385       break;
386 
387     // Skylake Xeon:
388     case 0x55:
389       *Type = INTEL_COREI7;
390       if (testFeature(FEATURE_AVX512BF16)) {
391         CPU = "cooperlake";
392         *Subtype = INTEL_COREI7_COOPERLAKE;
393       } else if (testFeature(FEATURE_AVX512VNNI)) {
394         CPU = "cascadelake";
395         *Subtype = INTEL_COREI7_CASCADELAKE;
396       } else {
397         CPU = "skylake-avx512";
398         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
399       }
400       break;
401 
402     // Cannonlake:
403     case 0x66:
404       CPU = "cannonlake";
405       *Type = INTEL_COREI7;
406       *Subtype = INTEL_COREI7_CANNONLAKE;
407       break;
408 
409     // Icelake:
410     case 0x7d:
411     case 0x7e:
412       CPU = "icelake-client";
413       *Type = INTEL_COREI7;
414       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
415       break;
416 
417     // Icelake Xeon:
418     case 0x6a:
419     case 0x6c:
420       CPU = "icelake-server";
421       *Type = INTEL_COREI7;
422       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
423       break;
424 
425     // Sapphire Rapids:
426     case 0x8f:
427       CPU = "sapphirerapids";
428       *Type = INTEL_COREI7;
429       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
430       break;
431 
432     case 0x1c: // Most 45 nm Intel Atom processors
433     case 0x26: // 45 nm Atom Lincroft
434     case 0x27: // 32 nm Atom Medfield
435     case 0x35: // 32 nm Atom Midview
436     case 0x36: // 32 nm Atom Midview
437       CPU = "bonnell";
438       *Type = INTEL_BONNELL;
439       break;
440 
441     // Atom Silvermont codes from the Intel software optimization guide.
442     case 0x37:
443     case 0x4a:
444     case 0x4d:
445     case 0x5a:
446     case 0x5d:
447     case 0x4c: // really airmont
448       CPU = "silvermont";
449       *Type = INTEL_SILVERMONT;
450       break;
451     // Goldmont:
452     case 0x5c: // Apollo Lake
453     case 0x5f: // Denverton
454       CPU = "goldmont";
455       *Type = INTEL_GOLDMONT;
456       break; // "goldmont"
457     case 0x7a:
458       CPU = "goldmont-plus";
459       *Type = INTEL_GOLDMONT_PLUS;
460       break;
461     case 0x86:
462       CPU = "tremont";
463       *Type = INTEL_TREMONT;
464       break;
465 
466     case 0x57:
467       CPU = "knl";
468       *Type = INTEL_KNL;
469       break;
470 
471     case 0x85:
472       CPU = "knm";
473       *Type = INTEL_KNM;
474       break;
475 
476     default: // Unknown family 6 CPU.
477       break;
478     }
479     break;
480   default:
481     break; // Unknown.
482   }
483 
484   return CPU;
485 }
486 
487 static const char *
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)488 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
489                               const unsigned *Features,
490                               unsigned *Type, unsigned *Subtype) {
491   // We select CPU strings to match the code in Host.cpp, but we don't use them
492   // in compiler-rt.
493   const char *CPU = 0;
494 
495   switch (Family) {
496   case 16:
497     CPU = "amdfam10";
498     *Type = AMDFAM10H;
499     switch (Model) {
500     case 2:
501       *Subtype = AMDFAM10H_BARCELONA;
502       break;
503     case 4:
504       *Subtype = AMDFAM10H_SHANGHAI;
505       break;
506     case 8:
507       *Subtype = AMDFAM10H_ISTANBUL;
508       break;
509     }
510     break;
511   case 20:
512     CPU = "btver1";
513     *Type = AMD_BTVER1;
514     break;
515   case 21:
516     CPU = "bdver1";
517     *Type = AMDFAM15H;
518     if (Model >= 0x60 && Model <= 0x7f) {
519       CPU = "bdver4";
520       *Subtype = AMDFAM15H_BDVER4;
521       break; // 60h-7Fh: Excavator
522     }
523     if (Model >= 0x30 && Model <= 0x3f) {
524       CPU = "bdver3";
525       *Subtype = AMDFAM15H_BDVER3;
526       break; // 30h-3Fh: Steamroller
527     }
528     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
529       CPU = "bdver2";
530       *Subtype = AMDFAM15H_BDVER2;
531       break; // 02h, 10h-1Fh: Piledriver
532     }
533     if (Model <= 0x0f) {
534       *Subtype = AMDFAM15H_BDVER1;
535       break; // 00h-0Fh: Bulldozer
536     }
537     break;
538   case 22:
539     CPU = "btver2";
540     *Type = AMD_BTVER2;
541     break;
542   case 23:
543     CPU = "znver1";
544     *Type = AMDFAM17H;
545     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
546       CPU = "znver2";
547       *Subtype = AMDFAM17H_ZNVER2;
548       break; // 30h-3fh, 71h: Zen2
549     }
550     if (Model <= 0x0f) {
551       *Subtype = AMDFAM17H_ZNVER1;
552       break; // 00h-0Fh: Zen1
553     }
554     break;
555   case 25:
556     CPU = "znver3";
557     *Type = AMDFAM19H;
558     if (Model <= 0x0f) {
559       *Subtype = AMDFAM19H_ZNVER3;
560       break; // 00h-0Fh: Zen3
561     }
562     break;
563   default:
564     break; // Unknown AMD CPU.
565   }
566 
567   return CPU;
568 }
569 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)570 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
571                                  unsigned *Features) {
572   unsigned EAX, EBX;
573 
574 #define setFeature(F)                                                          \
575   Features[F / 32] |= 1U << (F % 32)
576 
577   if ((EDX >> 15) & 1)
578     setFeature(FEATURE_CMOV);
579   if ((EDX >> 23) & 1)
580     setFeature(FEATURE_MMX);
581   if ((EDX >> 25) & 1)
582     setFeature(FEATURE_SSE);
583   if ((EDX >> 26) & 1)
584     setFeature(FEATURE_SSE2);
585 
586   if ((ECX >> 0) & 1)
587     setFeature(FEATURE_SSE3);
588   if ((ECX >> 1) & 1)
589     setFeature(FEATURE_PCLMUL);
590   if ((ECX >> 9) & 1)
591     setFeature(FEATURE_SSSE3);
592   if ((ECX >> 12) & 1)
593     setFeature(FEATURE_FMA);
594   if ((ECX >> 19) & 1)
595     setFeature(FEATURE_SSE4_1);
596   if ((ECX >> 20) & 1)
597     setFeature(FEATURE_SSE4_2);
598   if ((ECX >> 23) & 1)
599     setFeature(FEATURE_POPCNT);
600   if ((ECX >> 25) & 1)
601     setFeature(FEATURE_AES);
602 
603   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
604   // indicates that the AVX registers will be saved and restored on context
605   // switch, then we have full AVX support.
606   const unsigned AVXBits = (1 << 27) | (1 << 28);
607   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
608                 ((EAX & 0x6) == 0x6);
609 #if defined(__APPLE__)
610   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
611   // save the AVX512 context if we use AVX512 instructions, even the bit is not
612   // set right now.
613   bool HasAVX512Save = true;
614 #else
615   // AVX512 requires additional context to be saved by the OS.
616   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
617 #endif
618 
619   if (HasAVX)
620     setFeature(FEATURE_AVX);
621 
622   bool HasLeaf7 =
623       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
624 
625   if (HasLeaf7 && ((EBX >> 3) & 1))
626     setFeature(FEATURE_BMI);
627   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
628     setFeature(FEATURE_AVX2);
629   if (HasLeaf7 && ((EBX >> 8) & 1))
630     setFeature(FEATURE_BMI2);
631   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
632     setFeature(FEATURE_AVX512F);
633   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
634     setFeature(FEATURE_AVX512DQ);
635   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
636     setFeature(FEATURE_AVX512IFMA);
637   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
638     setFeature(FEATURE_AVX512PF);
639   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
640     setFeature(FEATURE_AVX512ER);
641   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
642     setFeature(FEATURE_AVX512CD);
643   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
644     setFeature(FEATURE_AVX512BW);
645   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
646     setFeature(FEATURE_AVX512VL);
647 
648   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
649     setFeature(FEATURE_AVX512VBMI);
650   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
651     setFeature(FEATURE_AVX512VBMI2);
652   if (HasLeaf7 && ((ECX >> 8) & 1))
653     setFeature(FEATURE_GFNI);
654   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
655     setFeature(FEATURE_VPCLMULQDQ);
656   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
657     setFeature(FEATURE_AVX512VNNI);
658   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
659     setFeature(FEATURE_AVX512BITALG);
660   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
661     setFeature(FEATURE_AVX512VPOPCNTDQ);
662 
663   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
664     setFeature(FEATURE_AVX5124VNNIW);
665   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
666     setFeature(FEATURE_AVX5124FMAPS);
667   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
668     setFeature(FEATURE_AVX512VP2INTERSECT);
669 
670   bool HasLeaf7Subleaf1 =
671       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
672   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
673     setFeature(FEATURE_AVX512BF16);
674 
675   unsigned MaxExtLevel;
676   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
677 
678   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
679                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
680   if (HasExtLeaf1 && ((ECX >> 6) & 1))
681     setFeature(FEATURE_SSE4_A);
682   if (HasExtLeaf1 && ((ECX >> 11) & 1))
683     setFeature(FEATURE_XOP);
684   if (HasExtLeaf1 && ((ECX >> 16) & 1))
685     setFeature(FEATURE_FMA4);
686 #undef setFeature
687 }
688 
// Forward declaration of the initializer so that CONSTRUCTOR_ATTRIBUTE is
// attached to it. The symbol is given hidden visibility everywhere except on
// Windows.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

// Result of CPU detection, filled in by __cpu_indicator_init:
//   __cpu_vendor    - an enum ProcessorVendors value; 0 means "not yet
//                     initialized".
//   __cpu_type      - an enum ProcessorTypes value, or 0 if unrecognized.
//   __cpu_subtype   - an enum ProcessorSubtypes value, or 0 if unrecognized.
//   __cpu_features  - first 32-bit word of the enum ProcessorFeatures bitset.
// NOTE(review): the field order and sizes are presumably an ABI shared with
// compiler-generated code for __builtin_cpu_is / __builtin_cpu_supports;
// confirm before changing the layout.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};

// Second 32-bit word of the enum ProcessorFeatures bitset (features 32 and
// up), filled in by __cpu_indicator_init.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned int __cpu_features2 = 0;
708 
709 // A constructor function that is sets __cpu_model and __cpu_features2 with
710 // the right values.  This needs to run only once.  This constructor is
711 // given the highest priority and it should run before constructors without
712 // the priority set.  However, it still runs after ifunc initializers and
713 // needs to be called explicitly there.
714 
__cpu_indicator_init(void)715 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
716   unsigned EAX, EBX, ECX, EDX;
717   unsigned MaxLeaf = 5;
718   unsigned Vendor;
719   unsigned Model, Family;
720   unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
721 
722   // This function needs to run just once.
723   if (__cpu_model.__cpu_vendor)
724     return 0;
725 
726   if (!isCpuIdSupported() ||
727       getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
728     __cpu_model.__cpu_vendor = VENDOR_OTHER;
729     return -1;
730   }
731 
732   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
733   detectX86FamilyModel(EAX, &Family, &Model);
734 
735   // Find available features.
736   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
737 
738   assert((sizeof(Features)/sizeof(Features[0])) == 2);
739   __cpu_model.__cpu_features[0] = Features[0];
740   __cpu_features2 = Features[1];
741 
742   if (Vendor == SIG_INTEL) {
743     // Get CPU type.
744     getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
745                                     &(__cpu_model.__cpu_type),
746                                     &(__cpu_model.__cpu_subtype));
747     __cpu_model.__cpu_vendor = VENDOR_INTEL;
748   } else if (Vendor == SIG_AMD) {
749     // Get CPU type.
750     getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
751                                   &(__cpu_model.__cpu_type),
752                                   &(__cpu_model.__cpu_subtype));
753     __cpu_model.__cpu_vendor = VENDOR_AMD;
754   } else
755     __cpu_model.__cpu_vendor = VENDOR_OTHER;
756 
757   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
758   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
759   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
760 
761   return 0;
762 }
763 #elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
//
// __aarch64_have_lse_atomics is set by init_have_lse_atomics when
// <sys/auxv.h> is available. Otherwise nothing in this file assigns it.
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
// Fallback definitions for systems whose headers do not provide them.
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
// Constructor: read the AT_HWCAP entry of the auxiliary vector and record
// whether the HWCAP_ATOMICS bit is set.
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
  unsigned long hwcap = getauxval(AT_HWCAP);
  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
783 #endif // defined(__aarch64__)
784