1 //===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is based on LLVM's lib/Support/Host.cpp.
10 // It implements the operating system Host concept and builtin
11 // __cpu_model for the compiler_rt library for x86 and
12 // __aarch64_have_lse_atomics for AArch64.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #if defined(HAVE_INIT_PRIORITY)
17 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
18 #elif __has_attribute(__constructor__)
19 #define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
20 #else
21 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
22 // this runs during initialization.
23 #define CONSTRUCTOR_ATTRIBUTE
24 #endif
25
26 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
27 defined(_M_X64)) && \
28 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
29
30 #include <assert.h>
31
32 #define bool int
33 #define true 1
34 #define false 0
35
36 #ifdef _MSC_VER
37 #include <intrin.h>
38 #endif
39
40 #ifndef __has_attribute
41 #define __has_attribute(attr) 0
42 #endif
43
// CPUID leaf 0 vendor signatures: the raw EBX value, which carries the first
// four bytes of the vendor string ("GenuineIntel" / "AuthenticAMD") in
// little-endian order. Compared against the Vendor word in
// __cpu_indicator_init.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};
48
// Values stored in __cpu_model.__cpu_vendor. Numbering starts at 1 because 0
// doubles as the "not yet initialized" state (see __cpu_indicator_init).
// NOTE(review): these values appear to be part of the __cpu_model ABI shared
// with libgcc/Host.cpp — append only, never reorder.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX // Upper bound, used only for the sanity assert.
};
55
// Values stored in __cpu_model.__cpu_type (0 means unset/unknown; the field
// is only written when a family/model is recognized).
// NOTE(review): ordering appears ABI-stable (mirrored in Host.cpp per the
// file header) — append only, never reorder.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  CPU_TYPE_MAX // Upper bound, used only for the sanity assert.
};
74
// Values stored in __cpu_model.__cpu_subtype (0 means unset/unknown). Each
// value refines a ProcessorTypes entry (e.g. INTEL_COREI7_* under
// INTEL_COREI7).
// NOTE(review): ordering appears ABI-stable (mirrored in Host.cpp per the
// file header) — append only, never reorder.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  CPU_SUBTYPE_MAX // Upper bound, used only for the sanity assert.
};
104
// Bit positions within the packed feature set built by getAvailableFeatures.
// Bits 0-31 are exported in __cpu_model.__cpu_features[0]; bits 32 and up in
// __cpu_features2 (see __cpu_indicator_init).
// NOTE(review): ordering appears ABI-stable (mirrored in Host.cpp per the
// file header) — append only, never reorder.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2, // Bit 31 — last bit held in __cpu_model.__cpu_features[0].
  FEATURE_GFNI,        // Bit 32 — first bit exported via __cpu_features2.
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  CPU_FEATURE_MAX // Count; sizes the packed feature-word array.
};
146
147 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
148 // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
149 // support. Consequently, for i386, the presence of CPUID is checked first
150 // via the corresponding eflags bit.
// Returns true when the CPUID instruction can be executed on this CPU.
//
// On i386 this probes for CPUID by attempting to toggle the ID bit (bit 21)
// of EFLAGS: if the flipped value survives a pushfd/popfd round trip, CPUID
// is available. On x86-64 (and on MSVC targets) CPUID always exists, so the
// function simply returns true there.
static bool isCpuIdSupported(void) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n" // Flip the EFLAGS ID bit (bit 21).
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n" // Unchanged => the bit cannot be toggled.
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  // Non-GCC/Clang compilers (MSVC) only target CPUs that have CPUID.
  return true;
}
178
179 // This code is copied from lib/Support/Host.cpp.
180 // Changes to either file should be mirrored in the other.
181
182 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
183 /// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // RBX is saved in RSI around the cpuid and the leaf's EBX result is
  // returned through the "=S" (ESI) output after the xchg.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/restore dance with the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  // GCC/Clang on a non-x86 target: no cpuid instruction.
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: report cpuid as unavailable.
  return true;
#endif
}
219
220 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
221 /// the 4 values in the specified arguments. If we can't run cpuid on the host,
222 /// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // The subleaf is passed in ECX via the "c" input constraint; RBX is saved
  // in RSI around the cpuid as in getX86CpuIDAndInfo.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same as above with the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  // GCC/Clang on a non-x86 target: no cpuid instruction.
  return true;
#endif
#elif defined(_MSC_VER)
  // MSVC intrinsic taking an explicit subleaf.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  // Unknown compiler: report cpuid as unavailable.
  return true;
#endif
}
258
259 // Read control register 0 (XCR0). Used to detect features such as AVX.
// Read control register 0 (XCR0). Used to detect features such as AVX.
// Writes the low 32 bits to *rEAX and the high 32 bits to *rEDX; returns
// true when XCR0 cannot be read with this compiler/target.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX = 0 selects the XCR0 register.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
276
// Decode the CPU family and model from CPUID leaf 1 EAX, folding in the
// extended family/model fields exactly as the vendors document them:
// the extended family (bits 20-27) is added only when the base family is
// 0xF, and the extended model (bits 16-19) becomes the high nibble of the
// model only when the base family is 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  unsigned BaseFamily = (EAX >> 8) & 0xf;
  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = (EAX >> 4) & 0xf;
  if (BaseFamily == 0xf)
    DecodedFamily += (EAX >> 20) & 0xff;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ((EAX >> 16) & 0xf) << 4;
  *Family = DecodedFamily;
  *Model = DecodedModel;
}
289
290 static const char *
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)291 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
292 const unsigned *Features,
293 unsigned *Type, unsigned *Subtype) {
294 #define testFeature(F) \
295 (Features[F / 32] & (1 << (F % 32))) != 0
296
297 // We select CPU strings to match the code in Host.cpp, but we don't use them
298 // in compiler-rt.
299 const char *CPU = 0;
300
301 switch (Family) {
302 case 6:
303 switch (Model) {
304 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
305 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
306 // mobile processor, Intel Core 2 Extreme processor, Intel
307 // Pentium Dual-Core processor, Intel Xeon processor, model
308 // 0Fh. All processors are manufactured using the 65 nm process.
309 case 0x16: // Intel Celeron processor model 16h. All processors are
310 // manufactured using the 65 nm process
311 CPU = "core2";
312 *Type = INTEL_CORE2;
313 break;
314 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
315 // 17h. All processors are manufactured using the 45 nm process.
316 //
317 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
318 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
319 // the 45 nm process.
320 CPU = "penryn";
321 *Type = INTEL_CORE2;
322 break;
323 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
324 // processors are manufactured using the 45 nm process.
325 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
326 // As found in a Summer 2010 model iMac.
327 case 0x1f:
328 case 0x2e: // Nehalem EX
329 CPU = "nehalem";
330 *Type = INTEL_COREI7;
331 *Subtype = INTEL_COREI7_NEHALEM;
332 break;
333 case 0x25: // Intel Core i7, laptop version.
334 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
335 // processors are manufactured using the 32 nm process.
336 case 0x2f: // Westmere EX
337 CPU = "westmere";
338 *Type = INTEL_COREI7;
339 *Subtype = INTEL_COREI7_WESTMERE;
340 break;
341 case 0x2a: // Intel Core i7 processor. All processors are manufactured
342 // using the 32 nm process.
343 case 0x2d:
344 CPU = "sandybridge";
345 *Type = INTEL_COREI7;
346 *Subtype = INTEL_COREI7_SANDYBRIDGE;
347 break;
348 case 0x3a:
349 case 0x3e: // Ivy Bridge EP
350 CPU = "ivybridge";
351 *Type = INTEL_COREI7;
352 *Subtype = INTEL_COREI7_IVYBRIDGE;
353 break;
354
355 // Haswell:
356 case 0x3c:
357 case 0x3f:
358 case 0x45:
359 case 0x46:
360 CPU = "haswell";
361 *Type = INTEL_COREI7;
362 *Subtype = INTEL_COREI7_HASWELL;
363 break;
364
365 // Broadwell:
366 case 0x3d:
367 case 0x47:
368 case 0x4f:
369 case 0x56:
370 CPU = "broadwell";
371 *Type = INTEL_COREI7;
372 *Subtype = INTEL_COREI7_BROADWELL;
373 break;
374
375 // Skylake:
376 case 0x4e: // Skylake mobile
377 case 0x5e: // Skylake desktop
378 case 0x8e: // Kaby Lake mobile
379 case 0x9e: // Kaby Lake desktop
380 case 0xa5: // Comet Lake-H/S
381 case 0xa6: // Comet Lake-U
382 CPU = "skylake";
383 *Type = INTEL_COREI7;
384 *Subtype = INTEL_COREI7_SKYLAKE;
385 break;
386
387 // Skylake Xeon:
388 case 0x55:
389 *Type = INTEL_COREI7;
390 if (testFeature(FEATURE_AVX512BF16)) {
391 CPU = "cooperlake";
392 *Subtype = INTEL_COREI7_COOPERLAKE;
393 } else if (testFeature(FEATURE_AVX512VNNI)) {
394 CPU = "cascadelake";
395 *Subtype = INTEL_COREI7_CASCADELAKE;
396 } else {
397 CPU = "skylake-avx512";
398 *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
399 }
400 break;
401
402 // Cannonlake:
403 case 0x66:
404 CPU = "cannonlake";
405 *Type = INTEL_COREI7;
406 *Subtype = INTEL_COREI7_CANNONLAKE;
407 break;
408
409 // Icelake:
410 case 0x7d:
411 case 0x7e:
412 CPU = "icelake-client";
413 *Type = INTEL_COREI7;
414 *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
415 break;
416
417 // Icelake Xeon:
418 case 0x6a:
419 case 0x6c:
420 CPU = "icelake-server";
421 *Type = INTEL_COREI7;
422 *Subtype = INTEL_COREI7_ICELAKE_SERVER;
423 break;
424
425 // Sapphire Rapids:
426 case 0x8f:
427 CPU = "sapphirerapids";
428 *Type = INTEL_COREI7;
429 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
430 break;
431
432 case 0x1c: // Most 45 nm Intel Atom processors
433 case 0x26: // 45 nm Atom Lincroft
434 case 0x27: // 32 nm Atom Medfield
435 case 0x35: // 32 nm Atom Midview
436 case 0x36: // 32 nm Atom Midview
437 CPU = "bonnell";
438 *Type = INTEL_BONNELL;
439 break;
440
441 // Atom Silvermont codes from the Intel software optimization guide.
442 case 0x37:
443 case 0x4a:
444 case 0x4d:
445 case 0x5a:
446 case 0x5d:
447 case 0x4c: // really airmont
448 CPU = "silvermont";
449 *Type = INTEL_SILVERMONT;
450 break;
451 // Goldmont:
452 case 0x5c: // Apollo Lake
453 case 0x5f: // Denverton
454 CPU = "goldmont";
455 *Type = INTEL_GOLDMONT;
456 break; // "goldmont"
457 case 0x7a:
458 CPU = "goldmont-plus";
459 *Type = INTEL_GOLDMONT_PLUS;
460 break;
461 case 0x86:
462 CPU = "tremont";
463 *Type = INTEL_TREMONT;
464 break;
465
466 case 0x57:
467 CPU = "knl";
468 *Type = INTEL_KNL;
469 break;
470
471 case 0x85:
472 CPU = "knm";
473 *Type = INTEL_KNM;
474 break;
475
476 default: // Unknown family 6 CPU.
477 break;
478 }
479 break;
480 default:
481 break; // Unknown.
482 }
483
484 return CPU;
485 }
486
487 static const char *
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)488 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
489 const unsigned *Features,
490 unsigned *Type, unsigned *Subtype) {
491 // We select CPU strings to match the code in Host.cpp, but we don't use them
492 // in compiler-rt.
493 const char *CPU = 0;
494
495 switch (Family) {
496 case 16:
497 CPU = "amdfam10";
498 *Type = AMDFAM10H;
499 switch (Model) {
500 case 2:
501 *Subtype = AMDFAM10H_BARCELONA;
502 break;
503 case 4:
504 *Subtype = AMDFAM10H_SHANGHAI;
505 break;
506 case 8:
507 *Subtype = AMDFAM10H_ISTANBUL;
508 break;
509 }
510 break;
511 case 20:
512 CPU = "btver1";
513 *Type = AMD_BTVER1;
514 break;
515 case 21:
516 CPU = "bdver1";
517 *Type = AMDFAM15H;
518 if (Model >= 0x60 && Model <= 0x7f) {
519 CPU = "bdver4";
520 *Subtype = AMDFAM15H_BDVER4;
521 break; // 60h-7Fh: Excavator
522 }
523 if (Model >= 0x30 && Model <= 0x3f) {
524 CPU = "bdver3";
525 *Subtype = AMDFAM15H_BDVER3;
526 break; // 30h-3Fh: Steamroller
527 }
528 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
529 CPU = "bdver2";
530 *Subtype = AMDFAM15H_BDVER2;
531 break; // 02h, 10h-1Fh: Piledriver
532 }
533 if (Model <= 0x0f) {
534 *Subtype = AMDFAM15H_BDVER1;
535 break; // 00h-0Fh: Bulldozer
536 }
537 break;
538 case 22:
539 CPU = "btver2";
540 *Type = AMD_BTVER2;
541 break;
542 case 23:
543 CPU = "znver1";
544 *Type = AMDFAM17H;
545 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
546 CPU = "znver2";
547 *Subtype = AMDFAM17H_ZNVER2;
548 break; // 30h-3fh, 71h: Zen2
549 }
550 if (Model <= 0x0f) {
551 *Subtype = AMDFAM17H_ZNVER1;
552 break; // 00h-0Fh: Zen1
553 }
554 break;
555 case 25:
556 CPU = "znver3";
557 *Type = AMDFAM19H;
558 if (Model <= 0x0f) {
559 *Subtype = AMDFAM19H_ZNVER3;
560 break; // 00h-0Fh: Zen3
561 }
562 break;
563 default:
564 break; // Unknown AMD CPU.
565 }
566
567 return CPU;
568 }
569
// Translate raw CPUID feature bits into the packed FEATURE_* bit set.
//
// ECX/EDX are the feature words from CPUID leaf 1 (read by the caller);
// MaxLeaf is the highest supported basic leaf (EAX of leaf 0), used to gate
// the leaf 7 queries. Features must point to a zero-initialized array of
// (CPU_FEATURE_MAX + 31) / 32 words.
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

// Set bit F in the packed feature-word array.
#define setFeature(F)                                                          \
  Features[F / 32] |= 1U << (F % 32)

  // Leaf 1 EDX bits.
  if ((EDX >> 15) & 1)
    setFeature(FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(FEATURE_SSE2);

  // Leaf 1 ECX bits.
  if ((ECX >> 0) & 1)
    setFeature(FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(FEATURE_SSE4_1);
  if ((ECX >> 20) & 1)
    setFeature(FEATURE_SSE4_2);
  if ((ECX >> 23) & 1)
    setFeature(FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(FEATURE_AES);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // Note the && short-circuit: getX86XCR0 (xgetbv) is executed only when the
  // CPUID bits say XSAVE+AVX are present, so it is never run on CPUs that
  // would fault on xgetbv.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even the bit is not
  // set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(FEATURE_AVX);

  // Leaf 7 subleaf 0: structured extended features. AVX/AVX-512 dependent
  // bits are additionally gated on the OS context-save checks above.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VL);

  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VPOPCNTDQ);

  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VP2INTERSECT);

  // Leaf 7 subleaf 1 (currently only AVX512BF16).
  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BF16);

  // Extended leaf 0x80000001: AMD-originated extensions.
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(FEATURE_FMA4);
#undef setFeature
}
688
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
// CPU model record exported for the __cpu_model builtin (see file header).
// __cpu_vendor doubles as the "already initialized" flag: 0 means the
// constructor has not run yet.
struct __processor_model {
  unsigned int __cpu_vendor;      // A ProcessorVendors value.
  unsigned int __cpu_type;        // A ProcessorTypes value (0 = unknown).
  unsigned int __cpu_subtype;     // A ProcessorSubtypes value (0 = unknown).
  unsigned int __cpu_features[1]; // FEATURE_* bits 0-31.
} __cpu_model = {0, 0, 0, {0}};

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
// FEATURE_* bits 32-63, kept as a separate symbol so the layout of
// struct __processor_model stays unchanged.
unsigned int __cpu_features2 = 0;

// A constructor function that sets __cpu_model and __cpu_features2 with
// the right values. This needs to run only once. This constructor is
// given the highest priority and it should run before constructors without
// the priority set. However, it still runs after ifunc initializers and
// needs to be called explicitly there.
714
// Fills in __cpu_model / __cpu_features2 from CPUID. Returns 0 on success or
// when already initialized; returns -1 (with vendor VENDOR_OTHER) when CPUID
// cannot be used.
int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
  unsigned EAX, EBX, ECX, EDX;
  unsigned MaxLeaf = 5; // Defensive default; overwritten by leaf 0 below.
  unsigned Vendor;
  unsigned Model, Family;
  unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};

  // This function needs to run just once.
  if (__cpu_model.__cpu_vendor)
    return 0;

  // Leaf 0: maximum supported leaf in EAX, vendor signature in EBX.
  if (!isCpuIdSupported() ||
      getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
    __cpu_model.__cpu_vendor = VENDOR_OTHER;
    return -1;
  }

  // Leaf 1: family/model in EAX, feature bits in ECX/EDX.
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  detectX86FamilyModel(EAX, &Family, &Model);

  // Find available features.
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);

  // The exported ABI has room for exactly two 32-bit feature words: word 0
  // in __cpu_model, word 1 in __cpu_features2.
  assert((sizeof(Features)/sizeof(Features[0])) == 2);
  __cpu_model.__cpu_features[0] = Features[0];
  __cpu_features2 = Features[1];

  if (Vendor == SIG_INTEL) {
    // Get CPU type.
    getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
                                    &(__cpu_model.__cpu_type),
                                    &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_INTEL;
  } else if (Vendor == SIG_AMD) {
    // Get CPU type.
    getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
                                  &(__cpu_model.__cpu_type),
                                  &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_AMD;
  } else
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  // Writing __cpu_vendor above also marks the model as initialized.
  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);

  return 0;
}
763 #elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
// Fallbacks for libcs whose <sys/auxv.h> does not define these constants.
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
// Load-time constructor: record whether the kernel advertises the LSE
// atomic instructions via the HWCAP auxiliary-vector entry.
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
  unsigned long hwcap = getauxval(AT_HWCAP);
  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
783 #endif // defined(__aarch64__)
784