diff options
Diffstat (limited to 'contrib/compiler-rt/lib/builtins/cpu_model.c')
-rw-r--r-- | contrib/compiler-rt/lib/builtins/cpu_model.c | 651 |
1 files changed, 651 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/builtins/cpu_model.c b/contrib/compiler-rt/lib/builtins/cpu_model.c new file mode 100644 index 000000000000..fb2b899fc70a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/cpu_model.c @@ -0,0 +1,651 @@ +//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is based on LLVM's lib/Support/Host.cpp. +// It implements the operating system Host concept and builtin +// __cpu_model for the compiler_rt library, for x86 only. +// +//===----------------------------------------------------------------------===// + +#if (defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64)) && \ + (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) + +#include <assert.h> + +#define bool int +#define true 1 +#define false 0 + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#ifndef __has_attribute +#define __has_attribute(attr) 0 +#endif + +enum VendorSignatures { + SIG_INTEL = 0x756e6547 /* Genu */, + SIG_AMD = 0x68747541 /* Auth */ +}; + +enum ProcessorVendors { + VENDOR_INTEL = 1, + VENDOR_AMD, + VENDOR_OTHER, + VENDOR_MAX +}; + +enum ProcessorTypes { + INTEL_BONNELL = 1, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + INTEL_KNM, + INTEL_GOLDMONT, + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + CPU_TYPE_MAX +}; + +enum ProcessorSubtypes { + INTEL_COREI7_NEHALEM = 1, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + INTEL_COREI7_CANNONLAKE, + INTEL_COREI7_ICELAKE_CLIENT, + INTEL_COREI7_ICELAKE_SERVER, + CPU_SUBTYPE_MAX +}; + +enum ProcessorFeatures { + FEATURE_CMOV = 0, + FEATURE_MMX, + FEATURE_POPCNT, + FEATURE_SSE, + FEATURE_SSE2, + FEATURE_SSE3, + FEATURE_SSSE3, + FEATURE_SSE4_1, + FEATURE_SSE4_2, + FEATURE_AVX, + FEATURE_AVX2, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + FEATURE_AVX512VBMI2, + FEATURE_GFNI, + FEATURE_VPCLMULQDQ, + FEATURE_AVX512VNNI, + FEATURE_AVX512BITALG +}; + +// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). +// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID +// support. Consequently, for i386, the presence of CPUID is checked first +// via the corresponding eflags bit. +static bool isCpuIdSupported() { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__i386__) + int __cpuid_supported; + __asm__(" pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r"(__cpuid_supported) + : + : "eax", "ecx"); + if (!__cpuid_supported) + return false; +#endif + return true; +#endif + return true; +} + +// This code is copied from lib/Support/Host.cpp. +// Changes to either file should be mirrored in the other. + +/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in +/// the specified arguments. If we can't run cpuid on the host, return true. +static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, + unsigned *rECX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? + __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +#else + return true; +#endif +#elif defined(_MSC_VER) + // The MSVC intrinsic is portable across x86 and x64. + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return +/// the 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, + unsigned *rEAX, unsigned *rEBX, unsigned *rECX, + unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? + __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#else + return true; +#endif +#elif defined(_MSC_VER) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +// Read control register 0 (XCR0). Used to detect features such as AVX. +static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); + return false; +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + *rEAX = Result; + *rEDX = Result >> 32; + return false; +#else + return true; +#endif +} + +static void detectX86FamilyModel(unsigned EAX, unsigned *Family, + unsigned *Model) { + *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (*Family == 6 || *Family == 0xf) { + if (*Family == 0xf) + // Examine extended family ID if family ID is F. + *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +static void +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, unsigned Features, + unsigned *Type, unsigned *Subtype) { + if (Brand_id != 0) + return; + switch (Family) { + case 6: + switch (Model) { + case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile + // processor, Intel Core 2 Quad processor, Intel Core 2 Quad + // mobile processor, Intel Core 2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm process. + case 0x16: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model + // 17h. All processors are manufactured using the 45 nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + case 0x1d: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + *Type = INTEL_CORE2; // "penryn" + break; + case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. + // As found in a Summer 2010 model iMac. + case 0x1f: + case 0x2e: // Nehalem EX + *Type = INTEL_COREI7; // "nehalem" + *Subtype = INTEL_COREI7_NEHALEM; + break; + case 0x25: // Intel Core i7, laptop version. + case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. + case 0x2f: // Westmere EX + *Type = INTEL_COREI7; // "westmere" + *Subtype = INTEL_COREI7_WESTMERE; + break; + case 0x2a: // Intel Core i7 processor. All processors are manufactured + // using the 32 nm process. + case 0x2d: + *Type = INTEL_COREI7; //"sandybridge" + *Subtype = INTEL_COREI7_SANDYBRIDGE; + break; + case 0x3a: + case 0x3e: // Ivy Bridge EP + *Type = INTEL_COREI7; // "ivybridge" + *Subtype = INTEL_COREI7_IVYBRIDGE; + break; + + // Haswell: + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + *Type = INTEL_COREI7; // "haswell" + *Subtype = INTEL_COREI7_HASWELL; + break; + + // Broadwell: + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: + *Type = INTEL_COREI7; // "broadwell" + *Subtype = INTEL_COREI7_BROADWELL; + break; + + // Skylake: + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop + *Type = INTEL_COREI7; // "skylake" + *Subtype = INTEL_COREI7_SKYLAKE; + break; + + // Skylake Xeon: + case 0x55: + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + break; + + // Cannonlake: + case 0x66: + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake" + break; + + case 0x1c: // Most 45 nm Intel Atom processors + case 0x26: // 45 nm Atom Lincroft + case 0x27: // 32 nm Atom Medfield + case 0x35: // 32 nm Atom Midview + case 0x36: // 32 nm Atom Midview + *Type = INTEL_BONNELL; + break; // "bonnell" + + // Atom Silvermont codes from the Intel software optimization guide. + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + case 0x4c: // really airmont + *Type = INTEL_SILVERMONT; + break; // "silvermont" + // Goldmont: + case 0x5c: // Apollo Lake + case 0x5f: // Denverton + *Type = INTEL_GOLDMONT; + break; // "goldmont" + case 0x7a: + *Type = INTEL_GOLDMONT_PLUS; + break; + + case 0x57: + *Type = INTEL_KNL; // knl + break; + + case 0x85: + *Type = INTEL_KNM; // knm + break; + + default: // Unknown family 6 CPU. + break; + break; + } + default: + break; // Unknown. + } +} + +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, + unsigned *Subtype) { + // FIXME: this poorly matches the generated SubtargetFeatureKV table. There + // appears to be no way to generate the wide variety of AMD-specific targets + // from the information returned from CPUID. + switch (Family) { + case 16: + *Type = AMDFAM10H; // "amdfam10" + switch (Model) { + case 2: + *Subtype = AMDFAM10H_BARCELONA; + break; + case 4: + *Subtype = AMDFAM10H_SHANGHAI; + break; + case 8: + *Subtype = AMDFAM10H_ISTANBUL; + break; + } + break; + case 20: + *Type = AMD_BTVER1; + break; // "btver1"; + case 21: + *Type = AMDFAM15H; + if (Model >= 0x60 && Model <= 0x7f) { + *Subtype = AMDFAM15H_BDVER4; + break; // "bdver4"; 60h-7Fh: Excavator + } + if (Model >= 0x30 && Model <= 0x3f) { + *Subtype = AMDFAM15H_BDVER3; + break; // "bdver3"; 30h-3Fh: Steamroller + } + if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + *Subtype = AMDFAM15H_BDVER2; + break; // "bdver2"; 02h, 10h-1Fh: Piledriver + } + if (Model <= 0x0f) { + *Subtype = AMDFAM15H_BDVER1; + break; // "bdver1"; 00h-0Fh: Bulldozer + } + break; + case 22: + *Type = AMD_BTVER2; + break; // "btver2" + case 23: + *Type = AMDFAM17H; + *Subtype = AMDFAM17H_ZNVER1; + break; + default: + break; // "generic" + } +} + +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *FeaturesOut, + unsigned *Features2Out) { + unsigned Features = 0; + unsigned Features2 = 0; + unsigned EAX, EBX; + +#define setFeature(F) \ + do { \ + if (F < 32) \ + Features |= 1U << (F & 0x1f); \ + else if (F < 64) \ + Features2 |= 1U << ((F - 32) & 0x1f); \ + } while (0) + + if ((EDX >> 15) & 1) + setFeature(FEATURE_CMOV); + if ((EDX >> 23) & 1) + setFeature(FEATURE_MMX); + if ((EDX >> 25) & 1) + setFeature(FEATURE_SSE); + if ((EDX >> 26) & 1) + setFeature(FEATURE_SSE2); + + if ((ECX >> 0) & 1) + setFeature(FEATURE_SSE3); + if ((ECX >> 1) & 1) + setFeature(FEATURE_PCLMUL); + if ((ECX >> 9) & 1) + setFeature(FEATURE_SSSE3); + if ((ECX >> 12) & 1) + setFeature(FEATURE_FMA); + if ((ECX >> 19) & 1) + setFeature(FEATURE_SSE4_1); + if ((ECX >> 20) & 1) + setFeature(FEATURE_SSE4_2); + if ((ECX >> 23) & 1) + setFeature(FEATURE_POPCNT); + if ((ECX >> 25) & 1) + setFeature(FEATURE_AES); + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. + const unsigned AVXBits = (1 << 27) | (1 << 28); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + if (HasAVX) + setFeature(FEATURE_AVX); + + bool HasLeaf7 = + MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + if (HasLeaf7 && ((EBX >> 3) & 1)) + setFeature(FEATURE_BMI); + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + setFeature(FEATURE_AVX2); + if (HasLeaf7 && ((EBX >> 9) & 1)) + setFeature(FEATURE_BMI2); + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512F); + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512DQ); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512IFMA); + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512PF); + if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512ER); + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512CD); + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BW); + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VL); + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 8) & 1)) + setFeature(FEATURE_GFNI); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + setFeature(FEATURE_VPCLMULQDQ); + if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VNNI); + if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BITALG); + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VPOPCNTDQ); + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX5124VNNIW); + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX5124FMAPS); + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + setFeature(FEATURE_SSE4_A); + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + setFeature(FEATURE_XOP); + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + setFeature(FEATURE_FMA4); + + *FeaturesOut = Features; + *Features2Out = Features2; +#undef setFeature +} + +#if defined(HAVE_INIT_PRIORITY) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101)) +#elif __has_attribute(__constructor__) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) +#else +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE +#endif + +int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; + +struct __processor_model { + unsigned int __cpu_vendor; + unsigned int __cpu_type; + unsigned int __cpu_subtype; + unsigned int __cpu_features[1]; +} __cpu_model = {0, 0, 0, {0}}; +unsigned int __cpu_features2; + +/* A constructor function that is sets __cpu_model and __cpu_features2 with + the right values. This needs to run only once. This constructor is + given the highest priority and it should run before constructors without + the priority set. However, it still runs after ifunc initializers and + needs to be called explicitly there. */ + +int CONSTRUCTOR_ATTRIBUTE +__cpu_indicator_init(void) { + unsigned EAX, EBX, ECX, EDX; + unsigned MaxLeaf = 5; + unsigned Vendor; + unsigned Model, Family, Brand_id; + unsigned Features = 0; + unsigned Features2 = 0; + + /* This function needs to run just once. */ + if (__cpu_model.__cpu_vendor) + return 0; + + if (!isCpuIdSupported()) + return -1; + + /* Assume cpuid insn present. Run in level 0 to get vendor id. */ + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { + __cpu_model.__cpu_vendor = VENDOR_OTHER; + return -1; + } + getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + detectX86FamilyModel(EAX, &Family, &Model); + Brand_id = EBX & 0xff; + + /* Find available features. */ + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); + __cpu_model.__cpu_features[0] = Features; + __cpu_features2 = Features2; + + if (Vendor == SIG_INTEL) { + /* Get CPU type. */ + getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_INTEL; + } else if (Vendor == SIG_AMD) { + /* Get CPU type. */ + getAMDProcessorTypeAndSubtype(Family, Model, Features, + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_AMD; + } else + __cpu_model.__cpu_vendor = VENDOR_OTHER; + + assert(__cpu_model.__cpu_vendor < VENDOR_MAX); + assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); + assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); + + return 0; +} + +#endif |