Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lib/x86/cpu_features.c: cleanup #321

Merged
merged 1 commit into from
Aug 14, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 27 additions & 47 deletions lib/x86/cpu_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,17 @@

#if HAVE_DYNAMIC_X86_CPU_FEATURES

/* With old GCC versions we have to manually save and restore the x86_32 PIC
* register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */
/*
* With old GCC versions we have to manually save and restore the x86_32 PIC
* register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602
*/
#if defined(ARCH_X86_32) && defined(__PIC__)
/* 32-bit PIC build: ask for any early-clobber general register, not ebx. */
# define EBX_CONSTRAINT "=&r"
#else
/* Otherwise the output operand can be bound directly to the b register. */
# define EBX_CONSTRAINT "=b"
#endif

/* Execute the CPUID instruction. */
/* Execute the CPUID instruction. */
static inline void
cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
{
Expand All @@ -59,14 +61,14 @@ cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d)
#endif
}

/* Read an extended control register. */
/* Read an extended control register. */
static inline u64
read_xcr(u32 index)
{
#ifdef _MSC_VER
return _xgetbv(index);
#else
u32 edx, eax;
u32 d, a;

/*
* Execute the "xgetbv" instruction. Old versions of binutils do not
Expand All @@ -76,21 +78,12 @@ read_xcr(u32 index)
* from under the check for OSXSAVE.
*/
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" :
"=d" (edx), "=a" (eax) : "c" (index));
"=d" (d), "=a" (a) : "c" (index));

return ((u64)edx << 32) | eax;
return ((u64)d << 32) | a;
#endif
}

/* Drop any earlier definition of BIT before redefining it for this file. */
#undef BIT
/* Unsigned long value with only bit number 'nr' set. */
#define BIT(nr) (1UL << (nr))

/* Bit masks tested in XCR0 (named for SSE state and AVX state). */
#define XCR0_BIT_SSE BIT(1)
#define XCR0_BIT_AVX BIT(2)

/* Nonzero if bit number 'nr' is set in 'reg'. */
#define IS_SET(reg, nr) ((reg) & BIT(nr))
/* True only if every bit of 'mask' is set in 'reg'. */
#define IS_ALL_SET(reg, mask) (((reg) & (mask)) == (mask))

static const struct cpu_feature x86_cpu_feature_table[] = {
{X86_CPU_FEATURE_SSE2, "sse2"},
{X86_CPU_FEATURE_PCLMUL, "pclmul"},
Expand All @@ -104,47 +97,34 @@ volatile u32 libdeflate_x86_cpu_features = 0;
/* Initialize libdeflate_x86_cpu_features. */
void libdeflate_init_x86_cpu_features(void)
{
u32 max_leaf, a, b, c, d;
u64 xcr0 = 0;
u32 features = 0;
u32 dummy1, dummy2, dummy3, dummy4;
u32 max_function;
u32 features_1, features_2, features_3, features_4;
bool os_avx_support = false;

/* Get maximum supported function */
cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4);
if (max_function < 1)
goto out;

/* Standard feature flags */
cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1);
/* EAX=0: Highest Function Parameter and Manufacturer ID */
cpuid(0, 0, &max_leaf, &b, &c, &d);
if (max_leaf < 1)
goto out;

if (IS_SET(features_1, 26))
/* EAX=1: Processor Info and Feature Bits */
cpuid(1, 0, &a, &b, &c, &d);
if (d & (1 << 26))
features |= X86_CPU_FEATURE_SSE2;

if (IS_SET(features_2, 1))
if (c & (1 << 1))
features |= X86_CPU_FEATURE_PCLMUL;

if (IS_SET(features_2, 27)) { /* OSXSAVE set? */
u64 xcr0 = read_xcr(0);

os_avx_support = IS_ALL_SET(xcr0,
XCR0_BIT_SSE |
XCR0_BIT_AVX);
}

if (os_avx_support && IS_SET(features_2, 28))
if (c & (1 << 27))
xcr0 = read_xcr(0);
if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))
features |= X86_CPU_FEATURE_AVX;

if (max_function < 7)
if (max_leaf < 7)
goto out;

/* Extended feature flags */
cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4);

if (os_avx_support && IS_SET(features_3, 5))
/* EAX=7, ECX=0: Extended Features */
cpuid(7, 0, &a, &b, &c, &d);
if ((b & (1 << 5)) && ((xcr0 & 0x6) == 0x6))
features |= X86_CPU_FEATURE_AVX2;

if (IS_SET(features_3, 8))
if (b & (1 << 8))
features |= X86_CPU_FEATURE_BMI2;

out:
Expand Down