main.cpp
#include <stdio.h>
#include "../../commonfiles/miscdefs.h"
#include <memory.h>
// Holds the four register values that the Cpuid() assembly routine
// writes back after executing the cpuid instruction. The field order
// and sizes must match the CpuidRegs struc declared in avxcpuid.asm,
// because that code stores into this memory by field offset.
typedef struct
{
Uint32 EAX; // Contents of EAX after cpuid
Uint32 EBX; // Contents of EBX after cpuid
Uint32 ECX; // Contents of ECX after cpuid
Uint32 EDX; // Contents of EDX after cpuid
} CpuidRegs;
// Feature summary filled in by GetCpuidFeatures(). That function zeroes
// the whole structure before probing, so any flag that was not
// positively detected reads 'false'.
typedef struct
{
    // --- General information ---
    Uint32 MaxEAX;       // Largest EAX input value accepted by cpuid
    char VendorId[13];   // 12-byte vendor id string plus terminating NUL

    // --- Instruction set extensions ('true' means usable) ---
    // Streaming SIMD extensions
    bool SSE, SSE2, SSE3, SSSE3, SSE4_1, SSE4_2;
    // AVX family; only reported when the OS has enabled the YMM state
    bool AVX, AVX2, F16C, FMA;
    // Bit counting, bit manipulation and byte swapping instructions
    bool POPCNT, BMI1, BMI2, LZCNT, MOVBE;

    // --- OS enabled feature information ---
    bool OSXSAVE;        // XSAVE feature set is enabled by the OS
    bool SSE_STATE;      // XMM state is enabled by the OS (XCR0 bit 1)
    bool AVX_STATE;      // YMM state is enabled by the OS (XCR0 bit 2)
} CpuidFeatures;
// Routines implemented in avxcpuid.asm. They are declared with C linkage
// so the unmangled symbols exported by the assembler resolve at link time.
extern "C"
{
    // Runs cpuid with EAX = r_eax and ECX = r_ecx and stores the four
    // result registers in *out. The return value is zero only when all
    // four result registers came back as zero.
    Uint32 Cpuid(Uint32 r_eax, Uint32 r_ecx, CpuidRegs* out);

    // Runs xgetbv for the extended control register selected by r_ecx,
    // storing the low dword in *r_eax and the high dword in *r_edx.
    void Xgetbv(Uint32 r_ecx, Uint32* r_eax, Uint32* r_edx);
}
// This function will not work on older CPUs, especially
// those introduced before 2006. It has been tested using
// only Windows 7 (SP1) and Windows 8.1.
void GetCpuidFeatures(CpuidFeatures* cf)
{
CpuidRegs r_out;
memset(cf, 0, sizeof(CpuidFeatures));
// Get MaxEAX and VendorID
Cpuid(0, 0, &r_out);
cf->MaxEAX = r_out.EAX;
*(Uint32 *)(cf->VendorId + 0) = r_out.EBX;
*(Uint32 *)(cf->VendorId + 4) = r_out.EDX;
*(Uint32 *)(cf->VendorId + 8) = r_out.ECX;
cf->VendorId[12] = '\0';
// Quit if processor is too old
if (cf->MaxEAX < 10)
return;
// Get CPUID.01H feature flags
Cpuid(1, 0, &r_out);
Uint32 cpuid01_ecx = r_out.ECX;
Uint32 cpuid01_edx = r_out.EDX;
// Get CPUID (EAX = 07H, ECX = 00H) feature flags
Cpuid(7, 0, &r_out);
Uint32 cpuid07_ebx = r_out.EBX;
// CPUID.01H:EDX.SSE[bit 25]
cf->SSE = (cpuid01_edx & (0x1 << 25)) ? true : false;
// CPUID.01H:EDX.SSE2[bit 26]
if (cf->SSE)
cf->SSE2 = (cpuid01_edx & (0x1 << 26)) ? true : false;
// CPUID.01H:ECX.SSE3[bit 0]
if (cf->SSE2)
cf->SSE3 = (cpuid01_ecx & (0x1 << 0)) ? true : false;
// CPUID.01H:ECX.SSSE3[bit 9]
if (cf->SSE3)
cf->SSSE3 = (cpuid01_ecx & (0x1 << 9)) ? true : false;
// CPUID.01H:ECX.SSE4.1[bit 19]
if (cf->SSSE3)
cf->SSE4_1 = (cpuid01_ecx & (0x1 << 19)) ? true : false;
// CPUID.01H:ECX.SSE4.2[bit 20]
if (cf->SSE4_1)
cf->SSE4_2 = (cpuid01_ecx & (0x1 << 20)) ? true : false;
// CPUID.01H:ECX.POPCNT[bit 23]
if (cf->SSE4_2)
cf->POPCNT = (cpuid01_ecx & (0x1 << 23)) ? true : false;
// CPUID.01H:ECX.OSXSAVE[bit 27]
cf->OSXSAVE = (cpuid01_ecx & (0x1 << 27)) ? true : false;
// Test OSXSAVE status to verify XGETBV is enabled
if (cf->OSXSAVE)
{
// Use XGETBV to obtain following information
// XSAVE uses SSE state if (XCR0[1] == 1) is true
// XSAVE uses AVX state if (XCR0[2] == 1) is true
Uint32 xgetbv_eax, xgetbv_edx;
Xgetbv(0, &xgetbv_eax, &xgetbv_edx);
cf->SSE_STATE = (xgetbv_eax & (0x1 << 1)) ? true : false;
cf->AVX_STATE = (xgetbv_eax & (0x1 << 2)) ? true : false;
// Is SSE and AVX state information supported by the OS?
if (cf->SSE_STATE && cf->AVX_STATE)
{
// CPUID.01H:ECX.AVX[bit 28] = 1
cf->AVX = (cpuid01_ecx & (0x1 << 28)) ? true : false;
if (cf->AVX)
{
// CPUID.01H:ECX.F16C[bit 29]
cf->F16C = (cpuid01_ecx & (0x1 << 29)) ? true : false;
// CPUID.01H:ECX.FMA[bit 12]
cf->FMA = (cpuid01_ecx & (0x1 << 12)) ? true : false;
// CPUID.(EAX = 07H, ECX = 00H):EBX.AVX2[bit 5]
cf->AVX2 = (cpuid07_ebx & (0x1 << 5)) ? true : false;
}
}
}
// CPUID.(EAX = 07H, ECX = 00H):EBX.BMI1[bit 3]
cf->BMI1 = (cpuid07_ebx & (0x1 << 3)) ? true : false;
// CPUID.(EAX = 07H, ECX = 00H):EBX.BMI2[bit 8]
cf->BMI2 = (cpuid07_ebx & (0x1 << 8)) ? true : false;
// CPUID.80000001H:ECX.LZCNT[bit 5]
Cpuid(0x80000001, 0, &r_out);
cf->LZCNT = (r_out.ECX & (0x1 << 5)) ? true : false;
// Get MOVBE
// CPUID.01H:ECX.MOVBE[bit 22]
cf->MOVBE = cpuid01_ecx & (0x1 << 22) ? true : false;
}
// Program entry point: probes the processor with GetCpuidFeatures() and
// prints one line per reported value. Feature flags print as 0 or 1.
// The command line arguments are not used. Always returns 0.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    CpuidFeatures cf;
    GetCpuidFeatures(&cf);

    printf("Results for AvxCpuid\n");
    // MaxEAX is an unsigned 32-bit value, so it is printed with %u; the
    // cast keeps the argument type in step with the format specifier.
    printf("MaxEAX: %u\n", (unsigned int)cf.MaxEAX);
    printf("VendorId: %s\n", cf.VendorId);

    // Instruction set extensions
    printf("SSE: %d\n", cf.SSE);
    printf("SSE2: %d\n", cf.SSE2);
    printf("SSE3: %d\n", cf.SSE3);
    printf("SSSE3: %d\n", cf.SSSE3);
    printf("SSE4_1: %d\n", cf.SSE4_1);
    printf("SSE4_2: %d\n", cf.SSE4_2);
    printf("POPCNT: %d\n", cf.POPCNT);
    printf("AVX: %d\n", cf.AVX);
    printf("F16C: %d\n", cf.F16C);
    printf("FMA: %d\n", cf.FMA);
    printf("AVX2: %d\n", cf.AVX2);
    printf("BMI1 %d\n", cf.BMI1);
    printf("BMI2 %d\n", cf.BMI2);
    printf("LZCNT %d\n", cf.LZCNT);
    printf("MOVBE %d\n", cf.MOVBE);
    printf("\n");

    // OS enabled state information
    printf("OSXSAVE %d\n", cf.OSXSAVE);
    printf("SSE_STATE %d\n", cf.SSE_STATE);
    printf("AVX_STATE %d\n", cf.AVX_STATE);
    return 0;
}
avxcpuid.asm
; Name: avxcpuid.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o avxcpuid.o avxcpuid.asm
; g++ -m32 -o avxcpuid avxcpuid.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 439
; Export both routines so the C++ code in main.cpp can link against them.
global Cpuid
global Xgetbv
; Field offsets used by Cpuid when it stores its results. This layout
; must match the CpuidRegs typedef in main.cpp: four consecutive dwords
; in the order EAX, EBX, ECX, EDX.
struc CpuidRegs
.RegEAX: resd 1
.RegEBX: resd 1
.RegECX: resd 1
.RegEDX: resd 1
endstruc
section .text
; extern "C" Uint32 Cpuid(Uint32 r_eax, Uint32 r_ecx, CpuidRegs* r_out);
;
; Description: The following function uses the CPUID instruction to
; query processor identification and feature information.
;
; Arguments (read from the stack relative to ebp):
; [ebp+8] r_eax value loaded into eax before cpuid (leaf)
; [ebp+12] r_ecx value loaded into ecx before cpuid (sub-leaf)
; [ebp+16] r_out pointer to the CpuidRegs block that receives
; eax, ebx, ecx and edx after cpuid
;
; Returns: eax == 0 Unsupported CPUID leaf (all four result
; registers were zero)
; eax != 0 Supported CPUID leaf
;
; The return code is valid only if r_eax <= MaxEAX.
Cpuid:
; Set up the stack frame; ebx and esi are saved here and restored
; before returning because both are overwritten below.
push ebp
mov ebp,esp
push ebx
push esi
; Load eax and ecx with provided values, then use cpuid
mov eax,[ebp+8]
mov ecx,[ebp+12]
cpuid
; Save results; esi is used for the pointer because eax, ebx, ecx and
; edx all hold cpuid output at this point
mov esi,[ebp+16]
mov [esi+CpuidRegs.RegEAX],eax
mov [esi+CpuidRegs.RegEBX],ebx
mov [esi+CpuidRegs.RegECX],ecx
mov [esi+CpuidRegs.RegEDX],edx
; Test for unsupported CPUID leaf: OR the four result registers
; together, which yields zero only if every one of them is zero
or eax,ebx
or ecx,edx
or eax,ecx ;eax = return code (zero only if all results were zero)
; Restore saved registers in reverse order of the pushes
pop esi
pop ebx
pop ebp
ret
; extern "C" void Xgetbv(Uint32 r_ecx, Uint32* r_eax, Uint32* r_edx);
;
; Description: The following function uses the XGETBV instruction to
; obtain the contents of the extended control register
; that's specified by r_ecx.
;
; Arguments (read from the stack relative to ebp):
; [ebp+8] r_ecx index of the extended control register to read
; [ebp+12] r_eax pointer that receives the low dword (eax)
; [ebp+16] r_edx pointer that receives the high dword (edx)
;
; Notes: A processor exception will occur if r_ecx is invalid
; or if the XSAVE feature set is disabled.
Xgetbv:
push ebp
mov ebp,esp
mov ecx,[ebp+8] ;ecx = extended control reg
xgetbv
; ecx is reused as the destination pointer for each store, since its
; register index is no longer needed after xgetbv
mov ecx,[ebp+12]
mov [ecx],eax ;save result (low dword)
mov ecx,[ebp+16]
mov [ecx],edx ;save result (high dword)
pop ebp
ret
build
# Compile the C++ driver to a 32-bit object file
g++ -c -m32 main.cpp -o main.o
# Assemble the cpuid/xgetbv routines into a 32-bit ELF object file
nasm -f elf32 -o avxcpuid.o avxcpuid.asm
# Link both objects into the 32-bit avxcpuid executable
g++ -m32 -o avxcpuid avxcpuid.o main.o