main.cpp
#include "../../commonfiles/ymmval.h"
#include <limits>
using namespace std;
extern "C" void AvxPfpCompare(const YmmVal* a, const YmmVal* b, YmmVal c[8]);
int main(int argc, char* argv[])
{
char buff[256];
__attribute__ ((aligned(32))) YmmVal a;
__attribute__ ((aligned(32))) YmmVal b;
__attribute__ ((aligned(32))) YmmVal c[8];
const char* instr_names[8] =
{
"vcmpeqpd", "vcmpneqpd", "vcmpltpd", "vcmplepd",
"vcmpgtpd", "vcmpgepd", "vcmpordpd", "vcmpunordpd"
};
a.r64[0] = 42.125;
a.r64[1] = -36.875;
a.r64[2] = 22.95;
a.r64[3] = 3.75;
b.r64[0] = -0.0625;
b.r64[1] = -67.375;
b.r64[2] = 22.95;
b.r64[3] = numeric_limits<double>::quiet_NaN();
AvxPfpCompare(&a, &b, c);
printf("Results for AvxPackedFloatingPointCompare\n");
printf("a: %s\n", a.ToString_r64(buff, sizeof(buff), false));
printf("a: %s\n", a.ToString_r64(buff, sizeof(buff), true));
printf("\n");
printf("b: %s\n", b.ToString_r64(buff, sizeof(buff), false));
printf("b: %s\n", b.ToString_r64(buff, sizeof(buff), true));
for (int i = 0; i < 8; i++)
{
printf("\n%s results\n", instr_names[i]);
printf(" %s\n", c[i].ToString_x64(buff, sizeof(buff), false));
printf(" %s\n", c[i].ToString_x64(buff, sizeof(buff), true));
}
return 0;
}
avxpackedfloatingpointcompare.asm
; Name: avxpackedfloatingpointcompare.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o avxpackedfloatingpointcompare.o avxpackedfloatingpointcompare.asm
; g++ -m32 -o avxpackedfloatingpointcompare avxpackedfloatingpointcompare.o main.o ../../commonfiles/ymmval.o
;
; Source: Modern x86 Assembly Language Programming p. 386
global AvxPfpCompare
section .text
; extern "C" void AvxPfpCompare_(const YmmVal* a, const YmmVal* b, YmmVal c[8]);
;
; Description: The following function demonstrates use of the
; x86-AVX compare instruction vcmppd.
;
; Requires: AVX
%define a [ebp+8]
%define b [ebp+12]
%define c [ebp+16]
AvxPfpCompare:
push ebp
mov ebp,esp
; Load argument values
mov eax,a ;eax = ptr to a
mov ecx,b ;ecx = ptr to b
mov edx,c ;edx = ptr to c
vmovapd ymm0,[eax] ;ymm0 = a
vmovapd ymm1,[ecx] ;ymm1 = b
; Compare for equality
vcmpeqpd ymm2,ymm0,ymm1
vmovapd [edx],ymm2
; Compare for inequality
vcmpneqpd ymm2,ymm0,ymm1
vmovapd [edx+32],ymm2
; Compare for less than
vcmpltpd ymm2,ymm0,ymm1
vmovapd [edx+64],ymm2
; Compare for less than or equal
vcmplepd ymm2,ymm0,ymm1
vmovapd [edx+96],ymm2
; Compare for greater than
vcmpgtpd ymm2,ymm0,ymm1
vmovapd [edx+128],ymm2
; Compare for greater than or equal
vcmpgepd ymm2,ymm0,ymm1
vmovapd [edx+160],ymm2
; Compare for ordered
vcmpordpd ymm2,ymm0,ymm1
vmovapd [edx+192],ymm2
; Compare for unordered
vcmpunordpd ymm2,ymm0,ymm1
vmovapd [edx+224],ymm2
; Zero upper 128-bit of all YMM registers to avoid potential x86-AVX
; to x86-SSE transition penalties.
vzeroupper
pop ebp
ret
build
g++ -c -m32 main.cpp -o main.o
nasm -f elf32 -o avxpackedfloatingpointcompare.o avxpackedfloatingpointcompare.asm
g++ -m32 -o avxpackedfloatingpointcompare avxpackedfloatingpointcompare.o main.o ../../commonfiles/ymmval.o