main.cpp
#include "../../commonfiles/xmmval.h"
#include <cstdio>
#include <limits>
using namespace std;
// Assembly routine (defined in ssepackedfloatingpointcompare.asm).
// Compares the packed single-precision lanes of *a against *b with eight
// different predicates and writes one result per predicate to c[0]..c[7], in
// the order EQ, LT, LE, UNORDERED, NE, NLT, NLE, ORDERED.
// The routine reads 'a' and 'b' and writes 'c' with aligned 16-byte moves, so
// all three buffers must be 16-byte aligned.
// NOTE(review): the routine advances through 'c' in 16-byte steps, i.e. it
// assumes sizeof(XmmVal) == 16 - confirm against xmmval.h.
extern "C" void SsePfpCompareFloat(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
// Printable names of the eight compare predicates. Entry k is the label
// printed next to result c[k], so the order here has to match the order in
// which the assembly routine stores its results.
const char* CmpStr[8] = {
    "EQ",
    "LT",
    "LE",
    "UNORDERED",
    "NE",
    "NLT",
    "NLE",
    "ORDERED",
};
// Driver for the packed single-precision compare demonstration.
//
// Fills two XmmVal operands with four float lanes each, calls the assembly
// routine SsePfpCompareFloat, and prints both operands followed by the eight
// result values, one line per predicate, labelled from CmpStr.
// The sequence runs twice: on the second pass lane 0 of 'a' is replaced with a
// quiet NaN to exercise the NaN-sensitive predicates.
void SsePfpCompareFloatCpp(void)
{
    // The assembly routine accesses all three buffers with aligned 16-byte
    // moves, so each one is forced onto a 16-byte boundary.
    __attribute__ ((aligned(16))) XmmVal a;
    __attribute__ ((aligned(16))) XmmVal b;
    __attribute__ ((aligned(16))) XmmVal c[8];
    char buff[256];   // scratch text buffer reused by every ToString_* call

    // Float literals avoid an implicit double -> float conversion per lane.
    a.r32[0] = 2.0f;   b.r32[0] = 1.0f;
    a.r32[1] = 7.0f;   b.r32[1] = 12.0f;
    a.r32[2] = -6.0f;  b.r32[2] = -6.0f;
    a.r32[3] = 3.0f;   b.r32[3] = 8.0f;

    for (int i = 0; i < 2; i++)
    {
        // Second pass: make lane 0 of 'a' a NaN.
        if (i == 1)
            a.r32[0] = std::numeric_limits<float>::quiet_NaN();

        SsePfpCompareFloat(&a, &b, c);

        // The banner names the routine exactly as it is exported by the
        // assembly file (no trailing underscore).
        std::printf("\nResults for SsePfpCompareFloat (Iteration %d)\n", i);
        std::printf("a: %s\n", a.ToString_r32(buff, sizeof(buff)));
        std::printf("b: %s\n", b.ToString_r32(buff, sizeof(buff)));
        std::printf("\n");

        // One line per predicate: label, then the result shown via ToString_x32.
        for (int j = 0; j < 8; j++)
        {
            char* s = c[j].ToString_x32(buff, sizeof(buff));
            std::printf("%10s: %s\n", CmpStr[j], s);
        }
    }
}
// Program entry point: runs the packed-compare demonstration once and
// reports success to the caller.
int main(int argc, char* argv[])
{
    // Command-line arguments are accepted but not used.
    (void)argc;
    (void)argv;

    SsePfpCompareFloatCpp();
    return 0;
}
ssepackedfloatingpointcompare.asm
; Name: ssepackedfloatingpointcompare.asm
;
; Build: g++ -c -m32 main.cpp -o main.o -std=c++11
; nasm -f elf32 -o ssepackedfloatingpointcompare.o ssepackedfloatingpointcompare.asm
; g++ -m32 -o ssepackedfloatingpointcompare ssepackedfloatingpointcompare.o main.o ../../commonfiles/xmmval.o
;
; Source: Modern x86 Assembly Language Programming p. 244
; Export the routine's symbol so the C++ driver (which declares it extern "C")
; can link against it.
global SsePfpCompareFloat
; Everything that follows is assembled into the .text section.
section .text
; extern "C" void SsePfpCompareFloat(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
;
; Description: The following routine illustrates use of the cmpps
; instruction through its eight predicate pseudo-ops. The packed
; single-precision lanes of 'a' are compared against the matching
; lanes of 'b' once per predicate; each result is a 16-byte vector
; of per-lane masks (all ones = predicate true, all zeros = false).
;
; Arguments (stack, addressed relative to ebp after the prologue):
;   [ebp+8]   a - pointer to the first operand  (16 bytes, 16-byte aligned)
;   [ebp+12]  b - pointer to the second operand (16 bytes, 16-byte aligned)
;   [ebp+16]  c - pointer to 128 bytes of output (16-byte aligned)
;
; Output layout (byte offset from 'c'):
;     0 EQ     16 LT     32 LE     48 UNORDERED
;    64 NE     80 NLT    96 NLE   112 ORDERED
;
; Registers modified: eax, ecx, edx, xmm0, xmm1, xmm2 (ebp is preserved).
;
; Requires: SSE

; Argument macros use distinctive names; single-letter macro names would
; silently rewrite any later token spelled the same way.
%define ARG_A [ebp+8]
%define ARG_B [ebp+12]
%define ARG_C [ebp+16]

SsePfpCompareFloat:
    push ebp
    mov ebp,esp

    mov eax,ARG_A               ;eax = ptr to 'a'
    mov ecx,ARG_B               ;ecx = ptr to 'b'
    mov edx,ARG_C               ;edx = ptr to 'c'

    movaps xmm0,[eax]           ;load 'a' into xmm0
    movaps xmm1,[ecx]           ;load 'b' into xmm1

; Each compare overwrites its destination register, so 'a' is copied into
; xmm2 before every compare and xmm0/xmm1 stay intact. Results are stored
; with movaps (same aligned-store behavior as movdqa, but it is the
; floating-point form and needs SSE only).

; Perform packed EQUAL compare
    movaps xmm2,xmm0
    cmpeqps xmm2,xmm1
    movaps [edx],xmm2

; Perform packed LESS THAN compare
    movaps xmm2,xmm0
    cmpltps xmm2,xmm1
    movaps [edx+16],xmm2

; Perform packed LESS THAN OR EQUAL compare
    movaps xmm2,xmm0
    cmpleps xmm2,xmm1
    movaps [edx+32],xmm2

; Perform packed UNORDERED compare
    movaps xmm2,xmm0
    cmpunordps xmm2,xmm1
    movaps [edx+48],xmm2

; Perform packed NOT EQUAL compare
    movaps xmm2,xmm0
    cmpneqps xmm2,xmm1
    movaps [edx+64],xmm2

; Perform packed NOT LESS THAN compare
    movaps xmm2,xmm0
    cmpnltps xmm2,xmm1
    movaps [edx+80],xmm2

; Perform packed NOT LESS THAN OR EQUAL compare
    movaps xmm2,xmm0
    cmpnleps xmm2,xmm1
    movaps [edx+96],xmm2

; Perform packed ORDERED compare
    movaps xmm2,xmm0
    cmpordps xmm2,xmm1
    movaps [edx+112],xmm2

    pop ebp
    ret

; Declare that this object does not need an executable stack. Without this
; note the GNU linker assumes an executable stack is required.
section .note.GNU-stack noalloc noexec nowrite progbits
build
# Builds the 32-bit ssepackedfloatingpointcompare demo: compile the C++ driver,
# assemble the SSE routine, then link both with the shared XmmVal object.
# The steps are chained with && so a failed compile or assemble stops the
# build instead of linking against stale object files.
# NOTE: ../../commonfiles/xmmval.o is not built here and must already exist.
g++ -c -m32 main.cpp -o main.o -std=c++11 &&
nasm -f elf32 -o ssepackedfloatingpointcompare.o ssepackedfloatingpointcompare.asm &&
g++ -m32 -o ssepackedfloatingpointcompare ssepackedfloatingpointcompare.o main.o ../../commonfiles/xmmval.o