main.cpp
#include "../../commonfiles/xmmval.h"
#include <limits>
using namespace std;

// Assembly routine (defined in ssepackedfloatingpointcompare.asm). Loads the
// 16 bytes at 'a' and 'b' with aligned moves and writes eight 16-byte compare
// results to c[0]..c[7], in the same order as the CmpStr table below.
// All three pointers are accessed with aligned 128-bit instructions.
extern "C" void SsePfpCompareFloat(const XmmVal* a, const XmmVal* b, XmmVal c[8]);

// Display labels for the eight compare results; entry j labels c[j] as
// filled in by SsePfpCompareFloat.
const char* CmpStr[8] =
{
    "EQ",           // c[0]
    "LT",           // c[1]
    "LE",           // c[2]
    "UNORDERED",    // c[3]
    "NE",           // c[4]
    "NLT",          // c[5]
    "NLE",          // c[6]
    "ORDERED"       // c[7]
};

void SsePfpCompareFloatCpp(void)
{
	__attribute__ ((aligned(16))) XmmVal a;
	__attribute__ ((aligned(16))) XmmVal b;
	__attribute__ ((aligned(16))) XmmVal c[8];
    char buff[256];

    a.r32[0] = 2.0;         b.r32[0] = 1.0;
    a.r32[1] = 7.0;         b.r32[1] = 12.0;
    a.r32[2] = -6.0;        b.r32[2] = -6.0;
    a.r32[3] = 3.0;         b.r32[3] = 8.0;

    for (int i = 0; i < 2; i++)
    {
        if (i == 1)
            a.r32[0] = numeric_limits<float>::quiet_NaN();

        SsePfpCompareFloat(&a, &b, c);

        printf("\nResults for SsePfpCompareFloat_ (Iteration %d)\n", i);
        printf("a: %s\n", a.ToString_r32(buff, sizeof(buff)));
        printf("b: %s\n", b.ToString_r32(buff, sizeof(buff)));
        printf("\n");

        for (int j = 0; j < 8; j++)
        {
            char* s =  c[j].ToString_x32(buff, sizeof(buff));
            printf("%10s: %s\n", CmpStr[j], s);
        }
    }
}

// Program entry point: runs the packed compare demonstration once.
// Command-line arguments are not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    SsePfpCompareFloatCpp();
    return 0;
}
ssepackedfloatingpointcompare.asm
; Name:     ssepackedfloatingpointcompare.asm
;
; Build:    g++ -c -m32 main.cpp -o main.o -std=c++11
;           nasm -f elf32 -o ssepackedfloatingpointcompare.o ssepackedfloatingpointcompare.asm
;           g++ -m32 -o ssepackedfloatingpointcompare ssepackedfloatingpointcompare.o main.o ../../commonfiles/xmmval.o
;
; Source:   Modern x86 Assembly Language Programming p. 244

global	SsePfpCompareFloat

section .text

; extern "C" void SsePfpCompareFloat(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
;
; Description:  The following program illustrates use of the cmpps
;               instruction.
;
; Requires:     SSE2

; Frame-relative locations of the three pointer arguments. They are only
; valid after the routine's prologue (push ebp / mov ebp,esp) has run.
; NOTE(review): the offsets assume the 32-bit cdecl stack layout (saved ebp
; at [ebp], return address at [ebp+4], first argument at [ebp+8]) - confirm
; against the calling convention used by the C++ caller.
; a = first argument  (const XmmVal* a)
%define a   [ebp+8]
; b = second argument (const XmmVal* b)
%define b   [ebp+12]
; c = third argument  (XmmVal c[8])
%define c   [ebp+16]

; Helper macro: one packed compare of 'a' against 'b'.
;   %1 = compare pseudo-op to apply (cmpeqps, cmpltps, ...)
;   %2 = index of the destination element in the 'c' array
; Expects xmm0 = 'a', xmm1 = 'b', edx = ptr to 'c'. Uses xmm2 as scratch so
; that xmm0 still holds 'a' for the next compare.
%macro PFP_COMPARE_STORE 2
    movaps   xmm2,xmm0                  ;scratch copy of 'a'
    %1       xmm2,xmm1                  ;xmm2 = compare result
    movdqa   [edx+%2*16],xmm2           ;c[%2] = result (16 bytes per element)
%endmacro

SsePfpCompareFloat:
; Prologue: set up the frame used by the a/b/c argument macros
    push     ebp
    mov      ebp,esp

; Fetch the argument pointers and load both operands
    mov      eax,a                      ;eax = ptr to 'a'
    mov      ecx,b                      ;ecx = ptr to 'b'
    mov      edx,c                      ;edx = ptr to 'c'
    movaps   xmm0,[eax]                 ;xmm0 = 'a'
    movaps   xmm1,[ecx]                 ;xmm1 = 'b'

; One compare per element of 'c', in array order
    PFP_COMPARE_STORE cmpeqps,    0     ;EQUAL
    PFP_COMPARE_STORE cmpltps,    1     ;LESS THAN
    PFP_COMPARE_STORE cmpleps,    2     ;LESS THAN OR EQUAL
    PFP_COMPARE_STORE cmpunordps, 3     ;UNORDERED
    PFP_COMPARE_STORE cmpneqps,   4     ;NOT EQUAL
    PFP_COMPARE_STORE cmpnltps,   5     ;NOT LESS THAN
    PFP_COMPARE_STORE cmpnleps,   6     ;NOT LESS THAN OR EQUAL
    PFP_COMPARE_STORE cmpordps,   7     ;ORDERED

; Epilogue
    pop      ebp
    ret
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o ssepackedfloatingpointcompare.o ssepackedfloatingpointcompare.asm
g++ -m32 -o ssepackedfloatingpointcompare ssepackedfloatingpointcompare.o main.o ../../commonfiles/xmmval.o