main.cpp
#include "../../commonfiles/xmmval.h"
#define _USE_MATH_DEFINES
#include <math.h>

// Selects which packed conversion SsePfpConvert performs.
// Each enumerator is used as an index into CvtOpTable in
// ssepackedfloatingpointconversions.asm, so the numeric values below
// must stay in step with the order of the entries in that table.
enum CvtOp : unsigned int
{
    Cvtdq2ps = 0,       // packed signed doublewords  -> packed single-precision
    Cvtdq2pd = 1,       // packed signed doublewords  -> packed double-precision
    Cvtps2dq = 2,       // packed single-precision    -> packed signed doublewords
    Cvtpd2dq = 3,       // packed double-precision    -> packed signed doublewords
    Cvtps2pd = 4,       // packed single-precision    -> packed double-precision
    Cvtpd2ps = 5        // packed double-precision    -> packed single-precision
};

// Implemented in ssepackedfloatingpointconversions.asm.
// Loads the 128-bit value at 'a', applies the conversion selected by
// 'cvt_op', and stores the 128-bit result at 'b'.  The assembly uses
// aligned loads and stores (movdqa/movaps/movapd), so both pointers must
// be 16-byte aligned.  If 'cvt_op' is not below the number of entries in
// the assembly jump table, the routine returns without writing to 'b'.
extern "C" void SsePfpConvert(const XmmVal* a, XmmVal* b, CvtOp cvt_op);

// Runs the Cvtdq2ps, Cvtps2dq and Cvtps2pd conversions through
// SsePfpConvert and prints the source and result of each one.
void SsePfpConversions32(void)
{
    // SsePfpConvert uses aligned 128-bit loads and stores, so both operands
    // must sit on a 16-byte boundary.  alignas is the standard C++11
    // spelling of the alignment request.
    alignas(16) XmmVal a;
    alignas(16) XmmVal b;
    char buff[256];             // scratch buffer shared by the ToString_* calls

    // cvtdq2ps converts all four signed doublewords of 'a'
    a.i32[0] = 10;
    a.i32[1] = -500;
    a.i32[2] = 600;
    a.i32[3] = -1024;
    SsePfpConvert(&a, &b, CvtOp::Cvtdq2ps);
    printf("\nResults for CvtOp::Cvtdq2ps\n");
    printf("  a: %s\n", a.ToString_i32(buff, sizeof(buff)));
    printf("  b: %s\n", b.ToString_r32(buff, sizeof(buff)));

    // cvtps2dq converts all four SPFP values of 'a'; the inputs are
    // fractional so the printed integers show how each value was rounded
    a.r32[0] = 1.0f / 3.0f;
    a.r32[1] = 2.0f / 3.0f;
    a.r32[2] = -a.r32[0] * 2.0f;
    a.r32[3] = -a.r32[1] * 2.0f;
    SsePfpConvert(&a, &b, CvtOp::Cvtps2dq);
    printf("\nResults for CvtOp::Cvtps2dq\n");
    printf("  a: %s\n", a.ToString_r32(buff, sizeof(buff)));
    printf("  b: %s\n", b.ToString_i32(buff, sizeof(buff)));

    // cvtps2pd converts the two low-order SPFP values of 'a'
    a.r32[0] = 1.0f / 7.0f;
    a.r32[1] = 2.0f / 9.0f;
    a.r32[2] = 0;
    a.r32[3] = 0;
    SsePfpConvert(&a, &b, CvtOp::Cvtps2pd);
    printf("\nResults for CvtOp::Cvtps2pd\n");
    printf("  a: %s\n", a.ToString_r32(buff, sizeof(buff)));
    printf("  b: %s\n", b.ToString_r64(buff, sizeof(buff)));
}

// Runs the Cvtdq2pd, Cvtpd2dq and Cvtpd2ps conversions through
// SsePfpConvert and prints the source and result of each one.
void SsePfpConversions64(void)
{
    // SsePfpConvert uses aligned 128-bit loads and stores, so both operands
    // must sit on a 16-byte boundary.  alignas is the standard C++11
    // spelling of the alignment request.
    alignas(16) XmmVal a;
    alignas(16) XmmVal b;
    char buff[256];             // scratch buffer shared by the ToString_* calls

    // cvtdq2pd converts the two low-order doubleword integers of 'a'
    a.i32[0] = 10;
    a.i32[1] = -20;
    a.i32[2] = 0;
    a.i32[3] = 0;
    SsePfpConvert(&a, &b, CvtOp::Cvtdq2pd);
    printf("\nResults for CvtOp::Cvtdq2pd\n");
    printf("  a: %s\n", a.ToString_i32(buff, sizeof(buff)));
    printf("  b: %s\n", b.ToString_r64(buff, sizeof(buff)));

    // cvtpd2dq sets the two high-order doublewords of 'b' to zero
    a.r64[0] = M_PI;
    a.r64[1] = M_E;
    SsePfpConvert(&a, &b, CvtOp::Cvtpd2dq);
    printf("\nResults for CvtOp::Cvtpd2dq\n");
    printf("  a: %s\n", a.ToString_r64(buff, sizeof(buff)));
    printf("  b: %s\n", b.ToString_i32(buff, sizeof(buff)));

    // cvtpd2ps sets the two high-order SPFP values of 'b' to zero
    a.r64[0] = M_SQRT2;
    a.r64[1] = M_SQRT1_2;
    SsePfpConvert(&a, &b, CvtOp::Cvtpd2ps);
    printf("\nResults for CvtOp::Cvtpd2ps\n");
    printf("  a: %s\n", a.ToString_r64(buff, sizeof(buff)));
    printf("  b: %s\n", b.ToString_r32(buff, sizeof(buff)));
}

// Program entry point: runs the two conversion demos in order.
// The command-line arguments are accepted but not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    SsePfpConversions32();
    SsePfpConversions64();

    return 0;
}
ssepackedfloatingpointconversions.asm
; Name:     ssepackedfloatingpointconversions.asm
;
; Build:    g++ -c -m32 main.cpp -o main.o -std=c++11
;           nasm -f elf32 -o ssepackedfloatingpointconversions.o ssepackedfloatingpointconversions.asm
;           g++ -m32 -o ssepackedfloatingpointconversions ssepackedfloatingpointconversions.o main.o ../../commonfiles/xmmval.o
;
; Source:   Modern x86 Assembly Language Programming p. 249

global	SsePfpConvert

section .rodata
; Jump table used by SsePfpConvert, indexed by the CvtOp value it receives.
; The order of the entries must match the enum CvtOp defined in main.cpp.
; The table is only ever read, so it is placed in read-only data rather
; than in the writable .data section.
align 4

CvtOpTable: dd SsePfpConvert.sseCvtdq2ps    ; CvtOp::Cvtdq2ps
            dd SsePfpConvert.sseCvtdq2pd    ; CvtOp::Cvtdq2pd
            dd SsePfpConvert.sseCvtps2dq    ; CvtOp::Cvtps2dq
            dd SsePfpConvert.sseCvtpd2dq    ; CvtOp::Cvtpd2dq
            dd SsePfpConvert.sseCvtps2pd    ; CvtOp::Cvtps2pd
            dd SsePfpConvert.sseCvtpd2ps    ; CvtOp::Cvtpd2ps

; Number of entries in the table (each entry is one 4-byte dword).
CvtOpTableCount: equ ($ - CvtOpTable) / 4

section .text

; extern "C" void SsePfpConvert(const XmmVal* a, XmmVal* b, CvtOp cvt_op);
;
; Purpose:   Apply one packed conversion, selected by 'cvt_op', to the
;            128-bit value at 'a' and store the 128-bit result at 'b'.
; Target:    32-bit code; all arguments are read from the stack.
; In:        [ebp+8]  = a       (accessed with aligned 128-bit loads)
;            [ebp+12] = b       (accessed with aligned 128-bit stores)
;            [ebp+16] = cvt_op  (index into CvtOpTable)
; Out:       *b holds the converted value.  Nothing is returned.  When
;            cvt_op is not below CvtOpTableCount, *b is left untouched.
; Clobbers:  eax, ecx, edx, xmm0, flags
; Requires:  SSE2

SsePfpConvert:
        push     ebp
        mov      ebp, esp

; Check the selector before anything else.  The comparison is unsigned,
; so any value at or above the table size is rejected.
        mov      edx, [ebp+16]                   ; edx = cvt_op
        cmp      edx, CvtOpTableCount
        jae      .invalidOp

        mov      eax, [ebp+8]                    ; eax = a (source)
        mov      ecx, [ebp+12]                   ; ecx = b (destination)
        jmp      [CvtOpTable + edx*4]            ; dispatch through the table

; Each label below is a CvtOpTable target.  Every handler loads *a,
; converts it in place in xmm0, stores to *b and returns.

; cvt_op = Cvtdq2ps: four signed doublewords -> four SPFP values
.sseCvtdq2ps:
        movdqa   xmm0, [eax]
        cvtdq2ps xmm0, xmm0
        movaps   [ecx], xmm0
        pop      ebp
        ret

; cvt_op = Cvtdq2pd: two low-order signed doublewords -> two DPFP values
.sseCvtdq2pd:
        movdqa   xmm0, [eax]
        cvtdq2pd xmm0, xmm0
        movapd   [ecx], xmm0
        pop      ebp
        ret

; cvt_op = Cvtps2dq: four SPFP values -> four signed doublewords
.sseCvtps2dq:
        movaps   xmm0, [eax]
        cvtps2dq xmm0, xmm0
        movdqa   [ecx], xmm0
        pop      ebp
        ret

; cvt_op = Cvtpd2dq: two DPFP values -> two signed doublewords
.sseCvtpd2dq:
        movapd   xmm0, [eax]
        cvtpd2dq xmm0, xmm0
        movdqa   [ecx], xmm0
        pop      ebp
        ret

; cvt_op = Cvtps2pd: two low-order SPFP values -> two DPFP values
.sseCvtps2pd:
        movaps   xmm0, [eax]
        cvtps2pd xmm0, xmm0
        movapd   [ecx], xmm0
        pop      ebp
        ret

; cvt_op = Cvtpd2ps: two DPFP values -> two SPFP values
.sseCvtpd2ps:
        movapd   xmm0, [eax]
        cvtpd2ps xmm0, xmm0
        movaps   [ecx], xmm0
        pop      ebp
        ret

; Selector out of range: return without touching *b.
.invalidOp:
        pop      ebp
        ret

<

build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o ssepackedfloatingpointconversions.o ssepackedfloatingpointconversions.asm
g++ -m32 -o ssepackedfloatingpointconversions ssepackedfloatingpointconversions.o main.o ../../commonfiles/xmmval.o