main.cpp
#include <stdio.h>
#define _USE_MATH_DEFINES
#include <math.h>
#include "../commonfiles/miscdefs.h"

// Simple union for data exchange with the assembly routine SseSfpConversion.
// All members share the same storage; which member is meaningful depends on
// the conversion that was requested.
// NOTE(review): Uint32/Uint64 are declared in miscdefs.h (not visible here);
// presumably 32-bit and 64-bit unsigned integers - confirm.
union XmmScalar
{
    float r32;      // value viewed as a float
    double r64;     // value viewed as a double
    Uint32 i32;     // value viewed as a 32-bit integer
    Uint64 i64;     // value viewed as a 64-bit integer
};

// Conversion selector passed to SseSfpConversion.
// The assembly code uses the numeric value as an index into CvtOpTable
// (ssescalarfloatingpointconversions.asm), so the numbering below and the
// order of that table must stay in step.
enum CvtOp : unsigned int
{
    Cvtsi2ss = 0,   // Int32  -> float
    Cvtss2si = 1,   // float  -> Int32
    Cvtsi2sd = 2,   // Int32  -> double
    Cvtsd2si = 3,   // double -> Int32
    Cvtss2sd = 4,   // float  -> double
    Cvtsd2ss = 5    // double -> float
};

// x86-SSE rounding modes.
// The numeric value of each enumerator is what SseSetMxcsrRoundingMode
// shifts into the MXCSR.RC field and what SseGetMxcsrRoundingMode returns.
enum SseRm : unsigned int
{
    Nearest  = 0,   // round to nearest
    Down     = 1,   // round down
    Up       = 2,   // round up
    Truncate = 3    // round toward zero
};

extern "C" Uint32 SseGetMxcsr(void);
extern "C" Uint32 SseSetMxcsr(Uint32 mxcsr);

extern "C" SseRm SseGetMxcsrRoundingMode(void);
extern "C" void SseSetMxcsrRoundingMode(SseRm rm);
extern "C" bool SseSfpConversion(XmmScalar* a, XmmScalar* b, CvtOp cvt_op);

// Rounding modes exercised by SseSfpConversions, in the order they are tried.
const SseRm SseRmVals[] =
{
    SseRm::Nearest,
    SseRm::Down,
    SseRm::Up,
    SseRm::Truncate
};

// Display names, indexed by SseRm value.
const char* SseRmStrings[] =
{
    "Nearest",
    "Down",
    "Up",
    "Truncate"
};

void SseSfpConversions(void)
{
    XmmScalar src1, src2;
    XmmScalar des1, des2;
    const int num_rm = sizeof(SseRmVals) / sizeof (SseRm);
    Uint32 mxcsr_save = SseGetMxcsr();

    src1.r32 = (float)M_PI;
    src2.r64 = -M_E;

    for (int i = 0; i < num_rm; i++)
    {
        SseRm rm1 = SseRmVals[i];
        SseRm rm2;

        SseSetMxcsrRoundingMode(rm1);
        rm2 = SseGetMxcsrRoundingMode();
        
        if (rm2 != rm1)
        {
            printf("  SSE rounding mode change failed)\n");
            printf("  rm1: %d  rm2: %d\n", rm1, rm2);
        }
        else
        {
            printf("X86-SSE rounding mode = %s\n", SseRmStrings[rm2]);

            SseSfpConversion(&des1, &src1, CvtOp::Cvtss2si);
            printf("  cvtss2si: %12lf --> %6d\n", src1.r32, des1.i32);

            SseSfpConversion(&des2, &src2, CvtOp::Cvtsd2si);
            printf("  cvtsd2si: %12lf --> %6d\n", src2.r64, des2.i32);
        }
    }

    SseSetMxcsr(mxcsr_save);
}
// Program entry point: runs the conversion demonstration and exits with 0.
// The command-line arguments are not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    SseSfpConversions();
    return 0;
}
ssescalarfloatingpointconversions.asm
; Name:			ssescalarfloatingpointconversions.asm
;
; Build:		g++ -c -m32 main.cpp -o main.o -std=c++11
;				nasm -f elf32 -o ssescalarfloatingpointconversions.o ssescalarfloatingpointconversions.asm
;				g++ -m32 -o ssescalarfloatingpointconversions ssescalarfloatingpointconversions.o main.o
;
; Source:		Modern x86 Assembly Language Programming p. 217

global	SseSfpConversion
global	SseGetMxcsr
global	SseSetMxcsr
global	SseGetMxcsrRoundingMode
global	SseSetMxcsrRoundingMode

section .data

; Jump table used by SseSfpConversion: entry N holds the address of the code
; that performs conversion N. The order of the entries must match the
; enum CvtOp that's defined in main.cpp.
align 4

	CvtOpTable:  	 dd SseSfpConversion.SseCvtsi2ss
			 dd SseSfpConversion.SseCvtss2si
			 dd SseSfpConversion.SseCvtsi2sd
			 dd SseSfpConversion.SseCvtsd2si
			 dd SseSfpConversion.SseCvtss2sd
			 dd SseSfpConversion.SseCvtsd2ss
	CvtOpTableCount: equ ($ - CvtOpTable) / 4		; number of table entries (each dd entry is 4 bytes)
	MxcsrRcMask: 	 equ 9fffh                      ;AND mask that clears MXCSR.RC (bits 14:13) and keeps bits 15 and 12:0
	MxcsrRcShift:	 equ 13                         ;shift count for MXCSR.RC

section .text
; extern "C" bool SseSfpConversion(XmmScalar* des, const XmmScalar* src, CvtOp cvt_op)
;
; Description:  Performs the x86-SSE scalar conversion selected by cvt_op
;               on the value at src and stores the result at des.
;
; Returns:      eax = 1 if cvt_op selected an entry of CvtOpTable
;               eax = 0 if cvt_op is out of range (nothing is stored)
;
; Requires:     SSE2

%define des     [ebp+8]
%define src     [ebp+12]
%define cvt_op  [ebp+16]

SseSfpConversion:
        push    ebp
        mov     ebp,esp

; Fetch the arguments and dispatch through the jump table
        mov     edx,des                 ;edx = ptr to des
        mov     ecx,src                 ;ecx = ptr to src
        mov     eax,cvt_op              ;eax = cvt_op
        cmp     eax,CvtOpTableCount
        jae     .badCvtOp               ;unsigned compare rejects any index >= count
        jmp     [CvtOpTable+eax*4]      ;continue at the selected conversion

; Int32 -> float
.SseCvtsi2ss:
        mov     eax,[ecx]
        cvtsi2ss xmm0,eax
        movss   dword[edx],xmm0
        jmp     .converted

; float -> Int32 (rounded according to MXCSR.RC)
.SseCvtss2si:
        movss   xmm0,dword[ecx]
        cvtss2si eax,xmm0
        mov     [edx],eax
        jmp     .converted

; Int32 -> double
.SseCvtsi2sd:
        mov     eax,[ecx]
        cvtsi2sd xmm0,eax
        movsd   qword[edx],xmm0
        jmp     .converted

; double -> Int32 (rounded according to MXCSR.RC)
.SseCvtsd2si:
        movsd   xmm0,qword[ecx]
        cvtsd2si eax,xmm0
        mov     [edx],eax
        jmp     .converted

; float -> double
.SseCvtss2sd:
        movss   xmm0,dword[ecx]
        cvtss2sd xmm1,xmm0
        movsd   qword[edx],xmm1
        jmp     .converted

; double -> float
.SseCvtsd2ss:
        movsd   xmm0,qword[ecx]
        cvtsd2ss xmm1,xmm0
        movss   dword[edx],xmm1
                                        ;fall through to the success exit

; Common exit for every successful conversion
.converted:
        mov     eax,1
        pop     ebp
        ret

; cvt_op was not a valid table index
.badCvtOp:
        xor     eax,eax                 ;set error return code
        pop     ebp
        ret
; extern "C" Uint32 SseGetMxcsr(void);
;
; Description:  Reads the current contents of the MXCSR register.
;
; Returns:      eax = contents of MXCSR

SseGetMxcsr:
        push    ebp
        mov     ebp,esp
        sub     esp,4                   ;reserve a dword scratch slot at [ebp-4]

        stmxcsr [ebp-4]                 ;stmxcsr stores to memory only
        mov     eax,[ebp-4]             ;return value

        leave                           ;mov esp,ebp + pop ebp
        ret

; extern "C" Uint32 SseSetMxcsr(Uint32 mxcsr);
;
; Description:  Loads a new value into the MXCSR register. Bits 31:16 of
;               the argument are cleared before the load.
;
; Note:         eax holds the masked value that was loaded when the
;               function returns.

%define mxcsr   [ebp+8]

SseSetMxcsr:
        push    ebp
        mov     ebp,esp
        sub     esp,4                   ;reserve a dword scratch slot at [ebp-4]

        mov     eax,mxcsr               ;eax = requested MXCSR value
        and     eax,0xffff              ;bits mxcsr[31:16] must be 0
        mov     [ebp-4],eax
        ldmxcsr [ebp-4]                 ;ldmxcsr loads from memory only

        leave                           ;mov esp,ebp + pop ebp
        ret
; extern "C" SseRm SseGetMxcsrRoundingMode(void);
;
; Description:  Extracts the x86-SSE floating-point rounding mode from
;               the MXCSR.RC field.
;
; Returns:      eax = current rounding mode (0-3)

SseGetMxcsrRoundingMode:
        push    ebp
        mov     ebp,esp
        sub     esp,4                   ;reserve a dword scratch slot at [ebp-4]

        stmxcsr [ebp-4]                 ;copy MXCSR to memory
        mov     eax,[ebp-4]
        shr     eax,MxcsrRcShift        ;move MXCSR.RC down to eax[1:0]
        and     eax,3                   ;discard everything above the two RC bits

        leave                           ;mov esp,ebp + pop ebp
        ret
; extern "C" void SseSetMxcsrRoundingMode(SseRm rm);
;
; Description:  Replaces the rounding mode held in MXCSR.RC with rm.
;               All other MXCSR bits that MxcsrRcMask keeps are unchanged.

%define rm      [ebp+8]

SseSetMxcsrRoundingMode:
        push    ebp
        mov     ebp,esp
        sub     esp,4                   ;reserve a dword scratch slot at [ebp-4]

; Read MXCSR and clear its current RC field
        stmxcsr [ebp-4]
        mov     eax,[ebp-4]
        and     eax,MxcsrRcMask         ;eax = MXCSR with the RC bits zeroed

; Position the requested mode at the RC field
        mov     ecx,rm
        and     ecx,3                   ;only the low two bits are meaningful
        shl     ecx,MxcsrRcShift        ;ecx[14:13] = rm

; Merge and write back
        or      eax,ecx
        mov     [ebp-4],eax
        ldmxcsr [ebp-4]                 ;load updated MXCSR

        leave                           ;mov esp,ebp + pop ebp
        ret
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o ssescalarfloatingpointconversions.o ssescalarfloatingpointconversions.asm
g++ -m32 -o ssescalarfloatingpointconversions ssescalarfloatingpointconversions.o main.o