main.cpp
#include <stdio.h>
#define _USE_MATH_DEFINES
#include <math.h>
#include "../commonfiles/miscdefs.h"
// Simple union for data exchange with the assembly language routines.
// All members share the same storage; which member is meaningful depends on
// the conversion being performed (e.g. r32 as the source and i32 as the
// result of Cvtss2si).
union XmmScalar
{
float r32; // single-precision floating-point value
double r64; // double-precision floating-point value
Uint32 i32; // 32-bit integer value (Uint32 is presumably declared in miscdefs.h)
Uint64 i64; // 64-bit integer value; not used by the code in this file
};
// The order of the enumerators below must match the CvtOpTable jump table
// that's defined in ssescalarfloatingpointconversions.asm: the numeric value
// of each enumerator is used directly as the index into that table.
enum CvtOp : unsigned int
{
Cvtsi2ss, // 0: Int32 to float
Cvtss2si, // 1: float to Int32
Cvtsi2sd, // 2: Int32 to double
Cvtsd2si, // 3: double to Int32
Cvtss2sd, // 4: float to double
Cvtsd2ss, // 5: double to float
};
// Enumerated type for x86-SSE rounding mode.
// SseSetMxcsrRoundingMode masks the value to two bits and shifts it straight
// into the MXCSR.RC field, and SseGetMxcsrRoundingMode returns that field
// unchanged, so the enumerator order (values 0-3) must not be altered.
enum SseRm : unsigned int
{
Nearest, Down, Up, Truncate
};
extern "C" Uint32 SseGetMxcsr(void);
extern "C" Uint32 SseSetMxcsr(Uint32 mxcsr);
extern "C" SseRm SseGetMxcsrRoundingMode(void);
extern "C" void SseSetMxcsrRoundingMode(SseRm rm);
extern "C" bool SseSfpConversion(XmmScalar* a, XmmScalar* b, CvtOp cvt_op);
const SseRm SseRmVals[] = {SseRm::Nearest, SseRm::Down, SseRm::Up, SseRm::Truncate};
const char* SseRmStrings[] = {"Nearest", "Down", "Up", "Truncate"};
// Demonstrates how the x86-SSE rounding mode affects the float -> Int32
// (cvtss2si) and double -> Int32 (cvtsd2si) conversions.
//
// For every rounding mode in SseRmVals the function programs MXCSR.RC, reads
// it back to confirm the change took effect, then converts (float)pi and -e
// and prints the results. The MXCSR value found on entry is restored before
// returning.
void SseSfpConversions(void)
{
    XmmScalar src1, src2;
    XmmScalar des1, des2;
    const int num_rm = sizeof(SseRmVals) / sizeof(SseRmVals[0]);

    // Remember the current MXCSR so the rounding mode can be put back on exit.
    Uint32 mxcsr_save = SseGetMxcsr();

    src1.r32 = (float)M_PI;
    src2.r64 = -M_E;

    for (int i = 0; i < num_rm; i++)
    {
        SseRm rm1 = SseRmVals[i];

        SseSetMxcsrRoundingMode(rm1);
        SseRm rm2 = SseGetMxcsrRoundingMode();

        if (rm2 != rm1)
        {
            // SseRm has an unsigned underlying type, so print it with %u.
            printf(" SSE rounding mode change failed\n");
            printf(" rm1: %u rm2: %u\n", (unsigned int)rm1, (unsigned int)rm2);
            continue;
        }

        printf("X86-SSE rounding mode = %s\n", SseRmStrings[rm2]);

        // The conversions produce signed 32-bit results that are stored in the
        // i32 member; cast to int so the argument matches the %d specifier.
        // A false return means the destination was not written, so it must
        // not be printed.
        if (SseSfpConversion(&des1, &src1, CvtOp::Cvtss2si))
        {
            printf(" cvtss2si: %12lf --> %6d\n", src1.r32, (int)des1.i32);
        }
        else
        {
            printf(" cvtss2si: conversion failed\n");
        }

        if (SseSfpConversion(&des2, &src2, CvtOp::Cvtsd2si))
        {
            printf(" cvtsd2si: %12lf --> %6d\n", src2.r64, (int)des2.i32);
        }
        else
        {
            printf(" cvtsd2si: conversion failed\n");
        }
    }

    SseSetMxcsr(mxcsr_save);
}
// Program entry point: runs the scalar floating-point conversion demo.
int main(int argc, char* argv[])
{
    // The command line is not used by this program.
    (void)argc;
    (void)argv;

    SseSfpConversions();

    return 0;
}
ssescalarfloatingpointconversions.asm
; Name: ssescalarfloatingpointconversions.asm
;
; Build: g++ -c -m32 main.cpp -o main.o -std=c++11
; nasm -f elf32 -o ssescalarfloatingpointconversions.o ssescalarfloatingpointconversions.asm
; g++ -m32 -o ssescalarfloatingpointconversions ssescalarfloatingpointconversions.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 217
global SseSfpConversion
global SseGetMxcsr
global SseSetMxcsr
global SseGetMxcsrRoundingMode
global SseSetMxcsrRoundingMode
section .data
; Jump table used by SseSfpConversion to dispatch on its cvt_op argument.
; The order of values in the following table must match the enum CvtOp
; that's defined in main.cpp
align 4
CvtOpTable: dd SseSfpConversion.SseCvtsi2ss
dd SseSfpConversion.SseCvtss2si
dd SseSfpConversion.SseCvtsi2sd
dd SseSfpConversion.SseCvtsd2si
dd SseSfpConversion.SseCvtss2sd
dd SseSfpConversion.SseCvtsd2ss
CvtOpTableCount: equ ($ - CvtOpTable) / 4 ; number of table entries (4 is the size of a dword)
MxcsrRcMask: equ 9fffh ;AND mask that clears MXCSR.RC (bits 14:13) and keeps the other low 16 bits
MxcsrRcShift: equ 13 ;shift count that moves a 2-bit value to/from MXCSR.RC
section .text
; extern "C" bool SseSfpConversion(XmmScalar* des, const XmmScalar* src, CvtOp cvt_op)
;
; Description: Performs the single scalar conversion selected by cvt_op,
;              reading the operand from *src and writing the result
;              to *des. cvt_op is used as an index into CvtOpTable.
;
; Returns:     eax = 1 if the conversion was performed
;              eax = 0 if cvt_op is out of range (*des is not written)
;
; Requires:    SSE2
%define des [ebp+8]
%define src [ebp+12]
%define cvt_op [ebp+16]
SseSfpConversion:
        push ebp
        mov ebp,esp

; Fetch the arguments, then dispatch through the jump table. The compare is
; unsigned, so any value >= CvtOpTableCount is rejected.
        mov eax,cvt_op                  ;eax = conversion selector
        mov ecx,src                     ;ecx = address of source operand
        mov edx,des                     ;edx = address of destination
        cmp eax,CvtOpTableCount
        jae .badCvtOp
        jmp [CvtOpTable+eax*4]

; Int32 -> float
.SseCvtsi2ss:
        mov eax,[ecx]
        cvtsi2ss xmm0,eax
        movss dword[edx],xmm0
        jmp .success

; float -> Int32
.SseCvtss2si:
        movss xmm0,dword[ecx]
        cvtss2si eax,xmm0
        mov [edx],eax
        jmp .success

; Int32 -> double
.SseCvtsi2sd:
        mov eax,[ecx]
        cvtsi2sd xmm0,eax
        movsd qword[edx],xmm0
        jmp .success

; double -> Int32
.SseCvtsd2si:
        movsd xmm0,qword[ecx]
        cvtsd2si eax,xmm0
        mov [edx],eax
        jmp .success

; float -> double
.SseCvtss2sd:
        movss xmm0,dword[ecx]
        cvtss2sd xmm1,xmm0
        movsd qword[edx],xmm1
        jmp .success

; double -> float (last handler: falls through into .success)
.SseCvtsd2ss:
        movsd xmm0,qword[ecx]
        cvtsd2ss xmm1,xmm0
        movss dword[edx],xmm1

; Common exit path shared by every handler.
.success:
        mov eax,1                       ;report success
.return:
        pop ebp
        ret

.badCvtOp:
        xor eax,eax                     ;report failure
        jmp .return
; extern "C" Uint32 SseGetMxcsr(void);
;
; Description: Reads the MXCSR register. stmxcsr only accepts a memory
;              operand, so the value is passed through a dword on the stack.
;
; Returns:     eax = contents of MXCSR
SseGetMxcsr:
%define mxcsr_tmp [ebp-4]
        push ebp
        mov ebp,esp
        sub esp,4                       ;reserve the scratch dword

        stmxcsr mxcsr_tmp               ;MXCSR -> stack
        mov eax,mxcsr_tmp               ;stack -> return register

        leave                           ;release scratch dword, restore ebp
        ret
; extern "C" Uint32 SseSetMxcsr(Uint32 mxcsr);
;
; Description: Writes a new value to the MXCSR register. Only the low
;              16 bits of the argument are used; the upper 16 bits are
;              forced to zero before the register is loaded.
SseSetMxcsr:
%define mxcsr [ebp+8]
%define mxcsr_tmp [ebp-4]
        push ebp
        mov ebp,esp
        sub esp,4                       ;reserve the scratch dword

        mov eax,mxcsr                   ;eax = requested MXCSR value
        and eax,0ffffh                  ;MXCSR[31:16] must be 0
        mov mxcsr_tmp,eax
        ldmxcsr mxcsr_tmp               ;stack -> MXCSR

        leave                           ;release scratch dword, restore ebp
        ret
; extern "C" SseRm SseGetMxcsrRoundingMode(void);
;
; Description: Extracts the rounding-control field MXCSR.RC from the
;              MXCSR register.
;
; Returns:     eax = MXCSR.RC (0-3), the current x86-SSE rounding mode
SseGetMxcsrRoundingMode:
%define mxcsr_tmp [ebp-4]
        push ebp
        mov ebp,esp
        sub esp,4                       ;reserve the scratch dword

        stmxcsr mxcsr_tmp               ;MXCSR -> stack
        mov eax,mxcsr_tmp
        shr eax,MxcsrRcShift            ;bring MXCSR.RC down to eax[1:0]
        and eax,3                       ;discard everything above the field

        leave                           ;release scratch dword, restore ebp
        ret
; extern "C" void SseSetMxcsrRoundingMode(SseRm rm);
;
; Description: Replaces the rounding-control field MXCSR.RC with rm
;              (only the low two bits of rm are used). All other MXCSR
;              bits keep their current values.
SseSetMxcsrRoundingMode:
%define rm [ebp+8]
%define mxcsr_tmp [ebp-4]
        push ebp
        mov ebp,esp
        sub esp,4                       ;reserve the scratch dword

        stmxcsr mxcsr_tmp               ;snapshot the current MXCSR

; Position the requested mode at the MXCSR.RC bit location.
        mov ecx,rm
        and ecx,3                       ;keep the 2-bit mode only
        shl ecx,MxcsrRcShift            ;ecx[14:13] = rm

; Merge it into the snapshot and write the result back.
        mov eax,mxcsr_tmp
        and eax,MxcsrRcMask             ;clear the old MXCSR.RC bits
        or eax,ecx                      ;insert the new MXCSR.RC bits
        mov mxcsr_tmp,eax
        ldmxcsr mxcsr_tmp               ;stack -> MXCSR

        leave                           ;release scratch dword, restore ebp
        ret
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o ssescalarfloatingpointconversions.o ssescalarfloatingpointconversions.asm
g++ -m32 -o ssescalarfloatingpointconversions ssescalarfloatingpointconversions.o main.o