main.cpp
#include "../../commonfiles/xmmval.h"
#define _USE_MATH_DEFINES
#include <math.h>
extern "C" void SsePackedFpMath32(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
extern "C" void SsePackedFpMath64(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
void SsePackedFpMath32Cpp(void)
{
__attribute__ ((aligned(16))) XmmVal a;
__attribute__ ((aligned(16))) XmmVal b;
__attribute__ ((aligned(16))) XmmVal c[8];
char buff[256];
a.r32[0] = 36.0f;
a.r32[1] = (float)(1.0 / 32.0);
a.r32[2] = 2.0f;
a.r32[3] = 42.0f;
b.r32[0] = -(float)(1.0 / 9.0);
b.r32[1] = 64.0f;
b.r32[2] = -0.0625f;
b.r32[3] = 8.666667f;
SsePackedFpMath32(&a, &b, c);
printf("\nResults for SsePackedFpMath32_\n");
printf("a: %s\n", a.ToString_r32(buff, sizeof(buff)));
printf("b: %s\n", b.ToString_r32(buff, sizeof(buff)));
printf("\n");
printf("addps: %s\n", c[0].ToString_r32(buff, sizeof(buff)));
printf("subps: %s\n", c[1].ToString_r32(buff, sizeof(buff)));
printf("mulps: %s\n", c[2].ToString_r32(buff, sizeof(buff)));
printf("divps: %s\n", c[3].ToString_r32(buff, sizeof(buff)));
printf("absps a: %s\n", c[4].ToString_r32(buff, sizeof(buff)));
printf("sqrtps a: %s\n", c[5].ToString_r32(buff, sizeof(buff)));
printf("minps: %s\n", c[6].ToString_r32(buff, sizeof(buff)));
printf("maxps: %s\n", c[7].ToString_r32(buff, sizeof(buff)));
}
void SsePackedFpMath64Cpp(void)
{
__attribute__ ((aligned(16))) XmmVal a;
__attribute__ ((aligned(16))) XmmVal b;
__attribute__ ((aligned(16))) XmmVal c[8];
char buff[256];
a.r64[0] = 2.0;
a.r64[1] = M_PI;
b.r64[0] = M_E;
b.r64[1] = -M_1_PI;
SsePackedFpMath64(&a, &b, c);
printf("\nResults for SsePackedFpMath64_\n");
printf("a: %s\n", a.ToString_r64(buff, sizeof(buff)));
printf("b: %s\n", b.ToString_r64(buff, sizeof(buff)));
printf("\n");
printf("addpd: %s\n", c[0].ToString_r64(buff, sizeof(buff)));
printf("subpd: %s\n", c[1].ToString_r64(buff, sizeof(buff)));
printf("mulpd: %s\n", c[2].ToString_r64(buff, sizeof(buff)));
printf("divpd: %s\n", c[3].ToString_r64(buff, sizeof(buff)));
printf("abspd a: %s\n", c[4].ToString_r64(buff, sizeof(buff)));
printf("sqrtpd a: %s\n", c[5].ToString_r64(buff, sizeof(buff)));
printf("minpd: %s\n", c[6].ToString_r64(buff, sizeof(buff)));
printf("maxpd: %s\n", c[7].ToString_r64(buff, sizeof(buff)));
}
// Program entry point: runs the single-precision demo, then the
// double-precision demo. Command-line arguments are not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    SsePackedFpMath32Cpp();
    SsePackedFpMath64Cpp();

    return 0;
}
ssepackedfloatingpointarithmetic.asm
; Name: ssepackedfloatingpointarithmetic.asm
;
; Build: g++ -c -m32 main.cpp -o main.o -std=c++11
; nasm -f elf32 -o ssepackedfloatingpointarithmetic.o ssepackedfloatingpointarithmetic.asm
; g++ -m32 -o ssepackedfloatingpointarithmetic ssepackedfloatingpointarithmetic.o main.o ../../commonfiles/xmmval.o
;
; Source: Modern x86 Assembly Language Programming p. 238
; Entry points exported to the C++ driver (declared there as extern "C").
global SsePackedFpMath32
global SsePackedFpMath64
section .data
; Mask values used to calculate floating-point absolute values.
; Each mask has every bit set except the sign bit of each lane, so ANDing a
; packed value with it clears the sign and leaves the magnitude unchanged.
; The masks are used as memory operands of andps/andpd, so they are kept on a
; 16-byte boundary: Pfp32Abs by the align directive, Pfp64Abs because it
; directly follows the 16 bytes of Pfp32Abs.
align 16
Pfp32Abs: dd 0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff
Pfp64Abs: dq 0x7fffffffffffffff,0x7fffffffffffffff
section .text
; extern "C" void SsePackedFpMath32(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
;
; Description: The following function demonstrates basic math using
; packed single-precision floating-point values.
;
; Arguments are read from the stack relative to ebp after the prologue:
;   [ebp+8] = a, [ebp+12] = b, [ebp+16] = c
;
; Results are written to c, 16 bytes per element (four lanes each):
;   c[0] = a + b      c[1] = a - b      c[2] = a * b      c[3] = a / b
;   c[4] = |a|        c[5] = sqrt(a)    c[6] = min(a, b)  c[7] = max(a, b)
;
; All loads and stores use movaps, so a, b and c must be 16-byte aligned.
; Registers written: eax, ecx, edx, xmm0, xmm1, xmm2.
;
; Requires: SSE
%define a [ebp+8]
%define b [ebp+12]
%define c [ebp+16]
SsePackedFpMath32:
push ebp
mov ebp,esp
; Load packed SP floating-point values
mov eax,a ;eax = pointer 'a'
mov ecx,b ;ecx = pointer 'b'
mov edx,c ;edx = pointer to result array 'c'
movaps xmm0,[eax] ;xmm0 = *a (kept unmodified until the final maxps)
movaps xmm1,[ecx] ;xmm1 = *b (never modified)
; Packed SP floating-point addition
movaps xmm2,xmm0 ;work on a copy so xmm0 keeps *a
addps xmm2,xmm1
movaps [edx+0],xmm2 ;c[0] = a + b
; Packed SP floating-point subtraction
movaps xmm2,xmm0
subps xmm2,xmm1
movaps [edx+16],xmm2 ;c[1] = a - b
; Packed SP floating-point multiplication
movaps xmm2,xmm0
mulps xmm2,xmm1
movaps [edx+32],xmm2 ;c[2] = a * b
; Packed SP floating-point division
movaps xmm2,xmm0
divps xmm2,xmm1
movaps [edx+48],xmm2 ;c[3] = a / b
; Packed SP floating-point absolute value
movaps xmm2,xmm0
andps xmm2,[Pfp32Abs] ;clear the sign bit of each lane
movaps [edx+64],xmm2 ;c[4] = |a|
; Packed SP floating-point square root
sqrtps xmm2,xmm0 ;source is xmm0, so no copy is needed first
movaps [edx+80],xmm2 ;c[5] = sqrt(a)
; Packed SP floating-point minimum
movaps xmm2,xmm0
minps xmm2,xmm1
movaps [edx+96],xmm2 ;c[6] = min(a, b)
; Packed SP floating-point maximum
maxps xmm0,xmm1 ;last use of *a, so xmm0 can be overwritten in place
movaps [edx+112],xmm0 ;c[7] = max(a, b)
pop ebp
ret
; extern "C" void SsePackedFpMath64(const XmmVal* a, const XmmVal* b, XmmVal c[8]);
;
; Description: The following function demonstrates basic math using
; packed double-precision floating-point values.
;
; Arguments are read from the stack relative to ebp after the prologue:
;   [ebp+8] = a, [ebp+12] = b, [ebp+16] = c
;
; Results are written to c, 16 bytes per element (two lanes each):
;   c[0] = a + b      c[1] = a - b      c[2] = a * b      c[3] = a / b
;   c[4] = |a|        c[5] = sqrt(a)    c[6] = min(a, b)  c[7] = max(a, b)
;
; All loads and stores use movapd, so a, b and c must be 16-byte aligned.
; Registers written: eax, ecx, edx, xmm0, xmm1, xmm2.
;
; Requires: SSE2
%define a [ebp+8]
%define b [ebp+12]
%define c [ebp+16]
SsePackedFpMath64:
        push    ebp
        mov     ebp,esp

; Load packed DP floating-point values
        mov     eax,a                   ;eax = pointer 'a'
        mov     ecx,b                   ;ecx = pointer 'b'
        mov     edx,c                   ;edx = pointer to result array 'c'
        movapd  xmm0,[eax]              ;xmm0 = *a (kept unmodified until the final maxpd)
        movapd  xmm1,[ecx]              ;xmm1 = *b (never modified)

; Packed DP floating-point addition
        movapd  xmm2,xmm0               ;work on a copy so xmm0 keeps *a
        addpd   xmm2,xmm1
        movapd  [edx+0],xmm2            ;c[0] = a + b

; Packed DP floating-point subtraction
        movapd  xmm2,xmm0
        subpd   xmm2,xmm1
        movapd  [edx+16],xmm2           ;c[1] = a - b

; Packed DP floating-point multiplication
        movapd  xmm2,xmm0
        mulpd   xmm2,xmm1
        movapd  [edx+32],xmm2           ;c[2] = a * b

; Packed DP floating-point division
        movapd  xmm2,xmm0
        divpd   xmm2,xmm1
        movapd  [edx+48],xmm2           ;c[3] = a / b

; Packed DP floating-point absolute value
; The mask must be applied to the copy in xmm2, not to xmm0: xmm2 is what
; gets stored, and xmm0 must still hold the original *a for the square
; root, minimum and maximum below.
        movapd  xmm2,xmm0
        andpd   xmm2,[Pfp64Abs]         ;clear the sign bit of each lane
        movapd  [edx+64],xmm2           ;c[4] = |a|

; Packed DP floating-point square root
        sqrtpd  xmm2,xmm0               ;source is xmm0, so no copy is needed first
        movapd  [edx+80],xmm2           ;c[5] = sqrt(a)

; Packed DP floating-point minimum
        movapd  xmm2,xmm0
        minpd   xmm2,xmm1
        movapd  [edx+96],xmm2           ;c[6] = min(a, b)

; Packed DP floating-point maximum
        maxpd   xmm0,xmm1               ;last use of *a, so xmm0 can be overwritten in place
        movapd  [edx+112],xmm0          ;c[7] = max(a, b)

        pop     ebp
        ret
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o ssepackedfloatingpointarithmetic.o ssepackedfloatingpointarithmetic.asm
g++ -m32 -o ssepackedfloatingpointarithmetic ssepackedfloatingpointarithmetic.o main.o ../../commonfiles/xmmval.o