main.cpp
#include <stdio.h>
#include <stdlib.h>
extern "C" void AvxSfpArithmetic(double a, double b, double results[8]);
// Driver: runs AvxSfpArithmetic on one fixed pair of doubles and prints each
// value it wrote next to the name of the operation that produced it.
int main(int argc, char* argv[])
{
    // Labels appear in the same order as the slots of the output array.
    static const char* const op_labels[] =
    {
        "vaddsd", "vsubsd", "vmulsd", "vdivsd",
        "vminsd", "vmaxsd", "vsqrtsd a", "fabs b"
    };
    const int op_count = sizeof(op_labels) / sizeof(op_labels[0]);

    double lhs = 17.75;
    double rhs = -39.1875;
    double outputs[op_count];       // one slot per label

    AvxSfpArithmetic(lhs, rhs, outputs);

    printf("\nResults for AvxScalarFloatingPointArithmetic\n");
    printf("a: %.6lf\n", lhs);
    printf("b: %.6lf\n", rhs);

    // One line per operation: left-justified label, then the value.
    int idx = 0;
    while (idx < op_count)
    {
        printf("%-14s %-12.6lf\n", op_labels[idx], outputs[idx]);
        ++idx;
    }

    return 0;
}
avxscalarfloatingpointarithmetic.asm
; Name: avxscalarfloatingpointarithmetic.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o avxscalarfloatingpointarithmetic.o avxscalarfloatingpointarithmetic.asm
; g++ -m32 -o avxscalarfloatingpointarithmetic avxscalarfloatingpointarithmetic.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 352
;-----------------------------------------------------------------------
; extern "C" void AvxSfpArithmetic(double a, double b, double results[8]);
;
; Demonstrates the basic AVX scalar double-precision floating-point
; instructions. Stores eight doubles through the results pointer:
;   results[0] = a + b          results[4] = min(a, b)
;   results[1] = a - b          results[5] = max(a, b)
;   results[2] = a * b          results[6] = sqrt(a)
;   results[3] = a / b          results[7] = fabs(b)
;
; ABI:      32-bit, all three arguments are read from the stack
; Requires: AVX
; Clobbers: eax, xmm0-xmm5
;-----------------------------------------------------------------------
        global  AvxSfpArithmetic

; Tell the linker this object does not need an executable stack. Without
; this note GNU ld marks the whole program's stack executable (or warns).
        section .note.GNU-stack noalloc noexec nowrite progbits

; Offsets of the stack arguments from ebp once the frame is set up
; (saved ebp at +0, return address at +4).
ARG_A           equ     8               ; double a
ARG_B           equ     16              ; double b
ARG_RESULTS     equ     24              ; double *results

        section .rodata
        align   16
; All bits set except the sign bit of each double; AND-ing a double with
; it yields the absolute value. Never written, so it lives in .rodata.
AbsMask:        dq      0x7fffffffffffffff, 0x7fffffffffffffff

        section .text
AvxSfpArithmetic:
        push    ebp
        mov     ebp, esp

        ; Load argument values
        mov     eax, [ebp+ARG_RESULTS]  ; eax = results
        vmovsd  xmm0, [ebp+ARG_A]       ; xmm0 = a
        vmovsd  xmm1, [ebp+ARG_B]       ; xmm1 = b

        ; Basic arithmetic; xmm0/xmm1 are only read, so a and b stay live
        vaddsd  xmm2, xmm0, xmm1        ; xmm2 = a + b
        vsubsd  xmm3, xmm0, xmm1        ; xmm3 = a - b
        vmulsd  xmm4, xmm0, xmm1        ; xmm4 = a * b
        vdivsd  xmm5, xmm0, xmm1        ; xmm5 = a / b

        vmovsd  [eax+0], xmm2           ; results[0] = a + b
        vmovsd  [eax+8], xmm3           ; results[1] = a - b
        vmovsd  [eax+16], xmm4          ; results[2] = a * b
        vmovsd  [eax+24], xmm5          ; results[3] = a / b

        ; min(a, b), max(a, b), sqrt(a) and fabs(b)
        vminsd  xmm2, xmm0, xmm1        ; xmm2 = min(a, b)
        vmaxsd  xmm3, xmm0, xmm1        ; xmm3 = max(a, b)
        vsqrtsd xmm4, xmm0, xmm0        ; xmm4 = sqrt(a)
        vandpd  xmm5, xmm1, [AbsMask]   ; xmm5 = fabs(b): clear the sign bit

        vmovsd  [eax+32], xmm2          ; results[4] = min(a, b)
        vmovsd  [eax+40], xmm3          ; results[5] = max(a, b)
        vmovsd  [eax+48], xmm4          ; results[6] = sqrt(a)
        vmovsd  [eax+56], xmm5          ; results[7] = fabs(b)

        pop     ebp
        ret
build
g++ -c -m32 main.cpp -o main.o
nasm -f elf32 -o avxscalarfloatingpointarithmetic.o avxscalarfloatingpointarithmetic.asm
g++ -m32 -o avxscalarfloatingpointarithmetic avxscalarfloatingpointarithmetic.o main.o