## Ellipsoid Calculations

This section examines a sample program that calculates the volume and surface area of an ellipsoid using the scalar floating-point capabilities of x86-AVX. The example is converted to c++/nasm on Linux with Qt5 from the original source code from the book.

more on Ellipsoids : https://en.wikipedia.org/wiki/Ellipsoid

Formulas for Volume (V) and Surface (A) calculation:

The assembly code differs from the one in the book.  The calculation of 4π, 4π/3 and p-1 are made before the loop instead of in the loop.

The PROLOGUE and EPILOGUE macros are used to assemble and compile correctly against the default -fPIC (position independent code) of g++.  You can omit these by building the project by hand instead of the IDE.  In the latter case you have to remove -fPIC from g++ on the commandline.

TEMPLATE = app

```CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt

SOURCES += main.cpp
LIBS += -lm

QMAKE_EXTRA_COMPILERS += nasm
NASMEXTRAFLAGS = -f elf64 -g -F dwarf

OTHER_FILES += \$\$NASM_SOURCES
nasm.output = \${QMAKE_FILE_BASE}.o
nasm.commands = nasm \$\$NASMEXTRAFLAGS -o \${QMAKE_FILE_BASE}.o \${QMAKE_FILE_NAME}
nasm.input = NASM_SOURCES

NASM_SOURCES += avx64calcellipsoid_.asm```
```// build:
// nasm -f elf64 -g -F dwarf -o avx64calcellipsoid_.o avx64calcellipsoid_.asm
// g++ -c -pipe -O2 -std=gnu++11 -Wall -W -fPIC  -I. -I/home/agguro/Programs/Qt/64bits/5.11.0/gcc_64/mkspecs/linux-g++ -o main.o main.cpp
// g++ -Wl,-O1 -o Avx64CalcEllipsoid avx64calcellipsoid_.o main.o   -lm

#include <stdio.h>
#include <math.h>

extern "C" bool Avx64CalcEllipsoid_(const double* a, const double* b, const double* c, int n, double p, double* sa, double* vol);

bool Avx64CalcEllipsoid(const double* a, const double* b, const double* c, int n, double p, double* sa, double* vol)
{
if (n <= 0)
return false;

for (int i = 0; i < n; i++)
{
double a_p = pow(a[i], p);
double b_p = pow(b[i], p);
double c_p = pow(c[i], p);

double temp1 = (a_p * b_p + a_p * c_p + b_p * c_p) / 3;

sa[i] = 4 * M_PI * pow(temp1, 1.0 / p);
vol[i] = 4 * M_PI * a[i] * b[i] * c[i] / 3;
}
return true;
}

int main(void)
{
const int n = 8;
const  double p = 1.6075;
const double a[n] = { 1, 2, 6, 3, 4,  5, 5, 2};
const double b[n] = { 1, 2, 1, 7, 2,  6, 5, 7};
const double c[n] = { 1, 2, 7, 4, 3, 11, 5, 9};
double sa1[n], vol1[n];
double sa2[n], vol2[n];

Avx64CalcEllipsoid(a, b, c, n, p, sa1, vol1);
Avx64CalcEllipsoid_(a, b, c, n, p, sa2, vol2);

printf("Results for Avx64CalcEllipsoid\n\n");

for (int i = 0; i < n; i++)
{
printf("\na, b, c: %6.2lf %6.2lf %6.2lf\n", a[i], b[i], c[i]);
printf("  sa1, vol1: %14.8lf %14.8lf\n", sa1[i], vol1[i]);
printf("  sa2, vol2: %14.8lf %14.8lf\n", sa2[i], vol2[i]);
}

return 0;
}```
```; Name:         avx64calcellipsoid_.asm
;
; Source:       Modern x86 Assembly Language Programming p.590

bits 64
align 16

global Avx64CalcEllipsoid_

extern pow
extern  _GLOBAL_OFFSET_TABLE_

section .bss

section .data
r8_1p0   dq 1.0
r8_3p0   dq 3.0
r8_4p0   dq 4.0
r8_pi    dq 3.14159265358979323846

section .text

;extern "C" bool Avx64CalcEllipsoid_(const double* a, const double* b, const double* c, int n, double p, double* sa, double* vol);
;
;Description:  The following function calculates the surface area
;              and volume of an ellipsoid
;
;Requires:     x86-64, AVX
;registers: rdi  :const double* a
;           rsi  :const double* b
;           rdx  :const double* c
;	    ecx  :int n (isn't saved because of single use)
;	    xmm0 :double p
;	    r8   :double* sa  (result)
;	    r9   :double* vol (result)

%define STK_TOTAL 0x60
;stack must be kept aligned to 16 bytes (0x10).
;12 local variables = 12 x 8 = 96 (or 0x60) bytes needed. We don't need padding because we have 6 non-volatile
;registers pushed on the stack. 6 x 8 = 48 (or 0x30) which is also aligned to 16 bytes.

%macro PROLOGUE 0
push	r12
push	r13
push	r14
push	r15
push	rbx
push    rbp
mov     rbp,rsp
push    rbx
call    .get_GOT
.get_GOT:
pop     rbx
sub	rsp,STK_TOTAL
%endmacro

%macro EPILOGUE 0
mov     rbx,[rbp-8]
mov     rsp,rbp
pop     rbp
pop	rbx
pop	r15
pop	r14
pop	r13
pop	r12
ret
%endmacro

Avx64CalcEllipsoid_:
PROLOGUE
mov	    [rsp],rdi		    ;pointer to a[]
mov	    [rsp+0x8],rsi	    ;pointer to b[]
mov	    [rsp+0x10],rdx	    ;pointer to c[]
movsd   [rsp+0x18],xmm0	    ;p
mov	    [rsp+0x20],r8	    ;pointer to surface[]
mov	    [rsp+0x28],r9	    ;pointer to volume[]

;first calculate constant values outside the loop
;calculate 4*pi
vmovsd  xmm0,qword[r8_4p0]
vmulsd  xmm0,xmm0,qword[r8_pi]
vmovsd  [rsp+0x30],xmm0		;save 4*pi
;calculate 4*pi/3
vdivsd  xmm0,xmm0,qword[r8_3p0]	;4*pi/3 in xmm2 for volume
vmovsd  [rsp+0x38],xmm0		;save 4*pi/3
;calculate 1/p
vmovsd  xmm0,qword[r8_1p0]		;1 in xmm0
vmovsd  xmm1,[rsp+0x18]		;p in xmm1
vdivsd  xmm0,xmm0,xmm1		;1/p
vmovsd  [rsp+0x40],xmm0		;save 1/p

test    ecx,ecx		    ;is n <= 0
jle	    .error
mov	    r15d,ecx
xor	    r11,r11
.repeat:
;*** Calculate the ellipsoid's volume ***
;vol = (4*pi*a*b*c)/3
;xmm0 = 4*pi/3
mov	    r12,qword[rsp]		;a[]
mov	    r13,qword[rsp+0x8]		;b[]
mov	    r14,qword[rsp+0x10]		;c[]
vmulsd  xmm0,xmm0,[r12+r11]		;a*4*pi/3
vmulsd  xmm0,xmm0,[r13+r11]		;a*b*4*pi/3
vmulsd  xmm0,xmm0,[r14+r11]		;a*b*c*pi/3
mov	    r9,qword[rsp+0x28]		;vol[]
vmovsd  qword[r9+r11],xmm0		;save ellipsoid volume
mov	    r8,qword[rsp+0x20]		;vol[]
vmovsd  qword[r8+r11],xmm0

;*** Calculate surface  ***
;surface = 4*pi*(((a^p*b^p+a^p*c^p+b^p*c^p)/3)^(1/p))
movsd   xmm0,[r12+r11]		;a
vmovsd  xmm1,qword[rsp+0x18]	;p
call    pow
vmovsd  [rsp+0x48],xmm0		;a^p

vmovsd  xmm0,[r13+r11]		;b
vmovsd  xmm1,qword[rsp+0x18]	;p
call    pow
vmovsd  [rsp+0x50],xmm0		;b^p

vmovsd  xmm0,[r14+r11]		;c
vmovsd  xmm1,qword[rsp+0x18]	;p
call    pow
vmovsd  [rsp+0x58],xmm0		;c^p

vmovsd  xmm0,qword[rsp+0x48]	;a^p
vmovsd  xmm1,qword[rsp+0x50]	;b^p
vmulsd  xmm0,xmm0,xmm1		;a^p*b^p
vmovsd  xmm1,qword[rsp+0x50]	;b^p
vmovsd  xmm2,qword[rsp+0x58]	;c^p
vmulsd  xmm1,xmm1,xmm2		;b^p*c^p
vmovsd  xmm2,qword[rsp+0x48]	;a^p
vmovsd  xmm3,qword[rsp+0x58]	;c^p
vmulsd  xmm2,xmm2,xmm3		;a^p*c^p
vdivsd  xmm0,xmm0,qword[r8_3p0]	;(a^p*b^p+b^p*c^p+a^p*c^p)/3

vmovsd  xmm1,[rsp+0x40]		;1/p
call    pow				;((a^p*b^p+b^p*c^p+a^p*c^p)/3)^(1/p)

vmovsd  xmm1,[rsp+0x30]		;4*pi
vmulsd  xmm0,xmm0,xmm1		;4*pi*(((a^p*b^p+b^p*c^p+a^p*c^p)/3)^(1/p))
mov	    r8,qword[rsp+0x20]		;sa[]
vmovsd  qword[r8+r11],xmm0		;save

```If you should decide to build it from the commandline then you should use the next commands.nasm -f elf64 -g -F dwarf -o avx64calcellipsoid_.o avx64calcellipsoid_.asmg++ -c -pipe -O2 -std=gnu++11 -Wall -W -fPIC -I. -o main.o main.cppg++ -Wl,-O1 -o Avx64CalcEllipsoid avx64calcellipsoid_.o main.o -lm