main.cpp
#include <stdio.h>
#define _USE_MATH_DEFINES
#include <math.h>

extern "C" void Cc3(const double* r, const double* h, int n, double* sa_cone, double* vol_cone);

int main(int argc, char* argv[])
{
    // Sample data: n cones described by a radius and a height each.
    const int n = 6;
    double r[n] = { 1, 1, 2, 2, 3, 3 };
    double h[n] = { 1, 2, 3, 4, 5, 10 };

    // Result set 1 is computed here in C++, result set 2 by the assembly
    // routine Cc3, so the two can be printed side by side.
    double sa_cone1[n], sa_cone2[n];
    double vol_cone1[n], vol_cone2[n];

    // C++ reference: surface area and volume of right-circular cones
    //   sa  = pi * r * (r + sqrt(r * r + h * h))
    //   vol = pi * r * r * h / 3
    for (int k = 0; k != n; ++k)
    {
        const double radius = r[k];
        const double height = h[k];
        const double slant = sqrt(radius * radius + height * height);

        sa_cone1[k] = M_PI * radius * (radius + slant);
        vol_cone1[k] = M_PI * radius * radius * height / 3.0;
    }

    // Same calculation performed by the assembly implementation.
    Cc3(r, h, n, sa_cone2, vol_cone2);

    // Print each cone's inputs followed by the C++ and assembly results.
    printf("\nResults for CallingConvention3\n");

    int k = 0;
    while (k < n)
    {
        printf("  r/h: %14.2lf %14.2lf\n", r[k], h[k]);
        printf("  sa:  %14.6lf %14.6lf\n", sa_cone1[k], sa_cone2[k]);
        printf("  vol: %14.6lf %14.6lf\n", vol_cone1[k], vol_cone2[k]);
        printf("\n");
        ++k;
    }

    return 0;
}
callingconvention3.asm
; Name:     callingconvention3.asm
;
; Build:    g++ -c main.cpp -o main.o
;           nasm -f elf64 -o callingconvention3.o callingconvention3.asm
;           g++ -o callingconvention3 callingconvention3.o main.o
;
; Source:   Modern x86 Assembly Language Programming p.533

global Cc3

bits 64

; Read-only floating-point constants used by Cc3. They are only ever loaded,
; never stored to, so they live in .rodata rather than writable .data.
section .rodata
    align 8, db 0                           ;natural alignment for 8-byte doubles
    r8_3p0:      dq 3.0                     ;divisor in vol = pi * r * r * h / 3
    r8_pi:       dq 3.14159265358979323846  ;pi as a 64-bit double

section .text

; extern "C" bool Cc3(const double* r, const double* h, int n, double* sa_cone, double* vol_cone);

Cc3:
; Computes the surface area and volume of n right-circular cones:
;   sa_cone[i]  = pi * r[i] * (r[i] + sqrt(r[i] * r[i] + h[i] * h[i]))
;   vol_cone[i] = pi * r[i] * r[i] * h[i] / 3
;
; ABI:       System V AMD64 (leaf function, no stack usage)
; Registers: rdi    r         (const double*)
;            rsi    h         (const double*)
;            edx    n         (int, sign-extended to rdx below)
;            rcx    sa_cone   (double*)
;            r8     vol_cone  (double*)
; Returns:   eax = 1 after all n elements were processed,
;            eax = 0 if n <= 0 (no array is touched).
;            Note: main.cpp declares Cc3 as returning void and ignores eax.
; Clobbers:  rax, rdx, r9, xmm0-xmm5, flags

; Validate the element count before touching any array.
    xor     eax,eax                 ;return code = 0 (nothing processed)
    movsxd  rdx,edx                 ;rdx = n (upper half of rdx is undefined on entry)
    test    rdx,rdx
    jle     .done                   ;signed test: n <= 0 -> return 0

; Initialize the processing loop variables.
    xor     r9d,r9d                 ;r9 = byte offset of the current element
    movsd   xmm4,[rel r8_pi]        ;xmm4 = pi  (RIP-relative: links as PIE)
    movsd   xmm5,[rel r8_3p0]       ;xmm5 = 3.0

; Loop invariant: rdx = elements remaining (> 0), r9 = 8 * elements done.
.cone_loop:
    movsd   xmm0,[rdi+r9]           ;xmm0 = r
    movsd   xmm1,[rsi+r9]           ;xmm1 = h
    movapd  xmm2,xmm0               ;xmm2 = r (full copy; reg-reg movsd would
    movapd  xmm3,xmm1               ;xmm3 = h  merge into the old destination)

; Surface area: pi * r * (r + sqrt(r * r + h * h))
    mulsd   xmm0,xmm0               ;xmm0 = r * r
    mulsd   xmm1,xmm1               ;xmm1 = h * h
    addsd   xmm0,xmm1               ;xmm0 = r * r + h * h
    sqrtsd  xmm0,xmm0               ;xmm0 = sqrt(r * r + h * h)
    addsd   xmm0,xmm2               ;xmm0 = r + sqrt(r * r + h * h)
    mulsd   xmm0,xmm2               ;xmm0 = r * (r + sqrt(r * r + h * h))
    mulsd   xmm0,xmm4               ;xmm0 = pi * r * (r + sqrt(r * r + h * h))

; Volume: pi * r * r * h / 3
    mulsd   xmm2,xmm2               ;xmm2 = r * r
    mulsd   xmm3,xmm4               ;xmm3 = h * pi
    mulsd   xmm3,xmm2               ;xmm3 = pi * r * r * h
    divsd   xmm3,xmm5               ;xmm3 = pi * r * r * h / 3

    movsd   [rcx+r9],xmm0           ;sa_cone[i] = surface area
    movsd   [r8+r9],xmm3            ;vol_cone[i] = volume

    add     r9,8                    ;advance to the next double
    dec     rdx                     ;one fewer element remaining
    jnz     .cone_loop              ;repeat until all n are done

    mov     eax,1                   ;set success return code
.done:
    ret
build
g++ -c main.cpp -o main.o
nasm -f elf64 -o callingconvention3.o callingconvention3.asm
g++ -o callingconvention3 callingconvention3.o main.o