addarray.asm

;name: addarray.asm
;
;description: add 2 arrays with the aid of AVX instructions
;
;source https://www.physicsforums.com/insights/an-intro-to-avx-512-assembly-programming/

bits 64

%include "../Driver/inc/addarray.inc"

global AddArray
align 64

section .rodata
    
section .text

;-----------------------------------------------------------------------
; void AddArray(float Dest[], float Arr1[], float Arr2[])
; Purpose: element-wise sum of two 8-float arrays using one 256-bit
;          AVX register (Dest[i] = Arr1[i] + Arr2[i], i = 0..7).
; In:      rdi = destination array (8 floats written)
;          rsi = first source array  (8 floats read)
;          rdx = second source array (8 floats read)
; Out:     nothing in registers; result stored at [rdi]
; Clobb:   ymm0
; Stack:   none (leaf routine, so no frame pointer is set up)
; Align:   none required; unaligned loads/stores are used so callers
;          may pass arrays at any address.
;-----------------------------------------------------------------------
AddArray:
	vmovups	ymm0, [rsi]		; ymm0 = Arr1[0..7]
	vaddps	ymm0, ymm0, [rdx]	; ymm0 += Arr2[0..7] (VEX memory operand has no alignment requirement)
	vmovups	[rdi], ymm0		; Dest[0..7] = ymm0

	vzeroupper			; clear upper YMM halves so SSE code in the caller pays no transition penalty
	ret

main.cpp

/*
 *  AVXAddArrays
 * Source https://www.physicsforums.com/insights/an-intro-to-avx-512-assembly-programming/
 *        downgraded to AVX
*/

#include <iostream>

using std::cout;
using std::endl;
// Prototypes
// AddArray is implemented in assembly (addarray.asm) and exported with C linkage.
// It stores Arr1[i] + Arr2[i] into Dest[i]; the visible implementation processes
// exactly 8 floats (one 256-bit YMM register) per call.
extern "C" void AddArray(float Dest[], float Arr1[], float Arr2[]);
// Writes the first `count` elements of the array to std::cout, each followed by
// a tab, then ends the line.
void PrintArray(float[], int count);

// First source array: eight floats placed on a 32-byte boundary
// (the width of one 256-bit YMM register).
alignas(32) float Array1[] =
{
	1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f
};

// Second source array: eight floats placed on a 32-byte boundary.
alignas(32) float Array2[] =
{
	1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f
};

alignas(32) float Dest[8]; // Destination array (zero-initialized static storage, 32-byte aligned)

// Entry point: sums Array1 and Array2 into Dest through the assembly
// routine, then prints every element of Dest on one line.
int main()
{
	const int elementCount = static_cast<int>(sizeof(Dest) / sizeof(Dest[0]));

	AddArray(Dest, Array1, Array2); // Dest[i] = Array1[i] + Array2[i]
	PrintArray(Dest, elementCount);
	return 0;
}

// Prints the first `count` elements of Arr to standard output, each followed
// by a tab character, then ends the line (std::endl also flushes the stream).
// A count of zero or less prints only the line ending.
void PrintArray(float Arr[], int count)
{
	for (int i = 0; i < count; i++)
	{
		std::cout << Arr[i] << '\t';
	}
	std::cout << std::endl;
}

Driver.pro

# Console application built as C++11, without a macOS app bundle and
# without linking the Qt libraries.
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt
TEMPLATE = app
# Extra linker flag: request a non-PIE executable.
QMAKE_LFLAGS += -no-pie
# Register the custom "nasm" compiler whose properties are set below.
QMAKE_EXTRA_COMPILERS += nasm

# NASM options: 64-bit ELF object output with DWARF debug information.
NASMEXTRAFLAGS = -f elf64 -g -F dwarf
# Each input file is assembled into <base name>.o.
nasm.output = ${QMAKE_FILE_BASE}.o
nasm.commands = nasm $$NASMEXTRAFLAGS -o ${QMAKE_FILE_BASE}.o ${QMAKE_FILE_NAME}
# The nasm compiler takes its input files from the NASM_SOURCES variable.
nasm.input = NASM_SOURCES

TARGET = Driver

# NOTE(review): $$NASM_SOURCES is expanded here, before NASM_SOURCES is
# assigned further down, so it presumably contributes nothing to SOURCES
# at this point - confirm whether that is intended.
SOURCES += $$NASM_SOURCES \
    main.cpp

HEADERS +=

# Assembly files handed to the nasm extra compiler.
NASM_SOURCES += \
    asm/addarray.asm