addarray.asm
;name: addarray.asm
;
;description: add 2 arrays with the aid of AVX instructions
;
;source https://www.physicsforums.com/insights/an-intro-to-avx-512-assembly-programming/
; Assemble as 64-bit code.
bits 64
; Project-local include; its contents are not visible from this file.
%include "../Driver/inc/addarray.inc"
; Export AddArray so that it can be linked against from other object files.
global AddArray
; NOTE(review): this align is issued before any section directive, so it
; applies to whichever section is current at this point rather than to the
; .rodata section declared below - confirm this is the intent.
align 64
; No read-only data is defined in the visible source; the section is
; declared and immediately left for .text.
section .rodata
section .text
;-----------------------------------------------------------------------
; void AddArray(float Dest[], float Arr1[], float Arr2[])
;
; Adds eight packed single-precision floats from Arr1 and Arr2
; element-wise and stores the eight sums in Dest.
;
; ABI:   System V AMD64
; In:    rdi = Dest (destination array)
;        rsi = Arr1 (first source array)
;        rdx = Arr2 (second source array)
;        All three pointers must be 32-byte aligned: vmovaps with a ymm
;        operand faults on a misaligned address.
; Out:   Dest[0..7] = Arr1[0..7] + Arr2[0..7]
; Clobb: ymm0, ymm1, ymm2 (all caller-saved under this ABI)
; Stack: none - leaf routine, so no frame pointer is set up.
;-----------------------------------------------------------------------
AddArray:
        vmovaps ymm0, [rsi]             ; ymm0 = Arr1[0..7]
        vmovaps ymm1, [rdx]             ; ymm1 = Arr2[0..7]
        vaddps  ymm2, ymm0, ymm1        ; ymm2 = Arr1[i] + Arr2[i]
        vmovaps [rdi], ymm2             ; Dest[0..7] = ymm2

        ; The caller is compiled code that may use legacy SSE encodings;
        ; clear the upper ymm halves so it does not pay an AVX<->SSE
        ; state-transition penalty after we return.
        vzeroupper
        ret
main.cpp
/*
* AVXAddArrays
* Source https://www.physicsforums.com/insights/an-intro-to-avx-512-assembly-programming/
* downgraded to AVX
*/
#include <iostream>
using std::cout;
using std::endl;
// Prototypes
// AddArray is implemented in assembly (addarray.asm). extern "C" turns off
// C++ name mangling so the linker resolves the plain `AddArray` symbol.
// It adds eight floats from Arr1 and Arr2 element-wise and stores the sums
// in Dest; it uses aligned 256-bit loads/stores, so all three arrays must
// be 32-byte aligned.
extern "C" void AddArray(float Dest[], float Arr1[], float Arr2[]);
// Writes the first `count` elements of the array to standard output, each
// followed by a tab, then ends the line.
void PrintArray(float[], int count);
// Data is aligned to 32-byte boundaries (see the aligned(32) attributes):
// AddArray accesses these arrays with aligned 256-bit vmovaps instructions.
float Array1[] __attribute__((aligned(32))) = // First source array
{
1, 2, 3, 4, 5, 6, 7, 8
};
float Array2[] __attribute__((aligned(32))) = // Second source array
{
1, 2, 3, 4, 5, 6, 7, 8
};
float Dest[8] __attribute__((aligned(32))); // Destination array
int main()
{
    // Let the assembly routine sum the two source arrays into Dest.
    AddArray(Dest, Array1, Array2);

    // Print every element of the result; the count is derived from the
    // array's declared size instead of being repeated as a literal.
    const int elementCount = static_cast<int>(sizeof(Dest) / sizeof(Dest[0]));
    PrintArray(Dest, elementCount);

    return 0;
}
// Prints the first `count` elements of `Arr` to standard output on one
// line. Each element is followed by a tab character, and the line is
// terminated (and the stream flushed) with std::endl.
//
// Arr:   array to print; must hold at least `count` elements.
// count: number of elements to print; zero or less prints only the
//        line terminator.
void PrintArray(float Arr[], int count)
{
    for (int i = 0; i < count; i++)
    {
        std::cout << Arr[i] << '\t';
    }
    std::cout << std::endl;
}
qmake project file (.pro)
# Console application built as C++11, without Qt and without an app bundle.
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt
TEMPLATE = app
# Pass -no-pie to the linker.
QMAKE_LFLAGS += -no-pie
# Register an extra compiler named "nasm"; its properties are set below.
QMAKE_EXTRA_COMPILERS += nasm
# Emit ELF64 objects with DWARF debug information.
NASMEXTRAFLAGS = -f elf64 -g -F dwarf
# Each input file <base>.asm is assembled into <base>.o.
nasm.output = ${QMAKE_FILE_BASE}.o
nasm.commands = nasm $$NASMEXTRAFLAGS -o ${QMAKE_FILE_BASE}.o ${QMAKE_FILE_NAME}
# The nasm compiler takes its input files from the NASM_SOURCES variable.
nasm.input = NASM_SOURCES
TARGET = Driver
# NOTE(review): NASM_SOURCES is only assigned further down, so
# $$NASM_SOURCES presumably expands to nothing at this point - confirm
# whether listing it in SOURCES is intended at all.
SOURCES += $$NASM_SOURCES \
main.cpp
HEADERS +=
# Assembly files handed to the nasm extra compiler.
NASM_SOURCES += \
asm/addarray.asm