main.cpp
#include <stdio.h>
#include "../../commonfiles/xmmval.h"
#include "../../commonfiles/ymmval.h"
#include <memory.h>
#define _USE_MATH_DEFINES
#include <math.h>
// The order of values in the following enum must match the table
// that's defined in AvxBroadcast_.asm.
enum Brop : unsigned int
{
Byte, Word, Dword, Qword
};
extern "C" void AvxBroadcastIntegerYmm(YmmVal* des, const XmmVal* src, Brop op);
extern "C" void AvxBroadcastFloat(YmmVal* des, float val);
extern "C" void AvxBroadcastDouble(YmmVal* des, double val);
void AvxBroadcastInteger(void)
{
char buff[512];
__attribute__((aligned(16))) XmmVal src;
__attribute__((aligned(32))) YmmVal des;
memset(&src, 0, sizeof(XmmVal));
src.i16[0] = 42;
AvxBroadcastIntegerYmm(&des, &src, Brop::Word);
printf("\nResults for AvxBroadcastInteger() - Brop::Word\n");
printf("src %s\n", src.ToString_i16(buff, sizeof(buff)));
printf("des lo %s\n", des.ToString_i16(buff, sizeof(buff), false));
printf("des hi %s\n", des.ToString_i16(buff, sizeof(buff), true));
src.i64[0] = -80;
AvxBroadcastIntegerYmm(&des, &src, Brop::Qword);
printf("\nResults for AvxBroadcastInteger() - Brop::Qword\n");
printf("src: %s\n", src.ToString_i64(buff, sizeof(buff)));
printf("des lo: %s\n", des.ToString_i64(buff, sizeof(buff), false));
printf("des hi: %s\n", des.ToString_i64(buff, sizeof(buff), true));
}
void AvxBroadcastFloatingPoint(void)
{
char buff[512];
__attribute__((aligned(32))) YmmVal des;
AvxBroadcastFloat(&des, (float)M_SQRT2);
printf("\nResults for AvxBroadcastFloatingPoint() - float\n");
printf("des lo: %s\n", des.ToString_r32(buff, sizeof(buff), false));
printf("des hi: %s\n", des.ToString_r32(buff, sizeof(buff), true));
AvxBroadcastDouble(&des, M_PI);
printf("\nResults for AvxBroadcastFloatingPoint() - double\n");
printf("des lo: %s\n", des.ToString_r64(buff, sizeof(buff), false));
printf("des hi: %s\n", des.ToString_r64(buff, sizeof(buff), true));
}
int main(int argc, char* argv[])
{
AvxBroadcastInteger();
AvxBroadcastFloatingPoint();
return 0;
}
avxbroadcast.asm
; Name: avxbroadcast.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o avxbroadcast.o avxbroadcast.asm
; g++ -m32 -o avxbroadcast avxbroadcast.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 447
global AvxBroadcastIntegerYmm
global AvxBroadcastFloat
global AvxBroadcastDouble
section .data
; The order of values in the following table must match the enum Brop
; that's defined in AvxBroadcast.cpp.
align 4
BropTable dd AvxBroadcastIntegerYmm.bropByte
dd AvxBroadcastIntegerYmm.bropWord
dd AvxBroadcastIntegerYmm.bropDword
dd AvxBroadcastIntegerYmm.bropQword
BropTableCount equ ($ - BropTable) / 4 ; 4 = size of dword in bytes
section .text
; extern "C" void AvxBroadcastIntegerYmm(YmmVal* des, const XmmVal* src, Brop op);
;
; Description: The following function demonstrates use of the
; vpbroadcastX instruction.
;
; Requires: AVX2
%define des [ebp+8]
%define op [ebp+16]
%define src [ebp+12]
AvxBroadcastIntegerYmm:
push ebp
mov ebp,esp
; Make sure op is valid
mov eax,op ;eax = op
cmp eax,BropTableCount
jae .badOp ;jump if op is invalid
; Load parameters and jump to specified instruction
mov ecx,des ;ecx = des
mov edx,src ;edx = src
vmovdqa xmm0,[edx] ;xmm0 = broadcast value (low item)
mov edx,[BropTable+eax*4]
jmp edx
; Perform byte broadcast
.bropByte:
vpbroadcastb ymm1,xmm0
vmovdqa [ecx],ymm1
vzeroupper
pop ebp
ret
; Perform word broadcast
.bropWord:
vpbroadcastw ymm1,xmm0
vmovdqa [ecx],ymm1
vzeroupper
pop ebp
ret
; Perform dword broadcast
.bropDword:
vpbroadcastd ymm1,xmm0
vmovdqa [ecx],ymm1
vzeroupper
pop ebp
ret
; Perform qword broadcast
.bropQword:
vpbroadcastq ymm1,xmm0
vmovdqa [ecx],ymm1
vzeroupper
pop ebp
ret
.badOp:
pop ebp
ret
; extern "C" void AvxBroadcastFloat(YmmVal* des, float val);
;
; Description: The following function demonstrates use of the
; vbroadcastss instruction.
;
; Requires: AVX
%define des [ebp+12]
%define val [ebp+8]
AvxBroadcastFloat:
push ebp
mov ebp,esp
; Broadcast val to all elements of des
mov eax,val
vbroadcastss ymm0,dword des
vmovaps [eax],ymm0
vzeroupper
pop ebp
ret
; extern "C" void AvxBroadcastDouble(YmmVal* des, double val);
;
; Description: The following function demonstrates use of the
; vbroadcastsd instruction.
;
; Requires: AVX
%define des [ebp+12]
%define val [ebp+8]
AvxBroadcastDouble:
push ebp
mov ebp,esp
; Broadcast val to all elements of des.
mov eax,val
vbroadcastsd ymm0,qword des
vmovapd [eax],ymm0
vzeroupper
pop ebp
ret
build
gcc -m32 -c main.cpp -o main.o
nasm -f elf32 -o avxbroadcast.o avxbroadcast.asm
g++ -m32 -o avxbroadcast avxbroadcast.o main.o ../../commonfiles/xmmval.o ../../commonfiles/ymmval.o