main.cpp
#include <stdio.h>
#include "../../commonfiles/ymmval.h"
extern "C" void AvxBlendFloat(YmmVal* des, YmmVal* src1, YmmVal* src2, YmmVal* src3);
extern "C" void AvxBlendByte(YmmVal* des, YmmVal* src1, YmmVal* src2, YmmVal* src3);
void AvxBlendFloat(void)
{
char buff[256];
const Uint32 select1 = 0x00000000;
const Uint32 select2 = 0x80000000;
__attribute__((aligned(32))) YmmVal des, src1, src2, src3;
src1.r32[0] = 100.0f; src2.r32[0] = -1000.0f;
src1.r32[1] = 200.0f; src2.r32[1] = -2000.0f;
src1.r32[2] = 300.0f; src2.r32[2] = -3000.0f;
src1.r32[3] = 400.0f; src2.r32[3] = -4000.0f;
src1.r32[4] = 500.0f; src2.r32[4] = -5000.0f;
src1.r32[5] = 600.0f; src2.r32[5] = -6000.0f;
src1.r32[6] = 700.0f; src2.r32[6] = -7000.0f;
src1.r32[7] = 800.0f; src2.r32[7] = -8000.0f;
src3.u32[0] = select2;
src3.u32[1] = select2;
src3.u32[2] = select1;
src3.u32[3] = select2;
src3.u32[4] = select1;
src3.u32[5] = select1;
src3.u32[6] = select2;
src3.u32[7] = select1;
AvxBlendFloat(&des, &src1, &src2, &src3);
printf("\nResults for AvxBlendFloat()\n");
printf("src1 lo: %s\n", src1.ToString_r32(buff, sizeof(buff), false));
printf("src1 hi: %s\n", src1.ToString_r32(buff, sizeof(buff), true));
printf("src2 lo: %s\n", src2.ToString_r32(buff, sizeof(buff), false));
printf("src2 hi: %s\n", src2.ToString_r32(buff, sizeof(buff), true));
printf("\n");
printf("src3 lo: %s\n", src3.ToString_x32(buff, sizeof(buff), false));
printf("src3 hi: %s\n", src3.ToString_x32(buff, sizeof(buff), true));
printf("\n");
printf("des lo: %s\n", des.ToString_r32(buff, sizeof(buff), false));
printf("des hi: %s\n", des.ToString_r32(buff, sizeof(buff), true));
}
void AvxBlendByte(void)
{
char buff[256];
__attribute__((aligned(32))) YmmVal des, src1, src2, src3;
// Control values required to perform doubleword blend
// using vpblendvb instruction
const Uint32 select1 = 0x00000000; // select src1
const Uint32 select2 = 0x80808080; // select src2
src1.i32[0] = 100; src2.i32[0] = -1000;
src1.i32[1] = 200; src2.i32[1] = -2000;
src1.i32[2] = 300; src2.i32[2] = -3000;
src1.i32[3] = 400; src2.i32[3] = -4000;
src1.i32[4] = 500; src2.i32[4] = -5000;
src1.i32[5] = 600; src2.i32[5] = -6000;
src1.i32[6] = 700; src2.i32[6] = -7000;
src1.i32[7] = 800; src2.i32[7] = -8000;
src3.u32[0] = select1;
src3.u32[1] = select1;
src3.u32[2] = select2;
src3.u32[3] = select1;
src3.u32[4] = select2;
src3.u32[5] = select2;
src3.u32[6] = select1;
src3.u32[7] = select2;
AvxBlendByte(&des, &src1, &src2, &src3);
printf("\nResults for AvxBlendByte() - doublewords\n");
printf("src1 lo: %s\n", src1.ToString_i32(buff, sizeof(buff), false));
printf("src1 hi: %s\n", src1.ToString_i32(buff, sizeof(buff), true));
printf("src2 lo: %s\n", src2.ToString_i32(buff, sizeof(buff), false));
printf("src2 hi: %s\n", src2.ToString_i32(buff, sizeof(buff), true));
printf("\n");
printf("src3 lo: %s\n", src3.ToString_x32(buff, sizeof(buff), false));
printf("src3 hi: %s\n", src3.ToString_x32(buff, sizeof(buff), true));
printf("\n");
printf("des lo: %s\n", des.ToString_i32(buff, sizeof(buff), false));
printf("des hi: %s\n", des.ToString_i32(buff, sizeof(buff), true));
}
// Program entry point: runs the single-precision blend demo followed by the
// byte blend demo. Command-line arguments are not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    AvxBlendFloat();
    AvxBlendByte();

    return 0;
}
avxblend.asm
; Name: avxblend.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o avxblend.o avxblend.asm
; g++ -m32 -o avxblend avxblend.o main.o ../../commonfiles/ymmval.o
;
; Source: Modern x86 Assembly Language Programming p. 453
; Entry points exported to the C++ driver, which declares them extern "C".
global AvxBlendFloat
global AvxBlendByte
; Declare that this object does not need an executable stack. Without this
; note the GNU linker assumes a hand-written ELF object may need one and
; marks the whole program's stack executable.
section .note.GNU-stack noalloc noexec nowrite progbits
section .text
; extern "C" void AvxBlendFloat(YmmVal* des, YmmVal* src1, YmmVal* src2, YmmVal* src3);
;
; Description: The following function demonstrates use of the vblendvps
; instruction using YMM registers.
;
; For each of the eight doublewords, the result element is taken from src2
; when bit 31 of the corresponding src3 element is set, and from src1
; otherwise. The result is stored to des.
;
; All four pointers must be 32-byte aligned: the aligned move forms
; (vmovaps/vmovdqa) fault on a misaligned 256-bit memory operand.
;
; Requires: AVX
;
; Stack arguments, in prototype order. With the 32-bit C calling convention
; the first argument is at [ebp+8] after the prologue and each following
; pointer is 4 bytes higher.
%define des [ebp+8]
%define src1 [ebp+12]
%define src2 [ebp+16]
%define src3 [ebp+20]
AvxBlendFloat:
    push ebp
    mov ebp,esp
; Load argument values
    mov eax,src1                    ;eax = ptr to src1
    mov ecx,src2                    ;ecx = ptr to src2
    mov edx,src3                    ;edx = ptr to src3 (control mask)
    vmovaps ymm1,[eax]              ;ymm1 = src1
    vmovaps ymm2,[ecx]              ;ymm2 = src2
    vmovdqa ymm3,[edx]              ;ymm3 = src3
; Perform variable SPFP blend
    vblendvps ymm0,ymm1,ymm2,ymm3   ;ymm0 = blend result
    mov eax,des                     ;eax = ptr to des
    vmovaps [eax],ymm0              ;save blend result
; Clear the upper YMM halves before returning to non-AVX code
    vzeroupper
    pop ebp
    ret
; extern "C" void AvxBlendByte(YmmVal* des, YmmVal* src1, YmmVal* src2, YmmVal* src3);
;
; Description: The following function demonstrates use of the vpblendvb
; instruction.
;
; For each of the 32 bytes, the result byte is taken from src2 when the
; most significant bit of the corresponding src3 byte is set, and from src1
; otherwise. The result is stored to des.
;
; All four pointers must be 32-byte aligned: vmovdqa faults on a misaligned
; 256-bit memory operand.
;
; Requires: AVX2
;
; Stack arguments, in prototype order. With the 32-bit C calling convention
; the first argument is at [ebp+8] after the prologue and each following
; pointer is 4 bytes higher.
%define des [ebp+8]
%define src1 [ebp+12]
%define src2 [ebp+16]
%define src3 [ebp+20]
AvxBlendByte:
    push ebp
    mov ebp,esp
; Load argument values
    mov eax,src1                    ;eax = ptr to src1
    mov ecx,src2                    ;ecx = ptr to src2
    mov edx,src3                    ;edx = ptr to src3 (control mask)
    vmovdqa ymm1,[eax]              ;ymm1 = src1
    vmovdqa ymm2,[ecx]              ;ymm2 = src2
    vmovdqa ymm3,[edx]              ;ymm3 = src3
; Perform variable byte blend
    vpblendvb ymm0,ymm1,ymm2,ymm3   ;ymm0 = blend result
    mov eax,des                     ;eax = ptr to des
    vmovdqa [eax],ymm0              ;save blend result
; Clear the upper YMM halves before returning to non-AVX code
    vzeroupper
    pop ebp
    ret
build
# Stop at the first failing step so a stale object file is never linked.
set -e
# Compile the C++ driver as 32-bit code (g++, matching the link step).
g++ -m32 -c main.cpp -o main.o
# Assemble the AVX routines into a 32-bit ELF object.
nasm -f elf32 -o avxblend.o avxblend.asm
# Link. ymmval.o is not built here and must already exist.
g++ -m32 -o avxblend avxblend.o main.o ../../commonfiles/ymmval.o