main.cpp
#include <stdio.h>
#include "../../commonfiles/miscdefs.h"
extern "C" Uint64 AvxGprMulx(Uint32 a, Uint32 b, Uint8 flags[2]);
extern "C" void AvxGprShiftx(Int32 x, Uint32 count, Int32 results[3]);
void AvxGprMulxCpp(void)
{
const int n = 3;
Uint32 a[n] = {64, 3200, 100000000};
Uint32 b[n] = {1001, 12, 250000000};
printf("Results for AvxGprMulx()\n");
for (int i = 0; i < n; i++)
{
Uint8 flags[2];
Uint64 c = AvxGprMulx(a[i], b[i], flags);
printf("Test case %d\n", i);
printf(" a: %u b: %u c: %llu\n", a[i], b[i], c);
printf(" status flags before mulx: 0x%02X\n", flags[0]);
printf(" status flags after mulx: 0x%02X\n", flags[1]);
}
}
// Drives the assembly routine AvxGprShiftx() with a fixed table of
// (value, shift count) pairs. For every pair it prints the input value in
// hex and signed decimal, the shift count, and the three results the routine
// stores into the results array: sarx (index 0), shlx (index 1) and
// shrx (index 2).
void AvxGprShiftxCpp(void)
{
    const int n = 4;

    // Two of the test patterns have bit 31 set. Written as hex literals they
    // have an unsigned type and do not fit in a signed 32-bit value, so the
    // brace initializer would contain a narrowing conversion, which C++11
    // and later reject. The explicit casts keep the same bit patterns.
    // NOTE(review): assumes Int32 is a 32-bit signed typedef from
    // miscdefs.h -- confirm.
    Int32 x[n] = { 0x00000008, (Int32)0x80000080, 0x00000040, (Int32)0xfffffc10 };
    Uint32 count[n] = { 2, 5, 3, 4 };

    printf("\nResults for AvxGprShiftx()\n");

    for (int i = 0; i < n; i++)
    {
        // All three slots are written by AvxGprShiftx() before they are read.
        Int32 results[3];

        AvxGprShiftx(x[i], count[i], results);

        // %X expects an unsigned argument, so the signed values are cast for
        // the hex column; the decimal column still prints the signed value.
        printf("Test case %d\n", i);
        printf(" x: 0x%08X (%11d) count: %u\n", (unsigned int)x[i], x[i], count[i]);
        printf(" sarx: 0x%08X (%11d)\n", (unsigned int)results[0], results[0]);
        printf(" shlx: 0x%08X (%11d)\n", (unsigned int)results[1], results[1]);
        printf(" shrx: 0x%08X (%11d)\n", (unsigned int)results[2], results[2]);
    }
}
// Program entry point: runs the mulx demonstration followed by the
// sarx/shlx/shrx demonstration, then exits with status 0.
int main(int argc, char* argv[])
{
    // The command-line arguments are not used by this program.
    (void)argc;
    (void)argv;

    AvxGprMulxCpp();
    AvxGprShiftxCpp();

    return 0;
}
avxgprmulxshiftx.asm
; Name: avxgprmulxshiftx.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o avxgprmulxshiftx.o avxgprmulxshiftx.asm
; g++ -m32 -o avxgprmulxshiftx avxgprmulxshiftx.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 482
global AvxGprMulx
global AvxGprShiftx
section .text
; extern "C" Uint64 AvxGprMulx(Uint32 a, Uint32 b, Uint8 flags[2]);
;
; Description: The following function demonstrates use of the
; flagless unsigned integer multiply instruction mulx.
;
; Requires BMI2.
; Symbolic names for the three stack arguments. They are addressed relative
; to ebp, so they are only meaningful after the prologue below has executed
; (push ebp / mov ebp,esp leaves the first argument at [ebp+8]).
%define a [ebp+8]
%define b [ebp+12]
%define flags [ebp+16]
; AvxGprMulx
;   in:  a     = first 32-bit unsigned multiplicand
;        b     = second 32-bit unsigned multiplicand
;        flags = pointer to a 2-byte array that receives the status flags
;   out: edx:eax = 64-bit product (edx = high dword, eax = low dword)
;        flags[0] = ah as loaded by lahf before the mulx
;        flags[1] = ah as loaded by lahf after the mulx
;   Registers written: eax, ecx, edx. ebp is saved and restored.
AvxGprMulx:
push ebp
mov ebp,esp
; Save copy of status flags before mulx
mov ecx,flags ;ecx = ptr to flags array
lahf ;ah = low byte of the flags register
mov byte[ecx],ah ;flags[0] = status flags before mulx
; Perform flagless multiplication. The mulx instruction below computes
; the product of explicit source operand [ebp+8] and implicit source
; operand edx. The 64-bit result is saved to the register pair edx:eax.
mov edx,b ;edx = b
mulx edx,eax,a ;edx:eax = [ebp+8] * edx
; Save copy of status flags after mulx. lahf overwrites ah, which is part
; of the low dword of the product held in eax, so eax is preserved on the
; stack around it.
push eax ;save low dword of product
lahf ;ah = low byte of the flags register
mov byte[ecx+1],ah ;flags[1] = status flags after mulx
pop eax ;restore low dword of product
pop ebp
ret
; extern "C" void AvxGprShiftx(Int32 x, Uint32 count, Int32 results[3]);
;
; Description: The following function demonstrates use of the flagless
; shift instructions sarx, shlx, and shrx.
;
; Requires BMI2
; Symbolic names for the three stack arguments. They are addressed relative
; to ebp, so they are only meaningful after the prologue below has executed
; (push ebp / mov ebp,esp leaves the first argument at [ebp+8]).
%define x [ebp+8]
%define count [ebp+12]
%define results [ebp+16]
; AvxGprShiftx
;   in:  x       = 32-bit value to shift
;        count   = shift bit count
;        results = pointer to an array of three 32-bit integers
;   out: results[0] = x shifted right arithmetically by count (sarx)
;        results[1] = x shifted left logically by count (shlx)
;        results[2] = x shifted right logically by count (shrx)
;   Registers written: eax, ecx, edx. ebp is saved and restored.
AvxGprShiftx:
push ebp
mov ebp,esp
; Load argument values and perform shifts. Note that each shift
; instruction requires three operands: DesOp, SrcOp, and CountOp.
; x is read from memory by every shift, so each result is computed from
; the original argument value rather than from a previous result.
mov ecx,count ;ecx = shift bit count
mov edx,results ;edx = ptr to results
sarx eax,x,ecx ;shift arithmetic right
mov [edx],eax ;results[0] = sarx result
shlx eax,x,ecx ;shift logical left
mov [edx+4],eax ;results[1] = shlx result
shrx eax,x,ecx ;shift logical right
mov [edx+8],eax ;results[2] = shrx result
pop ebp
ret
build
g++ -c -m32 main.cpp -o main.o
nasm -f elf32 -o avxgprmulxshiftx.o avxgprmulxshiftx.asm
g++ -m32 -o avxgprmulxshiftx avxgprmulxshiftx.o main.o