main.cpp
#include "../commonfiles/mmxval.h"
// Selects which packed-add instruction MmxAdd executes.
// Each enumerator is used as an index into AddOpTable in mmxaddition.asm,
// so the values below must stay in step with the order of that table.
enum MmxAddOp : unsigned int
{
    paddb   = 0,    // packed byte addition with wraparound
    paddsb  = 1,    // packed byte addition with signed saturation
    paddusb = 2,    // packed byte addition with unsigned saturation
    paddw   = 3,    // packed word addition with wraparound
    paddsw  = 4,    // packed word addition with signed saturation
    paddusw = 5,    // packed word addition with unsigned saturation
    paddd   = 6     // packed doubleword addition with wraparound
};
// Implemented in mmxaddition.asm. Adds the packed values a and b using the
// padd* instruction selected by op and writes the result through c.
// Returns true on success; returns false, with a zero result written
// through c, when op is not a valid index into the assembly jump table.
extern "C" bool MmxAdd(MmxVal a, MmxVal b, MmxAddOp op, MmxVal *c );
// Demonstrates packed byte addition: prints the operands and the results of
// paddb/paddsb on signed bytes, then paddb/paddusb on unsigned bytes.
void MmxAddBytes(void)
{
    const int kLanes = 8;

    // Signed operands, one entry per byte lane. Some pairs (e.g. 80 + 64,
    // -70 + -80) do not fit in a signed byte, so wraparound and signed
    // saturation produce different results for them.
    const int signedA[kLanes] = { 50, 80, -27, -70, -42,  60, 64, 100 };
    const int signedB[kLanes] = { 30, 64, -32, -80,  90, -85, 90, -30 };

    // Unsigned operands. Some pairs (e.g. 132 + 130, 200 + 180) exceed 255,
    // so wraparound and unsigned saturation produce different results.
    const int unsignedA[kLanes] = { 50, 80, 132, 200, 42, 60, 140, 10 };
    const int unsignedB[kLanes] = { 30, 64, 130, 180, 90, 85, 160, 14 };

    MmxVal a, b, c;
    char buff[256];     // scratch buffer reused by every ToString_* call

    // Packed byte addition - signed integers
    for (int lane = 0; lane < kLanes; ++lane)
    {
        a.i8[lane] = signedA[lane];
        b.i8[lane] = signedB[lane];
    }

    printf("\n\nPacked byte addition - signed integers\n");
    printf("a: %s\n", a.ToString_i8(buff, sizeof(buff)));
    printf("b: %s\n", b.ToString_i8(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddb, &c);
    printf("\npaddb results\n");
    printf("c: %s\n", c.ToString_i8(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddsb, &c);
    printf("\npaddsb results\n");
    printf("c: %s\n", c.ToString_i8(buff, sizeof(buff)));

    // Packed byte addition - unsigned integers
    for (int lane = 0; lane < kLanes; ++lane)
    {
        a.u8[lane] = unsignedA[lane];
        b.u8[lane] = unsignedB[lane];
    }

    printf("\n\nPacked byte addition - unsigned integers\n");
    printf("a: %s\n", a.ToString_u8(buff, sizeof(buff)));
    printf("b: %s\n", b.ToString_u8(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddb, &c);
    printf("\npaddb results\n");
    printf("c: %s\n", c.ToString_u8(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddusb, &c);
    printf("\npaddusb results\n");
    printf("c: %s\n", c.ToString_u8(buff, sizeof(buff)));
}
// Demonstrates packed word addition: prints the operands and the results of
// paddw/paddsw on signed words, then paddw/paddusw on unsigned words.
void MmxAddWords(void)
{
    const int kLanes = 4;

    // Signed operands, one entry per 16-bit lane. 30000 + 5000 and
    // -7000 + -32000 do not fit in a signed word, so wraparound and signed
    // saturation produce different results for those lanes.
    const int signedA[kLanes] = { 550, 30000, -270,  -7000 };
    const int signedB[kLanes] = { 830,  5000, -320, -32000 };

    // Unsigned operands. 48000 + 20000 and 10000 + 60000 exceed 65535, so
    // wraparound and unsigned saturation produce different results.
    const int unsignedA[kLanes] = { 50, 48000, 132, 10000 };
    const int unsignedB[kLanes] = { 30, 20000, 130, 60000 };

    MmxVal a, b, c;
    char buff[256];     // scratch buffer reused by every ToString_* call

    // Packed word addition - signed integers
    for (int lane = 0; lane < kLanes; ++lane)
    {
        a.i16[lane] = signedA[lane];
        b.i16[lane] = signedB[lane];
    }

    printf("\n\nPacked word addition - signed integers\n");
    printf("a: %s\n", a.ToString_i16(buff, sizeof(buff)));
    printf("b: %s\n", b.ToString_i16(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddw, &c);
    printf("\npaddw results\n");
    printf("c: %s\n", c.ToString_i16(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddsw, &c);
    printf("\npaddsw results\n");
    printf("c: %s\n", c.ToString_i16(buff, sizeof(buff)));

    // Packed word addition - unsigned integers
    for (int lane = 0; lane < kLanes; ++lane)
    {
        a.u16[lane] = unsignedA[lane];
        b.u16[lane] = unsignedB[lane];
    }

    printf("\n\nPacked word addition - unsigned integers\n");
    printf("a: %s\n", a.ToString_u16(buff, sizeof(buff)));
    printf("b: %s\n", b.ToString_u16(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddw, &c);
    printf("\npaddw results\n");
    printf("c: %s\n", c.ToString_u16(buff, sizeof(buff)));

    MmxAdd(a, b, MmxAddOp::paddusw, &c);
    printf("\npaddusw results\n");
    printf("c: %s\n", c.ToString_u16(buff, sizeof(buff)));
}
// Program entry point: runs the byte demo followed by the word demo.
int main(int argc, char* argv[])
{
    // The command-line arguments are not used by this demo.
    (void)argc;
    (void)argv;

    MmxAddBytes();
    MmxAddWords();

    return 0;
}
mmxaddition.asm
; extern "C" bool MmxAdd(MmxVal a, MmxVal b, MmxAddOp op, MmxVal *c );
;
; Description: The following function demonstrates use of the
; padd* instructions.
;
; Returns: *c receives the calculated result (zero if 'op' is invalid).
; eax is true on success and false if 'op' is invalid.
;
; Name: mmxaddition.asm
;
; Build: g++ -c -m32 main.cpp -o main.o -std=c++11
; nasm -f elf32 -o mmxaddition.o mmxaddition.asm
; g++ -m32 -o mmxaddition mmxaddition.o main.o ../commonfiles/mmxval.o
;
; Source: Modern X86 Assembly Language Programming p.149
global MmxAdd

; Size in bytes of one jump-table entry (a 32-bit code address).
%define DWORD_SIZE 4

section .data
align 4

; Jump table indexed by the 'op' argument of MmxAdd. The entries must
; appear in the same order as the MmxAddOp enumerators in main.cpp.
AddOpTable:
        dd MmxAdd.mmxPaddb, MmxAdd.mmxPaddsb, MmxAdd.mmxPaddusb     ; byte adds
        dd MmxAdd.mmxPaddw, MmxAdd.mmxPaddsw, MmxAdd.mmxPaddusw     ; word adds
        dd MmxAdd.mmxPaddd                                          ; dword add

; Number of entries in the table; MmxAdd uses it to validate 'op'.
AddOpTableCount equ ($ - AddOpTable) / DWORD_SIZE
section .text

;-----------------------------------------------------------------------
; extern "C" bool MmxAdd(MmxVal a, MmxVal b, MmxAddOp op, MmxVal *c);
;
; Adds the packed operands 'a' and 'b' with the padd* instruction selected
; by 'op' and stores the 8-byte result through 'c'.
;
; Syntax: NASM, 32-bit code; all arguments are read from the stack.
; In:     [ebp+8]  = a   (8 bytes, passed by value)
;         [ebp+16] = b   (8 bytes, passed by value)
;         [ebp+24] = op  (index into AddOpTable)
;         [ebp+28] = c   (pointer to the 8-byte result)
; Out:    *c  = packed sum, or 0 when 'op' is out of range
;         eax = 1 on success, 0 when 'op' is out of range
; Clobb:  eax, edx, mm0, mm1, flags; MMX state is cleared with emms
;-----------------------------------------------------------------------
MmxAdd:
%define ARG_A   [ebp+8]
%define ARG_B   [ebp+16]
%define ARG_OP  [ebp+24]
%define ARG_C   [ebp+28]

        push    ebp
        mov     ebp, esp

        ; Validate 'op' before it is used as a jump-table index.
        mov     eax, ARG_OP
        cmp     eax, AddOpTableCount
        jae     .badAddOp               ; unsigned compare: op >= count is invalid

        ; Load both operands, then dispatch to the selected instruction.
        movq    mm0, ARG_A
        movq    mm1, ARG_B
        jmp     [AddOpTable + eax*DWORD_SIZE]

.mmxPaddb:
        paddb   mm0, mm1                ; bytes, wraparound
        jmp     .saveResult

.mmxPaddsb:
        paddsb  mm0, mm1                ; bytes, signed saturation
        jmp     .saveResult

.mmxPaddusb:
        paddusb mm0, mm1                ; bytes, unsigned saturation
        jmp     .saveResult

.mmxPaddw:
        paddw   mm0, mm1                ; words, wraparound
        jmp     .saveResult

.mmxPaddsw:
        paddsw  mm0, mm1                ; words, signed saturation
        jmp     .saveResult

.mmxPaddusw:
        paddusw mm0, mm1                ; words, unsigned saturation
        jmp     .saveResult

.mmxPaddd:
        paddd   mm0, mm1                ; dwords, wraparound
        jmp     .saveResult

.badAddOp:
        pxor    mm0, mm0                ; invalid 'op': result is all zeros
        xor     eax, eax                ; return value = false
        jmp     .done

.saveResult:
        mov     eax, 1                  ; return value = true

.done:
        ; The result is written through 'c' instead of being returned in
        ; edx:eax. An MmxVal is 8 bytes ('a' sits at [ebp+8], 'b' at
        ; [ebp+16]), so exactly one quadword is stored; writing anything at
        ; [edx+8] would overwrite the caller's memory beyond *c.
        mov     edx, ARG_C
        movq    [edx], mm0

        emms                            ; clear MMX state
        pop     ebp
        ret
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o mmxaddition.o mmxaddition.asm
g++ -m32 -o mmxaddition mmxaddition.o main.o ../commonfiles/mmxval.o