Star InactiveStar InactiveStar InactiveStar InactiveStar Inactive
 
main.cpp
#include <stdio.h>
#include <stdlib.h>
#include "../commonfiles/miscdefs.h"

// Assembly routine implemented in mmxcalcmean.asm.
// Returns false when 'n' is rejected; otherwise stores the sum of the n bytes
// at 'x' in *sum_x, their mean in *mean, and returns true.
extern "C" bool MmxCalcMean(const Uint8* x, int n, Uint32* sum_x, double* mean);
// Size limits. They are defined here with C linkage so that mmxcalcmean.asm
// can read them through its 'extern NMIN' / 'extern NMAX' declarations.
extern "C" int NMIN = 16;               // Minimum number of elements
extern "C" int NMAX = 16777216;         // Maximum number of elements

// Common constants
// Test array length: 0x800000 = 8388608, a multiple of 16 inside [NMIN, NMAX].
const int NUM_ELEMENTS = 0x800000;
// Fixed seed so every run fills the test array with the same rand() sequence.
const int SRAND_SEED = 23;

bool MmxCalcMeanCpp(const Uint8* x, int n, Uint32* sum_x, double* mean_x)
{
    if ((n < NMIN) || (n > NMAX) || ((n & 0x0f) != 0))
        return false;

    Uint32 sum_x_temp = 0;
    for (int i = 0; i < n; i++)
        sum_x_temp += x[i];

    *sum_x = sum_x_temp;
    *mean_x = (double)sum_x_temp / n;
    return true;
}

void MmxCalcMean()
{
    const int n = NUM_ELEMENTS;
    Uint8* x = new Uint8[n];

    srand(SRAND_SEED);
    for (int i = 0; i < n; i++)
        x[i] = rand() % 256;

    bool rc1, rc2;
    Uint32 sum_x1 = 0, sum_x2 = 0;
    double mean_x1 = 0, mean_x2 = 0;
	
    rc1 = MmxCalcMeanCpp(x, n, &sum_x1, &mean_x1);
    rc2 = MmxCalcMean(x, n, &sum_x2, &mean_x2);

    printf("\nResults for MmxCalcMean()\n");
    printf("rc1: %d sum_x1: %u mean_x1: %12.6lf\n", rc1, sum_x1, mean_x1);
    printf("rc2: %d sum_x2: %u mean_x2: %12.6lf\n", rc2, sum_x2, mean_x2);
    delete[] x;
}

// Program entry point: runs the MmxCalcMean comparison once.
// Command-line arguments are not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    MmxCalcMean();
    return EXIT_SUCCESS;
}
mmxcalcmean.asm
; extern "C" bool MmxCalcMean(const Uint8* x, int n, Uint32* sum_x, double* mean);
;
; Description:  This function calculates the sum and mean of an array
;               containing 8-bit unsigned integers.
;
; Returns:      0 = invalid 'n' (n < NMIN, n > NMAX, or n not a multiple of 16)
;               1 = success (*sum_x and *mean have been written)
;
; Name:		mmxcalcmean.asm
;
; Build:	g++ -c -m32 main.cpp -o main.o -std=c++11
;		nasm -f elf32 -o mmxcalcmean.o mmxcalcmean.asm
;		g++ -m32 -o mmxcalcmean mmxcalcmean.o main.o
;
; Source:	Modern X86 Assembly Language Programming p.173

extern NMIN
extern NMAX                             ;int limits on n, defined in main.cpp
global MmxCalcMean

section .text

BLOCK_BYTES     equ 16                  ;bytes consumed per loop iteration
BLOCK_SHIFT     equ 4                   ;log2(BLOCK_BYTES)

;-----------------------------------------------------------------------
; extern "C" bool MmxCalcMean(const Uint8* x, int n, Uint32* sum_x, double* mean);
;
; Sums an array of unsigned bytes, 16 bytes per iteration, using MMX,
; then stores the 32-bit sum and the double-precision mean through the
; output pointers.
;
; Syntax: NASM, 32-bit (nasm -f elf32)
; ABI:    arguments on the stack, plain 'ret' (caller removes arguments),
;         as used by the g++ -m32 caller in main.cpp
; In:     [ebp+8]  x      pointer to the byte array
;         [ebp+12] n      element count; accepted only when n > 0,
;                         NMIN <= n <= NMAX (signed) and n % 16 == 0
;         [ebp+16] sum_x  receives the 32-bit sum
;         [ebp+20] mean   receives sum / n as a double
; Out:    eax = 0 if n is rejected (outputs are not written)
;         eax = 1 on success
; Clobb:  eax, ecx, edx, flags; on the success path also mm0-mm7 and the
;         x87 register stack (emptied by emms, left empty on return)
; Stack:  8 bytes of locals used to hand the sum to the x87 unit
; Note:   uses only base MMX instructions (no SSE / MMX-extension ops)
;-----------------------------------------------------------------------
MmxCalcMean:
        %define arg_x       [ebp+8]     ; pointer
        %define arg_n       [ebp+12]    ; value
        %define arg_sum_x   [ebp+16]    ; pointer
        %define arg_mean    [ebp+20]    ; pointer
        %define sum_lo      [ebp-8]     ; local: low dword of 64-bit sum
        %define sum_hi      [ebp-4]     ; local: high dword of 64-bit sum

        push    ebp
        mov     ebp,esp
        sub     esp,8                   ;local storage for x87 transfer

; Validate n. eax stays 0 (the failure return code) until the very end.
        xor     eax,eax
        mov     ecx,arg_n
        cmp     ecx,[NMIN]
        jl      .done                   ;n < NMIN
        cmp     ecx,[NMAX]
        jg      .done                   ;n > NMAX
        test    ecx,ecx
        jle     .done                   ;n <= 0: the dec/jnz loop below needs
                                        ;at least one block, whatever NMIN holds
        test    ecx,BLOCK_BYTES-1
        jnz     .done                   ;n % 16 != 0
        shr     ecx,BLOCK_SHIFT         ;ecx = number of 16-byte blocks (>= 1)

; Loop set-up
        mov     eax,arg_x               ;eax = read cursor into x
        pxor    mm4,mm4
        pxor    mm5,mm5                 ;mm5:mm4 = running sums (4 dword lanes)
        pxor    mm7,mm7                 ;mm7 = 0, used to zero-extend on unpack

; Invariant at loop top: ecx = blocks remaining (> 0), eax = next block
.sum_block:
        movq    mm0,[eax]
        movq    mm1,[eax+8]             ;mm1:mm0 = 16 source bytes

; Zero-extend bytes to words and fold the 16 values into 4 word lanes.
; Each word lane receives 4 bytes, so it is at most 4*255 = 1020.
        movq    mm2,mm0
        movq    mm3,mm1
        punpcklbw mm0,mm7               ;bytes 0-3   -> words
        punpcklbw mm1,mm7               ;bytes 8-11  -> words
        punpckhbw mm2,mm7               ;bytes 4-7   -> words
        punpckhbw mm3,mm7               ;bytes 12-15 -> words
        paddw   mm0,mm2
        paddw   mm1,mm3
        paddw   mm0,mm1                 ;mm0 = 4 word partial sums

; Zero-extend the word sums to dwords and accumulate them.
        movq    mm1,mm0
        punpcklwd mm0,mm7               ;low  2 words -> dwords
        punpckhwd mm1,mm7               ;high 2 words -> dwords
        paddd   mm4,mm0
        paddd   mm5,mm1

        add     eax,BLOCK_BYTES         ;advance to the next block
        dec     ecx
        jnz     .sum_block

; Horizontal add of the 4 dword lanes -> eax
        paddd   mm5,mm4                 ;mm5 = 2 dword sums
        movq    mm6,mm5
        psrlq   mm6,32                  ;mm6[31:0] = mm5[63:32] (base MMX;
                                        ;pshufw would require SSE)
        paddd   mm6,mm5                 ;mm6[31:0] = total
        movd    eax,mm6                 ;eax = sum_x
        emms                            ;release MMX state before x87 use

; mean = sum_x / n. fild only loads signed integers, so the unsigned 32-bit
; sum is widened to 64 bits with a zero high dword before loading.
        mov     dword sum_lo,eax
        mov     dword sum_hi,0
        fild    qword sum_lo            ;st0 = sum_x
        fild    dword arg_n             ;st0 = n, st1 = sum_x
        fdivp                           ;st0 = sum_x / n
        mov     edx,arg_mean
        fstp    qword [edx]             ;*mean = st0, x87 stack empty again
        mov     edx,arg_sum_x
        mov     [edx],eax               ;*sum_x = total
        mov     eax,1                   ;success

.done:
        leave                           ;mov esp,ebp / pop ebp
        ret

        ; Keep the argument/local macros from leaking into later code.
        %undef  arg_x
        %undef  arg_n
        %undef  arg_sum_x
        %undef  arg_mean
        %undef  sum_lo
        %undef  sum_hi
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o mmxcalcmean.o mmxcalcmean.asm
g++ -m32 -o mmxcalcmean mmxcalcmean.o main.o