main.cpp
#include <stdio.h>
#include <stdlib.h>
extern "C" int CalcMatrixRowColSums(const int* x, int nrows, int ncols, int* row_sums, int* col_sums);
void PrintResults(const int* x, int nrows, int ncols, int* row_sums, int* col_sums)
{
for (int i = 0; i < nrows; i++)
{
for (int j = 0; j < ncols; j++)
printf("%5d ", x[i* ncols + j]);
printf(" -- %5d\n", row_sums[i]);
}
printf("\n");
for (int i = 0; i < ncols; i++)
printf("%5d ", col_sums[i]);
printf("\n");
}
void CalcMatrixRowColSumsCpp(const int* x, int nrows, int ncols, int* row_sums, int* col_sums)
{
for (int j = 0; j < ncols; j++)
col_sums[j] = 0;
for (int i = 0; i < nrows; i++)
{
row_sums[i] = 0;
int k = i * ncols;
for (int j = 0; j < ncols; j++)
{
int temp = x[k + j];
row_sums[i] += temp;
col_sums[j] += temp;
}
}
}
int main(int argc, char* argv[])
{
const int nrows = 7, ncols = 5;
int x[nrows][ncols];
// Initialize the test matrix
srand(13);
for (int i = 0; i < nrows; i++)
{
for (int j = 0; j < ncols; j++)
x[i][j] = rand() % 100;
}
// Calculate the row and column sums
int row_sums1[nrows], col_sums1[ncols];
int row_sums2[nrows], col_sums2[ncols];
CalcMatrixRowColSumsCpp((const int*)x, nrows, ncols, row_sums1, col_sums1);
printf("\nResults using CalcMatrixRowColSumsCpp()\n");
PrintResults((const int*)x, nrows, ncols, row_sums1, col_sums1);
CalcMatrixRowColSums((const int*)x, nrows, ncols, row_sums2, col_sums2);
printf("\nResults using CalcMatrixRowColSums()\n");
PrintResults((const int*)x, nrows, ncols, row_sums2, col_sums2);
return 0;
}
calcmatrixrowcolsums.asm
; Name: calcmatrixrowcolsums.asm
;
; Build: g++ -m32 -c main.cpp -o main.o
; nasm -f elf32 -o calcmatrixrowcolsums.o calcmatrixrowcolsums.asm
; g++ -m32 -o calcmatrixrowcolsums calcmatrixrowcolsums.o main.o
;
; Source: Modern x86 Assembly Language Programming p.62
global CalcMatrixRowColSums
section .text
; extern "C" int CalcMatrixRowColSums(const int* x, int nrows, int ncols, int* row_sums, int* col_sums);
;
; Description: The following function sums the rows and columns of a
; 2-D matrix.
;
; Returns: 0 = 'nrows' or 'ncols' is invalid
; 1 = success
%define x [ebp+8] ; matrix nrows x ncols
%define nrows [ebp+12]
%define ncols [ebp+16]
%define row_sums [ebp+20] ; matrix nrows x 1
%define col_sums [ebp+24] ; matrix 1 x ncols
CalcMatrixRowColSums:
push ebp
mov ebp,esp
push ebx
push esi
push edi
; Make sure 'nrow' and 'ncol' are valid
xor eax,eax ;error return code
cmp dword nrows,0 ;[ebp+12] = 'nrows'
jle .invalidArg ;jump if nrows <= 0
mov ecx,ncols ;ecx = 'ncols'
cmp ecx,0
jle .invalidArg ;jump if ncols <= 0
; Initialize elements of 'col_sums' array to zero
mov edi,col_sums ;edi = 'col_sums'
xor eax,eax ;eax = fill value
rep stosd ;fill array with zeros
; Initialize outer loop variables
mov ebx,x ;ebx = 'x'
xor esi,esi ;i = 0
; Outer loop
.lp1:
mov edi,row_sums ;edi = 'row_sums'
mov dword[edi+esi*4],0 ;row_sums[i] = 0
xor edi,edi ;j = 0
mov edx,esi ;edx = i
imul edx,ncols ;edx = i * ncols
; Inner loop
.lp2:
mov ecx,edx ;ecx = i * ncols
add ecx,edi ;ecx = i * ncols + j
mov eax,[ebx+ecx*4] ;eax = x[i * ncols + j]
mov ecx,row_sums ;ecx = 'row_sums'
add [ecx+esi*4],eax ;row_sums[i] += eax
mov ecx,col_sums ;ecx = 'col_sums'
add [ecx+edi*4],eax ;col_sums[j] += eax
; Is inner loop finished?
inc edi ;j++
cmp edi,ncols
jl .lp2 ;jump if j < ncols
; Is outer loop finished?
inc esi ;i++
cmp esi,nrows
jl .lp1 ;jump if i < nrows
mov eax,1 ;set success return code
.invalidArg:
pop edi
pop esi
pop ebx
pop ebp
ret
build
g++ -m32 -c main.cpp -o main.o
nasm -f elf32 -o calcmatrixrowcolsums.o calcmatrixrowcolsums.asm
g++ -m32 -o calcmatrixrowcolsums calcmatrixrowcolsums.o main.o