;name: MT19937-32.asm ; ;build: nasm -felf64 MT19937-32.asm -o MT19937-32.o ; ld -melf_x86_64 -o MT19937-32 MT19937-32.o ; ;description: Mersenne twister algorithm to generate 32 bits unsigned integers. ; ;source: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html ; http://en.wikipedia.org/wiki/Mersenne_twister ; ;remarks: The routine GenerateNumbers can be called ,the Initialize subroutine does this ; before returning to the caller,in case the user forgets it. bits 64 [list -] %include "unistd.inc" %include "sys/time.inc" [list +] ;number of numbers to generate %define NUMBER 1000000 ;macro to generate buffers in several parts in the code ;use with: BUFFER name,type,number %macro BUFFER 3 %1: %if %2=="BYTE" %assign n 1 %elif %2=="WORD" %assign n 2 %elif %2=="DWORD" %assign n 4 %elif %2=="QWORD" %assign n 8 %endif .start: resb %3*n .end: .len: equ %1.end-%1.start %endmacro ;macro to build strings %macro STRING 2 %1: .start: db %2 .end: .len: equ %1.end-%1.start %endmacro section .bss BUFFER index,"WORD",1 BUFFER MT,"DWORD",624 BUFFER decimalbuffer,"BYTE",20 BUFFER testdata,"DWORD",NUMBER section .rodata STRING sTitle,{"Implementation of the 32 bits Mersenne Twister.",10,"by Agguro - 2013",10} STRING sEOL,{10} STRING sSeparator,"," STRING sSpace," " STRING sPeriod,"." STRING sSec,"s" STRING sExecTime,"Execution time to generate and store 1000000 integers: " section .data ;time structures to calculate (more or less) the time needed to generate the list of pseudorandom numbers TIMESPEC starttime TIMESPEC endtime section .text global _start _start: mov rsi,sTitle mov rdx,sTitle.len call Write ;initialize the Mersenne Twister xor rdi,rdi ;autogenerate seed for us call Initialize ;NUMBER of random 32 bit words will be generated to measure the time needed mov rcx,NUMBER mov rdi,testdata ;get starttime call GetTime mov qword[starttime.tv_sec],rdx mov qword[starttime.tv_nsec],rax .next: push rdi call ExtractNumber pop rdi stosd loop .next ;get endtime call GetTime mov qword[endtime.tv_sec],rdx mov qword[endtime.tv_nsec],rax ;send all integers to STDOUT mov rsi,testdata mov rcx,NUMBER .nextNumber: xor rax,rax ;clear upper 32 bits lodsd ;load integer in EAX push rsi call ConvertToDecimal push rcx push rsi push rdx ;right align integers with alignment length 12 bytes mov rcx,rdx sub rcx,12 jz .noalignment ;if no difference in length no alignment neg rcx .space: mov rsi,sSpace ;write space to align call WriteChar loop .space .noalignment: pop rdx ;restore pointer to integer and length pop rsi pop rcx ;restore loopcounter call Write mov rax,rcx dec rax mov rbx,10 xor rdx,rdx idiv rbx cmp rdx,0 ;if loopcounter mod 10 = 0 then tenth column jne .sameline ;stay on same line mov rsi,sEOL ;go to next line jmp .printchar .sameline: mov rsi,sSeparator .printchar: call WriteChar pop rsi loop .nextNumber ;repeat for all numbers mov rsi,sEOL ;end of table mov rdx,1 call Write mov rsi,sExecTime mov rdx,sExecTime.len call Write ;calculate the time difference ;if end nanoseconds are less than start nanoseconds we need to adjust mov rax,qword[endtime.tv_nsec] sub rax,qword[starttime.tv_nsec] cmp rax,0 jge .calculate neg rax sub qword[endtime.tv_sec],1 ;1 second less .calculate: push rax ;store nanosec difference on stack mov rax,qword[endtime.tv_sec] sub rax,qword[starttime.tv_sec] call ConvertToDecimal ;RSI has pointer to buffer,RDX length call Write mov rsi,sPeriod ;write decimal point mov rdx,1 call Write pop rax ;restore nanosec difference on stack call ConvertToDecimal cmp rdx,9 je .noleadingzero mov rcx,9 sub rcx,rdx .leadingzero: dec rsi inc rdx mov byte[rsi],"0" loop .leadingzero .noleadingzero: call Write mov rsi,sSec mov rdx,1 call Write .done: mov rsi,sEOL mov rdx,1 call Write syscall exit,0 ;Write ;Write a string pointed by RSI with length RDX to STDOUT ;All used registers are restored. Write: push rax push rcx push rdi syscall write,stdout pop rdi pop rcx pop rax ret ;WriteChar ;Write a character pointed by RSI to STDOUT ;all used registers are restored. WriteChar: push rdx mov rdx,1 call Write pop rdx ret ;Start of MT19937 subroutines ;Initialize ;The routine accepts a seed in RDI,if this value is zero then a seed will be generated by the ;program. ;use: mov rdi,[seed or 0] ; call Initialize ;return: none. Initialize: push rax push rbx push rcx push rdx push rdi push rsi mov word[index],0 ;initialize the index with 0 cmp rdi,0 jne .start call GenerateSeed ;seed in RAX .start: mov rdi,MT ;store pointer to MT in RDI mov ecx,1 ;start with MT[1],RCX is i stosd ;save seed in MT[0] mov rsi,rdi ;RDI points to MT[i] sub rsi,4 ;RSI points to MT[i-1] .nexti: push rax ;MT[i-1] on stack ;** right shift by 30 bits(MT[i-1]) shr eax,30 mov ebx,eax pop rax ;get M[i-1] back ;** MT[i-1] xor (right shift by 30 bits(MT[i-1]) xor eax,ebx mov ebx,0x6c078965 ;1812433253 xor rdx,rdx ;prepare multiplication ;** 1812433253 * (MT[i-1] xor (right shift by 30 bits(MT[i-1])) imul ebx ;** 1812433253 * (MT[i-1] xor (right shift by 30 bits(MT[i-1]))) + i add eax,ecx ;** last 32 bits of(1812433253 * (MT[i-1] xor (right shift by 30 bits(MT[i-1]))) + i) stosd ;save M[n] RDI inc ecx ;i++ cmp ecx,623 ;repeat until i > 623 jle .nexti call GenerateNumbers pop rsi pop rdi pop rdx pop rcx pop rbx pop rax ret ;Generate ;The routine generates the random numbers. The Initialize subroutine calls this subroutine ;after the buffer is initialized with numbers based on the seed. GenerateNumbers: push rax push rbx push rcx push rdx push rsi xor rcx,rcx .repeat: mov rsi,MT mov ebx,ecx shl ebx,2 ;offset in MT array ;** y = (MT[i] and 0x80000000) + (MT[(i+1) mod 624] and 0x7FFFFFFF) ;**** calculate MT[i] and 0x80000000 mov eax,dword[rsi+rbx] and eax,0x80000000 push rax ;MT[i] and 0x80000000 ;calculate MT[(i+1) mod 624] and 0x7FFFFFFF mov eax,ecx ; inc eax ;i+1 mov ebx,624 xor edx,edx idiv ebx mov ebx,edx ;(i+1) mod 624 shl ebx,2 mov eax,dword[rsi+rbx] and eax,0x7FFFFFFF ;MT[(i+1) mod 624] and 0x7FFFFFFF pop rbx ;MT[i] and 0x80000000 add eax,ebx ;y push rax ;y on stack ;**** calculate MT[i] = MT[(i+397) mod 624] xor (right shift y by 1) shr eax,1 ;y SHR 1 push rax ;y SHR 1 on stack mov eax,ecx ;i mov ebx,397 add eax,ebx ;i+397 mov ebx,624 xor rdx,rdx idiv ebx ;(i+397) mod 624 mov ebx,edx ;in RBX shl ebx,2 mov eax,dword[rsi+rbx] ;MT[(i+397) mod 624] pop rbx ;y SHR 1 of stack xor eax,ebx ;MT[(i+397) mod 624] xor (y SHR 1) mov ebx,ecx shl ebx,2 and eax,0x7FFFFFFF ;remove highest 33 bits mov DWORD[rsi+rbx],eax ;save MT[i] pop rax ;y rcr eax,1 ;CF = y mod 2 jnc .done mov eax,dword[rsi+rbx] ;MT[i] xor eax,2567483615 ;MT[i] xor 2567483615 and eax,0x7FFFFFFF ;remove highest 33 bits mov DWORD[rsi+rbx],eax .done: inc rcx cmp ecx,624 jne .repeat pop rsi pop rdx pop rcx pop rbx pop rax ret ;ExtractNumber ;The routine extracts a number of the list and returns this in RAX,each 624 times it calls ;GenerateNumbers to create a new list of pseudo-random numbers. ExtractNumber: push rbx push rcx push rdx push rsi xor rax,rax mov ax,word[index] push rax ;index on stack inc eax ;increment index mov ebx,624 xor rdx,rdx idiv ebx mov word[index],dx pop rbx cmp ebx,0 jne .getnumber push rbx ;save i call GenerateNumbers pop rbx ;restore i .getnumber: mov rsi,MT shl rbx,2 ;** int y := MT[index] mov dword eax,[rsi+rbx] ;** y := y xor (right shift by 11 bits(y)) mov edx,eax shr edx,11 xor eax,edx mov edx,eax ;** y := y xor (left shift by 7 bits(y) and (2636928640)) shl edx,7 and edx,2636928640 xor eax,edx mov edx,eax ;** y := y xor (left shift by 15 bits(y) and (4022730752)) shl edx,15 and edx,4022730752 xor eax,edx mov edx,eax shr edx,18 ;** y := y xor (right shift by 18 bits(y)) xor eax,edx and eax,0x7FFFFFFF ;extracted number pop rsi pop rdx pop rcx pop rbx ret ;GenerateSeed ;This is just a helper subroutine,in case you don't want to provide seeds by your own. ;The routine creates a 32 bits unsigned integer based on ClockGetTime. ;Also remember,even if RCX isn't used it is changed by the syscalls. The stackpointer will ;be used as pointer to the TIMESPEC. ;CALLING : call CreateSeed ;IN : none ;OUT : seed in RAX ;Stack layout after initializing stackframe: ;RSP + 0 value of seconds ;RSP + 8 value of nanoseconds GenerateSeed: push rbp mov rbp,rsp sub rsp,2*8 ;reserve 2 QWORDS mov rax,SYS_CLOCK_GETTIME syscall clock_gettime,CLOCK_REALTIME,rsp mov rax,[rsp] ;get seconds mov rbx,[rsp+8] ;get nanosecs imul rbx shl rax,32 shr rax,32 mov rsp,rbp pop rbp ret ;COMMON SUBROUTINES ;ConvertTodecimal ;Convert the register value in RAX to decimal ASCII and stores it in a buffer. A pointer to ;the start of the decimal in the buffer is returned in RSI,the length in RDX. ;The purpose is to use this subroutine before writing the contents of RAX in decimal on STDOUT. ;A buffer "decimalbuffer" must be defined in the .bss or the .data section with a length ;suitable to store a decimal ASCII number in. ConvertToDecimal: pushfq mov rdi,decimalbuffer ;point to decimal buffer,decimal will be there add rdi,19 ;point to end of buffer mov rbx,10 ;base 10 std .repeat: xor rdx,rdx idiv rbx ;RAX = quotient,RDX = remainder xchg rax,rdx ;change quotient and remainder or al,0x30 ;make ASCII stosb xchg rax,rdx ;restore quotient cmp rax,0 ;calculations left? jne .repeat inc rdi ;adjust pointer mov rsi,rdi mov rdx,decimalbuffer.end sub rdx,rdi ;length of the decimal popfq ret ;GetTime ;This subroutine returns the time in RDX:RAX ;IN : none ;OUT : RDX = seconds ; RAX = nanoseconds GetTime: push rsi push rdi push rcx push rbp mov rbp,rsp sub rsp,16 ;reserve 2 QWORDS syscall clock_gettime,CLOCK_REALTIME,rsp mov rdx,[rsp] ;seconds mov rax,[rsp+8] ;nanoseconds mov rsp,rbp pop rbp pop rcx pop rdi pop rsi ret