Star InactiveStar InactiveStar InactiveStar InactiveStar Inactive
; Name:         optimizingstep1.asm
;

; 1st optimization step : n = Array length
; ; The bubble sort algorithm can be easily optimized by observing that ; the n-th pass finds the n-th largest element and puts it into its ; final place. So, the inner loop can avoid looking at the last n-1 ; items when running for the n-th time. ; ; source: http://en.wikipedia.org/wiki/Bubble_sort#Optimizing_bubble_sort BubbleSort: xor r8,r8 ; number of iterations xor r9,r9 ; number of swaps (informative) mov r10, array.length ; *** ADDED *** .repeat: mov rdx, FALSE ; isSwapped = false mov rcx,1 ; i = 1 mov rsi,array ; point to start of the array .for: lodsq ; RBX = array[i] mov rbx, rax lodsq ; RAX = array[i+1] cmp rax, rbx ; if array[i+1] >= array[i] jge .next xor rax, rbx ; then swap both values xor rbx, rax xor rax, rbx mov qword [rsi-datasize*2], rbx ; and store swapped values in array mov qword [rsi-datasize], rax mov rdx,TRUE ; isSwapped = true inc r9 ; increment number of swaps .next: inc r8 ; increment number of iterations sub rsi,datasize ; adjust pointer in array inc rcx ; i++ cmp rcx,r10 ; if i <= arrayLength-1 *** MODIFIED *** jle .for ; next comparison .until: dec r10 ; *** ADDED *** cmp rdx,TRUE ; if isSwapped == true je .repeat ; then repeat sort algorithm

Here below the results of this optimization

Bubblesort Algorithm - Agguro 2012
Optimization step: n-th pass finds the n-th largest elements
The UNSORTED array:
-------------------
 9223372036854775807
   15421441199845202
                  75
                  15
                   0
                -854
               -7854
           -48545825
-9223372036854775808

The SORTED array:
-----------------
-9223372036854775808
           -48545825
               -7854
                -854
                   0
                  15
                  75
   15421441199845202
 8030874076713219394

Number of iterations:                   45
Number of swaps     :                   37
Star InactiveStar InactiveStar InactiveStar InactiveStar Inactive
; Name:         optimizingstep2.asm
;

; 2nd optimization step : no check after last swap
; ; it can happen that more than one element is placed in their final position ; on a single pass. In particular, after every pass, all elements after the ; last swap are sorted, and do not need to be checked again. This allows us to ; skip over a lot of the elements, resulting in about a worst case 50% improvement ; in comparison count (though no improvement in swap counts), and adds very little ; complexity because the new code subsumes the "swapped" variable. ; ; source: http://en.wikipedia.org/wiki/Bubble_sort#Optimizing_bubble_sort BubbleSort: xor r8,r8 ; number of iterations xor r9,r9 ; number of swaps (informative) mov r10, array.length ; r10 = n = arrayLength .repeat: mov r11, 0 ; r11 = newn = 0 ADDED mov rcx,1 ; i = 1 mov rsi,array ; point to start of the array .for: lodsq ; RBX = array[i] mov rbx, rax lodsq ; RAX = array[i+1] cmp rax, rbx ; if array[i+1] >= array[i] jge .next xor rax, rbx ; then swap both values xor rbx, rax xor rax, rbx mov r11, rcx ; newn = i ADDED mov qword [rsi-datasize*2], rbx ; and store swapped values in array mov qword [rsi-datasize], rax inc r9 ; increment number of swaps .next: inc r8 ; increment number of iterations sub rsi,datasize ; adjust pointer in array inc rcx ; i++ cmp rcx, r10 ; if i <= arrayLength-1 jle .for mov r10, r11 ; n = newn ADDED .until: dec r10 cmp r10, 0 ; if r10 > 0 jg .repeat ; then repeat sort algorithm

And finally the results with this optimization

Bubblesort Algorithm - Agguro 2012
Optimization step: no check after last swap
The UNSORTED array:
-------------------
 9223372036854775807
   15421441199845202
                  75
                  15
                   0
                -854
               -7854
           -48545825
-9223372036854775808

The SORTED array:
-----------------
-9223372036854775808
           -48545825
               -7854
                -854
                   0
                  15
                  75
   15421441199845202
 8030874076713219394

Number of iterations:                   38
Number of swaps     :                   37
Star InactiveStar InactiveStar InactiveStar InactiveStar Inactive
; Name:         optimizingstep0.asm
;

; The non-optimized bubblesort algorithm
; ; source: http://en.wikipedia.org/wiki/Bubble_sort BubbleSort: xor r8,r8 ; number of iterations xor r9,r9 ; number of swaps .repeat: mov rdx, FALSE ; isSwapped = false mov rcx,1 ; i = 1 mov rsi,array ; point to start of the array .for: lodsq ; RBX = array[i] mov rbx, rax lodsq ; RAX = array[i+1] cmp rax, rbx ; if array[i+1] >= array[i] jge .next xor rax, rbx ; if less then swap both values xor rbx, rax xor rax, rbx mov qword [rsi-datasize*2], rbx ; and store swapped values in array mov qword [rsi-datasize], rax mov rdx,TRUE ; isSwapped = true inc r9 ; increment number of swaps .next: inc r8 ; increment number of iterations sub rsi,datasize ; adjust pointer in array inc rcx ; i++ cmp rcx,array.length-1 ; if i <= arrayLength-1 jle .for ; next comparison .until: cmp rdx,TRUE ; if isSwapped == true je .repeat ; then repeat sort algorithm

Her below the results of this optimization

Bubblesort Algorithm - Agguro 2012
The UNSORTED array:
-------------------
 9223372036854775807
   15421441199845202
                  75
                  15
                   0
                -854
               -7854
           -48545825
-9223372036854775808

The SORTED array:
-----------------
-9223372036854775808
           -48545825
               -7854
                -854
                   0
                  15
                  75
   15421441199845202
 9223372036854775807

Number of iterations:                   72
Number of swaps     :                   36
Star InactiveStar InactiveStar InactiveStar InactiveStar Inactive
; Name:         bubblesort.asm
;
; Build:        nasm "-felf64" bubblesort.asm -l bubblesort.lst -o bubblesort.o
;               ld -s -melf_x86_64 -o bubblesort bubblesort.o
;
; Description:  Demonstration of the BubbleSort Algorithm, with or without optimzing steps.
;               The program needs to be re-assembled and linked when changing to an
;               optimization step.
;               Source : https://en.wikipedia.org/wiki/Bubble_sort


bits 64

[list -]
    %include "unistd.inc"
[list +]

; here you can choose between the 3 different steps of optimization
; O : [default] no optimization
; 1 : n-th pass finds the n-th largest elements
; 2 : no check after last swap

%define OPTIMIZE_STEP 2                 ; select 0, 1 or 2

%define TRUE      1
%define FALSE     0

%macro STRING 1
    .start:     db %1
    .length:    equ $-.start
%endmacro
%macro ARRAY 1-*
    %rep  %0
         dq  %1
         %rotate 1
    %endrep 
%endmacro

section .bss
; 64 bit integers have a range of -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
; both included, so we need a buffer of 20 bytes at least to store them
Buffer:
    sign           resb      1              ; ascii sign
    decimal        resb     19              ; 20 bytes to store a 64 bits number + sign

section .data

; The list is intentionally sorted in descending order to demonstrate the wurst case scenario
; and determine the swaps and iterations.

    datasize:      equ       8   
    array:         ARRAY     9223372036854775807, 15421441199845202, 75, 15, 0, -854, -7854, -48545825, -9223372036854775808
    .length        equ       ($-array)/datasize

    title:         STRING    {"Bubblesort Algorithm - Agguro 2012",10}
%if OPTIMIZE_STEP == 1
    opt1:          STRING    {"Optimization step: n-th pass finds the n-th largest elements",10}
%elif OPTIMIZE_STEP == 2
    opt2:          STRING    {"Optimization step: no check after last swap",10}
%endif
    unsorted:      STRING    {"The UNSORTED array:",10,"-------------------",10}
    sorted:        STRING    {10,"The SORTED array:",10,"-----------------",10}
    iterations:    STRING    {10,"Number of iterations: "}
    swaps:         STRING    {"Number of swaps     : "}
    lf:            STRING    {10}
     
section .text
        global _start
     
_start:
    pushfq
    push      rax
    push      rbx
    push      rcx
    push      rdx
    push      rsi
    push      r9
    push      r8
; clear the screen and print title
; if the terminal you use don't support this, modify the bytes in .data
    mov       rsi, title
    mov       rdx, title.length
    call      Print.string
    %if OPTIMIZE_STEP == 1
         mov       rsi, opt1
         mov       rdx, opt1.length
         call      Print.string
    %elif OPTIMIZE_STEP == 2
         mov       rsi, opt2
         mov       rdx, opt2.length
         call      Print.string
    %endif 
; display the unsorted Array on screen
    mov       rsi, unsorted
    mov       rdx, unsorted.length
    call      Print.string
; print the Array elements
    call      ShowArray

; Here the Bubblesort algorithm starts. Depending the value of OPTIMIZE_STEP an optimized or
; non-optimized version is included. On error the default (0) is used.
%if OPTIMIZE_STEP == 1
    %include "optimizingstep1.asm"
%elif OPTIMIZE_STEP == 2
    %include "optimizingstep2.asm"
%elif
    %warning "no OPTIMIZE_STEP defined, defaulting to 0"
    %include "optimizingstep0.asm"
%endif
; End of the BubbleSort algorithm.

; all we need to do is to display the sorted Array and restore the used registers
    mov       rsi, sorted
    mov       rdx, sorted.length
    call      Print.string
    call      ShowArray
    ; show number of iterations
    mov       rsi, iterations
    mov       rdx, iterations.length
    call      Print.string
    mov       rax, r8                       ; iterations in RAX
    call      Convert                       ; RAX contains the number to convert
    call      Print.integer
    call      ClearBuffer                   ; clear buffer for next use
     ; show number of swaps
    mov       rax, r9                       ; number of swaps in RAX
    mov       rsi, swaps
    mov       rdx, swaps.length
    call      Print.string
    call      Convert                       ; RAX contains the number to convert
    call      Print.integer
    call      Print.linefeed
     ; restore used registers
    pop       r8
    pop       r9
    pop       rsi
    pop       rdx
    pop       rcx
    pop       rbx
    pop       rax
    popfq
Exit:      
    syscall   exit, 0
ShowArray:
    push      rcx
    push      rsi
    mov       rcx, array.length             ; show all integers
    mov       rsi, array                    ; start of the array
.nextInteger:      
    lodsq                                   ; get integer
    call      Convert                       ; RAX contains the number to convert
    call      Print.integer
    call      ClearBuffer                   ; clear buffer for next use
    loop      .nextInteger
    pop       rsi
    pop       rcx
    ret
Convert:
    push      rax
    push      rbx
    push      rdx
    push      rdi
    push      rcx
    mov       rdi,sign
    mov       byte[rdi]," "                 ; default no sign
    cmp       rax, 0
    jge       .noSign
    mov       byte[rdi],"-"                 ; number is zero
    neg       rax                           ; make positive
.noSign:
    mov       rdi, decimal                  ; address of buffer in RDI 
    add       rdi, 18                       ; 0..18 = 19 bytes of storage
    ; start conversion of absolute value of RAX
.repeat:      
    xor       rdx, rdx                      ; remainder will be in RDX
    mov       rbx, 10
    div       rbx                           ; RDX = remainder of division
    or        dl,"0"                        ; make remainder decimal ASCII
    mov       byte[rdi],dl                  ; and store
    dec       rdi                           ; go to previous position
    cmp       rax, 0                        ; RAX = quotient of division, if zero stop
                                            ; we can stop when AL < 10 however this will add more code to store AL
    jnz       .repeat
    ; align integers to the right
    mov       dl ,byte[sign]                ; copy sign character in [RDI] just before the number
    mov       byte[rdi], dl
    dec       rdi                           ; point to position before the sign character
    mov       rcx, rdi                      ; calculate remaining bytes
    sub       rcx, sign
    cmp       rcx, 0
    jle       .end
    inc       rcx
    mov       al," "                        ; fill remaining bytes with spaces
    std
.fill:      
    stosb
    loop      .fill
.end:      
    pop       rcx
    pop       rdi
    pop       rdx
    pop       rbx
    pop       rax
    ret
     
; **** Clear the integer buffer
ClearBuffer:
    push      rax
    push      rcx
    push      rdi
    mov       rdi, Buffer
    mov       rcx, 20                       ; 20 bytes to clear (1 added to clear last too)
    xor       rax, rax
    cld                                     ; begin at lowest address
.repeat:      
    stosb                                   ; erase [RDI] and increment RSI pointer
    loop      .repeat
    pop       rdi
    pop       rcx
    pop       rax
    ret
; *** Print routines
Print:
.integer:
    push      rdx
    push      rsi
    mov       rsi,Buffer
    mov       rdx, 20                       ; 20 bytes to display
    call      Print.string
    call      Print.linefeed
    pop       rsi
    pop       rdx
    ret
.linefeed:
    mov       rsi, lf
    mov       rdx, lf.length
.string:  
    push      rax
    push      rdi
    push      rcx                           ; even not used, RCX is changed after syscall
    syscall   write, stdout
    pop       rcx
    pop       rdi
    pop       rax
    ret