cubbi.com: fibonacci numbers in assembly languages
Assembly languages
Date: 1952
Type: Imperative language of machine commands
Usage: basic low level language on any system
I've always been a fan of assembly languages, they take the idea of attention to details to the extreme, and they are still the only way to create programs that run the fastest on a given piece of hardware. Of course, nobody needs to write entire programs in pure assembly, these examples are just for fun.
ALGORITHM 1A: NAIVE BINARY RECURSION
# This program calculates the nth fibonacci number
# using algorithm 1A: naive binary recursion
#
# AT&T Assembly Language for x86_64, Linux syscalls, ELF output
#
# compiled: as -o f1a.o f1a.s && ld -o f1a f1a.o
# executed: ./f1a n
#
.globl _start
_start:
    popq    %rcx        # this is argc, must be 2 for one argument
    cmpq    $2,%rcx
    jne     usage_exit
    addq    $8,%rsp     # skip argv[0]
    popq    %rsi        # get argv[1]
    call    rsi_to_bin  # now rax contains n
    pushq   %rax
    call    print_rax
    call    print_text
    popq    %rax
    call    fib
    call    print_rax
    call    print_newline
    xorq    %rbx,%rbx # return code will be 0
    movq    $1,%rax   # system call 1
    int     $0x80

# This procedure calculates the fibonacci number
# input n is %rax, output F(n) is in %rax
# modifies %rbx
fib:
    movq    $-2, %rdx # for the comparison in fibrec
    cmpq    $0,%rax  # first, take care of negative input
    jge     fibrec   # regular fib for n>=0
    negq    %rax     # n = -n
    testq   $1,%rax  # check n%2
    jnz     fibrec   # regular fib for odd -n
    call    fibrec
    negq    %rax     # return negative result for even -n
    ret

    .balign  4       # this alignment makes it faster. This program would sure benefit from tuning
fibrec:
    cmpq    $2,%rax   # if rax is less than 2, return it unchanged
    jl      quitrec

    decq    %rax      # ax=n-1
    pushq   %rax      # save n-1
    call    fibrec    # calculate f(n-1)
    movq    %rax,%rdi # save f(n-1)
    popq    %rax      # restore n-1
    decq    %rax      # ax=n-2
    pushq   %rdi      # save f(n-1)
    call    fibrec    # calculate f(n-2)
    popq    %rbx      # retrieve f(n-1)
    addq    %rbx,%rax # calculate the sum and place it in rax
quitrec:
    ret

# print usage and exit
usage_exit:
    movq    $errtxt_l,%rdx
    movq    $errtxt,%rcx
    call    puts
    movq    $1,%rbx # return code will be 1
    movq    $1,%rax   # system call 1
    int     $0x80
    
# convert asciiz string at (%rsi) to a number in %rax
rsi_to_bin:
     xorq    %rax,%rax # answer accumulator
     movb    $10,%cl   # base for multiplication
     cmpb    $45,(%rsi) # is it a '-' ?
     jnz     next_digit
     incq    %rsi
     movb    $1,minus_flag(%rip) # remember that it was a negative
next_digit:
     cmpb    $0,(%rsi)
     jz      end_rsi_to_bin
     movb    (%rsi),%bl     # get next digit
     subb    $48,%bl        # subtract '0'
     js      end_rsi_to_bin # abort conversion if less than zero
     cmpb    %cl,%bl
     jge     end_rsi_to_bin # abort conversion if greater than 9
     imulb   %cl            # ax = ax*10
     addq    %rbx,%rax      # ax = ax+digit
     incq    %rsi
     jmp     next_digit
end_rsi_to_bin:
     cmpb    $1, minus_flag(%rip) # check if the number was negative
     jnz     not_minus
     negq    %rax
not_minus:
     ret

# prints the number from rax with no newline
print_rax:
    movq    $10,%rbx      # base for division
    xorq    %rcx,%rcx     # digit counter
    movq    $outbyte,%rdi # output string
    cmpq    $0,%rax       # if negative rax
    jge     divloop
    negq    %rax          # make it positive
    movb    $45,(%rdi)    # and write '-' to output
    incq    %rdi
divloop:
    xorq    %rdx,%rdx
    divq    %rbx          # divide rax by ten
    addb    $48,%dl       # add ASCII code '0' to the remainder
    pushq   %rdx          # push the digit on stack
    incq    %rcx          # increment the digit counter
    testq   %rax,%rax     # if quotent was nonzero, loop
    jnz     divloop
    # at this point, rcx contains the number of digits and
    # the stack contains the ASCII codes of the digits
store:
    popq    %rax    # read the first digit from stack
    stosb           # put it into the output string
    loop    store   # continue rcx times
    # now the string is ready at [outbyte]
    subq    $outbyte,%rdi # calculate buffer length
    movq    %rdi,%rdx
    movq    $outbyte,%rcx
    jmp     puts
print_text:
    movq    $outtxt_l,%rdx
    movq    $outtxt,%rcx
    jmp     puts
print_newline:
    movq    $1,%rdx
    movq    $newline,%rcx
# print string in %rcx with length in %rdx
puts:
    movq    $1,%rbx  # file handle of stdout
    movq    $4,%rax  # system call write()
    int     $0x80
    ret

    .data
minus_flag:
    .byte 0

    .section .rodata
errtxt:
    .ascii  "Usage: ./f1a <n>"
newline:
    .ascii  "\n"
errtxt_l = .-errtxt
outtxt:
    .ascii  "th Fibonacci number is "
outtxt_l = .-outtxt

    .bss
    .lcomm outbyte, 22 # placeholder for output, size of max 64bit integer+1

ALGORITHM 2A-3: DATA STRUCTURE - SIMPLE LIST
(Intel Assembly Language for x86, MS-DOS syscalls, COM output)
	.model tiny
	.code
	org	100h
_start:
	mov	ax,cs
	mov	ds,ax
	mov	es,ax
	cld
	push	23
	call	fib
	call	print_eax
	ret
fib:	push	bp
	mov	bp,sp
	mov	di,offset fib_array+2
	mov	cx,[bp+4]
fib_loop:
	mov	ax,[di-1]
	add	ax,[di-2]
	inc	di
	mov	[di],ax
	loop	fib_loop
	mov	bx,[bp+4]
	mov	ax,word ptr [bx+fib_array]
	mov	[bp+4],ax
	pop	bp
	ret
print_eax:
	mov	bp,sp
	mov	di,offset outbyte
	mov	ax,[bp+2]
	mov	bx,10
	xor	cx,cx
divlp:	xor	dx,dx
	div	bx
	add	dl,48
	push	dx
	inc	cx
	test	ax,ax
	jnz	divlp
store:	pop	ax
	stosb
	loop	store
	mov	byte ptr [di],'$'
	mov	dx, offset outtxt
	mov	ah,9
	int     21h
	ret 2

outtxt  db  "23rd Fibonacci number is "
outtxt_l = $-outtxt+1
outbyte	db  "00000000"
fib_array db 1,1

	end _start

ALGORITHM 2B: SIMPLE RECURSION
(Sun Assembly Language for SPARC, SunOS/Solaris syscalls, ELF output)
.section        ".text"
        .global _start
_start:
        save    %sp,-112,%sp
        mov     45,%o2
        mov     1,%o1
        call    fib_lambda,1
        mov     1,%o0
        call    print_o0
        nop
        mov     1,%g1
        ta      8
fib_lambda:
        save    %sp,-112,%sp
        st      %i2,[%fp+68]
        ld      [%fp+68],%o2
        cmp     %o2,2
        bl      lambda_ends
        nop
        st      %i0,[%fp+68]
        ld      [%fp+68],%o1
        add     %o1,%i1,%o0
        call    fib_lambda,1
        dec     %o2
        st      %o0,[%fp+68]
        ld      [%fp+68],%i0
lambda_ends:
        ret
        restore
print_o0:
        save    %sp,-112,%sp
        st      %i0,[%fp+68]
        ld      [%fp+68],%o0
        sethi   %hi(outbyte_end),%l2
        or      %l2,%lo(outbyte_end),%l2
print_o0_loop:
        clr     %l1
o0_div_10_loop:
        sub     %o0,10,%o0
        cmp     %o0,0
        bge     o0_div_10_loop
        inc     %l1
        add     %o0,10+'0',%o0
        stb     %o0,[%l2]
        dec     %l1
        mov     %l1,%o0
        cmp     %l1,0
        bne     print_o0_loop
        dec     %l2
        mov     1,%o0
        sethi   %hi(outtext),%o1
        or      %o1,%lo(outtext),%o1
        mov     outtext_l,%o2
        mov     4,%g1
        ta      8
        mov     1,%o0
        sethi   %hi(outbyte),%o1
        or      %o1,%lo(outbyte),%o1
        mov     outbyte_l,%o2
        mov     4,%g1
        ta      8
        ret
        restore %g0,0,%o0
.section ".data"
outtext:
     .ascii "45th Fibonacchi number is "
outtext_l = .-outtext
outbyte:
        .ascii  "0000000000"
        .byte  012
outbyte_end = .-1
outbyte_l = .-outbyte

ALGORITHM 2C: NON-RECURSIVE LOOP
(Motorola Assembly Language for PPC, LynxOS syscalls, XCOFF output)
	.toc
outtext_a:	.tc	outtext[TC],outtext
outbyte_a:	.tc	outbyte_l[TC],outbyte_l
	.csect __start[DS]
	.globl __start
__start:	.long	.__start, TOC[tc0], 0

	.csect .text[PR]
.__start:
	li	3,45
	li	7,1
	li	8,1
fib_loop:
	add	11,7,8
	mr	7,8
	mr	8,11
	subi	3,3,1
	cmpwi	3,0
	bgt	fib_loop

	lwz	9,outbyte_a(2)
	lwz	9,0(9)
	li	10,10
div_loop:
	divwu	3,7,10
	mr	6,3
	mullw	3,3,10
	subf	3,3,7
	mr	7,6	
	addi	3,3,'0'	
	stb	3,0(9)
	subi	9,9,1
	cmpwi	6,0
	bgt	div_loop

	li	3,1
	lwz	4,outtext_a(2)
	li	5,outtext_l
	li	0,26
	sc

	li	3,7
	li	0,9
	sc
	.csect	.data[RW]
outtext:
	.byte	"45th Fibonacci number is "
	.byte   "00000000"
outbyte:
	.byte	10
outtext_l = .-outtext
outbyte_l:
	.long	outbyte-1

ALGORITHM 3A: MATRIX EQUATION
Not yet done. (I plan to do this on SGI Assembly Language for MIPS, IRIX syscalls)

ALGORITHM 3B: FAST RECURSION
(DEC Assembly Language for PDP-11, UNIX syscalls, a.out output)
	.globl  start
	.text
start:
	mov	$0,(sp)
	mov	$27,-(sp)
	jsr	pc, lambda
print_r1:
	mov	$outbyte,r3
div_loop:
	sxt	r0
	div	$12,r0
	add	$60,r1
	movb	r1,-(r3)
	mov	r0,r1
	tst	r1
	jne	div_loop
	mov	$1,r0
	sys	4; outtext; 37
	mov	$1,r0
	sys	1
lambda:
	mov	2(sp),r1
	cmp	$2,r1
	beq	gottwo
	bgt	gotone
	sxt	r0
	div	$2,r0
	tst	r1
	beq	even
odd:
	mov	2(sp),r1
	dec	r1
	sxt	r0
	div	$2,r0
	mov	r0,-(sp)
	jsr	pc,lambda
	add	$2,sp
	mov	r0,r3
	mov	r1,r2
	mov	r3,r4
	mul	r2,r4
	mov	r5,r1
	mov	r3,r4
	add	r2,r4
	mul	r2,r4
	add	r5,r1
	mul	r3,r3
	mov	r3,r0
	mul	r2,r2
	add	r3,r0
	rts	pc
even:
	mov	2(sp),r1
	sxt	r0
	div	$2,r0
	dec	r0
	mov	r0,-(sp)
	jsr	pc,lambda
	add	$2,sp
	mov	r0,r3
	mov	r1,r2
	mov	r2,r4
	mul	r2,r4
	mov	r5,r1
	mov	r2,r4
	add	r3,r4
	mul	r4,r4
	add	r5,r1
	mov	r2,r4
	add	r3,r4
	mul	r2,r4
	mov	r5,r0
	mul	r2,r3
	add	r3,r0
	rts	pc
gotone:
	mov	$1,r0
	mov	$1,r1
	rts	pc
gottwo:
	mov	$1,r0
	mov	$2,r1
	rts	pc

	.data
outtext:
	.byte 62,63,162,144,40,106,151,142,157,156
	.byte 141,143,143,151,40,156,165,155
	.byte 142,145,162,40,151,163,40
	.byte 60,60,60,60,60
outbyte:
	.byte 12

ALGORITHM 3C: BINET'S FORMULA
Not yet done. (I don't have a different hardware platform to do this on. An account, anyone?)