Ultra Fast multiplication routine

Here you can ask questions or provide insights about how to use efficiently 6502 assembly code on the Oric.
Post Reply
User avatar
Chema
Game master
Posts: 1919
Joined: Tue Jan 17, 2006 10:55 am
Location: Gijón, SPAIN
Contact:

Ultra Fast multiplication routine

Post by Chema » Mon Apr 14, 2008 12:47 pm

Greetings.

Thought this could be interesting to some of you...

I have adapted Stephen L. Judd's method for fast multiplication (published on C= Hacking) to xa and run it on an Oric.

Here is the result. It uses a 1536 bytes table and works for 8-bit unsigned numbers, but it is VERY quick. It also requires 8 zero-page variables as pointers to those tables and performs multiplication simply as the following macro (paste it on a single line in your sources):

Code: Select all

;Multiply A*Y, store in par (low byte), A (high byte)
#define MULTAY(par)  sta MultLo1 : sta MultHi1 : eor #$ff : clc : adc #1 : sta MultLo2 : sta MultHi2 : sec : lda (MultLo1),y : sbc (MultLo2),y : sta par : lda (MultHi1),y : sbc (MultHi2),y 
So MULTAY (tmp) will multiply contents in regs A and Y and store result in tmp and reg A.

One very nice property of this method is that it can be used in a highly optimized way to multiply a given number by many different factors. Imagine you want to multyply a given value V by I, J and K. You will have this second macro:

Code: Select all

;Quick version that assumes pointers are already set-up
;which means A did not change, even if Y may...

#define QMULTAY(par) sec : lda (MultLo1),y : sbc (MultLo2),y : sta par : lda (MultHi1),y : sbc (MultHi2),y 
and do something like:

Code: Select all

      lda #V
      ldy #I
      MULTAY(tmp) ; Multiply V*I
      ; Get result, do whatever... even modifying reg A
      ldy #J
      QMULTAY(tmp) ; Second multiplication V*J
      ; Again do whatever...
      ldy #K
      QMULTAY(tmp) ; Third multiplication V*K
      ; ....
It flies!

To use it in your own programs, have this zero page vars defined:

Code: Select all

; For the fast multiplication routine
MultLo1 .byt 00    			;8 bytes zp index addresses
		.byt 00
MultHi1 .byt 00
		.byt 00
MultLo2 .byt 00
		.byt 00
MultHi2 .byt 00
		.byt 00
Have this initialization code somewhere...

Code: Select all

	ldy #>tab_multLO
	sty MultLo2+1
	iny
	sty MultLo1+1
	ldy #>tab_multHI
	sty MultHi2+1
	iny
	sty MultHi1+1
And the (quite big table) at the end of this post... Beware it fails if any factor is 0, so you need to check for that first as a special case.

There is an even faster version in http://www.6502.org/source/integers/fastmult.htm but the table size goes up to 2K and it is initialized by program (just convert it to sources as .byt statements and it is ready to be used). I am not sure if these changes are worth it, though...

Now the tables... just so you can copy&paste them...

Code: Select all

; This has to be page-aligned

.dsb 256-(*&255)
tab_multLO
	.byt $00,$80,$01,$82,$04,$86,$09,$8c,$10,$94,$19,$9e,$24,$aa,$31,$b8
	.byt $40,$c8,$51,$da,$64,$ee,$79,$04,$90,$1c,$a9,$36,$c4,$52,$e1,$70
	.byt $00,$90,$21,$b2,$44,$d6,$69,$fc,$90,$24,$b9,$4e,$e4,$7a,$11,$a8
	.byt $40,$d8,$71,$0a,$a4,$3e,$d9,$74,$10,$ac,$49,$e6,$84,$22,$c1,$60
	.byt $00,$a0,$41,$e2,$84,$26,$c9,$6c,$10,$b4,$59,$fe,$a4,$4a,$f1,$98
	.byt $40,$e8,$91,$3a,$e4,$8e,$39,$e4,$90,$3c,$e9,$96,$44,$f2,$a1,$50
	.byt $00,$b0,$61,$12,$c4,$76,$29,$dc,$90,$44,$f9,$ae,$64,$1a,$d1,$88
	.byt $40,$f8,$b1,$6a,$24,$de,$99,$54,$10,$cc,$89,$46,$04,$c2,$81,$40
	.byt $00,$c0,$81,$42,$04,$c6,$89,$4c,$10,$d4,$99,$5e,$24,$ea,$b1,$78
	.byt $40,$08,$d1,$9a,$64,$2e,$f9,$c4,$90,$5c,$29,$f6,$c4,$92,$61,$30
	.byt $00,$d0,$a1,$72,$44,$16,$e9,$bc,$90,$64,$39,$0e,$e4,$ba,$91,$68
	.byt $40,$18,$f1,$ca,$a4,$7e,$59,$34,$10,$ec,$c9,$a6,$84,$62,$41,$20
	.byt $00,$e0,$c1,$a2,$84,$66,$49,$2c,$10,$f4,$d9,$be,$a4,$8a,$71,$58
	.byt $40,$28,$11,$fa,$e4,$ce,$b9,$a4,$90,$7c,$69,$56,$44,$32,$21,$10
	.byt $00,$f0,$e1,$d2,$c4,$b6,$a9,$9c,$90,$84,$79,$6e,$64,$5a,$51,$48
	.byt $40,$38,$31,$2a,$24,$1e,$19,$14,$10,$0c,$09,$06,$04,$02,$01,$00
tab_multLO2
	.byt $00,$00,$01,$02,$04,$06,$09,$0c,$10,$14,$19,$1e,$24,$2a,$31,$38
	.byt $40,$48,$51,$5a,$64,$6e,$79,$84,$90,$9c,$a9,$b6,$c4,$d2,$e1,$f0
	.byt $00,$10,$21,$32,$44,$56,$69,$7c,$90,$a4,$b9,$ce,$e4,$fa,$11,$28
	.byt $40,$58,$71,$8a,$a4,$be,$d9,$f4,$10,$2c,$49,$66,$84,$a2,$c1,$e0
	.byt $00,$20,$41,$62,$84,$a6,$c9,$ec,$10,$34,$59,$7e,$a4,$ca,$f1,$18
	.byt $40,$68,$91,$ba,$e4,$0e,$39,$64,$90,$bc,$e9,$16,$44,$72,$a1,$d0
	.byt $00,$30,$61,$92,$c4,$f6,$29,$5c,$90,$c4,$f9,$2e,$64,$9a,$d1,$08
	.byt $40,$78,$b1,$ea,$24,$5e,$99,$d4,$10,$4c,$89,$c6,$04,$42,$81,$c0
	.byt $00,$40,$81,$c2,$04,$46,$89,$cc,$10,$54,$99,$de,$24,$6a,$b1,$f8
	.byt $40,$88,$d1,$1a,$64,$ae,$f9,$44,$90,$dc,$29,$76,$c4,$12,$61,$b0
	.byt $00,$50,$a1,$f2,$44,$96,$e9,$3c,$90,$e4,$39,$8e,$e4,$3a,$91,$e8
	.byt $40,$98,$f1,$4a,$a4,$fe,$59,$b4,$10,$6c,$c9,$26,$84,$e2,$41,$a0
	.byt $00,$60,$c1,$22,$84,$e6,$49,$ac,$10,$74,$d9,$3e,$a4,$0a,$71,$d8
	.byt $40,$a8,$11,$7a,$e4,$4e,$b9,$24,$90,$fc,$69,$d6,$44,$b2,$21,$90
	.byt $00,$70,$e1,$52,$c4,$36,$a9,$1c,$90,$04,$79,$ee,$64,$da,$51,$c8
	.byt $40,$b8,$31,$aa,$24,$9e,$19,$94,$10,$8c,$09,$86,$04,$82,$01,$80
	.byt $00,$80,$01,$82,$04,$86,$09,$8c,$10,$94,$19,$9e,$24,$aa,$31,$b8
	.byt $40,$c8,$51,$da,$64,$ee,$79,$04,$90,$1c,$a9,$36,$c4,$52,$e1,$70
	.byt $00,$90,$21,$b2,$44,$d6,$69,$fc,$90,$24,$b9,$4e,$e4,$7a,$11,$a8
	.byt $40,$d8,$71,$0a,$a4,$3e,$d9,$74,$10,$ac,$49,$e6,$84,$22,$c1,$60
	.byt $00,$a0,$41,$e2,$84,$26,$c9,$6c,$10,$b4,$59,$fe,$a4,$4a,$f1,$98
	.byt $40,$e8,$91,$3a,$e4,$8e,$39,$e4,$90,$3c,$e9,$96,$44,$f2,$a1,$50
	.byt $00,$b0,$61,$12,$c4,$76,$29,$dc,$90,$44,$f9,$ae,$64,$1a,$d1,$88
	.byt $40,$f8,$b1,$6a,$24,$de,$99,$54,$10,$cc,$89,$46,$04,$c2,$81,$40
	.byt $00,$c0,$81,$42,$04,$c6,$89,$4c,$10,$d4,$99,$5e,$24,$ea,$b1,$78
	.byt $40,$08,$d1,$9a,$64,$2e,$f9,$c4,$90,$5c,$29,$f6,$c4,$92,$61,$30
	.byt $00,$d0,$a1,$72,$44,$16,$e9,$bc,$90,$64,$39,$0e,$e4,$ba,$91,$68
	.byt $40,$18,$f1,$ca,$a4,$7e,$59,$34,$10,$ec,$c9,$a6,$84,$62,$41,$20
	.byt $00,$e0,$c1,$a2,$84,$66,$49,$2c,$10,$f4,$d9,$be,$a4,$8a,$71,$58
	.byt $40,$28,$11,$fa,$e4,$ce,$b9,$a4,$90,$7c,$69,$56,$44,$32,$21,$10
	.byt $00,$f0,$e1,$d2,$c4,$b6,$a9,$9c,$90,$84,$79,$6e,$64,$5a,$51,$48
	.byt $40,$38,$31,$2a,$24,$1e,$19,$14,$10,$0c,$09,$06,$04,$02,$01,$00
tab_multHI
	.byt $40,$3f,$3f,$3e,$3e,$3d,$3d,$3c,$3c,$3b,$3b,$3a,$3a,$39,$39,$38
	.byt $38,$37,$37,$36,$36,$35,$35,$35,$34,$34,$33,$33,$32,$32,$31,$31
	.byt $31,$30,$30,$2f,$2f,$2e,$2e,$2d,$2d,$2d,$2c,$2c,$2b,$2b,$2b,$2a
	.byt $2a,$29,$29,$29,$28,$28,$27,$27,$27,$26,$26,$25,$25,$25,$24,$24
	.byt $24,$23,$23,$22,$22,$22,$21,$21,$21,$20,$20,$1f,$1f,$1f,$1e,$1e
	.byt $1e,$1d,$1d,$1d,$1c,$1c,$1c,$1b,$1b,$1b,$1a,$1a,$1a,$19,$19,$19
	.byt $19,$18,$18,$18,$17,$17,$17,$16,$16,$16,$15,$15,$15,$15,$14,$14
	.byt $14,$13,$13,$13,$13,$12,$12,$12,$12,$11,$11,$11,$11,$10,$10,$10
	.byt $10,$0f,$0f,$0f,$0f,$0e,$0e,$0e,$0e,$0d,$0d,$0d,$0d,$0c,$0c,$0c
	.byt $0c,$0c,$0b,$0b,$0b,$0b,$0a,$0a,$0a,$0a,$0a,$09,$09,$09,$09,$09
	.byt $09,$08,$08,$08,$08,$08,$07,$07,$07,$07,$07,$07,$06,$06,$06,$06
	.byt $06,$06,$05,$05,$05,$05,$05,$05,$05,$04,$04,$04,$04,$04,$04,$04
	.byt $04,$03,$03,$03,$03,$03,$03,$03,$03,$02,$02,$02,$02,$02,$02,$02
	.byt $02,$02,$02,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01
	.byt $01,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
	.byt $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
tab_multHI2	
	.byt $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
	.byt $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
	.byt $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$02,$02
	.byt $02,$02,$02,$02,$02,$02,$02,$02,$03,$03,$03,$03,$03,$03,$03,$03
	.byt $04,$04,$04,$04,$04,$04,$04,$04,$05,$05,$05,$05,$05,$05,$05,$06
	.byt $06,$06,$06,$06,$06,$07,$07,$07,$07,$07,$07,$08,$08,$08,$08,$08
	.byt $09,$09,$09,$09,$09,$09,$0a,$0a,$0a,$0a,$0a,$0b,$0b,$0b,$0b,$0c
	.byt $0c,$0c,$0c,$0c,$0d,$0d,$0d,$0d,$0e,$0e,$0e,$0e,$0f,$0f,$0f,$0f
	.byt $10,$10,$10,$10,$11,$11,$11,$11,$12,$12,$12,$12,$13,$13,$13,$13
	.byt $14,$14,$14,$15,$15,$15,$15,$16,$16,$16,$17,$17,$17,$18,$18,$18
	.byt $19,$19,$19,$19,$1a,$1a,$1a,$1b,$1b,$1b,$1c,$1c,$1c,$1d,$1d,$1d
	.byt $1e,$1e,$1e,$1f,$1f,$1f,$20,$20,$21,$21,$21,$22,$22,$22,$23,$23
	.byt $24,$24,$24,$25,$25,$25,$26,$26,$27,$27,$27,$28,$28,$29,$29,$29
	.byt $2a,$2a,$2b,$2b,$2b,$2c,$2c,$2d,$2d,$2d,$2e,$2e,$2f,$2f,$30,$30
	.byt $31,$31,$31,$32,$32,$33,$33,$34,$34,$35,$35,$35,$36,$36,$37,$37
	.byt $38,$38,$39,$39,$3a,$3a,$3b,$3b,$3c,$3c,$3d,$3d,$3e,$3e,$3f,$3f
	.byt $40,$40,$41,$41,$42,$42,$43,$43,$44,$44,$45,$45,$46,$46,$47,$47
	.byt $48,$48,$49,$49,$4a,$4a,$4b,$4c,$4c,$4d,$4d,$4e,$4e,$4f,$4f,$50
	.byt $51,$51,$52,$52,$53,$53,$54,$54,$55,$56,$56,$57,$57,$58,$59,$59
	.byt $5a,$5a,$5b,$5c,$5c,$5d,$5d,$5e,$5f,$5f,$60,$60,$61,$62,$62,$63
	.byt $64,$64,$65,$65,$66,$67,$67,$68,$69,$69,$6a,$6a,$6b,$6c,$6c,$6d
	.byt $6e,$6e,$6f,$70,$70,$71,$72,$72,$73,$74,$74,$75,$76,$76,$77,$78
	.byt $79,$79,$7a,$7b,$7b,$7c,$7d,$7d,$7e,$7f,$7f,$80,$81,$82,$82,$83
	.byt $84,$84,$85,$86,$87,$87,$88,$89,$8a,$8a,$8b,$8c,$8d,$8d,$8e,$8f
	.byt $90,$90,$91,$92,$93,$93,$94,$95,$96,$96,$97,$98,$99,$99,$9a,$9b
	.byt $9c,$9d,$9d,$9e,$9f,$a0,$a0,$a1,$a2,$a3,$a4,$a4,$a5,$a6,$a7,$a8
	.byt $a9,$a9,$aa,$ab,$ac,$ad,$ad,$ae,$af,$b0,$b1,$b2,$b2,$b3,$b4,$b5
	.byt $b6,$b7,$b7,$b8,$b9,$ba,$bb,$bc,$bd,$bd,$be,$bf,$c0,$c1,$c2,$c3
	.byt $c4,$c4,$c5,$c6,$c7,$c8,$c9,$ca,$cb,$cb,$cc,$cd,$ce,$cf,$d0,$d1
	.byt $d2,$d3,$d4,$d4,$d5,$d6,$d7,$d8,$d9,$da,$db,$dc,$dd,$de,$df,$e0
	.byt $e1,$e1,$e2,$e3,$e4,$e5,$e6,$e7,$e8,$e9,$ea,$eb,$ec,$ed,$ee,$ef
	.byt $f0,$f1,$f2,$f3,$f4,$f5,$f6,$f7,$f8,$f9,$fa,$fb,$fc,$fd,$fe,$ff

User avatar
Dbug
Site Admin
Posts: 2300
Joined: Fri Jan 06, 2006 10:00 pm
Location: Oslo, Norway
Contact:

Post by Dbug » Mon Apr 14, 2008 10:46 pm

It's a domain I never explored :)

Great to see that we can do fast multiplications !

JamesD
Flight Lieutenant
Posts: 352
Joined: Tue Nov 07, 2006 7:38 am

Post by JamesD » Tue Apr 15, 2008 6:32 am

Pretty slick routine... and to think on the 6809 I just used the MUL instruction to do this. 1 byte... 11 clock cycles. :P

Post Reply

Who is online

Users browsing this forum: No registered users and 1 guest