Ultra Fast multiplication routine

Here you can ask questions or provide insights about how to use efficiently 6502 assembly code on the Oric.
User avatar
Chema
Game master
Posts: 3013
Joined: Tue Jan 17, 2006 10:55 am
Location: Gijón, SPAIN
Contact:

Ultra Fast multiplication routine

Post by Chema »

Greetings.

Thought this could be interesting to some of you...

I have adapted Stephen L. Judd's method for fast multiplication (published on C= Hacking) to xa and run it on an Oric.

Here is the result. It uses a 1536 bytes table and works for 8-bit unsigned numbers, but it is VERY quick. It also requires 8 zero-page variables as pointers to those tables and performs multiplication simply as the following macro (paste it on a single line in your sources):

Code: Select all

;Multiply A*Y, store in par (low byte), A (high byte)
#define MULTAY(par)  sta MultLo1 : sta MultHi1 : eor #$ff : clc : adc #1 : sta MultLo2 : sta MultHi2 : sec : lda (MultLo1),y : sbc (MultLo2),y : sta par : lda (MultHi1),y : sbc (MultHi2),y 
So MULTAY (tmp) will multiply contents in regs A and Y and store result in tmp and reg A.

One very nice property of this method is that it can be used in a highly optimized way to multiply a given number by many different factors. Imagine you want to multyply a given value V by I, J and K. You will have this second macro:

Code: Select all

;Quick version that assumes pointers are already set-up
;which means A did not change, even if Y may...

#define QMULTAY(par) sec : lda (MultLo1),y : sbc (MultLo2),y : sta par : lda (MultHi1),y : sbc (MultHi2),y 
and do something like:

Code: Select all

      lda #V
      ldy #I
      MULTAY(tmp) ; Multiply V*I
      ; Get result, do whatever... even modifying reg A
      ldy #J
      QMULTAY(tmp) ; Second multiplication V*J
      ; Again do whatever...
      ldy #K
      QMULTAY(tmp) ; Third multiplication V*K
      ; ....
It flies!

To use it in your own programs, have this zero page vars defined:

Code: Select all

; For the fast multiplication routine
MultLo1 .byt 00    			;8 bytes zp index addresses
		.byt 00
MultHi1 .byt 00
		.byt 00
MultLo2 .byt 00
		.byt 00
MultHi2 .byt 00
		.byt 00
Have this initialization code somewhere...

Code: Select all

	ldy #>tab_multLO
	sty MultLo2+1
	iny
	sty MultLo1+1
	ldy #>tab_multHI
	sty MultHi2+1
	iny
	sty MultHi1+1
And the (quite big table) at the end of this post... Beware it fails if any factor is 0, so you need to check for that first as a special case.

There is an even faster version in http://www.6502.org/source/integers/fastmult.htm but the table size goes up to 2K and it is initialized by program (just convert it to sources as .byt statements and it is ready to be used). I am not sure if these changes are worth it, though...

Now the tables... just so you can copy&paste them...

Code: Select all

; This has to be page-aligned

.dsb 256-(*&255)
tab_multLO
	.byt $00,$80,$01,$82,$04,$86,$09,$8c,$10,$94,$19,$9e,$24,$aa,$31,$b8
	.byt $40,$c8,$51,$da,$64,$ee,$79,$04,$90,$1c,$a9,$36,$c4,$52,$e1,$70
	.byt $00,$90,$21,$b2,$44,$d6,$69,$fc,$90,$24,$b9,$4e,$e4,$7a,$11,$a8
	.byt $40,$d8,$71,$0a,$a4,$3e,$d9,$74,$10,$ac,$49,$e6,$84,$22,$c1,$60
	.byt $00,$a0,$41,$e2,$84,$26,$c9,$6c,$10,$b4,$59,$fe,$a4,$4a,$f1,$98
	.byt $40,$e8,$91,$3a,$e4,$8e,$39,$e4,$90,$3c,$e9,$96,$44,$f2,$a1,$50
	.byt $00,$b0,$61,$12,$c4,$76,$29,$dc,$90,$44,$f9,$ae,$64,$1a,$d1,$88
	.byt $40,$f8,$b1,$6a,$24,$de,$99,$54,$10,$cc,$89,$46,$04,$c2,$81,$40
	.byt $00,$c0,$81,$42,$04,$c6,$89,$4c,$10,$d4,$99,$5e,$24,$ea,$b1,$78
	.byt $40,$08,$d1,$9a,$64,$2e,$f9,$c4,$90,$5c,$29,$f6,$c4,$92,$61,$30
	.byt $00,$d0,$a1,$72,$44,$16,$e9,$bc,$90,$64,$39,$0e,$e4,$ba,$91,$68
	.byt $40,$18,$f1,$ca,$a4,$7e,$59,$34,$10,$ec,$c9,$a6,$84,$62,$41,$20
	.byt $00,$e0,$c1,$a2,$84,$66,$49,$2c,$10,$f4,$d9,$be,$a4,$8a,$71,$58
	.byt $40,$28,$11,$fa,$e4,$ce,$b9,$a4,$90,$7c,$69,$56,$44,$32,$21,$10
	.byt $00,$f0,$e1,$d2,$c4,$b6,$a9,$9c,$90,$84,$79,$6e,$64,$5a,$51,$48
	.byt $40,$38,$31,$2a,$24,$1e,$19,$14,$10,$0c,$09,$06,$04,$02,$01,$00
tab_multLO2
	.byt $00,$00,$01,$02,$04,$06,$09,$0c,$10,$14,$19,$1e,$24,$2a,$31,$38
	.byt $40,$48,$51,$5a,$64,$6e,$79,$84,$90,$9c,$a9,$b6,$c4,$d2,$e1,$f0
	.byt $00,$10,$21,$32,$44,$56,$69,$7c,$90,$a4,$b9,$ce,$e4,$fa,$11,$28
	.byt $40,$58,$71,$8a,$a4,$be,$d9,$f4,$10,$2c,$49,$66,$84,$a2,$c1,$e0
	.byt $00,$20,$41,$62,$84,$a6,$c9,$ec,$10,$34,$59,$7e,$a4,$ca,$f1,$18
	.byt $40,$68,$91,$ba,$e4,$0e,$39,$64,$90,$bc,$e9,$16,$44,$72,$a1,$d0
	.byt $00,$30,$61,$92,$c4,$f6,$29,$5c,$90,$c4,$f9,$2e,$64,$9a,$d1,$08
	.byt $40,$78,$b1,$ea,$24,$5e,$99,$d4,$10,$4c,$89,$c6,$04,$42,$81,$c0
	.byt $00,$40,$81,$c2,$04,$46,$89,$cc,$10,$54,$99,$de,$24,$6a,$b1,$f8
	.byt $40,$88,$d1,$1a,$64,$ae,$f9,$44,$90,$dc,$29,$76,$c4,$12,$61,$b0
	.byt $00,$50,$a1,$f2,$44,$96,$e9,$3c,$90,$e4,$39,$8e,$e4,$3a,$91,$e8
	.byt $40,$98,$f1,$4a,$a4,$fe,$59,$b4,$10,$6c,$c9,$26,$84,$e2,$41,$a0
	.byt $00,$60,$c1,$22,$84,$e6,$49,$ac,$10,$74,$d9,$3e,$a4,$0a,$71,$d8
	.byt $40,$a8,$11,$7a,$e4,$4e,$b9,$24,$90,$fc,$69,$d6,$44,$b2,$21,$90
	.byt $00,$70,$e1,$52,$c4,$36,$a9,$1c,$90,$04,$79,$ee,$64,$da,$51,$c8
	.byt $40,$b8,$31,$aa,$24,$9e,$19,$94,$10,$8c,$09,$86,$04,$82,$01,$80
	.byt $00,$80,$01,$82,$04,$86,$09,$8c,$10,$94,$19,$9e,$24,$aa,$31,$b8
	.byt $40,$c8,$51,$da,$64,$ee,$79,$04,$90,$1c,$a9,$36,$c4,$52,$e1,$70
	.byt $00,$90,$21,$b2,$44,$d6,$69,$fc,$90,$24,$b9,$4e,$e4,$7a,$11,$a8
	.byt $40,$d8,$71,$0a,$a4,$3e,$d9,$74,$10,$ac,$49,$e6,$84,$22,$c1,$60
	.byt $00,$a0,$41,$e2,$84,$26,$c9,$6c,$10,$b4,$59,$fe,$a4,$4a,$f1,$98
	.byt $40,$e8,$91,$3a,$e4,$8e,$39,$e4,$90,$3c,$e9,$96,$44,$f2,$a1,$50
	.byt $00,$b0,$61,$12,$c4,$76,$29,$dc,$90,$44,$f9,$ae,$64,$1a,$d1,$88
	.byt $40,$f8,$b1,$6a,$24,$de,$99,$54,$10,$cc,$89,$46,$04,$c2,$81,$40
	.byt $00,$c0,$81,$42,$04,$c6,$89,$4c,$10,$d4,$99,$5e,$24,$ea,$b1,$78
	.byt $40,$08,$d1,$9a,$64,$2e,$f9,$c4,$90,$5c,$29,$f6,$c4,$92,$61,$30
	.byt $00,$d0,$a1,$72,$44,$16,$e9,$bc,$90,$64,$39,$0e,$e4,$ba,$91,$68
	.byt $40,$18,$f1,$ca,$a4,$7e,$59,$34,$10,$ec,$c9,$a6,$84,$62,$41,$20
	.byt $00,$e0,$c1,$a2,$84,$66,$49,$2c,$10,$f4,$d9,$be,$a4,$8a,$71,$58
	.byt $40,$28,$11,$fa,$e4,$ce,$b9,$a4,$90,$7c,$69,$56,$44,$32,$21,$10
	.byt $00,$f0,$e1,$d2,$c4,$b6,$a9,$9c,$90,$84,$79,$6e,$64,$5a,$51,$48
	.byt $40,$38,$31,$2a,$24,$1e,$19,$14,$10,$0c,$09,$06,$04,$02,$01,$00
tab_multHI
	.byt $40,$3f,$3f,$3e,$3e,$3d,$3d,$3c,$3c,$3b,$3b,$3a,$3a,$39,$39,$38
	.byt $38,$37,$37,$36,$36,$35,$35,$35,$34,$34,$33,$33,$32,$32,$31,$31
	.byt $31,$30,$30,$2f,$2f,$2e,$2e,$2d,$2d,$2d,$2c,$2c,$2b,$2b,$2b,$2a
	.byt $2a,$29,$29,$29,$28,$28,$27,$27,$27,$26,$26,$25,$25,$25,$24,$24
	.byt $24,$23,$23,$22,$22,$22,$21,$21,$21,$20,$20,$1f,$1f,$1f,$1e,$1e
	.byt $1e,$1d,$1d,$1d,$1c,$1c,$1c,$1b,$1b,$1b,$1a,$1a,$1a,$19,$19,$19
	.byt $19,$18,$18,$18,$17,$17,$17,$16,$16,$16,$15,$15,$15,$15,$14,$14
	.byt $14,$13,$13,$13,$13,$12,$12,$12,$12,$11,$11,$11,$11,$10,$10,$10
	.byt $10,$0f,$0f,$0f,$0f,$0e,$0e,$0e,$0e,$0d,$0d,$0d,$0d,$0c,$0c,$0c
	.byt $0c,$0c,$0b,$0b,$0b,$0b,$0a,$0a,$0a,$0a,$0a,$09,$09,$09,$09,$09
	.byt $09,$08,$08,$08,$08,$08,$07,$07,$07,$07,$07,$07,$06,$06,$06,$06
	.byt $06,$06,$05,$05,$05,$05,$05,$05,$05,$04,$04,$04,$04,$04,$04,$04
	.byt $04,$03,$03,$03,$03,$03,$03,$03,$03,$02,$02,$02,$02,$02,$02,$02
	.byt $02,$02,$02,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01
	.byt $01,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
	.byt $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
tab_multHI2	
	.byt $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
	.byt $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
	.byt $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$02,$02
	.byt $02,$02,$02,$02,$02,$02,$02,$02,$03,$03,$03,$03,$03,$03,$03,$03
	.byt $04,$04,$04,$04,$04,$04,$04,$04,$05,$05,$05,$05,$05,$05,$05,$06
	.byt $06,$06,$06,$06,$06,$07,$07,$07,$07,$07,$07,$08,$08,$08,$08,$08
	.byt $09,$09,$09,$09,$09,$09,$0a,$0a,$0a,$0a,$0a,$0b,$0b,$0b,$0b,$0c
	.byt $0c,$0c,$0c,$0c,$0d,$0d,$0d,$0d,$0e,$0e,$0e,$0e,$0f,$0f,$0f,$0f
	.byt $10,$10,$10,$10,$11,$11,$11,$11,$12,$12,$12,$12,$13,$13,$13,$13
	.byt $14,$14,$14,$15,$15,$15,$15,$16,$16,$16,$17,$17,$17,$18,$18,$18
	.byt $19,$19,$19,$19,$1a,$1a,$1a,$1b,$1b,$1b,$1c,$1c,$1c,$1d,$1d,$1d
	.byt $1e,$1e,$1e,$1f,$1f,$1f,$20,$20,$21,$21,$21,$22,$22,$22,$23,$23
	.byt $24,$24,$24,$25,$25,$25,$26,$26,$27,$27,$27,$28,$28,$29,$29,$29
	.byt $2a,$2a,$2b,$2b,$2b,$2c,$2c,$2d,$2d,$2d,$2e,$2e,$2f,$2f,$30,$30
	.byt $31,$31,$31,$32,$32,$33,$33,$34,$34,$35,$35,$35,$36,$36,$37,$37
	.byt $38,$38,$39,$39,$3a,$3a,$3b,$3b,$3c,$3c,$3d,$3d,$3e,$3e,$3f,$3f
	.byt $40,$40,$41,$41,$42,$42,$43,$43,$44,$44,$45,$45,$46,$46,$47,$47
	.byt $48,$48,$49,$49,$4a,$4a,$4b,$4c,$4c,$4d,$4d,$4e,$4e,$4f,$4f,$50
	.byt $51,$51,$52,$52,$53,$53,$54,$54,$55,$56,$56,$57,$57,$58,$59,$59
	.byt $5a,$5a,$5b,$5c,$5c,$5d,$5d,$5e,$5f,$5f,$60,$60,$61,$62,$62,$63
	.byt $64,$64,$65,$65,$66,$67,$67,$68,$69,$69,$6a,$6a,$6b,$6c,$6c,$6d
	.byt $6e,$6e,$6f,$70,$70,$71,$72,$72,$73,$74,$74,$75,$76,$76,$77,$78
	.byt $79,$79,$7a,$7b,$7b,$7c,$7d,$7d,$7e,$7f,$7f,$80,$81,$82,$82,$83
	.byt $84,$84,$85,$86,$87,$87,$88,$89,$8a,$8a,$8b,$8c,$8d,$8d,$8e,$8f
	.byt $90,$90,$91,$92,$93,$93,$94,$95,$96,$96,$97,$98,$99,$99,$9a,$9b
	.byt $9c,$9d,$9d,$9e,$9f,$a0,$a0,$a1,$a2,$a3,$a4,$a4,$a5,$a6,$a7,$a8
	.byt $a9,$a9,$aa,$ab,$ac,$ad,$ad,$ae,$af,$b0,$b1,$b2,$b2,$b3,$b4,$b5
	.byt $b6,$b7,$b7,$b8,$b9,$ba,$bb,$bc,$bd,$bd,$be,$bf,$c0,$c1,$c2,$c3
	.byt $c4,$c4,$c5,$c6,$c7,$c8,$c9,$ca,$cb,$cb,$cc,$cd,$ce,$cf,$d0,$d1
	.byt $d2,$d3,$d4,$d4,$d5,$d6,$d7,$d8,$d9,$da,$db,$dc,$dd,$de,$df,$e0
	.byt $e1,$e1,$e2,$e3,$e4,$e5,$e6,$e7,$e8,$e9,$ea,$eb,$ec,$ed,$ee,$ef
	.byt $f0,$f1,$f2,$f3,$f4,$f5,$f6,$f7,$f8,$f9,$fa,$fb,$fc,$fd,$fe,$ff
User avatar
Dbug
Site Admin
Posts: 4437
Joined: Fri Jan 06, 2006 10:00 pm
Location: Oslo, Norway
Contact:

Post by Dbug »

It's a domain I never explored :)

Great to see that we can do fast multiplications !
JamesD
Flight Lieutenant
Posts: 358
Joined: Tue Nov 07, 2006 7:38 am

Post by JamesD »

Pretty slick routine... and to think on the 6809 I just used the MUL instruction to do this. 1 byte... 11 clock cycles. :P
Post Reply