Make 32-64 bit multiplication routine based on the 16-32 bit one

This commit is contained in:
Sijmen 2020-12-09 13:26:53 +01:00
parent e001280dc2
commit 438b622c58
Signed by: vijfhoek
GPG key ID: DAF7821E067D9C48
3 changed files with 99 additions and 9 deletions

View file

@ -23,6 +23,26 @@
sta dst + 1
}
// Copies the two bytes at src and src + 1 to dst and dst + 1,
// lowest address first.
// Destroys: a
.macro u16_u16_move(dst, src) {
    lda src + 0
    sta dst + 0
    lda src + 1
    sta dst + 1
}
// Copies the four bytes starting at src to the four bytes starting at dst,
// lowest address first.
// Destroys: a
.macro u32_u32_move(dst, src) {
    .for (var offset = 0; offset <= 3; offset++) {
        lda src + offset
        sta dst + offset
    }
}
// Stores the 32-bit assemble-time constant imm into dst .. dst + 3,
// least significant byte at dst.
// Destroys: a
.macro u32_u32_move_imm(dst, imm) {
    lda #((imm >> 0) & $ff)     // bits 0-7
    sta dst + 0
    lda #((imm >> 8) & $ff)     // bits 8-15
    sta dst + 1
    lda #((imm >> 16) & $ff)    // bits 16-23
    sta dst + 2
    lda #((imm >> 24) & $ff)    // bits 24-31
    sta dst + 3
}
//
// main
//

View file

@ -203,6 +203,63 @@ _dd: lda #0
rts
// Zero-page locations of the 64-bit result of multiply_32bit_unsigned.
// Each pair names one 16-bit word of the result: the lower-case name is the
// byte at the lower address, the upper-case name the byte directly above it.
.const aa = $38
.const AA = $39
.const bb = $3a
.const BB = $3b
.const dd = $3c
.const DD = $3d
.const hh = $3e
.const HH = $3f
// Description: Unsigned 32-bit multiplication with unsigned 64-bit result,
//              assembled from four calls to multiply_16bit_unsigned.
//
// Input: first operand in $30-$33 (low word at $30, high word at $32),
//        second operand in $34-$37 (low word at $34, high word at $36).
//        In the comments below "T1" and "T2" name these two 32-bit operands;
//        "<" selects the low word and ">" the high word.
// Output: 64-bit product in $38-$3f (aa .. HH), least significant word at $38.
// Destroys: a, T1, T2, PRODUCT (and anything multiply_16bit_unsigned destroys)
//
// NOTE(review): correctness of the accumulation depends on i32_i16_add
// rippling its carry through BOTH upper bytes of its destination; verify the
// macro does so, otherwise some operand combinations lose a carry.
multiply_32bit_unsigned:
// Each partial product is 32 bits wide and is added into the result at the
// word offset shown in the column diagram:
// BBbb AAaa
// DDdd CCcc
// FFff EEee
// + HHhh GGgg
// --------------------------
// 3e 3c 3a 38
// HHhh DDdd BBbb AAaa
// FFff CCcc
// GGgg EEee
// Clear all eight result bytes ($38-$3f) before accumulating.
u32_u32_move_imm(aa, 0)
u32_u32_move_imm(dd, 0)
// Perform <T1 * <T2 = BBbbAAaa
u16_u16_move(T1, $30)
u16_u16_move(T2, $34)
// NOTE(review): carry is set before every call; presumably an entry
// requirement of multiply_16bit_unsigned — confirm against that routine.
sec
jsr multiply_16bit_unsigned
// The result was just cleared, so the first partial product is copied
// rather than added.
u16_u16_move(aa, PRODUCT + 0) // AAaa
u16_u16_move(bb, PRODUCT + 2) // BBbb
// Perform >T1 * <T2 = DDddCCcc
u16_u16_move(T1, $32)
u16_u16_move(T2, $34)
sec
jsr multiply_16bit_unsigned
// Low word goes into bb (carry continues into dd), high word into dd.
i32_i16_add(bb, PRODUCT) // CCcc
i16_i16_add(dd, PRODUCT + 2) // DDdd
// Perform <T1 * >T2 = FFffEEee
u16_u16_move(T1, $30)
u16_u16_move(T2, $36)
sec
jsr multiply_16bit_unsigned
// Same columns as the previous partial product; here the high-word add may
// carry into hh, hence the 32-bit-destination add.
i32_i16_add(bb, PRODUCT) // EEee
i32_i16_add(dd, PRODUCT + 2) // FFff
// Perform >T1 * >T2 = HHhhGGgg
u16_u16_move(T1, $32)
u16_u16_move(T2, $36)
sec
jsr multiply_16bit_unsigned
// Low word goes into dd (carry continues into hh), high word into hh.
i32_i16_add(dd, PRODUCT) // GGgg
i16_i16_add(hh, PRODUCT + 2) // HHhh
rts
// Description: Signed 16-bit multiplication with signed 32-bit result.
//
// Input: 16-bit signed value in T1

View file

@ -96,7 +96,7 @@
sta $fe
i16_mul4(dst, dst + 1)
i16_i16_add(dst, dst + 1, $fd, $fe)
i16_i16_add(dst, $fd)
}
.macro u32_mul2(lsb) {
@ -124,6 +124,19 @@
i32_i32_add(lsb, $2c)
}
// Adds the 16-bit value at src .. src + 1 to the 32-bit value at
// dst .. dst + 3 (both least significant byte first) and stores the sum
// back in dst.
//
// src is zero-extended: no sign extension is performed, so it is treated as
// an unsigned quantity. A carry out of dst + 3 is discarded.
//
// Destroys: a, flags
.macro i32_i16_add(dst, src) {
    clc
    lda dst + 0
    adc src + 0
    sta dst + 0
    lda dst + 1
    adc src + 1
    sta dst + 1
    bcc !+              // no carry out of the low word: upper word unchanged
    inc dst + 2         // ripple the carry into byte 2
    bne !+              // byte 2 did not wrap to $00: carry fully absorbed
    inc dst + 3         // byte 2 wrapped $ff -> $00: carry on into byte 3
!:
}
.macro i32_i32_add(dst, src) {
clc
lda dst + 0
@ -150,15 +163,15 @@
!cc:
}
// Destroys: a, dst_lo, dst_hi
.macro i16_i16_add(dst_lo, dst_hi, src_lo, src_hi) {
// Adds the 16-bit value at src .. src + 1 to the 16-bit value at
// dst .. dst + 1 (lowest address is added first, with the carry chained into
// the next byte) and stores the sum back in dst. The carry out of the second
// byte is left in the processor carry flag and is not stored by this macro.
// Destroys: a
.macro i16_i16_add(dst, src) {
clc
lda dst_lo
adc src_lo
sta dst_lo
lda dst_hi
adc src_hi
sta dst_hi
lda dst
adc src
sta dst
lda dst + 1
adc src + 1
sta dst + 1
}
// Destroys: a, dst_lo, dst_hi