From 438b622c5801ebb5ebb055503a045e505f3a5b6a Mon Sep 17 00:00:00 2001
From: Sijmen Schoon
Date: Wed, 9 Dec 2020 13:26:53 +0100
Subject: [PATCH] Make 32-64 bit multiplication routine based on the 16-32 bit one

---
Review notes (this section is ignored when the patch is applied):
 - i32_i16_add: the carry out of byte 1 is now rippled through byte 2 into
   byte 3. Previously `inc dst + 2` could wrap $ff -> $00 without touching
   dst + 3. The math.inc hunk headers and the diffstat below were adjusted
   for the added lines.
 - NOTE(review): multiply_32bit_unsigned issues only two calls to
   multiply_16bit_unsigned, and the routine itself never writes aa/AA after
   clearing them; presumably the remaining partial products are still to be
   added -- confirm before relying on the result.
 - NOTE(review): the math.asm hunk header is kept as received; it appears
   to declare more added lines than are present in this copy -- verify
   against the original commit.

 c64/main.asm | 20 ++++++++++++++++++
 c64/math.asm | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 c64/math.inc | 35 +++++++++++++++++++++++---------
 3 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/c64/main.asm b/c64/main.asm
index ab54097..507dfeb 100644
--- a/c64/main.asm
+++ b/c64/main.asm
@@ -23,6 +23,26 @@
     sta dst + 1
 }
 
+.macro u16_u16_move(dst, src) {
+    .for (var i = 0; i < 2; i++) {
+        lda src + i
+        sta dst + i
+    }
+}
+
+.macro u32_u32_move(dst, src) {
+    .for (var i = 0; i < 4; i++) {
+        lda src + i
+        sta dst + i
+    }
+}
+
+.macro u32_u32_move_imm(dst, imm) {
+    .for (var i = 0; i < 4; i++) {
+        lda #((imm >> (i * 8)) & $ff)
+        sta dst + i
+    }
+}
 //
 // main
 //
diff --git a/c64/math.asm b/c64/math.asm
index cae2234..2a54d66 100644
--- a/c64/math.asm
+++ b/c64/math.asm
@@ -203,6 +203,63 @@ _dd:
     lda #0
     rts
 
+.const aa = $38
+.const AA = $39
+.const bb = $3a
+.const BB = $3b
+.const dd = $3c
+.const DD = $3d
+.const hh = $3e
+.const HH = $3f
+multiply_32bit_unsigned:
+    // BBbb AAaa
+    // DDdd CCcc
+    // FFff EEee
+    // + HHhh GGgg
+    // --------------------------
+    // 3e 3c 3a 38
+    // HHhh DDdd BBbb AAaa
+    // FFff CCcc
+    // GGgg EEee
+
+    u32_u32_move_imm(aa, 0)
+    u32_u32_move_imm(dd, 0)
+
+    // Perform T1 * T2 = FFffEEee
+    u16_u16_move(T1, $30)
+    u16_u16_move(T2, $36)
+    sec
+    jsr multiply_16bit_unsigned
+    i32_i16_add(bb, PRODUCT) // EEee
+    i32_i16_add(dd, PRODUCT + 2) // FFff
+
+    // Perform >T1 * >T2 = HHhhGGgg
+    u16_u16_move(T1, $32)
+    u16_u16_move(T2, $36)
+    sec
+    jsr multiply_16bit_unsigned
+    i32_i16_add(dd, PRODUCT) // GGgg
+    i16_i16_add(hh, PRODUCT + 2) // HHhh
+
+    rts
+
+
 // Description: Signed 16-bit multiplication with signed 32-bit result.
 //
 // Input: 16-bit signed value in T1
diff --git a/c64/math.inc b/c64/math.inc
index 6a2b017..a92df1a 100644
--- a/c64/math.inc
+++ b/c64/math.inc
@@ -96,7 +96,7 @@
     sta $fe
 
     i16_mul4(dst, dst + 1)
-    i16_i16_add(dst, dst + 1, $fd, $fe)
+    i16_i16_add(dst, $fd)
 }
 
 .macro u32_mul2(lsb) {
@@ -124,6 +124,23 @@
     i32_i32_add(lsb, $2c)
 }
 
+// Adds the zero-extended 16-bit value at src to the 32-bit value at dst
+// (low byte first). Destroys: a
+.macro i32_i16_add(dst, src) {
+    clc
+    lda dst + 0
+    adc src + 0
+    sta dst + 0
+    lda dst + 1
+    adc src + 1
+    sta dst + 1
+    bcc !+              // no carry out of the low word: upper bytes unchanged
+    inc dst + 2
+    bne !+              // byte 2 did not wrap to $00: carry fully absorbed
+    inc dst + 3         // byte 2 wrapped, ripple the carry into byte 3
+!:
+}
+
 .macro i32_i32_add(dst, src) {
     clc
     lda dst + 0
@@ -150,15 +167,15 @@
 !cc:
 }
 
-// Destroys: a, dst_lo, dst_hi
-.macro i16_i16_add(dst_lo, dst_hi, src_lo, src_hi) {
+// Destroys: a
+.macro i16_i16_add(dst, src) {
     clc
-    lda dst_lo
-    adc src_lo
-    sta dst_lo
-    lda dst_hi
-    adc src_hi
-    sta dst_hi
+    lda dst
+    adc src
+    sta dst
+    lda dst + 1
+    adc src + 1
+    sta dst + 1
 }
 
 // Destroys: a, dst_lo, dst_hi