2020/c64/math.asm

433 lines
10 KiB
NASM

// Zero-page operand and result locations shared by the multiply_* routines.
// The 8-bit routines read one byte each from T1 and T2 and write
// PRODUCT+0..+1; the 16-bit routines read T1+0..+1 and T2+0..+1 and write
// PRODUCT+0..+3.
.const T1=$03
.const T2=$fd
.const PRODUCT=$22
// Description: Unsigned 8-bit multiplication with unsigned 16-bit result.
//
// Method:      Quarter-square table lookup. With the tables laid out as
//              described under "Table generation":
//                square1[T1+T2] - square2[(255-T1)+T2]
//                  = (T1+T2)^2/4 - (T2-T1)^2/4 = T1*T2
//              (both squares have the same parity, so the truncation of
//              the two divisions cancels out).
//              T1 is not read during the lookup itself: it is patched into
//              the low address byte of the four table instructions
//              (self-modifying code). That is why the tables must be page
//              aligned and why this code must live in RAM.
//
// Input:       8-bit unsigned value in T1
//              8-bit unsigned value in T2
//              Carry=0: Re-use T1 from previous multiplication (faster).
//                       This means the previous call of this routine; the
//                       16-bit routine patches its own, separate
//                       instructions.
//              Carry=1: Set T1 (slower)
//
// Output:      16-bit unsigned value in PRODUCT
//
// Clobbered:   PRODUCT, X, A, C
//
// Allocation setup: T1, T2 and PRODUCT preferably on zero page.
//              square1_lo, square1_hi, square2_lo, square2_hi must be
//              page aligned. Each table is 512 bytes. Total 2 KB.
//
// Table generation: I:0..511
//              square1_lo = <((I*I)/4)
//              square1_hi = >((I*I)/4)
//              square2_lo = <(((I-255)*(I-255))/4)
//              square2_hi = >(((I-255)*(I-255))/4)
multiply_8bit_unsigned:
// Carry clear: the table instructions still carry the previous T1.
bcc !+
// Patch T1 into the low address byte of both square1 lookups ...
lda T1
sta sm1+1
sta sm3+1
// ... and 255-T1 into the low address byte of both square2 lookups.
eor #$ff
sta sm2+1
sta sm4+1
!:
// X = T2 indexes all four (patched) table addresses.
ldx T2
// SEC = no borrow going into the 16-bit subtraction.
sec
// Low byte: square1_lo[T1+T2] - square2_lo[255-T1+T2]
sm1: lda square1_lo,x
sm2: sbc square2_lo,x
sta PRODUCT+0
// High byte, including the borrow from the low byte.
sm3: lda square1_hi,x
sm4: sbc square2_hi,x
sta PRODUCT+1
rts
// Description: Signed 8-bit multiplication with signed 16-bit result.
//              Runs the unsigned multiplication first and then corrects the
//              high byte of the product once for every negative operand
//              (see C=Hacking16 for details).
//
// Input:       T1 = 8-bit signed value
//              T2 = 8-bit signed value
//              Carry=0: Re-use T1 from previous multiplication (faster)
//              Carry=1: Set T1 (slower)
//
// Output:      PRODUCT = 16-bit signed value
//
// Clobbered:   PRODUCT, X, A, C
multiply_8bit_signed:
    // JSR does not touch the flags, so the caller's carry selects the
    // re-use/set behaviour of the unsigned routine.
    jsr multiply_8bit_unsigned

    // T1 negative: PRODUCT.hi -= T2
    lda T1
    bpl !t1_done+
    lda PRODUCT + 1
    sec
    sbc T2
    sta PRODUCT + 1
!t1_done:

    // T2 negative: PRODUCT.hi -= T1
    lda T2
    bpl !t2_done+
    lda PRODUCT + 1
    sec
    sbc T1
    sta PRODUCT + 1
!t2_done:
    rts
// Description: Unsigned 16-bit multiplication with unsigned 32-bit result.
//
// Method:      Four 8x8 quarter-square table multiplications (same lookup
//              scheme as multiply_8bit_unsigned) whose 16-bit partial
//              products are then added up at the right byte offsets.
//              The routine is self-modifying: T1 is patched into the table
//              instructions, and the partial products are patched into the
//              immediate operands of the final additions (_AA, _bb, ...).
//
// Input:       16-bit unsigned value in T1 (low byte at T1+0)
//              16-bit unsigned value in T2 (low byte at T2+0)
//              Carry=0: Re-use T1 from previous multiplication (faster)
//              Carry=1: Set T1 (slower)
//
// Output:      32-bit unsigned value in PRODUCT (low byte at PRODUCT+0)
//
// Clobbered:   PRODUCT, X, A, C
//
// Allocation setup: T1, T2 and PRODUCT preferably on zero page.
//              square1_lo, square1_hi, square2_lo, square2_hi must be
//              page aligned. Each table is 512 bytes. Total 2 KB.
//
// Table generation: I:0..511
//              square1_lo = <((I*I)/4)
//              square1_hi = >((I*I)/4)
//              square2_lo = <(((I-255)*(I-255))/4)
//              square2_hi = >(((I-255)*(I-255))/4)
multiply_16bit_unsigned:
// <T1 * <T2 = AAaa
// <T1 * >T2 = BBbb
// >T1 * <T2 = CCcc
// >T1 * >T2 = DDdd
//
//       AAaa
//     BBbb
//     CCcc
// + DDdd
// ----------
//   PRODUCT!
// Setup T1 if changed
bcc !+
// <T1 goes into the square1 lookups of the two "<T1 * ..." products ...
lda T1+0
sta sm1a+1
sta sm3a+1
sta sm5a+1
sta sm7a+1
// ... and 255-<T1 into the matching square2 lookups.
eor #$ff
sta sm2a+1
sta sm4a+1
sta sm6a+1
sta sm8a+1
// Same again with >T1 for the two ">T1 * ..." products.
lda T1+1
sta sm1b+1
sta sm3b+1
sta sm5b+1
sta sm7b+1
eor #$ff
sta sm2b+1
sta sm4b+1
sta sm6b+1
sta sm8b+1
!:
// Perform <T1 * <T2 = AAaa
ldx T2+0
sec
sm1a: lda square1_lo,x
sm2a: sbc square2_lo,x
// aa is final: nothing else contributes to the lowest product byte.
sta PRODUCT+0
sm3a: lda square1_hi,x
sm4a: sbc square2_hi,x
// AA is parked in the immediate operand of the LDA at _AA.
sta _AA+1
// Perform >T1 * <T2 = CCcc
sec
sm1b: lda square1_lo,x
sm2b: sbc square2_lo,x
sta _cc+1
sm3b: lda square1_hi,x
sm4b: sbc square2_hi,x
sta _CC+1
// Perform <T1 * >T2 = BBbb
ldx T2+1
sec
sm5a: lda square1_lo,x
sm6a: sbc square2_lo,x
sta _bb+1
sm7a: lda square1_hi,x
sm8a: sbc square2_hi,x
sta _BB+1
// Perform >T1 * >T2 = DDdd
sec
sm5b: lda square1_lo,x
sm6b: sbc square2_lo,x
sta _dd+1
sm7b: lda square1_hi,x
sm8b: sbc square2_hi,x
// DD starts out as the top byte; carries are added to it below.
sta PRODUCT+3
// Add the separate multiplications together
// The "#0" operands below are placeholders overwritten by the stores above.
clc
// PRODUCT+1 = AA + bb
_AA: lda #0
_bb: adc #0
sta PRODUCT+1
// PRODUCT+2 = BB + CC + carry
_BB: lda #0
_CC: adc #0
sta PRODUCT+2
// A carry out of byte 2 goes into the top byte.
bcc !+
inc PRODUCT+3
clc
!:
// PRODUCT+1 += cc
_cc: lda #0
adc PRODUCT+1
sta PRODUCT+1
// PRODUCT+2 += dd + carry
_dd: lda #0
adc PRODUCT+2
sta PRODUCT+2
// Second possible carry into the top byte.
bcc !+
inc PRODUCT+3
!:
rts
// Description: Signed 16-bit multiplication with signed 32-bit result.
//              Runs the unsigned multiplication first and then corrects the
//              upper 16 bits of the product once for every negative operand
//              (see C=Hacking16 for details).
//
// Input:       T1 = 16-bit signed value
//              T2 = 16-bit signed value
//              Carry=0: Re-use T1 from previous multiplication (faster)
//              Carry=1: Set T1 (slower)
//
// Output:      PRODUCT = 32-bit signed value
//
// Clobbered:   PRODUCT, X, A, C
multiply_16bit_signed:
    // JSR does not touch the flags, so the caller's carry selects the
    // re-use/set behaviour of the unsigned routine.
    jsr multiply_16bit_unsigned

    // T1 negative: PRODUCT.hi16 -= T2
    lda T1 + 1
    bpl !t1_done+
    lda PRODUCT + 2
    sec
    sbc T2 + 0
    sta PRODUCT + 2
    lda PRODUCT + 3
    sbc T2 + 1
    sta PRODUCT + 3
!t1_done:

    // T2 negative: PRODUCT.hi16 -= T1
    lda T2 + 1
    bpl !t2_done+
    lda PRODUCT + 2
    sec
    sbc T1 + 0
    sta PRODUCT + 2
    lda PRODUCT + 3
    sbc T1 + 1
    sta PRODUCT + 3
!t2_done:
    rts
// Description: Fills the four lookup tables used by the multiply_* routines.
//              The first loop writes square1_lo/square1_hi incrementally;
//              the second loop derives square2_lo/square2_hi by copying
//              entries from square1.
//
// Input:       none
//
// Clobbered:   A, X, Y, flags
//
// NOTE(review): the first loop is self-modifying (the ADC operand at !ml9 and
// the address high bytes at !ml0/!ml1) and nothing here restores those bytes
// afterwards, so the routine looks like it is meant to run only once per
// load - confirm before calling it a second time.
generate_multiplication_tables:
ldx #0
txa
// $c9 is the opcode of CMP #imm. It swallows the TYA below as its operand,
// so the first pass enters the loop at the ADC with A=0 and - because 0 is
// lower than the TYA opcode byte - with carry clear.
.byte $c9
!lb1:
// A = running high byte (kept in Y) plus the carry left by the previous step.
tya
adc #0
!ml1:
sta square1_hi, x
tay
// Carry = (high byte >= $40). ROR then moves that carry into bit 7 of X/2
// and leaves bit 0 of X in carry for the ADC below.
cmp #$40
txa
ror
// The immediate operand of the following ADC holds the running low byte;
// the STA right after it writes the new sum back into that operand.
!ml9:
adc #0
sta !ml9- + 1
inx
// Stored at the already incremented X, i.e. one entry ahead of the
// square1_hi store above.
!ml0:
sta square1_lo, x
// The zero flag still comes from INX (STA leaves flags alone): loop until
// X wraps, i.e. once per 256 entries.
bne !lb1-
// Move both store instructions on to the next page of their tables.
inc !ml0- + 2
inc !ml1- + 2
clc
// Generation ends when this increment wraps Y to zero
// (presumably after the second 256-entry page - TODO confirm).
iny
bne !lb1-
// Build square2 from square1, for X = 0..255 with Y = 255-X:
//   square2[256 + X] = square1[X + 1]
//   square2[255 - X] = square1[X]
ldx #$00
ldy #$ff
!:
lda square1_hi + 1, x
sta square2_hi + $0100, x
lda square1_hi, x
sta square2_hi, y
lda square1_lo + 1, x
sta square2_lo + $0100, x
lda square1_lo, x
sta square2_lo, y
dey
inx
bne !-
rts
//
// udivmod32
//
// Description: Unsigned 32-bit division producing a 32-bit quotient and a
//              32-bit remainder (shift-and-subtract).
//
// Input:       udivmod32_dividend  = 32-bit unsigned value (low byte first)
//              udivmod32_divisor   = 32-bit unsigned value (low byte first)
//
// Output:      udivmod32_result    = dividend / divisor
//              udivmod32_remainder = dividend % divisor
//              If the divisor is zero the routine returns immediately and
//              leaves both outputs unchanged.
//
// Clobbered:   A, flags, scaled_divisor, multiple, temp
//              (X and Y are preserved; dividend and divisor are not modified)
//
// NOTE(review): scaled_divisor ($20..$23) shares $22/$23 with PRODUCT, so a
// pending multiplication result is destroyed by this routine.
//
// TODO consistent name
//
.const udivmod32_dividend = $10 // 11,12,13
.const udivmod32_divisor = $14 // 15,16,17
.const udivmod32_result = $18 // 19,1a,1b
.const udivmod32_remainder = $1c // 1d,1e,1f
.const scaled_divisor = $20 // ..23
.const multiple = $28 // ..2b
// temp must not share a byte with multiple: it is written while multiple
// is still live inside the division loop.
.const temp = $2c // ..2f
udivmod32:
    // if (divisor == 0) return;
    lda udivmod32_divisor + 0
    ora udivmod32_divisor + 1
    ora udivmod32_divisor + 2
    ora udivmod32_divisor + 3
    bne !divisor_ok+
    rts
!divisor_ok:

    // uint32_t scaled_divisor = divisor;
    lda udivmod32_divisor + 0
    sta scaled_divisor + 0
    lda udivmod32_divisor + 1
    sta scaled_divisor + 1
    lda udivmod32_divisor + 2
    sta scaled_divisor + 2
    lda udivmod32_divisor + 3
    sta scaled_divisor + 3

    // remainder = dividend;  (all four bytes)
    lda udivmod32_dividend + 0
    sta udivmod32_remainder + 0
    lda udivmod32_dividend + 1
    sta udivmod32_remainder + 1
    lda udivmod32_dividend + 2
    sta udivmod32_remainder + 2
    lda udivmod32_dividend + 3
    sta udivmod32_remainder + 3

    // uint32_t multiple = 1;
    lda #1
    sta multiple + 0
    lda #0
    sta multiple + 1
    sta multiple + 2
    sta multiple + 3

    // uint32_t result = 0;
    sta udivmod32_result + 0
    sta udivmod32_result + 1
    sta udivmod32_result + 2
    sta udivmod32_result + 3

    // while (scaled_divisor < dividend && !(scaled_divisor & $80000000)) {
!scale_loop:
    // 32-bit compare: carry ends up set when scaled_divisor >= dividend.
    sec
    lda scaled_divisor + 0
    sbc udivmod32_dividend + 0
    lda scaled_divisor + 1
    sbc udivmod32_dividend + 1
    lda scaled_divisor + 2
    sbc udivmod32_dividend + 2
    lda scaled_divisor + 3
    sbc udivmod32_dividend + 3
    bcs !scale_done+
    // Stop before the top bit would be shifted out; otherwise
    // scaled_divisor wraps around and the loop never terminates.
    bit scaled_divisor + 3
    bmi !scale_done+
    //     scaled_divisor <<= 1;
    asl scaled_divisor + 0
    rol scaled_divisor + 1
    rol scaled_divisor + 2
    rol scaled_divisor + 3
    //     multiple <<= 1;
    asl multiple + 0
    rol multiple + 1
    rol multiple + 2
    rol multiple + 3
    // }
    jmp !scale_loop-
!scale_done:

    // do {
!divide_loop:
    //     temp = remainder - scaled_divisor;
    //     carry set <=> remainder >= scaled_divisor
    sec
    lda udivmod32_remainder + 0
    sbc scaled_divisor + 0
    sta temp + 0
    lda udivmod32_remainder + 1
    sbc scaled_divisor + 1
    sta temp + 1
    lda udivmod32_remainder + 2
    sbc scaled_divisor + 2
    sta temp + 2
    lda udivmod32_remainder + 3
    sbc scaled_divisor + 3
    sta temp + 3
    //     if (remainder >= scaled_divisor) {
    bcc !no_subtract+
    //         remainder -= scaled_divisor;
    lda temp + 0
    sta udivmod32_remainder + 0
    lda temp + 1
    sta udivmod32_remainder + 1
    lda temp + 2
    sta udivmod32_remainder + 2
    lda temp + 3
    sta udivmod32_remainder + 3
    //         result |= multiple;
    //         (multiple is a single bit that is not yet set in result,
    //          so OR is equivalent to adding it)
    lda udivmod32_result + 0
    ora multiple + 0
    sta udivmod32_result + 0
    lda udivmod32_result + 1
    ora multiple + 1
    sta udivmod32_result + 1
    lda udivmod32_result + 2
    ora multiple + 2
    sta udivmod32_result + 2
    lda udivmod32_result + 3
    ora multiple + 3
    sta udivmod32_result + 3
    //     }
!no_subtract:
    //     scaled_divisor >>= 1;
    lsr scaled_divisor + 3
    ror scaled_divisor + 2
    ror scaled_divisor + 1
    ror scaled_divisor + 0
    //     multiple >>= 1;
    lsr multiple + 3
    ror multiple + 2
    ror multiple + 1
    ror multiple + 0
    // } while (multiple != 0);
    lda multiple + 0
    ora multiple + 1
    ora multiple + 2
    ora multiple + 3
    bne !divide_loop-
    rts