Compare commits

...

5 commits

Author SHA1 Message Date
Brooke Vibber 57975b7158 not sure what's wrong have to check over 2023-01-22 12:02:15 -08:00
Brooke Vibber 1bef004ccd precision cleanup
using 4.12 and 8.24 consistently
2023-01-22 11:17:51 -08:00
Brooke Vibber ae9dd0674d corrupt! but it produces pixels 2023-01-22 10:42:37 -08:00
Brooke Vibber b4721ae46b fix pixel shift 2023-01-22 09:37:37 -08:00
Brooke Vibber dbbec8ed6d ok two things wrong:
1) bit masks are backwards
2) iter always returning 0
2023-01-22 09:34:42 -08:00

153
mandel.s
View file

@ -1,17 +1,17 @@
; Our zero-page vars
sx = $80 ; i16: screen pixel x
sy = $82 ; i16: screen pixel y
ox = $84 ; fixed3.13: center point x
oy = $86 ; fixed3.13: center point y
cx = $84 ; fixed3.13: c_x
cy = $86 ; fixed3.13: c_y
zx = $88 ; fixed3.13: z_x
zy = $8a ; fixed3.13: z_y
ox = $84 ; fixed4.12: center point x
oy = $86 ; fixed4.12: center point y
cx = $84 ; fixed4.12: c_x
cy = $86 ; fixed4.12: c_y
zx = $88 ; fixed4.12: z_x
zy = $8a ; fixed4.12: z_y
zx_2 = $90 ; fixed6.26: z_x^2
zy_2 = $94 ; fixed6.26: z_y^2
zx_zy = $98 ; fixed6.26: z_x * z_y
dist = $9c ; fixed6.26: z_x^2 + z_y^2
zx_2 = $90 ; fixed8.24: z_x^2
zy_2 = $94 ; fixed8.24: z_y^2
zx_zy = $98 ; fixed8.24: z_x * z_y
dist = $9c ; fixed8.24: z_x^2 + z_y^2
iter = $a0 ; u8: iteration count
zoom = $a1 ; u8: zoom shift level
@ -42,8 +42,6 @@ half_height = height >> 1
width = 160
half_width = width >> 1
stride = width >> 2
width_ratio_3_13 = (5 << 11) ; 5/4
height_ratio_3_13 = (3 << 11) ; 3/4
DMACTL = $D400
DLISTL = $D402
@ -101,18 +99,12 @@ aspect:
; 184h is the equiv of 220.8h at square pixels
; 320 / 220.8 = 1.45 display aspect ratio
aspect_x:
.word 5 << (13 - 2)
.word 5 << (12 - 2)
aspect_y:
.word 3 << (13 - 2)
.word 3 << (12 - 2)
bit_masks:
.byte 3
.byte 3 << 2
.byte 3 << 4
.byte 3 << 6
display_list_start:
; 24 lines overscan
.repeat 3
@ -168,7 +160,7 @@ color_map:
.endmacro
.macro add32 dest, arg1, arg2
    ; 32-bit (4-byte) addition, delegated to the generic `add` macro.
    ; Only a single 4-byte add is emitted; a preceding 2-byte add on the same
    ; operands would add the low 16 bits a second time.
    ;
    ; NOTE(review): `arg1` is accepted but never used - the operands handed to
    ; `add` are `arg2` and `dest`, so this reads as dest += arg2 rather than
    ; dest = arg1 + arg2. The parameter order of `add` is not visible from
    ; here; confirm it before changing the operands.
    add 4, dest, arg2, dest
.endmacro
; 2 + 9 * byte cycles
@ -244,21 +236,6 @@ color_map:
neg 4, arg
.endmacro
.macro extend_8_16 dest, src
    ; Sign-extend the byte at `src` into the two bytes at `dest` / `dest + 1`
    ; (low byte first).
    ; Clobbers: A (ends up holding the source byte) and X (ends up holding the
    ; high byte that was stored: $00 or $FF).
    ; Cost: 13-15 cycles.
    .local store_high
    ldx #0              ; 2 cyc - start from a high byte of $00
    lda src             ; 3 cyc - N flag now mirrors bit 7 of src
    sta dest            ; 3 cyc - low byte copied unchanged; flags untouched
    bpl store_high      ; 2 cyc - bit 7 clear: keep $00
    dex                 ; 2 cyc - bit 7 set: $00 - 1 = $FF
store_high:
    stx dest + 1        ; 3 cyc
.endmacro
; inner loop for imul16
; bitnum < 8: 25 or 41 cycles
; bitnum >= 8: 30 or 46 cycles
@ -277,10 +254,10 @@ positive:
; 5 cycles either way
.if bitnum < 8
lda arg1 ; 3 cyc
and #(1 << bitnum) ; 2 cyc
and #(1 << (bitnum)) ; 2 cyc
.else
lda arg1 + 1 ; 3 cyc
and #(1 << (bitnum - 8)) ; 2 cyc
and #(1 << ((bitnum) - 8)) ; 2 cyc
.endif
bne one ; 2 cyc
@ -307,7 +284,6 @@ next:
ror result ; 5 cyc
.endif
.endmacro
; 5 to 25 cycles
@ -330,11 +306,18 @@ positive:
copy32 dest, FR2 ; 24 cyc
.endmacro
.macro imul16_round dest, arg1, arg2
.macro shift_round_16 arg, shift
    ; Apply `shl32` to `arg` exactly `shift` times (expanded at assembly time,
    ; so `shift` must be a constant expression), then apply `round16` to it.
    ; NOTE(review): presumably shl32 shifts the 32-bit value left by one bit
    ; and round16 rounds it into its upper 16 bits - both macros are defined
    ; elsewhere; confirm there.
    .repeat shift
        shl32 arg
    .endrepeat
    round16 arg
.endmacro
.macro imul16_round dest, arg1, arg2, shift
    ; 16-bit multiply with a rounded 16-bit result.
    ;   dest  - receives the two bytes at FR2 + 2 after rounding
    ;   arg1  - 16-bit operand, copied into FR0
    ;   arg2  - 16-bit operand, copied into FR1
    ;   shift - optional constant: how many times FR2 is shifted (via
    ;           shift_round_16) before rounding. When omitted, FR2 is only
    ;           rounded, so three-argument call sites keep assembling.
    ;
    ; The result is rounded exactly once: shift_round_16 already ends with
    ; round16, so no separate round16 is issued ahead of it.
    ; NOTE(review): assumes imul16_func leaves the 32-bit product in FR2 -
    ; it is defined elsewhere; confirm there.
    copy16 FR0, arg1            ; 12 cyc
    copy16 FR1, arg2            ; 12 cyc
    jsr imul16_func             ; 470-780 cyc
    .ifblank shift
        round16 FR2
    .else
        shift_round_16 FR2, shift
    .endif
    copy16 dest, FR2 + 2        ; 12 cyc
.endmacro
@ -438,71 +421,60 @@ next:
; dist = 0
; iter = 0
lda #00
ldx iter - zx
ldx #(iter - zx + 1)
initloop:
sta zx,x
sta zx - 1,x
dex
bne initloop
loop:
; 1939 - 3007 cyc
; iter++ & max-iters break = 7 cyc
inc iter ; 5 cyc
bne keep_going ; 2 cyc
; iter++ & max-iters break
inc iter
bne keep_going
rts
keep_going:
.macro quick_exit arg
    ; Early bail-out: executes RTS (returning from whatever subroutine this
    ; macro is expanded in) when the high byte of `arg` lies in $40..$BF;
    ; otherwise execution falls through to the code after the macro.
    ;
    ; CMP sets N from bit 7 of (A - $40), so BMI is taken for high bytes
    ; $00..$3F and $C0..$FF. For a signed 4.12 value that keeps -4 <= arg < 4
    ; and exits for everything else.
    ; Clobbers: A and the flags.
    .local in_range
    lda arg + 1         ; high byte of the 16-bit value
    cmp #$40            ; $40 = 4 << 4, i.e. 4.0 in the high byte of 4.12
    bmi in_range
    rts
in_range:
.endmacro
; 4.12: (-8 .. +7.9)
; zx = zx_2 - zy_2 + cx = 3 * 20 = 60 cyc
; zx = zx_2 - zy_2 + cx
sub16 zx, zx_2, zy_2
add16 zx, zx, cx
quick_exit zx
; zy = zx_zy + zx_zy + cy = 3 * 20 = 60 cyc
sub16 zy, zx_zy, zx_zy
; zy = zx_zy + zx_zy + cy
add16 zy, zx_zy, zx_zy
add16 zy, zy, cy
; 8.24: (-128 .. +127.9)
; zx_2 = zx * zx = 518 - 828 cyc
imul16 zx_2, zx, zx
; zx_2 = zx * zx
imul16_round zx_2, zx, zx, 4
quick_exit dist
; zy_2 = zy * zy = 518 - 828 cyc
imul16 zy_2, zy, zy
; zy_2 = zy * zy
imul16_round zy_2, zy, zy, 4
quick_exit dist
; zx_zy = zx * zy = 518 - 828 cyc
imul16 zx_zy, zx, zy
; zx_zy = zx * zy
imul16_round zx_zy, zx, zy, 4
quick_exit dist
; dist = zx_2 + zy_2 = 38 cyc
add32 dist, zx_2, zy_2
; if dist >= 4 break, else continue iterating = 7 cyc
lda dist + 3 ; 3 cyc
cmp #4 ; 2 cyc
bmi still_in ; 2 cyc
rts
still_in:
; shift and round zx_2 to 4.12 = (60 + 5) - (60 + 28) = 65 - 88 cyc
.repeat 4 ; 60 cyc
shl24 zx_2 ; 15 cyc
.endrepeat
round16 zx_2 ; 5-28 cycles
; shift and round zy_2 to 4.12 = (20 + 5) - (20 + 28) = 65 - 88 cyc
.repeat 4 ; 60 cyc
shl24 zy_2 ; 15 cyc
.endrepeat
round16 zy_2 ; 5-28 cycles
; shift and round zx_zy to 4.12 = (20 + 5) - (20 + 28) = 65 - 88 cyc
.repeat 4 ; 60 cyc
shl24 zx_zy ; 15 cyc
.endrepeat
round16 zx_zy ; 5-28 cycles
; dist = zx_2 + zy_2
add16 dist, zx_2, zy_2
quick_exit dist
; if may be in the lake, look for looping output with a small buffer
; as an optimization vs running to max iters
jmp loop ; 3 cycles
jmp loop
peace_out:
rts
.endproc
@ -523,7 +495,7 @@ enough:
; cy = cy * (3 / 4)
; cx = cx * (5 / 4)
imul16_round dest, dest, aspect
imul16_round dest, dest, aspect, 4
.endmacro
.proc pset
@ -584,6 +556,9 @@ point:
; pixel_mask <<= pixel_shift (shifting in ones)
and #3
sta pixel_shift
lda #3
sec
sbc pixel_shift
tax
shift_loop:
beq shift_done
@ -637,9 +612,13 @@ done:
sta ox + 1
sta oy
sta oy + 1
; zoom = 2x
lda #1
sta zoom
; Disable display DMA
lda #0
sta DMACTL
; zero the range from framebuffer_top to framebuffer_end