not sure what's wrong, have to check over

precision cleanup
using 4.12 and 8.24 consistently
2023-01-22 12:02:15 -08:00 · 2023-01-22 11:17:51 -08:00 · 2023-01-22 10:42:37 -08:00 · 2023-01-22 09:37:37 -08:00 · 2023-01-22 09:34:42 -08:00
1 changed files with 66 additions and 87 deletions
--- a/mandel.s
+++ b/mandel.s
@ -1,17 +1,17 @@
 ; Our zero-page vars
 sx    = $80     ; i16: screen pixel x
 sy    = $82     ; i16: screen pixel y
-ox    = $84     ; fixed3.13: center point x
-oy    = $86     ; fixed3.13: center point y
-cx    = $84     ; fixed3.13: c_x
-cy    = $86     ; fixed3.13: c_y
-zx    = $88     ; fixed3.13: z_x
-zy    = $8a     ; fixed3.13: z_y
+ox    = $84     ; fixed4.12: center point x
+oy    = $86     ; fixed4.12: center point y
+cx    = $84     ; fixed4.12: c_x
+cy    = $86     ; fixed4.12: c_y
+zx    = $88     ; fixed4.12: z_x
+zy    = $8a     ; fixed4.12: z_y

-zx_2  = $90     ; fixed6.26: z_x^2
-zy_2  = $94     ; fixed6.26: z_y^2
-zx_zy = $98     ; fixed6.26: z_x * z_y
-dist  = $9c     ; fixed6.26: z_x^2 + z_y^2
+zx_2  = $90     ; fixed8.24: z_x^2
+zy_2  = $94     ; fixed8.24: z_y^2
+zx_zy = $98     ; fixed8.24: z_x * z_y
+dist  = $9c     ; fixed8.24: z_x^2 + z_y^2

 iter  = $a0     ; u8: iteration count
 zoom  = $a1     ; u8: zoom shift level
@ -42,8 +42,6 @@ half_height = height >> 1
 width = 160
 half_width = width >> 1
 stride = width >> 2
-width_ratio_3_13 = (5 << 11) ; 5/4
-height_ratio_3_13 = (3 << 11) ; 5/4

 DMACTL = $D400
 DLISTL = $D402
@ -101,18 +99,12 @@ aspect:
    ; 184h is the equiv of 220.8h at square pixels
    ; 320 / 220.8 = 1.45 display aspect ratio
 aspect_x:
-    .word 5 << (13 - 2)
+    .word 5 << (12 - 2)

 aspect_y:
-    .word 3 << (13 - 2)
+    .word 3 << (12 - 2)


-bit_masks:
-    .byte 3
-    .byte 3 << 2
-    .byte 3 << 4
-    .byte 3 << 6
-
 display_list_start:
    ; 24 lines overscan
    .repeat 3
@ -168,7 +160,7 @@ color_map:
 .endmacro

 .macro add32 dest, arg1, arg2
-    add 2, dest, arg2, dest
+    add 4, dest, arg2, dest
 .endmacro

 ; 2 + 9 * byte cycles
@ -244,21 +236,6 @@ color_map:
    neg 4, arg
 .endmacro

-.macro extend_8_16 dest, src
-    ; clobbers A, X
-    ; 13-15 cycles
-    .local positive
-    .local negative
-    ldx #0       ; 2 cyc
-    lda src      ; 3 cyc
-    sta dest     ; 3 cyc
-    bpl positive ; 2 cyc
-negative:
-    dex          ; 2 cyc
-positive:
-    stx dest + 1 ; 3 cyc
-.endmacro
-
 ; inner loop for imul16
 ; bitnum < 8: 25 or 41 cycles
 ; bitnum >= 8: 30 or 46 cycles
@ -277,10 +254,10 @@ positive:
    ; 5 cycles either way
    .if bitnum < 8
        lda arg1                 ; 3 cyc
-        and #(1 << bitnum)       ; 2 cyc
+        and #(1 << (bitnum))       ; 2 cyc
    .else
        lda arg1 + 1             ; 3 cyc
-        and #(1 << (bitnum - 8)) ; 2 cyc
+        and #(1 << ((bitnum) - 8)) ; 2 cyc
    .endif
    bne one ; 2 cyc

@ -307,7 +284,6 @@ next:
        ror result ; 5 cyc
    .endif

-
 .endmacro

 ; 5 to 25 cycles
@ -330,11 +306,18 @@ positive:
    copy32 dest, FR2  ; 24 cyc
 .endmacro

-.macro imul16_round dest, arg1, arg2
+.macro shift_round_16 arg, shift
+    .repeat shift
+        shl32 arg
+    .endrepeat
+    round16 arg
+.endmacro
+
+.macro imul16_round dest, arg1, arg2, shift
    copy16 FR0, arg1  ; 12 cyc
    copy16 FR1, arg2  ; 12 cyc
    jsr imul16_func   ; 470-780 cyc
-    round16 FR2       ; 5-28 cyc
+    shift_round_16 FR2, shift
    copy16 dest, FR2 + 2  ; 12 cyc
 .endmacro

@ -438,71 +421,60 @@ next:
    ; dist = 0
    ; iter = 0
    lda #00
-    ldx iter - zx
+    ldx #(iter - zx + 1)
 initloop:
-    sta zx,x
+    sta zx - 1,x
    dex
    bne initloop

 loop:
-    ; 1939 - 3007 cyc
-
-    ; iter++ & max-iters break = 7 cyc
-    inc iter       ; 5 cyc
-    bne keep_going ; 2 cyc
+    ; iter++ & max-iters break
+    inc iter
+    bne keep_going
    rts
 keep_going:

+    .macro quick_exit arg
+        .local keep_going
+        lda arg + 1
+        cmp #(4 << 4)
+        bmi keep_going
+        rts
+    keep_going:
+    .endmacro
+
    ; 4.12: (-8 .. +7.9)
-    ; zx = zx_2  - zy_2  + cx   = 3 * 20 = 60 cyc
+    ; zx = zx_2  - zy_2  + cx
    sub16 zx, zx_2, zy_2
    add16 zx, zx, cx
+    quick_exit zx

-    ; zy = zx_zy + zx_zy + cy   = 3 * 20 = 60 cyc
-    sub16 zy, zx_zy, zx_zy
+    ; zy = zx_zy + zx_zy + cy
+    add16 zy, zx_zy, zx_zy
    add16 zy, zy, cy

-    ; 8.24: (-128 .. +127.9)
-    ; zx_2 = zx * zx            = 518 - 828 cyc
-    imul16 zx_2, zx, zx
+    ; zx_2 = zx * zx
+    imul16_round zx_2, zx, zx, 4
+    quick_exit dist

-    ; zy_2 = zy * zy            = 518 - 828 cyc
-    imul16 zy_2, zy, zy
+    ; zy_2 = zy * zy
+    imul16_round zy_2, zy, zy, 4
+    quick_exit dist

-    ; zx_zy = zx * zy           = 518 - 828 cyc
-    imul16 zx_zy, zx, zy
+    ; zx_zy = zx * zy
+    imul16_round zx_zy, zx, zy, 4
+    quick_exit dist

-    ; dist = zx_2 + zy_2        = 38 cyc
-    add32 dist, zx_2, zy_2
-
-    ; if dist >= 4 break, else continue iterating = 7 cyc
-    lda dist + 3  ; 3 cyc
-    cmp #4        ; 2 cyc
-    bmi still_in  ; 2 cyc
-    rts
-still_in:
-
-    ; shift and round zx_2 to 4.12 = (60 + 5) - (60 + 28) = 65 - 88 cyc
-    .repeat 4      ; 60 cyc
-        shl24 zx_2 ; 15 cyc
-    .endrepeat
-    round16 zx_2   ; 5-28 cycles
-
-    ; shift and round zy_2 to 4.12 = (20 + 5) - (20 + 28) = 65 - 88 cyc
-    .repeat 4      ; 60 cyc
-        shl24 zy_2 ; 15 cyc
-    .endrepeat
-    round16 zy_2   ; 5-28 cycles
-
-    ; shift and round zx_zy to 4.12 = (20 + 5) - (20 + 28) = 65 - 88 cyc
-    .repeat 4       ; 60 cyc
-        shl24 zx_zy ; 15 cyc
-    .endrepeat
-    round16 zx_zy   ; 5-28 cycles
+    ; dist = zx_2 + zy_2
+    add16 dist, zx_2, zy_2
+    quick_exit dist

    ; if may be in the lake, look for looping output with a small buffer
    ; as an optimization vs running to max iters
-    jmp loop ; 3 cycles
+    jmp loop
+
+peace_out:
+    rts

 .endproc

@ -523,7 +495,7 @@ enough:

    ; cy = cy * (3 / 4)
    ; cx = cx * (5 / 4)
-    imul16_round dest, dest, aspect
+    imul16_round dest, dest, aspect, 4
 .endmacro

 .proc pset
@ -584,6 +556,9 @@ point:
    ; pixel_mask <<= pixel_shift (shifting in ones)
    and #3
    sta pixel_shift
+    lda #3
+    sec
+    sbc pixel_shift
    tax
 shift_loop:
    beq shift_done
@ -637,9 +612,13 @@ done:
    sta ox + 1
    sta oy
    sta oy + 1
+
+    ; zoom = 2x
+    lda #1
    sta zoom

    ; Disable display DMA
+    lda #0
    sta DMACTL

    ; zero the range from framebuffer_top to framebuffer_end
Author	SHA1	Message	Date
Brooke Vibber	57975b7158	not sure what's wrong, have to check over	2023-01-22 12:02:15 -08:00
Brooke Vibber	1bef004ccd	precision cleanup using 4.12 and 8.24 consistently	2023-01-22 11:17:51 -08:00
Brooke Vibber	ae9dd0674d	corrupt! but it produces pixels	2023-01-22 10:42:37 -08:00
Brooke Vibber	b4721ae46b	fix pixel shift	2023-01-22 09:37:37 -08:00
Brooke Vibber	dbbec8ed6d	ok two things wrong: 1) bit masks are backwards 2) iter always returning 0	2023-01-22 09:34:42 -08:00