From e6c05d0f66cb0634f192a215284cdb3ab3af81aa Mon Sep 17 00:00:00 2001
From: Brion Vibber <brion@pobox.com>
Date: Wed, 28 Dec 2022 21:08:16 -0800
Subject: [PATCH] stuff

---
 .gitignore |   3 +
 Makefile   |  14 ++++
 mandel.s   | 208 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 225 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 mandel.s

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8d2f7ce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.xex
+.DS_Store
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..25148b4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+# Builds mandel.xex from mandel.s with the cc65 tools (ca65 + ld65).
+.PHONY : all clean
+all : mandel.xex
+
+# Assemble: foo.s -> foo.o
+%.o : %.s
+	ca65 -o $@ $<
+
+# Link: foo.o -> foo.xex, using the atari-asm-xex.cfg linker configuration
+%.xex : %.o
+	ld65 -C atari-asm-xex.cfg -o $@ $<
+
+clean :
+	rm -f *.o *.xex
diff --git a/mandel.s b/mandel.s
new file mode 100644
index 0000000..60ffb76
--- /dev/null
+++ b/mandel.s
@@ -0,0 +1,208 @@
+; FP registers in zero page
+FR0 = $d4       ; floating-point register 0
+FRE = $da
+FR1 = $e0       ; floating-point register 1
+FR2 = $e6
+FRX = $ec       ; imul16 uses FRX..FRX+3, i.e. also EEXP/NSIGN/ESIGN
+EEXP = $ed
+NSIGN = $ee
+ESIGN = $ef
+FLPTR = $fc     ; pointer used by the *P load/store routines
+FPTR2 = $fe
+
+; FP routines (Atari OS floating-point ROM entry points)
+AFP = $D800     ; ASCII -> FP
+FASC = $D8E6    ; FP -> ASCII
+IFP = $D9AA     ; integer -> FP
+FIP = $D9D2     ; FP -> integer (FPI in the Atari OS listings)
+ZFR0 = $DA44    ; zero FR0
+ZFI = $DA46     ; zero the register addressed by X (ZF1 in the Atari OS listings)
+FSUB = $DA60    ; FR0 = FR0 - FR1
+FADD = $DA66    ; FR0 = FR0 + FR1
+FMUL = $DADB    ; FR0 = FR0 * FR1
+FDIV = $DB28    ; FR0 = FR0 / FR1
+PLYVEL = $DD40  ; polynomial evaluation (PLYEVL in the Atari OS listings)
+FLD0R = $DD89   ; load FR0 from pointer in X/Y
+FLD0P = $DD8D   ; load FR0 from pointer in FLPTR
+FLD1R = $DD98   ; load FR1 from pointer in X/Y
+FLD1P = $DD9c   ; load FR1 from pointer in FLPTR
+FST0R = $DDA7   ; store FR0 to pointer in X/Y
+FST0P = $DDAB   ; store FR0 to pointer in FLPTR
+FMOVE = $DDB6 ; FR0 -> FR1
+EXP = $DDC0
+EXP10 = $DDCC
+LOG = $decd
+LOG10 = $ded1
+
+
+.code
+
+.export start
+
+.proc start                     ; exported entry point
+spin:
+    jmp spin                    ; placeholder: loop forever, nothing else runs yet
+.endproc
+
+.proc mandelfloat               ; empty placeholder: assembles to no code yet
+.endproc
+
+.macro sext16to32 arg           ; sign-extend the 16-bit value at arg to 32 bits
+    .local negative
+    .local store
+    lda arg+1                   ; N flag = sign bit (bit 15 of the value)
+    bmi negative
+    lda #$00                    ; non-negative: upper half becomes $0000
+    jmp store
+negative:
+    lda #$ff                    ; negative: upper half becomes $ffff
+store:
+    sta arg+2                   ; clobbers A; writes arg+2 and arg+3
+    sta arg+3
+.endmacro
+
+.macro copy bytes, arg1, arg2   ; copy `bytes` bytes from arg1 to arg2 (clobbers A)
+    .repeat bytes, byte         ; one lda/sta pair per byte, lowest address first
+        lda arg1+byte
+        sta arg2+byte
+    .endrepeat
+.endmacro
+
+.macro copy16 arg1, arg2        ; 2-byte copy: arg2 = arg1
+    copy 2, arg1, arg2
+.endmacro
+
+.macro copy32 arg1, arg2        ; 4-byte copy: arg2 = arg1
+    copy 4, arg1, arg2
+.endmacro
+
+.macro add bytes, arg1, arg2    ; arg1 += arg2 over `bytes` little-endian bytes
+    clc                         ; no carry into the lowest byte
+    .repeat bytes, i
+        lda arg2+i
+        adc arg1+i              ; carry ripples up from the byte below
+        sta arg1+i
+    .endrepeat
+.endmacro
+
+.macro add16 arg1, arg2         ; 16-bit arg1 += arg2
+    add 2, arg1, arg2
+.endmacro
+
+.macro add32 arg1, arg2         ; 32-bit arg1 += arg2
+    add 4, arg1, arg2
+.endmacro
+
+.macro shl bytes, arg           ; shift a `bytes`-byte little-endian value left by 1
+    asl arg                     ; lowest byte: bit 7 goes to carry, 0 comes in
+    .repeat bytes-1, i
+        rol arg+1+i             ; each higher byte takes the carry from the byte below
+    .endrepeat
+.endmacro
+
+.macro shl16 arg                ; 16-bit shift left by 1
+    shl 2, arg
+.endmacro
+
+.macro shl24 arg                ; 24-bit shift left by 1
+    shl 3, arg
+.endmacro
+
+.macro shl32 arg                ; 32-bit shift left by 1
+    shl 4, arg
+.endmacro
+
+.macro shr bytes, arg           ; logical shift of a `bytes`-byte little-endian value right by 1
+    lsr arg+bytes-1             ; start at the HIGHEST byte: 0 comes in, bit 0 goes to carry
+    .repeat bytes-1, byte
+        ror arg+bytes-2-byte    ; walk down; each lower byte takes the carry from the byte above
+    .endrepeat
+.endmacro
+
+.macro shr16 arg                ; 16-bit logical shift right by 1
+    shr 2, arg
+.endmacro
+
+.macro shr24 arg                ; 24-bit logical shift right by 1
+    shr 3, arg
+.endmacro
+
+.macro shr32 arg                ; 32-bit logical shift right by 1
+    shr 4, arg
+.endmacro
+
+.macro checkbit arg, bits       ; A = bit `bits` of the 16-bit value at arg, masked; Z set if 0
+    .if bits >= 8
+        lda arg + 1             ; bits 8..15 live in the second byte
+        and #(1 << (bits - 8))
+    .else
+        lda arg                 ; bits 0..7 live in the first byte
+        and #(1 << bits)
+    .endif
+.endmacro
+
+.macro bitmul arg1, arg2, res, bits ; one shift-and-add multiply step for bit `bits` of arg2
+    .local skip
+    checkbit arg2, bits         ; Z set when this multiplier bit is 0
+    beq skip
+    add32 res, arg1             ; bit is 1: res += arg1 (32-bit)
+skip:
+    shl32 arg1                  ; arg1 *= 2, ready for the next more significant bit
+.endmacro
+
+.proc imul16
+    ; Signed fixed-point multiply; operands and result are treated as 3.13
+    ; (the format named in iter's notes).
+    ; 16-bit arg in FR0
+    ; 16-bit arg in FR1
+    ; 16-bit result in FR0 = low 16 bits of (FR0 * FR1) >> 13
+    ; Clobbers A, flags, FR0+2..FR0+3 and FRX..FRX+3 (so EEXP/NSIGN/ESIGN too).
+    sext16to32 FR0              ; FR0 is shifted left as a 32-bit value below
+    lda #0                      ; zero out the 32-bit accumulator
+    sta FRX
+    sta FRX+1
+    sta FRX+2
+    sta FRX+3
+    ; shift and add :D  (bits 0..14 of FR1 carry positive weight)
+    .repeat 15, bitnum
+        bitmul FR0, FR1, FRX, bitnum
+    .endrepeat
+    ; bit 15 is the sign bit, weight -2^15: subtract FR0, which is now << 15
+    lda FR1+1
+    bpl nonneg
+    sec
+    .repeat 4, i
+        lda FRX+i
+        sbc FR0+i
+        sta FRX+i
+    .endrepeat
+nonneg:
+    shl32 FRX                   ; product is 6.26: << 3 leaves the 3.13 result in the top 16 bits
+    shl32 FRX
+    shl32 FRX                   ; @fixme round the last bit
+    copy16 FRX+2, FR0           ; @fixme could save a few cycles by combining the last two ops
+    rts                         ; return to the caller instead of running into the next proc
+.endproc
+
+.proc iter                      ; pseudocode outline of the per-point iteration; no instructions yet
+    ; (cx and cy should be pre-scaled to 6.26 fixed point)
+
+    ; zx = 0
+    ; zx_2 = 0
+    ; zy = 0
+    ; zy_2 = 0
+
+loop:
+    ; iters++
+
+    ; zx_next = zx_2 - zy_2 + cx      (real part of z^2 + c)
+    ; zy_next = 2 * zx * zy + cy      (imaginary part of z^2 + c)
+    ; (detect overflows to -4 or +4 and break if necessary)
+    ; (re-downshift into zx and zy as 3.13 fixed point; round.)
+
+    ; zx_2 = zx * zx
+    ; zy_2 = zy * zy
+    ; dist = zx_2 + zy_2
+
+    ; if dist >= 4 break, else continue iterating
+    ; NOTE(review): nothing is assembled here yet, so there is no rts either
+.endproc