307 lines
8.9 KiB
Plaintext
307 lines
8.9 KiB
Plaintext
3D with free rotation around 1 axis
|
|
See Cube.gtb
|
|
|
|
Per frame
|
|
|
|
Variable dX dY Rz Camera position and direction
|
|
Constant f Fixed focus
|
|
|
|
1 sin/cos (byte) Use small lookup table
|
|
C = cos Rz
|
|
S = sin Rz
|
|
|
|
Per 3D point x, y, z
|
|
|
|
2 add/sub Translation
|
|
x1 = x - dX
|
|
y1 = y - dY
|
|
|
|
4 mul, 2 add/sub Rotation
|
|
x2 = x1 * C - y1 * S
|
|
y2 = x1 * S + y1 * C
|
|
[A rotating object demo can do these
|
|
incrementally, sin a = a, cos a = 1,
|
|
fixed "convenient" a (e.g. a=1/256)
|
|
|
|
x2 = x1 - y1 * a
|
|
y2 = x1 * a + y1
|
|
|
|
Zero multiplications
|
|
|
|
This is like a mass-spring system.
|
|
The 2D points before projection follow
|
|
a harmonic motion]
|
|
|
|
2 div, 3 add/sub Projection
|
|
r = y2 + f
|
|
u = 80 + x2/r
|
|
v = 60 - z/r
|
|
|
|
Total: 6 mul/div + some add/subs
|
|
|
|
Bresenham's algorithm to trace the edge between two points
|
|
|
|
SYS_SetMemory to fill the area between two edges
|
|
|
|
Either triangles (polygon system), or point lists per pixel line
|
|
|
|
Arithmetic: 15 bit signed fixed point
|
|
|
|
High byte: 1 sign, 7 integer -128..127
|
|
Low byte: 1 internal carry, 7 fraction 0..127
|
|
|
|
Speed:
|
|
7 iterations per mul, plus shifts
|
|
Fast internal carry
|
|
|
|
Maybe down to 2 scanlines per operation???
|
|
10 scanlines per point???
|
|
This can become really fast
|
|
|
|
Eliminate iteration overhead: SYS functions that process vector
|
|
of points through self-restart
|
|
|
|
Addition: 9 cycles
|
|
; A1 A0
|
|
; B1 B0
|
|
; ----- +
|
|
; C1 C0
|
|
|
|
ld [A0]
|
|
adda [B0]
|
|
anda $80,x
|
|
anda $7f
|
|
st [C0]
|
|
ld [x]
|
|
adda [A1]
|
|
adda [B1]
|
|
st [C1]
|
|
|
|
Multiplication: 216 cycles
|
|
|
|
;(A2)A1.A0 A2 is sign extension: 0 or 127/255
|
|
; B0 Is cos() or sin() XXX TODO +/- sign, -1.0 and +1.0
|
|
; -------- +*
|
|
; C2 C1.C0
|
|
|
|
ld [B0] #0 Bit N (18 cycles)
|
|
anda 1<<N #1
|
|
bpl .4 #2
|
|
ld [A0] #3
|
|
ld 4 #4 Zero
|
|
.3 bne .3 #5,7,9,11,13
|
|
suba 1 #6,8,10,12,14
|
|
beq .5 #15
|
|
ld [C2] #16
|
|
.4 adda [C0] #4 One
|
|
anda $80,x #5
|
|
anda $7f #6
|
|
st [C0] #7
|
|
ld [X] #8
|
|
adda [A1] #9
|
|
adda [C1] #10
|
|
anda $80,x #11
|
|
anda $7f #12
|
|
st [C1] #13
|
|
ld [X] #14
|
|
adda [A2] #15
|
|
adda [C2] #16
|
|
.5 st [C2] #17
|
|
|
|
ld [C0] #0 Shift left (15 cycles)
|
|
adda [C0] #1
|
|
anda $80,x #2
|
|
anda $7f #3
|
|
st [C0] #4
|
|
ld [X] #5
|
|
adda [C1] #6
|
|
adda [C1] #7
|
|
anda $80,x #8
|
|
anda $7f #9
|
|
st [C1] #10
|
|
ld [X] #11
|
|
adda [C2] #12
|
|
adda [C2] #13
|
|
st [C2] #14
|
|
|
|
Total
|
|
add bit: 7 x 18 = 126
|
|
shift left: 6 x 15 = 90
|
|
===
|
|
216 cycles
|
|
|
|
Table based multiplication
|
|
|
|
(a+b)^2 = a^2 + b^2 + 2ab
|
|
(a-b)^2 = a^2 + b^2 - 2ab
|
|
========================== -
|
|
4ab = (a+b)^2 - (a-b)^2
|
|
|
|
ab = (a+b)^2/4 - (a-b)^2/4
|
|
= table[a+b] - table[abs(a-b)]
|
|
|
|
table[i] = i^2/4
|
|
|
|
For 7-bit x 7-bit multiply, needs two 255-byte tables
|
|
The quarters cancel out
|
|
|
|
http://nparker.llx.com/a2/mult.html
|
|
|
|
|
|
mulTableLo: nop
|
|
ld ...
|
|
ld ...
|
|
ld ...
|
|
...
|
|
mulTableLo+255: bra 255 XXX Can also spread out the code over 4 pages
|
|
mulTableHi: nop
|
|
ld ...
|
|
ld ...
|
|
ld ...
|
|
...
|
|
mulTableHi+255: bra [vTmp]
|
|
nop
|
|
|
|
vACH vACL
|
|
A0
|
|
---------
|
|
vACL * A0 -> 28+25 cycles
|
|
A1 A2
|
|
vACH * A0 -> 28+27 cycles
|
|
A2 A3
|
|
--------------
|
|
A2 A1 -> 8 cycles
|
|
-------------
|
|
116 cycles
|
|
|
|
SYS_Mul:
|
|
; Strip sign(s)
|
|
; XXX TODO
|
|
|
|
; Partial vACL * A0
|
|
ld .L0 ;(a-b)^2/4 lo 28 cycles
|
|
st [vTmp]
|
|
ld [vACL]
|
|
suba [A0]
|
|
bmi pc+3
|
|
bra pc+3
|
|
suba 1
|
|
xora 255
|
|
adda 1
|
|
st [A1]
|
|
ld >mulTableLo,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L0 st [A2]
|
|
ld .L1 ;hi
|
|
st [vTmp]
|
|
ld [A1]
|
|
ld >mulTableHi,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L1 xora 255 ;Almost negate
|
|
st [A1]
|
|
|
|
ld .L2 ;(a+b)^2/4 lo 25 cycles
|
|
st [vTmp]
|
|
ld [vACL]
|
|
adda [A0]
|
|
ld >mulTableLo,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L2 suba [A2]
|
|
anda $80,x ;Carry
|
|
ld .L3 ;hi
|
|
st [vTmp]
|
|
ld [vACL]
|
|
adda [A0]
|
|
ld >mulTableHi,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L3 adda [A1]
|
|
adda [x] ;Apply carry
|
|
st [A1]
|
|
|
|
; Partial vACH * A0
|
|
ld .L4 ;(a-b)^2/4 lo 28 cycles
|
|
st [vTmp]
|
|
ld [vACH]
|
|
suba [A0]
|
|
bmi pc+3
|
|
bra pc+3
|
|
suba 1
|
|
xora 255
|
|
adda 1
|
|
st [A1]
|
|
ld >mulTableLo,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L4 st [A3]
|
|
ld .L5 ;hi
|
|
st [vTmp]
|
|
ld [A1]
|
|
ld >mulTableHi,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L5 xora 255 ;Almost negate
|
|
st [A2]
|
|
|
|
ld .L6 ;(a+b)^2/4 lo 27 cycles
|
|
st [vTmp]
|
|
ld [vACH]
|
|
adda [A0]
|
|
ld >mulTableLo,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L6 suba [A3]
|
|
anda $80,x ;Carry
|
|
anda $7f
|
|
st [A3]
|
|
ld .L7 ;hi
|
|
st [vTmp]
|
|
ld [vACH]
|
|
adda [A0]
|
|
ld >mulTableHi,Y
|
|
jmp y,ac
|
|
bra 255
|
|
;ld ...
|
|
;bra [vTmp]
|
|
;nop
|
|
.L7 adda [A2]
|
|
adda [x] ;Apply carry
|
|
st [A2]
|
|
|
|
ld [A1] ;Sum 8 cycles
|
|
adda [A3]
|
|
anda $80,x ;Carry
|
|
anda $7f
|
|
st [A1]
|
|
ld [A2]
|
|
adda [x] ;Apply carry
|
|
st [A2]
|
|
|
|
; Apply result sign
|
|
; XXX TODO
|
|
|