diff --git a/Docs/ÐŸÐ¾Ð»Ð½Ð¾Ðµ Ð¾Ð¿Ð¸ÑÐ°Ð½Ð¸Ðµ ÐºÐ¾Ð¼Ð°Ð½Ð´ Ð¼Ð¸ÐºÑ€Ð¾Ð¿Ñ€Ð¾Ñ†ÐµÑÑÐ¾Ñ€Ð° Z80.doc b/Docs/ÐŸÐ¾Ð»Ð½Ð¾Ðµ Ð¾Ð¿Ð¸ÑÐ°Ð½Ð¸Ðµ ÐºÐ¾Ð¼Ð°Ð½Ð´ Ð¼Ð¸ÐºÑ€Ð¾Ð¿Ñ€Ð¾Ñ†ÐµÑÑÐ¾Ñ€Ð° Z80.doc
new file mode 100644
index 0000000..3a03798
Binary files /dev/null and b/Docs/ÐŸÐ¾Ð»Ð½Ð¾Ðµ Ð¾Ð¿Ð¸ÑÐ°Ð½Ð¸Ðµ ÐºÐ¾Ð¼Ð°Ð½Ð´ Ð¼Ð¸ÐºÑ€Ð¾Ð¿Ñ€Ð¾Ñ†ÐµÑÑÐ¾Ñ€Ð° Z80.doc differ
diff --git a/constants/SP2000.inc b/constants/SP2000.inc
index de95dd7..34654e7 100644
--- a/constants/SP2000.inc
+++ b/constants/SP2000.inc
@@ -351,6 +351,7 @@ FastRAM:
 .ON		EQU #FB					; ‚ª«îç¥­¨¥ ªíè  IN A,(FastRAM.ON) - ¯à¨¡¨â® £¢®§¤ï¬¨ ¢ ª®­ä¥
 .OFF		EQU #7B					; Žâª«îç¥­¨¥ ªíè  IN A,(FastRAM.OFF) - ¯à¨¡¨â® £¢®§¤ï¬¨ ¢ ª®­ä¥
 .SLOT0		EQU #5C					; ¥à¥ª«îç¥­¨¥ áâà ­¨æ ªíè  ¯à¨ FastRAM.ON ¢ ­ã«¥¢®¬ ®ª­¥ (bit0..1) - ¯à¨¡¨â® £¢®§¤ï¬¨ ¢ ª®­ä¥
+; Page switching works only with SYS_PORT.ROM.
 ;!TODO ¯®áâ ¢¨âì ¢ ª àâ¥ ¯®àâ®¢ ­  çâ¥­¨¥ ¯®àâ  #FB ¢­ãâà¥­­¨© ¯®àâ SLOT0
 ; çâ®¡ ¯à¨ ¢ª«îç¥­¨¨ ªíè ¢ à¥£¨áâà á®åà ­ï« áì áâà ­¨æ  ¢ ¡ ­ª¥ 0 (¯®«¥§­® ¯à¨ ¢ª«îç¥­¨¨ ªíè ¨§ ¡ ­ª¨ ­®«ì)
 ;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
diff --git a/math.asm b/math.asm
new file mode 100644
index 0000000..06d74eb
--- /dev/null
+++ b/math.asm
@@ -0,0 +1,4968 @@
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ’à¨£®­®¬¥âà¨ç¥áª¨¥ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+arctan_88:
+;Input:
+;   D.E = x, signed 8.8 fixed point
+;Output: atan(D.E)->D.E (8.8 fixed point; the constants 201 and 402 below are 256*pi/4 and 256*pi/2)
+   push de               ; keep the original D.E for the fix-up in .adjustatan
+   ld a,d
+   or a
+   jp p,$+5              ; non-negative: skip the NEG
+   neg
+   ld d,a                ; D = |D| (only the integer byte is negated)
+   dec a
+   jr nz,.checkneedinv   ; integer part is not 1
+   inc e : dec e : jr nz,.checkneedinv   ; fraction is not 0
+   pop af : rla : ld de,201 : ret nc : ld de,-201 : ret   ; |D|.E = 1.0: answer is +/-201, sign taken from bit 7 of the original D
+.checkneedinv:
+   inc a                 ; undo the DEC: A = |D|, Z when the integer part is 0
+   call nz,.DEgt1_Inv    ; integer part non-zero: replace DE by 0.E = 1/DE
+;0.E is the value to atan
+   ld hl,.adjustatan
+   push hl               ; every RET below continues at .adjustatan with the result byte in A
+   ld a,e
+   cp 46 : ret c         ; below 46 the input byte itself is returned
+   dec a : cp 42h : ret c   ; each further range returns the input lowered by one more
+   dec a : cp 4Eh : ret c
+   dec a : cp 57h : ret c
+   dec a : cp 5Eh : ret c
+   dec a : cp 64h : ret c
+   dec a : cp 6Ah : ret c
+   dec a : cp 6Fh : ret c
+   sub 6Fh : ld e,a      ; remaining inputs index the table (D is 0 on both paths into this code)
+   ld hl,.LUT
+   add hl,de
+   ld a,(hl)
+   ret                   ; "returns" into .adjustatan
+.adjustatan:
+   ld e,a                ; DE = 0.A = atan of the reduced argument
+   pop bc                ; B.C = original input
+   ld a,b
+   or a
+   jp p,$+5
+   neg                   ; A = |original D|
+   jr z,$+9              ; integer part was 0: no reciprocal was taken, skip the correction
+   ld hl,402
+   or a
+   sbc hl,de
+   ex de,hl              ; DE = 402 - DE, i.e. atan(x) = pi/2 - atan(1/x)
+   rl b                  ; carry = sign bit of the original input
+   ret nc
+   xor a                 ; negative input: DE = -DE
+   sub e
+   ld e,a
+   sbc a,a
+   sub d
+   ld d,a
+   ret
+
+.DEgt1_Inv:
+;Works if DE>1
+   ld hl,256
+   ld b,8                ; eight quotient bits
+.InvLoop:
+   add hl,hl
+   sbc hl,de             ; trial subtraction
+   jr nc,$+3
+   add hl,de             ; did not fit: restore (this leaves carry set)
+   adc a,a               ; collect the inverted quotient bit
+   djnz .InvLoop
+    cpl                  ; un-invert the collected bits
+   ld e,a
+    ld d,b               ; B is 0 after DJNZ, so DE = 0.E
+    ret
+;		         0    1    2    3    4    5    6    7    8    9 
+.LUT:		DB	#6F, #6F, #70, #71, #72, #73, #73, #74, #75, #76 ;  0
+		DB	#77, #77, #78, #79, #7A, #7B, #7B, #7C, #7D, #7E ;  1
+		DB	#7F, #7F, #80, #81, #82, #82, #83, #84, #85, #85 ;  2
+		DB	#86, #87, #88, #88, #89, #8A, #8B, #8B, #8C, #8D ;  3
+		DB	#8E, #8E, #8F, #90, #90, #91, #92, #93, #93, #94 ;  4
+		DB	#95, #95, #96, #97, #97, #98, #99, #9A, #9A, #9B ;  5
+		DB	#9C, #9C, #9D, #9E, #9E, #9F, #A0, #A0, #A1, #A2 ;  6
+		DB	#A2, #A3, #A3, #A4, #A5, #A5, #A6, #A7, #A7, #A8 ;  7
+		DB	#A9, #A9, #AA, #AA, #AB, #AC, #AC, #AD, #AD, #AE ;  8
+		DB	#AF, #AF, #B0, #B0, #B1, #B2, #B2, #B3, #B3, #B4 ;  9
+		DB	#B5, #B5, #B6, #B6, #B7, #B7, #B8, #B9, #B9, #BA ;  10
+		DB	#BA, #BB, #BB, #BC, #BC, #BD, #BE, #BE, #BF, #BF ;  11
+		DB	#C0, #C0, #C1, #C1, #C2, #C2, #C3, #C3, #C4, #C4 ;  12
+		DB	#C5, #C6, #C6, #C7, #C7, #C8, #C8, #C9		 ;  13
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+atan8:
+;computes 256*atan(A/256)->A  (table lookup plus linear interpolation)
+;56 bytes including the LUT
+;min: 246cc
+;max: 271cc
+;avg: 258.5cc
+  rlca
+  rlca
+  rlca                   ; the top three input bits are now bits 2..0
+  ld d,a                 ; keep the rotated input for the multiply below
+  and 7                  ; A = table segment 0..7
+  ld hl,.LUT
+  add a,l
+  ld l,a                 ; HL -> .LUT[segment] (low byte only; see the conditional fix-up)
+ if (.LUT & 255) > 248    ;this section not included in size/speed totals
+  jr nc,$+3               ;can add three bytes, 12cc to max, 11cc to min, and 11.5cc to avg
+  inc h
+ endif
+  ld c,(hl)              ; C = table value at the start of the segment
+  inc hl
+  ld a,(hl)
+  sub c                  ; A = step to the next table value
+  ld e,0
+  ex de,hl               ; H = rotated input, L = 0
+  ld d,l                 ; D = 0
+  ld e,a                 ; DE = step
+  sla h : jr nc,$+3 : ld l,e            ; shift-and-add multiply driven by the top five bits of H
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl
+  add hl,hl
+  add hl,hl              ; three plain shifts move the segment bits out of H
+;  add hl,hl    ;used in rounding...
+  ld a,h                 ; A = interpolated offset inside the segment
+;  rra          ;but doesn't seem to improve the error
+  adc a,c                ; add the segment base; carry-in is whatever the last ADD HL,HL shifted out
+  ret
+.LUT:		DB	0,32,63,92,119,143,165,184,201   ; nine entries: eight segment starts plus the end point
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+atanE:
+;returns H=256*arctan(E/256)  (the full 16-bit sum is left in HL)
+;min: 496cc
+;max: 539cc
+;avg: 517.5cc
+;multiply E by 201
+  ld d,0
+  ld h,d
+  ld l,e                 ; HL = DE = E
+  add hl,hl              ; 2E
+  add hl,de              ; 3E
+  add hl,hl
+  add hl,hl
+  add hl,hl              ; 24E
+  add hl,de              ; 25E
+  add hl,hl
+  add hl,hl
+  add hl,hl              ; 200E
+  add hl,de              ; 201E
+  ld b,h
+  ld c,l                 ; BC = 201*E
+
+;E*(256-E)
+  xor a
+  ld d,a                 ; D = 0, so DE = E for the adds below
+  sub e                  ; A = 256-E (mod 256)
+  ld h,a                 ; H = multiplier
+  ld l,d                 ; L = 0
+  sla h : jr nc,$+3 : ld l,e            ; 8x8 shift-and-add multiply: HL = H*E
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+  add hl,hl : jr nc,$+3 : add hl,de
+;.HL*70
+  ld d,h
+  ld e,l                 ; DE = product
+  xor a                  ; A collects the bits carried out of HL (24-bit value A:HL)
+  add hl,hl              ; x2
+  add hl,hl : rla   ;rla needed for the case when input = 128 :(
+  add hl,hl : rla        ; x8
+  add hl,hl : rla        ; x16
+  add hl,de : adc a,0    ; x17
+  add hl,hl : rla        ; x34
+  add hl,de : adc a,0    ; x35
+  add hl,hl : rla        ; x70
+  ld l,h
+  ld h,a                 ; HL = (70*product) >> 8
+  add hl,bc              ; HL = 201*E + ((70*E*(256-E)) >> 8)
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ‚ëç¨â ­¨¥ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;written by calc84maniac
+;comment from calc84maniac:
+;   To clarify why I did a cpl/scf/adc instead of a cpl/inc/add or neg/add,
+;   is that it handles the case of A=0 properly. Typically, SUB N and
+;   ADD A,-N give opposite carry outputs, but SUB 0 and ADD A,-0 both reset the
+;   carry flag. On the other hand, SCF : ADC A,255 will set the carry flag like
+;   we want it to.
+; BC=BC-A
+BC_Minus_A:
+  cpl                    ; A = ~A
+  scf                    ; carry-in supplies the +1 that completes the negation
+  adc a,c                ; A = C + ~A + 1 = C - A; carry set means no borrow
+  ld c,a
+  ret c                  ; no borrow: B is unchanged
+  dec b                  ; borrow: propagate into the high byte
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;via calc84maniac
+;"Optimized routine for HL=A-HL (the negate HL optimization can be derived from this by setting A=0 first)"
+A_Minus_HL:
+  sub l                  ; low byte: A - L, carry = borrow
+  ld l,a
+  sbc a,a                ; A = 0 without borrow, #FF with borrow
+  sub h                  ; high byte: 0 - borrow - H
+  ld h,a                 ; HL = (original A) - (original HL)
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛŠ®à¥­ìÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;Adapted from Axe
+;Inputs: A.C (8.8 fixed point, A = integer byte, C = fraction byte)
+;Output: D.E contains the squareroot
+;speed: 1482+12{0,17}
+;min: 1482cc
+;max: 1686cc
+;avg: 1584cc
+;35 bytes. NOTE(review): the label sqrtfixed_88 is defined a second time further down this file - confirm only one copy is assembled.
+sqrtfixed_88:
+	ld	b,12			; twelve iterations, one result bit each
+	ld	de,0			; DE = root built so far
+	ld	h,d
+	ld	l,e			; HL = 0: upper part of the running remainder (HL:A)
+.Loop:	sub	#40			; trial subtraction from HL:A
+	sbc	hl,de
+	jr	nc,.Skip
+	add	a,#40			; borrow: undo the trial subtraction
+	adc	hl,de
+.Skip:	ccf				; carry = next result bit (1 when the subtraction fitted)
+	rl	e
+	rl	d			; shift the bit into the root
+	sla	c			; move the next two input bits C -> A -> HL
+	rla
+	adc	hl,hl
+	sla	c
+	rla
+	adc 	hl,hl
+	djnz	.Loop
+	ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;returns HL as the sqrt, DE as the remainder (input: DE)
+;33 bytes
+;min: 928cc
+;max: 1120cc
+;avg: 1024cc
+;928+8{24,0}
+sqrtDE:
+  ld b,#80
+  xor a                  ; B:A is the moving bit mask, shifted right at the top of the loop
+  ld h,a
+  ld l,a                 ; HL = root accumulator = 0
+.sqrt_loop:
+  srl b
+  rra
+  ld c,a                 ; BC = current bit
+  add hl,bc              ; HL = trial value
+  ex de,hl
+  sbc hl,de              ; remainder - trial
+  jr nc,.next
+  add hl,de              ; too big: restore the remainder
+  ex de,hl
+  or a
+  sbc hl,bc              ; and take the bit back out of the accumulator
+		DB	#DA   ;start of jp c,** which is 10cc to skip the next two bytes.
+.next:
+  ex de,hl               ; fitted: remainder back to DE
+  add hl,bc              ; and record the bit
+  srl h
+  rr l                   ; halve the accumulator for the next bit position
+  srl b
+  rra                    ; second mask shift of this pass; carry out ends the loop
+  jr nc,.sqrt_loop
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Written by Zeda
+;Input: A.E ==> D.E
+;Output: DE is the sqrt, AHL is the remainder. NOTE(review): the label sqrtfixed_88 is also defined earlier in this file - confirm only one copy is assembled.
+;Speed: 690+6{0,13}+{0,3+{0,18}}+{0,38}+sqrtA
+;min: 855cc
+;max: 1003cc
+;avg: 924.5cc
+;152 bytes (figures above predate the E-preserving entry sequence: +2 bytes, +8cc, not re-measured)
+sqrtfixed_88:
+  ld l,e : call sqrtA    ; sqrtA loads DE itself, so park the fractional input byte in L (sqrtA leaves HL alone)
+  ld e,a : ld a,l        ; E = remainder from sqrtA (temporary), A = fractional input byte
+  ld l,e                 ; L = remainder
+  ld h,0                 ; HL = remainder
+  ld e,d
+  ld d,h                 ; DE = integer root
+
+  sla e
+  rl d                   ; DE = 2*root
+
+  sll e : rl d           ; DE = 2*DE+1 (SLL is the undocumented shift that sets bit 0)
+  add a,a : adc hl,hl    ; bring the next two input bits into the remainder
+  add a,a : adc hl,hl
+  sbc hl,de              ; trial subtraction
+  jr nc,.next
+  add hl,de              ; did not fit: restore, and clear bit 0 of E again
+  dec e
+		DB	#FE     ;start of `cp *`
+.next:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.next2
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.next2:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.next3
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.next3:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.next4
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.next4:
+  inc e
+
+;Now we have four more iterations
+;The first two are no problem
+  sll e : rl d
+  add hl,hl              ; all eight input bits in A are used up; zeros are shifted in from here on
+  add hl,hl
+  sbc hl,de
+  jr nc,.next5
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.next5:
+  inc e
+
+  sll e : rl d
+  add hl,hl
+  add hl,hl
+  sbc hl,de
+  jr nc,.next6
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.next6:
+  inc e
+
+.iter11:
+;On the next iteration, HL might temporarily overflow by 1 bit
+  sll e : rl d      ;sla e : rl d : inc e
+  add hl,hl
+  add hl,hl
+  jr c,.iter11_br0       ; overflow: the subtraction certainly fits
+;
+  sbc hl,de
+  jr nc,.next7
+  add hl,de
+  dec e
+  jr .iter12
+.iter11_br0:
+  or a
+  sbc hl,de
+.next7:
+  inc e
+
+;On the next iteration, HL is allowed to overflow, DE could overflow with our current routine, but it needs to be shifted right at the end, anyways
+.iter12:
+  ld b,a      ;A is 0, so B is 0
+  add hl,hl
+  add hl,hl
+  rla                    ; A:HL = 17-bit remainder
+;AHL - (DE+DE+1)
+  sbc hl,de : sbc a,b
+  inc e
+  or a
+  sbc hl,de : sbc a,b
+  ret p                  ; non-negative: the last bit fits
+  add hl,de              ; negative: add both subtractions back
+  adc a,b
+  dec e
+  add hl,de
+  adc a,b
+  ret
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+; FASTEST
+;Written by Zeda
+;Input: A
+;Output: D is the squareroot, A is the remainder (input-D^2)
+;Destroys: E (left holding #40)
+;speed: 118+{0,6}+{0,7}+{0,7}+{0,3}
+;min: 118cc
+;max: 141cc
+;avg: 129.5cc
+;38 bytes
+sqrtA:
+  ld de,5040h            ; D = #50 working value, E = #40 first trial subtrahend
+  sub e                  ; stage 1: try A - 64
+  jr nc,.skip1
+  add a,e                ; did not fit: restore A
+  ld d,10h               ; and use the smaller working value
+.skip1:	
+; ------
+  cp d                   ; stage 2
+  jr c,.skip2
+  sub d
+  set 5,d
+.skip2:	
+; ------
+  res 4,d                ; stage 3: rebuild the trial value from the bits decided so far
+  srl d            
+  set 2,d          
+  cp d             
+  jr c,.skip3
+  sub D
+  set 3,d
+.skip3:      
+  srl d            
+; ------
+  inc a                  ; stage 4
+  sub d            
+  jr nc,.skip4
+  dec d
+  add a,d                ; did not fit: restore A
+.skip4:      
+  srl d                  ; final alignment: D = root
+  ret      
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Input: HLDE (HL = high word, DE = low word)
+;Output: DE is the sqrt, AHL is the remainder
+;speed: 238+{0,1}+{0,44}+sqrtHL+3*.sub_2+.iter15
+;min: 1260
+;max: 1506
+;avg: 1377.75
+sqrt32:
+  push de                ; keep the low word of the input
+  call sqrtHL            ; needs A = root, HL = remainder, D = 0. NOTE(review): this file defines sqrtHL several times - confirm the variant that returns the remainder is the one assembled
+  pop bc                 ; BC = low word of the input
+  add a,a
+  ld e,a
+  jr nc,.skip
+  inc d                  ; DE = 2 * (root of the high word)
+.skip:
+  ld a,b                 ; feed the upper byte of the low word
+  call .sub_2
+  call .sub_2
+;Now we have four more iterations
+;The first two are no problem
+  ld a,c                 ; feed the lowest input byte
+  call .sub_2
+
+;On the next iteration, HL might temporarily overflow by 1 bit
+  call .iter15
+
+;On the next iteration, HL is allowed to overflow, DE could overflow with our current routine, but it needs to be shifted right at the end, anyways
+.iter16:
+  add a,a
+  ld b,a        ;either 0x00 or 0x80
+  adc hl,hl
+  rla
+  adc hl,hl
+  rla                    ; A:HL = remainder widened past 16 bits
+;AHL - (DE+DE+1)
+  sbc hl,de : sbc a,b
+  inc e
+  or a
+  sbc hl,de : sbc a,b
+  ret p                  ; non-negative: the last bit fits
+  add hl,de              ; negative: add both subtractions back
+  adc a,b
+  dec e
+  add hl,de
+  adc a,b
+  ret
+
+.sub_2:
+;min: 185cc
+;max: 231cc
+;avg: 208cc
+  call .iter17           ; run one iteration, then fall through into a second one
+.iter17:
+;min: 84cc
+;max: 107cc
+;avg: 95.5cc
+  sll e : rl d           ; DE = 2*DE+1 (SLL is the undocumented shift that sets bit 0)
+  add a,a : adc hl,hl    ; bring the next two input bits into the remainder
+  add a,a : adc hl,hl
+
+  sbc hl,de              ; trial subtraction
+  inc e
+  ret nc                 ; fitted
+  dec e
+  add hl,de              ; did not fit: restore the remainder
+  dec e                  ; and clear bit 0 of E
+  ret
+
+.iter15:
+;91+{8,0+{0,23}}
+;min: 91cc
+;max: 114cc
+;avg: 100.75cc
+  sll e : rl d      ;sla e : rl d : inc e
+  add a,a
+  adc hl,hl
+  add a,a
+  adc hl,hl       ;This might overflow!
+  jr c,.iter15_br0
+;
+  sbc hl,de
+  inc e
+  ret nc
+  dec e
+  add hl,de
+  dec e
+  ret
+.iter15_br0:
+  or a                   ; overflow case: the subtraction certainly fits
+  sbc hl,de
+  inc e
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Inputs:
+;     L is the value to find the square root of
+;Outputs:
+;      C is the result
+;      B,L are 0
+;     DE is not changed
+;      H is the remainder left after the trial subtractions
+;      L is 0
+;      z flag set if it was a perfect square
+;Destroyed:
+;      A
+;287+7x, x is the number of bits in the result
+;min: 287
+;max: 315
+;19 bytes
+SqrtL:
+     ld bc,#400          ; B = 4 iterations, C = 0 (result)
+     ld h,c              ; H = 0 (remainder)
+.Loop:
+     add hl,hl
+     add hl,hl           ; shift the next two input bits from L into H
+     rl c                ; make room for the next result bit
+     ld a,c
+     rla                 ; A = 2*C
+     sub a,h             ; compare with the remainder
+     jr nc,$+5           ; H <= 2*C: the bit stays 0
+     inc c               ; otherwise set the bit
+     cpl                 ; A = H - 2*C - 1, with C taken before the INC
+     ld h,a
+     djnz .Loop
+     ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Input: HLIX (HL = high word, IX = low word)
+;Output: DE is the sqrt, AHL is the remainder
+;speed: 751+6{0,6}+{0,3+{0,18}}+{0,38}+sqrtHL
+;min: 1103
+;max: 1237
+;avg: 1165.5
+;166 bytes
+sqrtHLIX:
+  call sqrtHL    ;expects returns A as sqrt, HL as remainder, D = 0 (global label; no local .sqrtHL exists in this routine)
+  add a,a
+  ld e,a
+  rl d                   ; DE = 2 * (root of the high word)
+
+  ld a,ixh               ; feed the upper byte of IX (undocumented IX-half access)
+  sll e : rl d           ; DE = 2*DE+1 (SLL is the undocumented shift that sets bit 0)
+  add a,a : adc hl,hl    ; bring the next two input bits into the remainder
+  add a,a : adc hl,hl
+  sbc hl,de              ; trial subtraction
+  jr nc,.skip1
+  add hl,de              ; did not fit: restore, and clear bit 0 of E again
+  dec e
+  DB	#FE     ;start of `cp *`
+.skip1:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.skip2
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.skip2:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.skip3
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.skip3:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.skip4
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.skip4:
+  inc e
+
+;Now we have four more iterations
+;The first two are no problem
+  ld a,ixl               ; feed the lower byte of IX
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.skip5
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.skip5:
+  inc e
+
+  sll e : rl d
+  add a,a : adc hl,hl
+  add a,a : adc hl,hl
+  sbc hl,de
+  jr nc,.skip6
+  add hl,de
+  dec e
+		DB	#FE     ;start of `cp *`
+.skip6:
+  inc e
+
+.iter15:
+;On the next iteration, HL might temporarily overflow by 1 bit
+  sll e : rl d      ;sla e : rl d : inc e
+  add a,a
+  adc hl,hl
+  add a,a
+  adc hl,hl       ;This might overflow!
+  jr c,.iter15_br0
+;
+  sbc hl,de
+  jr nc,.skip7
+  add hl,de
+  dec e
+  jr .iter16
+.iter15_br0:
+  or a                   ; overflow case: the subtraction certainly fits
+  sbc hl,de
+.skip7:
+  inc e
+
+;On the next iteration, HL is allowed to overflow, DE could overflow with our current routine, but it needs to be shifted right at the end, anyways
+.iter16:
+  add a,a
+  ld b,a        ;either 0x00 or 0x80
+  adc hl,hl
+  rla
+  adc hl,hl
+  rla                    ; A:HL = remainder widened past 16 bits
+;AHL - (DE+DE+1)
+  sbc hl,de : sbc a,b
+  inc e
+  or a
+  sbc hl,de : sbc a,b
+  ret p                  ; non-negative: the last bit fits
+  add hl,de              ; negative: add both subtractions back
+  adc a,b
+  dec e
+  add hl,de
+  adc a,b
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+; very fastest 16-bit isqrt by Zeda Thomas
+;Feel free to use for whatever :)
+;Input: HL
+;Output: A is the integer square root of HL
+;Destroys: HL,DE (D is actually 0). The remainder is NOT restored in this variant (see the commented-out code near the end).
+;min: 343cc
+;max: 380cc
+;avg: 361.5cc
+;88 bytes. NOTE(review): the label sqrtHL is defined three times in this file - confirm only one variant is assembled.
+sqrtHL:
+  ld de,05040h           ; D = #50 working value, E = #40 first trial subtrahend
+  ld a,h                 ; the upper four result bits come from H alone
+  sub e
+  jr nc,.sq7
+  add a,e
+  ld d,16
+.sq7:
+; ----------
+  cp d 
+  jr c,.sq6 
+  sub d
+  set 5,d
+.sq6:
+; ----------
+  res 4,d
+  srl d
+  set 2,d
+  cp d 
+  jr c,.sq5 
+  sub d
+  set 3,d
+.sq5:
+  srl d
+; ----------
+  inc a
+  sub d
+  jr nc,.sq4
+  dec d
+  add a,d
+  dec d         ; <-- this resets the low bit of D, so `srl d` resets carry.
+.sq4:
+  srl d  
+  ld h,a                 ; HL = remainder so far, with the untouched low input byte
+; ----------
+  ld a,e                 ; from here the 16-bit trial value is kept in D:A / DE
+  sbc hl,de
+  jr nc,.sq3
+  add hl,de
+.sq3:
+  ccf  
+  rra  
+  srl d  
+  rra  
+; ----------
+  ld e,a
+  sbc hl,de
+  jr c,.sq2 
+  or #20
+  db 254        ; <-- start of `cp *` which is 7cc to skip the next byte.
+.sq2:
+  add hl,de
+  xor #18
+  srl d  
+  rra  
+; ----------
+  ld e,a
+  sbc hl,de
+  jr c,.sq1 
+  or 8
+  db 254        ; <-- start of `cp *` which is 7cc to skip the next byte.
+.sq1:
+  add hl,de
+  xor 6
+  srl d  
+  rra  
+; ----------
+  ld e,a
+  sbc hl,de
+;This code would restore the square root
+;   jr nc,.sq0 
+;   add hl,de     ; | 12cc or 18cc
+; .sq0:
+  sbc a,255              ; folds the borrow of the last trial into the result without a branch
+  srl d
+  rra  
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+; FASTEST
+;written by Zeda
+;returns A as the sqrt, HL as the remainder, D = 0 (input: HL)
+;min: 352cc
+;max: 391cc
+;avg: 371.5cc. NOTE(review): the label sqrtHL is defined three times in this file - confirm only one variant is assembled.
+sqrtHL:
+  ld de,#5040            ; D = #50 working value, E = #40 first trial subtrahend
+  ld a,h                 ; the upper four result bits come from H alone
+  sub e       
+  jr nc,.sq7  
+  add a,e     
+  ld d,16     
+.sq7:         
+; ----------
+  cp d        
+  jr c,.sq6   
+  sub d       
+  set 5,d     
+.sq6:         
+; ----------
+  res 4,d     
+  srl d       
+  set 2,d     
+  cp d        
+  jr c,.sq5   
+  sub d       
+  set 3,d     
+.sq5:         
+  srl d       
+; ----------
+  inc a       
+  sub d       
+  jr nc,.sq4  
+  dec d       
+  add a,d     
+  dec d         ; <-- this resets the low bit of D, so `srl d` resets carry.
+.sq4:   
+  srl d 
+  ld h,a                 ; HL = remainder so far, with the untouched low input byte
+; ----------
+  ld a,e                 ; from here the 16-bit trial value is kept in D:A / DE
+  sbc hl,de 
+  jr nc,.sq3
+  add hl,de 
+.sq3:       
+  ccf       
+  rra       
+  srl d     
+  rra       
+; ----------
+  ld e,a   
+  sbc hl,de
+  jr c,.sq2
+  or #20
+  db 254        ; <-- start of `cp *` which is 7cc to skip the next byte.
+.sq2:
+  add hl,de
+  xor #18
+  srl d    
+  rra      
+; ----------
+  ld e,a   
+  sbc hl,de
+  jr c,.sq1
+  or 8     
+  db 254        ; <-- start of `cp *` which is 7cc to skip the next byte.
+.sq1:      
+  add hl,de
+  xor 6    
+  srl d    
+  rra      
+; ----------
+  ld e,a   
+  sbc hl,de
+  jr nc,.sq
+  add hl,de              ; last trial did not fit: restore the remainder in HL
+  srl d    
+  rra      
+  ret      
+.sq:                     ; last trial fitted: keep HL and set the lowest result bit
+  inc a    
+  srl d    
+  rra      
+  ret      
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Adapted from Axe
+;Input: HL
+;Output: D is the square root, cH is the remainder (c being the c flag), A is 0, B is 0, L is 0
+;speed: 758+8{0,6}
+;min: 758cc
+;max: 806cc
+;avg: 782cc
+;26 bytes. NOTE(review): the label sqrtHL is defined three times in this file - confirm only one variant is assembled.
+sqrtHL:
+;p_Sqrt:
+	ld	a,l			; A = low input byte, shifted in two bits per pass
+	ld	l,h			; L = high input byte
+	ld	de,#0040		; D = root (0), E = #40
+	ld	h,d			; H = 0
+	ld	b,8			; eight iterations, one result bit each
+	or	a			; clear carry for the first SBC
+.Loop:
+	sbc	hl,de			; trial subtraction
+	jr	nc,.Skip
+	add	hl,de			; did not fit: restore (this leaves carry set)
+.Skip:
+	ccf				; carry = next result bit
+	rl	d
+	add a,a				; shift the next two input bits A -> HL
+	adc	hl,hl
+	add a,a
+	adc	hl,hl
+	djnz	.Loop
+	ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛRNDÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;Inputs: (seed1), (seed2), and (seed3) are 16-bit seeds. (seed1) and (seed2) can't both be 0.
+;Outputs: HL is the pseudorandom number
+;Destroys: A,DE,BC
+;cycle: 281,474,976,645,120
+;It would take about 185 years at 15MHz to repeat
+;min: 258cc (236cc if using ENABLE_SMC)
+;max: 288cc (266cc if using ENABLE_SMC)
+;avg: 273cc (251cc if using ENABLE_SMC)
+;63 bytes (62 bytes if using ENABLE_SMC)
+xsp32:
+ ifdef ENABLE_SMC
+.seed1 equ $+1
+  ld hl,12345            ; the seeds live in the operand bytes of these loads
+.seed2 equ $+1
+  ld de,6789
+ else
+  ld hl,(.seed1)         ; NOTE(review): without ENABLE_SMC no .seed1/.seed2/.seed3 label is defined inside this routine - confirm they are provided elsewhere
+  ld de,(.seed2)
+ endif
+
+;first, XOR it with itself, shifted left 23 bits
+;low bit of d needs to be shifted in
+  ld a,h                 ; the 32-bit state is DE:HL, DE being the high word
+  rra
+  ld a,l
+  rra                    ; A = bit 0 of H followed by bits 7..1 of L, carry = bit 0 of L
+  jr nc,.skip1
+  rl e                   ; carry set: toggle bit 7 of E
+  ccf
+  rr e
+.skip1:
+  xor d
+  ld d,a
+
+;XOR it with itself, shifted right 15 bits
+  ld a,h
+  rla
+  ld a,e
+  rla
+  xor l
+  ld l,a
+
+  ld a,e
+  rla
+  ld a,d
+  rla
+  jr nc,.skip2
+  rr e                   ; carry set: toggle bit 0 of E
+  ccf
+  rl e
+.skip2:
+  xor h
+  ld h,a
+
+;XOR it with itself, shifted left 17 bits
+;HL<<1
+  ld (.seed1),hl         ; new low word
+  add hl,hl
+  ld a,h
+  xor d
+  ld h,a
+
+  ld a,l
+  xor e
+  ld l,a
+  ld (.seed2),hl         ; new high word
+  ex de,hl               ; DE = new high word
+
+ ifdef ENABLE_SMC
+.seed3 equ $+1
+  ld hl,33333
+ else
+  ld hl,(.seed3)
+ endif
+
+  inc hl
+  inc h                  ; seed3 += 257
+  ld (.seed3),hl
+  add hl,de              ; result = seed3 + new high word of the xorshift state
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;32-bit xorshift (returns the new high word in HL)
+;seed^=seed<<23
+;seed^=seed>>15
+;seed^=seed<<17
+;min: 209cc (193cc if using ENABLE_SMC)
+;max: 239cc (223cc if using ENABLE_SMC)
+;avg: 224cc (208cc if using ENABLE_SMC)
+;53 bytes (52 bytes if using ENABLE_SMC)
+xs32:
+ ifdef ENABLE_SMC
+.seed1 equ $+1
+  ld hl,12345            ; the seeds live in the operand bytes of these loads
+.seed2 equ $+1
+  ld de,6789
+ else
+  ld hl,(.seed1)         ; NOTE(review): without ENABLE_SMC no .seed1/.seed2 label is defined inside this routine - confirm they are provided elsewhere
+  ld de,(.seed2)
+ endif
+
+;first, XOR it with itself, shifted left 23 bits
+;low bit of d needs to be shifted in
+  ld a,h                 ; the 32-bit state is DE:HL, DE being the high word
+  rra
+  ld a,l
+  rra                    ; A = bit 0 of H followed by bits 7..1 of L, carry = bit 0 of L
+  jr nc,.skip1
+  rl e                   ; carry set: toggle bit 7 of E
+  ccf
+  rr e
+.skip1:
+  xor d
+  ld d,a
+
+;XOR it with itself, shifted right 15 bits
+  ld a,h
+  rla
+  ld a,e
+  rla
+  xor l
+  ld l,a
+
+  ld a,e
+  rla
+  ld a,d
+  rla
+  jr nc,.skip2
+  rr e                   ; carry set: toggle bit 0 of E
+  ccf
+  rl e
+.skip2:
+  xor h
+  ld h,a
+
+;XOR it with itself, shifted left 17 bits
+;HL<<1
+  ld (.seed1),hl         ; new low word
+  add hl,hl
+  ld a,h
+  xor d
+  ld h,a
+
+  ld a,l
+  xor e
+  ld l,a
+  ld (.seed2),hl         ; new high word, also the return value
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;You may use this routine, just be sure to credit John Metcalf!
+;Written by John Metcalf
+;   http://www.retroprogramming.com/2017/07/xorshift-pseudorandom-numbers-in-z80.html
+;
+; Annotated by Zeda Thomas, fixed typo (86 cycles==> 82 cycles)
+;Note: always self-modifying - the seed is stored in the operand of the first instruction, with no ENABLE_SMC switch in this routine
+; 16-bit xorshift pseudorandom number generator
+; 20 bytes, 82 cycles (excluding ret)
+; returns   hl = pseudorandom number
+; corrupts   a
+xrnd:
+  ld hl,1         ; Init the seed, must not be 0 (operand is overwritten by the store at the end)
+  ld a,h          ;\
+  rra             ; | Get the top bits of xs<<7 and xor with the top byte of HL
+  ld a,l          ; |        abcdefgh ijklmnop
+  rra             ; |       ^hijklmno 00000000
+  xor h           ; | Note that we still need to xor the 'p' with the top byte of l
+  ld h,a          ;/
+  ld a,l          ;\
+  rra             ; | we get 'p' in the carry flag, now shift that in when we do xs>>9
+  ld a,h          ; |        abcdefgh ijklmnop   (new value)
+  rra             ; |       ^00000000 pabcdefg
+  xor l           ; | the 'p' is leftover from the first step, so now Step 1 and 2 are done
+  ld l,a          ;/
+  xor h           ;\ Finally, xor the bottom byte with the top byte for step 3
+  ld h,a          ;/
+  ld (xrnd+1),hl  ; write back the new value as the next seed
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;This code snippet is 9 bytes and 43cc
+;Inputs:
+;   HL is the input seed and must be non-zero
+;Outputs:
+;   A is the 8-bit pseudo-random number
+;   HL is the new seed value (will be non-zero)
+rng8_very_very_fast:
+    add hl,hl            ; LFSR step: shift left, top bit goes to carry
+    sbc a,a              ; A = #FF when a bit was shifted out, otherwise 0
+    and %0010'1101       ; feedback taps
+    xor l
+    ld l,a               ; HL = new seed
+    ld a,r               ; refresh register used as a free-running counter
+    add a,h              ; output = counter + high byte of the seed
+    ret
+;-------------------------------------------------------------------------------
+;Technical details:
+;   The concept behind this routine is to combine an LFSR (poor RNG) with a
+; counter. The counter improves the RNG quality, while also extending the period
+; length.
+;   For this routine, I took advantage of the Z80's built-in counter, the `r`
+; register. This means that we don't need to store the counter anywhere, and it
+; is pretty fast to access!
+;   Some caveats:
+;     * r is a 7-bit counter
+;     * r will increment some number of times between runs of the RNG. In most
+;       cases, this will be constant, but if it increments an even number each
+;       time, then the bottom bit is always the same, weakening the effect of
+;       the counter. In the worst case, it increments a multiple of 128 times,
+;       effectively making your RNG just as good/bad as the LFSR. Ideally, you
+;       want `r` to increment an odd number of times between runs.
+;     * In the best case, the bottom 7 bits have 50/50 chance of being 0 or 1.
+;       The top bit is 1 with probability 1/2 + 1/(2^17-2) ~ .5000076295
+;     * In the event that your main loop waits for user input between calls,
+;       then congratulations, you might have a True RNG :)
+;-------------------------------------------------------------------------------
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Tested and passes all CAcert tests
+;Uses a very simple 32-bit LCG and 32-bit LFSR
+;it has a period of 18,446,744,069,414,584,320
+;roughly 18.4 quintillion.
+;LFSR taps: 0,2,6,7  = 11000101
+;291cc
+;Thanks to Runer112 for his help on optimizing the LCG and suggesting to try the much simpler LCG. On their own, the two are terrible, but together they are great.
+;58 bytes. Always self-modifying: all four seed words live in instruction operands (no ENABLE_SMC switch in this routine).
+rand32:
+.seed1_0 equ $+1
+    ld hl,12345              ; LCG state, low word
+.seed1_1 equ $+1
+    ld de,6789               ; LCG state, high word
+    ld b,h
+    ld c,l                   ; BC = old low word
+    add hl,hl : rl e : rl d
+    add hl,hl : rl e : rl d  ; DE:HL = 4 * state
+    inc l                    ; +1 (the two low bits are clear after the shifts)
+    add hl,bc                ; low word: 4*x + 1 + x
+    ld (.seed1_0),hl
+    ld hl,(.seed1_1)         ; reload the old high word; LD leaves the carry from ADD HL,BC intact
+    adc hl,de                ; high word: old + 4*old + carry
+    ld (.seed1_1),hl
+    ex de,hl                 ; DE = new LCG high word
+;;lfsr
+.seed2_0 equ $+1
+    ld hl,9876               ; LFSR state, low word
+.seed2_1 equ $+1
+    ld bc,54321              ; LFSR state, high word
+    add hl,hl : rl c : rl b  ; shift the 32-bit LFSR left by one
+    ld (.seed2_1),bc
+    sbc a,a                  ; A = #FF when a bit was shifted out, otherwise 0
+    and %1100'0101           ; feedback taps
+    xor l
+    ld l,a
+    ld (.seed2_0),hl
+    ex de,hl                 ; HL = new LCG high word, DE = new LFSR low word
+    add hl,bc                ; HL = LCG high word + LFSR high word
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;;219cc  (returns HL; combines a 24-bit multiply-by-5-plus-1 step with a 24-bit LFSR)
+rand24:
+ ifdef ENABLE_SMC
+.seed1_0 equ $+1
+    ld hl,12345              ; first state, low word (kept in the operand)
+.seed1_1 equ $+1
+    ld a,67                  ; first state, top byte
+ else
+    ld hl,(.seed1_0)         ; NOTE(review): without ENABLE_SMC no seed label is defined inside this routine - confirm they are provided elsewhere
+    ld a,(.seed1_1)
+ endif
+    ld b,h
+    ld c,l                   ; BC = old low word
+    ld d,a                   ; NOTE(review): D is not read again anywhere in this routine
+    add hl,hl : rla
+    add hl,hl : rla          ; A:HL = 4 * state
+    inc l
+    add hl,bc : adc a,0      ; low word gets + old low word; the top byte only gets the carry. NOTE(review): rand32 adds the old high word at this point - confirm `adc a,0` (not `adc a,d`) is intended
+    ld (.seed1_0),hl
+    ld (.seed1_1),a
+    ld c,b
+    ld b,a                   ; BC = new top byte : old H
+ ifdef ENABLE_SMC
+.seed2_0 equ $+1
+    ld hl,65432              ; LFSR state, low word
+.seed2_1 equ $+1
+    ld a,10                  ; LFSR state, top byte
+ else
+    ld hl,(.seed2_0)
+    ld a,(.seed2_1)
+ endif
+    add hl,hl
+    rla                      ; shift the 24-bit LFSR left by one
+    ld (.seed2_1),a
+    sbc a,a                  ; A = #FF when a bit was shifted out, otherwise 0
+    and %1000'0111           ; feedback taps
+    xor l
+    ld l,a
+    ld (.seed2_0),hl
+    add hl,bc                ; result = LFSR low word + BC
+    ret
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;You may use this routine, just be sure to credit John Metcalf for the
+;xorshift16 part of this routine!
+
+; This routine is a fast Pseudo Random Number Generator
+;for the Z80. It combines a 16-bit LCG and 16-bit xorshift.
+;The xorshift routine was written by John Metcalf
+;and posted here:
+;   http://www.retroprogramming.com/2017/07/xorshift-pseudorandom-numbers-in-z80.html
+
+;174cc (or 186cc if not using ENABLE_SMC)
+;34 bytes
+;cycle length: 4,294,901,760 (almost 4.3 billion)
+
+; For the first seed, we use an LCG, 1+5*seed1 ==> seed1. NOTE(review): the label rand16 is defined a second time further down this file - confirm only one copy is assembled.
+rand16:
+ ifdef ENABLE_SMC
+.seed1 equ $+1
+  	ld hl,9999           ; the seed lives in the operand of this load
+ else
+    ld hl,(.seed1)       ; NOTE(review): without ENABLE_SMC no .seed1/.seed2 label is defined inside this routine - confirm they are provided elsewhere
+ endif
+    ld b,h
+    ld c,l               ; BC = old seed1
+    add hl,hl
+    add hl,hl            ; 4*seed1
+    inc l                ; +1 (the two low bits are clear after the shifts)
+    add hl,bc            ; 5*seed1 + 1
+    ld (.seed1),hl
+
+; For the second seed, we apply an xorshift
+;    seed2^(seed2<<7) ==> seed2
+;    seed2^(seed2>>9) ==> seed2
+;    seed2^(seed2<<8) ==> seed2
+; This code was originally made by John Metcalf and posted here:
+;     http://www.retroprogramming.com/2017/07/xorshift-pseudorandom-numbers-in-z80.html
+; (My modifications are only in naming and compiler directives.)
+
+ ifdef ENABLE_SMC
+.seed2 equ $+1
+	ld hl,9999           ; the seed lives in the operand of this load
+ else
+    ld hl,(.seed2)
+ endif
+    ld a,h
+    rra
+    ld a,l
+    rra
+    xor h
+    ld h,a               ; seed2 ^= seed2<<7 (high byte)
+    ld a,l
+    rra
+    ld a,h
+    rra
+    xor l
+    ld l,a               ; seed2 ^= seed2>>9
+    xor h
+    ld h,a               ; seed2 ^= seed2<<8
+    ld (.seed2),hl
+    add hl,bc            ; result = new seed2 + old seed1
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;collaboration by Zeda with Runer112
+;160cc or 148cc if using ENABLE_SMC
+;26 bytes
+;cycle: 4,294,901,760 (almost 4.3 billion). NOTE(review): the label rand16 is also defined earlier in this file - confirm only one copy is assembled.
+rand16:
+ ifdef ENABLE_SMC
+.seed1 equ $+1
+    ld hl,9999           ; the seed lives in the operand of this load
+ else
+    ld hl,(.seed1)       ; NOTE(review): without ENABLE_SMC no .seed1/.seed2 label is defined inside this routine - confirm they are provided elsewhere
+ endif
+    ld b,h
+    ld c,l               ; BC = old seed1
+    add hl,hl
+    add hl,hl            ; 4*seed1
+    inc l                ; +1 (the two low bits are clear after the shifts)
+    add hl,bc            ; 5*seed1 + 1
+    ld (.seed1),hl
+ ifdef ENABLE_SMC
+.seed2 equ $+1
+    ld hl,9999           ; the seed lives in the operand of this load
+ else
+    ld hl,(.seed2)
+ endif
+    add hl,hl            ; LFSR step: shift left, top bit goes to carry
+    sbc a,a              ; A = #FF when a bit was shifted out, otherwise 0
+    and %00101101        ; feedback taps
+    xor l
+    ld l,a
+    ld (.seed2),hl
+    add hl,bc            ; result = new seed2 + old seed1
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Returns A on [0,4]
+;Destroys: All
+;Notes:
+; This is a non-standard approach to generating random integers on [0,4].
+; If you have a truly random number generator that generates bits (0 or 1)
+; with equal probability, then standard approaches will still cause a slight
+; bias. ("Standard": "rand mod 5" or int(5*rand)). For example, suppose we
+; generate a 4-bit number. Then "rand mod 5" will cause 0 to be chosen
+; 4/16 times, while 1, 2, 3, and 4 will be chosen 3/16 times (on average).
+; A similar problem exists with int(5*rand). One way to mitigate this issue
+; is just generating infintely many bits, but apparently that is impractical,
+; so I came up with a compromise.
+;
+; My approach basically looks at the binary expansion of 1/5, 2/5, 3/5, and 4/5.
+;   1/5 = .0011001100110011...
+;   2/5 = .0110011001100110...
+;   3/5 = .1001100110011001...
+;   4/5 = .1100110011001100...
+;
+; So if I generate random bits and I get .001100, then a 0, then I know
+; that no matter what all of the rest of the bits are, the number is less than
+; 1/5, and so int(5*rand) is 0.
+;
+; By applying similar logic to the rest of the values, I can guarantee a uniform
+; distribution on [0,4]. But there are four cases where this process might
+; continue forever, specifically the cases that are like ...00110011...., but
+; lucky for us, this happens 4/inf= 0% of the time. In fact, on average it
+; takes 3 to 4 bits before the algorithm can assert which value to return.
+;
+; The one caveat is that on the Z80, we generally don't have truly random
+; numbers :| On the other hand, it is easy enough to generate pseudo-random
+; bits with equal probability :)
+rand5:
+;Draw two random bits. They become the base result (0..3) and also seed the
+;bit pattern in C that later random bits are compared against.
+  call rand
+  ld a,h
+  and #C0
+  push af    ;save the original value (the two leading bits, in bits 7 and 6 of A)
+  ld c,a     ;C = comparison pattern, advanced once per consumed random bit
+.start:
+;Fetch 16 fresh random bits into HL; C has to survive the call.
+  push bc
+  call rand
+  pop bc
+  ld b,15    ;I set this to 15 because I like to guarantee a bit is available for rand10.
+.loop:
+;Compare the next random bit (bit 7 of H) with bit 7 of the pattern.
+  ld a,h
+  xor c
+  jp p,.end  ;the two bits are equal: the outcome is decided
+  add hl,hl  ;not decided yet: drop this random bit
+  sla c      ;advance the pattern: shift left and put the complement of the
+  jr c,$+4   ;shifted-out bit into bit 6 (the SET is skipped when a 1 came out),
+  set 6,c    ;so C cycles through a period-4 sequence
+  djnz .loop
+  jr .start  ;15 bits consumed without a decision: fetch more random bits
+.end:
+  pop af     ;A = the two leading bits again
+  rlca
+  rlca       ;move them down to bits 1..0: A = 0..3
+  sla h      ;carry = the random bit that decided the comparison
+  adc a,0    ;A = 0..4
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Returns A as a random integer on [0,9]
+;Destroys: All
+rand10:
+;A = 2*rand5 + one more random bit, i.e. an integer on [0,9].
+;The extra bit is taken from bit 7 of H exactly as rand5 leaves it.
+		CALL	rand5			; A = 0..4
+		SLA	H			; carry = next random bit
+		RLA				; A = 2*A + carry
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Generates a random TI float at HL
+rand_TI_Float:
+;Writes 9 bytes at HL: the type byte (E, 0), the exponent byte (D), then 14
+;random decimal digits packed two per byte. The leading digit is non-zero
+;unless the value underflows to zero.
+;Destroys: All (calls rand10)
+  push hl       ;keep the destination pointer while the exponent is chosen
+  ; call rand_init   ;(disabled) NOTE(review): the seeding routine in this file is named randinit
+  ld de,#8000   ;D is exponent, E is type. E is used in .zero
+.get_rand_exponent_loop:
+;decrement exponent
+  dec d
+
+;if the exponent is -100, underflow to 0.
+;I don't think this is possible with this RNG, or even likely to ever happen
+;before the universe's heat death with a true RNG, but better to be safe?
+  ld a,d
+  cp 28         ;#80-100 = 28
+  jp z,.zero
+
+;save the exponent
+  push de
+
+;Generate a uniform random digit on [0,9] as a candidate for our first digit.
+  call rand10
+
+;restore the exponent+type
+  pop de
+  or a          ;test the digit for zero (also clears carry)
+;if A is 0, we'll decrement the exponent and find a new candidate for the first
+;digit. This is because we need our float to be "normalized" (top digit non-zero)
+;This also preserves the uniform distribution for values.
+  jr z,.get_rand_exponent_loop
+
+  pop hl        ;HL = destination
+  ld (hl),e     ;type byte
+  inc hl
+  ld (hl),d     ;exponent byte
+  inc hl
+
+;write the first digit
+  ld (hl),a     ;stored in the low nibble; the first RLD below moves it up
+  ld b,13       ;13 more digits follow
+.math_rand_loop:
+;now generate subsequent digits
+  push bc
+  rr b          ;carry = bit 0 of the counter (B itself is restored by POP BC)
+  jr c,$+3      ;odd count: this digit completes the current byte
+  inc hl        ;even count: start the next byte
+  push hl
+
+;generate the next digit
+  call rand10
+  pop hl
+  rld           ;shift (HL) left one nibble and insert the digit as the low nibble
+  pop bc
+  djnz .math_rand_loop
+  ret
+
+.zero:
+;Underflow: fill all 9 bytes with E (0).
+  pop hl
+  ld b,9
+  ld (hl),e     ; E is 0
+  inc hl
+  djnz $-2      ;loops back to the LD (HL),E above
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+; Output is in HL
+; Destroys: A, BC, DE
+; This rand routine combines Patrik Rak's fantastic 32-bit xorshift
+; (https://gist.github.com/raxoft/c074743ea3f926db0037) with a simple lcg for
+; extra smoothing.
+; It has a period of 281,474,976,645,120 (2^48-2^16) and uses 48 bits of state.
+; 42 bytes of code + 6 bytes of state (.seed0, .seed1)
+; 210cc
+rand:
+; lcg: seed0 = 5*seed0+1; BC keeps the previous seed0 for the final mix
+  ld hl,(.seed0)
+  ld b,h
+  ld c,l
+  add hl,hl
+  add hl,hl
+  inc l
+  add hl,bc
+  ld (.seed0),hl
+; xorshift
+  ld hl,(.seed1)     ; yw -> zt
+  ld de,(.seed1+2)   ; xz -> yw
+  ld (.seed1+2),hl   ; x = y, z = w
+  ld a,l            ; w = w ^ ( w << 3 )
+  add a,a
+  add a,a
+  add a,a
+  xor l
+  ld l,a
+  ld a,d         ; t = x ^ (x << 1)
+  add a,a
+  xor d          ; (XOR also clears carry, so the RRA below shifts in a 0)
+  ld h,a
+  rra            ; t = t ^ (t >> 1) ^ w
+  xor h
+  xor l
+  ld h,e         ; y = z
+  ld l,a         ; w = t
+  ld (.seed1),hl
+; Mix the xorshift and the lcg
+  add hl,bc
+  ret
+; Generator state. It is defined inside rand's label scope so that .seed0 and
+; .seed1 above resolve (as rand.seed0 / rand.seed1).
+; The 32-bit xorshift state must never be all zero; randinit enforces that.
+.seed0: DW 0              ; lcg state, any value is fine
+.seed1: DW #A280, #C0DE   ; xorshift state: "yw" word, then "xz" word
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+; need to make sure seed1 is non-zero
+; Call after seeding rand.seed1 from an external source: if all four bytes
+; are zero, the last byte is changed to #FF.
+; Destroys: A, HL
+randinit:
+  ld hl,rand.seed1   ; the state belongs to rand, so the fully qualified name is needed here
+  ld a,(hl)
+  inc hl
+  or (hl)
+  inc hl
+  or (hl)
+  inc hl
+  or (hl)
+  ret nz             ; at least one bit set: the state is usable
+  dec (hl)           ; all zero: make the top byte #FF
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;;Output: A is an 8-bit pseudo-random number (the low byte of the updated state).
+;;Destroys: HL
+;;Notes:
+;;  64-bit shift register kept little-endian at .seed. Every call shifts it left
+;;  by one bit; when a 1 falls out of the top, %00011011 is XORed into the low byte.
+;;  The state must never be all zero, otherwise it stays zero forever.
+lfsr64:
+    ld hl,.seed
+    sla (hl) : inc hl
+    rl (hl) : inc hl
+    rl (hl) : inc hl
+    rl (hl) : inc hl
+    rl (hl) : inc hl
+    rl (hl) : inc hl
+    rl (hl) : inc hl
+    rl (hl)
+    ld a,(.seed)        ;LD does not touch the flags; loading here makes A valid on both exits
+    ret nc
+    xor %00011011
+    ld (.seed),a
+    ret
+.seed: DB 1,0,0,0,0,0,0,0   ;64-bit state, non-zero initial value
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;13 bytes of code (plus 2 bytes of seed storage when ENABLE_SMC is not defined)
+;72cc (66cc if using SMC)
+;period is 65535
+;Output: HL is the new 16-bit state
+;Destroys: A
+;Notes: the seed must never be 0 (a zero state maps to zero again).
+LFSR:
+ ifdef ENABLE_SMC
+.seed equ $+1
+    ld hl,9797
+ else
+    ld hl,(.seed)
+ endif
+    add hl,hl       ;shift left, carry = bit shifted out
+    sbc a,a         ;A = #FF if a 1 was shifted out, else #00
+    and %00101101   ;feedback taps
+    xor l
+    ld l,a
+    ld (.seed),hl   ;store back to this routine's own seed (local label)
+    ret
+ ifndef ENABLE_SMC
+;Without self-modifying code the state has to live in ordinary memory.
+.seed: DW 9797
+ endif
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Input:
+;  (.seed) has the seed value of the RNG; 0 stands for 65536
+;Output:
+;  (.seed) is updated, HL is the result
+;Destroys:
+;  A,DE,BC
+;Timing:
+;  roughly 236cc for a non-zero seed, roughly 98cc for seed=0
+;  (about 5-7cc more than the original single-branch test)
+;  if ENABLE_SMC defined   subtract 6cc
+;Size: 46 bytes (plus 2 bytes of seed storage when ENABLE_SMC is not defined)
+;Notes:
+;    Uses the Lehmer RNG used by the Sinclair ZX81
+;    75x mod 65537 -> x
+lehmer:
+ ifndef ENABLE_SMC
+    ld hl,(.seed)
+ else
+.seed equ $+1
+    ld hl,0
+ endif
+;multiply by 75
+    ld c,l
+    ld b,h
+    xor a
+    adc hl,hl
+;ADC HL,HL sets Z whenever the 16-bit result is 0. That happens for seed=0
+;(no carry) but also for seed=#8000 (carry set), and only the former is the
+;special case, so the carry has to be checked as well.
+    jr nz,.mul75
+    jr nc,.special
+.mul75:
+    ld d,a : rla                            ;A:HL = 2x   (D = 0 for the carry folds below)
+    add hl,hl : rla                         ;4x
+    add hl,hl : rla : add hl,bc : adc a,d   ;9x
+    add hl,hl : rla                         ;18x
+    add hl,hl : rla : add hl,bc : adc a,d   ;37x
+    add hl,hl : rla : add hl,bc             ;75x, last carry still pending in the c flag
+;modulo 65537, see note below on how this works
+    ld e,a
+    sbc hl,de       ;HL - high byte - pending carry; the c flag is deliberately not reset
+    jr nc,$+3
+    inc hl          ;went negative: adding 65537 is +1 in 16 bits
+    ld (.seed),hl
+    ret
+.special:
+;In the case that HL=0, this should be interpreted as 65536 = -1 mod 65537, so return -75 mod 65537 = -74 mod 65536 in HL
+    ld hl,-74
+    ld (.seed),hl
+    ret
+ ifndef ENABLE_SMC
+;Without self-modifying code the state has to live in ordinary memory.
+;0 matches the constant embedded by the ENABLE_SMC build.
+.seed: DW 0
+ endif
+;mod by 2^16 + 1 (a prime)
+;current form is A*2^16+HL
+;need:
+;  (A*2^16+HL) mod (2^16+1)
+;add 0 as +1-1
+;  (A*(2^16+1-1)+HL) mod (2^16+1)
+;distribute
+;  (A*(2^16+1)-A+HL) mod (2^16+1)
+;A*(2^16+1) mod 2^16+1 = 0, so remove
+;  (-A+HL) mod (2^16+1)
+;Oh hey, that's easy! :P
+;I use this trick everywhere, you should, too.
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛŠ®à­¨ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;A*A->A (low 8 bits of the square)
+;Destroys: HL
+;76cc or 79cc or 82cc
+;Avg: 79cc
+;51 bytes (plus the alignment padding in front of the table)
+sqrA:
+;Reduce the argument: bit 7 is dropped, and when bit 6 is set the value is
+;negated, which leaves the low byte of the square unchanged.
+    add a,a
+    add a,a         ;carry = original bit 6
+    jr nc,$+4
+    neg
+    rrca
+    rrca            ;A = reduced argument, 0..63
+    ld l,a
+    srl l           ;L = table index (argument/2), carry = argument parity
+    ld h,.LUT/256
+    jr c,$+4
+    neg             ;even argument: result = table - A; odd: table + A
+    add a,(hl)
+    ret
+;The table is addressed through L alone (H holds only the table's high byte),
+;so it MUST start on a 256-byte boundary. ALIGN pads up to that boundary and
+;the ASSERT below catches the case where the padding is ever removed.
+ ALIGN 256
+.LUT:		DB	#00, #06, #14, #2A, #48, #6E, #9C, #D2
+		DB	#10, #56, #A4, #FA, #58, #BE, #2C, #A2
+		DB	#20, #A6, #34, #CA, #68, #0E, #BC, #72
+		DB	#30, #F6, #C4, #9A, #78, #5E, #4C, #42
+
+ ASSERT (low .LUT) = 0, "sqrA.LUT MUST BE ALIGNED to a 256-byte boundary!"
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Input: L
+;Output: L*L->A
+;147 t-states
+;36 bytes
+L_sqrd:
+;Branch-free square. B holds a copy of the input that is rotated right so its
+;bits reach the carry one at a time; `sbc a,a` turns each into a #00/#FF mask.
+;C accumulates -x^2 (per the step comments); the final `sub c` negates it.
+;Destroys: B, C, L
+    ld b,l
+	;First iteration, get the lowest 3 bits of -x^2
+    sla l
+    rrc b
+    sbc a,a
+    or l
+    ld c,a
+	;second iteration, get the next 2 bits of -x^2
+    rrc b
+    sbc a,a
+    xor l
+    and #F8
+    add a,c
+    ld c,a
+	;third iteration, get the next 2 bits of -x^2
+    sla l
+    rrc b
+    sbc a,a
+    xor l
+    and #E0
+    add a,c
+    ld c,a
+	;fourth iteration, get the eighth bit of x^2
+    sla l
+    rrc b
+    sbc a,a
+    xor l
+    and #80
+    sub c           ;A = term - (-x^2 so far) = low byte of x^2
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ“¬­®¦¥­¨¥ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;Signed 16.16 fixed-point multiply.
+;This requires the following routines:
+;   mul32
+;       Inputs: DEHL, BCIX
+;       Output: 64-bit product stored at mul32.z32_0, little-endian
+;Multiplies DE.HL by BC.IX, stores the result in DE.HL
+;If the upper two bytes of the 64-bit product are non-zero, the magnitude is
+;saturated to #7FFFFFFF before the sign is restored.
+;Destroys: A, BC, IX, plus whatever mul32 destroys
+;NOTE: this routine used to be present twice (same label, same code), which
+;cannot assemble; this is the single remaining copy.
+mulfixed16_16:	; First, find out if the output is positive or negative
+		LD	A,D
+		XOR	B
+		PUSH	AF   ;sign bit is the result sign bit
+		; Now make sure the inputs are positive
+		XOR	B     ;A now has the value of D, since I XORed it with B twice (cancelling)
+		JP	P,.skip1   ;if Positive, don't negate
+		XOR	A		; DEHL = -DEHL
+		SUB	L
+		LD	L,A
+		LD	A,0		; LD keeps the borrow alive, unlike XOR A
+		SBC	A,H
+		LD	H,A
+		LD	A,0
+		SBC	A,E
+		LD	E,A
+		SBC	A,A
+		SUB	D
+		LD	D,A
+.skip1:		BIT	7,B
+		JR	Z,.skip2
+		XOR	A		; BCIX = -BCIX
+		SUB	IXL
+		LD	IXL,A
+		LD	A,0
+		SBC	A,IXH
+		LD	IXH,A
+		LD	A,0
+		SBC	A,C
+		LD	C,A
+		SBC	A,A
+		SUB	B
+		LD	B,A
+.skip2:		; Now we multiply
+		CALL	mul32
+		;We should check for overflow. If the upper two bytes are non-zero, we will set the result to 0x7FFFFFFF
+		;(the product buffer belongs to mul32, hence the qualified name)
+		LD	HL,(mul32.z32_0+6)
+		LD	A,H
+		OR	L
+		;Get the middle four bytes and put them in DEHL (LD leaves the Z flag untouched)
+		LD	HL,(mul32.z32_0+2)
+		LD	DE,(mul32.z32_0+4)
+		;Maybe we need to set the result to 0x7FFFFFFF
+		JR	Z,.skip3
+		LD	DE,#7FFF
+		LD	H,E
+		LD	L,E
+.skip3:		; Now we need to restore the sign
+		POP	AF
+		RET	P    ;don't need to do anything, result is already positive
+		XOR	A		; DEHL = -DEHL, B serves as the constant 0
+		LD	B,A
+		SUB	L
+		LD	L,A
+		LD	A,B
+		SBC	A,H
+		LD	H,A
+		LD	A,B
+		SBC	A,E
+		LD	E,A
+		SBC	A,A
+		SUB	D
+		LD	D,A
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Requires:
+;   mul16
+;     Inputs: BC,DE
+;     Output: DEHL
+;Multiplies 4.12 fixed point numbers.
+;Inputs: HL is the first fixed-point multiplicand
+;        DE is the second fixed-point multiplicand
+;Output: HL is the fixed-point output
+;Overflow is stored as 0x7.FFF or 0x8.001 depending on positive or negative
+mulfixed4_12:	; The product is negative exactly when the operand signs differ.
+		LD	A,H
+		XOR	D
+		PUSH	AF			; saved sign flag = sign of the result
+		XOR	D			; undo the XOR: the sign flag now describes H
+		JP	P,.hl_positive
+		XOR	A			; HL = -HL
+		SUB	L
+		LD	L,A
+		SBC	A,A
+		SUB	H
+		LD	H,A
+.hl_positive:	BIT	7,D
+		JR	Z,.de_positive
+		XOR	A			; DE = -DE
+		SUB	E
+		LD	E,A
+		SBC	A,A
+		SUB	D
+		LD	D,A
+.de_positive:	; mul16 takes its operands in BC and DE, so |HL| moves to BC.
+		LD	B,H
+		LD	C,L
+		CALL	mul16			; DEHL = 32-bit product
+		; The 4.12 result is bits 27..12 of the product. Bits 15..12 are
+		; parked in the top nibble of A and rotated into HL (the old high
+		; word) four times; the bits leaving HL (31..28) collect in A.
+		LD	A,H
+		AND	#F0
+		EX	DE,HL
+		RLA
+		ADC	HL,HL
+		RLA
+		ADC	HL,HL
+		RLA
+		ADC	HL,HL
+		RLA
+		ADC	HL,HL
+		ADC	A,A			; A = bits 31..28; non-zero means overflow
+		JR	Z,.in_range
+		LD	HL,#7FFF
+.in_range:	; Give the result its sign back.
+		POP	AF
+		RET	P			; positive result: done
+		XOR	A			; HL = -HL
+		SUB	L
+		LD	L,A
+		SBC	A,A
+		SUB	H
+		LD	H,A
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Multiplies H.L by D.E, stores the result in H.L
+mulfixed_88:	; The product is negative exactly when the operand signs differ.
+		LD	A,H
+		XOR	D
+		PUSH	AF			; saved sign flag = sign of the result
+		XOR	D			; undo the XOR: the sign flag now describes H
+		JP	P,.hl_positive
+		XOR	A			; HL = -HL
+		SUB	L
+		LD	L,A
+		SBC	A,A
+		SUB	H
+		LD	H,A
+.hl_positive:	BIT	7,D
+		JR	Z,.de_positive
+		XOR	A			; DE = -DE
+		SUB	E
+		LD	E,A
+		SBC	A,A
+		SUB	D
+		LD	D,A
+.de_positive:	; mul16 takes its operands in BC and DE, so |HL| moves to BC.
+		LD	B,H
+		LD	C,L
+		CALL	mul16			; DEHL = 32-bit product
+		SLA	L			; carry = top discarded fraction bit, used for rounding
+		LD	L,H			; HL = the middle two bytes of the product (E:H)
+		LD	H,E
+		LD	A,D			; A = top byte of the product
+		LD	DE,0
+		ADC	HL,DE			; round to nearest
+		; Overflow check: the top byte (plus the rounding carry) must be 0.
+		ADC	A,E
+		JR	C,.saturate
+		JR	Z,.in_range
+.saturate:	LD	HL,#7FFF
+.in_range:	; Give the result its sign back.
+		POP	AF
+		RET	P			; positive result: done
+		XOR	A			; HL = -HL
+		SUB	L
+		LD	L,A
+		SBC	A,A
+		SUB	H
+		LD	H,A
+		RET
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;This multiplies two 64-bit integers and returns a 128-bit result.
+;This requires the following routines:
+;   mul32 ;!TEST
+;       Inputs: DEHL, BCIX
+;       Output: stored at mul32.z32_0 (8 bytes), little-endian
+;
+;   Must be defined by the user of this routine:
+;       inp64_1 is where the first 64-bit multiplicand is located, little-endian
+;       inp64_2 is where the second 64-bit multiplicand is located, little-endian
+;       out128  is where the 128-bit result is stored
+;
+;multiplies the 64-bit integers at inp64_1 and inp64_2
+;stores the 128-bit (16-byte) result at out128
+;NOTE: the 8 bytes at inp64_1 are reused as scratch space and are destroyed.
+;
+;min: 1740+3*min(mul32)
+;     5631cc
+;max: 1901+3*max(mul32)
+;     10013cc
+;avg: 1797+3*avg(mul32) + 9572881/2^24
+;    ~8720.733cc
+;uses 16 bytes at out128
+mul64:
+;Local aliases. Every reference below uses the dotted local names.
+.inp64_1 EQU inp64_1
+.inp64_2 EQU inp64_2
+.z64_0 EQU out128
+.z64_2 EQU .z64_0+8
+.z32_0 EQU mul32.z32_0   ;mul32 leaves its 64-bit product in its own buffer
+
+;z2 = high dword * high dword
+  ld de,(.inp64_1+6)
+  ld hl,(.inp64_1+4)
+  ld bc,(.inp64_2+6)
+  ld ix,(.inp64_2+4)
+  call mul32
+  ;copy the 8 bytes at z32_0 to z64_2
+  ld hl,.z32_0
+  ld de,.z64_2
+  call .mov8
+
+;z0 = low dword * low dword
+  ld de,(.inp64_1+2)
+  ld hl,(.inp64_1)
+  ld bc,(.inp64_2+2)
+  ld ix,(.inp64_2)
+  call mul32
+  ;copy the 8 bytes at z32_0 to z64_0
+  ld hl,.z32_0
+  ld de,.z64_0
+  call .mov8
+
+;now I need to subtract the 32-bit digits from each other
+;A collects the number of negative differences; its low bit is the product sign
+  xor a
+  ld hl,(.inp64_1)
+  ld bc,(.inp64_1+4)
+  sbc hl,bc
+  ex de,hl
+  ld hl,(.inp64_1+2)
+  ld bc,(.inp64_1+6)
+  sbc hl,bc
+  jr nc,.skip1
+;negative: negate HLDE; the final SBC leaves carry set
+  ld b,a : sub e : ld e,a
+  ld a,b : sbc a,d : ld d,a
+  ld a,b : sbc a,l : ld l,a
+  ld a,b : sbc a,h : ld h,a
+  ld a,b
+.skip1:
+  rla       ;A = 1 if the first difference was negative; carry is clear afterwards
+  push hl   ;top byte
+  push de
+
+  ld hl,(.inp64_2)
+  ld bc,(.inp64_2+4)
+  sbc hl,bc
+  ex de,hl
+  ld hl,(.inp64_2+2)
+  ld bc,(.inp64_2+6)
+  sbc hl,bc
+  jr nc,.skip2
+  ld c,a
+  xor a
+  ld b,a
+  sub e : ld e,a
+  ld a,b : sbc a,d : ld d,a
+  ld a,b : sbc a,l : ld l,a
+  ld a,b : sbc a,h : ld h,a
+  ld a,c
+  inc a
+.skip2:
+  ex de,hl  ;DEHL = |second difference|
+  pop ix
+  pop bc    ;BCIX = |first difference|
+  push af
+  call mul32
+  pop af    ;holds the sign in the low bit
+
+  rra
+  jp c,.add
+;need to perform z0+z2-result
+  xor a
+  ld hl,(.z64_0)
+  ld de,(.z64_2)
+  add hl,de
+  ld (.inp64_1),hl
+  ld hl,(.z64_0+2)
+  ld de,(.z64_2+2)
+  adc hl,de
+  ld (.inp64_1+2),hl
+  ld hl,(.z64_0+4)
+  ld de,(.z64_2+4)
+  adc hl,de
+  ld (.inp64_1+4),hl
+  ld hl,(.z64_0+6)
+  ld de,(.z64_2+6)
+  adc hl,de
+  ld (.inp64_1+6),hl
+  rla       ;A = carry out of the 64-bit sum; carry is clear afterwards
+;now need to subtract
+  ld hl,(.inp64_1)
+  ld de,(.z32_0)
+  sbc hl,de
+  ld (.inp64_1),hl
+  ld hl,(.inp64_1+2)
+  ld de,(.z32_0+2)
+  sbc hl,de
+  ld (.inp64_1+2),hl
+  ld hl,(.inp64_1+4)
+  ld de,(.z32_0+4)
+  sbc hl,de
+  ld (.inp64_1+4),hl
+  ld hl,(.inp64_1+6)
+  ld de,(.z32_0+6)
+  sbc hl,de
+  ld (.inp64_1+6),hl
+  sbc a,0
+.final:
+;now need to add it back in
+;the middle term (A plus the 8 bytes at inp64_1) is added at byte offset 4 of the result
+  ld hl,(.z64_0+4)
+  ld de,(.inp64_1)
+  add hl,de
+  ld (.z64_0+4),hl
+  ld hl,(.z64_0+6)
+  ld de,(.inp64_1+2)
+  adc hl,de
+  ld (.z64_0+6),hl
+  ld hl,(.z64_0+8)
+  ld de,(.inp64_1+4)
+  adc hl,de
+  ld (.z64_0+8),hl
+  ld hl,(.z64_0+10)
+  ld de,(.inp64_1+6)
+  adc hl,de
+  ld (.z64_0+10),hl
+  ld hl,.z64_0+12
+  adc a,(hl)
+  ld (hl),a
+  ret nc
+;ripple the carry through the top three bytes
+  inc hl : inc (hl) : ret nz
+  inc hl : inc (hl) : ret nz
+  inc hl : inc (hl) : ret
+.add:
+;add to the current result
+;z0+z2+result
+  xor a
+  ld hl,(.z64_0)
+  ld de,(.z64_2)
+  add hl,de
+  ld (.inp64_1),hl
+  ld hl,(.z64_0+2)
+  ld de,(.z64_2+2)
+  adc hl,de
+  ld (.inp64_1+2),hl
+  ld hl,(.z64_0+4)
+  ld de,(.z64_2+4)
+  adc hl,de
+  ld (.inp64_1+4),hl
+  ld hl,(.z64_0+6)
+  ld de,(.z64_2+6)
+  adc hl,de
+  ld (.inp64_1+6),hl
+  rla
+;now add the third product
+  ld hl,(.inp64_1)
+  ld de,(.z32_0)
+  add hl,de
+  ld (.inp64_1),hl
+  ld hl,(.inp64_1+2)
+  ld de,(.z32_0+2)
+  adc hl,de
+  ld (.inp64_1+2),hl
+  ld hl,(.inp64_1+4)
+  ld de,(.z32_0+4)
+  adc hl,de
+  ld (.inp64_1+4),hl
+  ld hl,(.inp64_1+6)
+  ld de,(.z32_0+6)
+  adc hl,de
+  ld (.inp64_1+6),hl
+  adc a,0
+  jp .final
+
+;copies 8 bytes from (HL) to (DE)
+.mov8:	LDI
+	LDI
+	LDI
+	LDI
+	LDI
+	LDI
+	LDI
+	LDI
+	RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Requires:
+;   mul16 ;!TEST
+;     Inputs: BC,DE
+;     Output: DEHL
+;max: 703cc  + 3*mul16
+;     2704cc
+;min: 655cc  + 3*mul16
+;     1297cc
+;avg: 673.25cc+3*mul16
+;     2307.911cc
+;DEHL * BCIX ==> .z32_0
+mul32:
+;Three 16x16 multiplies (Karatsuba): z2 = high*high, z0 = low*low, and the
+;product of the absolute high/low differences supplies the middle term.
+;The 64-bit result is left at .z32_0 (.z32_2 follows it directly), little-endian.
+;Relies on mul16 leaving BC unchanged.
+  push de
+  push bc
+  push hl
+  push ix
+  call mul16  ;DEHL
+  ld (.z32_2),hl
+  ld (.z32_2+2),de
+
+  pop de
+  pop bc
+  push de
+  call mul16  ;DEHL
+  ld (.z32_0),hl
+  ld (.z32_0+2),de
+
+;A counts the negative differences; its low bit is the sign of their product
+  pop de    ;low word
+  pop hl
+  xor a
+  sbc hl,de
+  jr nc,.skip1
+  sub l
+  ld l,a
+  sbc a,a
+  sub h
+  ld h,a
+  xor a     ;also clears carry for the SBC below
+  inc a
+.skip1:
+  ex de,hl
+  pop hl
+  sbc hl,bc
+  jr nc,.skip2
+  ld b,a
+  xor a
+  sub l
+  ld l,a
+  sbc a,a
+  sub h
+  ld h,a
+  ld a,b
+  inc a
+.skip2:
+  ld b,h
+  ld c,l
+  push af
+  call mul16
+  pop af    ;holds the sign in the low bit
+  rra
+  jr c,.add
+;need to perform z0+z2-result
+  push de
+  push hl
+  xor a
+  ld hl,(.z32_0)
+  ld bc,(.z32_2)
+  add hl,bc
+  ex de,hl
+  ld hl,(.z32_0+2)
+  ld bc,(.z32_2+2)
+  adc hl,bc
+  rla       ;A = carry out of the sum; carry is clear afterwards
+;now need to subtract
+  ex de,hl
+  pop bc
+  sbc hl,bc
+  ex de,hl
+  pop bc
+  sbc hl,bc
+  sbc a,0
+;A:HL:DE is the result, need to add to z32_0+2
+.final:
+  ld bc,(.z32_0+2)
+  ex de,hl
+  add hl,bc
+  ld (.z32_0+2),hl
+  ld hl,(.z32_2)
+  adc hl,de
+  ld (.z32_2),hl
+  ld hl,.z32_2+2   ;local buffer (the leading dot was missing here)
+  adc a,(hl)
+  ld (hl),a
+  ret nc
+  inc hl
+  inc (hl)
+  ret
+.add:
+;add to the current result
+  xor a
+  ld bc,(.z32_0)
+  add hl,bc
+  ex de,hl
+  ld bc,(.z32_0+2)
+  adc hl,bc
+  rla
+  ex de,hl
+  ld bc,(.z32_2)
+  add hl,bc
+  ex de,hl
+  ld bc,(.z32_2+2)
+  adc hl,bc
+  adc a,0
+  jp .final
+  ;
+;64-bit product buffer; .z32_2 must directly follow .z32_0
+.z32_0:	DS 4
+.z32_2:	DS 4
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;BDE*CHL -> HLBCDE
+;155 bytes
+;402+3*C_Times_BDE
+;fastest:1201cc
+;slowest:1753cc
+;avg: 1464.9033203125cc (1464+925/1024)
+;min: 825cc
+;max: 1926cc
+;avg: 1449.63839751681cc
+mul24:
+;Schoolbook multiply: BDE is multiplied by each byte of CHL in turn (L, H, C)
+;through C_times_BDE (C*BDE => CAHL), and the partial products are summed at
+;byte offsets 0, 1 and 2 of .var48.
+;The callee is spelled C_times_BDE to match its definition (labels are case-sensitive).
+    push bc
+    ld c,l
+    push hl
+    call C_times_BDE
+    ld (.var48),hl
+    ld l,a
+    ld h,c
+    ld (.var48+2),hl
+
+    pop hl
+    ld c,h
+    call C_times_BDE
+    push bc
+    ld bc,(.var48+1)
+    add hl,bc
+    ld (.var48+1),hl
+    pop bc
+    ld b,c
+    ld c,a
+    ld hl,(.var48+3)
+    ld h,0          ;only the byte at .var48+3 is valid so far
+    adc hl,bc
+    ld (.var48+3),hl
+
+    pop bc          ;original B and C
+    call C_times_BDE
+    ld de,(.var48+2)
+    add hl,de
+    ld (.var48+2),hl
+    ld d,c
+    ld e,a
+    ld b,h
+    ld c,l          ;BC = middle word of the result
+    ld hl,(.var48+4)
+    ld h,0
+    adc hl,de       ;HL = top word of the result
+    ld de,(.var48)  ;DE = bottom word of the result
+    ret
+
+.var48: DS 6
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;This was made by Runer112
+;Tested by jacobly
+;BC*DE --> DEHL
+; ~544.887cc as calculated in jacobly's test
+;min: 214cc  (DE = 1)
+;max: 667cc
+;avg: 544.4507883cc   however, deferring to jacobly's result as mine may have math issues ?
+;177 bytes
+mul16:		LD	A,D			; A = high byte of the multiplier DE
+		LD	D,0			; D = 0 so that `ADC A,D` only folds in the carry
+		LD	H,B
+		LD	L,C			; HL = BC: the partial product once the leading 1 bit is found
+		; Scan the multiplier's high byte for its leading set bit and enter
+		; the unrolled shift-and-add chain at the matching depth.
+		ADD	A,A : JR C,.Bit14
+		ADD	A,A : JR C,.Bit13
+		ADD	A,A : JR C,.Bit12
+		ADD	A,A : JR C,.Bit11
+		ADD	A,A : JR C,.Bit10
+		ADD	A,A : JR C,.Bit9
+		ADD	A,A : JR C,.Bit8
+		ADD	A,A : JR C,.Bit7
+		; The high byte is zero: repeat the scan on the low byte. Bit 0 is
+		; masked off here and handled last, through RR E.
+		LD	A,E
+		AND	%11111110
+		ADD	A,A : JR C,.Bit6
+		ADD	A,A : JR C,.Bit5
+		ADD	A,A : JR C,.Bit4
+		ADD	A,A : JR C,.Bit3
+		ADD	A,A : JR C,.Bit2
+		ADD	A,A : JR C,.Bit1
+		ADD	A,A : JR C,.Bit0
+		; No set bit in bits 15..1: the multiplier is 0 or 1.
+		RR	E			; carry = bit 0 of the multiplier, E becomes 0
+		RET	C			; multiplier was 1: DEHL = BC
+		LD	H,D
+		LD	L,E			; multiplier was 0: DEHL = 0
+		RET
+		;
+		; Each step doubles A:HL. The bit leaving the top of A is the next
+		; multiplier bit; when it is set, BC is added and the carry folded into A.
+.Bit14:		ADD	HL,HL : ADC A,A : JR NC,.Bit13 : ADD HL,BC : ADC A,D
+.Bit13:		ADD	HL,HL : ADC A,A : JR NC,.Bit12 : ADD HL,BC : ADC A,D
+.Bit12:		ADD	HL,HL : ADC A,A : JR NC,.Bit11 : ADD HL,BC : ADC A,D
+.Bit11:		ADD	HL,HL : ADC A,A : JR NC,.Bit10 : ADD HL,BC : ADC A,D
+.Bit10:		ADD	HL,HL : ADC A,A : JR NC,.Bit9  : ADD HL,BC : ADC A,D
+.Bit9:		ADD	HL,HL : ADC A,A : JR NC,.Bit8  : ADD HL,BC : ADC A,D
+.Bit8:		ADD	HL,HL : ADC A,A : JR NC,.Bit7  : ADD HL,BC : ADC A,D
+.Bit7:		LD	D,A			; D keeps the product byte built so far
+		LD	A,E			; continue with the low multiplier byte (bit 0 masked)
+		AND	%11111110
+		ADD	HL,HL : ADC A,A : JR NC,.Bit6 : ADD HL,BC : ADC A,0
+.Bit6:		ADD	HL,HL : ADC A,A : JR NC,.Bit5 : ADD HL,BC : ADC A,0
+.Bit5:		ADD	HL,HL : ADC A,A : JR NC,.Bit4 : ADD HL,BC : ADC A,0
+.Bit4:		ADD	HL,HL : ADC A,A : JR NC,.Bit3 : ADD HL,BC : ADC A,0
+.Bit3:		ADD	HL,HL : ADC A,A : JR NC,.Bit2 : ADD HL,BC : ADC A,0
+.Bit2:		ADD	HL,HL : ADC A,A : JR NC,.Bit1 : ADD HL,BC : ADC A,0
+.Bit1:		ADD	HL,HL : ADC A,A : JR NC,.Bit0 : ADD HL,BC : ADC A,0
+.Bit0:		ADD	HL,HL
+		ADC	A,A
+		JR	C,.FunkyCarry		; a carry out of A here is carried into D
+		RR	E			; carry = bit 0 of the multiplier
+		LD	E,A			; E = second-highest byte of the product
+		RET	NC
+		ADD	HL,BC			; bit 0 was set: add BC once more and ripple the carry
+		RET	NC
+		INC	E
+		RET	NZ
+		INC	D
+		RET
+		;
+.FunkyCarry:	INC	D			; carry out of A goes into D
+		RR	E
+		LD	E,A
+		RET	NC
+		ADD	HL,BC
+		RET	NC
+		INC	E
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Inputs: H,E
+;Outputs: HL is the product, D is 0
+;Destroys: A
+;187+6{0,6}+{0,15}
+;min: 187cc
+;max: 238cc
+;avg: 212.5cc
+;35 bytes
+H_Times_E:	LD	D,0			; DE = E, added once per set multiplier bit
+		SLA	H			; carry = bit 7 of the multiplier
+		SBC	A,A			; A = #FF if that bit was set, else #00
+		AND	E
+		LD	L,A			; L = first partial product (E or 0)
+		; Seven more shift-and-add steps, one per remaining multiplier bit.
+		; The multiplier leaves through the top of H while the product
+		; grows from below.
+		ADD	HL,HL
+		JR	NC,.bit5
+		ADD	HL,DE
+.bit5:		ADD	HL,HL
+		JR	NC,.bit4
+		ADD	HL,DE
+.bit4:		ADD	HL,HL
+		JR	NC,.bit3
+		ADD	HL,DE
+.bit3:		ADD	HL,HL
+		JR	NC,.bit2
+		ADD	HL,DE
+.bit2:		ADD	HL,HL
+		JR	NC,.bit1
+		ADD	HL,DE
+.bit1:		ADD	HL,HL
+		JR	NC,.bit0
+		ADD	HL,DE
+.bit0:		ADD	HL,HL
+		RET	NC
+		ADD	HL,DE
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+H_Times_E_No_A:
+;Inputs: H,E
+;Outputs: HL is the product, D is 0
+;Same as H_Times_E, but register A is left untouched.
+;190+6{0,6}+{0,15}+{0,1}
+;min: 190cc
+;max: 242
+;avg: 216
+;36 bytes
+		LD	D,0			; DE = E, added once per set multiplier bit
+		LD	L,D			; product starts at 0
+		SLA	H			; carry = bit 7 of the multiplier
+		JR	NC,.bit6
+		LD	L,E			; first partial product
+.bit6:		ADD	HL,HL
+		JR	NC,.bit5
+		ADD	HL,DE
+.bit5:		ADD	HL,HL
+		JR	NC,.bit4
+		ADD	HL,DE
+.bit4:		ADD	HL,HL
+		JR	NC,.bit3
+		ADD	HL,DE
+.bit3:		ADD	HL,HL
+		JR	NC,.bit2
+		ADD	HL,DE
+.bit2:		ADD	HL,HL
+		JR	NC,.bit1
+		ADD	HL,DE
+.bit1:		ADD	HL,HL
+		JR	NC,.bit0
+		ADD	HL,DE
+.bit0:		ADD	HL,HL
+		RET	NC
+		ADD	HL,DE
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+HL_Times_128:
+;HL = HL*128 (mod 65536), done as "shift right by one, then left by eight".
+;Destroys: A
+		XOR	A			; A = 0, carry clear
+		RR	H			; bit 0 of H -> carry
+		RR	L			; L = (H bit 0):(L bits 7..1), bit 0 of L -> carry
+		RRA				; A = (L bit 0) in bit 7
+		LD	H,L
+		LD	L,A
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;NOTE: This is a set of in-line routines!
+;Each variant below is meant to be pasted in-line (without its RET). They used
+;to sit back-to-back with a single RET at the very end, so calling
+;HL_Times_12 executed all three in sequence and did not return 12*HL. Each
+;variant now ends with its own RET; the alternatives are reachable through
+;the local labels.
+;
+; Input: HL
+; Output: BC is the input, HL is 12 times the input
+; 6 bytes, 52cc (without the RET)
+HL_Times_12:
+  ld b,h
+  ld c,l
+  add hl,hl   ; hl*2
+  add hl,bc   ; hl*3
+  add hl,hl   ; hl*6
+  add hl,hl   ; hl*12
+  ret
+;Destroys only register E and F
+; Input: HL <= 85,
+; 8 bytes, 46cc (without the RET)
+.small_via_a:
+   ld e,a    ; park A in E
+   ld a,l
+   add a,a   ; hl*2
+   add a,l   ; hl*3
+   ld l,a
+   ld a,e    ; A restored
+   add hl,hl ; hl*6
+   add hl,hl ; hl*12
+   ret
+;Destroys only register E and F
+; Input: HL <= 85,
+; 7 bytes, 55cc (without the RET)
+.small_via_de:
+   ld e,l
+   add hl,hl ; hl*2
+   add hl,de ; hl*3+d*256
+   ld h,0    ; hl*3
+   add hl,hl ; hl*6
+   add hl,hl ; hl*12
+   ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Inputs:
+;   DEBC is a 32-bit multiplicand
+;   A is an 8-bit multiplicand
+;Outputs:
+;   AHLIX is the 40-bit result
+;   carry reset
+;   z set if top 8 bits are 0
+;   sign flag set as expected
+;===============================================================
+;503+8{0,41}
+;min: 503cc
+;max: 831cc
+;avg: 667cc
+;29 bytes
+DEBC_Times_A:
+;The loop is unrolled through nested calls: every CALL targets the label that
+;immediately follows it, so the code below the label runs once as a subroutine
+;and once more by falling into it after the return. Three levels of this run
+;the .iter1 body 2*2*2 = 8 times, once per bit of A.
+  ld hl,0
+  ld ix,0       ;HL:IX = 32-bit accumulator
+  call .iter3
+.iter3:
+;231+4{0,41}
+  call .iter2
+.iter2:
+;107+2{0,41}
+  call .iter1
+.iter1:
+;45+{0,41}
+;One shift-and-add step on the 40-bit value A:HL:IX. The bit leaving the top
+;of A is the next multiplier bit; product bits enter A from below.
+  add ix,ix
+  adc hl,hl
+  adc a,a
+  ret nc        ;multiplier bit was 0
+  add ix,bc     ;multiplier bit was 1: add DEBC
+  adc hl,de
+  adc a,0
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Inputs:
+;     DE and A are factors
+;Outputs:
+;     A is not changed
+;     B is 0
+;     C is not changed
+;     DE is not changed
+;     HL is the product
+;Time:
+;     342+6x
+;13 bytes
+DE_Times_A:	LD	B,8			; one pass per bit of A
+		LD	HL,0
+.next_bit:	ADD	HL,HL			; make room for the next partial product
+		RLCA				; next multiplier bit -> carry (A is back to its
+						; original value after 8 rotations)
+		JR	NC,.no_add
+		ADD	HL,DE
+.no_add:	DJNZ	.next_bit
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Input: DE,A
+;Output: A:HL is the product, C=0, B,DE unaffected, z flag set if result is zero, c flag set if A is input as 1, else nc.
+;A:128~255 219+6{0,10}+{0,19}    avg=258.5   *1/2
+;A:64~127  203+5{0,10}+{0,19}    avg=237.5   *1/4
+;A:32~63   187+4{0,10}+{0,19}    avg=216.5   *1/8
+;A:16~31   171+3{0,10}+{0,19}    avg=195.5   *1/16
+;A:8~15    155+2{0,10}+{0,19}    avg=174.5   *1/32
+;A:4~7     139+{0,10}+{0,19}     avg=153.5   *1/64
+;A:2~3     123+{0,19}            avg=132.5   *1/128
+;A:1       107cc                 avg=107     *1/256
+;A:0       119cc                 avg=119     *1/256
+;overall avg: 237.671875cc
+DE_Times_A_v1:	LD	C,0			; C = 0 so that `ADC A,C` only folds in the carry
+		LD	H,D
+		LD	L,E			; HL = DE: partial product for the leading 1 bit
+		; Find the leading set bit of A and enter the chain with as many
+		; steps left as there are lower bits.
+		ADD	A,A
+		JR	C,.steps7
+		RLA
+		JR	C,.steps6
+		RLA
+		JR	C,.steps5
+		RLA
+		JR	C,.steps4
+		RLA
+		JR	C,.steps3
+		RLA
+		JR	C,.steps2
+		RLA
+		JR	C,.steps1
+		RLA
+		RET	C			; A was 1: A:HL = DE, returned with carry set
+		LD	H,A			; A was 0: the product is 0
+		LD	L,A
+		RET
+		; Each step doubles A:HL; the bit leaving the top of A selects
+		; whether DE is added.
+.steps7:	ADD	HL,HL
+		RLA
+		JR	NC,.steps6
+		ADD	HL,DE
+		ADC	A,C
+.steps6:	ADD	HL,HL
+		RLA
+		JR	NC,.steps5
+		ADD	HL,DE
+		ADC	A,C
+.steps5:	ADD	HL,HL
+		RLA
+		JR	NC,.steps4
+		ADD	HL,DE
+		ADC	A,C
+.steps4:	ADD	HL,HL
+		RLA
+		JR	NC,.steps3
+		ADD	HL,DE
+		ADC	A,C
+.steps3:	ADD	HL,HL
+		RLA
+		JR	NC,.steps2
+		ADD	HL,DE
+		ADC	A,C
+.steps2:	ADD	HL,HL
+		RLA
+		JR	NC,.steps1
+		ADD	HL,DE
+		ADC	A,C
+.steps1:	ADD	HL,HL
+		RLA
+		RET	NC
+		ADD	HL,DE
+		ADC	A,C
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+DE_Times_A_v2:
+;DE*A ==> AHL
+;Destroys: B (left at 0)
+		LD	HL,0
+		LD	B,H			; B = 0 so that `ADC A,B` only folds in the carry
+		ADD	A,A			; multiplier bit 7 -> carry
+		JR	NC,.after_shift		; bit clear: HL is still 0, so doubling it can be skipped
+		LD	H,D
+		LD	L,E
+		ADD	HL,HL
+.after_shift:	RLA
+		JR	NC,.bit5
+		ADD	HL,DE
+		ADC	A,B
+.bit5:		ADD	HL,HL
+		RLA
+		JR	NC,.bit4
+		ADD	HL,DE
+		ADC	A,B
+.bit4:		ADD	HL,HL
+		RLA
+		JR	NC,.bit3
+		ADD	HL,DE
+		ADC	A,B
+.bit3:		ADD	HL,HL
+		RLA
+		JR	NC,.bit2
+		ADD	HL,DE
+		ADC	A,B
+.bit2:		ADD	HL,HL
+		RLA
+		JR	NC,.bit1
+		ADD	HL,DE
+		ADC	A,B
+.bit1:		ADD	HL,HL
+		RLA
+		JR	NC,.bit0
+		ADD	HL,DE
+		ADC	A,B
+.bit0:		ADD	HL,HL
+		RLA
+		RET	NC
+		ADD	HL,DE
+		ADC	A,B
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;C*BDE => CAHL
+;C = 0     157
+;C = 1     141
+;141+
+;C>=128    135+6{0,33+{0,1}}+{0,20+{0,8}}
+;C>=64     115+5{0,33+{0,1}}+{0,20+{0,8}}
+;C>=32     95+4{0,33+{0,1}}+{0,20+{0,8}}
+;C>=16     75+3{0,33+{0,1}}+{0,20+{0,8}}
+;C>=8      55+2{0,33+{0,1}}+{0,20+{0,8}}
+;C>=4      35+{0,33+{0,1}}+{0,20+{0,8}}
+;C>=2      15+{0,20+{0,8}}
+;min: 141cc
+;max: 508cc
+;avg: 349.21279907227cc
+C_times_BDE:
+;A:HL starts as BDE, the partial product for the leading 1 bit of C. C is
+;scanned for that bit; the chain below is entered with one step left for
+;every lower bit. While C's remaining bits leave through its top, the top
+;byte of the 32-bit product collects in C from below.
+;B and DE are not modified.
+  ld a,b
+  ld h,d
+  ld l,e
+  sla c : jr c,.mul8_24_1
+  sla c : jr c,.mul8_24_2
+  sla c : jr c,.mul8_24_3
+  sla c : jr c,.mul8_24_4
+  sla c : jr c,.mul8_24_5
+  sla c : jr c,.mul8_24_6
+  sla c : jr c,.mul8_24_7
+  sla c : ret c     ;C was 1: CAHL = BDE (C is 0 after the shifts)
+  ld a,c            ;C was 0: CAHL = 0
+  ld h,c
+  ld l,c
+  ret
+;Each step: double C:A:HL; the bit leaving the top of C is the next multiplier
+;bit. When it is set, add BDE to A:HL and carry into C.
+;`jr nc,$+7` jumps to the next step, `jr nc,$+3` skips the `inc c`.
+.mul8_24_1:
+    add hl,hl : rla : rl c : jr nc,$+7 : add hl,de : adc a,b : jr nc,$+3 : inc c
+.mul8_24_2:
+    add hl,hl : rla : rl c : jr nc,$+7 : add hl,de : adc a,b : jr nc,$+3 : inc c
+.mul8_24_3:
+    add hl,hl : rla : rl c : jr nc,$+7 : add hl,de : adc a,b : jr nc,$+3 : inc c
+.mul8_24_4:
+    add hl,hl : rla : rl c : jr nc,$+7 : add hl,de : adc a,b : jr nc,$+3 : inc c
+.mul8_24_5:
+    add hl,hl : rla : rl c : jr nc,$+7 : add hl,de : adc a,b : jr nc,$+3 : inc c
+.mul8_24_6:
+    add hl,hl : rla : rl c : jr nc,$+7 : add hl,de : adc a,b : jr nc,$+3 : inc c
+.mul8_24_7:
+    add hl,hl : rla : rl c : ret nc : add hl,de : adc a,b : ret nc : inc c
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ„¥«¥­¨¥ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;Signed division CHL/DE by Zeda, inspired by code from matrefeytontias.
+;Inputs:
+;   CHL is the signed 24-bit numerator (C is the most significant byte)
+;   DE  is the signed 16-bit denominator
+;Outputs:
+;   CHL is the signed quotient
+;   DE  is |remainder|
+;   B is 0; A, IX and the flags are clobbered
+sdiv24_16:
+;Get the sign of the result: bit 7 of C xor D, kept on the stack
+  ld a,c
+  xor d
+  push af
+
+;Make CHL positive (the second xor restores A = C and sets the sign flag)
+  xor d
+  jp p,.skip1
+  xor a
+  sub l
+  ld l,a
+  ld a,0
+  sbc a,h
+  ld h,a
+  sbc a,a
+  sub c
+  ld c,a
+.skip1:
+
+;Make DE negative, so the trial subtraction in the loop is a plain `add hl,de`
+;whose carry is the quotient bit. A divisor that is already negative must be
+;left alone, hence the jump on bit 7 being set.
+  bit 7,d
+  jr nz,.skip2
+  xor a
+  sub e
+  ld e,a
+  sbc a,a
+  sub d
+  ld d,a
+  ld a,c      ;the negation clobbered A; the loop needs A = C
+.skip2:
+
+;A:IX is the 24-bit numerator (and later the quotient), HL is the remainder
+  ld b,24
+  push hl
+  pop ix
+  ld hl,0
+
+.loop:
+  add ix,ix
+  rla
+  adc hl,hl
+  add hl,de   ;HL - |DE|; carry set means it fit
+  jr c,.skip3
+  sbc hl,de   ;did not fit: restore HL (carry is 0 here and stays 0)
+  db #DA      ;opcode of `jp c,nn`: never taken, swallows the 2-byte `inc ixl`
+.skip3:
+  inc ixl     ;set the quotient bit
+  djnz .loop
+  ld c,a
+  ex de,hl    ;DE is remainder
+
+  push ix
+  pop hl
+
+;restore sign
+  pop af
+  ret p
+  xor a
+  sub l
+  ld l,a
+  ld a,b      ;B is 0 after the djnz loop
+  sbc a,h
+  ld h,a
+  sbc a,a
+  sub c
+  ld c,a
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Adapted from Axe
+;Signed 16-bit wrapper around the external `div16` routine.
+;Both operands are made non-negative, `div16` is called with them in HL and
+;DE, and the HL it returns is negated when the operand signs differed.
+p_SDiv:
+    ld a,h
+    xor d
+    push af                 ; bit 7 = sign of the result
+    xor d                   ; A = H again, sign flag reflects HL
+    call m,.negHL           ; HL = |HL|
+    bit 7,d
+    jr z,.divide
+    xor a : sub e : ld e,a  ; DE = -DE
+    sbc a,a : sub d : ld d,a
+.divide:
+    call div16              ;normal routine division
+    pop af
+    ret p                   ; signs matched: result stays as it is
+;fall through to negate the result
+.negHL:
+    xor a : sub l : ld l,a  ; HL = -HL
+    sbc a,a : sub h : ld h,a
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Unsigned 32-bit by 8-bit division.
+;Input: HLDE is numerator, C<129 is the divisor.
+;Output: HLDE is quotient, A is remainder, C is negated
+;B is clobbered (it holds the top quotient byte while the others are computed).
+;1021+4{0,15}
+;min: 1021cc
+;max: 1081cc
+;avg: 1051cc
+;87 bytes
+HLDE_Div_C:
+;Negate the divisor, then fall through into HLDE_Div_negC.
+  xor a
+  sub c
+  ld c,a
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Entry point for callers that already hold the negated divisor in C.
+;Note: -C<129
+;1009+4{0,15}
+;min: 1009cc
+;max: 1069cc
+;avg: 1039cc
+;84 bytes
+HLDE_Div_negC:
+;A is the running remainder. Each numerator byte is passed through H, because
+;.div always works on H. `xor a` also clears carry for the very first `rl h`.
+  xor a
+  call .div
+  ld b,h
+
+  ld h,l
+  call .div
+  ld l,h
+
+  ld h,d
+  call .div
+  ld d,h
+
+  ld h,e
+  call .div
+  ld e,h
+
+;Every quotient bit is shifted into H one step late (by the following `rl h`),
+;so the whole 32-bit result is rotated left once more to bring in the last bit
+;that is still waiting in carry.
+  ld h,b
+  rl e
+  rl d
+  adc hl,hl
+  ret
+
+;Divides the byte in H, with A as incoming and outgoing remainder.
+;`add a,c` with C = -divisor sets carry exactly when A >= divisor; that carry
+;is the quotient bit and is picked up by the next `rl h`. When it does not
+;fit, `sub c` restores A and leaves carry clear.
+;216+7{0,1}+{0,8}
+;min: 216cc
+;max: 231cc
+;avg: 224.5cc
+.div:
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : jr c,$+3 : sub c
+  rl h : rla  : add a,c : ret c : sub c
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Written by calc84maniac, based on a routine from Zeda
+;===============================================================
+;===============================================================
+;Performs signed HL/BC
+;Speed:   1168 to 1318 cycles depending on how many set bits in the result
+;         add 19 if HL is negative
+;         add 19 if BC is positive
+;         add another 28 if only one is negative
+;Size:    54 bytes
+;         **31 bytes larger than the regular HL_Div_BC
+;Inputs:
+;     HL is the numerator
+;     BC is the denominator
+;Outputs:
+;     HL is the quotient
+;     DE is the remainder
+;     BC = -abs(BC)
+;NOTE(review): for HL = -32768 the `add hl,hl` below yields 0, so the numerator
+;appears to be treated as 0 - confirm whether callers can pass that value.
+;===============================================================
+HL_SDiv_BC:
+;Bit 7 of H xor B is the sign of the result; keep it on the stack.
+     ld a,h
+     xor b
+     push af
+.absHL:
+;Doubling HL moves its sign bit into carry. Negating the doubled value then
+;gives 2*|HL|, i.e. |HL| with its (zero) top bit already shifted out, which is
+;why the loop below runs only 15 times.
+     add hl,hl
+     jr nc,.negabsBC
+     xor a : sub l : ld l,a
+     sbc a,a : sub h : ld h,a
+.negabsBC:
+;Force BC negative so the trial subtraction is `add hl,bc`.
+;The jump skips the 6-byte negation when BC is already negative.
+     bit 7,b
+     jr nz,$+8
+     xor a : sub c : ld c,a
+     sbc a,a : sub b : ld b,a
+;DE = shifted numerator / quotient, HL = remainder, A = loop counter.
+       ex de,hl
+       xor a
+       ld h,a
+       ld l,a
+       ld a,15
+.Div_Loop_1:
+;`rl e` brings in the previous quotient bit from carry (0 the first time).
+         rl e : rl d
+         adc hl,hl
+         add hl,bc
+         jr c,$+4
+          sbc hl,bc
+;`dec a` leaves carry alone, so the quotient bit survives to the next `rl e`.
+         dec a
+         jr nz,.Div_Loop_1
+;Swap remainder into DE and shift the last quotient bit into HL.
+       ex de,hl
+       adc hl,hl
+;Negate the quotient if the operand signs differed.
+       pop af : ret p
+     xor a : sub l : ld l,a
+     sbc a,a : sub h : ld h,a
+     ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/C, rounded to the nearest integer.
+;Inputs:
+;     HL is the numerator
+;     C<128 is the denominator
+;Outputs:
+;     HL is the rounded quotient
+;     A is twice the remainder of the unrounded division
+;     B is 0
+;     C and DE are not changed
+;     carry set   = the quotient was not rounded up
+;     carry reset = the quotient was rounded up
+HL_Div_C_round:
+    xor a                   ; A = running remainder
+    ld b,16                 ; one pass per numerator bit
+.next_bit:
+    add hl,hl               ; next numerator bit -> carry, bit 0 of L cleared
+    rla
+    cp c
+    jr c,.no_sub            ; remainder still smaller than the divisor
+    inc l                   ; quotient bit = 1
+    sub c
+.no_sub:
+    djnz .next_bit
+    add a,a                 ; 2*remainder >= C means the fraction is >= 1/2
+    cp c
+    ret c
+    inc hl
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;16-bit by 8-bit unsigned division, fully unrolled: HL/B.
+;The 8 quotient bits are built in L and the remainder is left in A.
+;NOTE(review): only 8 quotient bits are produced, so this presumably requires
+;H < B (quotient below 256) - confirm against callers.
+;I'm not positive on the timing.
+;min: 203
+;max: 308
+;avg: 236.125
+HL_Div_B:
+;Shift the numerator once; A then holds the first remainder candidate.
+  add hl,hl
+  ld a,h
+;A carry out of the shift means the remainder has a 9th bit and must be
+;reduced; the .div16_8_2_x chain handles that case.
+  jr c,.div16_8_2_0
+;.div16_8_1_x chain: 8-bit remainder. Compare, subtract if it fits and set the
+;quotient bit in L, then shift the next numerator bit from L into A.
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_1
+.div16_8_1_1:
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_2
+.div16_8_1_2:
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_3
+.div16_8_1_3:
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_4
+.div16_8_1_4:
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_5
+.div16_8_1_5:
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_6
+.div16_8_1_6:
+  cp b
+  jr c,$+4
+  sub b : inc l
+  sla l : rla
+  jr c,.div16_8_2_7
+.div16_8_1_7:
+;Last bit: no further shift, just the final compare/subtract.
+  cp b : ret c : sub b : inc l
+  ret
+
+;.div16_8_2_x chain: the remainder overflowed 8 bits, so B is subtracted
+;without a compare. `rl l` then shifts the carry left by that `sub b` into L
+;as the quotient bit, and `rla` brings the next numerator bit into A.
+.div16_8_2_0:
+  sub b : rl l : rla : jr nc,.div16_8_1_1
+.div16_8_2_1:
+  sub b : rl l : rla : jr nc,.div16_8_1_2
+.div16_8_2_2:
+  sub b : rl l : rla : jr nc,.div16_8_1_3
+.div16_8_2_3:
+  sub b : rl l : rla : jr nc,.div16_8_1_4
+.div16_8_2_4:
+  sub b : rl l : rla : jr nc,.div16_8_1_5
+.div16_8_2_5:
+  sub b : rl l : rla : jr nc,.div16_8_1_6
+.div16_8_2_6:
+  sub b : rl l : rla : jr nc,.div16_8_1_7
+.div16_8_2_7:
+  sub b : inc l
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+HL_Div_384:
+;223cc
+  ;(HL+HL*5*17*2)/256
+;NOTE(review): the code below does not appear to implement the formula above
+;and should be checked against its original source before it is relied on:
+; - the input is saved with `push hl` and recovered into DE with `pop de`,
+;   but DE is never added back in;
+; - the running value in A:HL is 5*HL, then 85*HL, then it is doubled twice
+;   (340*HL), which no longer fits in 24 bits for large HL;
+; - what is left in A on return is the top byte of that value plus bit 7 of L.
+;Clobbers A, B, C, D, E, H and L.
+  push hl
+  ld b,h
+  ld c,l
+  xor a
+;A:HL = 4*HL, then +HL = 5*HL
+  add hl,hl : rl a
+  add hl,hl : rl a
+  add hl,bc : adc a,0
+;keep 5*HL in D:BC, A:HL = 80*HL, then +5*HL = 85*HL
+  ld d,a
+  ld b,h
+  ld c,l
+  add hl,hl : rl a
+  add hl,hl : rl a
+  add hl,hl : rl a
+  add hl,hl : rl a
+  add hl,bc : adc a,d
+;A:HL = 170*HL
+  add hl,hl : rla
+;DE = original HL (not used afterwards)
+  pop de
+;A:HL = 340*HL
+  add hl,hl : rl a
+;add bit 7 of L to A
+  sla l
+  adc a,0
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/7, rounded to the nearest integer --> HL
+;Inputs:
+;     HL is the unsigned numerator
+;Outputs:
+;     HL is the rounded quotient
+;     A is the remainder (0..6) of the unrounded division
+;     B is 0, C is 7
+;     DE is not changed
+;     carry set means the quotient was not rounded up
+;Plain 16-step shift-and-subtract division; approximately 730cc to 780cc.
+HL_Div_7_round:
+    ld bc,#1007             ; B = 16 numerator bits, C = divisor 7
+    xor a                   ; A = running remainder
+.next_bit:
+    add hl,hl               ; next numerator bit -> carry, bit 0 of L cleared
+    rla
+    cp c
+    jr c,.no_sub
+    sub c
+    inc l                   ; quotient bit = 1
+.no_sub:
+    djnz .next_bit
+;Round to nearest: 2*remainder >= 7 is the same as remainder >= 4.
+    cp 4
+    ret c
+    inc hl
+    ret
+;AH/16
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/5 by multiplying with an approximation of 1/5; the result is returned in HL.
+;Clobbers A, B, C, D and E.
+;210cc or 220cc
+HL_Div_5_round:
+    xor a
+    ld d,h : ld e,l : ld b,a
+;A:HL = 3*HL (B is a constant 0 used to add carries)
+    add hl,hl : rla
+    add hl,de : adc a,b
+;keep 3*HL in C:DE, A:HL = 48*HL, then +3*HL = 51*HL
+    ld d,h : ld e,l : ld c,a
+    add hl,hl : rla
+    add hl,hl : rla
+    add hl,hl : rla
+    add hl,hl : rla
+    add hl,de : adc a,c
+;add the upper two bytes of A:HL to its lower two bytes (x257/256)
+    ld d,a : ld e,h
+    add hl,de : adc a,b
+;DE = upper two bytes = quotient estimate
+    ld d,a : ld e,h
+;The rest derives a rounding correction from A+L and adds it to the estimate.
+;NOTE(review): the exact derivation of this correction is not obvious from
+;the code alone - verify before modifying.
+    add a,l
+    ex de,hl
+    rla : rla : and 3 : rra
+    adc a,b
+    add a,l
+    ld l,a
+    ret nc
+    inc h
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/5 --> DE (exact floor of the unsigned quotient for every 16-bit HL)
+;Computes the top 16 bits of HL*3*17*257*256 + HL/4, i.e.
+;floor((13107*HL + floor(HL/4)) / 65536).
+;Clobbers A, B, C, H and L.
+;approximately 251cc or 252cc
+HL_Div_5:
+  xor a
+  ld b,h
+  ld c,l
+  ld d,a
+  add hl,hl : rla
+  add hl,bc : adc a,d   ;3
+  add hl,hl : rla       ;6
+  add hl,hl : rla       ;12
+  add hl,hl : rla       ;24
+  add hl,bc : adc a,d   ;25
+  add hl,hl : rla       ;50
+  add hl,bc : adc a,d   ;51
+;A:HL is now 51*HL. The wanted value is the top two bytes of the 32-bit sum
+;   A:H:L:0  +  0:A:H:L  +  0:0:(HL/4)
+  srl b : rr c
+  srl b : rr c          ;BC = HL/4
+  ld d,a
+  ld e,h                ;DE = A:H, the top two bytes of the first term
+  ld a,l                ;keep L, it is also byte 1 of the first term
+  add hl,bc             ;bytes 1..0 of the 2nd and 3rd term; carry goes to byte 2
+  ld c,d                ;BC = 0:A, byte 2 of the second term
+  ld b,0                ;(loads leave the carry of `add hl,bc` intact)
+  jr nc,.no_carry
+  inc de
+.no_carry:
+  add a,h               ;byte 1 of the first term; carry goes to byte 2
+  ex de,hl
+  adc hl,bc             ;A:H (+carry) + A + carry
+  ex de,hl              ;DE = quotient
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/3, rounded --> HL
+;Multiplies HL by 5*17*257/65536 (about 1/3) and rounds on the first
+;discarded bit. Clobbers A, B, D and E.
+;The constant is slightly below 1/3, so for large inputs the result can be one
+;less than the correctly rounded value (e.g. 65534 gives 21844, not 21845).
+;200cc or 210cc
+HL_Div_3_round:
+    xor a : ld d,h : ld e,l
+    add hl,hl : rla
+    add hl,hl : rla
+    add hl,de : adc a,0         ;A:HL = 5*HL, including the carry into A
+    ld d,h : ld e,l : ld b,a    ;keep 5*HL in B:DE
+    add hl,hl : rla
+    add hl,hl : rla
+    add hl,hl : rla
+    add hl,hl : rla             ;A:HL = 80*HL
+    add hl,de : adc a,b         ;A:HL = 85*HL
+    ld d,a : ld e,h : add hl,de ;add the upper two bytes to the lower two (x257/256)
+    adc a,0
+    sla l                       ;first discarded bit -> carry
+    ld l,h
+    ld h,a                      ;HL = upper two bytes
+    ret nc
+    inc hl                      ;round up
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/3 --> HL
+;The quotient is assembled in DE, but the final `ex de,hl` moves it into HL
+;before returning, so DE holds scratch data on exit.
+;Clobbers A, B, C, D and E.
+;209cc to 219cc
+HL_Div_3:
+  xor a
+  ld b,a
+  ld d,h
+  ld e,l
+;A:HL = 85*HL, built as ((HL*4+HL)*4+HL)*4+HL; B is a constant 0 for carries
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,de : adc a,b
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,de : adc a,b
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,de : adc a,b
+;AHL+(AHL+(DE>>1))/256
+  srl d : rr e
+;AHL+(AHL+DE)/256
+;AH.L+A.HL+.DE
+  ld b,h
+  ld c,l
+;AB.C+A.HL+.DE
+  add hl,de
+;AB.C+A.HL+carry
+  ld d,a
+;DB.C+A.H+carry
+  adc a,b
+  ld e,a
+;propagate a carry out of the low result byte into the high one
+  jr nc,$+3
+  inc d
+;DE.C+0.H+carry
+  ld a,h
+  add a,c
+;result into HL; one more carry from the fractional bytes may still be due
+  ex de,hl
+  ret nc
+  inc hl
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Second HL/3 variant. It carries the _v2 suffix because the label HL_Div_3 is
+;already defined earlier in this file and a global label cannot be defined twice.
+;Input: HL
+;Output: HL is the input divided by 3 (rounded down)
+;Destroys: B,C,E,A
+;217cc
+HL_Div_3_v2:
+;increment HL, putting overflow in A
+  ld bc,1
+  ld a,b
+  add hl,bc
+  adc a,b
+;We want a difference of a factor of 2 shifts: A:HL = 5*(input+1)
+  ld b,h
+  ld c,l
+  ld e,a
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,bc : adc a,e
+;We want a difference of a factor of 4 shifts: A:HL = 85*(input+1)
+  ld b,h
+  ld c,l
+  ld e,a
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,hl : rla
+  add hl,bc : adc a,e
+;add the upper two bytes to the lower two (x257/256) and keep the upper two
+  ld b,a
+  ld c,h
+  add hl,bc
+  adc a,0
+  ld l,h
+  ld h,a
+;now HL is our result
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;EHL/D: unsigned 24-bit by 8-bit division.
+;The quotient replaces EHL, the remainder is returned in A, B is 0 on exit.
+;1360+24({0,3+{0,3}})
+;min: 1360cc
+;max: 1504cc
+;avg: 1414cc
+;17 bytes
+EHL_Div_D:
+    ld b,24                 ; one pass per numerator bit
+    xor a                   ; A = running remainder
+.next_bit:
+    add hl,hl               ; shift E:HL left, top bit ends up in A
+    rl e
+    rla
+    jr c,.subtract          ;if D is guaranteed <129, can omit this
+    cp d
+    jr c,.keep              ; remainder smaller than D: quotient bit stays 0
+.subtract:
+    sub d
+    inc l                   ; quotient bit = 1
+.keep:
+    djnz .next_bit
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Signed 8.8 fixed-point division BC/DE.
+;Inputs:
+;     DE,BC are 8.8 Fixed Point numbers
+;Outputs:
+;     HL is the 8.8 Fixed Point result (rounded to the least significant bit)
+;     On overflow (including DE = 0) HL is #7FFF, or #8001 if the result
+;     would have been negative.
+;if DE is 0 : 122cc or 136cc if BC is negative
+;if |BC|>=128*|DE| : 152cc or 166cc if BC is negative
+;Otherwise:
+;min: 1164cc
+;max: 1377cc
+;avg: 1258.5cc
+BC_Div_DE_88:
+; First, find out if the output is positive or negative
+    ld a,b
+    xor d
+    push af   ;sign bit is the result sign bit
+
+; Now make sure the inputs are positive
+    xor d     ;A now has the value of B, since I XORed it with D twice (cancelling)
+    jp p,.skip1   ;if Positive, don't negate
+    xor a
+    sub c
+    ld c,a
+    sbc a,a
+    sub b
+    ld b,a
+.skip1:
+
+;now make DE negative to optimize the remainder comparison
+    ld a,d
+    or d
+    jp m,.skip2
+    xor a
+    sub e
+    ld e,a
+    sbc a,a
+    sub d
+    ld d,a
+.skip2:
+
+;if DE is 0, we can call it an overflow
+;A is the current value of D
+    or e
+    jr z,.div_fixed88_overflow
+
+;The accumulator gets set to B if no overflow.
+;We can use H=0 to save a few cc in the meantime
+    ld h,0
+
+;if B+DE>=0, then we'll have overflow
+    ld a,b
+    add a,e
+    ld a,d
+    adc a,h
+    jr c,.div_fixed88_overflow
+
+;Now we can load the accumulator/remainder with B
+;H is already 0
+    ld l,b
+
+;integer part of the quotient
+    ld a,c
+    call .div_fixed88_sub
+    ld c,a
+
+;fractional part of the quotient
+    ld a,b      ;A is now 0
+    call .div_fixed88_sub
+
+; if 2HL+DE>=0, increment result to round.
+    add hl,hl
+    add hl,de
+    ld h,c
+    ld l,a
+    jr nc,.no_round
+    inc hl
+.no_round:
+
+;Now check if H is overflowed
+    bit 7,h
+    jr nz,.div_fixed88_overflow
+
+;negate the result if the input signs differed
+    pop af
+    ret p
+    xor a
+    sub l
+    ld l,a
+    sbc a,a
+    sub h
+    ld h,a
+    ret
+
+.div_fixed88_overflow:
+    ld hl,#7FFF
+    pop af
+    ret p
+    inc hl      ;#8000
+    inc l       ;#8001
+    ret
+
+;Shifts the 8 bits of A through the remainder HL and returns the matching
+;8 quotient bits in A. DE is the negated divisor, B is 0 on exit.
+.div_fixed88_sub:
+;min: 456cc
+;max: 536cc
+;avg: 496cc
+    ld b,8
+.loop:
+    rla         ;previous quotient bit in, next numerator bit out
+    adc hl,hl
+    add hl,de   ;carry set means the divisor fit
+    jr c,.fits
+    sbc hl,de   ;did not fit: restore, carry stays clear
+.fits:
+    djnz .loop
+    adc a,a     ;bring in the last quotient bit
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HLIX/BC -> HLIX remainder DE
+;Unsigned 32-bit by 16-bit division. The numerator is processed one byte at a
+;time (H, L, IXH, IXL); BC is left holding the negated divisor.
+;174+4*.sub8
+;min: 2186cc
+;max: 2794cc
+;avg: 2466cc
+;61 bytes
+div32_16:
+;DE = upper numerator word; HL becomes the running remainder
+  ex de,hl   ; 4
+; Negate BC to allow add instead of sbc
+  xor a      ; 4
+; Need to set HL to 0 anyways, so save 2cc and a byte
+  ld h,a     ; 4
+  ld l,a     ; 4
+  sub c      ; 4
+  ld c,a     ; 4
+  sbc a,a    ; 4
+  sub b      ; 4
+  ld b,a     ; 4
+
+
+;Each byte: .sub8 runs 8 division steps on A, then the `rla` brings in the
+;last quotient bit (and pushes out the stale bit that entered first).
+  ld a,d              ; 4
+  call .sub8  ; 17
+  rla                 ; 4
+  ld d,a              ; 4
+
+  ld a,e              ; 4
+  call .sub8  ; 17
+  rla                 ; 4
+  ld e,a              ; 4
+
+  ld a,ixh            ; 8
+  call .sub8  ; 17
+  rla                 ; 4
+  ld ixh,a            ; 8
+
+  ld a,ixl            ; 8
+  call .sub8  ; 17
+  rla                 ; 4
+  ld ixl,a            ; 8
+
+;HL = upper quotient word, DE = remainder
+  ex de,hl   ; 4
+  ret        ; 10
+
+;Runs .sub eight times: each `call` below executes the code that follows it
+;once as a subroutine and once more by falling into it after the return
+;(2*2*2 = 8).
+.sub8:
+;119+8*.sub
+;min: 503cc
+;max: 655cc
+;avg: 573cc
+  call .iter1
+.iter1:
+;17+2(17+2(.sub)))
+  call .iter2
+.iter2:
+;17+2(.sub)
+  call .sub
+;One division step: shift the next bit of A into the remainder HL and try to
+;subtract the divisor (BC is negated, so this is an add). Carry on return is
+;the quotient bit.
+.sub:
+;48+{8,0+{0,19}}
+;min: 48cc
+;max: 67cc
+;avg: 56.75cc
+  rla        ; 4
+  adc hl,hl  ; 15
+;a carry out of HL means the remainder has a 17th bit, so the divisor must fit
+  jr c,.skip    ;12/7
+  add hl,bc  ; 11
+  ret c      ;11/5
+;did not fit: restore the remainder, carry stays clear
+  sbc hl,bc  ; 15
+  ret        ; 10
+.skip:
+  add hl,bc  ; 11
+  scf        ; 4
+  ret        ; 10
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;HL/9 --> A, HL<2304
+;Multiplies (HL+1) by a fixed-point approximation of 1/9 using shifts and adds;
+;the quotient is collected in A. Clobbers D, E, H and L.
+div9:
+  inc hl
+;HL = 7*(HL+1), built as ((HL*2)+HL)*2+HL
+  ld d,h
+  ld e,l
+  add hl,hl
+  add hl,de
+  add hl,hl
+  add hl,de
+;DE = L*256 (the low byte moved up one byte), A = H
+  ld e,0
+  ld d,l
+  ld a,h
+;HL = 4*HL, then add DE; a carry out of that add is counted in A (E is 0)
+  add hl,hl
+  add hl,hl
+  add hl,de
+  adc a,e
+;shift the top two bits of HL into A
+  add hl,hl
+  rla
+  add hl,hl
+  rla
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Made by Zeda Thomas, use it for whatever, and please optimize this!
+;Slight Warning: This passed a handful of tests, but if you find a bug,
+;please report it. I still actively maintain these (as of January 2020).
+;Unsigned 32-bit by 32-bit division.
+;Inputs:
+;   HLIX/BCDE
+;Outputs:
+;   HLIX is the quotient
+;   BCDE is the remainder
+;RAM:
+;   uses 8 bytes of RAM:
+;     4 bytes at temp32_0  (numerator on entry, quotient while dividing)
+;     4 bytes at temp32_1  (backup of the remainder during a trial subtraction)
+;   Both are global labels that must be defined elsewhere.
+;
+;min: 5240cc
+;max: 6264cc
+;avg: 5752cc
+;113 bytes
+div_32_32:
+; Back up HLIX
+  ld (temp32_0),ix
+  ld (temp32_0+2),hl
+
+
+;negate BCDE, so the trial subtraction becomes an addition
+  xor a
+  ld l,a : sbc a,e : ld e,a
+  ld a,l : sbc a,d : ld d,a
+  ld a,l : sbc a,c : ld c,a
+  ld a,l : sbc a,b : ld b,a
+
+;A = most significant numerator byte
+  ld a,h
+;set HLIX to 0 (L is already 0); HLIX is the running remainder from here on
+  ld h,l
+  ld ix,0
+;Each numerator byte, from the top down, is replaced by its quotient byte.
+  call .sub
+  ld (temp32_0+3),a
+
+  ld a,(temp32_0+2)
+  call .sub
+  ld (temp32_0+2),a
+
+  ld a,(temp32_0+1)
+  call .sub
+  ld (temp32_0+1),a
+
+  ld a,(temp32_0+0)
+  call .sub
+  ld (temp32_0),a
+
+;BCDE = remainder, HLIX = quotient
+  push ix
+  pop de
+  ld b,h
+  ld c,l
+  ld ix,(temp32_0)
+  ld hl,(temp32_0+2)
+  ret
+
+
+
+;Runs .iter3 eight times: each `call` below executes the code that follows it
+;once as a subroutine and once more by falling into it after the return.
+.sub:
+;min: 1223cc
+;max: 1479cc
+;avg: 1351cc
+
+  call .iter1
+.iter1:
+  call .iter2
+.iter2:
+  call .iter3
+.iter3:
+;min: 138cc
+;max: 170cc
+;avg: 154cc
+;HLIX*2
+  add ix,ix
+  adc hl,hl
+
+;rotate in the bit (IX is even after doubling, so `inc ix` cannot carry)
+  add a,a
+  jr nc,.skip1
+  inc ix
+.skip1:
+
+;save HLIX in case we need to restore
+  ld (temp32_1),ix
+  ld (temp32_1+2),hl
+
+;check if HLIX>=-BCDE
+;     ==> HLIX+BCDE >= 0
+  add ix,de
+  adc hl,bc
+  jr c,.skip2
+
+;we need to restore
+  ld ix,(temp32_1)
+  ld hl,(temp32_1+2)
+  ret
+.skip2:
+;the divisor fit: set the quotient bit (bit 0 of A is 0 after `add a,a`)
+  inc a
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Created by calc84maniac
+;NOTE from Zeda: C should <=128, the original forgot to mention this.
+;Unsigned 32-bit by 8-bit division.
+;Inputs: dehl=32-bit dividend, c<=128 is the divisor
+;Outputs: dehl=32-bit quotient, a=remainder, c=unchanged, b=0
+;min: 1936cc
+;max: 2032cc
+;avg: 1984cc
+;Size: 17 bytes
+DEHL_Div_C:
+.div32bit:
+    xor a                   ; A = running remainder
+    ld b,32                 ; one pass per dividend bit
+.next_bit:
+    add hl,hl               ; shift DE:HL left, top bit ends up in A
+    rl e
+    rl d
+    rla
+    cp c
+    jr c,.no_sub            ; remainder smaller than C: quotient bit stays 0
+    inc l                   ; quotient bit = 1
+    sub c
+.no_sub:
+    djnz .next_bit
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Unsigned DEHL/10.
+;Inputs:
+;     DEHL
+;Outputs:
+;     DEHL is the quotient
+;     A is the remainder
+;     B is 0
+;     C is -10 (#F6)
+;1300cc~1329cc
+;49 bytes
+DEHL_Div_10_v1:
+    xor a
+;B = 5 remaining steps for the top byte, C = -10 so that `add a,c` subtracts 10
+;and leaves the quotient bit in carry
+    ld bc,05F6h
+;The top three bits can never reach 10, so they are shifted in without a compare
+    rl d : rla
+    rl d : rla
+    rl d : rla
+;Each step: the `rl` brings in the previous quotient bit and pushes out the
+;next dividend bit; `add a,c` tests for >= 10 and `sub c` undoes it when the
+;result was negative.
+    rl d : rla : add a,c : jr c,$+3 : sub c : djnz $-7
+    ld b,8
+    rl e : rla : add a,c : jr c,$+3 : sub c : djnz $-7
+    ld b,8
+    rl h : rla : add a,c : jr c,$+3 : sub c : djnz $-7
+    ld b,8
+    rl l : rla : add a,c : jr c,$+3 : sub c : djnz $-7
+;Quotient bits enter one step late, so rotate the whole result left once more
+;to bring in the last one from carry.
+    adc hl,hl
+    rl e
+    rl d
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Unsigned DEHL/10, fully unrolled.
+;Inputs:
+;     DEHL
+;Outputs:
+;     DEHL is the quotient
+;     A is the remainder
+;     B is the remainder
+;     C is 10
+;919cc~948cc
+DEHL_Div_10_v2:
+    xor a
+    ld c,10
+;The top three bits can never reach 10, so they are shifted in without a compare
+    rl d : rla
+    rl d : rla
+    rl d : rla
+;Each step: the `rl` brings in the previous carry and pushes out the next
+;dividend bit. After `sub c` (restored by `add a,c` on borrow) the carry is the
+;INVERTED quotient bit: 0 when 10 fit, 1 when it did not.
+    rl d : rla : sub c : jr nc,$+3 : add a,c
+    rl d : rla : sub c : jr nc,$+3 : add a,c
+    rl d : rla : sub c : jr nc,$+3 : add a,c
+    rl d : rla : sub c : jr nc,$+3 : add a,c
+    rl d : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl e : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl h : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    rl l : rla : sub c : jr nc,$+3 : add a,c
+    ld b,a
+;DEHL holds the inverted quotient, one bit short: rotate it left once more to
+;bring in the last bit from carry, and complement each byte (`cpl` leaves the
+;carry alone, so the rotate chain continues across the four bytes).
+    ld a,l : rla : cpl : ld l,a
+    ld a,h : rla : cpl : ld h,a
+    ld a,e : rla : cpl : ld e,a
+;The top three bits of D come from the uncompared steps and are always 0 before
+;the complement, so they must be masked off afterwards.
+    ld a,d : rla : cpl : and #1F : ld d,a
+    ld a,b
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Unsigned 8-bit by 8-bit division.
+;Inputs:
+;     C is the numerator
+;     D is the denominator
+;Outputs:
+;     A is the remainder
+;     B is 0
+;     C is the result of C/D
+;     D,E,H,L are not changed
+C_Div_D:
+    xor a                   ; A = running remainder
+    ld b,8                  ; one pass per numerator bit
+.next_bit:
+    sla c                   ; next numerator bit -> carry, bit 0 of C cleared
+    rla
+    cp d
+    jr c,.no_sub            ; remainder smaller than D: quotient bit stays 0
+    inc c                   ; quotient bit = 1
+    sub d
+.no_sub:
+    djnz .next_bit
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Divides a big-endian bignum in memory by 100 (or by C), in place.
+;Input:
+;   HL points to the bignum (1 byte size prefix (0 -> 1 byte, 1 -> 2 bytes, n-1 -> n bytes), n subsequent bytes)
+;   The bytes are processed in memory order, most significant byte first.
+;Output:
+;   bignum is divided in-place, not renormalized
+;   A is the remainder
+;   BC is 100 (bignum_div_C: B is 0, C is not changed)
+;   HL points to the last byte of the bignum, E is clobbered
+bignum_div_100:
+    ld c,100
+bignum_div_C:
+;Note: C<128
+    ld b,(hl)               ; B = number of bytes after the first one
+    inc hl
+;First byte: divide by repeated subtraction. The count is kept in E, because
+;HL is still needed as the pointer.
+    ld a,(hl)
+    ld e,-1
+.first:
+    inc e : sub c : jr nc,.first
+    add a,c                 ; undo the last subtraction: A = remainder
+    ld (hl),e               ; store the quotient byte
+    inc b
+    dec b
+    ret z                   ; single-byte number: done
+.loop:
+;Remaining bytes: 8 shift-and-subtract steps on A:E. A carries the remainder
+;from byte to byte, E collects the quotient bits.
+    inc hl
+    ld e,(hl)
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    sla e : rla  : cp c : jr c,$+4 : sub c : inc e
+    ld (hl),e               ; store the quotient byte, keep the remainder in A
+    djnz .loop
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;BC/DE ==> BC, remainder in HL
+;Unsigned 16-bit division. The numerator is held in A:C while the loop runs,
+;because B is needed as the loop counter.
+BC_Div_DE:
+	ld	hl,0
+	ld	a,b
+	ld	b,16
+.loop:
+	;shift the bits from BC into HL
+	sla	c 
+	rla
+	adc	hl,hl
+	;trial subtraction; no borrow means the divisor fit
+	sbc	hl,de
+	jr	nc,.inc_acc
+	;did not fit: restore the remainder and skip the `inc c`
+	add	hl,de
+	db	#FE     ;this begins the instruction `cp *`, so it eats the next byte.
+.inc_acc:
+	;set the quotient bit (bit 0 of C is 0 after `sla c`)
+	inc	c
+	djnz	.loop
+	;move the upper quotient byte back into B
+	ld	b,a
+	ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+BC_Div_DE_faster:
+;BC/DE ==> BC, remainder in HL
+;Unsigned 16-bit division, fully unrolled. DE is left holding the negated
+;divisor.
+;NOTE: BC/0 returns 0 as the quotient.
+;min: 738cc
+;max: 898cc
+;avg: 818cc
+;144 bytes
+;HL = 0 (remainder) and DE = -DE, so the trial subtraction is `add hl,de`
+  xor a
+  ld h,a
+  ld l,a
+  sub e
+  ld e,a
+  sbc a,a
+  sub d
+  ld d,a
+
+;Upper numerator byte. Each step: `rla` brings in the previous quotient bit and
+;pushes out the next numerator bit, `add hl,de` sets carry when the divisor
+;fit, and `sbc hl,de` restores the remainder (leaving carry clear) when not.
+  ld a,b
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+;bring in the last quotient bit of this byte
+  rla
+  ld b,a
+
+;Lower numerator byte, same scheme
+  ld a,c
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla
+  ld c,a
+
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;BC/DE ==> BC, remainder in HL
+;Unsigned 16-bit division. One unrolled 8-step block is run twice, once per
+;numerator byte. DE is left holding the negated divisor.
+;NOTE: BC/0 returns 0 as the quotient.
+;min: 773cc
+;max: 933cc
+;avg: 853cc
+;82 bytes
+BC_Div_DE_fast:
+;HL = 0 (remainder) and DE = -DE, so the trial subtraction is `add hl,de`
+  xor a
+  ld h,a
+  ld l,a
+  sub e
+  ld e,a
+  sbc a,a
+  sub d
+  ld d,a
+
+;First pass: A = upper numerator byte, the lower byte is parked in B.
+;.sub returns the quotient byte in C.
+  ld a,b
+  ld b,c
+  call .sub
+;Second pass: A = lower numerator byte, B = upper quotient byte; falling into
+;.sub leaves the lower quotient byte in C and returns to the caller.
+  ld a,b
+  ld b,c
+
+;Divides the byte in A through the remainder HL; the quotient byte is returned
+;in both A and C.
+.sub:
+;min: 354cc
+;max: 434cc
+;avg: 394cc
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+  rla : adc hl,hl : add hl,de : jr c,$+4 : sbc hl,de
+;bring in the last quotient bit
+  rla
+  ld c,a
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;BC/DE ==> BC, remainder in HL
+;Unrolled unsigned 16-bit division with a shortcut for divisors of 256 and up
+;(the upper quotient byte is then 0 and only 8 steps are needed).
+;It carries the _v2 suffix because the label BC_Div_DE is already defined
+;earlier in this file and a global label cannot be defined twice.
+;Divisors below 256 go through .smalldiv, whose 8-bit remainder arithmetic
+;appears to need E <= 128 - confirm before using it with larger 8-bit divisors.
+BC_Div_DE_v2:
+    ld hl,0
+    inc d
+    dec d
+    jr z,.smalldiv
+;D != 0: the quotient fits in C. The upper numerator byte becomes the initial
+;remainder and B the (zero) upper quotient byte.
+    ld l,b
+    ld b,h
+.nextpart:
+;Each step: `rla` brings in the previous carry and pushes out the next
+;numerator bit. After `sbc hl,de` (undone by `add hl,de` on borrow) the carry
+;is the INVERTED quotient bit.
+    ld a,c
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla : adc hl,hl : sbc hl,de : jr nc,$+3 : add hl,de
+    rla                     ; bring in the last (inverted) quotient bit
+    cpl
+    ld c,a
+    ret
+.smalldiv:
+;D = 0: divide the upper numerator byte by E first; A is the remainder.
+    xor a
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    rl b : rla : sub e : jr nc,$+3 : add a,e
+    ld l,a                  ; remainder so far -> HL (H is 0)
+    ld a,b
+    rla                     ; bring in the last (inverted) quotient bit
+    cpl
+    ld b,a                  ; B = upper quotient byte
+    jp .nextpart
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Divides a 48-bit integer by 100, where A holds the upper 8 bits and L holds the next 8, followed by DE and IX
+;Result is in HLDEIX, A is the remainder
+;Works byte by byte through AL_Div_C and its .rotate entry point, with A
+;carrying the remainder from one byte to the next.
+ALDEIX_div_100:
+    ld c,100
+ALDEIX_Div_C:
+;Note: C<128
+;top word of the quotient
+    call AL_Div_C
+    push hl
+;middle word: bytes D and E
+    ld l,d
+    call AL_Div_C.rotate
+    ld h,l
+    ld l,e
+    call AL_Div_C.rotate
+    push hl
+;low word: bytes IXH and IXL, fetched through DE
+    push ix
+    pop de
+    ld l,d
+    call AL_Div_C.rotate
+    ld h,l
+    ld l,e
+    call AL_Div_C.rotate
+;HL = low word here; the stack holds the middle word and, below it, the top word
+    pop de                  ; DE = middle word
+    ex (sp),hl              ; HL = top word, low word goes to the stack
+    pop ix                  ; IX = low word
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Divides a 32-bit integer by 100, where A holds the upper 8 bits and L holds the next 8, followed by DE
+;Result is in DEHL, A is the remainder
+;Works byte by byte through AL_Div_C and its .rotate entry point, with A
+;carrying the remainder from one byte to the next.
+ALDE_div_100:
+    ld c,100
+ALDE_Div_C:
+;Note: C<128
+;upper word of the quotient, parked on the stack
+    call AL_Div_C
+    push hl
+;third byte
+    ld l,d
+    call AL_Div_C.rotate
+    ld h,l
+;fourth byte; the upper word is popped into DE first, and the final divide is
+;a tail call whose `ret` returns to our caller
+    ld l,e
+    pop de
+    jp AL_Div_C.rotate
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+AL_div_100:
+;Divides a 16-bit integer by 100, where A holds the upper 8 bits and L holds the lower
+;Result is in HL, A is the remainder
+;min:256
+;max:329
+;avg:305.5625cc
+    ld c,100
+AL_Div_C:
+;Note: C<128
+;Upper byte: divide A by C with repeated subtraction, counting in H.
+    ld h,-1
+    inc h : sub c : jr nc,$-2
+;undo the last subtraction: A = remainder of the upper byte
+    add a,c
+;Entry point for the following bytes of a longer number: expects the remainder
+;so far in A (below C) and the next byte in L; returns that byte's quotient in
+;L and the new remainder in A. H is not touched.
+.rotate:
+;8 shift-and-subtract steps on A:L; `inc l` sets the quotient bit in the bit
+;that `sla l` just cleared.
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : jr c,$+4 : sub a,c : inc l
+    sla l : rla  : cp c : ret c : sub a,c : inc l
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+HL_mod_3:
+;destroys HL, returns HL mod 3 in A
+;Falls through into A_mod_3 after folding HL into a single byte.
+;112+{0,2} + {0,8} + {0,1}
+;min: 112
+;max: 123
+;avg: 117.5
+
+; HL mod 3 == (H*256+L) mod 3 == (H*1+L) mod 3 == (H+L) mod 3
+;So add the upper and lower byte
+  ld a,h
+  add a,l
+
+;If adding caused an overflow, we'll add (256 mod 3) == 1 to A.
+  adc a,0   ;We don't need to worry about overflow here :)
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;destroys HL, returns A mod 3 in A
+;97+{0,2} + {0,8} + {0,1}
+;min: 97
+;max: 108
+;avg: 102.5
+;A mod 3 is equal to adding the upper and lower nibble of A mod 3
+;For example, if A=16u+l, then A mod 3 == 16u+l mod 3 == u+l
+A_mod_3:
+;So add the upper and lower nibble
+  ld l,a  ;save a copy of a
+  add a,a
+  add a,a
+  add a,a
+  add a,a
+  add a,l
+
+; If there was overflow, again, add 1. However, our number is shifted up by 4,
+; so we need to add 1<<4 == 16
+  jr nc,$+4
+  add a,16
+
+; Now our number is in the upper 4 bits of A. We need to add the top 2 bits to
+; the preceding 2 bits
+
+  ld l,a
+  add a,a
+  add a,a
+
+; Note that now we might have some garbage bits in the middle 4 bits of A,
+; overlapping two garbage bits in L. We'll need to clear out bits to avoid
+; issues. It is convenient to use a mask of %11000000
+  ld h,%11000000
+  and h
+  add a,l
+
+;Now if there was overflow, add 1<<6 == #40. H "happens" to be -#40, so we can
+;do this by subtracting h
+  jr nc,$+3
+  sub h
+
+;Now finally, mask out all but those upper two bits
+  and h
+
+; At this point, we can stop if we only need to test divisibility
+; If the parity is even, then we have to do (0 mod 3) or (3 mod 3), both of
+; which are 0, indicating divisibility by 3. If we have odd parity, then the
+; upper two bits are 10 or 01, both of which are not 0 mod 3.
+; basically, pe==divisible, po==not divisible.
+;
+; But, to get full modulo, shift those upper two bits into the lower two bits
+; (rlca leaves the parity flag from the `and h` untouched)
+  rlca
+  rlca
+  ret po
+; And make sure to set A to 0 if it was 0 or 3 :)
+  xor a
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Inputs: HL
+;Outputs: pe if HL was divisible by 3, else po.
+;Destroys: A, HL
+;Note: there is no RET; this prologue falls through into A_divisible_by_3,
+;      which performs the actual test on the byte left in A.
+;103+{0,2}+{0,1}
+;min: 103
+;max: 106
+;avg: 104.5
+HL_divisible_by_3:
+  ld a,h                ;256 mod 3 == 1, so HL mod 3 == (H+L) mod 3
+  add a,l
+  adc a,0               ;a carry out of the byte sum is worth 256 mod 3 == 1
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;A_divisible_by_3
+;Inputs: A
+;Outputs: pe if A was divisible by 3, po if A was not divisible by 3
+;         (A is left as one of #00, #40, #80, #C0)
+;Destroys: HL
+;88+{0,2}+{0,1}
+;min: 88
+;max: 91
+;avg: 89.5
+A_divisible_by_3:
+  ld h,#C0              ;mask for the top two bits; also equals -#40
+  ld l,a                ;keep the original value
+;Fold nibbles (16 mod 3 == 1): the upper nibble of A*17 is high+low nibble.
+  add a,a
+  add a,a
+  add a,a
+  add a,a
+  add a,l
+  jr nc,.nibbles_done
+  add a,16              ;carry out of the nibble sum is worth 1, placed at 1<<4
+.nibbles_done:
+;Fold bit pairs (4 mod 3 == 1) into bits 7..6.
+  ld l,a
+  add a,a
+  add a,a
+  and h
+  add a,l
+  jr nc,.pairs_done
+  sub h                 ;carry is worth 1, placed at 1<<6: A - (-#40)
+.pairs_done:
+  and h                 ;parity of the two remaining bits: 00/11 -> pe, 01/10 -> po
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ‹®£ à¨ä¬ëÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;lognat - natural logarithm of an 8.8 fixed point number
+;Input:  H.L needs to be on (0,128.0)
+;Output: H.L = ln(input) as signed 8.8, with the c flag set
+;    returns nc if input is negative (HL not modified)
+;    returns nc with H=80h if input is 0
+;Destroys: A, BC, DE
+;Error:
+;   The error on the outputs is as follows:
+;      20592 inputs are exact
+;      12075 inputs are off by 1/256
+;      100 inputs are off by 2/256
+;   So all 32767 inputs are within 2/256, with average error being <1/683 which is smaller than 1/256.
+;Size/speed: the upstream header quotes 177 bytes and <1250 t-states on average;
+;   those figures predate the inlined division below and were not re-measured.
+lognat:
+   ld a,h : or l : jr nz,.nonzero
+   ld h,80h : ret                ;input 0: carry is still clear from the OR
+.nonzero:
+   dec h
+   dec h
+   jr nz,.not_two                ;integer part is not 2
+   inc l : dec l
+   jr nz,.normalizeln            ;2.xx with a fraction: take the general path
+   ld l,177                      ;exactly 2.0: H is already 0, 177/256 ~ ln(2)
+   scf                           ;a valid result must be flagged with carry like every other success path
+   ret
+.not_two:
+   inc h
+   jr nz,.normalizeln_2          ;integer part is not 1: take the general path
+;Here the input is 1.f with H=0 and L=f; the result is computed directly.
+   ld b,h
+   ld c,l
+   ld e,l
+   ld d,8
+   add hl,hl
+   add hl,hl
+   add hl,de
+   ex de,hl
+   ;call .HL_Div_DE   (inlined: 8 unrolled division steps, quotient bits collected in A)
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+   add hl,hl : sbc hl,de : adc a,a
+   ld h,a : ld l,b
+   ;8 unrolled shift-and-add steps driven by the bits of H, adding BC (=f)
+   sla h : jr c,$+3 : ld l,c
+   add hl,hl : jr c,$+3 : add hl,bc
+   add hl,hl : jr c,$+3 : add hl,bc
+   add hl,hl : jr c,$+3 : add hl,bc
+   add hl,hl : jr c,$+3 : add hl,bc
+   add hl,hl : jr c,$+3 : add hl,bc
+   add hl,hl : jr c,$+3 : add hl,bc
+   add hl,hl : jr c,$+3 : add hl,bc
+   rl l
+   ld a,h
+   adc a,b
+   ld h,b                        ;B is 0 here, so the result is 0.xx
+   ld l,a
+   scf
+   ret
+
+; .HL_Div_DE:
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : jr nc,$+3 : add hl,de : adc a,a
+;    add hl,hl : sbc hl,de : adc a,a : ret
+
+;General path: scale the input to 1.f, take ln of that recursively, then add
+;(number of shifts) * ln(2).
+.normalizeln:
+   inc h
+.normalizeln_2:
+   xor a                         ;also clears carry for the negative-input return
+   inc h : ret m                 ;H is back to its input value; negative input -> nc, HL unchanged
+   ld d,a : ld e,a               ;DE = 0, will hold the signed shift count
+   ld a,l
+   jr z,.toosmall                ;H = 0: input is below 1.0
+   inc e : srl h : rra : jr nz,$-4   ;shift H.A right until H = 0, counting in E
+   rla : rl h                    ;undo the last shift so that H = 1
+   dec e
+.stepin:
+   ld l,a
+   push de
+   call lognat                   ;ln of the value scaled to 1.f
+   pop de
+   ;now multiply DE by 355, then divide by 2 (rounding)   (355/512 ~ ln 2)
+   ld b,d : ld c,e : ld a,d
+   ex de,hl
+   add hl,hl
+   add hl,hl   ;4
+   add hl,bc   ;5
+   add hl,hl   ;10
+   add hl,bc   ;11
+   add hl,hl   ;22
+   add hl,hl
+   add hl,hl
+   add hl,hl
+   add hl,bc
+   add hl,hl
+   add hl,bc
+   sra h : rr l
+   adc hl,de                     ;add ln(1.f); the carry from the halving rounds the result
+   scf
+   ret
+.toosmall:
+   dec d                         ;DE becomes negative: count left shifts downwards
+   dec e : add a,a : jr nc,$-2   ;shift left until the leading 1 bit drops out
+   inc h                         ;that bit becomes the integer part: H = 1
+   jp .stepin
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;lg_88 - base-2 logarithm of an 8.8 fixed point number via table lookup
+;Input: HL is a fixed point number
+;Output: lg(H.L)->H.L
+;        input 0 returns HL = 8000h; negative input returns with HL unchanged
+;Destroys: A, BC, DE
+;Speed: Avg: 340
+lg_88:
+   ld de,.LUT
+   ld b,0                        ;B accumulates the power of two
+   ld a,h
+   or a
+   ret m                         ;negative input: nothing to compute
+   ld a,l
+   jr z,.int_gone                ;H already 0 (flags are still those of OR A)
+.drop_int:
+   inc b : srl h : rra : jr nz,.drop_int   ;shift H.A right until H = 0
+.int_gone:
+   or a : jr nz,.strip_zeros
+   ld hl,8000h : ret             ;input was 0
+.strip_zeros:
+   rra : inc b : jr nc,.strip_zeros        ;shift until the lowest 1 bit drops out
+	;A is the element to look up in the LUT
+   ld l,a
+   ld c,h                        ;H is 0 here, so C = 0 and HL = index
+   dec b                         ;the last shift removed the 1 bit itself
+   add hl,hl                     ;word-sized entries
+   add hl,de
+   ld e,(hl)
+   inc hl
+   ld d,(hl)
+   ex de,hl
+   add hl,bc                     ;table value + B.0
+   ret
+;			  0      1      2      3      4      5      6      7      8      9
+.LUT:		DW	#F800, #F996, #FA52, #FACF, #FB2C, #FB76, #FBB3, #FBE8,	#FC16, #FC3F  ; 0
+		DW	#FC64, #FC86, #FCA5, #FCC1, #FCDC, #FCF4, #FD0B, #FD21, #FD36, #FD49  ; 1
+		DW	#FD5C, #FD6D, #FD7E, #FD8E, #FD9D, #FDAC, #FDBA, #FDC8, #FDD5, #FDE2  ; 2
+		DW	#FDEE, #FDFA, #FE06, #FE11, #FE1C, #FE26, #FE31, #FE3B, #FE44, #FE4E  ; 3
+		DW	#FE57, #FE60, #FE69, #FE71, #FE7A, #FE82, #FE8A, #FE92, #FE9A, #FEA1  ; 4
+		DW	#FEA9, #FEB0, #FEB7, #FEBE, #FEC5, #FECB, #FED2, #FED8, #FEDF, #FEE5  ; 5
+		DW	#FEEB, #FEF1, #FEF7, #FEFD, #FF03, #FF09, #FF0E, #FF14, #FF19, #FF1E  ; 6
+		DW	#FF24, #FF29, #FF2E, #FF33, #FF38, #FF3D, #FF42, #FF47, #FF4B, #FF50  ; 7
+		DW	#FF55, #FF59, #FF5E, #FF62, #FF67, #FF6B, #FF6F, #FF74, #FF78, #FF7C  ; 8
+		DW	#FF80, #FF84, #FF88, #FF8C, #FF90, #FF94, #FF98, #FF9B, #FF9F, #FFA3  ; 9
+		DW	#FFA7, #FFAA, #FFAE, #FFB2, #FFB5, #FFB9, #FFBC, #FFC0, #FFC3, #FFC6  ; 10
+		DW	#FFCA, #FFCD, #FFD0, #FFD4, #FFD7, #FFDA, #FFDD, #FFE0, #FFE4, #FFE7  ; 11
+		DW	#FFEA, #FFED, #FFF0, #FFF3, #FFF6, #FFF9, #FFFC, #FFFF                ; 12
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Log_2_88_size - base-2 logarithm, size-optimised bit-by-bit version
+;Inputs:
+;     HL is an unsigned 8.8 fixed point number.
+;Outputs:
+;     HL is the signed 8.8 fixed point value of log base 2 of the input.
+;     An input of 0 returns HL = #FFFF.
+;Destroys:
+;     A, BC, DE
+;Example:
+;     pass HL = 3.0, returns 1.58203125 (actual is ~1.584962501...)
+;averages about 39 t-states slower than original
+;62 bytes
+Log_2_88_size:
+     ex de,hl
+     ld hl,0
+     ld a,d
+     ld c,8              ;C = number of fraction bits still to produce
+     or a
+     jr z,.DE_lessthan_1
+;Input >= 1.0: shift DE right until D = 1, counting the shifts in L.
+.shift_down:
+     srl d
+     jr z,.normalized    ;D was 1; it is 0 now and is restored by the INC D at the target
+     inc l
+     rr e
+     jr .shift_down
+;Input < 1.0: shift E left until its leading 1 drops out, counting down from -1.
+.DE_lessthan_1:
+     ld a,e
+     dec hl
+     or a
+     ret z               ;input was 0
+     inc l
+.shift_up:
+     dec l
+     add a,a
+     jr nc,.shift_up
+     ld e,a
+
+;Both paths meet here with the integer part of the log in HL and the value
+;scaled so that D becomes 1 (DE = 1.f).
+;This label replaces the old "logloop-1" target, which named a label that is
+;not defined in this routine.
+.normalized:
+     inc d
+.loop:
+     add hl,hl
+     push hl
+;Square DE (shift-and-add with A as the running multiplier/high byte).
+     ld h,d
+     ld l,e
+     ld a,e
+     ld b,8
+
+.square:
+     add hl,hl
+     rla
+     jr nc,.no_add
+       add hl,de
+       adc a,0
+.no_add:
+     djnz .square
+
+     ld e,h
+     ld d,a
+     pop hl
+     rr a         ;this is NOT supposed to be rra, we need the z flag affected
+     jr z,.bit_done      ;square stayed below 2.0: this result bit is 0
+       srl d             ;square reached 2.0: halve it and record a 1 bit
+       rr e
+       inc l
+.bit_done:
+     dec c
+     jr nz,.loop
+     ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+;ln_88_fixed - natural logarithm built on top of lg_88
+;Input: HL is a fixed point number
+;Output: ln(H.L)->H.L
+;Destroys: A, BC, DE (plus whatever lg_88 destroys)
+;Speed: Avg: 340+(325 worst case)
+ln_88_fixed:
+   call lg_88
+;Signed multiply of HL by 355/512 (~ ln 2): work on the magnitude and put the
+;sign back at the end.
+   ld de,0                      ;D = 0 for the carry additions, E = sign marker
+   bit 7,h
+   jr z,.magnitude
+   dec e                        ;E = #FF: the logarithm was negative
+   xor a : sub l : ld l,a       ;HL = -HL
+   sbc a,a : sub h : ld h,a
+.magnitude:
+   ld b,h
+   ld c,l
+   xor a                        ;A:HL is the 24-bit product
+   add hl,hl                    ;x2
+   add hl,hl : rla              ;x4
+   add hl,bc : adc a,d          ;x5
+   add hl,hl : rla              ;x10
+   add hl,bc : adc a,d          ;x11
+   add hl,hl : rla              ;x22
+   add hl,hl : rla              ;x44
+   add hl,hl : rla              ;x88
+   add hl,hl : rla              ;x176
+   add hl,bc : adc a,d          ;x177
+   add hl,hl : rla              ;x354
+   add hl,bc : adc a,d          ;x355
+   sra a : rr h                 ;halve, then keep the upper 16 bits (A:H)
+   ld l,h
+   ld h,a
+   inc e
+   ret nz                       ;E was 0: result is already positive
+   xor a : sub l : ld l,a       ;E was #FF: negate the result
+   sbc a,a : sub h : ld h,a
+   ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ §­®¥ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+; ‘â¥¯¥­ì
+;power_2 - computes 2^x
+;Inputs:
+;     HL is the 8.8 fixed point number 'x' for 2^x
+;Outputs:
+;     DEHL is the 24.8 fixed point result. If there was overflow exceeding 2^24, then this value is set to the max.
+;Destroys:
+;     A, BC (plus whatever FPSqrtHL destroys)
+;Notes:
+;     H is used as an unsigned shift count, so negative exponents are not
+;     handled by this routine.
+power_2:
+     ld a,l
+     or a
+     push hl     ;save H for later, H is the integer part of the power
+     ld hl,256   ;2^0 = 1.0 in 8.8. (This used to be 1, which made whole-number
+                 ;exponents come out 256 times too small compared with the
+                 ;fractional path, which starts from 2*256.)
+     jr z,.integer
+     scf      ;set the carry flag so that a bit is rotated into a. This will act as our counter.
+;Rotate right until the lowest set bit of the fraction drops out. The bit
+;rotated in at the top marks the end of the remaining fraction bits.
+.skip_zeros:
+     rra
+     jr nc,.skip_zeros
+     ld hl,2*256
+.loop:
+     push af
+     call FPSqrtHL    ;returns in HL
+     pop af
+     srl a
+     jr z,.integer    ;only the marker bit was left: fraction finished
+     jr nc,.loop      ;fraction bit is 0
+     add hl,hl        ;fraction bit is 1: multiply by 2 before the next square root
+     jp .loop
+.integer:
+     pop bc
+;Now b is the integer part for 2^x that we need to multiply HL by.
+     ld de,0
+     ld a,b
+     or a
+     ret z
+
+.scale:
+     add hl,hl
+     rl e : rl d : jr c,.wayoverflow
+     djnz .scale
+     ret
+.wayoverflow:
+     ld hl,-1
+     ld d,h
+     ld e,l
+     ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;Written by Zeda
+
+; Requires ;!TEST
+;    mul16          ;BC*DE ==> DEHL
+;    DEHL_Div_BC    ;DEHL/BC ==> DEHL
+;"n choose r", defined as n!/(r!(n-r)!)
+;Computes "HL choose DE"
+;Inputs: HL,DE
+;Outputs:
+;   HL is the result
+;       "HL choose DE"
+;   carry flag reset means overflow
+;Destroys:
+;   A,BC,DE,IX
+;Notes:
+;   Overflow is returned as 0
+;   Overflow happens if HL choose DE exceeds 65535
+;   HL < DE also returns 0, but with the carry flag set.
+;   This algorithm is constructed in such a way that intermediate
+;   operations won't erroneously trigger overflow.
+;   The running value is updated as acc = acc*m/k + acc for k = 1,2,...
+;   where m is the larger of r and n-r and the loop bound is the smaller.
+;66 bytes
+ncr_HL_DE:
+  ld bc,1               ;BC = k, the loop counter; also the result for the trivial cases
+  or a
+  sbc hl,de             ;HL = n-r
+  jr c,.oob             ;n < r: result 0 (B is 0), carry stays set
+  jr z,.exit            ;n = r: result 1
+  sbc hl,de             ;compare n-r with r ...
+  add hl,de             ;... restoring HL; carry set when n-r < r
+  jr c,$+3              ;skip the 1-byte EX when HL already holds the smaller value
+  ex de,hl              ;HL = min(r,n-r), DE = max(r,n-r)
+  ld a,h
+  or l                  ;Z when the smaller value is 0
+  push hl
+  pop ix                ;IX = loop bound
+.exit:
+  ld h,b
+  ld l,c                ;HL = 1 (LD leaves the flags alone)
+  scf
+  ret z                 ;trivial cases return 1 with carry set
+.loop:
+  push bc : push de     ;save k and m
+  push hl : push bc     ;save acc and k again
+  ld b,h
+  ld c,l
+  call mul16          ;BC*DE ==> DEHL
+  pop bc                ;BC = k
+  call DEHL_Div_BC    ;result in DEHL
+  ld a,d
+  or e                  ;quotient does not fit in 16 bits?
+  pop bc                ;BC = previous acc
+  pop de                ;DE = m
+  jr nz,.overflow
+  add hl,bc             ;acc = acc*m/k + acc
+  jr c,.overflow
+  pop bc                ;BC = k
+  inc bc
+  ld a,b
+  cp ixh
+  jr c,.loop            ;high byte of k still below the bound
+  ld a,ixl
+  cp c
+  jr nc,.loop           ;low byte of k has not passed the bound yet
+  ret                   ;carry is set here (bound < k)
+.overflow:
+  pop bc                ;drop the saved k
+  xor a                 ;clears carry: overflow indicator
+  ld b,a
+.oob:
+  ld h,b
+  ld l,b                ;HL = 0
+  ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;isDivisible - tests whether DE divides HL, without performing a division
+;Inputs: DE,HL
+;Outputs: c flag set if HL is not divisible by DE, else c flag is reset.
+;         HL is 0 if true.
+;         DE = 0 is reported as "not divisible"; HL = 0 with DE non-zero is
+;         reported as divisible.
+;Destroys: A, DE (common factors of 2 are shifted out of it)
+;See below for a note on the motivation and development of this algorithm.
+;Fixes relative to the previous version: the shift loop used a copy of L taken
+;before the subtraction, factors of 2 were not removed from HL before the
+;first subtraction, and HL = 0 was reported as not divisible.
+isDivisible:
+;step 1: DE = 0 never divides anything
+    ld a,d : or e : ccf : ret z         ;OR clears carry, CCF sets it -> returns c
+;0 is divisible by every non-zero DE (and the shift loop below needs HL <> 0)
+    ld a,h : or l : ret z               ;OR clears carry -> returns nc with HL = 0
+;step 2: remove the factors of 2 that HL and DE have in common
+.common:
+    ld a,e : or l : rra : jr c,.step3   ;stop as soon as either one is odd
+    srl d : rr e
+    srl h : rr l
+    jr .common
+;step 3: if DE is still even, HL has fewer factors of 2 than DE
+.step3:
+    ld a,e : rra : ccf : ret c
+;step 4: remove all remaining factors of 2 from HL (HL is non-zero here)
+.strip:
+    bit 0,l
+    jr nz,.subtract
+    srl h : rr l
+    jr .strip
+;step 5: HL and DE are both odd; subtract and look at the outcome
+.subtract:
+    or a
+    sbc hl,de
+    ret c                               ;went below zero: not divisible
+    ret z                               ;hit zero exactly: divisible (carry is clear)
+    jr .strip                           ;odd - odd is even and non-zero: back to step 4
+;Motivation and Development
+;  I often find myself in a situation where I need to find the factors of a number, but I have no technology around to aid me. This means I need to use... mental arithmetic!
+;  I've been doing this for 15 years, so I have refined my mental process quite a bit.
+;  It is still a trial division algorithm, but with a very obfuscated "division" technique.
+;  We don't need to do 1131/7 to see if it is divisible by 7, we just need to see if 7 divides 1131 and this is what my algorithm does.
+;  Interestingly, testing divisibility at the algorithmic level is a little faster than division. Not by much, but it is also non-negligible.
+;The Algorithm
+;  The core algorithm is designed around checking whether (A mod B == 0) is true or false.
+;  We also make the assumption that B is odd and by extension, non-zero.
+;  The case where B is non-zero and even will be discussed later.
+;
+;  Since B is odd, 2 does not divide B. This means that if A is even:
+;      (A mod B == 0) if and only if  (A/2 mod B)==0.
+;  We also know by the definition of divisibility that
+;      (A mod B) == (A+c*B mod B)
+;  where c is any integer. Combining all this, we have an algorithm:
+;
+;  1]  Remove all factors of 2 from A
+;  2]  With A now odd, do A=A-B
+;      If the result is zero, that means (A mod B == 0)
+;      If the result underflows (becomes "negative", or on the Z80, sets the carry flag), it means that A was somewhere on [1,B-1], so it is not divisible by B.
+;  3] Continue back at 1.
+;
+;  Now suppose B is allowed to be non-zero and even. Then B is of the form d*2^k where d is odd.
+;  This just means there are some factors of 2 that can be removed from B until it is odd.
+;  The only way A is divisible by B, is if it has the same number or more of factors of 2 as B.
+;  If we factor out common factors of 2 and find B is still even, then A is not divisible by B.
+;  Otherwise we have an odd number and only need to check the new (A mod d)
+;  for which we can use the "odd algorithm" above.
+;  So putting it all together:
+;
+;  1] If B==0, return FALSE.
+;  2] Remove common factors of 2 from A and B.
+;  3] If B is even, return FALSE.
+;  4] Remove all factors of 2 from A.
+;  5] Subtract B from A (A=A-B).
+;      If the result is zero, return TRUE.
+;      If the result is "negative" (setting the carry flag on many processors), return FALSE.
+;  6] Repeat at 4]
+;
+;  The overhead steps are 1] to 3].
+;  The iterated steps are 4] and 5].
+;  Because 5 always produces an even number, when it then performs step 4, it always divides by at least one factor of 2.
+;  This means the algorithm takes at most 1+ceil(log2(A))-floor(log2(B)) iterations.
+;  For example, if A is a 37-bit number and B is a 13-bit number,this takes at most 38-13 = 25 iterations.
+;  However, in practice it is usually slightly less.
+;Example Time:
+;  Say I wanted to test if 1337 is divisible by 17.
+;  Since 17 is odd, we can proceed.
+;  1337 is odd, so no factors of 2 to remove.
+;  1337-17 == 1320.
+;  1320/2 == 660
+;  660/2 == 330
+;  330/2 == 165
+;  165-17 == 148
+;  148/2 == 74
+;  74/2 == 37
+;  37-17 == 20
+;  20/2 == 10
+;  10/2 == 5
+;  5-17 = -12
+;
+;  So 1337 is not divisible by 17.
+;Now test divisibility by 7:
+;1337 => 1330
+;=>665
+;=>658
+;=>329
+;=>322
+;=>161
+;=>154
+;=>77
+;=>70
+;=>35
+;=>28
+;=>14
+;=>7
+;=>0
+;
+;  So 1337 is divisible by 7.
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;addBCD_16 - adds two little-endian, 16-digit packed BCD integers (8 bytes each)
+;Input:
+;   HL points to one BCD integer
+;   DE points to another BCD integer
+;Output:
+;   The sum is written over the integer at DE (every store goes through DE);
+;   the integer at HL is only read.
+;   NOTE(review): the earlier header said the sum replaces the integer at HL,
+;   which is not what the code does - confirm which one callers expect.
+;   HL and DE point to the last byte of their integers.
+;   Carry holds the decimal carry out of the top byte.
+;Destroys: A
+;46 bytes, 284cc
+addBCD_16:
+;Lowest byte: plain ADD. DAA turns the binary sum back into packed BCD and
+;leaves the decimal carry in the carry flag. INC rr does not touch the flags,
+;so that carry survives until the ADC of the next byte.
+    ld a,(de) : add a,(hl) : daa : ld (de),a : inc de : inc hl
+;Bytes 1..6: add with the carry from the byte below.
+    ld a,(de) : adc a,(hl) : daa : ld (de),a : inc de : inc hl
+    ld a,(de) : adc a,(hl) : daa : ld (de),a : inc de : inc hl
+    ld a,(de) : adc a,(hl) : daa : ld (de),a : inc de : inc hl
+    ld a,(de) : adc a,(hl) : daa : ld (de),a : inc de : inc hl
+    ld a,(de) : adc a,(hl) : daa : ld (de),a : inc de : inc hl
+    ld a,(de) : adc a,(hl) : daa : ld (de),a : inc de : inc hl
+;Top byte: pointers stay on it.
+    ld a,(de) : adc a,(hl) : daa : ld (de),a
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;gcd(HL,DE)->HL
+;Output:
+;   B=0
+;   HL is the GCD of the inputs
+;Destroys:
+;   A,DE
+;     DE is guaranteed 0 unless the output is 0 (which only happens if one of the inputs is 0).
+;Uses the binary GCD algorithm
+;Fixes relative to the previous version: the branch that closes the
+;common-factor loop was a garbled line (".c,gcd_cofactor_2_loop"), and B was
+;incremented twice per common factor of 2 although the final shift expects
+;B = 1 + (number of common factors).
+gcdHL_DE:
+;B is our cofactor-of-2 counter
+    ld b,0
+;If HL=0, return 0
+    ld a,h : or l : ret z
+;If DE=0, return 0
+    ex de,hl
+    ld a,h : or l : jr nz,.test_cofactor_of_2
+    ret
+;Both operands are even: halve both.
+.cofactor_2_loop:
+    srl h : rr l
+    srl d : rr e
+.test_cofactor_of_2:
+    inc b                   ;runs once on entry plus once per common factor of 2
+    ld a,e
+    or l
+    rra                     ;carry = 1 as soon as either operand is odd
+    jr nc,.cofactor_2_loop
+
+;Make the first operand odd. The loop shifts once too often (the 1 bit lands in
+;carry); ADC HL,HL shifts it back in.
+.remove_factors_of_2_op2:
+    srl h : rr l : jr nc,.remove_factors_of_2_op2
+    adc hl,hl
+    jr .swap_ops
+
+.swap_ops_negate:
+;At this point, HL needs to be negated and swapped with DE
+    xor a : sub l : ld l,a : sbc a,a : sub h : ld h,a
+.swap_ops:
+    ex de,hl
+.remove_factors_of_2_op1:
+    srl h : rr l : jr nc,.remove_factors_of_2_op1
+    adc hl,hl               ;HL is odd again, carry is clear
+    sbc hl,de
+    jr c,.swap_ops_negate   ;HL < DE: continue with the operands exchanged
+    jp nz,.remove_factors_of_2_op1
+
+;DE is the GCD, need to shift it left B-1 times.
+    ex de,hl
+    dec b
+    ret z
+.restore_factors:
+    add hl,hl : djnz .restore_factors
+    ret
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+;¥à¥¢®¤¨â ç¨á«® ¨§ A ¢ HEX-String ¢¨¤ ¨ ¯®¬¥é ¥â ¯®  ¤à¥áã HL.
+; …á«¨ A=#31, â® á  ¤à¥á  HL ¡ã¤¥â byte #33,#31
+; 112 T
+ByteToStrHEX:
+; Converts the byte in A into two uppercase ASCII hex digits and stores them
+; at (HL) and (HL+1), high nibble first. Example: A=#31 stores #33,#31.
+; On return HL points at the second digit. Destroys A and B.
+; Each nibble n is converted with the ADD #90 / DAA / ADC #40 / DAA sequence,
+; which yields '0'..'9' for 0..9 and 'A'..'F' for 10..15.
+		LD	B,A		; keep the byte for the low nibble
+		RRCA
+		RRCA
+		RRCA
+		RRCA			; swap the nibbles ...
+		AND	#0F		; ... and keep the former high nibble
+		ADD	A,#90
+		DAA
+		ADC	A,#40
+		DAA
+		LD	(HL),A		; first character
+		INC	HL
+		LD	A,B
+		AND	#0F		; low nibble
+		ADD	A,#90
+		DAA
+		ADC	A,#40
+		DAA
+		LD	(HL),A		; second character
+		RET
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
+
+
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ‘à ¢­¥­¨ï. Œ ªà®áëÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;ÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛÛ
+;These code snippets are for 16-bit comparisons.
+;"I learned these from calc84maniac"
+;"These have similar flags to that of the `cp` instruction. At the very least,
+; you get the zero and carry flag identical."
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of HL with DE.
+;This is an inline snippet: it has no RET and continues into whatever follows.
+;Inputs:
+;   HL, DE
+;Outputs:
+;   z flag is set if HL=DE, else nz
+;   c flag is set if HL<DE, else nc
+;Destroys:
+;   none
+;size: 4 bytes
+;speed: 30cc
+cpHL_DE:
+    or a                ;clear carry so that SBC is a plain subtraction
+    sbc hl,de           ;Z and C now describe HL-DE
+    add hl,de           ;restore HL: ADD HL leaves Z alone and carries exactly when the SBC borrowed
+
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpHL_BC:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of HL with BC.
+;This is an inline snippet: it has no RET and continues into whatever follows.
+;Inputs:
+;   HL, BC
+;Outputs:
+;   z flag is set if HL=BC, else nz
+;   c flag is set if HL<BC, else nc
+;Destroys:
+;   none
+;size: 4 bytes
+;speed: 30cc
+;
+    or a                ;clear carry so that SBC is a plain subtraction
+    sbc hl,bc           ;Z and C now describe HL-BC
+    add hl,bc           ;restore HL: ADD HL leaves Z alone and carries exactly when the SBC borrowed
+
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpHL_DE_faster:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of HL with DE, byte by byte.
+;This is an inline snippet: it has no RET; the flags are valid at cpHL_DE_result.
+;Inputs:
+;   HL, DE
+;Outputs:
+;   z flag is set if HL=DE, else nz
+;   c flag is set if HL<DE, else nc
+;Destroys:
+;   A
+;size: 6 bytes
+;speed: 20cc or 23cc
+;
+  ld a,h
+  cp d                  ;the high bytes decide unless they are equal
+  jr nz,cpHL_DE_result
+  ld a,l
+  cp e                  ;high bytes equal: the low bytes decide
+cpHL_DE_result:
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpHL_BC_faster:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of HL with BC, byte by byte.
+;This is an inline snippet: it has no RET; the flags are valid at cpHL_BC_result.
+;Inputs:
+;   HL, BC
+;Outputs:
+;   z flag is set if HL=BC, else nz
+;   c flag is set if HL<BC, else nc
+;Destroys:
+;   A
+;size: 6 bytes
+;speed: 20cc or 23cc
+;
+  ld a,h
+  cp b                  ;the high bytes decide unless they are equal
+  jr nz,cpHL_BC_result
+  ld a,l
+  cp c                  ;high bytes equal: the low bytes decide
+cpHL_BC_result:
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpDE_HL_faster:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of DE with HL, byte by byte.
+;This is an inline snippet: it has no RET; the flags are valid at cpDE_HL_result.
+;Inputs:
+;   DE, HL
+;Outputs:
+;   z flag is set if DE=HL, else nz
+;   c flag is set if DE<HL, else nc
+;Destroys:
+;   A
+;size: 6 bytes
+;speed: 20cc or 23cc
+;
+  ld a,d
+  cp h                  ;the high bytes decide unless they are equal
+  jr nz,cpDE_HL_result
+  ld a,e
+  cp l                  ;high bytes equal: the low bytes decide
+;The exit label was misspelled cpDE_BC_result, which left the JR target above
+;undefined and clashed with the label of the DE/BC snippet.
+cpDE_HL_result:
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpDE_BC_faster:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of DE with BC, byte by byte.
+;This is an inline snippet: it has no RET; the flags are valid at cpDE_BC_result.
+;Inputs:
+;   DE, BC
+;Outputs:
+;   z flag is set if DE=BC, else nz
+;   c flag is set if DE<BC, else nc
+;Destroys:
+;   A
+;size: 6 bytes
+;speed: 20cc or 23cc
+;
+  ld a,d
+  cp b                  ;the high bytes decide unless they are equal
+  jr nz,cpDE_BC_result
+  ld a,e
+  cp c                  ;high bytes equal: the low bytes decide
+cpDE_BC_result:
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpBC_HL_faster:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of BC with HL, byte by byte.
+;This is an inline snippet: it has no RET; the flags are valid at cpBC_HL_result.
+;Inputs:
+;   BC, HL
+;Outputs:
+;   z flag is set if BC=HL, else nz
+;   c flag is set if BC<HL, else nc
+;Destroys:
+;   A
+;size: 6 bytes
+;speed: 20cc or 23cc
+;
+  ld a,b
+  cp h                  ;the high bytes decide unless they are equal
+  jr nz,cpBC_HL_result
+  ld a,c
+  cp l                  ;high bytes equal: the low bytes decide
+cpBC_HL_result:
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+cpBC_DE_faster:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Unsigned 16-bit compare of BC with DE, byte by byte.
+;This is an inline snippet: it has no RET; the flags are valid at cpBC_DE_result.
+;Inputs:
+;   BC, DE
+;Outputs:
+;   z flag is set if BC=DE, else nz
+;   c flag is set if BC<DE, else nc
+;Destroys:
+;   A
+;size: 6 bytes
+;speed: 20cc or 23cc
+;
+  ld a,b
+  cp d                  ;the high bytes decide unless they are equal
+  jr nz,cpBC_DE_result
+  ld a,c
+  cp e                  ;high bytes equal: the low bytes decide
+cpBC_DE_result:
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Signed Compare D to E!
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Returns carry and zero flag as expected
+;  carry set when D < E as signed bytes
+;  NOTE(review): Z comes from the final XOR, so besides D = E it is also set
+;  when D = NOT E and the subtraction borrowed (e.g. D=#00, E=#FF). Verify
+;  before relying on Z for equality.
+;Destroys A.
+;This is an inline snippet without a label and without a RET.
+  ld a,d
+  sub e                 ;carry = unsigned borrow of D-E
+  sbc a,a               ;A = #FF if it borrowed, else #00
+  xor e
+  xor d                 ;bit 7 = borrow XOR sign(D) XOR sign(E)
+  rla                   ;move that bit into carry; RLA does not change Z
+
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+Mask_least_bit:
+;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;:;;
+;Isolates the lowest set bit of C and returns it in A; C itself is not changed.
+;Ex. If c=%10110100, this would return a=%00000100
+; (it also always returns nc, and z only when c=0).
+;This is an inline snippet: there is no RET after it.
+    xor a
+    sub c               ;A = -C (two's complement)
+    and c               ;C AND -C keeps only the lowest set bit; AND clears carry
+;°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°;
\ No newline at end of file