nw, fix most N64 visual bugs from new RDP SSE code.

2025-04-23 00:39:36 +03:00 · 2015-07-11 15:26:47 +02:00 · 2015-07-11 15:26:47 +02:00 · e815452f37
commit e815452f37
parent df8a949c00
8 changed files with 109 additions and 75 deletions
--- a/src/emu/video/rgbgen.h
+++ b/src/emu/video/rgbgen.h
@@ -123,42 +123,42 @@ public:
 		m_b = value;
 	}

-	inline UINT8 get_a()
+	inline UINT8 get_a() const
 	{
 		return m_a;
 	}

-	inline UINT8 get_r()
+	inline UINT8 get_r() const
 	{
 		return m_r;
 	}

-	inline UINT8 get_g()
+	inline UINT8 get_g() const
 	{
 		return m_g;
 	}

-	inline UINT8 get_b()
+	inline UINT8 get_b() const
 	{
 		return m_b;
 	}

-	inline INT32 get_a32()
+	inline INT32 get_a32() const
 	{
 		return m_a;
 	}

-	inline INT32 get_r32()
+	inline INT32 get_r32() const
 	{
 		return m_r;
 	}

-	inline INT32 get_g32()
+	inline INT32 get_g32() const
 	{
 		return m_g;
 	}

-	inline INT32 get_b32()
+	inline INT32 get_b32() const
 	{
 		return m_b;
 	}
--- a/src/emu/video/rgbsse.h
+++ b/src/emu/video/rgbsse.h
@@ -108,42 +108,42 @@ public:
 		m_value = _mm_or_si128(_mm_and_si128(m_value, blue_mask()), _mm_set_epi32(0, 0, 0, value));
 	}

-	inline UINT8 get_a()
+	inline UINT8 get_a() const
 	{
 		return _mm_extract_epi16(m_value, 6);
 	}

-	inline UINT8 get_r()
+	inline UINT8 get_r() const
 	{
 		return _mm_extract_epi16(m_value, 4);
 	}

-	inline UINT8 get_g()
+	inline UINT8 get_g() const
 	{
 		return _mm_extract_epi16(m_value, 2);
 	}

-	inline UINT8 get_b()
+	inline UINT8 get_b() const
 	{
 		return _mm_extract_epi16(m_value, 0);
 	}

-	inline INT32 get_a32()
+	inline INT32 get_a32() const
 	{
 		return (_mm_extract_epi16(m_value, 7) << 16) | _mm_extract_epi16(m_value, 6);
 	}

-	inline INT32 get_r32()
+	inline INT32 get_r32() const
 	{
 		return (_mm_extract_epi16(m_value, 5) << 16) | _mm_extract_epi16(m_value, 4);
 	}

-	inline INT32 get_g32()
+	inline INT32 get_g32() const
 	{
 		return (_mm_extract_epi16(m_value, 3) << 16) | _mm_extract_epi16(m_value, 2);
 	}

-	inline INT32 get_b32()
+	inline INT32 get_b32() const
 	{
 		return (_mm_extract_epi16(m_value, 1) << 16) | _mm_extract_epi16(m_value, 0);
 	}
@@ -173,7 +173,19 @@ public:

 	inline void shl(const rgbaint_t& shift)
 	{
-		m_value = _mm_sll_epi32(m_value, shift.m_value);
+		rgbaint_t areg(*this);
+		rgbaint_t rreg(*this);
+		rgbaint_t greg(*this);
+		rgbaint_t breg(*this);
+		rgbaint_t ashift(0, 0, 0, shift.get_a32());
+		rgbaint_t rshift(0, 0, 0, shift.get_r32());
+		rgbaint_t gshift(0, 0, 0, shift.get_g32());
+		rgbaint_t bshift(0, 0, 0, shift.get_b32());
+		areg.m_value = _mm_sll_epi32(areg.m_value, ashift.m_value);
+		rreg.m_value = _mm_sll_epi32(rreg.m_value, rshift.m_value);
+		greg.m_value = _mm_sll_epi32(greg.m_value, gshift.m_value);
+		breg.m_value = _mm_sll_epi32(breg.m_value, bshift.m_value);
+		set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32());
 	}

 	inline void shl_imm(const UINT8 shift)
@@ -183,7 +195,19 @@ public:

 	inline void shr(const rgbaint_t& shift)
 	{
-		m_value = _mm_srl_epi32(m_value, shift.m_value);
+		rgbaint_t areg(*this);
+		rgbaint_t rreg(*this);
+		rgbaint_t greg(*this);
+		rgbaint_t breg(*this);
+		rgbaint_t ashift(0, 0, 0, shift.get_a32());
+		rgbaint_t rshift(0, 0, 0, shift.get_r32());
+		rgbaint_t gshift(0, 0, 0, shift.get_g32());
+		rgbaint_t bshift(0, 0, 0, shift.get_b32());
+		areg.m_value = _mm_srl_epi32(areg.m_value, ashift.m_value);
+		rreg.m_value = _mm_srl_epi32(rreg.m_value, rshift.m_value);
+		greg.m_value = _mm_srl_epi32(greg.m_value, gshift.m_value);
+		breg.m_value = _mm_srl_epi32(breg.m_value, bshift.m_value);
+		set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32());
 	}

 	inline void shr_imm(const UINT8 shift)
@@ -193,7 +217,19 @@ public:

 	inline void sra(const rgbaint_t& shift)
 	{
-		m_value = _mm_sra_epi32(m_value, shift.m_value);
+		rgbaint_t areg(*this);
+		rgbaint_t rreg(*this);
+		rgbaint_t greg(*this);
+		rgbaint_t breg(*this);
+		rgbaint_t ashift(0, 0, 0, shift.get_a32());
+		rgbaint_t rshift(0, 0, 0, shift.get_r32());
+		rgbaint_t gshift(0, 0, 0, shift.get_g32());
+		rgbaint_t bshift(0, 0, 0, shift.get_b32());
+		areg.m_value = _mm_sra_epi32(areg.m_value, ashift.m_value);
+		rreg.m_value = _mm_sra_epi32(rreg.m_value, rshift.m_value);
+		greg.m_value = _mm_sra_epi32(greg.m_value, gshift.m_value);
+		breg.m_value = _mm_sra_epi32(breg.m_value, bshift.m_value);
+		set(areg.get_a32(), rreg.get_r32(), greg.get_g32(), breg.get_b32());
 	}

 	inline void sra_imm(const UINT8 shift)
--- a/src/emu/video/rgbvmx.h
+++ b/src/emu/video/rgbvmx.h
@@ -140,56 +140,56 @@ public:
 		m_value = vec_perm(m_value, temp, blue_perm);
 	}

-	inline UINT8 get_a()
+	inline UINT8 get_a() const
 	{
 		UINT8 result;
 		vec_ste(vec_splat((VECU8)m_value, 3), 0, &result);
 		return result;
 	}

-	inline UINT8 get_r()
+	inline UINT8 get_r() const
 	{
 		UINT8 result;
 		vec_ste(vec_splat((VECU8)m_value, 7), 0, &result);
 		return result;
 	}

-	inline UINT8 get_g()
+	inline UINT8 get_g() const
 	{
 		UINT8 result;
 		vec_ste(vec_splat((VECU8)m_value, 11), 0, &result);
 		return result;
 	}

-	inline UINT8 get_b()
+	inline UINT8 get_b() const
 	{
 		UINT8 result;
 		vec_ste(vec_splat((VECU8)m_value, 15), 0, &result);
 		return result;
 	}

-	inline INT32 get_a32()
+	inline INT32 get_a32() const
 	{
 		INT32 result;
 		vec_ste(vec_splat(m_value, 0), 0, &result);
 		return result;
 	}

-	inline INT32 get_r32()
+	inline INT32 get_r32() const
 	{
 		INT32 result;
 		vec_ste(vec_splat(m_value, 1), 0, &result);
 		return result;
 	}

-	inline INT32 get_g32()
+	inline INT32 get_g32() const
 	{
 		INT32 result;
 		vec_ste(vec_splat(m_value, 2), 0, &result);
 		return result;
 	}

-	inline INT32 get_b32()
+	inline INT32 get_b32() const
 	{
 		INT32 result;
 		vec_ste(vec_splat(m_value, 3), 0, &result);
--- a/src/mame/video/n64.c
+++ b/src/mame/video/n64.c
@@ -2660,6 +2660,7 @@ void n64_rdp::cmd_load_tile(UINT32 w1, UINT32 w2)
    }
    topad = 0; // ????
 */
+
 	switch (m_misc_state.m_ti_size)
 	{
 		case PIXEL_SIZE_8BIT:
@@ -2755,7 +2756,6 @@ void n64_rdp::cmd_load_tile(UINT32 w1, UINT32 w2)
 void n64_rdp::cmd_set_tile(UINT32 w1, UINT32 w2)
 {
 	//wait("SetTile");
-
 	const INT32 tilenum = (w2 >> 24) & 0x7;
 	n64_tile_t* tex_tile = &m_tiles[tilenum];

@@ -2784,7 +2784,8 @@ void n64_rdp::cmd_set_tile(UINT32 w1, UINT32 w2)
 	tex_tile->clamp_t = tex_tile->ct || !tex_tile->mask_t;
 	tex_tile->mm = rgbaint_t(tex_tile->ms ? ~0 : 0, tex_tile->ms ? ~0 : 0, tex_tile->mt ? ~0 : 0, tex_tile->mt ? ~0 : 0);
 	tex_tile->invmm = rgbaint_t(tex_tile->ms ? 0 : ~0, tex_tile->ms ? 0 : ~0, tex_tile->mt ? 0 : ~0, tex_tile->mt ? 0 : ~0);
-	tex_tile->mask = rgbaint_t(tex_tile->mask_s, tex_tile->mask_s, tex_tile->mask_t, tex_tile->mask_t);
+	tex_tile->mask = rgbaint_t(tex_tile->mask_s ? ~0 : 0, tex_tile->mask_s ? ~0 : 0, tex_tile->mask_t ? ~0 : 0, tex_tile->mask_t ? ~0 : 0);
+	tex_tile->invmask = rgbaint_t(tex_tile->mask_s ? 0 : ~0, tex_tile->mask_s ? 0 : ~0, tex_tile->mask_t ? 0 : ~0, tex_tile->mask_t ? 0 : ~0);
 	tex_tile->lshift = rgbaint_t(tex_tile->lshift_s, tex_tile->lshift_s, tex_tile->lshift_t, tex_tile->lshift_t);
 	tex_tile->rshift = rgbaint_t(tex_tile->rshift_s, tex_tile->rshift_s, tex_tile->rshift_t, tex_tile->rshift_t);
 	tex_tile->clamp_st = rgbaint_t(tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0);
@@ -2809,7 +2810,6 @@ void n64_rdp::cmd_set_tile(UINT32 w1, UINT32 w2)
 void n64_rdp::cmd_fill_rect(UINT32 w1, UINT32 w2)
 {
 	//if(m_pending_mode_block) { wait("Block on pending mode-change"); m_pending_mode_block = false; }
-
 	const UINT32 xh = (w2 >> 12) & 0xfff;
 	const UINT32 xl = (w1 >> 12) & 0xfff;
 	const UINT32 yh = (w2 >>  0) & 0xfff;
@@ -3041,6 +3041,8 @@ void n64_rdp::process_command_list()

 n64_rdp::n64_rdp(n64_state &state) : poly_manager<UINT32, rdp_poly_state, 8, 32000>(state.machine())
 {
+	ignore = false;
+	dolog = false;
 	m_aux_buf_ptr = 0;
 	m_aux_buf = NULL;
 	m_pipe_clean = true;
@@ -3055,11 +3057,6 @@ n64_rdp::n64_rdp(n64_state &state) : poly_manager<UINT32, rdp_poly_state, 8, 320
 	m_current = 0;
 	m_status = 0x88;

-	for (INT32 i = 0; i < 8; i++)
-	{
-		m_tiles[i].num = i;
-	}
-
 	m_one.set(0xff, 0xff, 0xff, 0xff);
 	m_zero.set(0, 0, 0, 0);

--- a/src/mame/video/n64.h
+++ b/src/mame/video/n64.h
@@ -153,6 +153,13 @@ public:

 		memset(m_tiles, 0, 8 * sizeof(n64_tile_t));
 		memset(m_cmd_data, 0, sizeof(m_cmd_data));
+
+		for (INT32 i = 0; i < 8; i++)
+		{
+			m_tiles[i].num = i;
+			m_tiles[i].invmm = rgbaint_t(~0, ~0, ~0, ~0);
+			m_tiles[i].invmask = rgbaint_t(~0, ~0, ~0, ~0);
+		}
 	}

 	void        process_command_list();
@@ -381,6 +388,10 @@ private:
 	static const INT32 s_rdp_command_length[];
 	static const char* s_image_format[];
 	static const char* s_image_size[];
+
+public:
+	bool ignore;
+	bool dolog;
 };

 #endif // _VIDEO_N64_H_
--- a/src/mame/video/n64types.h
+++ b/src/mame/video/n64types.h
@@ -121,7 +121,14 @@ struct n64_tile_t
 	INT32 wrapped_mask_s, wrapped_mask_t;
 	bool clamp_s, clamp_t;
 	rgbaint_t mm, invmm;
-	rgbaint_t wrapped_mask, mask, lshift, rshift, sth, stl, clamp_st;
+	rgbaint_t wrapped_mask;
+	rgbaint_t mask;
+	rgbaint_t invmask;
+	rgbaint_t lshift;
+	rgbaint_t rshift;
+	rgbaint_t sth;
+	rgbaint_t stl;
+	rgbaint_t clamp_st;
 	UINT16 sl, tl, sh, th;      // 10.2 fixed-point, starting and ending texel row / column
 	INT32 num;
 };
--- a/src/mame/video/rdptpipe.c
+++ b/src/mame/video/rdptpipe.c
@@ -21,8 +21,6 @@

 #define RELATIVE(x, y)  ((((x) >> 3) - (y)) << 3) | (x & 7);

-#define USE_SIMD (1)
-
 void n64_texture_pipe_t::set_machine(running_machine &machine)
 {
 	n64_state* state = machine.driver_data<n64_state>();
@@ -65,34 +63,24 @@ void n64_texture_pipe_t::set_machine(running_machine &machine)
 	m_v1.set(1, 1, 1, 1);
 }

-void n64_texture_pipe_t::mask(rgbaint_t& st, const n64_tile_t& tile)
+void n64_texture_pipe_t::mask(rgbaint_t& sstt, const n64_tile_t& tile)
 {
 	UINT32 s_mask_bits = m_maskbits_table[tile.mask_s];
 	UINT32 t_mask_bits = m_maskbits_table[tile.mask_t];
 	rgbaint_t maskbits(s_mask_bits, s_mask_bits, t_mask_bits, t_mask_bits);

-	rgbaint_t wrap(st);
-	wrap.sra(tile.wrapped_mask);
-	wrap.and_reg(m_v1);
-	wrap.cmpeq(m_v1);
-	wrap.and_reg(tile.mm);
-	st.xor_reg(wrap);
-	st.and_reg(maskbits);
-}
+	rgbaint_t do_wrap(sstt);
+	do_wrap.sra(tile.wrapped_mask);
+	do_wrap.and_reg(m_v1);
+	do_wrap.cmpeq(m_v1);
+	do_wrap.and_reg(tile.mm);

-void n64_texture_pipe_t::mask_coupled(rgbaint_t& sstt, const n64_tile_t& tile)
-{
-	UINT32 s_mask_bits = m_maskbits_table[tile.mask_s];
-	UINT32 t_mask_bits = m_maskbits_table[tile.mask_t];
-	rgbaint_t maskbits(s_mask_bits, s_mask_bits, t_mask_bits, t_mask_bits);
-
-	rgbaint_t wrap(sstt);
-	wrap.sra(tile.wrapped_mask);
-	wrap.and_reg(m_v1);
-	wrap.cmpeq(m_v1);
-	wrap.and_reg(tile.mm);
-	sstt.xor_reg(wrap);
-	sstt.and_reg(maskbits);
+	rgbaint_t wrapped(sstt);
+	wrapped.xor_reg(do_wrap);
+	wrapped.and_reg(maskbits);
+	wrapped.and_reg(tile.mask);
+	sstt.and_reg(tile.invmask);
+	sstt.or_reg(wrapped);
 }

 rgbaint_t n64_texture_pipe_t::shift_cycle(rgbaint_t& st, const n64_tile_t& tile)
@@ -250,7 +238,7 @@ void n64_texture_pipe_t::cycle_linear(color_t* TEX, color_t* prev, INT32 SSS, IN

 	clamp_cycle(st, stfrac, maxst, tilenum, tile, userdata);

-	mask_coupled(st, tile);
+	mask(st, tile);

 	const UINT32 tbase = tile.tmem + ((tile.line * st.get_b32()) & 0x1ff);

@@ -307,7 +295,7 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, INT32 SS

 	sstt.add(m_st2_add);

-	mask_coupled(sstt, tile);
+	mask(sstt, tile);

 	const UINT32 tbase1 = tile.tmem + ((tile.line * sstt.get_b32()) & 0x1ff);
 	const UINT32 tbase2 = tile.tmem + ((tile.line * sstt.get_g32()) & 0x1ff);
@@ -321,10 +309,6 @@ void n64_texture_pipe_t::cycle_linear_lerp(color_t* TEX, color_t* prev, INT32 SS
 		invstf.subr_imm(0x20);
 		invstf.shl_imm(3);
 	}
-	else
-	{
-		invstf.set(0, 0, 0, 0);
-	}

 	stfrac.shl_imm(3);

@@ -670,20 +654,20 @@ void n64_texture_pipe_t::calculate_clamp_diffs(UINT32 prim_tile, rdp_span_aux* u
 		{
 			for (INT32 start = 0; start <= 7; start++)
 			{
-				userdata->m_clamp_diff[start].set(0, (tiles[start].sh >> 2) - (tiles[start].sl >> 2), 0, (tiles[start].th >> 2) - (tiles[start].tl >> 2));
+				userdata->m_clamp_diff[start].set((tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2));
 			}
 		}
 		else
 		{
 			const INT32 start = prim_tile;
 			const INT32 end = (prim_tile + 1) & 7;
-			userdata->m_clamp_diff[start].set(0, (tiles[start].sh >> 2) - (tiles[start].sl >> 2), 0, (tiles[start].th >> 2) - (tiles[start].tl >> 2));
-			userdata->m_clamp_diff[end].set(0, (tiles[end].sh >> 2) - (tiles[end].sl >> 2), 0, (tiles[end].th >> 2) - (tiles[end].tl >> 2));
+			userdata->m_clamp_diff[start].set((tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].sh >> 2) - (tiles[start].sl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2), (tiles[start].th >> 2) - (tiles[start].tl >> 2));
+			userdata->m_clamp_diff[end].set((tiles[end].sh >> 2) - (tiles[end].sl >> 2), (tiles[end].sh >> 2) - (tiles[end].sl >> 2), (tiles[end].th >> 2) - (tiles[end].tl >> 2), (tiles[end].th >> 2) - (tiles[end].tl >> 2));
 		}
 	}
 	else//1-cycle or copy
 	{
-		userdata->m_clamp_diff[prim_tile].set(0, (tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), 0, (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2));
+		userdata->m_clamp_diff[prim_tile].set((tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), (tiles[prim_tile].sh >> 2) - (tiles[prim_tile].sl >> 2), (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2), (tiles[prim_tile].th >> 2) - (tiles[prim_tile].tl >> 2));
 	}
 }

@@ -786,7 +770,7 @@ void n64_texture_pipe_t::fetch_yuv(rgbaint_t& out, INT32 s, INT32 t, INT32 tbase
 	u |= ((u & 0x80) << 1);
 	v |= ((v & 0x80) << 1);

-	out.set(y, y, u, v);
+	out.set(y & 0xff, y & 0xff, u & 0xff, v & 0xff);
 }

 void n64_texture_pipe_t::fetch_ci4_tlut0(rgbaint_t& out, INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata)
@@ -938,7 +922,7 @@ void n64_texture_pipe_t::fetch_ia8_raw(rgbaint_t& out, INT32 s, INT32 t, INT32 t
 	UINT8 i = p & 0xf0;
 	i |= (i >> 4);

-	out.set((p << 4) | (p & 0xf), i, i, i);
+	out.set(((p << 4) | (p & 0xf)) & 0xff, i, i, i);
 }

 void n64_texture_pipe_t::fetch_ia16_tlut0(rgbaint_t& out, INT32 s, INT32 t, INT32 tbase, INT32 tpal, rdp_span_aux* userdata)
--- a/src/mame/video/rdptpipe.h
+++ b/src/mame/video/rdptpipe.h
@@ -27,7 +27,7 @@ class n64_texture_pipe_t

 		n64_texture_pipe_t()
 		{
-			m_maskbits_table[0] = 0x3ff;
+			m_maskbits_table[0] = 0xffff;
 			for(int i = 1; i < 16; i++)
 			{
 				m_maskbits_table[i] = ((UINT16)(0xffff) >> (16 - i)) & 0x3ff;
@@ -108,8 +108,7 @@ class n64_texture_pipe_t
 		bool                m_start_span;

 	private:
-		void                mask(rgbaint_t& st, const n64_tile_t& tile);
-		void                mask_coupled(rgbaint_t& sstt, const n64_tile_t& tile);
+		void                mask(rgbaint_t& sstt, const n64_tile_t& tile);

 		rgbaint_t           shift_cycle(rgbaint_t& st, const n64_tile_t& tile);
 		void                shift_copy(rgbaint_t& st, const n64_tile_t& tile);