rgbsse.h - Added andnot_reg, clamp_to_uint8, and bilinear_filter_rgbaint member functions.

rgbgen.h - Added andnot_reg, clamp_to_uint8, and bilinear_filter_rgbaint member functions, plus operator+= overloads. vooddefs.h - Converted the rasterizer path to use rgbaint_t types. Switched to the new reciplog function. voodoo.c - Converted the entire pixel pipe path to use rgbaint_t types.
2025-07-01 16:19:38 +03:00 · 2015-07-03 19:03:22 -06:00 · 2015-07-03 19:03:22 -06:00 · 143bac01bd
commit 143bac01bd
parent 5f6235f080
4 changed files with 489 additions and 252 deletions
--- a/src/emu/video/rgbgen.h
+++ b/src/emu/video/rgbgen.h
@ -283,6 +283,11 @@ public:
 		and_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
 	}

+	// AND-NOT against another color: the bitwise complement of each channel
+	// of `color` is handed to and_imm_rgba() in a, r, g, b order.
+	inline void andnot_reg(const rgbaint_t& color)
+	{
+		const INT32 inv_a = ~color.m_a;
+		const INT32 inv_r = ~color.m_r;
+		const INT32 inv_g = ~color.m_g;
+		const INT32 inv_b = ~color.m_b;
+
+		and_imm_rgba(inv_a, inv_r, inv_g, inv_b);
+	}
+
 	inline void and_imm(const INT32 imm)
 	{
 		and_imm_rgba(imm, imm, imm, imm);
@ -334,6 +339,14 @@ public:
 		m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
 	}

+	// Saturate all four channels to the range 0..255: negative values become
+	// 0, values above 255 become 255, anything else is left untouched.
+	inline void clamp_to_uint8()
+	{
+		if (m_a < 0) m_a = 0;
+		else if (m_a > 255) m_a = 255;
+
+		if (m_r < 0) m_r = 0;
+		else if (m_r > 255) m_r = 255;
+
+		if (m_g < 0) m_g = 0;
+		else if (m_g > 255) m_g = 255;
+
+		if (m_b < 0) m_b = 0;
+		else if (m_b > 255) m_b = 255;
+	}
+
 	inline void sign_extend(const UINT32 compare, const UINT32 sign)
 	{
 		if ((m_a & compare) == compare)
@ -427,6 +440,24 @@ public:
 		return *this;
 	}

+	// Channel-wise accumulate: adds each channel of `other` to the matching
+	// channel of this value and returns a reference to this value.
+	inline rgbaint_t& operator+=(const rgbaint_t& other)
+	{
+		m_a = m_a + other.m_a;
+		m_r = m_r + other.m_r;
+		m_g = m_g + other.m_g;
+		m_b = m_b + other.m_b;
+
+		return *this;
+	}
+
+	// Scalar accumulate: adds the same INT32 amount to all four channels and
+	// returns a reference to this value.
+	inline rgbaint_t& operator+=(const INT32 other)
+	{
+		m_a = m_a + other;
+		m_r = m_r + other;
+		m_g = m_g + other;
+		m_b = m_b + other;
+
+		return *this;
+	}
+
 	static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
 	{
 		UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8);
@ -446,6 +477,26 @@ public:
 		return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff);
 	}

+	// Bilinearly blend four packed 32-bit texels and store the packed result
+	// in this value through set(). `u` weights the 00->01 and 10->11 blends,
+	// `v` weights the blend between those two intermediate results. Two
+	// 8-bit channels are processed at once inside one 32-bit word (byte
+	// lanes 0 and 2); the other two channels are shifted down by 8 first.
+	inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
+	{
+		const UINT32 lane_mask = 0x00ff00ff;
+
+		// even byte lanes of each texel
+		const UINT32 lo00 = rgb00 & lane_mask;
+		const UINT32 lo01 = rgb01 & lane_mask;
+		const UINT32 lo10 = rgb10 & lane_mask;
+		const UINT32 lo11 = rgb11 & lane_mask;
+
+		// odd byte lanes of each texel, moved down into the even positions
+		const UINT32 hi00 = (rgb00 >> 8) & lane_mask;
+		const UINT32 hi01 = (rgb01 >> 8) & lane_mask;
+		const UINT32 hi10 = (rgb10 >> 8) & lane_mask;
+		const UINT32 hi11 = (rgb11 >> 8) & lane_mask;
+
+		// first pass: blend along u within each texel pair
+		const UINT32 rb_first = lo00 + (((lo01 - lo00) * u) >> 8);
+		const UINT32 rb_second = lo10 + (((lo11 - lo10) * u) >> 8);
+		const UINT32 ag_first = hi00 + (((hi01 - hi00) * u) >> 8);
+		const UINT32 ag_second = hi10 + (((hi11 - hi10) * u) >> 8);
+
+		// second pass: blend the two intermediate results along v
+		const UINT32 rb_from = rb_first & lane_mask;
+		const UINT32 rb_to = rb_second & lane_mask;
+		const UINT32 ag_from = ag_first & lane_mask;
+		const UINT32 ag_to = ag_second & lane_mask;
+		const UINT32 rb_out = rb_from + (((rb_to - rb_from) * v) >> 8);
+		const UINT32 ag_out = ag_from + (((ag_to - ag_from) * v) >> 8);
+
+		// re-interleave the two lane pairs into one packed word
+		UINT32 result = ((ag_out << 8) & 0xff00ff00) | (rb_out & lane_mask);
+		this->set(result);
+	}
+
 protected:
 	INT32 m_a;
 	INT32 m_r;
--- a/src/emu/video/rgbsse.h
+++ b/src/emu/video/rgbsse.h
@ -221,6 +221,11 @@ public:
 		m_value = _mm_and_si128(m_value, color.m_value);
 	}

+	// AND-NOT against another color: m_value = (~color.m_value) & m_value,
+	// i.e. every bit set in `color` is cleared in this value.
+	inline void andnot_reg(const rgbaint_t& color)
+	{
+		const __m128i cleared = _mm_andnot_si128(color.m_value, m_value);
+		m_value = cleared;
+	}
+
 	inline void and_imm(const INT32 value)
 	{
 		m_value = _mm_and_si128(m_value, _mm_set1_epi32(value));
@ -256,6 +261,14 @@ public:
 		m_value = _mm_or_si128(_mm_and_si128(vsign, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff))));
 	}

+	// Saturate each 32-bit lane to 0..255 by packing down to bytes with
+	// saturation and then zero-extending back to four 32-bit lanes.
+	inline void clamp_to_uint8()
+	{
+		const __m128i zero = _mm_setzero_si128();
+
+		// 32 -> 16 bits with signed saturation, then 16 -> 8 bits with unsigned saturation
+		const __m128i as_words = _mm_packs_epi32(m_value, zero);
+		const __m128i as_bytes = _mm_packus_epi16(as_words, zero);
+
+		// interleave with zero twice to widen 8 -> 16 -> 32 bits
+		const __m128i widened = _mm_unpacklo_epi8(as_bytes, zero);
+		m_value = _mm_unpacklo_epi16(widened, zero);
+	}
+
 	inline void sign_extend(const UINT32 compare, const UINT32 sign)
 	{
 		__m128i compare_vec = _mm_set1_epi32(compare);
@ -414,6 +427,27 @@ public:
 		return _mm_cvtsi128_si32(color01);
 	}

+	// Bilinearly blend four packed 32-bit texels and leave the result in
+	// m_value as four 32-bit lanes (one per channel). `u` is applied to the
+	// 00/01 and 10/11 texel pairs, `v` to the two intermediate rows.
+	// NOTE(review): scale_factor() is defined outside this view; it is
+	// presumably the pair of 16-bit blend weights for a fraction - confirm.
+	inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
+	{
+		const __m128i zero = _mm_setzero_si128();
+		const __m128i texel00 = _mm_cvtsi32_si128(rgb00);
+		const __m128i texel01 = _mm_cvtsi32_si128(rgb01);
+		const __m128i texel10 = _mm_cvtsi32_si128(rgb10);
+		const __m128i texel11 = _mm_cvtsi32_si128(rgb11);
+
+		/* interleave each texel pair at the byte level, then widen the bytes to 16 bits */
+		__m128i row0 = _mm_unpacklo_epi8(texel01, texel00);
+		__m128i row1 = _mm_unpacklo_epi8(texel11, texel10);
+		row0 = _mm_unpacklo_epi8(row0, zero);
+		row1 = _mm_unpacklo_epi8(row1, zero);
+
+		/* multiply-add adjacent 16-bit pairs: one 32-bit weighted sum per channel */
+		row0 = _mm_madd_epi16(row0, scale_factor(u));
+		row1 = _mm_madd_epi16(row1, scale_factor(u));
+
+		/* shift row0 toward the upper half of each lane and row1 into the lower half, */
+		/* then merge them with a 16-bit max (assumes the sums fit in 16 bits - confirm) */
+		row0 = _mm_slli_epi32(row0, 15);
+		row1 = _mm_srli_epi32(row1, 1);
+		__m128i merged = _mm_max_epi16(row0, row1);
+
+		/* second multiply-add blends the two rows; the final shift rescales the sum */
+		merged = _mm_madd_epi16(merged, scale_factor(v));
+		m_value = _mm_srli_epi32(merged, 15);
+	}
+
 protected:
 	struct _statics
 	{
--- a/src/emu/video/vooddefs.h
+++ b/src/emu/video/vooddefs.h
--- a/src/emu/video/voodoo.c
+++ b/src/emu/video/voodoo.c
@ -1458,8 +1458,14 @@ INLINE INT32 prepare_tmu(tmu_state *t)
 	/* adjust the result: negative to get the log of the original value */
 	/* plus 12 to account for the extra exponent, and divided by 2 to */
 	/* get the log of the square root of texdx */
-	(void)fast_reciplog(texdx, &lodbase);
-	return (-lodbase + (12 << 8)) / 2;
+	#if USE_FAST_RECIP == 1
+		(void)fast_reciplog(texdx, &lodbase);
+		return (-lodbase + (12 << 8)) / 2;
+	#else
+		double tmpTex = texdx;
+		lodbase = new_log2(tmpTex);
+		return (lodbase + (12 << 8)) / 2;
+	#endif
 }


@ -3303,8 +3309,6 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask)
 					iterw = (UINT32) sw[pix] << 16;
 				}
 				INT32 iterz = sw[pix] << 12;
-				rgb_union color;
-				rgb_union iterargb = { 0 };

 				/* apply clipping */
 				if (FBZMODE_ENABLE_CLIPPING(v->reg[fbzMode].u))
@ -3319,6 +3323,13 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask)
 						goto nextpixel;
 					}
 				}
+				#if USE_OLD_RASTER == 1
+					rgb_union color;
+					rgb_union iterargb = { 0 };
+				#else
+					rgbaint_t color, preFog;
+					rgbaint_t iterargb(0);
+				#endif

 				/* pixel pipeline part 1 handles depth testing and stippling */
 				//PIXEL_PIPELINE_BEGIN(v, stats, x, y, v->reg[fbzColorPath].u, v->reg[fbzMode].u, iterz, iterw);
@ -3362,35 +3373,38 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask)
 				// Depth testing value for lfb pipeline writes is directly from write data, no biasing is used
 				fogdepth = biasdepth = (UINT32) sw[pix];

-				/* use the RGBA we stashed above */
-				color.rgb.r = r = sr[pix];
-				color.rgb.g = g = sg[pix];
-				color.rgb.b = b = sb[pix];
-				color.rgb.a = a = sa[pix];
-
-				if (USE_OLD_RASTER) {
+				#if USE_OLD_RASTER == 1
 					/* Perform depth testing */
 					DEPTH_TEST(v, stats, x, v->reg[fbzMode].u);

+					/* use the RGBA we stashed above */
+					color.rgb.r = r = sr[pix];
+					color.rgb.g = g = sg[pix];
+					color.rgb.b = b = sb[pix];
+					color.rgb.a = a = sa[pix];
+
 					/* apply chroma key, alpha mask, and alpha testing */
 					APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color);
 					APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a);
 					APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a);
-				} else {
+				#else
 					/* Perform depth testing */
 					if (!depthTest((UINT16) v->reg[zaColor].u, stats, depth[x], v->reg[fbzMode].u, biasdepth))
 						goto nextpixel;

+					/* use the RGBA we stashed above */
+					color.set(sa[pix], sr[pix], sg[pix], sb[pix]);
+
 					/* handle chroma key */
 					if (!chromaKeyTest(v, stats, v->reg[fbzMode].u, color))
 						goto nextpixel;
 					/* handle alpha mask */
-					if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.rgb.a))
+					if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.get_a()))
 						goto nextpixel;
 					/* handle alpha test */
-					if (!alphaTest(v, stats, v->reg[alphaMode].u, color.rgb.a))
+					if (!alphaTest(v, stats, v->reg[alphaMode].u, color.get_a()))
 						goto nextpixel;
-				}
+				#endif

 				/* wait for any outstanding work to finish */
 				poly_wait(v->poly, "LFB Write");
@ -6496,5 +6510,6 @@ RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x000000C4, 0x
 RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /*   67     1962      14755 */
 RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x000000C4, 0x0C261ACD ) /* * 66       74       3951 */
 RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x00000ACD, 0x04221AC9 ) /*   70      374       3691 */
+RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* * 20      350       7928 */

 #endif