diff --git a/src/emu/video/rgbgen.h b/src/emu/video/rgbgen.h index a96a4cbfb6d..f4813b0d624 100644 --- a/src/emu/video/rgbgen.h +++ b/src/emu/video/rgbgen.h @@ -283,6 +283,11 @@ public: and_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b); } + inline void andnot_reg(const rgbaint_t& color) + { + and_imm_rgba(~color.m_a, ~color.m_r, ~color.m_g, ~color.m_b); + } + inline void and_imm(const INT32 imm) { and_imm_rgba(imm, imm, imm, imm); @@ -334,6 +339,14 @@ public: m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b; } + inline void clamp_to_uint8() + { + m_a = (m_a < 0) ? 0 : (m_a > 255) ? 255 : m_a; + m_r = (m_r < 0) ? 0 : (m_r > 255) ? 255 : m_r; + m_g = (m_g < 0) ? 0 : (m_g > 255) ? 255 : m_g; + m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b; + } + inline void sign_extend(const UINT32 compare, const UINT32 sign) { if ((m_a & compare) == compare) @@ -427,6 +440,24 @@ public: return *this; } + inline rgbaint_t& operator+=(const rgbaint_t& other) + { + m_a += other.m_a; + m_r += other.m_r; + m_g += other.m_g; + m_b += other.m_b; + return *this; + } + + inline rgbaint_t& operator+=(const INT32 other) + { + m_a += other; + m_r += other; + m_g += other; + m_b += other; + return *this; + } + static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) { UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); @@ -446,6 +477,26 @@ public: return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff); } + inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) + { + UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); + UINT32 rb1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); + + rgb00 >>= 8; + rgb01 >>= 8; + rgb10 >>= 8; + rgb11 >>= 8; + + UINT32 ag0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8); + UINT32 ag1 = (rgb10 & 0x00ff00ff) + ((((rgb11 & 0x00ff00ff) - (rgb10 & 0x00ff00ff)) * u) >> 8); + + rb0 = (rb0 & 0x00ff00ff) + ((((rb1 & 0x00ff00ff) - (rb0 & 0x00ff00ff)) * v) >> 8); + ag0 = (ag0 & 0x00ff00ff) + ((((ag1 & 0x00ff00ff) - (ag0 & 0x00ff00ff)) * v) >> 8); + + UINT32 result = ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff); + this->set(result); + } + protected: INT32 m_a; INT32 m_r; diff --git a/src/emu/video/rgbsse.h b/src/emu/video/rgbsse.h index 4feb83a7c59..f51f77f5c47 100644 --- a/src/emu/video/rgbsse.h +++ b/src/emu/video/rgbsse.h @@ -221,6 +221,11 @@ public: m_value = _mm_and_si128(m_value, color.m_value); } + inline void andnot_reg(const rgbaint_t& color) + { + m_value = _mm_andnot_si128(color.m_value, m_value); + } + inline void and_imm(const INT32 value) { m_value = _mm_and_si128(m_value, _mm_set1_epi32(value)); @@ -256,6 +261,14 @@ public: m_value = _mm_or_si128(_mm_and_si128(vsign, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff)))); } + inline void clamp_to_uint8() + { + m_value = _mm_packs_epi32(m_value, _mm_setzero_si128()); + m_value = _mm_packus_epi16(m_value, _mm_setzero_si128()); + m_value = _mm_unpacklo_epi8(m_value, _mm_setzero_si128()); + m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128()); + } + inline void sign_extend(const UINT32 compare, const UINT32 sign) { __m128i compare_vec = _mm_set1_epi32(compare); @@ -414,6 +427,27 @@ public: return _mm_cvtsi128_si32(color01); } + inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) + { + __m128i color00 = _mm_cvtsi32_si128(rgb00); + __m128i color01 = _mm_cvtsi32_si128(rgb01); + __m128i color10 = _mm_cvtsi32_si128(rgb10); + __m128i color11 = _mm_cvtsi32_si128(rgb11); + + /* interleave color01 and color00 at the byte level */ + color01 = _mm_unpacklo_epi8(color01, color00); + color11 = _mm_unpacklo_epi8(color11, color10); + color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128()); + color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128()); + color01 = _mm_madd_epi16(color01, scale_factor(u)); + color11 = _mm_madd_epi16(color11, scale_factor(u)); + color01 = _mm_slli_epi32(color01, 15); + color11 = _mm_srli_epi32(color11, 1); + color01 = _mm_max_epi16(color01, color11); + color01 = _mm_madd_epi16(color01, scale_factor(v)); + m_value = _mm_srli_epi32(color01, 15); + } + protected: struct _statics { diff --git a/src/emu/video/vooddefs.h b/src/emu/video/vooddefs.h index bb320bf237e..efa82284350 100644 --- a/src/emu/video/vooddefs.h +++ b/src/emu/video/vooddefs.h @@ -27,7 +27,7 @@ enum #define USE_OLD_RASTER 0 // Use old table lookup versus straight double divide -#define USE_FAST_RECIP 1 +#define USE_FAST_RECIP 0 /* maximum number of TMUs */ #define MAX_TMU 2 @@ -2199,33 +2199,35 @@ do } \ while (0) -// NB: This code should no longer be SSE2-specific now that it uses rgbaint_t, consider removing the #define and the #else case. -INLINE UINT32 ATTR_FORCE_INLINE clampARGB(INT32 iterr, INT32 iterg, INT32 iterb, INT32 itera, UINT32 FBZCP) +INLINE rgbaint_t ATTR_FORCE_INLINE clampARGB(const rgbaint_t &iterargb, UINT32 FBZCP) { - rgb_t result; - rgbaint_t colorint((INT32) (itera>>12), (INT32) (iterr>>12), (INT32) (iterg>>12), (INT32) (iterb>>12)); + rgbaint_t result(iterargb); + //rgbaint_t colorint((INT32) (itera>>12), (INT32) (iterr>>12), (INT32) (iterg>>12), (INT32) (iterb>>12)); + result.shr_imm(12); if (FBZCP_RGBZW_CLAMP(FBZCP) == 0) { //r &= 0xfff; - colorint.and_imm(0xfff); + result.and_imm(0xfff); //if (r == 0xfff) - rgbaint_t temp(colorint); + rgbaint_t temp(result); temp.cmpeq_imm(0xfff); // result.rgb.r = 0; - temp.xor_imm(0xffffffff); - colorint.and_reg(temp); + result.andnot_reg(temp); //else if (r == 0x100) - temp.set(colorint); + temp.set(result); temp.cmpeq_imm(0x100); + // Shift by 1 so that INT32 result is not negative + temp.shr_imm(1); // result.rgb.r = 0xff; - colorint.or_reg(temp); - return colorint.to_rgba(); + result.or_reg(temp); } else { - return colorint.to_rgba_clamp(); + //return colorint.to_rgba_clamp(); } + result.clamp_to_uint8(); + return result; } #define CLAMPED_Z(ITERZ, FBZCP, RESULT) \ @@ -2344,10 +2346,12 @@ do } \ while (0) -INLINE bool ATTR_FORCE_INLINE chromaKeyTest(voodoo_state *v, stats_block *stats, UINT32 fbzModeReg, rgb_union color) +INLINE bool ATTR_FORCE_INLINE chromaKeyTest(voodoo_state *v, stats_block *stats, UINT32 fbzModeReg, rgbaint_t rgbaIntColor) { if (FBZMODE_ENABLE_CHROMAKEY(fbzModeReg)) { + rgb_union color; + color.u = (rgbaIntColor.get_a()<<24) | (rgbaIntColor.get_r()<<16) | (rgbaIntColor.get_g()<<8) | rgbaIntColor.get_b(); /* non-range version */ if (!CHROMARANGE_ENABLE(v->reg[chromaRange].u)) { @@ -2746,7 +2750,7 @@ do } \ while (0) -INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 x, const UINT8 *dither, int dpix, UINT16 *depth, rgb_union preFog, rgb_union &color) +INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 x, const UINT8 *dither, int dpix, UINT16 *depth, rgbaint_t &preFog, rgbaint_t &srcColor) { if (ALPHAMODE_ALPHABLEND(ALPHAMODE)) { @@ -2758,7 +2762,7 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 //int sg = (GG); //int sb = (BB); //int sa = (AA); - int sa = color.rgb.a; + int sa = srcColor.get_a(); int ta; int srcAlphaScale, destAlphaScale; rgbaint_t srcScale, destScale; @@ -2791,7 +2795,8 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 break; case 1: /* ASRC_ALPHA */ - srcScale.set(srcAlphaScale, sa, sa, sa); + srcScale.set(srcAlphaScale-1, sa, sa, sa); + srcScale.add_imm(1); //(RR) = (sr * (sa + 1)) >> 8; //(GG) = (sg * (sa + 1)) >> 8; //(BB) = (sb * (sa + 1)) >> 8; @@ -2842,6 +2847,7 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 case 15: /* ASATURATE */ ta = (sa < (0x100 - da)) ? sa : (0x100 - da); + ta++; srcScale.set(srcAlphaScale, ta, ta, ta); //(RR) = (sr * (ta + 1)) >> 8; //(GG) = (sg * (ta + 1)) >> 8; @@ -2864,7 +2870,7 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 break; case 1: /* ASRC_ALPHA */ - destScale.set(destAlphaScale, sa, sa, sa); + destScale.set(destAlphaScale-1, sa, sa, sa); destScale.add_imm(1); //(RR) += (dr * (sa + 1)) >> 8; //(GG) += (dg * (sa + 1)) >> 8; @@ -2872,8 +2878,9 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 break; case 2: /* A_COLOR */ - destScale.set((rgb_t) (((destAlphaScale-1)<<24) | (color.u & 0x00ffffff))); + destScale.set(srcColor); destScale.add_imm(1); + destScale.set_a(destAlphaScale); //(RR) += (dr * (sr + 1)) >> 8; //(GG) += (dg * (sg + 1)) >> 8; //(BB) += (db * (sb + 1)) >> 8; @@ -2903,7 +2910,10 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 break; case 6: /* AOM_COLOR */ - destScale.set(destAlphaScale, (0x100 - color.rgb.r), (0x100 - color.rgb.g), (0x100 - color.rgb.b)); + destScale.set(0x100, 0x100, 0x100, 0x100); + destScale.sub(srcColor); + destScale.set_a(destAlphaScale); + //destScale.set(destAlphaScale, (0x100 - color.rgb.r), (0x100 - color.rgb.g), (0x100 - color.rgb.b)); //(RR) += (dr * (0x100 - sr)) >> 8; //(GG) += (dg * (0x100 - sg)) >> 8; //(BB) += (db * (0x100 - sb)) >> 8; @@ -2918,19 +2928,20 @@ INLINE void ATTR_FORCE_INLINE alphaBlend(UINT32 FBZMODE, UINT32 ALPHAMODE, INT32 break; case 15: /* A_COLORBEFOREFOG */ - destScale.set((rgb_t) (((destAlphaScale-1)<<24) | (preFog.u & 0x00ffffff))); + destScale.set(preFog); destScale.add_imm(1); + destScale.set_a(destAlphaScale); + //destScale.set((rgb_t) (((destAlphaScale-1)<<24) | (preFog.u & 0x00ffffff))); + //destScale.add_imm(1); //(RR) += (dr * (prefogr + 1)) >> 8; //(GG) += (dg * (prefogg + 1)) >> 8; //(BB) += (db * (prefogb + 1)) >> 8; break; } // Main blend - rgbaint_t srcColor((rgb_t) color.u); rgbaint_t destColor(da, dr, dg, db); srcColor.scale_add_and_clamp(srcScale, destColor, destScale); - color.u = srcColor.to_rgba(); /* clamp */ //CLAMP((RR), 0x00, 0xff); //CLAMP((GG), 0x00, 0xff); @@ -3057,32 +3068,32 @@ do } \ while (0) -INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, UINT32 fbzCpReg, INT32 x, const UINT8 *dither4, INT32 fogDepth, rgb_union &color, INT32 iterz, INT64 iterw, rgb_union iterargb) +INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, UINT32 fbzCpReg, INT32 x, const UINT8 *dither4, INT32 fogDepth, + rgbaint_t &color, INT32 iterz, INT64 iterw, UINT8 itera) { if (FOGMODE_ENABLE_FOG(fogModeReg)) { - UINT32 color_alpha = color.u & 0xff000000; - rgbaint_t tmpA; + UINT32 color_alpha = color.get_a(); /* constant fog bypasses everything else */ - rgb_union fogColorLocal = v->reg[fogColor]; - rgbaint_t tmpB((rgb_t) color.u); + rgbaint_t fogColorLocal(v->reg[fogColor].u); + if (FOGMODE_FOG_CONSTANT(fogModeReg)) { - tmpA.set((rgb_t) fogColorLocal.u); /* if fog_mult is 0, we add this to the original color */ if (FOGMODE_FOG_MULT(fogModeReg) == 0) { - tmpA.add(tmpB); + fogColorLocal.add(color); + fogColorLocal.clamp_to_uint8(); //color += fog; } /* otherwise this just becomes the new color */ - //else - //{ + else + { + //color = fogColorLocal; //color = fog; - //} - color.u = tmpA.to_rgba_clamp(); + } } /* non-constant fog comes from several sources */ else @@ -3091,15 +3102,13 @@ INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, U /* if fog_add is zero, we start with the fog color */ if (FOGMODE_FOG_ADD(fogModeReg)) - fogColorLocal.u = 0; + fogColorLocal.set(0, 0, 0, 0); //fr = fg = fb = 0; - tmpA.set((rgb_t) fogColorLocal.u); - /* if fog_mult is zero, we subtract the incoming color */ if (!FOGMODE_FOG_MULT(fogModeReg)) { - tmpA.sub(tmpB); + fogColorLocal.sub(color); //fog.rgb -= color.rgb; //fr -= (RR); //fg -= (GG); @@ -3134,7 +3143,7 @@ INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, U } case 1: /* iterated A */ - fogblend = iterargb.rgb.a; + fogblend = itera; break; case 2: /* iterated Z */ @@ -3156,7 +3165,7 @@ INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, U /* if fog_mult is 0, we add this to the original color */ if (FOGMODE_FOG_MULT(fogModeReg) == 0) { - tmpA.scale_imm_add_and_clamp((INT32) fogblend, tmpB); + fogColorLocal.scale_imm_add_and_clamp(fogblend, color); //color += fog; //(RR) += fr; //(GG) += fg; @@ -3166,13 +3175,12 @@ INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, U /* otherwise this just becomes the new color */ else { - tmpA.scale_imm_and_clamp(fogblend); + fogColorLocal.scale_imm_and_clamp(fogblend); //color = fog; //(RR) = fr; //(GG) = fg; //(BB) = fb; } - color.u = tmpA.to_rgba(); } @@ -3180,7 +3188,8 @@ INLINE void ATTR_FORCE_INLINE applyFogging(voodoo_state *v, UINT32 fogModeReg, U //CLAMP((RR), 0x00, 0xff); //CLAMP((GG), 0x00, 0xff); //CLAMP((BB), 0x00, 0xff); - color.u = (color.u & 0x00ffffff) | color_alpha; + fogColorLocal.set_a(color_alpha); + color.set(fogColorLocal); } } @@ -3778,28 +3787,19 @@ INLINE bool ATTR_FORCE_INLINE depthTest(UINT16 zaColorReg, stats_block *stats, I return true; } +#if USE_OLD_RASTER == 1 #define PIXEL_PIPELINE_END(VV, STATS, DITHER, DITHER4, DITHER_LOOKUP, XX, dest, depth, FBZMODE, FBZCOLORPATH, ALPHAMODE, FOGMODE, ITERZ, ITERW, ITERAXXX) \ \ - if (USE_OLD_RASTER) { \ - /* perform fogging */ \ - INT32 prefogr, prefogg, prefogb; \ - prefogr = r; \ - prefogg = g; \ - prefogb = b; \ - APPLY_FOGGING(VV, FOGMODE, FBZCOLORPATH, XX, DITHER4, r, g, b, \ - ITERZ, ITERW, ITERAXXX); \ + /* perform fogging */ \ + INT32 prefogr, prefogg, prefogb; \ + prefogr = r; \ + prefogg = g; \ + prefogb = b; \ + APPLY_FOGGING(VV, FOGMODE, FBZCOLORPATH, XX, DITHER4, r, g, b, \ + ITERZ, ITERW, ITERAXXX); \ \ /* perform alpha blending */ \ APPLY_ALPHA_BLEND(FBZMODE, ALPHAMODE, XX, DITHER, r, g, b, a); \ - } else { \ - /* perform fogging */ \ - rgb_union preFog; \ - preFog.u = color.u; \ - applyFogging(VV, FOGMODE, FBZCOLORPATH, XX, DITHER4, fogdepth, color, ITERZ, ITERW, ITERAXXX); \ - /* perform alpha blending */ \ - alphaBlend(FBZMODE, ALPHAMODE, XX, DITHER, dest[XX], depth, preFog, color); \ - a = color.rgb.a; r = color.rgb.r; g = color.rgb.g; b = color.rgb.b; \ - } \ /* modify the pixel for debugging purposes */ \ MODIFY_PIXEL(VV); \ \ @@ -3827,7 +3827,43 @@ skipdrawdepth: ; \ } \ while (0) - +#else +#define PIXEL_PIPELINE_END(VV, STATS, DITHER, DITHER4, DITHER_LOOKUP, XX, dest, depth, FBZMODE, FBZCOLORPATH, ALPHAMODE, FOGMODE, ITERZ, ITERW, ITERAXXX) \ + \ + /* perform fogging */ \ + preFog.set(color); \ + applyFogging(VV, FOGMODE, FBZCOLORPATH, XX, DITHER4, fogdepth, color, ITERZ, ITERW, ITERAXXX.get_a()); \ + /* perform alpha blending */ \ + alphaBlend(FBZMODE, ALPHAMODE, XX, DITHER, dest[XX], depth, preFog, color); \ + a = color.get_a(); r = color.get_r(); g = color.get_g(); b = color.get_b(); \ + /* modify the pixel for debugging purposes */ \ + MODIFY_PIXEL(VV); \ + \ + /* write to framebuffer */ \ + if (FBZMODE_RGB_BUFFER_MASK(FBZMODE)) \ + { \ + /* apply dithering */ \ + APPLY_DITHER(FBZMODE, XX, DITHER_LOOKUP, r, g, b); \ + dest[XX] = (r << 11) | (g << 5) | b; \ + } \ + \ + /* write to aux buffer */ \ + if (depth && FBZMODE_AUX_BUFFER_MASK(FBZMODE)) \ + { \ + if (FBZMODE_ENABLE_ALPHA_PLANES(FBZMODE) == 0) \ + depth[XX] = biasdepth; \ + else \ + depth[XX] = a; \ + } \ + \ + /* track pixel writes to the frame buffer regardless of mask */ \ + (STATS)->pixels_out++; \ + \ +skipdrawdepth: \ + ; \ +} \ +while (0) +#endif /************************************* @@ -4128,28 +4164,28 @@ do while (0) INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, UINT32 FBZCOLORPATH, UINT32 FBZMODE, UINT32 ALPHAMODE, - rgb_union TEXELARGB, INT32 ITERZ, INT64 ITERW, rgb_union ITERARGB, rgb_union &color) + rgbaint_t TEXELARGB, INT32 ITERZ, INT64 ITERW, rgbaint_t &srcColor) { - rgb_union c_other; - rgb_union c_local; + rgbaint_t c_other; + rgbaint_t c_local; /* compute c_other */ switch (FBZCP_CC_RGBSELECT(FBZCOLORPATH)) { case 0: /* iterated RGB */ - c_other.u = ITERARGB.u; + c_other.set(srcColor); break; case 1: /* texture RGB */ - c_other.u = TEXELARGB.u; + c_other.set(TEXELARGB); break; case 2: /* color1 RGB */ - c_other.u = (VV)->reg[color1].u; + c_other.set((VV)->reg[color1].u); break; default: /* reserved - voodoo3 framebufferRGB */ - c_other.u = 0; + c_other.set(0); break; } @@ -4162,24 +4198,24 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, switch (FBZCP_CC_ASELECT(FBZCOLORPATH)) { case 0: /* iterated alpha */ - c_other.rgb.a = ITERARGB.rgb.a; + c_other.merge_alpha(srcColor); break; case 1: /* texture alpha */ - c_other.rgb.a = TEXELARGB.rgb.a; + c_other.merge_alpha(TEXELARGB); break; case 2: /* color1 alpha */ - c_other.rgb.a = (VV)->reg[color1].rgb.a; + c_other.set_a((VV)->reg[color1].rgb.a); break; default: /* reserved */ - c_other.rgb.a = 0; + c_other.set_a(0); break; } /* handle alpha mask */ - if (!alphaMaskTest(STATS, FBZMODE, c_other.rgb.a)) + if (!alphaMaskTest(STATS, FBZMODE, c_other.get_a())) return false; //APPLY_ALPHAMASK(VV, STATS, FBZMODE, c_other.rgb.a); @@ -4188,16 +4224,16 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, if (FBZCP_CC_LOCALSELECT_OVERRIDE(FBZCOLORPATH) == 0) { if (FBZCP_CC_LOCALSELECT(FBZCOLORPATH) == 0) /* iterated RGB */ - c_local.u = ITERARGB.u; + c_local.set(srcColor); else /* color0 RGB */ - c_local.u = (VV)->reg[color0].u; + c_local.set((VV)->reg[color0].u); } else { - if (!(TEXELARGB.rgb.a & 0x80)) /* iterated RGB */ - c_local.u = ITERARGB.u; + if (!(TEXELARGB.get_a() & 0x80)) /* iterated RGB */ + c_local.set(srcColor); else /* color0 RGB */ - c_local.u = (VV)->reg[color0].u; + c_local.set((VV)->reg[color0].u); } /* compute a_local */ @@ -4205,18 +4241,18 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, { default: case 0: /* iterated alpha */ - c_local.rgb.a = ITERARGB.rgb.a; + c_local.merge_alpha(srcColor); break; case 1: /* color0 alpha */ - c_local.rgb.a = (VV)->reg[color0].rgb.a; + c_local.set_a((VV)->reg[color0].rgb.a); break; case 2: /* clamped iterated Z[27:20] */ { int temp; CLAMPED_Z(ITERZ, FBZCOLORPATH, temp); - c_local.rgb.a = (UINT8)temp; + c_local.set_a((UINT8) temp); break; } @@ -4224,42 +4260,37 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, { int temp; CLAMPED_W(ITERW, FBZCOLORPATH, temp); /* Voodoo 2 only */ - c_local.rgb.a = (UINT8)temp; + c_local.set_a((UINT8) temp); break; } } - UINT8 a_other = c_other.rgb.a; - UINT8 a_local = c_local.rgb.a; + UINT8 a_other = c_other.get_a(); + UINT8 a_local = c_local.get_a(); UINT8 tmp; - rgb_union add_val = c_local; - rgbaint_t tmpA, tmpB, tmpC; - + rgbaint_t add_val(c_local); /* select zero or c_other */ if (FBZCP_CC_ZERO_OTHER(FBZCOLORPATH)) - c_other.u &= 0xff000000; + c_other.and_imm_rgba(-1, 0, 0, 0); //r = g = b = 0; /* select zero or a_other */ if (FBZCP_CCA_ZERO_OTHER(FBZCOLORPATH)) - c_other.u &= 0x00ffffff; - - tmpA.set((rgb_t) c_other.u); + c_other.set_a(0); /* subtract a/c_local */ if (FBZCP_CC_SUB_CLOCAL(FBZCOLORPATH) || (FBZCP_CCA_SUB_CLOCAL(FBZCOLORPATH))) { - rgb_union sub_val = c_local; + rgbaint_t sub_val = c_local; if (!FBZCP_CC_SUB_CLOCAL(FBZCOLORPATH)) - sub_val.u &= 0xff000000; + sub_val.set(a_local, 0, 0, 0); if (!FBZCP_CCA_SUB_CLOCAL(FBZCOLORPATH)) - sub_val.u &= 0x00ffffff; + sub_val.set_a(0); - tmpB.set((rgb_t) sub_val.u); - tmpA.sub(tmpB); + c_other.sub(sub_val); } /* blend RGB */ @@ -4267,27 +4298,27 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, { default: /* reserved */ case 0: /* 0 */ - c_local.u &= 0xff000000; + c_local.and_imm_rgba(-1, 0, 0, 0); break; case 1: /* c_local */ break; case 2: /* a_other */ - c_local.u = (c_local.u & 0xff000000) | (a_other<<16) | (a_other<<8) | (a_other); + c_local.set(a_local, a_other, a_other, a_other); break; case 3: /* a_local */ - c_local.u = (c_local.u & 0xff000000) | (a_local<<16) | (a_local<<8) | (a_local); + c_local.set(a_local, a_local, a_local, a_local); break; case 4: /* texture alpha */ - tmp = TEXELARGB.rgb.a; - c_local.u = (c_local.u & 0xff000000) | (tmp<<16) | (tmp<<8) | (tmp); + tmp = TEXELARGB.get_a(); + c_local.set(a_local, tmp, tmp, tmp); break; case 5: /* texture RGB (Voodoo 2 only) */ - c_local.u = (c_local.u & 0xff000000) | (TEXELARGB.u & 0x00ffffff); + c_local.set(TEXELARGB); break; } @@ -4296,30 +4327,30 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, { default: /* reserved */ case 0: /* 0 */ - c_local.u &= 0x00ffffff; + c_local.set_a(0); break; case 1: /* a_local */ case 3: /* a_local */ - c_local.rgb.a = a_local; + c_local.set_a(a_local); break; case 2: /* a_other */ - c_local.rgb.a = a_other; + c_local.set_a(a_other); break; case 4: /* texture alpha */ - c_local.rgb.a = TEXELARGB.rgb.a; + c_local.merge_alpha(TEXELARGB); break; } /* reverse the RGB blend */ if (!FBZCP_CC_REVERSE_BLEND(FBZCOLORPATH)) - c_local.u ^= 0x00ffffff; + c_local.xor_imm_rgba(0, 0xff, 0xff, 0xff); /* reverse the alpha blend */ if (!FBZCP_CCA_REVERSE_BLEND(FBZCOLORPATH)) - c_local.u ^= 0xff000000; + c_local.xor_imm_rgba(0xff, 0, 0, 0); /* do the blend */ //color.rgb.a = (color.rgb.a * (blenda + 1)) >> 8; @@ -4327,48 +4358,46 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, //color.rgb.g = (color.rgb.g * (blendg + 1)) >> 8; //color.rgb.b = (color.rgb.b * (blendb + 1)) >> 8; - /* add clocal or alocal to alpha */ - if (!FBZCP_CCA_ADD_ACLOCAL(FBZCOLORPATH)) - add_val.u &= 0x00ffffff; - //color.rgb.a += c_local.rgb.a; - /* add clocal or alocal to RGB */ switch (FBZCP_CC_ADD_ACLOCAL(FBZCOLORPATH)) { case 3: /* reserved */ case 0: /* nothing */ - add_val.u &= 0xff000000; + add_val.set(a_local, 0, 0, 0); break; case 1: /* add c_local */ break; case 2: /* add_alocal */ - add_val.u = (add_val.u & 0xff000000) | (a_local<<16) | (a_local<<8) | (a_local); + add_val.set(a_local, a_local, a_local, a_local); break; } + /* add clocal or alocal to alpha */ + if (!FBZCP_CCA_ADD_ACLOCAL(FBZCOLORPATH)) + add_val.set_a(0); + //color.rgb.a += c_local.rgb.a; + /* clamp */ //CLAMP(color.rgb.a, 0x00, 0xff); //CLAMP(color.rgb.r, 0x00, 0xff); //CLAMP(color.rgb.g, 0x00, 0xff); //CLAMP(color.rgb.b, 0x00, 0xff); - tmpB.set((rgb_t) c_local.u); - tmpB.add_imm(1); - tmpC.set((rgb_t) add_val.u); - tmpA.scale_add_and_clamp(tmpB, tmpC); - color.u = tmpA.to_rgba(); + c_local.add_imm(1); + c_other.scale_add_and_clamp(c_local, add_val); + srcColor.set(c_other); /* invert */ if (FBZCP_CCA_INVERT_OUTPUT(FBZCOLORPATH)) - color.u ^= 0xff000000; + srcColor.xor_imm_rgba(0xff, 0, 0, 0); /* invert */ if (FBZCP_CC_INVERT_OUTPUT(FBZCOLORPATH)) - color.u ^= 0x00ffffff; + srcColor.xor_imm_rgba(0, 0xff, 0xff, 0xff); /* handle alpha test */ - if (!alphaTest(VV, STATS, ALPHAMODE, color.rgb.a)) + if (!alphaTest(VV, STATS, ALPHAMODE, srcColor.get_a())) return false; //APPLY_ALPHATEST(VV, STATS, ALPHAMODE, color.rgb.a); @@ -4382,7 +4411,7 @@ INLINE bool ATTR_FORCE_INLINE combineColor(voodoo_state *VV, stats_block *STATS, * Rasterizer generator macro * *************************************/ - +#if USE_OLD_RASTER == 1 #define RASTERIZER(name, TMUS, FBZCOLORPATH, FBZMODE, ALPHAMODE, FOGMODE, TEXMODE0, TEXMODE1) \ \ static void raster_##name(void *destbase, INT32 y, const poly_extent *extent, const void *extradata, int threadid) \ @@ -4474,74 +4503,34 @@ static void raster_##name(void *destbase, INT32 y, const poly_extent *extent, co { \ rgb_union iterargb; \ rgb_union texel = { 0 }; \ - rgb_union color; \ \ /* pixel pipeline part 1 handles depth setup and stippling */ \ PIXEL_PIPELINE_BEGIN(v, stats, x, y, FBZCOLORPATH, FBZMODE, iterz, iterw); \ - if (USE_OLD_RASTER) { \ - DEPTH_TEST(v, stats, x, FBZMODE); \ - \ - /* run the texture pipeline on TMU1 to produce a value in texel */ \ - /* note that they set LOD min to 8 to "disable" a TMU */ \ - if (TMUS >= 2 && v->tmu[1].lodmin < (8 << 8)) \ - TEXTURE_PIPELINE(&v->tmu[1], x, dither4, TEXMODE1, texel, \ - v->tmu[1].lookup, extra->lodbase1, \ - iters1, itert1, iterw1, texel); \ - \ - /* run the texture pipeline on TMU0 to produce a final */ \ - /* result in texel */ \ - /* note that they set LOD min to 8 to "disable" a TMU */ \ - if (TMUS >= 1 && v->tmu[0].lodmin < (8 << 8)) \ - { \ - if (!v->send_config) \ - TEXTURE_PIPELINE(&v->tmu[0], x, dither4, TEXMODE0, texel, \ - v->tmu[0].lookup, extra->lodbase0, \ - iters0, itert0, iterw0, texel); \ - else \ - texel.u = v->tmu_config; \ - } \ - /* colorpath pipeline selects source colors and does blending */ \ - CLAMPED_ARGB(iterr, iterg, iterb, itera, FBZCOLORPATH, iterargb); \ - COLORPATH_PIPELINE(v, stats, FBZCOLORPATH, FBZMODE, ALPHAMODE, texel, \ - iterz, iterw, iterargb); \ - } else { \ - /* depth testing */ \ - if (!depthTest((UINT16) v->reg[zaColor].u, stats, depth[x], FBZMODE, biasdepth)) \ - goto skipdrawdepth; \ - \ - /* run the texture pipeline on TMU1 to produce a value in texel */ \ - /* note that they set LOD min to 8 to "disable" a TMU */ \ - if (TMUS >= 2 && v->tmu[1].lodmin < (8 << 8)) { \ - INT32 tmp; \ - const rgb_union texelZero = {0}; \ - texel.u = genTexture(&v->tmu[1], x, dither4, TEXMODE1, v->tmu[1].lookup, extra->lodbase1, \ - iters1, itert1, iterw1, tmp); \ - texel.u = combineTexture(&v->tmu[1], TEXMODE1, texel, texelZero, tmp); \ - } \ - /* run the texture pipeline on TMU0 to produce a final */ \ - /* result in texel */ \ - /* note that they set LOD min to 8 to "disable" a TMU */ \ - if (TMUS >= 1 && v->tmu[0].lodmin < (8 << 8)) \ - { \ - rgb_union texelT0; \ - if (!v->send_config) \ - { \ - INT32 lod0; \ - texelT0.u = genTexture(&v->tmu[0], x, dither4, TEXMODE0, v->tmu[0].lookup, extra->lodbase0, \ - iters0, itert0, iterw0, lod0); \ - texel.u = combineTexture(&v->tmu[0], TEXMODE0, texelT0, texel, lod0); \ - } \ - else \ - { \ - texel.u=v->tmu_config; \ - } \ - } \ - \ - /* colorpath pipeline selects source colors and does blending */ \ - iterargb.u = clampARGB(iterr, iterg, iterb, itera, FBZCOLORPATH); \ - if (!combineColor(v, stats, FBZCOLORPATH, FBZMODE, ALPHAMODE, texel, iterz, iterw, iterargb, color)) \ - goto skipdrawdepth; \ - } \ + DEPTH_TEST(v, stats, x, FBZMODE); \ + \ + /* run the texture pipeline on TMU1 to produce a value in texel */ \ + /* note that they set LOD min to 8 to "disable" a TMU */ \ + if (TMUS >= 2 && v->tmu[1].lodmin < (8 << 8)) \ + TEXTURE_PIPELINE(&v->tmu[1], x, dither4, TEXMODE1, texel, \ + v->tmu[1].lookup, extra->lodbase1, \ + iters1, itert1, iterw1, texel); \ + \ + /* run the texture pipeline on TMU0 to produce a final */ \ + /* result in texel */ \ + /* note that they set LOD min to 8 to "disable" a TMU */ \ + if (TMUS >= 1 && v->tmu[0].lodmin < (8 << 8)) \ + { \ + if (!v->send_config) \ + TEXTURE_PIPELINE(&v->tmu[0], x, dither4, TEXMODE0, texel, \ + v->tmu[0].lookup, extra->lodbase0, \ + iters0, itert0, iterw0, texel); \ + else \ + texel.u = v->tmu_config; \ + } \ + /* colorpath pipeline selects source colors and does blending */ \ + CLAMPED_ARGB(iterr, iterg, iterb, itera, FBZCOLORPATH, iterargb); \ + COLORPATH_PIPELINE(v, stats, FBZCOLORPATH, FBZMODE, ALPHAMODE, texel, \ + iterz, iterw, iterargb); \ \ /* pixel pipeline part 2 handles fog, alpha, and final output */ \ PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x, dest, depth, \ @@ -4569,7 +4558,165 @@ static void raster_##name(void *destbase, INT32 y, const poly_extent *extent, co } \ } \ } - +#else +// New rasterizer implementation +#define RASTERIZER(name, TMUS, FBZCOLORPATH, FBZMODE, ALPHAMODE, FOGMODE, TEXMODE0, TEXMODE1) \ + \ +static void raster_##name(void *destbase, INT32 y, const poly_extent *extent, const void *extradata, int threadid) \ +{ \ + const poly_extra_data *extra = (const poly_extra_data *)extradata; \ + voodoo_state *v = extra->state; \ + stats_block *stats = &v->thread_stats[threadid]; \ + DECLARE_DITHER_POINTERS; \ + INT32 startx = extent->startx; \ + INT32 stopx = extent->stopx; \ + rgbaint_t iterargb, iterargbDelta; \ + INT32 iterz; \ + INT64 iterw, iterw0 = 0, iterw1 = 0; \ + INT64 iters0 = 0, iters1 = 0; \ + INT64 itert0 = 0, itert1 = 0; \ + UINT16 *depth; \ + UINT16 *dest; \ + INT32 dx, dy; \ + INT32 scry; \ + INT32 x; \ + \ + /* determine the screen Y */ \ + scry = y; \ + if (FBZMODE_Y_ORIGIN(FBZMODE)) \ + scry = (v->fbi.yorigin - y) & 0x3ff; \ + \ + /* compute dithering */ \ + COMPUTE_DITHER_POINTERS(FBZMODE, y); \ + \ + /* apply clipping */ \ + if (FBZMODE_ENABLE_CLIPPING(FBZMODE)) \ + { \ + INT32 tempclip; \ + \ + /* Y clipping buys us the whole scanline */ \ + if (scry < ((v->reg[clipLowYHighY].u >> 16) & 0x3ff) || \ + scry >= (v->reg[clipLowYHighY].u & 0x3ff)) \ + { \ + stats->pixels_in += stopx - startx; \ + stats->clip_fail += stopx - startx; \ + return; \ + } \ + \ + /* X clipping */ \ + tempclip = (v->reg[clipLeftRight].u >> 16) & 0x3ff; \ + if (startx < tempclip) \ + { \ + stats->pixels_in += tempclip - startx; \ + v->stats.total_clipped += tempclip - startx; \ + startx = tempclip; \ + } \ + tempclip = v->reg[clipLeftRight].u & 0x3ff; \ + if (stopx >= tempclip) \ + { \ + stats->pixels_in += stopx - tempclip; \ + v->stats.total_clipped += stopx - tempclip; \ + stopx = tempclip - 1; \ + } \ + } \ + \ + /* get pointers to the target buffer and depth buffer */ \ + dest = (UINT16 *)destbase + scry * v->fbi.rowpixels; \ + depth = (v->fbi.auxoffs != ~0) ? ((UINT16 *)(v->fbi.ram + v->fbi.auxoffs) + scry * v->fbi.rowpixels) : NULL; \ + \ + /* compute the starting parameters */ \ + dx = startx - (extra->ax >> 4); \ + dy = y - (extra->ay >> 4); \ + INT32 iterr = extra->startr + dy * extra->drdy + dx * extra->drdx; \ + INT32 iterg = extra->startg + dy * extra->dgdy + dx * extra->dgdx; \ + INT32 iterb = extra->startb + dy * extra->dbdy + dx * extra->dbdx; \ + INT32 itera = extra->starta + dy * extra->dady + dx * extra->dadx; \ + iterargb.set(itera, iterr, iterg, iterb); \ + iterargbDelta.set(extra->dadx, extra->drdx, extra->dgdx, extra->dbdx); \ + iterz = extra->startz + dy * extra->dzdy + dx * extra->dzdx; \ + iterw = extra->startw + dy * extra->dwdy + dx * extra->dwdx; \ + if (TMUS >= 1) \ + { \ + iterw0 = extra->startw0 + dy * extra->dw0dy + dx * extra->dw0dx; \ + iters0 = extra->starts0 + dy * extra->ds0dy + dx * extra->ds0dx; \ + itert0 = extra->startt0 + dy * extra->dt0dy + dx * extra->dt0dx; \ + } \ + if (TMUS >= 2) \ + { \ + iterw1 = extra->startw1 + dy * extra->dw1dy + dx * extra->dw1dx; \ + iters1 = extra->starts1 + dy * extra->ds1dy + dx * extra->ds1dx; \ + itert1 = extra->startt1 + dy * extra->dt1dy + dx * extra->dt1dx; \ + } \ + extra->info->hits++; \ + /* loop in X */ \ + for (x = startx; x < stopx; x++) \ + { \ + rgbaint_t texel(0); \ + rgbaint_t color, preFog; \ + \ + /* pixel pipeline part 1 handles depth setup and stippling */ \ + PIXEL_PIPELINE_BEGIN(v, stats, x, y, FBZCOLORPATH, FBZMODE, iterz, iterw); \ + /* depth testing */ \ + if (!depthTest((UINT16) v->reg[zaColor].u, stats, depth[x], FBZMODE, biasdepth)) \ + goto skipdrawdepth; \ + \ + /* run the texture pipeline on TMU1 to produce a value in texel */ \ + /* note that they set LOD min to 8 to "disable" a TMU */ \ + if (TMUS >= 2 && v->tmu[1].lodmin < (8 << 8)) { \ + INT32 tmp; \ + const rgbaint_t texelZero(0); \ + texel = genTexture(&v->tmu[1], x, dither4, TEXMODE1, v->tmu[1].lookup, extra->lodbase1, \ + iters1, itert1, iterw1, tmp); \ + texel = combineTexture(&v->tmu[1], TEXMODE1, texel, texelZero, tmp); \ + } \ + /* run the texture pipeline on TMU0 to produce a final */ \ + /* result in texel */ \ + /* note that they set LOD min to 8 to "disable" a TMU */ \ + if (TMUS >= 1 && v->tmu[0].lodmin < (8 << 8)) \ + { \ + if (!v->send_config) \ + { \ + INT32 lod0; \ + rgbaint_t texelT0; \ + texelT0 = genTexture(&v->tmu[0], x, dither4, TEXMODE0, v->tmu[0].lookup, extra->lodbase0, \ + iters0, itert0, iterw0, lod0); \ + texel = combineTexture(&v->tmu[0], TEXMODE0, texelT0, texel, lod0); \ + } \ + else \ + { \ + texel.set(v->tmu_config); \ + } \ + } \ + \ + /* colorpath pipeline selects source colors and does blending */ \ + color = clampARGB(iterargb, FBZCOLORPATH); \ + if (!combineColor(v, stats, FBZCOLORPATH, FBZMODE, ALPHAMODE, texel, iterz, iterw, color)) \ + goto skipdrawdepth; \ + \ + /* pixel pipeline part 2 handles fog, alpha, and final output */ \ + PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x, dest, depth, \ + FBZMODE, FBZCOLORPATH, ALPHAMODE, FOGMODE, \ + iterz, iterw, iterargb); \ + \ + /* update the iterated parameters */ \ + iterargb += iterargbDelta; \ + iterz += extra->dzdx; \ + iterw += extra->dwdx; \ + if (TMUS >= 1) \ + { \ + iterw0 += extra->dw0dx; \ + iters0 += extra->ds0dx; \ + itert0 += extra->dt0dx; \ + } \ + if (TMUS >= 2) \ + { \ + iterw1 += extra->dw1dx; \ + iters1 += extra->ds1dx; \ + itert1 += extra->dt1dx; \ + } \ + } \ +} +#endif // ****************************************************************************************************************************** // Computes a log2 of a 16.32 value to 2 fractional bits of precision. // The return value is coded as a 24.8 value. @@ -4602,9 +4749,9 @@ INLINE void ATTR_FORCE_INLINE multi_reciplog(INT64 valueA, INT64 valueB, INT64 v } -INLINE UINT32 ATTR_FORCE_INLINE genTexture(tmu_state *TT, INT32 x, const UINT8 *dither4, const UINT32 TEXMODE, rgb_t *LOOKUP, INT32 LODBASE, INT64 ITERS, INT64 ITERT, INT64 ITERW, INT32 &lod) +INLINE rgbaint_t ATTR_FORCE_INLINE genTexture(tmu_state *TT, INT32 x, const UINT8 *dither4, const UINT32 TEXMODE, rgb_t *LOOKUP, INT32 LODBASE, INT64 ITERS, INT64 ITERT, INT64 ITERW, INT32 &lod) { - UINT32 result; + rgbaint_t result; INT32 s, t, ilod; /* determine the S/T/LOD values for this texture */ @@ -4679,15 +4826,15 @@ INLINE UINT32 ATTR_FORCE_INLINE genTexture(tmu_state *TT, INT32 x, const UINT8 * if (TEXMODE_FORMAT(TEXMODE) < 8) { texel0 = *(UINT8 *)&(TT)->ram[(texbase + t + s) & (TT)->mask]; - result = (LOOKUP)[texel0]; + result.set((LOOKUP)[texel0]); } else { texel0 = *(UINT16 *)&(TT)->ram[(texbase + 2*(t + s)) & (TT)->mask]; if (TEXMODE_FORMAT(TEXMODE) >= 10 && TEXMODE_FORMAT(TEXMODE) <= 12) - result = (LOOKUP)[texel0]; + result.set((LOOKUP)[texel0]); else - result = ((LOOKUP)[texel0 & 0xff] & 0xffffff) | ((texel0 & 0xff00) << 16); + result.set(((LOOKUP)[texel0 & 0xff] & 0xffffff) | ((texel0 & 0xff00) << 16)); } } else @@ -4788,45 +4935,38 @@ INLINE UINT32 ATTR_FORCE_INLINE genTexture(tmu_state *TT, INT32 x, const UINT8 * /* weigh in each texel */ - result = rgbaint_t::bilinear_filter(texel0, texel1, texel2, texel3, sfrac, tfrac); + result.bilinear_filter_rgbaint(texel0, texel1, texel2, texel3, sfrac, tfrac); } return result; } -INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMODE, rgb_union c_local, rgb_union c_other, INT32 lod) +INLINE rgbaint_t ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMODE, rgbaint_t c_local, rgbaint_t c_other, INT32 lod) { - UINT32 result; - //INT32 blendr, blendg, blendb, blenda; - //INT32 tr, tg, tb, ta; - UINT8 a_other = c_other.rgb.a; - UINT8 a_local = c_local.rgb.a; - rgb_union add_val = c_local; + INT32 a_other = c_other.get_a(); + INT32 a_local = c_local.get_a(); + rgbaint_t add_val = c_local; UINT8 tmp; - rgbaint_t tmpA, tmpB, tmpC; /* select zero/other for RGB */ if (TEXMODE_TC_ZERO_OTHER(TEXMODE)) - c_other.u &= 0xff000000; + c_other.and_imm_rgba(-1, 0, 0, 0); /* select zero/other for alpha */ if (TEXMODE_TCA_ZERO_OTHER(TEXMODE)) - c_other.u &= 0x00ffffff; - - tmpA.set((rgb_t) c_other.u); + c_other.set_a(0); if (TEXMODE_TC_SUB_CLOCAL(TEXMODE) || TEXMODE_TCA_SUB_CLOCAL(TEXMODE)) { - rgb_union sub_val = c_local; + rgbaint_t sub_val = c_local; /* potentially subtract c_local */ if (!TEXMODE_TC_SUB_CLOCAL(TEXMODE)) - sub_val.u &= 0xff000000; + sub_val.and_imm_rgba(-1, 0, 0, 0); if (!TEXMODE_TCA_SUB_CLOCAL(TEXMODE)) - sub_val.u &= 0x00ffffff; + sub_val.set_a(0); - tmpB.set((rgb_t) sub_val.u); - tmpA.sub(tmpB); + c_other.sub(sub_val); } /* blend RGB */ @@ -4834,35 +4974,35 @@ INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMO { default: /* reserved */ case 0: /* zero */ - c_local.u &= 0xff000000; + c_local.and_imm_rgba(-1, 0, 0, 0); break; case 1: /* c_local */ break; case 2: /* a_other */ - c_local.u = (c_local.u & 0xff000000) | (a_other<<16) | (a_other<<8) | (a_other); + c_local.set(a_local, a_other, a_other, a_other); break; case 3: /* a_local */ - c_local.u = (c_local.u & 0xff000000) | (a_local<<16) | (a_local<<8) | (a_local); + c_local.set(a_local, a_local, a_local, a_local); break; case 4: /* LOD (detail factor) */ if ((TT)->detailbias <= lod) - c_local.u &= 0xff000000; + c_local.and_imm_rgba(-1, 0, 0, 0); else { tmp = ((((TT)->detailbias - lod) << (TT)->detailscale) >> 8); if (tmp > (TT)->detailmax) tmp = (TT)->detailmax; - c_local.u = (c_local.u & 0xff000000) | (tmp<<16) | (tmp<<8) | (tmp); + c_local.set(a_local, tmp, tmp, tmp); } break; case 5: /* LOD fraction */ tmp = lod & 0xff; - c_local.u = (c_local.u & 0xff000000) | (tmp<<16) | (tmp<<8) | (tmp); + c_local.set(a_local, tmp, tmp, tmp); break; } @@ -4871,14 +5011,14 @@ INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMO { default: /* reserved */ case 0: /* zero */ - c_local.u &= 0x00ffffff; + c_local.set_a(0); break; case 1: /* c_local */ break; case 2: /* a_other */ - c_local.rgb.a = a_other; + c_local.set_a(a_other); break; case 3: /* a_local */ @@ -4886,30 +5026,30 @@ INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMO case 4: /* LOD (detail factor) */ if ((TT)->detailbias <= lod) - c_local.u &= 0x00ffffff; + c_local.set_a(0); else { tmp = ((((TT)->detailbias - lod) << (TT)->detailscale) >> 8); if (tmp > (TT)->detailmax) tmp = (TT)->detailmax; - c_local.rgb.a = tmp; + c_local.set_a(tmp); } break; case 5: /* LOD fraction */ - c_local.rgb.a = lod & 0xff; + c_local.set_a(lod & 0xff); break; } /* reverse the RGB blend */ if (!TEXMODE_TC_REVERSE_BLEND(TEXMODE)) { - c_local.u ^= 0x00ffffff; + c_local.xor_imm_rgba(0, 0xff, 0xff, 0xff); } /* reverse the alpha blend */ if (!TEXMODE_TCA_REVERSE_BLEND(TEXMODE)) - c_local.u ^= 0xff000000; + c_local.xor_imm_rgba(0xff, 0, 0, 0); /* do the blend */ //tr = (tr * (blendr + 1)) >> 8; @@ -4922,14 +5062,14 @@ INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMO { case 3: /* reserved */ case 0: /* nothing */ - add_val.u &= 0xff000000; + add_val.set(a_local, 0, 0, 0); break; case 1: /* add c_local */ break; case 2: /* add_alocal */ - add_val.u = (add_val.u & 0xff000000) | (a_local << 16) | (a_local << 8) | (a_local << 0); + add_val.set(a_local, a_local , a_local , a_local); //tr += c_local.rgb.a; //tg += c_local.rgb.a; //tb += c_local.rgb.a; @@ -4938,7 +5078,7 @@ INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMO /* add clocal or alocal to alpha */ if (!TEXMODE_TCA_ADD_ACLOCAL(TEXMODE)) - add_val.u &= 0x00ffffff; + add_val.set_a(0); //ta += c_local.rgb.a; /* clamp */ @@ -4946,16 +5086,13 @@ INLINE UINT32 ATTR_FORCE_INLINE combineTexture(tmu_state *TT, const UINT32 TEXMO //result.rgb.g = (tg < 0) ? 0 : (tg > 0xff) ? 0xff : tg; //result.rgb.b = (tb < 0) ? 0 : (tb > 0xff) ? 0xff : tb; //result.rgb.a = (ta < 0) ? 0 : (ta > 0xff) ? 0xff : ta; - tmpB.set((rgb_t) c_local.u); - tmpB.add_imm(1); - tmpC.set((rgb_t) add_val.u); - tmpA.scale_add_and_clamp(tmpB, tmpC); - result = tmpA.to_rgba(); - + c_local.add_imm(1); + c_other.scale_add_and_clamp(c_local, add_val); + rgbaint_t result(c_other); /* invert */ if (TEXMODE_TC_INVERT_OUTPUT(TEXMODE)) - result ^= 0x00ffffff; + result.xor_imm_rgba(0, 0xff, 0xff, 0xff); if (TEXMODE_TCA_INVERT_OUTPUT(TEXMODE)) - result ^= 0xff000000; + result.xor_imm_rgba(0xff, 0, 0, 0); return result; } diff --git a/src/emu/video/voodoo.c b/src/emu/video/voodoo.c index ff98657a672..777747d22c5 100644 --- a/src/emu/video/voodoo.c +++ b/src/emu/video/voodoo.c @@ -1458,8 +1458,14 @@ INLINE INT32 prepare_tmu(tmu_state *t) /* adjust the result: negative to get the log of the original value */ /* plus 12 to account for the extra exponent, and divided by 2 to */ /* get the log of the square root of texdx */ - (void)fast_reciplog(texdx, &lodbase); - return (-lodbase + (12 << 8)) / 2; + #if USE_FAST_RECIP == 1 + (void)fast_reciplog(texdx, &lodbase); + return (-lodbase + (12 << 8)) / 2; + #else + double tmpTex = texdx; + lodbase = new_log2(tmpTex); + return (lodbase + (12 << 8)) / 2; + #endif } @@ -3303,8 +3309,6 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask) iterw = (UINT32) sw[pix] << 16; } INT32 iterz = sw[pix] << 12; - rgb_union color; - rgb_union iterargb = { 0 }; /* apply clipping */ if (FBZMODE_ENABLE_CLIPPING(v->reg[fbzMode].u)) @@ -3319,6 +3323,13 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask) goto nextpixel; } } + #if USE_OLD_RASTER == 1 + rgb_union color; + rgb_union iterargb = { 0 }; + #else + rgbaint_t color, preFog; + rgbaint_t iterargb(0); + #endif /* pixel pipeline part 1 handles depth testing and stippling */ //PIXEL_PIPELINE_BEGIN(v, stats, x, y, v->reg[fbzColorPath].u, v->reg[fbzMode].u, iterz, iterw); @@ -3362,35 +3373,38 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask) // Depth testing value for lfb pipeline writes is directly from write data, no biasing is used fogdepth = biasdepth = (UINT32) sw[pix]; - /* use the RGBA we stashed above */ - color.rgb.r = r = sr[pix]; - color.rgb.g = g = sg[pix]; - color.rgb.b = b = sb[pix]; - color.rgb.a = a = sa[pix]; - - if (USE_OLD_RASTER) { + #if USE_OLD_RASTER == 1 /* Perform depth testing */ DEPTH_TEST(v, stats, x, v->reg[fbzMode].u); + /* use the RGBA we stashed above */ + color.rgb.r = r = sr[pix]; + color.rgb.g = g = sg[pix]; + color.rgb.b = b = sb[pix]; + color.rgb.a = a = sa[pix]; + /* apply chroma key, alpha mask, and alpha testing */ APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color); APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a); APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a); - } else { + #else /* Perform depth testing */ if (!depthTest((UINT16) v->reg[zaColor].u, stats, depth[x], v->reg[fbzMode].u, biasdepth)) goto nextpixel; + /* use the RGBA we stashed above */ + color.set(sa[pix], sr[pix], sg[pix], sb[pix]); + /* handle chroma key */ if (!chromaKeyTest(v, stats, v->reg[fbzMode].u, color)) goto nextpixel; /* handle alpha mask */ - if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.rgb.a)) + if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.get_a())) goto nextpixel; /* handle alpha test */ - if (!alphaTest(v, stats, v->reg[alphaMode].u, color.rgb.a)) + if (!alphaTest(v, stats, v->reg[alphaMode].u, color.get_a())) goto nextpixel; - } + #endif /* wait for any outstanding work to finish */ poly_wait(v->poly, "LFB Write"); @@ -6496,5 +6510,6 @@ RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x000000C4, 0x RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* 67 1962 14755 */ RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x000000C4, 0x0C261ACD ) /* * 66 74 3951 */ RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x00000ACD, 0x04221AC9 ) /* 70 374 3691 */ +RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* * 20 350 7928 */ #endif