rgbsse.h - Added andnot_reg, clamp_to_uint8, bilinear_filter_rgbaint member functions.

rgbgen.h - Added andnot_reg, clamp_to_uint8, bilinear_filter_rgbaint, and += functions.
vooddefs.h - Converted rasterizer path to use rgbaint_t types. Switched to new reciplog function.
voodoo.c   - Converted entire pixel pipe path to use rgbaint_t types.
This commit is contained in:
ted green 2015-07-03 19:03:22 -06:00
parent 5f6235f080
commit 143bac01bd
4 changed files with 489 additions and 252 deletions

View File

@ -283,6 +283,11 @@ public:
and_imm_rgba(color.m_a, color.m_r, color.m_g, color.m_b);
}
inline void andnot_reg(const rgbaint_t& color)
{
and_imm_rgba(~color.m_a, ~color.m_r, ~color.m_g, ~color.m_b);
}
inline void and_imm(const INT32 imm)
{
and_imm_rgba(imm, imm, imm, imm);
@ -334,6 +339,14 @@ public:
m_b = (m_b < 0) ? 0 : (m_b > 255) ? 255 : m_b;
}
/* Saturate every channel to the range 0..255: negative values become 0 and
   values above 255 become 255; in-range values are left untouched. */
inline void clamp_to_uint8()
{
	if (m_a < 0) { m_a = 0; } else if (m_a > 255) { m_a = 255; }
	if (m_r < 0) { m_r = 0; } else if (m_r > 255) { m_r = 255; }
	if (m_g < 0) { m_g = 0; } else if (m_g > 255) { m_g = 255; }
	if (m_b < 0) { m_b = 0; } else if (m_b > 255) { m_b = 255; }
}
inline void sign_extend(const UINT32 compare, const UINT32 sign)
{
if ((m_a & compare) == compare)
@ -427,6 +440,24 @@ public:
return *this;
}
/* Channel-wise accumulate: adds each channel of 'other' to the matching
   channel of this colour and returns a reference to this colour. */
inline rgbaint_t& operator+=(const rgbaint_t& other)
{
	m_a = m_a + other.m_a;
	m_r = m_r + other.m_r;
	m_g = m_g + other.m_g;
	m_b = m_b + other.m_b;

	return *this;
}
/* Scalar accumulate: adds the same value 'other' to all four channels and
   returns a reference to this colour. */
inline rgbaint_t& operator+=(const INT32 other)
{
	m_a = m_a + other;
	m_r = m_r + other;
	m_g = m_g + other;
	m_b = m_b + other;

	return *this;
}
static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
UINT32 rb0 = (rgb00 & 0x00ff00ff) + ((((rgb01 & 0x00ff00ff) - (rgb00 & 0x00ff00ff)) * u) >> 8);
@ -446,6 +477,26 @@ public:
return ((ag0 << 8) & 0xff00ff00) | (rb0 & 0x00ff00ff);
}
/* Bilinear blend of four packed 32-bit texels, stored into this colour via
   set().  Bytes 0 and 2 of each texel are processed together in one 32-bit
   word, bytes 1 and 3 in another.  rgb00/rgb01 and rgb10/rgb11 are blended
   with weight u (8-bit fraction), then the two results are blended with
   weight v.  All arithmetic is unsigned 32-bit, exactly as in the packed
   form this was derived from. */
inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
	const UINT32 pair_mask = 0x00ff00ff;

	/* bytes 0 and 2 of every texel */
	const UINT32 even00 = rgb00 & pair_mask;
	const UINT32 even01 = rgb01 & pair_mask;
	const UINT32 even10 = rgb10 & pair_mask;
	const UINT32 even11 = rgb11 & pair_mask;

	/* bytes 1 and 3 of every texel, moved down into the same positions */
	const UINT32 odd00 = (rgb00 >> 8) & pair_mask;
	const UINT32 odd01 = (rgb01 >> 8) & pair_mask;
	const UINT32 odd10 = (rgb10 >> 8) & pair_mask;
	const UINT32 odd11 = (rgb11 >> 8) & pair_mask;

	/* first pass: blend along u within each texel pair */
	const UINT32 even_u0 = (even00 + (((even01 - even00) * u) >> 8)) & pair_mask;
	const UINT32 even_u1 = (even10 + (((even11 - even10) * u) >> 8)) & pair_mask;
	const UINT32 odd_u0 = (odd00 + (((odd01 - odd00) * u) >> 8)) & pair_mask;
	const UINT32 odd_u1 = (odd10 + (((odd11 - odd10) * u) >> 8)) & pair_mask;

	/* second pass: blend the two first-pass results along v */
	const UINT32 even_uv = even_u0 + (((even_u1 - even_u0) * v) >> 8);
	const UINT32 odd_uv = odd_u0 + (((odd_u1 - odd_u0) * v) >> 8);

	/* re-interleave the two byte pairs into one packed value */
	const UINT32 result = ((odd_uv << 8) & 0xff00ff00) | (even_uv & pair_mask);
	this->set(result);
}
protected:
INT32 m_a;
INT32 m_r;

View File

@ -221,6 +221,11 @@ public:
m_value = _mm_and_si128(m_value, color.m_value);
}
inline void andnot_reg(const rgbaint_t& color)
{
m_value = _mm_andnot_si128(color.m_value, m_value);
}
inline void and_imm(const INT32 value)
{
m_value = _mm_and_si128(m_value, _mm_set1_epi32(value));
@ -256,6 +261,14 @@ public:
m_value = _mm_or_si128(_mm_and_si128(vsign, mask), _mm_and_si128(m_value, _mm_xor_si128(mask, _mm_set1_epi32(0xffffffff))));
}
inline void clamp_to_uint8()
{
m_value = _mm_packs_epi32(m_value, _mm_setzero_si128());
m_value = _mm_packus_epi16(m_value, _mm_setzero_si128());
m_value = _mm_unpacklo_epi8(m_value, _mm_setzero_si128());
m_value = _mm_unpacklo_epi16(m_value, _mm_setzero_si128());
}
inline void sign_extend(const UINT32 compare, const UINT32 sign)
{
__m128i compare_vec = _mm_set1_epi32(compare);
@ -414,6 +427,27 @@ public:
return _mm_cvtsi128_si32(color01);
}
/* Blends four packed 32-bit texels and stores the result in m_value, one
   32-bit lane per byte of the packed input.  rgb00/rgb01 are combined using
   scale_factor(u), rgb10/rgb11 are combined the same way, and the two
   partial results are then combined using scale_factor(v).
   scale_factor() is defined outside this view; presumably it returns the
   16-bit weight pairs consumed by _mm_madd_epi16 - confirm. */
inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v)
{
/* load each packed texel into the low 32 bits of a vector; upper bits are zero */
__m128i color00 = _mm_cvtsi32_si128(rgb00);
__m128i color01 = _mm_cvtsi32_si128(rgb01);
__m128i color10 = _mm_cvtsi32_si128(rgb10);
__m128i color11 = _mm_cvtsi32_si128(rgb11);
/* interleave color01 and color00 at the byte level */
color01 = _mm_unpacklo_epi8(color01, color00);
color11 = _mm_unpacklo_epi8(color11, color10);
/* zero-extend the interleaved bytes to 16 bits: each 32-bit lane now holds
   the (x1, x0) byte pair of one channel as two 16-bit words */
color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128());
color11 = _mm_unpacklo_epi8(color11, _mm_setzero_si128());
/* multiply each 16-bit pair by the matching weights and add the products,
   giving one 32-bit weighted sum per channel */
color01 = _mm_madd_epi16(color01, scale_factor(u));
color11 = _mm_madd_epi16(color11, scale_factor(u));
/* merge both partial results into one vector of 16-bit pairs for the second
   madd: the left shift by 15 puts (color01 sum >> 1) in the upper half of
   each lane, the right shift by 1 leaves (color11 sum >> 1) in the lower
   half with a zero upper half.
   NOTE(review): the signed 16-bit max then keeps the color01 value in the
   upper half and the color11 value in the lower half only if every sum fits
   in 16 bits - this depends on the weights from scale_factor; confirm. */
color01 = _mm_slli_epi32(color01, 15);
color11 = _mm_srli_epi32(color11, 1);
color01 = _mm_max_epi16(color01, color11);
color01 = _mm_madd_epi16(color01, scale_factor(v));
/* drop the accumulated fixed-point scale; presumably 15 bits = two 8-bit
   weight scales minus the one-bit halving above - confirm against scale_factor */
m_value = _mm_srli_epi32(color01, 15);
}
protected:
struct _statics
{

File diff suppressed because it is too large Load Diff

View File

@ -1458,8 +1458,14 @@ INLINE INT32 prepare_tmu(tmu_state *t)
/* adjust the result: negative to get the log of the original value */
/* plus 12 to account for the extra exponent, and divided by 2 to */
/* get the log of the square root of texdx */
(void)fast_reciplog(texdx, &lodbase);
return (-lodbase + (12 << 8)) / 2;
#if USE_FAST_RECIP == 1
(void)fast_reciplog(texdx, &lodbase);
return (-lodbase + (12 << 8)) / 2;
#else
double tmpTex = texdx;
lodbase = new_log2(tmpTex);
return (lodbase + (12 << 8)) / 2;
#endif
}
@ -3303,8 +3309,6 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask)
iterw = (UINT32) sw[pix] << 16;
}
INT32 iterz = sw[pix] << 12;
rgb_union color;
rgb_union iterargb = { 0 };
/* apply clipping */
if (FBZMODE_ENABLE_CLIPPING(v->reg[fbzMode].u))
@ -3319,6 +3323,13 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask)
goto nextpixel;
}
}
#if USE_OLD_RASTER == 1
rgb_union color;
rgb_union iterargb = { 0 };
#else
rgbaint_t color, preFog;
rgbaint_t iterargb(0);
#endif
/* pixel pipeline part 1 handles depth testing and stippling */
//PIXEL_PIPELINE_BEGIN(v, stats, x, y, v->reg[fbzColorPath].u, v->reg[fbzMode].u, iterz, iterw);
@ -3362,35 +3373,38 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask)
// Depth testing value for lfb pipeline writes is directly from write data, no biasing is used
fogdepth = biasdepth = (UINT32) sw[pix];
/* use the RGBA we stashed above */
color.rgb.r = r = sr[pix];
color.rgb.g = g = sg[pix];
color.rgb.b = b = sb[pix];
color.rgb.a = a = sa[pix];
if (USE_OLD_RASTER) {
#if USE_OLD_RASTER == 1
/* Perform depth testing */
DEPTH_TEST(v, stats, x, v->reg[fbzMode].u);
/* use the RGBA we stashed above */
color.rgb.r = r = sr[pix];
color.rgb.g = g = sg[pix];
color.rgb.b = b = sb[pix];
color.rgb.a = a = sa[pix];
/* apply chroma key, alpha mask, and alpha testing */
APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color);
APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a);
APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a);
} else {
#else
/* Perform depth testing */
if (!depthTest((UINT16) v->reg[zaColor].u, stats, depth[x], v->reg[fbzMode].u, biasdepth))
goto nextpixel;
/* use the RGBA we stashed above */
color.set(sa[pix], sr[pix], sg[pix], sb[pix]);
/* handle chroma key */
if (!chromaKeyTest(v, stats, v->reg[fbzMode].u, color))
goto nextpixel;
/* handle alpha mask */
if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.rgb.a))
if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.get_a()))
goto nextpixel;
/* handle alpha test */
if (!alphaTest(v, stats, v->reg[alphaMode].u, color.rgb.a))
if (!alphaTest(v, stats, v->reg[alphaMode].u, color.get_a()))
goto nextpixel;
}
#endif
/* wait for any outstanding work to finish */
poly_wait(v->poly, "LFB Write");
@ -6496,5 +6510,6 @@ RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x000000C4, 0x
RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* 67 1962 14755 */
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x000000C4, 0x0C261ACD ) /* * 66 74 3951 */
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x00000ACD, 0x04221AC9 ) /* 70 374 3691 */
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* * 20 350 7928 */
#endif