mirror of
https://github.com/holub/mame
synced 2025-04-23 00:39:36 +03:00
voodoodefs.h -- Re-organized macros into function calls to help in optimization identification. The old macros can still be used by setting USE_OLD_RASTER to 1.
-- Made new functions use SSE2 implementation. rgbgen.h -- Added immediate add and mult add functions. rgbsse.h -- Added SSE2 implementation of immediate add and mult functions. voodoo.c -- Added comments for rasters.
This commit is contained in:
parent
021aa9eb6c
commit
86d6b88038
@ -164,6 +164,18 @@ INLINE void rgbaint_add(rgbaint *color1, const rgbaint *color2)
|
||||
color1->b += color2->b;
|
||||
}
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_add_imm - add immediate INT16 to rgbaint value
|
||||
-------------------------------------------------*/
|
||||
|
||||
INLINE void rgbaint_add_imm(rgbaint *color1, const INT16 imm)
|
||||
{
|
||||
color1->a += imm;
|
||||
color1->r += imm;
|
||||
color1->g += imm;
|
||||
color1->b += imm;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_sub - subtract two rgbint values
|
||||
@ -308,7 +320,6 @@ INLINE void rgbaint_blend(rgbaint *color1, const rgbaint *color2, UINT8 color1sc
|
||||
color1->b = (color1->b * scale1 + color2->b * scale2) >> 8;
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_scale_and_clamp - scale the given
|
||||
color by an 8.8 scale factor, immediate or
|
||||
@ -366,6 +377,50 @@ INLINE void rgbaint_scale_channel_and_clamp(rgbaint *color, const rgbaint *color
|
||||
if ((UINT16)color->b > 255) { color->b = (color->b < 0) ? 0 : 255; }
|
||||
}
|
||||
|
||||
/*-------------------------------------------------
    rgbaint_scale_immediate_add_and_clamp - for
    each channel, compute
    ((color1 * colorscale) >> 8) + color2 and
    clamp the result to the range 0-255; the
    result is written back into color1
-------------------------------------------------*/

INLINE void rgbaint_scale_immediate_add_and_clamp(rgbaint *color1, INT16 colorscale, const rgbaint *color2)
{
	/*
	    Every channel follows the same three steps: scale, add, clamp.
	    A single unsigned compare flags an out-of-range value; the sign
	    of the value then selects which bound to apply.
	*/

	/* blue */
	color1->b = (color1->b * colorscale) >> 8;
	color1->b += color2->b;
	if ((UINT16)color1->b > 255)
	{
		if (color1->b < 0)
			color1->b = 0;
		else
			color1->b = 255;
	}

	/* green */
	color1->g = (color1->g * colorscale) >> 8;
	color1->g += color2->g;
	if ((UINT16)color1->g > 255)
	{
		if (color1->g < 0)
			color1->g = 0;
		else
			color1->g = 255;
	}

	/* red */
	color1->r = (color1->r * colorscale) >> 8;
	color1->r += color2->r;
	if ((UINT16)color1->r > 255)
	{
		if (color1->r < 0)
			color1->r = 0;
		else
			color1->r = 255;
	}

	/* alpha */
	color1->a = (color1->a * colorscale) >> 8;
	color1->a += color2->a;
	if ((UINT16)color1->a > 255)
	{
		if (color1->a < 0)
			color1->a = 0;
		else
			color1->a = 255;
	}
}
|
||||
|
||||
/*-------------------------------------------------
    rgbaint_scale_channel_add_and_clamp - for
    each channel, compute
    ((color1 * colorscale) >> 8) + color2 using
    the matching channel of colorscale, and
    clamp the result to the range 0-255; the
    result is written back into color1
-------------------------------------------------*/

INLINE void rgbaint_scale_channel_add_and_clamp(rgbaint *color1, const rgbaint *colorscale, const rgbaint *color2)
{
	/*
	    Every channel follows the same three steps: scale, add, clamp.
	    A single unsigned compare flags an out-of-range value; the sign
	    of the value then selects which bound to apply.
	*/

	/* blue */
	color1->b = (color1->b * colorscale->b) >> 8;
	color1->b += color2->b;
	if ((UINT16)color1->b > 255)
	{
		if (color1->b < 0)
			color1->b = 0;
		else
			color1->b = 255;
	}

	/* green */
	color1->g = (color1->g * colorscale->g) >> 8;
	color1->g += color2->g;
	if ((UINT16)color1->g > 255)
	{
		if (color1->g < 0)
			color1->g = 0;
		else
			color1->g = 255;
	}

	/* red */
	color1->r = (color1->r * colorscale->r) >> 8;
	color1->r += color2->r;
	if ((UINT16)color1->r > 255)
	{
		if (color1->r < 0)
			color1->r = 0;
		else
			color1->r = 255;
	}

	/* alpha */
	color1->a = (color1->a * colorscale->a) >> 8;
	color1->a += color2->a;
	if ((UINT16)color1->a > 255)
	{
		if (color1->a < 0)
			color1->a = 0;
		else
			color1->a = 255;
	}
}
|
||||
|
||||
/*-------------------------------------------------
    rgbaint_scale_channel_add_and_clamp - for
    each channel, compute
    (color1 * colorscale1 + color2 * colorscale2) >> 8
    using the matching channels of both scale
    values, and clamp the result to the range
    0-255; the result is written back into color1
-------------------------------------------------*/

INLINE void rgbaint_scale_channel_add_and_clamp(rgbaint *color1, const rgbaint *colorscale1, const rgbaint *color2, const rgbaint *colorscale2)
{
	/*
	    A single unsigned compare flags an out-of-range value; the sign
	    of the value then selects which bound to apply.
	*/

	/* blue */
	color1->b = (color1->b * colorscale1->b + color2->b * colorscale2->b) >> 8;
	if ((UINT16)color1->b > 255)
	{
		if (color1->b < 0)
			color1->b = 0;
		else
			color1->b = 255;
	}

	/* green */
	color1->g = (color1->g * colorscale1->g + color2->g * colorscale2->g) >> 8;
	if ((UINT16)color1->g > 255)
	{
		if (color1->g < 0)
			color1->g = 0;
		else
			color1->g = 255;
	}

	/* red */
	color1->r = (color1->r * colorscale1->r + color2->r * colorscale2->r) >> 8;
	if ((UINT16)color1->r > 255)
	{
		if (color1->r < 0)
			color1->r = 0;
		else
			color1->r = 255;
	}

	/* alpha */
	color1->a = (color1->a * colorscale1->a + color2->a * colorscale2->a) >> 8;
	if ((UINT16)color1->a > 255)
	{
		if (color1->a < 0)
			color1->a = 0;
		else
			color1->a = 255;
	}
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgb_bilinear_filter - bilinear filter between
|
||||
|
@ -146,6 +146,15 @@ INLINE void rgbaint_add(rgbaint *color1, const rgbaint *color2)
|
||||
*color1 = _mm_add_epi16(*color1, *color2);
|
||||
}
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_add_imm - add immediate INT16 to rgbaint value
|
||||
-------------------------------------------------*/
|
||||
INLINE void rgbaint_add_imm(rgbaint *color1, const INT16 imm)
|
||||
{
|
||||
__m128i temp = _mm_set_epi16(0, 0, 0, 0, imm, imm, imm, imm);
|
||||
*color1 = _mm_add_epi16(*color1, temp);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbint_sub - subtract two rgbint values
|
||||
@ -306,6 +315,38 @@ INLINE void rgbint_scale_channel_and_clamp(rgbint *color, const rgbint *colorsca
|
||||
*color = _mm_min_epi16(*color, *(__m128i *)&rgbsse_statics.maxbyte);
|
||||
}
|
||||
|
||||
/*-------------------------------------------------
    rgbint_scale_immediate_add_and_clamp - for
    each of the four low 16-bit lanes, compute
    ((color1 * colorscale) >> 8) + color2 and
    clamp the result between 0 and the
    rgbsse_statics.maxbyte limit; the result is
    written back into color1
-------------------------------------------------*/

/* NOTE(review): the non-SSE counterpart is named rgbaint_scale_immediate_add_and_clamp
   and takes an rgbaint *; confirm which name callers are expected to use. */
INLINE void rgbint_scale_immediate_add_and_clamp(rgbint *color1, INT16 colorscale, const rgbaint *color2)
{
	/* pair every scale word with 256: the multiply-add then yields
	   color1 * colorscale + color2 * 256, so color2 is multiplied by 2^8 (256)
	   and then divided by 2^8 again by the shift by 8 below */
	__m128i mscale = _mm_unpacklo_epi16(_mm_set1_epi16(colorscale), _mm_set_epi16(0, 0, 0, 0, 256, 256, 256, 256));

	/* interleave the low words of color1 and color2 so each 32-bit lane holds one (color1, color2) pair */
	*color1 = _mm_unpacklo_epi16(*color1, *color2);
	*color1 = _mm_madd_epi16(*color1, mscale);

	/* arithmetic shift: a logical shift would turn a negative sum into a
	   large positive value that ends up clamped to the upper bound instead of 0 */
	*color1 = _mm_srai_epi32(*color1, 8);
	*color1 = _mm_packs_epi32(*color1, *color1);

	/* clamp low then high, matching the generic implementation's 0 / 255 clamp */
	*color1 = _mm_max_epi16(*color1, _mm_setzero_si128());
	*color1 = _mm_min_epi16(*color1, *(__m128i *)&rgbsse_statics.maxbyte);
}
|
||||
|
||||
/*-------------------------------------------------
    rgbaint_scale_channel_add_and_clamp - for
    each of the four low 16-bit lanes, compute
    ((color1 * colorscale) >> 8) + color2 using
    the matching lane of colorscale, and clamp
    the result between 0 and the
    rgbsse_statics.maxbyte limit; the result is
    written back into color1
-------------------------------------------------*/

INLINE void rgbaint_scale_channel_add_and_clamp(rgbaint *color1, const rgbaint *colorscale, const rgbaint *color2)
{
	/* pair every scale word with 256: the multiply-add then yields
	   color1 * colorscale + color2 * 256, so color2 is multiplied by 2^8 (256)
	   and then divided by 2^8 again by the shift by 8 below */
	__m128i mscale = _mm_unpacklo_epi16(*colorscale, _mm_set_epi16(0, 0, 0, 0, 256, 256, 256, 256));

	/* interleave the low words of color1 and color2 so each 32-bit lane holds one (color1, color2) pair */
	*color1 = _mm_unpacklo_epi16(*color1, *color2);
	*color1 = _mm_madd_epi16(*color1, mscale);

	/* arithmetic shift: a logical shift would turn a negative sum into a
	   large positive value that ends up clamped to the upper bound instead of 0 */
	*color1 = _mm_srai_epi32(*color1, 8);
	*color1 = _mm_packs_epi32(*color1, *color1);

	/* clamp low then high, matching the generic implementation's 0 / 255 clamp */
	*color1 = _mm_max_epi16(*color1, _mm_setzero_si128());
	*color1 = _mm_min_epi16(*color1, *(__m128i *)&rgbsse_statics.maxbyte);
}
|
||||
|
||||
/*-------------------------------------------------
    rgbaint_scale_channel_add_and_clamp - for
    each of the four low 16-bit lanes, compute
    (color1 * colorscale1 + color2 * colorscale2) >> 8
    using the matching lanes of both scale
    values, and clamp the result between 0 and
    the rgbsse_statics.maxbyte limit; the result
    is written back into color1
-------------------------------------------------*/

INLINE void rgbaint_scale_channel_add_and_clamp(rgbaint *color1, const rgbaint *colorscale1, const rgbaint *color2, const rgbaint *colorscale2)
{
	/* interleave the scales and the colors the same way so the multiply-add
	   produces color1 * colorscale1 + color2 * colorscale2 in each 32-bit lane */
	__m128i mscale = _mm_unpacklo_epi16(*colorscale1, *colorscale2);

	*color1 = _mm_unpacklo_epi16(*color1, *color2);
	*color1 = _mm_madd_epi16(*color1, mscale);

	/* arithmetic shift: a logical shift would turn a negative sum into a
	   large positive value that ends up clamped to the upper bound instead of 0 */
	*color1 = _mm_srai_epi32(*color1, 8);
	*color1 = _mm_packs_epi32(*color1, *color1);

	/* clamp low then high, matching the generic implementation's 0 / 255 clamp */
	*color1 = _mm_max_epi16(*color1, _mm_setzero_si128());
	*color1 = _mm_min_epi16(*color1, *(__m128i *)&rgbsse_statics.maxbyte);
}
|
||||
|
||||
/*-------------------------------------------------
|
||||
rgbaint_scale_and_clamp - scale the given
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -892,7 +892,7 @@ static void swap_buffers(voodoo_state *v)
|
||||
|
||||
/* periodically log rasterizer info */
|
||||
v->stats.swaps++;
|
||||
if (LOG_RASTERIZERS && v->stats.swaps % 100 == 0)
|
||||
if (LOG_RASTERIZERS && v->stats.swaps % 1000 == 0)
|
||||
dump_rasterizer_stats(v);
|
||||
|
||||
/* update the statistics (debug) */
|
||||
@ -3302,10 +3302,7 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask,
|
||||
//PIXEL_PIPELINE_BEGIN(v, stats, x, y, v->reg[fbzColorPath].u, v->reg[fbzMode].u, iterz, iterw);
|
||||
// Start PIXEL_PIPE_BEGIN copy
|
||||
//#define PIXEL_PIPELINE_BEGIN(VV, STATS, XX, YY, FBZCOLORPATH, FBZMODE, ITERZ, ITERW)
|
||||
do
|
||||
{
|
||||
INT32 fogdepth, biasdepth;
|
||||
INT32 prefogr, prefogg, prefogb;
|
||||
INT32 r, g, b, a;
|
||||
|
||||
(stats)->pixels_in++;
|
||||
@ -3334,7 +3331,7 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask,
|
||||
if (((v->reg[stipple].u >> stipple_index) & 1) == 0)
|
||||
{
|
||||
v->stats.total_stippled++;
|
||||
goto skipdrawdepth;
|
||||
goto nextpixel;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3343,25 +3340,40 @@ static INT32 lfb_w(voodoo_state *v, offs_t offset, UINT32 data, UINT32 mem_mask,
|
||||
// Depth testing value for lfb pipeline writes is directly from write data, no biasing is used
|
||||
fogdepth = biasdepth = (UINT32) sw[pix];
|
||||
|
||||
/* Perform depth testing */
|
||||
DEPTH_TEST(v, stats, x, v->reg[fbzMode].u);
|
||||
|
||||
/* use the RGBA we stashed above */
|
||||
color.rgb.r = r = sr[pix];
|
||||
color.rgb.g = g = sg[pix];
|
||||
color.rgb.b = b = sb[pix];
|
||||
color.rgb.a = a = sa[pix];
|
||||
|
||||
/* apply chroma key, alpha mask, and alpha testing */
|
||||
APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color);
|
||||
APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a);
|
||||
APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a);
|
||||
if (USE_OLD_RASTER) {
|
||||
/* Perform depth testing */
|
||||
DEPTH_TEST(v, stats, x, v->reg[fbzMode].u);
|
||||
|
||||
/* apply chroma key, alpha mask, and alpha testing */
|
||||
APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color);
|
||||
APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a);
|
||||
APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a);
|
||||
} else {
|
||||
/* Perform depth testing */
|
||||
if (!depthTest((UINT16) v->reg[zaColor].u, stats, depth[x], v->reg[fbzMode].u, biasdepth))
|
||||
goto nextpixel;
|
||||
|
||||
/* handle chroma key */
|
||||
if (!chromaKeyTest(v, stats, v->reg[fbzMode].u, color))
|
||||
goto nextpixel;
|
||||
/* handle alpha mask */
|
||||
if (!alphaMaskTest(stats, v->reg[fbzMode].u, color.rgb.a))
|
||||
goto nextpixel;
|
||||
/* handle alpha test */
|
||||
if (!alphaTest(v, stats, v->reg[alphaMode].u, color.rgb.a))
|
||||
goto nextpixel;
|
||||
}
|
||||
|
||||
/* pixel pipeline part 2 handles color combine, fog, alpha, and final output */
|
||||
PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x, dest, depth,
|
||||
v->reg[fbzMode].u, v->reg[fbzColorPath].u, v->reg[alphaMode].u, v->reg[fogMode].u,
|
||||
iterz, iterw, iterargb);
|
||||
}
|
||||
nextpixel:
|
||||
/* advance our pointers */
|
||||
x++;
|
||||
@ -5658,6 +5670,7 @@ static raster_info *add_rasterizer(voodoo_state *v, const raster_info *cinfo)
|
||||
/* fill in the data */
|
||||
info->hits = 0;
|
||||
info->polys = 0;
|
||||
info->hash = hash;
|
||||
|
||||
/* hook us into the hash table */
|
||||
info->next = v->raster_hash[hash];
|
||||
@ -5760,7 +5773,7 @@ static void dump_rasterizer_stats(voodoo_state *v)
|
||||
break;
|
||||
|
||||
/* print it */
|
||||
printf("RASTERIZER_ENTRY( 0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X ) /* %c %8d %10d */\n",
|
||||
printf("RASTERIZER_ENTRY( 0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X ) /* %c %2d %8d %10d */\n",
|
||||
best->eff_color_path,
|
||||
best->eff_alpha_mode,
|
||||
best->eff_fog_mode,
|
||||
@ -5768,6 +5781,7 @@ static void dump_rasterizer_stats(voodoo_state *v)
|
||||
best->eff_tex_mode_0,
|
||||
best->eff_tex_mode_1,
|
||||
best->is_generic ? '*' : ' ',
|
||||
best->hash,
|
||||
best->polys,
|
||||
best->hits);
|
||||
|
||||
@ -6436,6 +6450,26 @@ RASTERIZER_ENTRY( 0x00602439, 0x00044119, 0x00000000, 0x000B0379, 0x00000009, 0x
|
||||
//RASTERIZER_ENTRY( 0x00002809, 0x00004110, 0x00000001, 0x00030FFB, 0x08241AC7, 0xFFFFFFFF ) /* in-game */
|
||||
//RASTERIZER_ENTRY( 0x00424219, 0x00000000, 0x00000001, 0x00030F7B, 0x08241AC7, 0xFFFFFFFF ) /* in-game */
|
||||
//RASTERIZER_ENTRY( 0x0200421A, 0x00001510, 0x00000001, 0x00030F7B, 0x08241AC7, 0xFFFFFFFF ) /* in-game */
|
||||
/* gtfore06 ----> fbzColorPath alphaMode fogMode, fbzMode, texMode0, texMode1 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010F79, 0x0C224A0D, 0x0C261ACD ) /* 47 901402 15032233 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010F79, 0x0C261ACD, 0x0C2610C4 ) /* 90 186896 9133452 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010F79, 0x0C261ACD, 0x0C261ACD ) /* 18 119615 9038715 */
|
||||
RASTERIZER_ENTRY( 0x00002429, 0x00000000, 0x000000C1, 0x00010FF9, 0x00000A09, 0x0C261A0F ) /* 12 33459 3336035 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010F79, 0x0C224A0D, 0x0C261A0D ) /* 45 166408 2416297 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x0C261ACD, 0x0C2610C4 ) /* 79 39422 2109850 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045110, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* 26 9335 850817 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010F79, 0x00000ACD, 0x0C261ACD ) /* 9 8990 267028 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045110, 0x000000C1, 0x00010FF9, 0x000000C4, 0x0C261ACD ) /* 61 2540 184702 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010FF9, 0x000000C4, 0x0C261ACD ) /* 5 1270 162503 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x00000000, 0x00010F79, 0x0C224A0D, 0x0C261A0D ) /* 84 7393 51970 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x0C261ACD, 0x042210C0 ) /* 2 9440 39646 */
|
||||
RASTERIZER_ENTRY( 0x00002425, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* 67 990 13559 */
|
||||
RASTERIZER_ENTRY( 0x00002429, 0x00000000, 0x00000000, 0x00010FF9, 0x00000A09, 0x0C261A0F ) /* 24 176 13213 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010FF9, 0x00000ACD, 0x0C261ACD ) /* 20 348 7883 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x00000ACD, 0x04221AC9 ) /* 70 2020 6048 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x0C261ACD, 0x04221AC9 ) /* 92 28 28 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x000000C1, 0x00010F79, 0x000000C4, 0x0C261ACD ) /* 55 18 540 */
|
||||
RASTERIZER_ENTRY( 0x00482405, 0x00045119, 0x00000000, 0x00010FF9, 0x000000C4, 0x04221AC9 ) /* * 19 2 24 */
|
||||
|
||||
/* golden tee fore! series */
|
||||
RASTERIZER_ENTRY( 0x00002429, 0x00000000, 0x00000000, 0x00010FF9, 0x00000A09, 0x0C261A0F )
|
||||
|
Loading…
Reference in New Issue
Block a user