voodoo: Improve pixel pipeline throughput for LFB (linear frame buffer) writes. Helps gtfore and related games.

This commit is contained in:
Aaron Giles 2021-07-11 08:44:50 -07:00
parent 915095a890
commit b8e8bc59d4
3 changed files with 28 additions and 47 deletions

View File

@ -1687,22 +1687,7 @@ void voodoo_1_device::internal_lfb_w(offs_t offset, u32 data, u32 mem_mask)
depth += scry * m_renderer->rowpixels();
// make a dummy poly_extra_data structure with some cached values
poly_data poly;
poly.raster.compute(m_reg, nullptr, nullptr);
poly.destbase = dest;
poly.depthbase = depth;
poly.clipleft = m_reg.clip_left();
poly.clipright = m_reg.clip_right();
poly.cliptop = m_reg.clip_top();
poly.clipbottom = m_reg.clip_bottom();
poly.color0 = m_reg.color0().argb();
poly.color1 = m_reg.color1().argb();
poly.chromakey = m_reg.chroma_key().argb();
poly.fogcolor = m_reg.fog_color().argb();
poly.zacolor = m_reg.za_color();
poly.stipple = m_reg.stipple();
poly.alpharef = m_reg.alpha_mode().alpharef();
if (poly.raster.fbzmode().enable_stipple() && !poly.raster.fbzmode().stipple_pattern())
if (m_reg.fbz_mode().enable_stipple() && !m_reg.fbz_mode().stipple_pattern())
logerror("Warning: rotated stipple pattern used in LFB write\n");
// loop over up to two pixels
@ -1712,7 +1697,7 @@ void voodoo_1_device::internal_lfb_w(offs_t offset, u32 data, u32 mem_mask)
{
// make sure we care about this pixel
if ((mask & LFB_PIXEL0_MASK) != 0)
m_renderer->pixel_pipeline(threadstats, poly, lfbmode, x, y, src_color[pix], src_depth[pix]);
m_renderer->pixel_pipeline(threadstats, dest, depth, x, y, src_color[pix], src_depth[pix]);
// advance our pointers
x++;

View File

@ -1428,11 +1428,8 @@ inline s32 ATTR_FORCE_INLINE voodoo_renderer::compute_depthval(poly_data const &
// the caller
//-------------------------------------------------
inline bool ATTR_FORCE_INLINE voodoo_renderer::depth_test(thread_stats_block &threadstats, poly_data const &poly, reg_fbz_mode const fbzmode, s32 depthdest, s32 depthval)
inline bool ATTR_FORCE_INLINE voodoo_renderer::depth_test(thread_stats_block &threadstats, reg_fbz_mode const fbzmode, s32 depthdest, s32 depthsource)
{
// the source depth is either the iterated W/Z+bias or a constant value
s32 depthsource = (fbzmode.depth_source_compare() == 0) ? depthval : u16(poly.zacolor);
// test against the depth buffer
switch (fbzmode.depth_function())
{
@ -1884,10 +1881,10 @@ inline bool ATTR_FORCE_INLINE voodoo_renderer::chroma_key_test(thread_stats_bloc
// the caller
//-------------------------------------------------
inline void ATTR_FORCE_INLINE voodoo_renderer::apply_fogging(rgbaint_t &color, poly_data const &poly, reg_fbz_mode const fbzmode, reg_fog_mode const fogmode, reg_fbz_colorpath const fbzcp, s32 x, dither_helper const &dither, s32 wfloat, s32 iterz, s64 iterw, rgbaint_t const &iterargb)
inline void ATTR_FORCE_INLINE voodoo_renderer::apply_fogging(rgbaint_t &color, rgb_t fogcolor, u32 depthbias, reg_fbz_mode const fbzmode, reg_fog_mode const fogmode, reg_fbz_colorpath const fbzcp, s32 x, dither_helper const &dither, s32 wfloat, s32 iterz, s64 iterw, rgbaint_t const &iterargb)
{
// constant fog bypasses everything else
rgbaint_t fog_color_local(poly.fogcolor);
rgbaint_t fog_color_local(fogcolor);
if (fogmode.fog_constant())
{
// if fog_mult is 0, we add this to the original color
@ -1921,7 +1918,7 @@ inline void ATTR_FORCE_INLINE voodoo_renderer::apply_fogging(rgbaint_t &color, p
// add the bias for fog selection
if (fbzmode.enable_depth_bias())
fog_depth = std::clamp(fog_depth + s16(poly.zacolor), 0, 0xffff);
fog_depth = std::clamp(fog_depth + s16(depthbias), 0, 0xffff);
// perform the multiply against lower 8 bits of wfloat
s32 delta = m_fogdelta[fog_depth >> 10];
@ -2140,23 +2137,15 @@ inline void ATTR_FORCE_INLINE voodoo_renderer::write_pixel(thread_stats_block &t
// pipeline
//-------------------------------------------------
void voodoo_renderer::pixel_pipeline(thread_stats_block &threadstats, poly_data const &poly, reg_lfb_mode const lfbmode, s32 x, s32 scry, rgb_t src_color, u16 sz)
void voodoo_renderer::pixel_pipeline(thread_stats_block &threadstats, u16 *dest, u16 *depth, s32 x, s32 scry, rgb_t src_color, u16 sz)
{
auto const fbzcp = poly.raster.fbzcp();
auto const alphamode = poly.raster.alphamode();
auto const fbzmode = poly.raster.fbzmode();
auto const fogmode = poly.raster.fogmode();
dither_helper dither(scry, fbzmode, fogmode);
u16 *depth = poly.depthbase;
u16 *dest = poly.destbase;
u32 stipple = poly.stipple;
threadstats.pixels_in++;
// apply clipping
auto const fbzmode = m_fbi_reg.fbz_mode();
if (fbzmode.enable_clipping())
{
if (x < poly.clipleft || x >= poly.clipright || scry < poly.cliptop || scry >= poly.clipbottom)
if (x < m_fbi_reg.clip_left() || x >= m_fbi_reg.clip_right() || scry < m_fbi_reg.clip_top() || scry >= m_fbi_reg.clip_bottom())
{
threadstats.clip_fail++;
return;
@ -2164,21 +2153,25 @@ void voodoo_renderer::pixel_pipeline(thread_stats_block &threadstats, poly_data
}
// handle stippling
if (fbzmode.enable_stipple() && !stipple_test(threadstats, fbzmode, x, scry, stipple))
return;
if (fbzmode.enable_stipple())
{
u32 stipple = m_fbi_reg.stipple();
if (!stipple_test(threadstats, fbzmode, x, scry, stipple))
return;
}
// Depth testing value for lfb pipeline writes is directly from write data, no biasing is used
s32 depthval = u32(sz);
// Perform depth testing
if (fbzmode.enable_depthbuf() && !depth_test(threadstats, poly, fbzmode, depth[x], depthval))
if (fbzmode.enable_depthbuf() && !depth_test(threadstats, fbzmode, depth[x], (fbzmode.depth_source_compare() == 0) ? depthval : u16(m_fbi_reg.za_color())))
return;
// use the RGBA we stashed above
rgbaint_t color(src_color);
// handle chroma key
if (fbzmode.enable_chromakey() && !chroma_key_test(threadstats, color, poly.chromakey))
if (fbzmode.enable_chromakey() && !chroma_key_test(threadstats, color, m_fbi_reg.chroma_key().argb()))
return;
// handle alpha mask
@ -2186,16 +2179,19 @@ void voodoo_renderer::pixel_pipeline(thread_stats_block &threadstats, poly_data
return;
// handle alpha test
if (alphamode.alphatest() && !alpha_test(threadstats, alphamode, color.get_a(), poly.alpharef))
auto const alphamode = m_fbi_reg.alpha_mode();
if (alphamode.alphatest() && !alpha_test(threadstats, alphamode, color.get_a(), alphamode.alpharef()))
return;
// perform fogging
auto const fogmode = m_fbi_reg.fog_mode();
dither_helper dither(scry, fbzmode, fogmode);
rgbaint_t prefog(color);
if (fogmode.enable_fog())
{
s32 iterz = sz << 12;
s64 iterw = lfbmode.write_w_select() ? u32(poly.zacolor << 16) : u32(sz << 16);
apply_fogging(color, poly, fbzmode, fogmode, fbzcp, x, dither, depthval, iterz, iterw, rgbaint_t(0));
s64 iterw = m_fbi_reg.lfb_mode().write_w_select() ? u32(m_fbi_reg.za_color() << 16) : u32(sz << 16);
apply_fogging(color, m_fbi_reg.fog_color().argb(), m_fbi_reg.za_color(), fbzmode, fogmode, m_fbi_reg.fbz_colorpath(), x, dither, depthval, iterz, iterw, rgbaint_t(0));
}
// wait for any outstanding work to finish
@ -2334,7 +2330,7 @@ void voodoo_renderer::rasterizer(s32 y, const voodoo_renderer::extent_t &extent,
s32 depthval = compute_depthval(poly, fbzmode, fbzcp, wfloat, iterz);
// depth testing
if (fbzmode.enable_depthbuf() && !depth_test(threadstats, poly, fbzmode, depth[x], depthval))
if (fbzmode.enable_depthbuf() && !depth_test(threadstats, fbzmode, depth[x], (fbzmode.depth_source_compare() == 0) ? depthval : u16(poly.zacolor)))
break;
// run the texture pipeline on TMU1 to produce a value in texel
@ -2379,7 +2375,7 @@ void voodoo_renderer::rasterizer(s32 y, const voodoo_renderer::extent_t &extent,
// perform fogging
rgbaint_t prefog(color);
if (fogmode.enable_fog())
apply_fogging(color, poly, fbzmode, fogmode, fbzcp, x, dither, wfloat, iterz, iterw, iterargb);
apply_fogging(color, poly.fogcolor, poly.zacolor, fbzmode, fogmode, fbzcp, x, dither, wfloat, iterz, iterw, iterargb);
// perform alpha blending
if (alphamode.alphablend())

View File

@ -531,7 +531,7 @@ public:
void rasterizer(s32 y, const voodoo::voodoo_renderer::extent_t &extent, const voodoo::poly_data &extra, int threadid);
// run the pixel pipeline for LFB writes
void pixel_pipeline(thread_stats_block &threadstats, voodoo::poly_data const &extra, voodoo::reg_lfb_mode const lfbmode, s32 x, s32 scry, rgb_t color, u16 sz);
void pixel_pipeline(thread_stats_block &threadstats, u16 *dest, u16 *depth, s32 x, s32 scry, rgb_t color, u16 sz);
// update the fog tables
void write_fog(u32 base, u32 data)
@ -576,12 +576,12 @@ private:
// pipeline stages, in order
bool stipple_test(thread_stats_block &threadstats, voodoo::reg_fbz_mode const fbzmode, s32 x, s32 y, u32 &stipple);
s32 compute_depthval(voodoo::poly_data const &extra, voodoo::reg_fbz_mode const fbzmode, voodoo::reg_fbz_colorpath const fbzcp, s32 wfloat, s32 iterz);
bool depth_test(thread_stats_block &stats, voodoo::poly_data const &extra, voodoo::reg_fbz_mode const fbzmode, s32 destDepth, s32 biasdepth);
bool depth_test(thread_stats_block &stats, voodoo::reg_fbz_mode const fbzmode, s32 depth_dest, s32 depth_source);
bool combine_color(rgbaint_t &color, thread_stats_block &threadstats, const voodoo::poly_data &extradata, voodoo::reg_fbz_colorpath const fbzcp, voodoo::reg_fbz_mode const fbzmode, rgbaint_t texel, s32 iterz, s64 iterw, rgb_t chromakey);
bool alpha_mask_test(thread_stats_block &stats, u32 alpha);
bool alpha_test(thread_stats_block &stats, voodoo::reg_alpha_mode const alphamode, u32 alpha, u32 alpharef);
bool chroma_key_test(thread_stats_block &stats, rgbaint_t const &colorin, rgb_t chromakey);
void apply_fogging(rgbaint_t &color, voodoo::poly_data const &extra, voodoo::reg_fbz_mode const fbzmode, voodoo::reg_fog_mode const fogmode, voodoo::reg_fbz_colorpath const fbzcp, s32 x, voodoo::dither_helper const &dither, s32 wfloat, s32 iterz, s64 iterw, const rgbaint_t &iterargb);
void apply_fogging(rgbaint_t &color, rgb_t fogcolor, u32 depthbias, voodoo::reg_fbz_mode const fbzmode, voodoo::reg_fog_mode const fogmode, voodoo::reg_fbz_colorpath const fbzcp, s32 x, voodoo::dither_helper const &dither, s32 wfloat, s32 iterz, s64 iterw, const rgbaint_t &iterargb);
void alpha_blend(rgbaint_t &color, voodoo::reg_fbz_mode const fbzmode, voodoo::reg_alpha_mode const alphamode, s32 x, voodoo::dither_helper const &dither, int dpix, u16 *depth, rgbaint_t const &prefog);
void write_pixel(thread_stats_block &threadstats, voodoo::reg_fbz_mode const fbzmode, voodoo::dither_helper const &dither, u16 *destbase, u16 *depthbase, s32 x, rgbaint_t const &color, s32 depthval);