voodoo: Improve fidelity of synchronization points. Add synchronization back to NOP to fix flicker in gtfore06.

This commit is contained in:
Aaron Giles 2021-07-12 19:27:19 -07:00
parent adffe506b2
commit b3e47e87ec
6 changed files with 67 additions and 34 deletions

View File

@ -519,8 +519,8 @@ poly_manager<BaseType, ObjectType, MaxParams, Flags>::~poly_manager()
#if TRACK_POLY_WAITS
{
osd_printf_info("Wait summary:\n");
osd_printf_info("Cause Cycles Waits Actuals Average <100 100-1k 1k-10k 10k+\n");
osd_printf_info("------------------ ------- ------- ------- ------- ------- ------- ------- -------\n");
osd_printf_info("Cause Cycles Waits Actuals Average <100 100-1k 1k-10k 10k+\n");
osd_printf_info("-------------------------- ------- ------- ------- ------- ------- ------- ------- -------\n");
while (1)
{
typename waitmap_t::value_type *biggest = nullptr;
@ -532,7 +532,7 @@ poly_manager<BaseType, ObjectType, MaxParams, Flags>::~poly_manager()
if (biggest == nullptr)
break;
osd_printf_info("%-20s%-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s\n",
osd_printf_info("%-28s%-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s\n",
biggest->first.c_str(),
friendly_number(biggest->second.total_cycles).c_str(),
friendly_number(biggest->second.total_waits).c_str(),

View File

@ -1033,7 +1033,7 @@ void voodoo_1_device::device_start()
// Stop handler for the Voodoo 1 device: calls wait() on the renderer so that any
// outstanding rendering work finishes before the device is stopped.
// The string passed to wait() is a label for the wait.
// NOTE(review): presumably that label is the "Cause" column of the poly_manager
// wait summary -- confirm against the wait() implementation.
void voodoo_1_device::device_stop()
{
// NOTE(review): the two calls below differ only in their label and look like the
// removed/added pair of a diff hunk whose +/- markers were lost; confirm that only
// one of them belongs to the revision being edited.
m_renderer->wait("Destruction");
m_renderer->wait("device_stop");
}
@ -1540,7 +1540,7 @@ u32 voodoo_1_device::internal_lfb_r(offs_t offset)
}
// wait for any outstanding work to finish before reading
m_renderer->wait("LFB read");
m_renderer->wait("internal_lfb_r");
// read and assemble two pixels
u32 data = buffer[0] | (buffer[1] << 16);
@ -1629,7 +1629,7 @@ void voodoo_1_device::internal_lfb_w(offs_t offset, u32 data, u32 mem_mask)
depth += scry * m_renderer->rowpixels() + x;
// wait for any outstanding work to finish
m_renderer->wait("LFB Write");
m_renderer->wait("internal_lfb_w(raw)");
// loop over up to two pixels
voodoo::dither_helper dither(scry, fbzmode);
@ -1891,9 +1891,6 @@ void voodoo_1_device::internal_texture_w(offs_t offset, u32 data)
if (!BIT(m_chipmask, 1 + tmunum))
return;
// wait for any outstanding work to finish
m_renderer->wait("Texture write");
// the seq_8_downld flag seems to always come from TMU #0
bool seq_8_downld = m_tmu[0].regs().texture_mode().seq_8_downld();
@ -1924,6 +1921,9 @@ void voodoo_1_device::internal_texture_w(offs_t offset, u32 data)
return;
u8 *dest = texture.write_ptr(lod, ts, tt, bytes_per_texel);
// wait for any outstanding work to finish
m_renderer->wait("internal_texture_w");
// write the four bytes in little-endian order
if (bytes_per_texel == 1)
{
@ -2282,6 +2282,12 @@ u32 voodoo_1_device::reg_triangle_w(u32 chipmask, u32 regnum, u32 data)
u32 voodoo_1_device::reg_nop_w(u32 chipmask, u32 regnum, u32 data)
{
// NOP should synchronize the pipeline; in theory we can mostly get away without
// it, but gtfore06 shows flicker on some golfers if we don't respect it; some
// games (notably gradius4) take a noticeable hit when this is present, so it
// may be worth adding an option to not block here
m_renderer->wait("reg_nop_w");
if (BIT(data, 0))
reset_counters();
if (BIT(data, 1))
@ -2321,8 +2327,6 @@ u32 voodoo_1_device::reg_fastfill_w(u32 chipmask, u32 regnum, u32 data)
u32 voodoo_1_device::reg_swapbuffer_w(u32 chipmask, u32 regnum, u32 data)
{
m_renderer->wait("swapbufferCMD");
// the don't swap value is Voodoo 2-only, masked off by the register engine
m_vblank_swap_pending = true;
m_vblank_swap = BIT(data, 1, 8);
@ -2360,7 +2364,7 @@ u32 voodoo_1_device::reg_fbiinit_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0) && m_init_enable.enable_hw_init())
{
m_renderer->wait("fbi_init");
m_renderer->wait("reg_fbiinit_w");
m_reg.write(regnum, data);
// handle resets written to fbiInit0
@ -2394,7 +2398,7 @@ u32 voodoo_1_device::reg_video_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("video_configuration");
m_renderer->wait("reg_video_w");
m_reg.write(regnum, data);
auto const hsync = m_reg.hsync<true>();
@ -2423,12 +2427,12 @@ u32 voodoo_1_device::reg_clut_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("clut");
if (m_reg.fbi_init1().video_timing_reset() == 0)
{
int index = BIT(data, 24, 8);
if (index <= 32 && m_clut[index] != data)
{
screen().update_partial(screen().vpos());
m_clut[index] = data;
m_clut_dirty = true;
}
@ -2607,6 +2611,7 @@ void voodoo_1_device::swap_buffers()
logerror("--- swap_buffers @ %d\n", screen().vpos());
// force a partial update
m_renderer->wait("swap_buffers");
screen().update_partial(screen().vpos());
m_video_changed = true;
@ -2677,6 +2682,16 @@ void voodoo_1_device::rotate_buffers()
int voodoo_1_device::update_common(bitmap_rgb32 &bitmap, const rectangle &cliprect, rgb_t const *pens)
{
// flush the pipes
if (operation_pending())
{
if (LOG_VBLANK_SWAP)
logerror("---- update flush begin\n");
flush_fifos(machine().time());
if (LOG_VBLANK_SWAP)
logerror("---- update flush end\n");
}
// reset the video changed flag
bool changed = m_video_changed;
m_video_changed = false;
@ -2687,9 +2702,9 @@ int voodoo_1_device::update_common(bitmap_rgb32 &bitmap, const rectangle &clipre
drawbuf = m_backbuf;
// copy from the current front buffer
if (LOG_VBLANK_SWAP) logerror("--- update_common @ %d from %08X\n", screen().vpos(), m_rgboffs[m_frontbuf]);
u32 rowpixels = m_renderer->rowpixels();
u16 *buffer_base = draw_buffer(drawbuf);
if (LOG_VBLANK_SWAP) logerror("--- update_common %d-%d @ %d from %08X\n", cliprect.min_y, cliprect.max_y, screen().vpos(), u32((u8 *)buffer_base - m_fbram));
for (s32 y = cliprect.min_y; y <= cliprect.max_y; y++)
{
if (y < m_yoffs)

View File

@ -578,6 +578,7 @@ u32 command_fifo::packet_type_5(u32 command)
if (LOG_CMDFIFO)
m_device.logerror(" PACKET TYPE 5: FB count=%d dest=%08X bd2=%X bdN=%X\n", count, target, BIT(command, 26, 4), BIT(command, 22, 4));
m_device.renderer().wait("packet_type_5(0)");
for (u32 word = 0; word < count; word++)
m_ram[target++ & m_mask] = little_endianize_int32(read_next());
break;
@ -945,7 +946,7 @@ u32 voodoo_2_device::reg_video2_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("video_configuration");
m_renderer->wait("reg_video2_w");
m_reg.write(regnum, data);
auto const hsync = m_reg.hsync<false>();
@ -988,7 +989,7 @@ u32 voodoo_2_device::reg_sargb_w(u32 chipmask, u32 regnum, u32 data)
u32 voodoo_2_device::reg_userintr_w(u32 chipmask, u32 regnum, u32 data)
{
m_renderer->wait("userIntrCMD");
m_renderer->wait("reg_userintr_w");
// Bit 5 of intrCtrl enables user interrupts
if (m_reg.intr_ctrl().user_interrupt_enable())
@ -1015,7 +1016,7 @@ u32 voodoo_2_device::reg_cmdfifo_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("cmdFifo write");
m_renderer->wait("reg_cmdfifo_w");
m_reg.write(regnum, data);
m_cmdfifo.set_base(BIT(m_reg.read(voodoo_regs::reg_cmdFifoBaseAddr), 0, 10) << 12);
m_cmdfifo.set_end((BIT(m_reg.read(voodoo_regs::reg_cmdFifoBaseAddr), 16, 10) + 1) << 12);
@ -1034,7 +1035,6 @@ u32 voodoo_2_device::reg_cmdfifoptr_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("cmdFifoReadPtr");
m_reg.write(regnum, data);
m_cmdfifo.set_read_pointer(data);
}
@ -1051,7 +1051,6 @@ u32 voodoo_2_device::reg_cmdfifodepth_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("cmdFifoDepth");
m_reg.write(regnum, data);
m_cmdfifo.set_depth(data);
}
@ -1068,7 +1067,6 @@ u32 voodoo_2_device::reg_cmdfifoholes_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0))
{
m_renderer->wait("cmdFifoHoles");
m_reg.write(regnum, data);
m_cmdfifo.set_holes(data);
}
@ -1084,10 +1082,16 @@ u32 voodoo_2_device::reg_fbiinit5_7_w(u32 chipmask, u32 regnum, u32 data)
{
if (BIT(chipmask, 0) && m_init_enable.enable_hw_init())
{
m_renderer->wait("fbiInit5-7");
u32 delta = m_reg.read(regnum) ^ data;
m_reg.write(regnum, data);
if (regnum == voodoo_regs::reg_fbiInit5 || regnum == voodoo_regs::reg_fbiInit6)
// a few bits affect video memory configuration
if ((regnum == voodoo_regs::reg_fbiInit5 && BIT(delta, 9, 2) != 0) ||
(regnum == voodoo_regs::reg_fbiInit6 && BIT(delta, 30, 1) != 0))
{
m_renderer->wait("reg_fbiinit5_7_w");
recompute_video_memory();
}
m_cmdfifo.set_enable(m_reg.fbi_init7().cmdfifo_enable());
m_cmdfifo.set_count_holes(!m_reg.fbi_init7().disable_cmdfifo_holes());
}

View File

@ -331,6 +331,7 @@ u32 voodoo_banshee_device::read_lfb(offs_t offset, u32 mem_mask)
u32 addr = offset * 4;
if (addr <= m_fbmask)
{
m_renderer->wait("read_lfb");
u32 result = *(u32 *)&m_fbram[addr];
if (LOG_LFB)
logerror("%s:read_lfb(%X) = %08X\n", machine().describe_context(), addr, result);
@ -363,6 +364,7 @@ void voodoo_banshee_device::write_lfb(offs_t offset, u32 data, u32 mem_mask)
// writes below the LFB base are direct?
if (addr <= m_fbmask)
{
m_renderer->wait("write_lfb");
if (LOG_LFB)
logerror("%s:write_lfb(%X) = %08X & %08X\n", machine().describe_context(), addr, data, mem_mask);
COMBINE_DATA((u32 *)&m_fbram[addr]);
@ -800,6 +802,7 @@ void voodoo_banshee_device::internal_io_w(offs_t offset, u32 data, u32 mem_mask)
u32 dacaddr = BIT(m_io_regs.read(banshee_io_regs::dacAddr), 0, 9);
if (newval != m_clut[dacaddr])
{
screen().update_partial(screen().vpos());
m_clut[dacaddr] = newval;
m_clut_dirty = true;
}
@ -1062,9 +1065,6 @@ void voodoo_banshee_device::internal_texture_w(offs_t offset, u32 data)
if (!BIT(m_chipmask, 1 + tmunum))
return;
// wait for any outstanding work to finish
m_renderer->wait("Texture write");
// pull out modes from the TMU and update state
auto &regs = m_tmu[tmunum].regs();
auto const texlod = regs.texture_lod();
@ -1080,6 +1080,9 @@ void voodoo_banshee_device::internal_texture_w(offs_t offset, u32 data)
// determine destination pointer
u8 *dest = texture.write_ptr(0, offset * 4, 0, 1);
// wait for any outstanding work to finish
m_renderer->wait("internal_texture_w");
// write the four bytes in little-endian order
u32 bytes_per_texel = (texmode.format() < 8) ? 1 : 2;
if (bytes_per_texel == 1)
@ -1142,6 +1145,9 @@ void voodoo_banshee_device::internal_lfb_direct_w(offs_t offset, u32 data, u32 m
// advance pointers to the proper row
dest += y * m_renderer->rowpixels() + x;
// wait for any outstanding work to finish
m_renderer->wait("internal_lfb_direct_w");
// write to the RGB buffer
if (ACCESSING_BITS_0_15 && dest < end)
dest[0] = BIT(data, 0, 16);
@ -1560,6 +1566,8 @@ void voodoo_banshee_device::execute_blit(u32 data)
if (LOG_BANSHEE_2D)
logerror(" blit_2d:host_to_screen: %08x -> %08x, %d, %d\n", data, addr, m_blt_dst_x, m_blt_dst_y);
m_renderer->wait("execute_blit(3)");
switch (m_blt_dst_bpp)
{
case 1:

View File

@ -2195,7 +2195,7 @@ void voodoo_renderer::pixel_pipeline(thread_stats_block &threadstats, u16 *dest,
}
// wait for any outstanding work to finish
wait("LFB Write");
wait("pixel_pipeline");
// perform alpha blending
if (alphamode.alphablend())

View File

@ -536,24 +536,30 @@ public:
// update the fog tables
//
// Unpacks one 32-bit word into two consecutive fog table entries:
//   bits  0-7  -> m_fogdelta[base + 0]
//   bits  8-15 -> m_fogblend[base + 0]
//   bits 16-23 -> m_fogdelta[base + 1]
//   bits 24-31 -> m_fogblend[base + 1]
// base: index of the first of the two entries to update
// data: packed delta/blend bytes for both entries
//
// NOTE(review): this listing contains both an unconditional store sequence
// (shift-and-mask form) and a change-guarded one (BIT() form). They look like the
// removed and added sides of a diff hunk whose +/- markers were lost; confirm which
// sequence belongs to the revision being edited before relying on this text.
void write_fog(u32 base, u32 data)
{
wait("Fog write");
m_fogdelta[base + 0] = (data >> 0) & 0xff;
m_fogblend[base + 0] = (data >> 8) & 0xff;
m_fogdelta[base + 1] = (data >> 16) & 0xff;
m_fogblend[base + 1] = (data >> 24) & 0xff;
// repack the currently stored entries into the same layout as data so the two
// can be compared as a single word
u32 oldval = m_fogdelta[base + 0] | (m_fogblend[base + 0] << 8) | (m_fogdelta[base + 1] << 16) | (m_fogblend[base + 1] << 24);
if (oldval != data)
{
// only synchronize with the renderer when the table contents actually change
wait("write_fog");
m_fogdelta[base + 0] = BIT(data, 0, 8);
m_fogblend[base + 0] = BIT(data, 8, 8);
m_fogdelta[base + 1] = BIT(data, 16, 8);
m_fogblend[base + 1] = BIT(data, 24, 8);
}
}
// update the Y origin
//
// Stores yorigin into m_yorigin, calling wait() first.
// NOTE(review): the listing shows an unconditional wait("Y origin write") followed
// by a wait("set_yorigin") that is guarded by a value-changed check; these look like
// the removed and added lines of a diff hunk whose +/- markers were lost -- confirm
// which one belongs to the revision being edited.
void set_yorigin(s32 yorigin)
{
wait("Y origin write");
// the guard applies only to the wait() on the next line; the store below it is
// unconditional
if (m_yorigin != yorigin)
wait("set_yorigin");
m_yorigin = yorigin;
}
// update the rowpixels
//
// Stores rowpixels into m_rowpixels, calling wait() first.
// NOTE(review): the listing shows an unconditional wait("Rowpixels write") followed
// by a wait("set_rowpixels") that is guarded by a value-changed check; these look
// like the removed and added lines of a diff hunk whose +/- markers were lost --
// confirm which one belongs to the revision being edited.
void set_rowpixels(u32 rowpixels)
{
wait("Rowpixels write");
// the guard applies only to the wait() on the next line; the store below it is
// unconditional
if (m_rowpixels != rowpixels)
wait("set_rowpixels");
m_rowpixels = rowpixels;
}