voodoo: Improve fidelity of synchronization points. Add synchronization back to NOP to fix flicker in gtfore06.

This commit is contained in:
Aaron Giles 2021-07-12 19:27:19 -07:00
parent adffe506b2
commit b3e47e87ec
6 changed files with 67 additions and 34 deletions

View File

@ -519,8 +519,8 @@ poly_manager<BaseType, ObjectType, MaxParams, Flags>::~poly_manager()
#if TRACK_POLY_WAITS #if TRACK_POLY_WAITS
{ {
osd_printf_info("Wait summary:\n"); osd_printf_info("Wait summary:\n");
osd_printf_info("Cause Cycles Waits Actuals Average <100 100-1k 1k-10k 10k+\n"); osd_printf_info("Cause Cycles Waits Actuals Average <100 100-1k 1k-10k 10k+\n");
osd_printf_info("------------------ ------- ------- ------- ------- ------- ------- ------- -------\n"); osd_printf_info("-------------------------- ------- ------- ------- ------- ------- ------- ------- -------\n");
while (1) while (1)
{ {
typename waitmap_t::value_type *biggest = nullptr; typename waitmap_t::value_type *biggest = nullptr;
@ -532,7 +532,7 @@ poly_manager<BaseType, ObjectType, MaxParams, Flags>::~poly_manager()
if (biggest == nullptr) if (biggest == nullptr)
break; break;
osd_printf_info("%-20s%-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s\n", osd_printf_info("%-28s%-7s %-7s %-7s %-7s %-7s %-7s %-7s %-7s\n",
biggest->first.c_str(), biggest->first.c_str(),
friendly_number(biggest->second.total_cycles).c_str(), friendly_number(biggest->second.total_cycles).c_str(),
friendly_number(biggest->second.total_waits).c_str(), friendly_number(biggest->second.total_waits).c_str(),

View File

@ -1033,7 +1033,7 @@ void voodoo_1_device::device_start()
void voodoo_1_device::device_stop() void voodoo_1_device::device_stop()
{ {
m_renderer->wait("Destruction"); m_renderer->wait("device_stop");
} }
@ -1540,7 +1540,7 @@ u32 voodoo_1_device::internal_lfb_r(offs_t offset)
} }
// wait for any outstanding work to finish before reading // wait for any outstanding work to finish before reading
m_renderer->wait("LFB read"); m_renderer->wait("internal_lfb_r");
// read and assemble two pixels // read and assemble two pixels
u32 data = buffer[0] | (buffer[1] << 16); u32 data = buffer[0] | (buffer[1] << 16);
@ -1629,7 +1629,7 @@ void voodoo_1_device::internal_lfb_w(offs_t offset, u32 data, u32 mem_mask)
depth += scry * m_renderer->rowpixels() + x; depth += scry * m_renderer->rowpixels() + x;
// wait for any outstanding work to finish // wait for any outstanding work to finish
m_renderer->wait("LFB Write"); m_renderer->wait("internal_lfb_w(raw)");
// loop over up to two pixels // loop over up to two pixels
voodoo::dither_helper dither(scry, fbzmode); voodoo::dither_helper dither(scry, fbzmode);
@ -1891,9 +1891,6 @@ void voodoo_1_device::internal_texture_w(offs_t offset, u32 data)
if (!BIT(m_chipmask, 1 + tmunum)) if (!BIT(m_chipmask, 1 + tmunum))
return; return;
// wait for any outstanding work to finish
m_renderer->wait("Texture write");
// the seq_8_downld flag seems to always come from TMU #0 // the seq_8_downld flag seems to always come from TMU #0
bool seq_8_downld = m_tmu[0].regs().texture_mode().seq_8_downld(); bool seq_8_downld = m_tmu[0].regs().texture_mode().seq_8_downld();
@ -1924,6 +1921,9 @@ void voodoo_1_device::internal_texture_w(offs_t offset, u32 data)
return; return;
u8 *dest = texture.write_ptr(lod, ts, tt, bytes_per_texel); u8 *dest = texture.write_ptr(lod, ts, tt, bytes_per_texel);
// wait for any outstanding work to finish
m_renderer->wait("internal_texture_w");
// write the four bytes in little-endian order // write the four bytes in little-endian order
if (bytes_per_texel == 1) if (bytes_per_texel == 1)
{ {
@ -2282,6 +2282,12 @@ u32 voodoo_1_device::reg_triangle_w(u32 chipmask, u32 regnum, u32 data)
u32 voodoo_1_device::reg_nop_w(u32 chipmask, u32 regnum, u32 data) u32 voodoo_1_device::reg_nop_w(u32 chipmask, u32 regnum, u32 data)
{ {
// NOP should synchronize the pipeline; in theory we can mostly get away without
// it, but gtfore06 shows flicker on some golfers if we don't respect it; some
// games (notably gradius4) take a noticeable hit when this is present, so it
// may be worth adding an option to not block here
m_renderer->wait("reg_nop_w");
if (BIT(data, 0)) if (BIT(data, 0))
reset_counters(); reset_counters();
if (BIT(data, 1)) if (BIT(data, 1))
@ -2321,8 +2327,6 @@ u32 voodoo_1_device::reg_fastfill_w(u32 chipmask, u32 regnum, u32 data)
u32 voodoo_1_device::reg_swapbuffer_w(u32 chipmask, u32 regnum, u32 data) u32 voodoo_1_device::reg_swapbuffer_w(u32 chipmask, u32 regnum, u32 data)
{ {
m_renderer->wait("swapbufferCMD");
// the don't swap value is Voodoo 2-only, masked off by the register engine // the don't swap value is Voodoo 2-only, masked off by the register engine
m_vblank_swap_pending = true; m_vblank_swap_pending = true;
m_vblank_swap = BIT(data, 1, 8); m_vblank_swap = BIT(data, 1, 8);
@ -2360,7 +2364,7 @@ u32 voodoo_1_device::reg_fbiinit_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0) && m_init_enable.enable_hw_init()) if (BIT(chipmask, 0) && m_init_enable.enable_hw_init())
{ {
m_renderer->wait("fbi_init"); m_renderer->wait("reg_fbiinit_w");
m_reg.write(regnum, data); m_reg.write(regnum, data);
// handle resets written to fbiInit0 // handle resets written to fbiInit0
@ -2394,7 +2398,7 @@ u32 voodoo_1_device::reg_video_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("video_configuration"); m_renderer->wait("reg_video_w");
m_reg.write(regnum, data); m_reg.write(regnum, data);
auto const hsync = m_reg.hsync<true>(); auto const hsync = m_reg.hsync<true>();
@ -2423,12 +2427,12 @@ u32 voodoo_1_device::reg_clut_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("clut");
if (m_reg.fbi_init1().video_timing_reset() == 0) if (m_reg.fbi_init1().video_timing_reset() == 0)
{ {
int index = BIT(data, 24, 8); int index = BIT(data, 24, 8);
if (index <= 32 && m_clut[index] != data) if (index <= 32 && m_clut[index] != data)
{ {
screen().update_partial(screen().vpos());
m_clut[index] = data; m_clut[index] = data;
m_clut_dirty = true; m_clut_dirty = true;
} }
@ -2607,6 +2611,7 @@ void voodoo_1_device::swap_buffers()
logerror("--- swap_buffers @ %d\n", screen().vpos()); logerror("--- swap_buffers @ %d\n", screen().vpos());
// force a partial update // force a partial update
m_renderer->wait("swap_buffers");
screen().update_partial(screen().vpos()); screen().update_partial(screen().vpos());
m_video_changed = true; m_video_changed = true;
@ -2677,6 +2682,16 @@ void voodoo_1_device::rotate_buffers()
int voodoo_1_device::update_common(bitmap_rgb32 &bitmap, const rectangle &cliprect, rgb_t const *pens) int voodoo_1_device::update_common(bitmap_rgb32 &bitmap, const rectangle &cliprect, rgb_t const *pens)
{ {
// flush the pipes
if (operation_pending())
{
if (LOG_VBLANK_SWAP)
logerror("---- update flush begin\n");
flush_fifos(machine().time());
if (LOG_VBLANK_SWAP)
logerror("---- update flush end\n");
}
// reset the video changed flag // reset the video changed flag
bool changed = m_video_changed; bool changed = m_video_changed;
m_video_changed = false; m_video_changed = false;
@ -2687,9 +2702,9 @@ int voodoo_1_device::update_common(bitmap_rgb32 &bitmap, const rectangle &clipre
drawbuf = m_backbuf; drawbuf = m_backbuf;
// copy from the current front buffer // copy from the current front buffer
if (LOG_VBLANK_SWAP) logerror("--- update_common @ %d from %08X\n", screen().vpos(), m_rgboffs[m_frontbuf]);
u32 rowpixels = m_renderer->rowpixels(); u32 rowpixels = m_renderer->rowpixels();
u16 *buffer_base = draw_buffer(drawbuf); u16 *buffer_base = draw_buffer(drawbuf);
if (LOG_VBLANK_SWAP) logerror("--- update_common %d-%d @ %d from %08X\n", cliprect.min_y, cliprect.max_y, screen().vpos(), u32((u8 *)buffer_base - m_fbram));
for (s32 y = cliprect.min_y; y <= cliprect.max_y; y++) for (s32 y = cliprect.min_y; y <= cliprect.max_y; y++)
{ {
if (y < m_yoffs) if (y < m_yoffs)

View File

@ -578,6 +578,7 @@ u32 command_fifo::packet_type_5(u32 command)
if (LOG_CMDFIFO) if (LOG_CMDFIFO)
m_device.logerror(" PACKET TYPE 5: FB count=%d dest=%08X bd2=%X bdN=%X\n", count, target, BIT(command, 26, 4), BIT(command, 22, 4)); m_device.logerror(" PACKET TYPE 5: FB count=%d dest=%08X bd2=%X bdN=%X\n", count, target, BIT(command, 26, 4), BIT(command, 22, 4));
m_device.renderer().wait("packet_type_5(0)");
for (u32 word = 0; word < count; word++) for (u32 word = 0; word < count; word++)
m_ram[target++ & m_mask] = little_endianize_int32(read_next()); m_ram[target++ & m_mask] = little_endianize_int32(read_next());
break; break;
@ -945,7 +946,7 @@ u32 voodoo_2_device::reg_video2_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("video_configuration"); m_renderer->wait("reg_video2_w");
m_reg.write(regnum, data); m_reg.write(regnum, data);
auto const hsync = m_reg.hsync<false>(); auto const hsync = m_reg.hsync<false>();
@ -988,7 +989,7 @@ u32 voodoo_2_device::reg_sargb_w(u32 chipmask, u32 regnum, u32 data)
u32 voodoo_2_device::reg_userintr_w(u32 chipmask, u32 regnum, u32 data) u32 voodoo_2_device::reg_userintr_w(u32 chipmask, u32 regnum, u32 data)
{ {
m_renderer->wait("userIntrCMD"); m_renderer->wait("reg_userintr_w");
// Bit 5 of intrCtrl enables user interrupts // Bit 5 of intrCtrl enables user interrupts
if (m_reg.intr_ctrl().user_interrupt_enable()) if (m_reg.intr_ctrl().user_interrupt_enable())
@ -1015,7 +1016,7 @@ u32 voodoo_2_device::reg_cmdfifo_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("cmdFifo write"); m_renderer->wait("reg_cmdfifo_w");
m_reg.write(regnum, data); m_reg.write(regnum, data);
m_cmdfifo.set_base(BIT(m_reg.read(voodoo_regs::reg_cmdFifoBaseAddr), 0, 10) << 12); m_cmdfifo.set_base(BIT(m_reg.read(voodoo_regs::reg_cmdFifoBaseAddr), 0, 10) << 12);
m_cmdfifo.set_end((BIT(m_reg.read(voodoo_regs::reg_cmdFifoBaseAddr), 16, 10) + 1) << 12); m_cmdfifo.set_end((BIT(m_reg.read(voodoo_regs::reg_cmdFifoBaseAddr), 16, 10) + 1) << 12);
@ -1034,7 +1035,6 @@ u32 voodoo_2_device::reg_cmdfifoptr_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("cmdFifoReadPtr");
m_reg.write(regnum, data); m_reg.write(regnum, data);
m_cmdfifo.set_read_pointer(data); m_cmdfifo.set_read_pointer(data);
} }
@ -1051,7 +1051,6 @@ u32 voodoo_2_device::reg_cmdfifodepth_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("cmdFifoDepth");
m_reg.write(regnum, data); m_reg.write(regnum, data);
m_cmdfifo.set_depth(data); m_cmdfifo.set_depth(data);
} }
@ -1068,7 +1067,6 @@ u32 voodoo_2_device::reg_cmdfifoholes_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0)) if (BIT(chipmask, 0))
{ {
m_renderer->wait("cmdFifoHoles");
m_reg.write(regnum, data); m_reg.write(regnum, data);
m_cmdfifo.set_holes(data); m_cmdfifo.set_holes(data);
} }
@ -1084,10 +1082,16 @@ u32 voodoo_2_device::reg_fbiinit5_7_w(u32 chipmask, u32 regnum, u32 data)
{ {
if (BIT(chipmask, 0) && m_init_enable.enable_hw_init()) if (BIT(chipmask, 0) && m_init_enable.enable_hw_init())
{ {
m_renderer->wait("fbiInit5-7"); u32 delta = m_reg.read(regnum) ^ data;
m_reg.write(regnum, data); m_reg.write(regnum, data);
if (regnum == voodoo_regs::reg_fbiInit5 || regnum == voodoo_regs::reg_fbiInit6)
// a few bits affect video memory configuration
if ((regnum == voodoo_regs::reg_fbiInit5 && BIT(delta, 9, 2) != 0) ||
(regnum == voodoo_regs::reg_fbiInit6 && BIT(delta, 30, 1) != 0))
{
m_renderer->wait("reg_fbiinit5_7_w");
recompute_video_memory(); recompute_video_memory();
}
m_cmdfifo.set_enable(m_reg.fbi_init7().cmdfifo_enable()); m_cmdfifo.set_enable(m_reg.fbi_init7().cmdfifo_enable());
m_cmdfifo.set_count_holes(!m_reg.fbi_init7().disable_cmdfifo_holes()); m_cmdfifo.set_count_holes(!m_reg.fbi_init7().disable_cmdfifo_holes());
} }

View File

@ -331,6 +331,7 @@ u32 voodoo_banshee_device::read_lfb(offs_t offset, u32 mem_mask)
u32 addr = offset * 4; u32 addr = offset * 4;
if (addr <= m_fbmask) if (addr <= m_fbmask)
{ {
m_renderer->wait("read_lfb");
u32 result = *(u32 *)&m_fbram[addr]; u32 result = *(u32 *)&m_fbram[addr];
if (LOG_LFB) if (LOG_LFB)
logerror("%s:read_lfb(%X) = %08X\n", machine().describe_context(), addr, result); logerror("%s:read_lfb(%X) = %08X\n", machine().describe_context(), addr, result);
@ -363,6 +364,7 @@ void voodoo_banshee_device::write_lfb(offs_t offset, u32 data, u32 mem_mask)
// writes below the LFB base are direct? // writes below the LFB base are direct?
if (addr <= m_fbmask) if (addr <= m_fbmask)
{ {
m_renderer->wait("write_lfb");
if (LOG_LFB) if (LOG_LFB)
logerror("%s:write_lfb(%X) = %08X & %08X\n", machine().describe_context(), addr, data, mem_mask); logerror("%s:write_lfb(%X) = %08X & %08X\n", machine().describe_context(), addr, data, mem_mask);
COMBINE_DATA((u32 *)&m_fbram[addr]); COMBINE_DATA((u32 *)&m_fbram[addr]);
@ -800,6 +802,7 @@ void voodoo_banshee_device::internal_io_w(offs_t offset, u32 data, u32 mem_mask)
u32 dacaddr = BIT(m_io_regs.read(banshee_io_regs::dacAddr), 0, 9); u32 dacaddr = BIT(m_io_regs.read(banshee_io_regs::dacAddr), 0, 9);
if (newval != m_clut[dacaddr]) if (newval != m_clut[dacaddr])
{ {
screen().update_partial(screen().vpos());
m_clut[dacaddr] = newval; m_clut[dacaddr] = newval;
m_clut_dirty = true; m_clut_dirty = true;
} }
@ -1062,9 +1065,6 @@ void voodoo_banshee_device::internal_texture_w(offs_t offset, u32 data)
if (!BIT(m_chipmask, 1 + tmunum)) if (!BIT(m_chipmask, 1 + tmunum))
return; return;
// wait for any outstanding work to finish
m_renderer->wait("Texture write");
// pull out modes from the TMU and update state // pull out modes from the TMU and update state
auto &regs = m_tmu[tmunum].regs(); auto &regs = m_tmu[tmunum].regs();
auto const texlod = regs.texture_lod(); auto const texlod = regs.texture_lod();
@ -1080,6 +1080,9 @@ void voodoo_banshee_device::internal_texture_w(offs_t offset, u32 data)
// determine destination pointer // determine destination pointer
u8 *dest = texture.write_ptr(0, offset * 4, 0, 1); u8 *dest = texture.write_ptr(0, offset * 4, 0, 1);
// wait for any outstanding work to finish
m_renderer->wait("internal_texture_w");
// write the four bytes in little-endian order // write the four bytes in little-endian order
u32 bytes_per_texel = (texmode.format() < 8) ? 1 : 2; u32 bytes_per_texel = (texmode.format() < 8) ? 1 : 2;
if (bytes_per_texel == 1) if (bytes_per_texel == 1)
@ -1142,6 +1145,9 @@ void voodoo_banshee_device::internal_lfb_direct_w(offs_t offset, u32 data, u32 m
// advance pointers to the proper row // advance pointers to the proper row
dest += y * m_renderer->rowpixels() + x; dest += y * m_renderer->rowpixels() + x;
// wait for any outstanding work to finish
m_renderer->wait("internal_lfb_direct_w");
// write to the RGB buffer // write to the RGB buffer
if (ACCESSING_BITS_0_15 && dest < end) if (ACCESSING_BITS_0_15 && dest < end)
dest[0] = BIT(data, 0, 16); dest[0] = BIT(data, 0, 16);
@ -1560,6 +1566,8 @@ void voodoo_banshee_device::execute_blit(u32 data)
if (LOG_BANSHEE_2D) if (LOG_BANSHEE_2D)
logerror(" blit_2d:host_to_screen: %08x -> %08x, %d, %d\n", data, addr, m_blt_dst_x, m_blt_dst_y); logerror(" blit_2d:host_to_screen: %08x -> %08x, %d, %d\n", data, addr, m_blt_dst_x, m_blt_dst_y);
m_renderer->wait("execute_blit(3)");
switch (m_blt_dst_bpp) switch (m_blt_dst_bpp)
{ {
case 1: case 1:

View File

@ -2195,7 +2195,7 @@ void voodoo_renderer::pixel_pipeline(thread_stats_block &threadstats, u16 *dest,
} }
// wait for any outstanding work to finish // wait for any outstanding work to finish
wait("LFB Write"); wait("pixel_pipeline");
// perform alpha blending // perform alpha blending
if (alphamode.alphablend()) if (alphamode.alphablend())

View File

@ -536,24 +536,30 @@ public:
// update the fog tables // update the fog tables
void write_fog(u32 base, u32 data) void write_fog(u32 base, u32 data)
{ {
wait("Fog write"); u32 oldval = m_fogdelta[base + 0] | (m_fogblend[base + 0] << 8) | (m_fogdelta[base + 1] << 16) | (m_fogblend[base + 1] << 24);
m_fogdelta[base + 0] = (data >> 0) & 0xff; if (oldval != data)
m_fogblend[base + 0] = (data >> 8) & 0xff; {
m_fogdelta[base + 1] = (data >> 16) & 0xff; wait("write_fog");
m_fogblend[base + 1] = (data >> 24) & 0xff; m_fogdelta[base + 0] = BIT(data, 0, 8);
m_fogblend[base + 0] = BIT(data, 8, 8);
m_fogdelta[base + 1] = BIT(data, 16, 8);
m_fogblend[base + 1] = BIT(data, 24, 8);
}
} }
// update the Y origin // update the Y origin
void set_yorigin(s32 yorigin) void set_yorigin(s32 yorigin)
{ {
wait("Y origin write"); if (m_yorigin != yorigin)
wait("set_yorigin");
m_yorigin = yorigin; m_yorigin = yorigin;
} }
// update the rowpixels // update the rowpixels
void set_rowpixels(u32 rowpixels) void set_rowpixels(u32 rowpixels)
{ {
wait("Rowpixels write"); if (m_rowpixels != rowpixels)
wait("set_rowpixels");
m_rowpixels = rowpixels; m_rowpixels = rowpixels;
} }