chihiro: add support for vblank wait NV2A accelerator method (nw)

It is disabled by default because it slows emulation down too much.
There is a new debugger command "chihiro waitvblank" to enable and disable it.
This commit is contained in:
yz70s 2014-12-28 21:49:12 +01:00
parent 956f2a350e
commit 495fe0f2dd
3 changed files with 233 additions and 137 deletions

View File

@ -777,6 +777,18 @@ static void nv2a_combiners_command(running_machine &machine, int ref, int params
debug_console_printf(machine, "Register combiners disabled\n");
}
// Debugger command "chihiro waitvblank": flips the NV2A wait-vblank support
// flag and reports the resulting state on the debugger console.
// ref, params and param are not used by this command.
static void waitvblank_command(running_machine &machine, int ref, int params, const char **param)
{
	chihiro_state *state = machine.driver_data<chihiro_state>();

	// toggle_wait_vblank_support() returns the new value of the flag
	if (state->nvidia_nv2a->toggle_wait_vblank_support() == 0) {
		debug_console_printf(machine, "Vblank method disabled\n");
		return;
	}
	debug_console_printf(machine, "Vblank method enabled\n");
}
static void grab_texture_command(running_machine &machine, int ref, int params, const char **param)
{
UINT64 type;
@ -862,6 +874,7 @@ static void help_command(running_machine &machine, int ref, int params, const ch
debug_console_printf(machine, " chihiro curthread -- Print information about current thread\n");
debug_console_printf(machine, " chihiro irq,<number> -- Generate interrupt with irq number 0-15\n");
debug_console_printf(machine, " chihiro nv2a_combiners -- Toggle use of register combiners\n");
debug_console_printf(machine, " chihiro waitvblank -- Toggle support for wait vblank method\n");
debug_console_printf(machine, " chihiro grab_texture,<type>,<filename> -- Save to <filename> the next used texture of type <type>\n");
debug_console_printf(machine, " chihiro grab_vprog,<filename> -- save current vertex program instruction slots to <filename>\n");
debug_console_printf(machine, " chihiro vprogdis,<address>,<length>[,<type>] -- disassemble <lenght> vertex program instructions at <address> of <type>\n");
@ -886,6 +899,8 @@ static void chihiro_debug_commands(running_machine &machine, int ref, int params
generate_irq_command(machine, ref, params - 1, param + 1);
else if (strcmp("nv2a_combiners", param[0]) == 0)
nv2a_combiners_command(machine, ref, params - 1, param + 1);
else if (strcmp("waitvblank", param[0]) == 0)
waitvblank_command(machine, ref, params - 1, param + 1);
else if (strcmp("grab_texture", param[0]) == 0)
grab_texture_command(machine, ref, params - 1, param + 1);
else if (strcmp("grab_vprog", param[0]) == 0)
@ -1787,6 +1802,7 @@ void chihiro_state::machine_start()
save_item(NAME(smbusst.words));
save_item(NAME(pic16lc_buffer));
save_item(NAME(usbhack_counter));
nvidia_nv2a->start();
nvidia_nv2a->savestate_items();
}

View File

@ -191,8 +191,12 @@ public:
rendertarget = NULL;
depthbuffer = NULL;
displayedtarget = NULL;
puller_channel = 0;
puller_subchannel = 0;
puller_waiting = 0;
debug_grab_texttype = -1;
debug_grab_textfile = NULL;
waitvblank_used = 0;
memset(vertex_attribute_words, 0, sizeof(vertex_attribute_words));
memset(vertex_attribute_offset, 0, sizeof(vertex_attribute_offset));
}
@ -208,7 +212,7 @@ public:
int geforce_commandkind(UINT32 word);
UINT32 geforce_object_offset(UINT32 handle);
void geforce_read_dma_object(UINT32 handle, UINT32 &offset, UINT32 &size);
void geforce_exec_method(address_space &space, UINT32 channel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen);
int geforce_exec_method(address_space &space, UINT32 channel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen);
UINT32 texture_get_texel(int number, int x, int y);
void write_pixel(int x, int y, UINT32 color, UINT32 depth);
void combiner_initialize_registers(UINT32 argb8[6]);
@ -238,15 +242,17 @@ public:
void computedilated(void);
void putpixtex(int xp, int yp, int up, int vp);
int toggle_register_combiners_usage();
int toggle_wait_vblank_support();
void debug_grab_texture(int type, const char *filename);
void debug_grab_vertex_program_slot(int slot, UINT32 *instruction);
void start();
void savestate_items();
void read_vertex(address_space & space, offs_t address, vertex_nv &vertex, int attrib);
int read_vertices_0x1810(address_space & space, vertex_nv *destination, int offset, int limit);
int read_vertices_0x1800(address_space & space, vertex_nv *destination, UINT32 address, int limit);
int read_vertices_0x1818(address_space & space, vertex_nv *destination, UINT32 address, int limit);
void convert_vertices_poly(vertex_nv *source, vertex_t *destination, int count);
TIMER_CALLBACK_MEMBER(puller_timer_work);
struct {
UINT32 regs[0x80 / 4];
@ -429,12 +435,18 @@ public:
int enabled_vertex_attributes;
int vertex_attribute_words[16];
int vertex_attribute_offset[16];
emu_timer *puller_timer;
int puller_channel;
int puller_subchannel;
int puller_waiting;
address_space *puller_space;
UINT32 dilated0[16][2048];
UINT32 dilated1[16][2048];
int dilatechose[256];
nvidia_object_data *objectdata;
int debug_grab_texttype;
char *debug_grab_textfile;
int waitvblank_used;
enum VERTEX_PARAMETER {
PARAM_COLOR_B = 0,
@ -479,7 +491,7 @@ public:
TEX3 = 12
};
enum NV2A_VTXBUF_TYPE {
NV2A_VTXBUF_TYPE_UNKNOWN_0 = 0, // used for vertex color ?
NV2A_VTXBUF_TYPE_UBYTE2 = 0, // what is the difference with UBYTE ?
NV2A_VTXBUF_TYPE_FLOAT = 2,
NV2A_VTXBUF_TYPE_UBYTE = 4,
NV2A_VTXBUF_TYPE_USHORT = 5,

View File

@ -2027,8 +2027,13 @@ void nv2a_renderer::read_vertex(address_space & space, offs_t address, vertex_nv
}
break;
case NV2A_VTXBUF_TYPE_UBYTE:
u = space.read_dword(address + 0);
for (c = l-1; c >= l; c--) {
vertex.attribute[attrib].fv[c] = (u & 0xff) / 255.0;
u = u >> 8;
}
break;
case NV2A_VTXBUF_TYPE_UNKNOWN_0:
case NV2A_VTXBUF_TYPE_UBYTE2:
u = space.read_dword(address + 0);
for (c = 0; c < l; c++) {
vertex.attribute[attrib].fv[c] = (u & 0xff) / 255.0;
@ -2142,7 +2147,7 @@ void nv2a_renderer::convert_vertices_poly(vertex_nv *source, vertex_t *destinati
destination[m].p[PARAM_TEXTURE0_U + u * 2] = source[m].attribute[9 + u].fv[0];
destination[m].p[PARAM_TEXTURE0_V + u * 2] = source[m].attribute[9 + u].fv[1];
}
destination[m].p[PARAM_Z] = 0+0xffffff;
destination[m].p[PARAM_Z] = 0xffffff;
}
}
else {
@ -2164,7 +2169,7 @@ void nv2a_renderer::convert_vertices_poly(vertex_nv *source, vertex_t *destinati
}
}
void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen)
int nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen)
{
UINT32 maddress;
UINT32 data;
@ -2364,7 +2369,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
if (countlen < 0) {
logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
countlen = 0;
return;
return 0;
}
address = address + c * 4;
for (n = 1; countlen > 0; n++) {
@ -2392,7 +2397,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
if (countlen < 0) {
logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
countlen = 0;
return;
return 0;
}
address = address + c * 4;
for (n = 0; countlen > 0; n++) {
@ -2439,7 +2444,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
if (countlen < 0) {
logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
countlen = 0;
return;
return 0;
}
address = address + c * 4;
for (n = 0; countlen > 0; n += 2) {
@ -2449,7 +2454,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
if (countlen < 0) {
logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
countlen = 0;
return;
return 0;
}
address = address + c * 4;
render_triangle(limits_rendertarget, renderspans, 4 + 4 * 2, xy[n & 3], xy[(n + 1) & 3], xy[(n + 2) & 3]);
@ -2477,7 +2482,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
vertexbuffer_kind[bit] = data & 15;
vertexbuffer_size[bit] = (data >> 4) & 15;
switch (vertexbuffer_kind[bit]) {
case NV2A_VTXBUF_TYPE_UNKNOWN_0:
case NV2A_VTXBUF_TYPE_UBYTE2:
vertex_attribute_words[bit] = (vertexbuffer_size[bit] * 1) >> 2;
break;
case NV2A_VTXBUF_TYPE_FLOAT:
@ -2541,7 +2546,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
m = 2;
else
m = 1;
// possible buffers: color, depth, stencil, and accumulation
// possible buffers: color, depth, stencil
// clear framebuffer
if (data & 0xf0) {
bitmap_rgb32 bm(rendertarget, (limits_rendertarget.right() + 1) * m, (limits_rendertarget.bottom() + 1) * m, pitch_rendertarget / 4); // why *2 ?
@ -2550,12 +2555,14 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
bm.fill(color);
//printf("clearscreen\n\r");
}
if (data & 0x01) {
if ((data & 0x03) == 3) {
bitmap_rgb32 bm(depthbuffer, (limits_rendertarget.right() + 1) * m, (limits_rendertarget.bottom() + 1) * m, pitch_rendertarget / 4); // why *2 ?
// clear zbuffer
UINT32 depth = channel[chanel][subchannel].object.method[0x1d8c / 4];
bm.fill(depth);
// clear zbuffer and stencil
UINT32 depth_stencil = channel[chanel][subchannel].object.method[0x1d8c / 4];
bm.fill(depth_stencil);
}
else if (((data & 0x03) == 1) || ((data & 0x03) == 2))
logerror("Unsupported clear method parameter %d\n\r", data & 0x03);
countlen--;
}
if (maddress == 0x0200) {
@ -2583,6 +2590,13 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
displayedtarget = (UINT32 *)space.get_write_ptr(data);
}
}
if (maddress == 0x0130) {
countlen--;
if (waitvblank_used == 1)
return 1; // block until next vblank
else
return 0;
}
if (maddress == 0x0210) {
// framebuffer offset ?
rendertarget = (UINT32 *)space.get_write_ptr(data);
@ -2956,6 +2970,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
//combiner.=(data >> 27) & 7;
countlen--;
}
return 0;
}
int nv2a_renderer::toggle_register_combiners_usage()
@ -2964,6 +2979,12 @@ int nv2a_renderer::toggle_register_combiners_usage()
return combiner.used;
}
// Flips waitvblank_used between 0 and 1 and returns the new value.
// geforce_exec_method only asks the puller to block on method 0x0130
// while this flag is 1.
int nv2a_renderer::toggle_wait_vblank_support()
{
	const int toggled = 1 - waitvblank_used;

	waitvblank_used = toggled;
	return toggled;
}
void nv2a_renderer::debug_grab_texture(int type, const char *filename)
{
debug_grab_texttype = type;
@ -2982,10 +3003,6 @@ void nv2a_renderer::debug_grab_vertex_program_slot(int slot, UINT32 *instruction
instruction[3] = vertexprogram.exec.op[slot].i[3];
}
void nv2a_renderer::savestate_items()
{
}
void nv2a_renderer::combiner_argb8_float(UINT32 color, float reg[4])
{
reg[0] = (float)(color & 0xff) / 255.0;
@ -3569,6 +3586,10 @@ bool nv2a_renderer::vblank_callback(screen_device &screen, bool state)
pmc[0x100 / 4] |= 0x1000000;
else
pmc[0x100 / 4] &= ~0x1000000;
if ((state == true) && (puller_waiting == 1)) {
puller_waiting = 0;
puller_timer_work(NULL, 0);
}
if ((pmc[0x100 / 4] != 0) && (pmc[0x140 / 4] != 0)) {
// send interrupt
return true;
@ -3589,6 +3610,146 @@ UINT32 nv2a_renderer::screen_update_callback(screen_device &screen, bitmap_rgb32
return 0;
}
// Command puller: consumes the DMA command stream of the channel/subchannel
// recorded in puller_channel/puller_subchannel, reading through puller_space,
// until DMA_GET (regs[0x44/4]) catches up with DMA_PUT (regs[0x40/4]).
//
// It runs as the puller_timer callback and is also called directly by
// vblank_callback to resume after a wait. The timer arguments are not used.
//
// If geforce_exec_method returns non-zero while an increasing-method command
// is being processed (it does so for method 0x0130 when wait-vblank support
// is enabled), the timer is disabled, puller_waiting is set to 1 and the
// function returns early; processing continues from the current DMA_GET on
// the next call.
TIMER_CALLBACK_MEMBER(nv2a_renderer::puller_timer_work)
{
	int chanel, subchannel;
	int method, count, handle, objclass;
	UINT32 *dmaput, *dmaget;
	UINT32 cmd, cmdtype;
	int countlen;
	int ret;
#ifdef LOG_NV2A
	int subch; // subchannel field of the command word, only needed for logging
#endif
	address_space *space = puller_space;

	chanel = puller_channel;
	subchannel = puller_subchannel;
	dmaput = &channel[chanel][subchannel].regs[0x40 / 4];
	dmaget = &channel[chanel][subchannel].regs[0x44 / 4];
	while (*dmaget != *dmaput) {
		cmd = space->read_dword(*dmaget);
		*dmaget += 4;
		cmdtype = geforce_commandkind(cmd);
		switch (cmdtype)
		{
		case 6: // jump
#ifdef LOG_NV2A
			printf("jump dmaget %08X", *dmaget);
#endif
			*dmaget = cmd & 0xfffffffc;
#ifdef LOG_NV2A
			printf(" -> %08X\n\r", *dmaget);
#endif
			break;
		case 0: // increasing method
			method = (cmd >> 2) & 2047; // method*4 is address // if method >= 0x40 send it to assigned object
#ifdef LOG_NV2A
			subch = (cmd >> 13) & 7;
#endif
			count = (cmd >> 18) & 2047;
			if ((method == 0) && (count == 1)) {
				// method 0 with a single parameter binds an object to the subchannel
				handle = space->read_dword(*dmaget);
				handle = geforce_object_offset(handle);
#ifdef LOG_NV2A
				logerror(" assign to subchannel %d object at %d\n", subch, handle);
#endif
				channel[chanel][subchannel].object.objhandle = handle;
				handle = ramin[handle / 4];
				objclass = handle & 0xff;
				channel[chanel][subchannel].object.objclass = objclass;
				*dmaget += 4;
			}
			else {
#ifdef LOG_NV2A
				logerror(" subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
#endif
				// one parameter word per method, the method number advances each time
				ret = 0;
				while (count > 0) {
					countlen = 1;
					ret = geforce_exec_method(*space, chanel, subchannel, method, *dmaget, countlen);
					count--;
					method++;
					*dmaget += 4;
					if (ret != 0)
						break;
				}
				if (ret != 0) {
					// The method asked to block: stop pulling until vblank_callback
					// resumes us.
					// NOTE(review): if count is still > 0 here, the remaining
					// parameter words will be parsed as commands on resume - confirm
					// the blocking method is only ever sent with count == 1.
					puller_timer->enable(false);
					puller_waiting = 1;
					return;
				}
			}
			break;
		case 5: // non-increasing method
			method = (cmd >> 2) & 2047;
#ifdef LOG_NV2A
			subch = (cmd >> 13) & 7;
#endif
			count = (cmd >> 18) & 2047;
			if ((method == 0) && (count == 1)) {
#ifdef LOG_NV2A
				logerror(" assign channel %d\n", subch);
#endif
				handle = space->read_dword(*dmaget);
				handle = geforce_object_offset(handle);
#ifdef LOG_NV2A
				logerror(" assign to subchannel %d object at %d\n", subch, handle);
#endif
				channel[chanel][subchannel].object.objhandle = handle;
				handle = ramin[handle / 4];
				objclass = handle & 0xff;
				channel[chanel][subchannel].object.objclass = objclass;
				*dmaget += 4;
			}
			else {
#ifdef LOG_NV2A
				logerror(" subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
#endif
				// All parameter words go to the same method; geforce_exec_method
				// reports through countlen how many words are still left.
				// The return value (block request) is not acted upon for this
				// command type, as before.
				while (count > 0) {
					countlen = count;
					geforce_exec_method(*space, chanel, subchannel, method, *dmaget, countlen);
					*dmaget += 4 * (count - countlen);
					count = countlen;
				}
			}
			break;
		case 3: // long non-increasing method
			method = (cmd >> 2) & 2047;
#ifdef LOG_NV2A
			subch = (cmd >> 13) & 7;
#endif
			// the word count is stored in the dword following the command
			count = space->read_dword(*dmaget);
			*dmaget += 4;
			if ((method == 0) && (count == 1)) {
				handle = space->read_dword(*dmaget);
				handle = geforce_object_offset(handle);
#ifdef LOG_NV2A
				logerror(" assign to subchannel %d object at %d\n", subch, handle);
#endif
				channel[chanel][subchannel].object.objhandle = handle;
				handle = ramin[handle / 4];
				objclass = handle & 0xff;
				channel[chanel][subchannel].object.objclass = objclass;
				*dmaget += 4;
			}
			else {
#ifdef LOG_NV2A
				logerror(" subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
#endif
				// same handling as the short non-increasing form; the return
				// value is not acted upon here either
				while (count > 0) {
					countlen = count;
					geforce_exec_method(*space, chanel, subchannel, method, *dmaget, countlen);
					*dmaget += 4 * (count - countlen);
					count = countlen;
				}
			}
			break;
		default:
			logerror(" unimplemented command %08X\n", cmd);
		}
	}
}
READ32_MEMBER(nv2a_renderer::geforce_r)
{
static int x, ret;
@ -3684,7 +3845,7 @@ WRITE32_MEMBER(nv2a_renderer::geforce_w)
else if ((offset >= 0x00800000 / 4) && (offset < 0x00900000 / 4)) {
// 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each
int chanel, subchannel, suboffset;
int method, count, handle, objclass;
//int method, count, handle, objclass;
#ifdef LOG_NV2A
int subch;
#endif
@ -3699,130 +3860,37 @@ WRITE32_MEMBER(nv2a_renderer::geforce_w)
COMBINE_DATA(&channel[chanel][subchannel].regs[suboffset]);
if ((suboffset == 0x40 / 4) || (suboffset == 0x44 / 4)) { // DMA_PUT or DMA_GET
UINT32 *dmaput, *dmaget;
UINT32 cmd, cmdtype;
int countlen;
dmaput = &channel[chanel][subchannel].regs[0x40 / 4];
dmaget = &channel[chanel][subchannel].regs[0x44 / 4];
//printf("dmaget %08X dmaput %08X\n\r",*dmaget,*dmaput);
if ((*dmaput == 0x048cf000) && (*dmaget == 0x07f4d000))
if ((*dmaput == 0x048cf000) && (*dmaget == 0x07f4d000)) {
*dmaget = *dmaput;
while (*dmaget != *dmaput) {
cmd = space.read_dword(*dmaget);
*dmaget += 4;
cmdtype = geforce_commandkind(cmd);
switch (cmdtype)
{
case 6: // jump
#ifdef LOG_NV2A
printf("jump dmaget %08X", *dmaget);
#endif
*dmaget = cmd & 0xfffffffc;
#ifdef LOG_NV2A
printf(" -> %08X\n\r", *dmaget);
#endif
break;
case 0: // increasing method
method = (cmd >> 2) & 2047; // method*4 is address // if method >= 0x40 send it to assigned object
#ifdef LOG_NV2A
subch = (cmd >> 13) & 7;
#endif
count = (cmd >> 18) & 2047;
if ((method == 0) && (count == 1)) {
handle = space.read_dword(*dmaget);
handle = geforce_object_offset(handle);
#ifdef LOG_NV2A
logerror(" assign to subchannel %d object at %d\n", subch, handle);
#endif
channel[chanel][subchannel].object.objhandle = handle;
handle = ramin[handle / 4];
objclass = handle & 0xff;
channel[chanel][subchannel].object.objclass = objclass;
*dmaget += 4;
}
else {
#ifdef LOG_NV2A
logerror(" subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
#endif
while (count > 0) {
countlen = 1;
geforce_exec_method(space, chanel, subchannel, method, *dmaget, countlen);
count--;
method++;
*dmaget += 4;
}
}
break;
case 5: // non-increasing method
method = (cmd >> 2) & 2047;
#ifdef LOG_NV2A
subch = (cmd >> 13) & 7;
#endif
count = (cmd >> 18) & 2047;
if ((method == 0) && (count == 1)) {
#ifdef LOG_NV2A
logerror(" assign channel %d\n", subch);
#endif
handle = space.read_dword(*dmaget);
handle = geforce_object_offset(handle);
#ifdef LOG_NV2A
logerror(" assign to subchannel %d object at %d\n", subch, handle);
#endif
channel[chanel][subchannel].object.objhandle = handle;
handle = ramin[handle / 4];
objclass = handle & 0xff;
channel[chanel][subchannel].object.objclass = objclass;
*dmaget += 4;
}
else {
#ifdef LOG_NV2A
logerror(" subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
#endif
while (count > 0) {
countlen = count;
geforce_exec_method(space, chanel, subchannel, method, *dmaget, countlen);
*dmaget += 4 * (count - countlen);
count = countlen;
}
}
break;
case 3: // long non-increasing method
method = (cmd >> 2) & 2047;
#ifdef LOG_NV2A
subch = (cmd >> 13) & 7;
#endif
count = space.read_dword(*dmaget);
*dmaget += 4;
if ((method == 0) && (count == 1)) {
handle = space.read_dword(*dmaget);
handle = geforce_object_offset(handle);
#ifdef LOG_NV2A
logerror(" assign to subchannel %d object at %d\n", subch, handle);
#endif
channel[chanel][subchannel].object.objhandle = handle;
handle = ramin[handle / 4];
objclass = handle & 0xff;
channel[chanel][subchannel].object.objclass = objclass;
*dmaget += 4;
}
else {
#ifdef LOG_NV2A
logerror(" subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
#endif
while (count > 0) {
countlen = count;
geforce_exec_method(space, chanel, subchannel, method, *dmaget, countlen);
*dmaget += 4 * (count - countlen);
count = countlen;
}
}
break;
default:
logerror(" unimplemented command %08X\n", cmd);
puller_waiting = 0;
puller_timer->enable(false);
return;
}
if (*dmaget != *dmaput) {
if (puller_waiting == 0) {
puller_channel = chanel;
puller_subchannel = subchannel;
puller_space = &space;
puller_timer->enable();
puller_timer->adjust(attotime::zero);
}
}
}
}
else;
//else
// logerror("NV_2A: write at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,data);
}
// Save-state registration hook, called from chihiro_state::machine_start
// right after start(). The body is empty: no renderer state is registered
// here at present.
void nv2a_renderer::savestate_items()
{
}
void nv2a_renderer::start()
{
puller_timer = machine().scheduler().timer_alloc(timer_expired_delegate(FUNC(nv2a_renderer::puller_timer_work), this), (void *)"NV2A Puller Timer");
puller_timer->enable(false);
}