chihiro: add support for vblank wait NV2A accelerator method (nw)

It is disabled by default since it slows emulation down too much. There is a new debugger command "chihiro waitvblank" to enable and disable it.
2025-10-06 09:00:04 +03:00 · 2014-12-28 21:49:12 +01:00 · 2014-12-28 21:49:12 +01:00 · 495fe0f2dd
commit 495fe0f2dd
parent 956f2a350e
3 changed files with 233 additions and 137 deletions
--- a/src/mame/drivers/chihiro.c
+++ b/src/mame/drivers/chihiro.c
@ -777,6 +777,18 @@ static void nv2a_combiners_command(running_machine &machine, int ref, int params
 		debug_console_printf(machine, "Register combiners disabled\n");
 }

+// Debugger command "chihiro waitvblank": toggles support for the NV2A wait vblank
+// method and prints the resulting state on the debugger console.
+static void waitvblank_command(running_machine &machine, int ref, int params, const char **param)
+{
+	chihiro_state *state = machine.driver_data<chihiro_state>();
+	const int enabled = state->nvidia_nv2a->toggle_wait_vblank_support();
+
+	if (enabled == 0) {
+		debug_console_printf(machine, "Vblank method disabled\n");
+		return;
+	}
+	debug_console_printf(machine, "Vblank method enabled\n");
+}
+
 static void grab_texture_command(running_machine &machine, int ref, int params, const char **param)
 {
 	UINT64 type;
@ -862,6 +874,7 @@ static void help_command(running_machine &machine, int ref, int params, const ch
 	debug_console_printf(machine, "  chihiro curthread -- Print information about current thread\n");
 	debug_console_printf(machine, "  chihiro irq,<number> -- Generate interrupt with irq number 0-15\n");
 	debug_console_printf(machine, "  chihiro nv2a_combiners -- Toggle use of register combiners\n");
+	debug_console_printf(machine, "  chihiro waitvblank -- Toggle support for wait vblank method\n");
 	debug_console_printf(machine, "  chihiro grab_texture,<type>,<filename> -- Save to <filename> the next used texture of type <type>\n");
 	debug_console_printf(machine, "  chihiro grab_vprog,<filename> -- save current vertex program instruction slots to <filename>\n");
 	debug_console_printf(machine, "  chihiro vprogdis,<address>,<length>[,<type>] -- disassemble <lenght> vertex program instructions at <address> of <type>\n");
@ -886,6 +899,8 @@ static void chihiro_debug_commands(running_machine &machine, int ref, int params
 		generate_irq_command(machine, ref, params - 1, param + 1);
 	else if (strcmp("nv2a_combiners", param[0]) == 0)
 		nv2a_combiners_command(machine, ref, params - 1, param + 1);
+	else if (strcmp("waitvblank", param[0]) == 0)
+		waitvblank_command(machine, ref, params - 1, param + 1);
 	else if (strcmp("grab_texture", param[0]) == 0)
 		grab_texture_command(machine, ref, params - 1, param + 1);
 	else if (strcmp("grab_vprog", param[0]) == 0)
@ -1787,6 +1802,7 @@ void chihiro_state::machine_start()
 	save_item(NAME(smbusst.words));
 	save_item(NAME(pic16lc_buffer));
 	save_item(NAME(usbhack_counter));
+	nvidia_nv2a->start();
 	nvidia_nv2a->savestate_items();
 }

--- a/src/mame/includes/chihiro.h
+++ b/src/mame/includes/chihiro.h
@ -191,8 +191,12 @@ public:
 		rendertarget = NULL;
 		depthbuffer = NULL;
 		displayedtarget = NULL;
+		puller_channel = 0;
+		puller_subchannel = 0;
+		puller_waiting = 0;
 		debug_grab_texttype = -1;
 		debug_grab_textfile = NULL;
+		waitvblank_used = 0;
 		memset(vertex_attribute_words, 0, sizeof(vertex_attribute_words));
 		memset(vertex_attribute_offset, 0, sizeof(vertex_attribute_offset));
 	}
@ -208,7 +212,7 @@ public:
 	int geforce_commandkind(UINT32 word);
 	UINT32 geforce_object_offset(UINT32 handle);
 	void geforce_read_dma_object(UINT32 handle, UINT32 &offset, UINT32 &size);
-	void geforce_exec_method(address_space &space, UINT32 channel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen);
+	int geforce_exec_method(address_space &space, UINT32 channel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen);
 	UINT32 texture_get_texel(int number, int x, int y);
 	void write_pixel(int x, int y, UINT32 color, UINT32 depth);
 	void combiner_initialize_registers(UINT32 argb8[6]);
@ -238,15 +242,17 @@ public:
 	void computedilated(void);
 	void putpixtex(int xp, int yp, int up, int vp);
 	int toggle_register_combiners_usage();
+	int toggle_wait_vblank_support();
 	void debug_grab_texture(int type, const char *filename);
 	void debug_grab_vertex_program_slot(int slot, UINT32 *instruction);
+	void start();
 	void savestate_items();
-
 	void read_vertex(address_space & space, offs_t address, vertex_nv &vertex, int attrib);
 	int read_vertices_0x1810(address_space & space, vertex_nv *destination, int offset, int limit);
 	int read_vertices_0x1800(address_space & space, vertex_nv *destination, UINT32 address, int limit);
 	int read_vertices_0x1818(address_space & space, vertex_nv *destination, UINT32 address, int limit);
 	void convert_vertices_poly(vertex_nv *source, vertex_t *destination, int count);
+	TIMER_CALLBACK_MEMBER(puller_timer_work);

 	struct {
 		UINT32 regs[0x80 / 4];
@ -429,12 +435,18 @@ public:
 	int enabled_vertex_attributes;
 	int vertex_attribute_words[16];
 	int vertex_attribute_offset[16];
+	emu_timer *puller_timer;
+	int puller_channel;
+	int puller_subchannel;
+	int puller_waiting;
+	address_space *puller_space;
 	UINT32 dilated0[16][2048];
 	UINT32 dilated1[16][2048];
 	int dilatechose[256];
 	nvidia_object_data *objectdata;
 	int debug_grab_texttype;
 	char *debug_grab_textfile;
+	int waitvblank_used;

 	enum VERTEX_PARAMETER {
 		PARAM_COLOR_B = 0,
@ -479,7 +491,7 @@ public:
 		TEX3 = 12
 	};
 	enum NV2A_VTXBUF_TYPE {
-		NV2A_VTXBUF_TYPE_UNKNOWN_0 = 0, // used for vertex color ?
+		NV2A_VTXBUF_TYPE_UBYTE2 = 0, // what is the difference with UBYTE ?
 		NV2A_VTXBUF_TYPE_FLOAT = 2,
 		NV2A_VTXBUF_TYPE_UBYTE = 4,
 		NV2A_VTXBUF_TYPE_USHORT = 5,
--- a/src/mame/video/chihiro.c
+++ b/src/mame/video/chihiro.c
@ -2027,8 +2027,13 @@ void nv2a_renderer::read_vertex(address_space & space, offs_t address, vertex_nv
 		}
 		break;
 	case NV2A_VTXBUF_TYPE_UBYTE:
+		// one byte per component: the lowest byte of the dword goes to the last
+		// component, so count down to component 0 (the loop must stop at 0, not at l)
+		// NOTE(review): assumes c is a signed int, as the sibling "c < l" loops suggest
+		u = space.read_dword(address + 0);
+		for (c = l-1; c >= 0; c--) {
+			vertex.attribute[attrib].fv[c] = (u & 0xff) / 255.0;
+			u = u >> 8;
+		}
 		break;
-	case  NV2A_VTXBUF_TYPE_UNKNOWN_0:
+	case  NV2A_VTXBUF_TYPE_UBYTE2:
 		u = space.read_dword(address + 0);
 		for (c = 0; c < l; c++) {
 			vertex.attribute[attrib].fv[c] = (u & 0xff) / 255.0;
@ -2142,7 +2147,7 @@ void nv2a_renderer::convert_vertices_poly(vertex_nv *source, vertex_t *destinati
 				destination[m].p[PARAM_TEXTURE0_U + u * 2] = source[m].attribute[9 + u].fv[0];
 				destination[m].p[PARAM_TEXTURE0_V + u * 2] = source[m].attribute[9 + u].fv[1];
 			}
-			destination[m].p[PARAM_Z] = 0+0xffffff;
+			destination[m].p[PARAM_Z] = 0xffffff;
 		}
 	}
 	else {
@ -2164,7 +2169,7 @@ void nv2a_renderer::convert_vertices_poly(vertex_nv *source, vertex_t *destinati
 	}
 }

-void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen)
+int nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UINT32 subchannel, UINT32 method, UINT32 address, int &countlen)
 {
 	UINT32 maddress;
 	UINT32 data;
@ -2364,7 +2369,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 			if (countlen < 0) {
 				logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
 				countlen = 0;
-				return;
+				return 0;
 			}
 			address = address + c * 4;
 			for (n = 1; countlen > 0; n++) {
@ -2392,7 +2397,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 			if (countlen < 0) {
 				logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
 				countlen = 0;
-				return;
+				return 0;
 			}
 			address = address + c * 4;
 			for (n = 0; countlen > 0; n++) {
@ -2439,7 +2444,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 			if (countlen < 0) {
 				logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
 				countlen = 0;
-				return;
+				return 0;
 			}
 			address = address + c * 4;
 			for (n = 0; countlen > 0; n += 2) {
@ -2449,7 +2454,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 				if (countlen < 0) {
 					logerror("Method 0x1818 missing %d words to draw a complete primitive\n", -countlen);
 					countlen = 0;
-					return;
+					return 0;
 				}
 				address = address + c * 4;
 				render_triangle(limits_rendertarget, renderspans, 4 + 4 * 2, xy[n & 3], xy[(n + 1) & 3], xy[(n + 2) & 3]);
@ -2477,7 +2482,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 		vertexbuffer_kind[bit] = data & 15;
 		vertexbuffer_size[bit] = (data >> 4) & 15;
 		switch (vertexbuffer_kind[bit]) {
-		case NV2A_VTXBUF_TYPE_UNKNOWN_0:
+		case NV2A_VTXBUF_TYPE_UBYTE2:
 			vertex_attribute_words[bit] = (vertexbuffer_size[bit] * 1) >> 2;
 			break;
 		case NV2A_VTXBUF_TYPE_FLOAT:
@ -2541,7 +2546,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 			m = 2;
 		else
 			m = 1;
-		// possible buffers: color, depth, stencil, and accumulation
+		// possible buffers: color, depth, stencil
 		// clear framebuffer
 		if (data & 0xf0) {
 			bitmap_rgb32 bm(rendertarget, (limits_rendertarget.right() + 1) * m, (limits_rendertarget.bottom() + 1) * m, pitch_rendertarget / 4); // why *2 ?
@ -2550,12 +2555,14 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 			bm.fill(color);
 			//printf("clearscreen\n\r");
 		}
-		if (data & 0x01) {
+		if ((data & 0x03) == 3) {
 			bitmap_rgb32 bm(depthbuffer, (limits_rendertarget.right() + 1) * m, (limits_rendertarget.bottom() + 1) * m, pitch_rendertarget / 4); // why *2 ?
-			// clear zbuffer
-			UINT32 depth = channel[chanel][subchannel].object.method[0x1d8c / 4];
-			bm.fill(depth);
+			// clear zbuffer and stencil
+			UINT32 depth_stencil = channel[chanel][subchannel].object.method[0x1d8c / 4];
+			bm.fill(depth_stencil);
 		}
+		else if (((data & 0x03) == 1) || ((data & 0x03) == 2))
+			logerror("Unsupported clear method parameter %d\n\r", data & 0x03);
 		countlen--;
 	}
 	if (maddress == 0x0200) {
@ -2583,6 +2590,13 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 			displayedtarget = (UINT32 *)space.get_write_ptr(data);
 		}
 	}
+	if (maddress == 0x0130) {
+		countlen--;
+		if (waitvblank_used == 1)
+			return 1; // block until next vblank
+		else
+			return 0;
+	}
 	if (maddress == 0x0210) {
 		// framebuffer offset ?
 		rendertarget = (UINT32 *)space.get_write_ptr(data);
@ -2956,6 +2970,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space, UINT32 chanel, UI
 		//combiner.=(data >> 27) & 7;
 		countlen--;
 	}
+	return 0;
 }

 int nv2a_renderer::toggle_register_combiners_usage()
@ -2964,6 +2979,12 @@ int nv2a_renderer::toggle_register_combiners_usage()
 	return combiner.used;
 }

+// Flip the wait vblank support flag (1 - current value) and return the new value.
+int nv2a_renderer::toggle_wait_vblank_support()
+{
+	const int toggled = 1 - waitvblank_used;
+
+	waitvblank_used = toggled;
+	return toggled;
+}
+
 void nv2a_renderer::debug_grab_texture(int type, const char *filename)
 {
 	debug_grab_texttype = type;
@ -2982,10 +3003,6 @@ void nv2a_renderer::debug_grab_vertex_program_slot(int slot, UINT32 *instruction
 	instruction[3] = vertexprogram.exec.op[slot].i[3];
 }

-void nv2a_renderer::savestate_items()
-{
-}
-
 void nv2a_renderer::combiner_argb8_float(UINT32 color, float reg[4])
 {
 	reg[0] = (float)(color & 0xff) / 255.0;
@ -3569,6 +3586,10 @@ bool nv2a_renderer::vblank_callback(screen_device &screen, bool state)
 		pmc[0x100 / 4] |= 0x1000000;
 	else
 		pmc[0x100 / 4] &= ~0x1000000;
+	if ((state == true) && (puller_waiting == 1)) {
+		puller_waiting = 0;
+		puller_timer_work(NULL, 0);
+	}
 	if ((pmc[0x100 / 4] != 0) && (pmc[0x140 / 4] != 0)) {
 		// send interrupt
 		return true;
@ -3589,6 +3610,146 @@ UINT32 nv2a_renderer::screen_update_callback(screen_device &screen, bitmap_rgb32
 	return 0;
 }

+// Command puller. Walks the push buffer of the channel/subchannel stored in
+// puller_channel/puller_subchannel, reading commands through puller_space from
+// DMA_GET (regs[0x44/4]) until it reaches DMA_PUT (regs[0x40/4]).
+// When geforce_exec_method returns non zero for a method of an increasing
+// command, the timer is disabled, puller_waiting is set to 1 and the function
+// returns with DMA_GET pointing after the word just executed; vblank_callback
+// calls this function again when puller_waiting is 1.
+TIMER_CALLBACK_MEMBER(nv2a_renderer::puller_timer_work)
+{
+	int chanel, subchannel;
+	int method, count, handle, objclass;
+#ifdef LOG_NV2A
+	int subch; // only needed by the logging code below
+#endif
+	UINT32 *dmaput, *dmaget;
+	UINT32 cmd, cmdtype;
+	int countlen;
+	int ret;
+	address_space *space = puller_space;
+
+	chanel = puller_channel;
+	subchannel = puller_subchannel;
+	dmaput = &channel[chanel][subchannel].regs[0x40 / 4];
+	dmaget = &channel[chanel][subchannel].regs[0x44 / 4];
+	while (*dmaget != *dmaput) {
+		cmd = space->read_dword(*dmaget);
+		*dmaget += 4;
+		cmdtype = geforce_commandkind(cmd);
+		switch (cmdtype)
+		{
+		case 6: // jump
+#ifdef LOG_NV2A
+			printf("jump dmaget %08X", *dmaget);
+#endif
+			*dmaget = cmd & 0xfffffffc;
+#ifdef LOG_NV2A
+			printf(" -> %08X\n\r", *dmaget);
+#endif
+			break;
+		case 0: // increasing method
+			method = (cmd >> 2) & 2047; // method*4 is address // if method >= 0x40 send it to assigned object
+#ifdef LOG_NV2A
+			subch = (cmd >> 13) & 7;
+#endif
+			count = (cmd >> 18) & 2047;
+			if ((method == 0) && (count == 1)) {
+				// method 0 with one parameter binds an object to the subchannel
+				handle = space->read_dword(*dmaget);
+				handle = geforce_object_offset(handle);
+#ifdef LOG_NV2A
+				logerror("  assign to subchannel %d object at %d\n", subch, handle);
+#endif
+				channel[chanel][subchannel].object.objhandle = handle;
+				handle = ramin[handle / 4];
+				objclass = handle & 0xff;
+				channel[chanel][subchannel].object.objclass = objclass;
+				*dmaget += 4;
+			}
+			else {
+#ifdef LOG_NV2A
+				logerror("  subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
+#endif
+				// one parameter word per method, the method number advances each time
+				ret = 0;
+				while (count > 0) {
+					countlen = 1;
+					ret = geforce_exec_method(*space, chanel, subchannel, method, *dmaget, countlen);
+					count--;
+					method++;
+					*dmaget += 4;
+					if (ret != 0)
+						break;
+				}
+				if (ret != 0) {
+					// the method asked to block: stop pulling until woken up again
+					// NOTE(review): if count is still > 0 here the remaining parameter
+					// words will be decoded as commands on resume - confirm that the
+					// blocking method is always the last one of its command
+					puller_timer->enable(false);
+					puller_waiting = 1;
+					return;
+				}
+			}
+			break;
+		case 5: // non-increasing method
+			method = (cmd >> 2) & 2047;
+#ifdef LOG_NV2A
+			subch = (cmd >> 13) & 7;
+#endif
+			count = (cmd >> 18) & 2047;
+			if ((method == 0) && (count == 1)) {
+#ifdef LOG_NV2A
+				logerror("  assign channel %d\n", subch);
+#endif
+				handle = space->read_dword(*dmaget);
+				handle = geforce_object_offset(handle);
+#ifdef LOG_NV2A
+				logerror("  assign to subchannel %d object at %d\n", subch, handle);
+#endif
+				channel[chanel][subchannel].object.objhandle = handle;
+				handle = ramin[handle / 4];
+				objclass = handle & 0xff;
+				channel[chanel][subchannel].object.objclass = objclass;
+				*dmaget += 4;
+			}
+			else {
+#ifdef LOG_NV2A
+				logerror("  subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
+#endif
+				// the method consumes as many words as it wants and reports what is
+				// left in countlen; its return value is not checked here, so only
+				// increasing commands can block the puller
+				while (count > 0) {
+					countlen = count;
+					geforce_exec_method(*space, chanel, subchannel, method, *dmaget, countlen);
+					*dmaget += 4 * (count - countlen);
+					count = countlen;
+				}
+			}
+			break;
+		case 3: // long non-increasing method
+			method = (cmd >> 2) & 2047;
+#ifdef LOG_NV2A
+			subch = (cmd >> 13) & 7;
+#endif
+			// the count is stored in the word following the command
+			count = space->read_dword(*dmaget);
+			*dmaget += 4;
+			if ((method == 0) && (count == 1)) {
+				handle = space->read_dword(*dmaget);
+				handle = geforce_object_offset(handle);
+#ifdef LOG_NV2A
+				logerror("  assign to subchannel %d object at %d\n", subch, handle);
+#endif
+				channel[chanel][subchannel].object.objhandle = handle;
+				handle = ramin[handle / 4];
+				objclass = handle & 0xff;
+				channel[chanel][subchannel].object.objclass = objclass;
+				*dmaget += 4;
+			}
+			else {
+#ifdef LOG_NV2A
+				logerror("  subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
+#endif
+				// same handling as the short non-increasing command, return value ignored
+				while (count > 0) {
+					countlen = count;
+					geforce_exec_method(*space, chanel, subchannel, method, *dmaget, countlen);
+					*dmaget += 4 * (count - countlen);
+					count = countlen;
+				}
+			}
+			break;
+		default:
+			logerror("  unimplemented command %08X\n", cmd);
+		}
+	}
+}
+
 READ32_MEMBER(nv2a_renderer::geforce_r)
 {
 	static int x, ret;
@ -3684,7 +3845,7 @@ WRITE32_MEMBER(nv2a_renderer::geforce_w)
 	else if ((offset >= 0x00800000 / 4) && (offset < 0x00900000 / 4)) {
 		// 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each
 		int chanel, subchannel, suboffset;
-		int method, count, handle, objclass;
+		//int method, count, handle, objclass;
 #ifdef LOG_NV2A
 		int subch;
 #endif
@ -3699,130 +3860,37 @@ WRITE32_MEMBER(nv2a_renderer::geforce_w)
 		COMBINE_DATA(&channel[chanel][subchannel].regs[suboffset]);
 		if ((suboffset == 0x40 / 4) || (suboffset == 0x44 / 4)) { // DMA_PUT or DMA_GET
 			UINT32 *dmaput, *dmaget;
-			UINT32 cmd, cmdtype;
-			int countlen;

 			dmaput = &channel[chanel][subchannel].regs[0x40 / 4];
 			dmaget = &channel[chanel][subchannel].regs[0x44 / 4];
 			//printf("dmaget %08X dmaput %08X\n\r",*dmaget,*dmaput);
-			if ((*dmaput == 0x048cf000) && (*dmaget == 0x07f4d000))
+			if ((*dmaput == 0x048cf000) && (*dmaget == 0x07f4d000)) {
 				*dmaget = *dmaput;
-			while (*dmaget != *dmaput) {
-				cmd = space.read_dword(*dmaget);
-				*dmaget += 4;
-				cmdtype = geforce_commandkind(cmd);
-				switch (cmdtype)
-				{
-				case 6: // jump
-#ifdef LOG_NV2A
-					printf("jump dmaget %08X", *dmaget);
-#endif
-					*dmaget = cmd & 0xfffffffc;
-#ifdef LOG_NV2A
-					printf(" -> %08X\n\r", *dmaget);
-#endif
-					break;
-				case 0: // increasing method
-					method = (cmd >> 2) & 2047; // method*4 is address // if method >= 0x40 send it to assigned object
-#ifdef LOG_NV2A
-					subch = (cmd >> 13) & 7;
-#endif
-					count = (cmd >> 18) & 2047;
-					if ((method == 0) && (count == 1)) {
-						handle = space.read_dword(*dmaget);
-						handle = geforce_object_offset(handle);
-#ifdef LOG_NV2A
-						logerror("  assign to subchannel %d object at %d\n", subch, handle);
-#endif
-						channel[chanel][subchannel].object.objhandle = handle;
-						handle = ramin[handle / 4];
-						objclass = handle & 0xff;
-						channel[chanel][subchannel].object.objclass = objclass;
-						*dmaget += 4;
-					}
-					else {
-#ifdef LOG_NV2A
-						logerror("  subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
-#endif
-						while (count > 0) {
-							countlen = 1;
-							geforce_exec_method(space, chanel, subchannel, method, *dmaget, countlen);
-							count--;
-							method++;
-							*dmaget += 4;
-						}
-					}
-					break;
-				case 5: // non-increasing method
-					method = (cmd >> 2) & 2047;
-#ifdef LOG_NV2A
-					subch = (cmd >> 13) & 7;
-#endif
-					count = (cmd >> 18) & 2047;
-					if ((method == 0) && (count == 1)) {
-#ifdef LOG_NV2A
-						logerror("  assign channel %d\n", subch);
-#endif
-						handle = space.read_dword(*dmaget);
-						handle = geforce_object_offset(handle);
-#ifdef LOG_NV2A
-						logerror("  assign to subchannel %d object at %d\n", subch, handle);
-#endif
-						channel[chanel][subchannel].object.objhandle = handle;
-						handle = ramin[handle / 4];
-						objclass = handle & 0xff;
-						channel[chanel][subchannel].object.objclass = objclass;
-						*dmaget += 4;
-					}
-					else {
-#ifdef LOG_NV2A
-						logerror("  subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
-#endif
-						while (count > 0) {
-							countlen = count;
-							geforce_exec_method(space, chanel, subchannel, method, *dmaget, countlen);
-							*dmaget += 4 * (count - countlen);
-							count = countlen;
-						}
-					}
-					break;
-				case 3: // long non-increasing method
-					method = (cmd >> 2) & 2047;
-#ifdef LOG_NV2A
-					subch = (cmd >> 13) & 7;
-#endif
-					count = space.read_dword(*dmaget);
-					*dmaget += 4;
-					if ((method == 0) && (count == 1)) {
-						handle = space.read_dword(*dmaget);
-						handle = geforce_object_offset(handle);
-#ifdef LOG_NV2A
-						logerror("  assign to subchannel %d object at %d\n", subch, handle);
-#endif
-						channel[chanel][subchannel].object.objhandle = handle;
-						handle = ramin[handle / 4];
-						objclass = handle & 0xff;
-						channel[chanel][subchannel].object.objclass = objclass;
-						*dmaget += 4;
-					}
-					else {
-#ifdef LOG_NV2A
-						logerror("  subch. %d method %04x offset %04x count %d\n", subch, method, method * 4, count);
-#endif
-						while (count > 0) {
-							countlen = count;
-							geforce_exec_method(space, chanel, subchannel, method, *dmaget, countlen);
-							*dmaget += 4 * (count - countlen);
-							count = countlen;
-						}
-					}
-					break;
-				default:
-					logerror("  unimplemented command %08X\n", cmd);
+				puller_waiting = 0;
+				puller_timer->enable(false);
+				return;
+			}
+			if (*dmaget != *dmaput) {
+				if (puller_waiting == 0) {
+					puller_channel = chanel;
+					puller_subchannel = subchannel;
+					puller_space = &space;
+					puller_timer->enable();
+					puller_timer->adjust(attotime::zero);
 				}
 			}
 		}
 	}
-	else;
+	//else
 	//      logerror("NV_2A: write at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,data);
 }
+
+// Currently empty: no items are registered here.
+void nv2a_renderer::savestate_items()
+{
+}
+
+// Allocate the command puller timer, bound to puller_timer_work, and leave it disabled.
+void nv2a_renderer::start()
+{
+	timer_expired_delegate puller_callback(FUNC(nv2a_renderer::puller_timer_work), this);
+
+	puller_timer = machine().scheduler().timer_alloc(puller_callback, (void *)"NV2A Puller Timer");
+	puller_timer->enable(false);
+}