Significantly optimized the inner execution loop. arknoid2 is ~2.5x faster with maximum interleave now.
This commit is contained in:
Aaron Giles 2008-12-22 10:28:20 +00:00
parent 6c4013134f
commit bf20e586d1
4 changed files with 240 additions and 130 deletions

View File

@ -59,6 +59,10 @@ struct _cpu_input_data
typedef struct _cpu_class_data cpu_class_data;
struct _cpu_class_data
{
/* execution lists */
const device_config *device; /* pointer back to our device */
cpu_class_data *next; /* pointer to the next CPU to execute, in order */
/* cycle counting and executing */
int profiler; /* profiler tag */
int * icount; /* pointer to the icount */
@ -104,6 +108,7 @@ struct _cpu_class_data
/* global (per-machine) CPU execution state; reached via machine->cpuexec_data */
struct _cpuexec_private
{
const device_config *executingcpu; /* pointer to the currently executing CPU; set just before a CPU is run in cpuexec_timeslice and reset to NULL afterwards */
cpu_class_data *executelist; /* head of the execution list (linked through cpu_class_data.next); suspended CPUs are at the back; NULL until rebuild_execute_list has run */
char statebuf[256]; /* string buffer containing state description */
};
@ -122,6 +127,7 @@ static TIMER_CALLBACK( triggertime_callback );
static TIMER_CALLBACK( empty_event_queue );
static IRQ_CALLBACK( standard_irq_callback );
static void register_save_states(const device_config *device);
static void rebuild_execute_list(running_machine *machine);
static UINT64 get_register_value(const device_config *device, void *baseptr, const cpu_state_entry *entry);
static void set_register_value(const device_config *device, void *baseptr, const cpu_state_entry *entry, UINT64 value);
static void get_register_string_value(const device_config *device, void *baseptr, const cpu_state_entry *entry, char *dest);
@ -130,6 +136,17 @@ static int get_register_string_max_width(const device_config *device, void *base
#endif
/***************************************************************************
MACROS
***************************************************************************/
/* these are macros rather than functions to ensure inlining in cpuexec_timeslice; */
/* note that both of them evaluate their arguments more than once */

/* ATTOTIME_LT - nonzero if attotime a is strictly earlier than attotime b: */
/* the seconds decide unless they are equal, in which case the attoseconds do */
#define ATTOTIME_LT(a,b) \
	(((a).seconds != (b).seconds) ? ((a).seconds < (b).seconds) : ((a).attoseconds < (b).attoseconds))

/* ATTOTIME_NORMALIZE - carry at most one whole second out of the attoseconds */
/* field of a into its seconds field */
#define ATTOTIME_NORMALIZE(a) \
	do \
	{ \
		if ((a).attoseconds >= ATTOSECONDS_PER_SECOND) \
		{ \
			(a).attoseconds -= ATTOSECONDS_PER_SECOND; \
			(a).seconds++; \
		} \
	} while (0)
/***************************************************************************
INLINE FUNCTIONS
***************************************************************************/
@ -235,102 +252,127 @@ void cpuexec_init(running_machine *machine)
void cpuexec_timeslice(running_machine *machine)
{
int call_debugger = ((machine->debug_flags & DEBUG_FLAG_ENABLED) != 0);
timer_execution_state *timerexec = timer_get_execution_state(machine);
cpuexec_private *global = machine->cpuexec_data;
attotime target = timer_next_fire_time(machine);
attotime base = timer_get_time(machine);
const device_config *cpu;
int ran;
LOG(("------------------\n"));
LOG(("cpu_timeslice: target = %s\n", attotime_string(target, 9)));
/* apply pending suspension changes */
for (cpu = machine->cpu[0]; cpu != NULL; cpu = cpu->typenext)
/* build the execution list if we don't have one yet */
if (global->executelist == NULL)
rebuild_execute_list(machine);
/* loop until we hit the next timer */
while (ATTOTIME_LT(timerexec->basetime, timerexec->nextfire))
{
cpu_class_data *classdata = get_class_data(cpu);
classdata->suspend = classdata->nextsuspend;
classdata->nextsuspend &= ~SUSPEND_REASON_TIMESLICE;
classdata->eatcycles = classdata->nexteatcycles;
}
cpu_class_data *classdata;
UINT32 suspendchanged;
attotime target;
/* by default, assume our target is the end of the next quantum */
target.seconds = timerexec->basetime.seconds;
target.attoseconds = timerexec->basetime.attoseconds + timerexec->curquantum;
ATTOTIME_NORMALIZE(target);
/* loop over non-suspended CPUs */
for (cpu = machine->cpu[0]; cpu != NULL; cpu = cpu->typenext)
{
cpu_class_data *classdata = get_class_data(cpu);
if (classdata->suspend == 0)
/* however, if the next timer is going to fire before then, override */
assert(attotime_sub(timerexec->nextfire, target).seconds <= 0);
if (ATTOTIME_LT(timerexec->nextfire, target))
target = timerexec->nextfire;
LOG(("------------------\n"));
LOG(("cpu_timeslice: target = %s\n", attotime_string(target, 9)));
/* apply pending suspension changes */
suspendchanged = 0;
for (classdata = global->executelist; classdata != NULL; classdata = classdata->next)
{
attotime delta = attotime_sub(target, classdata->localtime);
if (delta.seconds >= 0 && delta.attoseconds >= classdata->attoseconds_per_cycle)
suspendchanged |= (classdata->suspend ^ classdata->nextsuspend);
classdata->suspend = classdata->nextsuspend;
classdata->nextsuspend &= ~SUSPEND_REASON_TIMESLICE;
classdata->eatcycles = classdata->nexteatcycles;
}
/* recompute the execute list if any CPUs changed their suspension state */
if (suspendchanged != 0)
rebuild_execute_list(machine);
/* loop over non-suspended CPUs */
for (classdata = global->executelist; classdata != NULL; classdata = classdata->next)
{
/* only process if our target is later than the CPU's current time (coarse check) */
if (target.seconds >= classdata->localtime.seconds)
{
/* compute how long to run */
classdata->cycles_running = div_64x32(delta.attoseconds >> classdata->divshift, classdata->divisor);
LOG((" cpu '%s': %d cycles\n", cpu->tag, classdata->cycles_running));
attoseconds_t delta, actualdelta;
/* compute how many attoseconds to execute this CPU */
delta = target.attoseconds - classdata->localtime.attoseconds;
if (delta < 0 && target.seconds > classdata->localtime.seconds)
delta += ATTOSECONDS_PER_SECOND;
assert(delta == attotime_to_attoseconds(attotime_sub(target, classdata->localtime)));
profiler_mark(classdata->profiler);
/* note that this global variable cycles_stolen can be modified */
/* via the call to cpu_execute */
classdata->cycles_stolen = 0;
global->executingcpu = cpu;
if (!call_debugger)
ran = cpu_execute(cpu, classdata->cycles_running);
else
/* if we have enough for at least 1 cycle, do the math */
if (delta >= classdata->attoseconds_per_cycle)
{
debugger_start_cpu_hook(cpu, target);
ran = cpu_execute(cpu, classdata->cycles_running);
debugger_stop_cpu_hook(cpu);
}
/* compute how many cycles we want to execute */
ran = classdata->cycles_running = divu_64x32((UINT64)delta >> classdata->divshift, classdata->divisor);
LOG((" cpu '%s': %d cycles\n", classdata->device->tag, classdata->cycles_running));
#ifdef MAME_DEBUG
if (ran < classdata->cycles_stolen)
fatalerror("Negative CPU cycle count!");
#endif /* MAME_DEBUG */
/* if we're not suspended, actually execute */
if (classdata->suspend == 0)
{
profiler_mark(classdata->profiler);
ran -= classdata->cycles_stolen;
profiler_mark(PROFILER_END);
/* note that this global variable cycles_stolen can be modified */
/* via the call to cpu_execute */
classdata->cycles_stolen = 0;
global->executingcpu = classdata->device;
if (!call_debugger)
ran = cpu_execute(classdata->device, classdata->cycles_running);
else
{
debugger_start_cpu_hook(classdata->device, target);
ran = cpu_execute(classdata->device, classdata->cycles_running);
debugger_stop_cpu_hook(classdata->device);
}
/* account for these cycles */
classdata->totalcycles += ran;
classdata->localtime = attotime_add_attoseconds(classdata->localtime, ran * classdata->attoseconds_per_cycle);
LOG((" %d ran, %d total, time = %s\n", ran, (INT32)classdata->totalcycles, attotime_string(classdata->localtime, 9)));
/* adjust for any cycles we took back */
assert(ran >= classdata->cycles_stolen);
ran -= classdata->cycles_stolen;
profiler_mark(PROFILER_END);
}
/* account for these cycles */
classdata->totalcycles += ran;
/* update the local time for this CPU */
actualdelta = classdata->attoseconds_per_cycle * ran;
classdata->localtime.attoseconds += actualdelta;
ATTOTIME_NORMALIZE(classdata->localtime);
LOG((" %d ran, %d total, time = %s\n", ran, (INT32)classdata->totalcycles, attotime_string(classdata->localtime, 9)));
/* if the new local CPU time is less than our target, move the target up */
if (attotime_compare(classdata->localtime, target) < 0)
{
target = attotime_max(classdata->localtime, base);
LOG((" (new target)\n"));
/* if the new local CPU time is less than our target, move the target up */
if (ATTOTIME_LT(classdata->localtime, target))
{
assert(attotime_compare(classdata->localtime, target) < 0);
target = classdata->localtime;
/* however, if this puts us before the base, clamp to the base as a minimum */
if (ATTOTIME_LT(target, timerexec->basetime))
{
assert(attotime_compare(target, timerexec->basetime) < 0);
target = timerexec->basetime;
}
LOG((" (new target)\n"));
}
}
}
}
}
global->executingcpu = NULL;
global->executingcpu = NULL;
/* update the local times of all CPUs */
for (cpu = machine->cpu[0]; cpu != NULL; cpu = cpu->typenext)
{
cpu_class_data *classdata = get_class_data(cpu);
/* if we're suspended and counting, process */
if (classdata->suspend != 0 && classdata->eatcycles && attotime_compare(classdata->localtime, target) < 0)
{
attotime delta = attotime_sub(target, classdata->localtime);
/* compute how long to run */
classdata->cycles_running = div_64x32(delta.attoseconds >> classdata->divshift, classdata->divisor);
LOG((" cpu '%s': %d cycles (suspended)\n", cpu->tag, classdata->cycles_running));
classdata->totalcycles += classdata->cycles_running;
classdata->localtime = attotime_add_attoseconds(classdata->localtime, classdata->cycles_running * classdata->attoseconds_per_cycle);
LOG((" %d skipped, %d total, time = %s\n", classdata->cycles_running, (INT32)classdata->totalcycles, attotime_string(classdata->localtime, 9)));
}
/* update the suspend state (breaks steeltal if we don't) */
classdata->suspend = classdata->nextsuspend;
classdata->eatcycles = classdata->nexteatcycles;
/* update the base time */
timerexec->basetime = target;
}
/* update the global time */
timer_set_global_time(machine, target);
/* execute timers */
timer_execute_timers(machine);
}
@ -443,6 +485,7 @@ static DEVICE_START( cpu )
}
/* fill in the suspend states */
classdata->device = device;
classdata->profiler = index + PROFILER_CPU1;
classdata->suspend = SUSPEND_REASON_RESET;
classdata->inttrigger = index + TRIGGER_INT;
@ -1091,7 +1134,7 @@ void cpuexec_trigger(running_machine *machine, int trigger)
cpu_abort_timeslice(cpu);
/* see if this is a matching trigger */
if (classdata->suspend != 0 && classdata->trigger == trigger)
if ((classdata->nextsuspend & SUSPEND_REASON_TRIGGER) != 0 && classdata->trigger == trigger)
{
cpu_resume(cpu, SUSPEND_REASON_TRIGGER);
classdata->trigger = 0;
@ -1570,6 +1613,48 @@ static void register_save_states(const device_config *device)
}
/*-------------------------------------------------
    rebuild_execute_list - relink the global
    execution list so that every CPU with a zero
    suspend state comes first (in machine CPU
    order), followed by every suspended CPU (also
    in machine CPU order)
-------------------------------------------------*/
static void rebuild_execute_list(running_machine *machine)
{
	cpuexec_private *global = machine->cpuexec_data;
	cpu_class_data **link = &global->executelist;
	int want_suspended;

	/* the list starts out empty; 'link' always points at the slot to fill next */
	*link = NULL;

	/* pass 0 appends the non-suspended CPUs, pass 1 appends the suspended ones */
	for (want_suspended = 0; want_suspended <= 1; want_suspended++)
	{
		const device_config *cpu;

		for (cpu = machine->cpu[0]; cpu != NULL; cpu = cpu->typenext)
		{
			cpu_class_data *entry = get_class_data(cpu);
			int is_suspended = (entry->suspend != 0);

			/* leave CPUs in the other category for the other pass */
			if (is_suspended != want_suspended)
				continue;

			/* append this CPU and keep the list NULL-terminated */
			entry->next = NULL;
			*link = entry;
			link = &entry->next;
		}
	}
}
/*-------------------------------------------------
get_register_value - return a register value
of a CPU using the state table

View File

@ -1615,9 +1615,8 @@ static TIMER_CALLBACK( soft_reset )
/* now we're running */
mame->current_phase = MAME_PHASE_RUNNING;
/* set the global time to the current time */
/* this allows 0-time queued callbacks to run before any CPUs execute */
timer_set_global_time(machine, timer_get_time(machine));
/* allow 0-time queued callbacks to run before any CPUs execute */
timer_execute_timers(machine);
}

View File

@ -29,9 +29,10 @@
CONSTANTS
***************************************************************************/
#define MAX_TIMERS 256
#define MAX_QUANTA 16
#define MAX_TIMERS 256
#define MAX_QUANTA 16
#define DEFAULT_MINIMUM_QUANTUM ATTOSECONDS_IN_MSEC(100)
/***************************************************************************
@ -91,20 +92,22 @@ struct _timer_private
{
/* list of active timers */
emu_timer timers[MAX_TIMERS]; /* actual timers */
emu_timer * activelist; /* head of the active list */
emu_timer * freelist; /* head of the free list */
emu_timer * freelist_tail; /* tail of the free list */
emu_timer * activelist; /* head of the active list */
emu_timer * freelist; /* head of the free list */
emu_timer * freelist_tail; /* tail of the free list */
/* execution state */
timer_execution_state exec; /* current global execution state */
/* other internal states */
attotime basetime; /* the current global base time */
emu_timer * callback_timer; /* pointer to the current callback timer */
emu_timer * callback_timer; /* pointer to the current callback timer */
UINT8 callback_timer_modified; /* TRUE if the current callback timer was modified */
attotime callback_timer_expire_time; /* the original expiration time */
/* scheduling quanta */
quantum_slot quantum_list[MAX_QUANTA]; /* list of scheduling quanta */
quantum_slot * quantum_current;/* current minimum quantum */
attoseconds_t quantum_minimum;/* duration of minimum quantum */
quantum_slot * quantum_current; /* current minimum quantum */
attoseconds_t quantum_minimum; /* duration of minimum quantum */
};
@ -138,7 +141,7 @@ INLINE attotime get_current_time(running_machine *machine)
/* if we're executing as a particular CPU, use its local time as a base */
/* otherwise, return the global base time */
return cpuexec_override_local_time(machine, global->basetime);
return cpuexec_override_local_time(machine, global->exec.basetime);
}
@ -198,7 +201,7 @@ INLINE void timer_list_insert(emu_timer *timer)
#endif
/* loop over the timer list */
for (t = global->activelist; t; lt = t, t = t->next)
for (t = global->activelist; t != NULL; lt = t, t = t->next)
{
/* if the current list entry expires after us, we should be inserted before it */
if (attotime_compare(t->expire, expire) > 0)
@ -207,20 +210,26 @@ INLINE void timer_list_insert(emu_timer *timer)
timer->prev = t->prev;
timer->next = t;
if (t->prev)
if (t->prev != NULL)
t->prev->next = timer;
else
{
global->activelist = timer;
global->exec.nextfire = timer->expire;
}
t->prev = timer;
return;
}
}
/* need to insert after the last one */
if (lt)
if (lt != NULL)
lt->next = timer;
else
{
global->activelist = timer;
global->exec.nextfire = timer->expire;
}
timer->prev = lt;
timer->next = NULL;
}
@ -263,11 +272,15 @@ INLINE void timer_list_remove(emu_timer *timer)
#endif
/* remove it from the list */
if (timer->prev)
if (timer->prev != NULL)
timer->prev->next = timer->next;
else
{
global->activelist = timer->next;
if (timer->next)
if (global->activelist != NULL)
global->exec.nextfire = global->activelist->expire;
}
if (timer->next != NULL)
timer->next->prev = timer->prev;
}
@ -291,13 +304,15 @@ void timer_init(running_machine *machine)
memset(global, 0, sizeof(*global));
/* we need to wait until the first call to timer_cyclestorun before using real CPU times */
global->basetime = attotime_zero;
global->exec.basetime = attotime_zero;
global->exec.nextfire = attotime_never;
global->exec.curquantum = DEFAULT_MINIMUM_QUANTUM;
global->callback_timer = NULL;
global->callback_timer_modified = FALSE;
/* register with the save state system */
state_save_register_item(machine, "timer", NULL, 0, global->basetime.seconds);
state_save_register_item(machine, "timer", NULL, 0, global->basetime.attoseconds);
state_save_register_item(machine, "timer", NULL, 0, global->exec.basetime.seconds);
state_save_register_item(machine, "timer", NULL, 0, global->exec.basetime.attoseconds);
state_save_register_postload(machine, timer_postload, NULL);
/* initialize the lists */
@ -309,8 +324,8 @@ void timer_init(running_machine *machine)
global->freelist_tail = &global->timers[MAX_TIMERS-1];
/* reset the quanta */
global->quantum_list[0].requested = ATTOSECONDS_IN_MSEC(100);
global->quantum_list[0].actual = ATTOSECONDS_IN_MSEC(100);
global->quantum_list[0].requested = DEFAULT_MINIMUM_QUANTUM;
global->quantum_list[0].actual = DEFAULT_MINIMUM_QUANTUM;
global->quantum_list[0].expire = attotime_never;
global->quantum_current = &global->quantum_list[0];
global->quantum_minimum = ATTOSECONDS_IN_NSEC(1) / 1000;
@ -334,17 +349,29 @@ void timer_destructor(void *ptr, size_t size)
***************************************************************************/
/*-------------------------------------------------
timer_next_fire_time - return the
time when the next timer will fire
timer_get_execution_state - return a pointer
to the execution state
-------------------------------------------------*/
attotime timer_next_fire_time(running_machine *machine)
timer_execution_state *timer_get_execution_state(running_machine *machine)
{
timer_private *global = machine->timer_data;
attotime quantum_time;
return &global->exec;
}
/*-------------------------------------------------
timer_execute_timers - execute timers and
update scheduling quanta
-------------------------------------------------*/
void timer_execute_timers(running_machine *machine)
{
timer_private *global = machine->timer_data;
emu_timer *timer;
/* if the current quantum has expired, find a new one */
if (attotime_compare(global->basetime, global->quantum_current->expire) >= 0)
if (attotime_compare(global->exec.basetime, global->quantum_current->expire) >= 0)
{
int curr;
@ -353,29 +380,13 @@ attotime timer_next_fire_time(running_machine *machine)
for (curr = 1; curr < ARRAY_LENGTH(global->quantum_list); curr++)
if (global->quantum_list[curr].requested != 0 && global->quantum_list[curr].requested < global->quantum_current->requested)
global->quantum_current = &global->quantum_list[curr];
global->exec.curquantum = global->quantum_current->actual;
}
quantum_time = attotime_add_attoseconds(global->basetime, global->quantum_current->actual);
return attotime_min(quantum_time, global->activelist->expire);
}
/*-------------------------------------------------
timer_adjust_global_time - adjust the global
time; this is also where we fire the timers
-------------------------------------------------*/
void timer_set_global_time(running_machine *machine, attotime newbase)
{
timer_private *global = machine->timer_data;
emu_timer *timer;
/* set the new global offset */
global->basetime = newbase;
LOG(("timer_set_global_time: new=%s head->expire=%s\n", attotime_string(newbase, 9), attotime_string(global->activelist->expire, 9)));
LOG(("timer_set_global_time: new=%s head->expire=%s\n", attotime_string(global->exec.basetime, 9), attotime_string(global->activelist->expire, 9)));
/* now process any timers that are overdue */
while (attotime_compare(global->activelist->expire, global->basetime) <= 0)
while (attotime_compare(global->activelist->expire, global->exec.basetime) <= 0)
{
int was_enabled = global->activelist->enabled;
@ -473,7 +484,10 @@ void timer_add_scheduling_quantum(running_machine *machine, attoseconds_t quantu
/* update the minimum */
if (quantum < global->quantum_current->requested)
{
global->quantum_current = &global->quantum_list[blank];
global->exec.curquantum = global->quantum_current->actual;
}
}
@ -496,6 +510,9 @@ void timer_set_minimum_quantum(running_machine *machine, attoseconds_t quantum)
for (curr = 0; curr < ARRAY_LENGTH(global->quantum_list); curr++)
if (global->quantum_list[curr].requested != 0)
global->quantum_list[curr].actual = MAX(global->quantum_list[curr].requested, global->quantum_minimum);
/* ensure that the live current quantum is up to date */
global->exec.curquantum = global->quantum_current->actual;
}

View File

@ -98,6 +98,15 @@ struct _timer_config
typedef struct _emu_timer emu_timer;
/* execution state owned by the timer system; a pointer to it is handed out by */
/* timer_get_execution_state so that cpuexec_timeslice can read and advance it directly */
typedef struct _timer_execution_state timer_execution_state;
struct _timer_execution_state
{
attotime nextfire; /* time that the head of the timer list will fire; refreshed whenever the head of the active list changes */
attotime basetime; /* global basetime; everything moves forward from here; advanced to the target time at the end of each pass of the cpuexec_timeslice loop */
attoseconds_t curquantum; /* current quantum of execution, in attoseconds; mirrors the 'actual' value of the current scheduling quantum */
};
/***************************************************************************
TIMER DEVICE CONFIGURATION MACROS
@ -159,11 +168,11 @@ void timer_destructor(void *ptr, size_t size);
/* ----- scheduling helpers ----- */
/* return the time when the next timer will fire */
attotime timer_next_fire_time(running_machine *machine);
/* return a pointer to the execution state */
timer_execution_state *timer_get_execution_state(running_machine *machine);
/* adjust the global time; this is also where we fire the timers */
void timer_set_global_time(running_machine *machine, attotime newbase);
/* execute timers and update scheduling quanta */
void timer_execute_timers(running_machine *machine);
/* add a scheduling quantum; the smallest active one is the one that is in use */
void timer_add_scheduling_quantum(running_machine *machine, attoseconds_t quantum, attotime duration);