diff --git a/src/emu/sound/disc_flt.c b/src/emu/sound/disc_flt.c index 8e7ad3df977..155126afe71 100644 --- a/src/emu/sound/disc_flt.c +++ b/src/emu/sound/disc_flt.c @@ -148,7 +148,7 @@ static DISCRETE_STEP(dst_crfilter) { struct dst_rcfilter_context *context = (struct dst_rcfilter_context *)node->context; - if (context->has_rc_nodes) + if (UNEXPECTED(context->has_rc_nodes)) { double rc = DST_CRFILTER__R * DST_CRFILTER__C; if (rc != context->rc) @@ -1033,7 +1033,7 @@ static DISCRETE_STEP(dst_rcfilter) { struct dst_rcfilter_context *context = (struct dst_rcfilter_context *)node->context; - if (context->has_rc_nodes) + if (UNEXPECTED(context->has_rc_nodes)) { double rc = DST_RCFILTER__R * DST_RCFILTER__C; if (rc != context->rc) @@ -1051,6 +1051,12 @@ static DISCRETE_STEP(dst_rcfilter) node->output[0] = context->vCap + DST_RCFILTER__VREF; } +static DISCRETE_STEP(dst_rcfilter_fast) +{ + struct dst_rcfilter_context *context = (struct dst_rcfilter_context *)node->context; + node->output[0] += ((DST_RCFILTER__VIN - node->output[0]) * context->exponent); +} + static DISCRETE_RESET(dst_rcfilter) { struct dst_rcfilter_context *context = (struct dst_rcfilter_context *)node->context; @@ -1060,6 +1066,8 @@ static DISCRETE_RESET(dst_rcfilter) context->exponent = RC_CHARGE_EXP(context->rc); context->vCap = 0; node->output[0] = 0; + if (!context->has_rc_nodes && DST_RCFILTER__VREF == 0) + node->step = DISCRETE_STEP_NAME(dst_rcfilter_fast); } /************************************************************************ diff --git a/src/emu/sound/disc_wav.c b/src/emu/sound/disc_wav.c index 29fea3636e6..6eafdace943 100644 --- a/src/emu/sound/disc_wav.c +++ b/src/emu/sound/disc_wav.c @@ -199,7 +199,7 @@ static DISCRETE_STEP(dss_counter) max = DSS_COUNTER__MAX; ds_clock = DSS_COUNTER__CLOCK; - if (context->clock_type == DISC_CLK_IS_FREQ) + if (UNEXPECTED(context->clock_type == DISC_CLK_IS_FREQ)) { /* We need to keep clocking the internal clock even if disabled. 
*/ cycles = (context->t_left + node->info->sample_time) * ds_clock; @@ -215,7 +215,7 @@ static DISCRETE_STEP(dss_counter) /* If reset enabled then set output to the reset value. No x_time in reset. */ - if (DSS_COUNTER__RESET) + if (UNEXPECTED(DSS_COUNTER__RESET)) { context->count = DSS_COUNTER__INIT; node->output[0] = context->count; @@ -226,7 +226,7 @@ static DISCRETE_STEP(dss_counter) * Only count if module is enabled. * This has the effect of holding the output at it's current value. */ - if (DSS_COUNTER__ENABLE) + if (EXPECTED(DSS_COUNTER__ENABLE)) { last_count = context->count; @@ -260,7 +260,7 @@ static DISCRETE_STEP(dss_counter) node->output[0] = context->is_7492 ? disc_7492_count[context->count] : context->count; - if (context->count != last_count) + if (EXPECTED(context->count != last_count)) { /* the x_time is only output if the output changed. */ switch (context->out_type) diff --git a/src/emu/sound/discrete.c b/src/emu/sound/discrete.c index 650e1d903b9..581f86d580a 100644 --- a/src/emu/sound/discrete.c +++ b/src/emu/sound/discrete.c @@ -40,6 +40,24 @@ #include "discrete.h" #include "eminline.h" +/************************************* + * + * Performance + * + *************************************/ + +/* + * Normally, the discrete core processes 960 samples per update. + * With the various buffers involved, this on a Core2 is not as + * performant as processing 240 samples 4 times. + * The setting most probably depends on CPU and which modules are + * run and how many tasks are defined. + * + * Values < 32 exhibit poor performance (too much overhead) while + * Values > 500 have a slightly worse performance (too many cache misses?). 
+ */ + +#define MAX_SAMPLES_PER_TASK_SLICE (240) /************************************* * @@ -58,20 +76,6 @@ #define DISCRETE_PROFILING (0) -/************************************* - * - * Structures - * - *************************************/ - -typedef struct _task_info task_info; -struct _task_info -{ - discrete_task *task; - int samples; -}; - - /************************************* * * Prototypes @@ -85,6 +89,7 @@ static DEVICE_RESET( discrete ); static STREAM_UPDATE( discrete_stream_update ); static STREAM_UPDATE( buffer_stream_update ); +static int profiling = DISCRETE_PROFILING; /************************************* * @@ -313,7 +318,7 @@ INLINE void step_nodes_in_list(const linked_list_entry *list) { const linked_list_entry *entry; - if (DISCRETE_PROFILING) + if (UNEXPECTED(profiling)) /* timed branch runs only when profiling is on */ { osd_ticks_t last = get_profile_ticks(); @@ -322,7 +327,7 @@ INLINE void step_nodes_in_list(const linked_list_entry *list) node_description *node = (node_description *) entry->ptr; node->run_time -= last; - (*node->module->step)(node); + (*node->step)(node); last = get_profile_ticks(); node->run_time += last; } @@ -334,7 +339,7 @@ INLINE void step_nodes_in_list(const linked_list_entry *list) node_description *node = (node_description *) entry->ptr; /* Now step the node */ - (*node->module->step)(node); + (*node->step)(node); } } } @@ -599,7 +604,7 @@ static DEVICE_STOP( discrete ) osd_work_queue_free(info->queue); - if (DISCRETE_PROFILING) + if (profiling) { display_profiling(info); } @@ -648,8 +653,8 @@ static DEVICE_RESET( discrete ) (*node->module->reset)(node); /* otherwise, just step it */ - else if (node->module->step) - (*node->module->step)(node); + else if (node->step) + (*node->step)(node); } } @@ -661,30 +666,52 @@ static void *task_callback(void *param, int threadid) { - const task_info *ti = (task_info *) param; + const linked_list_entry *list = (linked_list_entry *) param; const linked_list_entry *entry; - discrete_task *task = 
ti->task; - int samples, i; + int samples; - /* set up task buffers */ - for (i = 0; i < task->numbuffered; i++) - task->ptr[i] = task->node_buf[i]; - - /* initialize sources */ - for (entry = task->source_list; entry != 0; entry = entry->next) + do { - discrete_source_node *sn = (discrete_source_node *) entry->ptr; - sn->ptr = sn->task->node_buf[sn->output_node]; - } + for (entry = list; entry != 0; entry = entry->next) + { + discrete_task *task = (discrete_task *) entry->ptr; + INT32 prev_id; + + /* try to lock */ + prev_id = compare_exchange32(&task->threadid, -1, threadid); + if (prev_id == -1 && task->threadid == threadid) + { + linked_list_entry *src_entry; - samples = ti->samples; - while (samples-- > 0) - { - /* step */ - step_nodes_in_list(task->list); - } + samples = MIN(task->samples, MAX_SAMPLES_PER_TASK_SLICE); - free(param); + /* check dependencies */ + for (src_entry = task->source_list; src_entry != NULL; src_entry = src_entry->next) + { + discrete_source_node *sn = (discrete_source_node *) src_entry->ptr; + int avail; + + avail = sn->task->ptr[sn->output_node] - sn->ptr; + if (avail < samples) + samples = avail; + } + + task->samples -= samples; + while (samples > 0) + { + /* step */ + step_nodes_in_list(task->list); + samples--; + } + if (task->samples == 0) + { + return NULL; + } + task->threadid = -1; + } + } + } while (1); + return NULL; } @@ -706,7 +733,8 @@ static STREAM_UPDATE( discrete_stream_update ) { discrete_info *info = (discrete_info *)param; const linked_list_entry *entry; - int outputnum, task_group; + int outputnum; + //, task_group; if (samples == 0) return; @@ -724,28 +752,36 @@ static STREAM_UPDATE( discrete_stream_update ) context->ptr = (stream_sample_t *) inputs[context->stream_in_number]; } - for (task_group = 0; task_group < DISCRETE_MAX_TASK_GROUPS; task_group++) + /* Setup tasks */ + for (entry = info->task_list; entry != 0; entry = entry->next) { - /* Queue tasks */ - for (entry = info->task_list; entry != 0; entry = 
entry->next) + discrete_task *task = (discrete_task *) entry->ptr; + linked_list_entry *src_entry; + int i; + + task->samples = samples; + task->threadid = -1; + + /* set up task buffers */ + for (i = 0; i < task->numbuffered; i++) + task->ptr[i] = task->node_buf[i]; + + /* initialize sources */ + for (src_entry = task->source_list; src_entry != 0; src_entry = src_entry->next) { - discrete_task *task = (discrete_task *) entry->ptr; - - if (task->task_group == task_group) - { - task_info *ti = (task_info *)malloc(sizeof(task_info)); - - /* Fire task */ - ti->task = task; - ti->samples = samples; - osd_work_item_queue(info->queue, task_callback, (void *) ti, WORK_ITEM_FLAG_AUTO_RELEASE); - } + discrete_source_node *sn = (discrete_source_node *) src_entry->ptr; + sn->ptr = sn->task->node_buf[sn->output_node]; } - /* and wait for them */ - osd_work_queue_wait(info->queue, osd_ticks_per_second()*10); } - if (DISCRETE_PROFILING) + for (entry = info->task_list; entry != 0; entry = entry->next) + { + /* Fire a work item for each task */ + osd_work_item_queue(info->queue, task_callback, (void *) info->task_list, WORK_ITEM_FLAG_AUTO_RELEASE); + } + osd_work_queue_wait(info->queue, osd_ticks_per_second()*10); + + if (profiling) { info->total_samples += samples; info->total_stream_updates++; @@ -821,6 +857,9 @@ static void init_nodes(discrete_info *info, const linked_list_entry *block_list, node->module = &custom->module; node->custom = custom->custom; } + + /* copy initial / default step function */ + node->step = node->module->step; /* allocate memory if necessary */ if (node->module->contextsize) @@ -895,7 +934,7 @@ static void init_nodes(discrete_info *info, const linked_list_entry *block_list, /* our running order just follows the order specified */ /* does the node step ? */ - if (node->module->step != NULL) + if (node->step != NULL) { /* do we belong to a task? 
*/ if (task_node_list_ptr == NULL) @@ -942,9 +981,10 @@ static void init_nodes(discrete_info *info, const linked_list_entry *block_list, /* attempt to group all static node parameters together. * Has a negative impact on performance - but it should * reduce memory bandwidth - this is weird. */ + #if 0 -static double dbuf[10240]; -static int dbufptr = 0; +static double dbuf[10240] = { 0.0, 1.0 }; +static int dbufptr = 2; static double *getDoublePtr(double val) { diff --git a/src/emu/sound/discrete.h b/src/emu/sound/discrete.h index 6fb097cea89..9f59fb05f96 100644 --- a/src/emu/sound/discrete.h +++ b/src/emu/sound/discrete.h @@ -3452,7 +3452,7 @@ #define DISCRETE_MAX_INPUTS 10 #define DISCRETE_MAX_OUTPUTS 8 #define DISCRETE_MAX_TASK_OUTPUTS 8 -#define DISCRETE_MAX_TASK_GROUPS 8 +#define DISCRETE_MAX_TASK_GROUPS 10 /************************************* @@ -3720,17 +3720,19 @@ struct _discrete_module struct _node_description { - const discrete_module *module; /* Node's module info */ - + /* this declaration order seems to be optimal */ double output[DISCRETE_MAX_OUTPUTS]; /* The node's last output value */ + DISCRETE_FUNC((*step)); /* Called to execute one time delta of output update */ + void * context; /* Contextual information specific to this node type */ + + const double * input[DISCRETE_MAX_INPUTS]; /* Addresses of Input values */ int active_inputs; /* Number of active inputs on this node type */ int input_is_node; /* Bit Flags. 1 in bit location means input_is_node */ - const double * input[DISCRETE_MAX_INPUTS]; /* Addresses of Input values */ - void * context; /* Contextual information specific to this node type */ const void * custom; /* Custom function specific initialisation data */ + const discrete_module *module; /* Node's module info */ const discrete_info *info; /* Points to the parent */ const discrete_sound_block *block; /* Points to the node's setup block. 
*/ @@ -3759,16 +3761,20 @@ typedef struct _discrete_task discrete_task; struct _discrete_task { const linked_list_entry *list; + + volatile INT32 threadid; + volatile int samples; + + /* list of source nodes */ + linked_list_entry *source_list; /* discrete_source_node */ int task_group; int numbuffered; double *ptr[DISCRETE_MAX_TASK_OUTPUTS]; - double *node_buf[DISCRETE_MAX_TASK_OUTPUTS]; - const node_description *nodes[DISCRETE_MAX_TASK_OUTPUTS]; const double *source[DISCRETE_MAX_TASK_OUTPUTS]; - /* list of source nodes */ - linked_list_entry *source_list; /* discrete_source_node */ + double *node_buf[DISCRETE_MAX_TASK_OUTPUTS]; + const node_description *nodes[DISCRETE_MAX_TASK_OUTPUTS]; }; typedef struct _discrete_source_node discrete_source_node;