mirror of
https://github.com/holub/mame
synced 2025-10-05 16:50:57 +03:00
Atari pokey performance II (#4702)
* pokey: rename pokey_device::m_output -> pokey_device::m_out_raw There is a variable pokey_device::pokey_channel::m_output. Two variables with the same name in close context but completely different meaning are not exactly helpful to understand the code. Renaming pokey_device::pokey_channel::m_output was not an option because this would damage stored machine states - right? Signed-off-by: Andreas Müller <schnitzeltony@gmail.com> * pokey: rework for performance enhancements Profiling with valgrind pointed to the following hotspot: pokey_device::step_one_clock / line 686-689: | for (int ch = 0; ch < 4; ch++) | { | sum |= (((((m_channel[ch].m_output ^ m_channel[ch].m_filter_sample) || (m_channel[ch].m_AUDC & VOLUME_ONLY)) ? (m_channel[ch].m_AUDC & VOLUME_MASK) : 0 )) << (ch * 4)); | } First solution was to move bit-shifting to first part of ?-part: | sum |= (((m_channel[ch].m_output ^ m_channel[ch].m_filter_sample) || (m_channel[ch].m_AUDC & VOLUME_ONLY)) ? ((m_channel[ch].m_AUDC & VOLUME_MASK) << (ch * 4)) : 0); because shifting 0 does not change value. Performance measurements showed improvements but they were not entirely satisfying (change is part of this patch). So I thought more of what this piece of code is about: * it is run at high frequency (@starwars: 1.5MHz * 4 pokey instances * 4 channels -> ~6MHz) => that is creating the high CPU cycle consumption * frequency of output change is in a range of (double) audible frequencies (few kHz). => there are long sequences creating identical output * the sum value calculated depends on few channel input variables: m_output / m_filter_sample / m_AUDC This patch suggests a solution which keeps track of possible input variable change and as long as they don't change there is no need to render output sum.
The following tests were performed: * mame64 -bench 50 starwars: Average speed increases from ~430 to ~490 on my PC * on screen profiling shows ~2% idle win * starwars, missile-command and marble-madness do not show any audible artefacts Signed-off-by: Andreas Müller <schnitzeltony@gmail.com> * pokey: rework prescaler handling * CLK_1 does not have a prescaler so there is no need to increment and reset m_clock_cnt[CLK_1] * Unroll other prescalers: It gives performance win and reading is easier. Function tests: on missile/starwars Performance test: mame64 -nothrottle starwars Before: Average speed: 409.36% (21 seconds) After: Average speed: 447.37% (21 seconds) Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
This commit is contained in:
parent
390b7c812c
commit
29a535e089
@ -161,8 +161,6 @@
|
||||
#define CLK_28 1
|
||||
#define CLK_114 2
|
||||
|
||||
static const int clock_divisors[3] = {1, DIV_64, DIV_15};
|
||||
|
||||
constexpr unsigned pokey_device::FREQ_17_EXACT;
|
||||
|
||||
|
||||
@ -253,7 +251,8 @@ void pokey_device::device_start()
|
||||
m_pot_counter = 0;
|
||||
m_kbd_cnt = 0;
|
||||
m_out_filter = 0;
|
||||
m_output = 0;
|
||||
m_out_raw = 0;
|
||||
m_old_raw_inval = true;
|
||||
m_kbd_state = 0;
|
||||
|
||||
/* reset more internal state */
|
||||
@ -435,14 +434,7 @@ void pokey_device::execute_run()
|
||||
{
|
||||
do
|
||||
{
|
||||
uint32_t new_out = step_one_clock();
|
||||
if (m_output != new_out)
|
||||
{
|
||||
//printf("forced update %08d %08x\n", m_icount, m_output);
|
||||
m_stream->update();
|
||||
m_output = new_out;
|
||||
}
|
||||
|
||||
step_one_clock();
|
||||
m_icount--;
|
||||
} while (m_icount > 0);
|
||||
|
||||
@ -570,25 +562,12 @@ void pokey_device::step_pot()
|
||||
*
|
||||
*/
|
||||
|
||||
uint32_t pokey_device::step_one_clock(void)
|
||||
void pokey_device::step_one_clock(void)
|
||||
{
|
||||
int const base_clock = (m_AUDCTL & CLK_15KHZ) ? CLK_114 : CLK_28;
|
||||
|
||||
/* Clocks only count if we are not in a reset */
|
||||
if (m_SKCTL & SK_RESET)
|
||||
{
|
||||
/* Clocks only count if we are not in a reset */
|
||||
int clock_triggered[3] = {0,0,0};
|
||||
int clk;
|
||||
for (clk = 0; clk < 3; clk++)
|
||||
{
|
||||
m_clock_cnt[clk]++;
|
||||
if (m_clock_cnt[clk] >= clock_divisors[clk])
|
||||
{
|
||||
m_clock_cnt[clk] = 0;
|
||||
clock_triggered[clk] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* polynom pointers */
|
||||
if (++m_p4 == 0x0000f)
|
||||
m_p4 = 0;
|
||||
if (++m_p5 == 0x0001f)
|
||||
@ -598,7 +577,23 @@ uint32_t pokey_device::step_one_clock(void)
|
||||
if (++m_p17 == 0x1ffff)
|
||||
m_p17 = 0;
|
||||
|
||||
clk = (m_AUDCTL & CH1_HICLK) ? CLK_1 : base_clock;
|
||||
/* CLK_1: no prescaler */
|
||||
int clock_triggered[3] = {1,0,0};
|
||||
/* CLK_28: prescaler 63.9211 kHz */
|
||||
if (++m_clock_cnt[CLK_28] >= DIV_64)
|
||||
{
|
||||
m_clock_cnt[CLK_28] = 0;
|
||||
clock_triggered[CLK_28] = 1;
|
||||
}
|
||||
/* CLK_114 prescaler 15.6999 kHz */
|
||||
if (++m_clock_cnt[CLK_114] >= DIV_15)
|
||||
{
|
||||
m_clock_cnt[CLK_114] = 0;
|
||||
clock_triggered[CLK_114] = 1;
|
||||
}
|
||||
|
||||
int const base_clock = (m_AUDCTL & CLK_15KHZ) ? CLK_114 : CLK_28;
|
||||
int clk = (m_AUDCTL & CH1_HICLK) ? CLK_1 : base_clock;
|
||||
if (clock_triggered[clk])
|
||||
m_channel[CHAN1].inc_chan();
|
||||
|
||||
@ -682,12 +677,23 @@ uint32_t pokey_device::step_one_clock(void)
|
||||
m_channel[CHAN1].m_filter_sample = 1;
|
||||
}
|
||||
|
||||
uint32_t sum = 0;
|
||||
for (int ch = 0; ch < 4; ch++)
|
||||
if (m_old_raw_inval)
|
||||
{
|
||||
sum |= (((((m_channel[ch].m_output ^ m_channel[ch].m_filter_sample) || (m_channel[ch].m_AUDC & VOLUME_ONLY)) ? (m_channel[ch].m_AUDC & VOLUME_MASK) : 0 )) << (ch * 4));
|
||||
uint32_t sum = 0;
|
||||
for (int ch = 0; ch < 4; ch++)
|
||||
{
|
||||
sum |= (((m_channel[ch].m_output ^ m_channel[ch].m_filter_sample) || (m_channel[ch].m_AUDC & VOLUME_ONLY)) ?
|
||||
((m_channel[ch].m_AUDC & VOLUME_MASK) << (ch * 4)) : 0);
|
||||
}
|
||||
|
||||
if (m_out_raw != sum)
|
||||
{
|
||||
//printf("forced update %08d %08x\n", m_icount, m_out_raw);
|
||||
m_stream->update();
|
||||
}
|
||||
m_old_raw_inval = false;
|
||||
m_out_raw = sum;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
//-------------------------------------------------
|
||||
@ -704,7 +710,7 @@ void pokey_device::sound_stream_update(sound_stream &stream, stream_sample_t **i
|
||||
{
|
||||
int32_t out = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
out += ((m_output >> (4*i)) & 0x0f);
|
||||
out += ((m_out_raw >> (4*i)) & 0x0f);
|
||||
out *= POKEY_DEFAULT_GAIN;
|
||||
out = (out > 0x7fff) ? 0x7fff : out;
|
||||
while( samples > 0 )
|
||||
@ -715,7 +721,7 @@ void pokey_device::sound_stream_update(sound_stream &stream, stream_sample_t **i
|
||||
}
|
||||
else if (m_output_type == RC_LOWPASS)
|
||||
{
|
||||
double rTot = m_voltab[m_output];
|
||||
double rTot = m_voltab[m_out_raw];
|
||||
|
||||
double V0 = rTot / (rTot+m_r_pullup) * m_v_ref / 5.0 * 32767.0;
|
||||
double mult = (m_cap == 0.0) ? 1.0 : 1.0 - exp(-(rTot + m_r_pullup) / (m_cap * m_r_pullup * rTot) * m_clock_period.as_double());
|
||||
@ -731,7 +737,7 @@ void pokey_device::sound_stream_update(sound_stream &stream, stream_sample_t **i
|
||||
}
|
||||
else if (m_output_type == OPAMP_C_TO_GROUND)
|
||||
{
|
||||
double rTot = m_voltab[m_output];
|
||||
double rTot = m_voltab[m_out_raw];
|
||||
/* In this configuration there is a capacitor in parallel to the pokey output to ground.
|
||||
* With a LM324 in LTSpice this causes the opamp circuit to oscillate at around 100 kHz.
|
||||
* We are ignoring the capacitor here, since this oscillation would not be audible.
|
||||
@ -753,7 +759,7 @@ void pokey_device::sound_stream_update(sound_stream &stream, stream_sample_t **i
|
||||
}
|
||||
else if (m_output_type == OPAMP_LOW_PASS)
|
||||
{
|
||||
double rTot = m_voltab[m_output];
|
||||
double rTot = m_voltab[m_out_raw];
|
||||
/* This post-pokey stage usually has a low-pass filter behind it
|
||||
* It is approximated by not adding in VRef below.
|
||||
*/
|
||||
@ -771,7 +777,7 @@ void pokey_device::sound_stream_update(sound_stream &stream, stream_sample_t **i
|
||||
}
|
||||
else if (m_output_type == DISCRETE_VAR_R)
|
||||
{
|
||||
int32_t out = m_voltab[m_output];
|
||||
int32_t out = m_voltab[m_out_raw];
|
||||
while( samples > 0 )
|
||||
{
|
||||
*buffer++ = out;
|
||||
@ -898,6 +904,7 @@ void pokey_device::write_internal(offs_t offset, uint8_t data)
|
||||
case AUDC1_C:
|
||||
LOG_SOUND(("POKEY '%s' AUDC1 $%02x (%s)\n", tag(), data, audc2str(data)));
|
||||
m_channel[CHAN1].m_AUDC = data;
|
||||
m_old_raw_inval = true;
|
||||
break;
|
||||
|
||||
case AUDF2_C:
|
||||
@ -908,6 +915,7 @@ void pokey_device::write_internal(offs_t offset, uint8_t data)
|
||||
case AUDC2_C:
|
||||
LOG_SOUND(("POKEY '%s' AUDC2 $%02x (%s)\n", tag(), data, audc2str(data)));
|
||||
m_channel[CHAN2].m_AUDC = data;
|
||||
m_old_raw_inval = true;
|
||||
break;
|
||||
|
||||
case AUDF3_C:
|
||||
@ -918,6 +926,7 @@ void pokey_device::write_internal(offs_t offset, uint8_t data)
|
||||
case AUDC3_C:
|
||||
LOG_SOUND(("POKEY '%s' AUDC3 $%02x (%s)\n", tag(), data, audc2str(data)));
|
||||
m_channel[CHAN3].m_AUDC = data;
|
||||
m_old_raw_inval = true;
|
||||
break;
|
||||
|
||||
case AUDF4_C:
|
||||
@ -928,6 +937,7 @@ void pokey_device::write_internal(offs_t offset, uint8_t data)
|
||||
case AUDC4_C:
|
||||
LOG_SOUND(("POKEY '%s' AUDC4 $%02x (%s)\n", tag(), data, audc2str(data)));
|
||||
m_channel[CHAN4].m_AUDC = data;
|
||||
m_old_raw_inval = true;
|
||||
break;
|
||||
|
||||
case AUDCTL_C:
|
||||
@ -952,7 +962,7 @@ void pokey_device::write_internal(offs_t offset, uint8_t data)
|
||||
m_channel[i].m_output = 0;
|
||||
m_channel[i].m_filter_sample = (i<2 ? 1 : 0);
|
||||
}
|
||||
|
||||
m_old_raw_inval = true;
|
||||
break;
|
||||
|
||||
case SKREST_C:
|
||||
@ -1070,6 +1080,7 @@ inline void pokey_device::process_channel(int ch)
|
||||
m_channel[ch].m_output = (m_poly9[m_p9] & 1);
|
||||
else
|
||||
m_channel[ch].m_output = (m_poly17[m_p17] & 1);
|
||||
m_old_raw_inval = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -264,7 +264,7 @@ private:
|
||||
|
||||
static constexpr int POKEY_CHANNELS = 4;
|
||||
|
||||
uint32_t step_one_clock();
|
||||
void step_one_clock();
|
||||
void step_keyboard();
|
||||
void step_pot();
|
||||
|
||||
@ -284,10 +284,11 @@ private:
|
||||
|
||||
pokey_channel m_channel[POKEY_CHANNELS];
|
||||
|
||||
uint32_t m_output; /* raw output */
|
||||
double m_out_filter; /* filtered output */
|
||||
uint32_t m_out_raw; /* raw output */
|
||||
bool m_old_raw_inval; /* true: recalc m_out_raw required */
|
||||
double m_out_filter; /* filtered output */
|
||||
|
||||
int32_t m_clock_cnt[3]; /* clock counters */
|
||||
int32_t m_clock_cnt[3]; /* clock counters */
|
||||
uint32_t m_p4; /* poly4 index */
|
||||
uint32_t m_p5; /* poly5 index */
|
||||
uint32_t m_p9; /* poly9 index */
|
||||
|
Loading…
Reference in New Issue
Block a user