netlist: runtime performance statistics on demand.

Runtime performance statistics can now be enabled with nltool
option "-s". To enable those with MAME you need to run

NL_STATS=1 ./mamenl64 -v -oslog game
This commit is contained in:
couriersud 2019-04-21 23:28:33 +02:00
parent c52b2f7c6f
commit 49c05c24a2
10 changed files with 147 additions and 71 deletions

View File

@ -1012,6 +1012,18 @@ void netlist_mame_device::common_dev_start(netlist::netlist_t *lnetlist) const
{
auto &lsetup = lnetlist->nlstate().setup();
// Override log statistics
pstring p = plib::util::environment("NL_STATS", "");
if (p != "")
{
bool err=false;
bool v = plib::pstonum_ne<bool>(p, err);
if (err)
lsetup.log().warning("NL_STATS: invalid value {1}", p);
else
lnetlist->enable_stats(v);
}
// register additional devices
nl_register_devices(lsetup);
@ -1308,11 +1320,11 @@ offs_t netlist_disassembler::disassemble(std::ostream &stream, offs_t pc, const
{
unsigned startpc = pc;
int relpc = pc - m_dev->genPC();
if (relpc >= 0 && relpc < m_dev->netlist().queue().size())
if (relpc >= 0 && relpc < m_dev->netlist().queuex().size())
{
int dpc = m_dev->netlist().queue().size() - relpc - 1;
util::stream_format(stream, "%c %s @%10.7f", (relpc == 0) ? '*' : ' ', m_dev->netlist().queue()[dpc].m_object->name().c_str(),
m_dev->netlist().queue()[dpc].m_exec_time.as_double());
int dpc = m_dev->netlist().queuex().size() - relpc - 1;
util::stream_format(stream, "%c %s @%10.7f", (relpc == 0) ? '*' : ' ', m_dev->netlist().queuex()[dpc].m_object->name().c_str(),
m_dev->netlist().queuex()[dpc].m_exec_time.as_double());
}
pc+=1;

View File

@ -107,9 +107,8 @@ const logic_family_desc_t *family_CD4XXX()
// ----------------------------------------------------------------------------------------
detail::queue_t::queue_t(netlist_state_t &nl)
: timed_queue<pqentry_t<net_t *, netlist_time>, false, NL_KEEP_STATISTICS>(512)
: timed_queue<pqentry_t<net_t *, netlist_time>, false, true>(512)
, netlist_ref(nl)
// , plib::state_manager_t::callback_t()
, m_qsize(0)
, m_times(512)
, m_net_ids(512)
@ -193,6 +192,7 @@ netlist_t::netlist_t(const pstring &aname, plib::unique_ptr<callbacks_t> callbac
, m_time(netlist_time::zero())
, m_mainclock(nullptr)
, m_queue(*m_state)
, m_stats(false)
{
devices::initialize_factory(nlstate().setup().factory());
NETLIST_NAME(base)(nlstate().setup());
@ -363,14 +363,23 @@ void netlist_state_t::reset()
#endif
}
void netlist_t::process_queue(const netlist_time delta) NL_NOEXCEPT
{
auto sm_guard(m_stat_mainloop.guard());
if (!m_stats)
process_queue_stats<false>(delta);
else
{
auto sm_guard(m_stat_mainloop.guard());
process_queue_stats<true>(delta);
}
}
template <bool KEEP_STATS>
void netlist_t::process_queue_stats(const netlist_time delta) NL_NOEXCEPT
{
netlist_time stop(m_time + delta);
m_queue.push(detail::queue_t::entry_t(stop, nullptr));
qpush(detail::queue_t::entry_t(stop, nullptr));
if (m_mainclock == nullptr)
{
@ -378,8 +387,8 @@ void netlist_t::process_queue(const netlist_time delta) NL_NOEXCEPT
m_time = e.m_exec_time;
while (e.m_object != nullptr)
{
e.m_object->update_devs();
m_perf_out_processed.inc();
e.m_object->template update_devs<KEEP_STATS>();
if (KEEP_STATS) m_perf_out_processed.inc();
e = m_queue.pop();
m_time = e.m_exec_time;
}
@ -396,7 +405,7 @@ void netlist_t::process_queue(const netlist_time delta) NL_NOEXCEPT
{
m_time = mc_time;
mc_net.toggle_new_Q();
mc_net.update_devs();
mc_net.update_devs<KEEP_STATS>();
mc_time += inc;
}
@ -404,8 +413,8 @@ void netlist_t::process_queue(const netlist_time delta) NL_NOEXCEPT
m_time = e.m_exec_time;
if (e.m_object != nullptr)
{
e.m_object->update_devs();
m_perf_out_processed.inc();
e.m_object->template update_devs<KEEP_STATS>();
if (KEEP_STATS) m_perf_out_processed.inc();
}
else
break;
@ -416,33 +425,34 @@ void netlist_t::process_queue(const netlist_time delta) NL_NOEXCEPT
void netlist_t::print_stats() const
{
if (nperftime_t<NL_KEEP_STATISTICS>::enabled)
if (m_stats)
{
std::vector<size_t> index;
for (size_t i=0; i < m_state->m_devices.size(); i++)
index.push_back(i);
std::sort(index.begin(), index.end(),
[&](size_t i1, size_t i2) { return m_state->m_devices[i1].second->m_stat_total_time.total() < m_state->m_devices[i2].second->m_stat_total_time.total(); });
[&](size_t i1, size_t i2) { return m_state->m_devices[i1].second->m_stats->m_stat_total_time.total() < m_state->m_devices[i2].second->m_stats->m_stat_total_time.total(); });
nperftime_t<NL_KEEP_STATISTICS>::type total_time(0);
nperftime_t<NL_KEEP_STATISTICS>::ctype total_count(0);
nperftime_t<true>::type total_time(0);
nperftime_t<true>::ctype total_count(0);
for (auto & j : index)
{
auto entry = m_state->m_devices[j].second.get();
auto stats = m_state->m_devices[j].second.get()->m_stats.get();
log().verbose("Device {1:20} : {2:12} {3:12} {4:15} {5:12}", entry->name(),
entry->m_stat_call_count(), entry->m_stat_total_time.count(),
entry->m_stat_total_time.total(), entry->m_stat_inc_active());
total_time += entry->m_stat_total_time.total();
total_count += entry->m_stat_total_time.count();
stats->m_stat_call_count(), stats->m_stat_total_time.count(),
stats->m_stat_total_time.total(), stats->m_stat_inc_active());
total_time += stats->m_stat_total_time.total();
total_count += stats->m_stat_total_time.count();
}
log().verbose("Total calls : {1:12} {2:12} {3:12}", total_count,
total_time, total_time / static_cast<decltype(total_time)>(total_count));
nperftime_t<NL_KEEP_STATISTICS> overhead;
nperftime_t<NL_KEEP_STATISTICS> test;
nperftime_t<true> overhead;
nperftime_t<true> test;
{
auto overhead_guard(overhead.guard());
for (int j=0; j<100000;j++)
@ -451,9 +461,9 @@ void netlist_t::print_stats() const
}
}
nperftime_t<NL_KEEP_STATISTICS>::type total_overhead = overhead()
* static_cast<nperftime_t<NL_KEEP_STATISTICS>::type>(total_count)
/ static_cast<nperftime_t<NL_KEEP_STATISTICS>::type>(200000);
nperftime_t<true>::type total_overhead = overhead()
* static_cast<nperftime_t<true>::type>(total_count)
/ static_cast<nperftime_t<true>::type>(200000);
log().verbose("Queue Pushes {1:15}", m_queue.m_prof_call());
log().verbose("Queue Moves {1:15}", m_queue.m_prof_sortmove());
@ -467,8 +477,8 @@ void netlist_t::print_stats() const
log().verbose("");
log().verbose("Take the next lines with a grain of salt. They depend on the measurement implementation.");
log().verbose("Total overhead {1:15}", total_overhead);
nperftime_t<NL_KEEP_STATISTICS>::type overhead_per_pop = (m_stat_mainloop()-2*total_overhead - (total_time - total_overhead))
/ static_cast<nperftime_t<NL_KEEP_STATISTICS>::type>(m_queue.m_prof_call());
nperftime_t<true>::type overhead_per_pop = (m_stat_mainloop()-2*total_overhead - (total_time - total_overhead))
/ static_cast<nperftime_t<true>::type>(m_queue.m_prof_call());
log().verbose("Overhead per pop {1:11}", overhead_per_pop );
log().verbose("");
@ -476,12 +486,13 @@ void netlist_t::print_stats() const
for (auto &entry : m_state->m_devices)
{
auto ep = entry.second.get();
auto stats = ep->m_stats.get();
// Factor of 3 offers best performace increase
if (ep->m_stat_inc_active() > 3 * ep->m_stat_total_time.count()
&& ep->m_stat_inc_active() > trigger)
if (stats->m_stat_inc_active() > 3 * stats->m_stat_total_time.count()
&& stats->m_stat_inc_active() > trigger)
log().verbose("HINT({}, NO_DEACTIVATE) // {} {} {}", ep->name(),
static_cast<double>(ep->m_stat_inc_active()) / static_cast<double>(ep->m_stat_total_time.count()),
ep->m_stat_inc_active(), ep->m_stat_total_time.count());
static_cast<double>(stats->m_stat_inc_active()) / static_cast<double>(stats->m_stat_total_time.count()),
stats->m_stat_inc_active(), stats->m_stat_total_time.count());
}
}
}
@ -516,6 +527,8 @@ core_device_t::core_device_t(netlist_state_t &owner, const pstring &name)
{
if (logic_family() == nullptr)
set_logic_family(family_TTL());
if (exec().stats_enabled())
m_stats = plib::make_unique<stats_t>();
}
core_device_t::core_device_t(core_device_t &owner, const pstring &name)
@ -529,6 +542,8 @@ core_device_t::core_device_t(core_device_t &owner, const pstring &name)
if (logic_family() == nullptr)
set_logic_family(family_TTL());
state().add_dev(this->name(), pool_owned_ptr<core_device_t>(this, false));
if (exec().stats_enabled())
m_stats = plib::make_unique<stats_t>();
}
void core_device_t::set_default_delegate(detail::core_terminal_t &term)
@ -650,26 +665,39 @@ void detail::net_t::rebuild_list()
}
}
template <typename T>
template <bool KEEP_STATS, typename T>
void detail::net_t::process(const T mask, netlist_sig_t sig)
{
m_cur_Q = sig;
for (auto & p : m_list_active)
if (KEEP_STATS)
{
p.set_copied_input(sig);
p.device().m_stat_call_count.inc();
if ((p.terminal_state() & mask))
for (auto & p : m_list_active)
{
auto g(p.device().m_stat_total_time.guard());
//p.device().m_stat_total_time.start();
p.m_delegate();
//p.device().m_stat_total_time.stop();
p.set_copied_input(sig);
auto *stats = p.device().m_stats.get();
stats->m_stat_call_count.inc();
if ((p.terminal_state() & mask))
{
auto g(stats->m_stat_total_time.guard());
//p.device().m_stat_total_time.start();
p.m_delegate();
//p.device().m_stat_total_time.stop();
}
}
}
else
{
for (auto & p : m_list_active)
{
p.set_copied_input(sig);
if ((p.terminal_state() & mask))
p.m_delegate();
}
}
}
template <bool KEEP_STATS>
void detail::net_t::update_devs() NL_NOEXCEPT
{
nl_assert(this->isRailNet());
@ -684,7 +712,7 @@ void detail::net_t::update_devs() NL_NOEXCEPT
{
case core_terminal_t::STATE_INP_HL:
case core_terminal_t::STATE_INP_LH:
process(mask | core_terminal_t::STATE_INP_ACTIVE, new_Q);
process<KEEP_STATS>(mask | core_terminal_t::STATE_INP_ACTIVE, new_Q);
break;
default:
/* do nothing */

View File

@ -653,6 +653,7 @@ namespace netlist
void push_to_queue(netlist_time delay) NL_NOEXCEPT;
bool is_queued() const noexcept { return m_in_queue == queue_status::QUEUED; }
template <bool KEEP_STATS>
void update_devs() NL_NOEXCEPT;
netlist_time next_scheduled_time() const noexcept { return m_next_scheduled_time; }
@ -741,7 +742,7 @@ namespace netlist
plib::linkedlist_t<core_terminal_t> m_list_active;
std::vector<core_terminal_t *> m_core_terms; // save post-start m_list ...
template <typename T>
template <bool KEEP_STATS, typename T>
void process(const T mask, netlist_sig_t sig);
};
} // namespace detail
@ -1166,7 +1167,8 @@ namespace netlist
{
if (++m_active_outputs == 1)
{
m_stat_inc_active.inc();
if (m_stats/*NL_KEEP_STATISTICS*/)
m_stats->m_stat_inc_active.inc();
inc_active();
}
}
@ -1189,9 +1191,15 @@ namespace netlist
void set_default_delegate(detail::core_terminal_t &term);
/* stats */
nperftime_t<NL_KEEP_STATISTICS> m_stat_total_time;
nperfcount_t<NL_KEEP_STATISTICS> m_stat_call_count;
nperfcount_t<NL_KEEP_STATISTICS> m_stat_inc_active;
struct stats_t
{
// NL_KEEP_STATISTICS
nperftime_t<true> m_stat_total_time;
nperfcount_t<true> m_stat_call_count;
nperfcount_t<true> m_stat_inc_active;
};
plib::unique_ptr<stats_t> m_stats;
virtual void update() NL_NOEXCEPT { }
virtual void reset() { }
@ -1288,7 +1296,8 @@ namespace netlist
* solvers will update inputs after parallel processing.
*/
class detail::queue_t :
public timed_queue<pqentry_t<net_t *, netlist_time>, false, NL_KEEP_STATISTICS>,
//public timed_queue<pqentry_t<net_t *, netlist_time>, false, NL_KEEP_STATISTICS>,
public timed_queue<pqentry_t<net_t *, netlist_time>, false, true>,
public detail::netlist_ref,
public plib::state_manager_t::callback_t
{
@ -1488,9 +1497,25 @@ namespace netlist
void process_queue(const netlist_time delta) NL_NOEXCEPT;
void abort_current_queue_slice() NL_NOEXCEPT { m_queue.retime(detail::queue_t::entry_t(m_time, nullptr)); }
const detail::queue_t &queuex() const NL_NOEXCEPT { return m_queue; }
#if 0
const detail::queue_t &queue() const NL_NOEXCEPT { return m_queue; }
detail::queue_t &queue() NL_NOEXCEPT { return m_queue; }
#else
void qpush(detail::queue_t::entry_t && e) noexcept
{
if (!m_stats)
m_queue.push_nostats(std::move(e));
else
m_queue.push(std::move(e));
}
template <class R>
void qremove(const R &elem) noexcept
{
m_queue.remove(elem);
}
#endif
/* Control functions */
void stop();
@ -1519,7 +1544,14 @@ namespace netlist
void print_stats() const;
bool stats_enabled() const { return m_stats; }
void enable_stats(bool val) { m_stats = val; }
private:
template <bool KEEP_STATS>
void process_queue_stats(const netlist_time delta) NL_NOEXCEPT;
plib::unique_ptr<netlist_state_t> m_state;
devices::NETLIB_NAME(solver) * m_solver;
@ -1530,10 +1562,11 @@ namespace netlist
PALIGNAS_CACHELINE()
detail::queue_t m_queue;
bool m_stats;
// performance
nperftime_t<NL_KEEP_STATISTICS> m_stat_mainloop;
nperfcount_t<NL_KEEP_STATISTICS> m_perf_out_processed;
nperftime_t<true> m_stat_mainloop;
nperfcount_t<true> m_perf_out_processed;
};
// -----------------------------------------------------------------------------
@ -1653,15 +1686,15 @@ namespace netlist
if ((num_cons() != 0))
{
auto &lexec(exec());
auto &q(lexec.queue());
//auto &q(lexec.queue());
auto nst(lexec.time() + delay);
if (is_queued())
q.remove(this);
lexec.qremove(this);
m_in_queue = (!m_list_active.empty()) ?
queue_status::QUEUED : queue_status::DELAYED_DUE_TO_INACTIVE; /* queued ? */
if (m_in_queue == queue_status::QUEUED)
q.push(queue_t::entry_t(nst, this));
lexec.qpush(queue_t::entry_t(nst, this));
else
update_inputs();
m_next_scheduled_time = nst;
@ -1679,7 +1712,7 @@ namespace netlist
if (m_next_scheduled_time > exec().time())
{
m_in_queue = queue_status::QUEUED; /* pending */
exec().queue().push({m_next_scheduled_time, this});
exec().qpush({m_next_scheduled_time, this});
}
else
{

View File

@ -58,9 +58,7 @@
//============================================================
#define NL_DEBUG (false)
#define NL_KEEP_STATISTICS (0)
//#define NL_DEBUG (true)
//#define NL_KEEP_STATISTICS (1)
//============================================================
// General Macros

View File

@ -128,6 +128,19 @@ namespace netlist
m_prof_call.inc();
}
void push_nostats(T && e) noexcept
{
/* Lock */
lock_guard_type lck(m_lock);
T * i(m_end-1);
for (; QueueOp::less(*(i), e); --i)
{
*(i+1) = *(i);
}
*(i+1) = std::move(e);
++m_end;
}
T pop() noexcept { return *(--m_end); }
const T &top() const noexcept { return *(m_end-1); }

View File

@ -39,6 +39,7 @@ public:
opt_grp3(*this, "Options for run command", "These options are only used by the run command."),
opt_ttr (*this, "t", "time_to_run", 1.0, "time to run the emulation (seconds)\n\n abc def\n\n xyz"),
opt_stats(*this, "s", "statistics", "gather runtime statistics"),
opt_logs(*this, "l", "log" , "define terminal to log. This option may be specified repeatedly."),
opt_inp(*this, "i", "input", "", "input file to process (default is none)"),
opt_loadstate(*this,"", "loadstate", "", "load state from file and continue from there"),
@ -75,6 +76,7 @@ public:
plib::option_str opt_name;
plib::option_group opt_grp3;
plib::option_num<double> opt_ttr;
plib::option_bool opt_stats;
plib::option_vec opt_logs;
plib::option_str opt_inp;
plib::option_str opt_loadstate;
@ -351,6 +353,7 @@ void tool_app_t::run()
auto t_guard(t.guard());
//plib::perftime_t<plib::exact_ticks> t;
nt.enable_stats(opt_stats());
nt.init();
if (!opt_verb())

View File

@ -605,7 +605,7 @@ namespace devices
void matrix_solver_t::log_stats()
{
if (this->m_stat_calculations != 0 && this->m_stat_vsolver_calls && this->m_params.m_log_stats)
if (this->m_stat_calculations != 0 && this->m_stat_vsolver_calls && log().verbose.is_enabled())
{
log().verbose("==============================================");
log().verbose("Solver {1}", this->name());

View File

@ -37,7 +37,6 @@ namespace devices
netlist_time m_nr_recalc_delay;
bool m_use_gabs;
bool m_use_linear_prediction;
bool m_log_stats;
};

View File

@ -273,13 +273,6 @@ namespace devices
//m_params.m_max_timestep = std::max(m_params.m_max_timestep, m_params.m_max_timestep::)
// Override log statistics
pstring p = plib::util::environment("NL_STATS", "");
if (p != "")
m_params.m_log_stats = plib::pstonum<decltype(m_params.m_log_stats)>(p);
else
m_params.m_log_stats = m_log_stats();
log().verbose("Scanning net groups ...");
// determine net groups

View File

@ -60,7 +60,6 @@ namespace devices
, m_use_gabs(*this, "USE_GABS", true)
, m_use_linear_prediction(*this, "USE_LINEAR_PREDICTION", false) // // savings are eaten up by effort
, m_log_stats(*this, "LOG_STATS", true) // log statistics on shutdown
, m_params()
{
// internal staff
@ -100,8 +99,6 @@ namespace devices
param_logic_t m_use_gabs;
param_logic_t m_use_linear_prediction;
param_logic_t m_log_stats;
std::vector<pool_owned_ptr<matrix_solver_t>> m_mat_solvers;
std::vector<matrix_solver_t *> m_mat_solvers_all;
std::vector<matrix_solver_t *> m_mat_solvers_timestepping;