mame/src/lib/netlist/solver/nld_matrix_solver.cpp
couriersud 0b3787d5f4 netlist: Fix some more exotic compile options. (nw)
128bit integers on kaby lake are not that much slower than 64bit. This
may be an option for MAME's attotime as well.
2019-02-23 16:24:01 +01:00

647 lines
18 KiB
C++

// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* nld_matrix_solver.cpp
*
*/
#include "nld_matrix_solver.h"
#include "../plib/putil.h"
#include <cmath> // <<= needed by windows build
namespace netlist
{
namespace devices
{
terms_for_net_t::terms_for_net_t()
: m_railstart(0)
, m_last_V(0.0)
, m_DD_n_m_1(0.0)
, m_h_n_m_1(1e-9)
{
}
void terms_for_net_t::add(terminal_t *term, int net_other, bool sorted)
{
if (sorted)
for (std::size_t i=0; i < m_connected_net_idx.size(); i++)
{
if (m_connected_net_idx[i] > net_other)
{
plib::container::insert_at(m_terms, i, term);
plib::container::insert_at(m_connected_net_idx, i, net_other);
return;
}
}
m_terms.push_back(term);
m_connected_net_idx.push_back(net_other);
}
void terms_for_net_t::set_pointers()
{
m_gt.resize(count(), 0.0);
m_go.resize(count(), 0.0);
m_Idr.resize(count(), 0.0);
m_connected_net_V.resize(count(), nullptr);
for (std::size_t i = 0; i < count(); i++)
{
m_terms[i]->set_ptrs(&m_gt[i], &m_go[i], &m_Idr[i]);
m_connected_net_V[i] = m_terms[i]->otherterm()->net().Q_Analog_state_ptr();
}
}
// ----------------------------------------------------------------------------------------
// matrix_solver
// ----------------------------------------------------------------------------------------
matrix_solver_t::matrix_solver_t(netlist_state_t &anetlist, const pstring &name,
const eSortType sort, const solver_parameters_t *params)
: device_t(anetlist, name)
, m_params(*params)
, m_stat_calculations(*this, "m_stat_calculations", 0)
, m_stat_newton_raphson(*this, "m_stat_newton_raphson", 0)
, m_stat_vsolver_calls(*this, "m_stat_vsolver_calls", 0)
, m_iterative_fail(*this, "m_iterative_fail", 0)
, m_iterative_total(*this, "m_iterative_total", 0)
, m_last_step(*this, "m_last_step", netlist_time::zero())
, m_fb_sync(*this, "FB_sync")
, m_Q_sync(*this, "Q_sync")
, m_ops(0)
, m_sort(sort)
{
connect_post_start(m_fb_sync, m_Q_sync);
}
void matrix_solver_t::setup_base(analog_net_t::list_t &nets)
{
log().debug("New solver setup\n");
m_nets.clear();
m_terms.clear();
for (auto & net : nets)
{
m_nets.push_back(net);
m_terms.push_back(plib::make_unique<terms_for_net_t>());
m_rails_temp.push_back(plib::make_unique<terms_for_net_t>());
}
for (std::size_t k = 0; k < nets.size(); k++)
{
analog_net_t *net = nets[k];
log().debug("setting up net\n");
net->set_solver(this);
for (auto &p : net->core_terms())
{
log().debug("{1} {2} {3}\n", p->name(), net->name(), net->isRailNet());
switch (p->type())
{
case detail::terminal_type::TERMINAL:
if (p->device().is_timestep())
if (!plib::container::contains(m_step_devices, &p->device()))
m_step_devices.push_back(&p->device());
if (p->device().is_dynamic())
if (!plib::container::contains(m_dynamic_devices, &p->device()))
m_dynamic_devices.push_back(&p->device());
{
auto *pterm = dynamic_cast<terminal_t *>(p);
add_term(k, pterm);
}
log().debug("Added terminal {1}\n", p->name());
break;
case detail::terminal_type::INPUT:
{
proxied_analog_output_t *net_proxy_output = nullptr;
for (auto & input : m_inps)
if (input->proxied_net() == &p->net())
{
net_proxy_output = input.get();
break;
}
if (net_proxy_output == nullptr)
{
pstring nname = this->name() + "." + pstring(plib::pfmt("m{1}")(m_inps.size()));
nl_assert(p->net().is_analog());
auto net_proxy_output_u = pool().make_poolptr<proxied_analog_output_t>(*this, nname, static_cast<analog_net_t *>(&p->net()));
net_proxy_output = net_proxy_output_u.get();
m_inps.push_back(std::move(net_proxy_output_u));
}
net_proxy_output->net().add_terminal(*p);
// FIXME: repeated calling - kind of brute force
net_proxy_output->net().rebuild_list();
log().debug("Added input\n");
}
break;
case detail::terminal_type::OUTPUT:
log().fatal(MF_1_UNHANDLED_ELEMENT_1_FOUND,
p->name());
break;
}
}
log().debug("added net with {1} populated connections\n", net->core_terms().size());
}
/* now setup the matrix */
setup_matrix();
}
void matrix_solver_t::sort_terms(eSortType sort)
{
/* Sort in descending order by number of connected matrix voltages.
* The idea is, that for Gauss-Seidel algo the first voltage computed
* depends on the greatest number of previous voltages thus taking into
* account the maximum amout of information.
*
* This actually improves performance on popeye slightly. Average
* GS computations reduce from 2.509 to 2.370
*
* Smallest to largest : 2.613
* Unsorted : 2.509
* Largest to smallest : 2.370
*
* Sorting as a general matrix pre-conditioning is mentioned in
* literature but I have found no articles about Gauss Seidel.
*
* For Gaussian Elimination however increasing order is better suited.
* NOTE: Even better would be to sort on elements right of the matrix diagonal.
*
*/
const std::size_t iN = m_nets.size();
switch (sort)
{
case PREFER_BAND_MATRIX:
{
for (std::size_t k = 0; k < iN - 1; k++)
{
auto pk = get_weight_around_diag(k,k);
for (std::size_t i = k+1; i < iN; i++)
{
auto pi = get_weight_around_diag(i,k);
if (pi < pk)
{
std::swap(m_terms[i], m_terms[k]);
std::swap(m_nets[i], m_nets[k]);
pk = get_weight_around_diag(k,k);
}
}
}
}
break;
case PREFER_IDENTITY_TOP_LEFT:
{
for (std::size_t k = 0; k < iN - 1; k++)
{
auto pk = get_left_right_of_diag(k,k);
for (std::size_t i = k+1; i < iN; i++)
{
auto pi = get_left_right_of_diag(i,k);
if (pi.first <= pk.first && pi.second >= pk.second)
{
std::swap(m_terms[i], m_terms[k]);
std::swap(m_nets[i], m_nets[k]);
pk = get_left_right_of_diag(k,k);
}
}
}
}
break;
case ASCENDING:
case DESCENDING:
{
int sort_order = (m_sort == DESCENDING ? 1 : -1);
for (std::size_t k = 0; k < iN - 1; k++)
for (std::size_t i = k+1; i < iN; i++)
{
if ((static_cast<int>(m_terms[k]->m_railstart) - static_cast<int>(m_terms[i]->m_railstart)) * sort_order < 0)
{
std::swap(m_terms[i], m_terms[k]);
std::swap(m_nets[i], m_nets[k]);
}
}
}
break;
case NOSORT:
break;
}
/* rebuild */
for (auto &term : m_terms)
{
int *other = term->connected_net_idx();
for (std::size_t i = 0; i < term->count(); i++)
//FIXME: this is weird
if (other[i] != -1)
other[i] = get_net_idx(&term->terms()[i]->otherterm()->net());
}
}
void matrix_solver_t::setup_matrix()
{
const std::size_t iN = m_nets.size();
for (std::size_t k = 0; k < iN; k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->connected_net_idx()[i], false);
m_terms[k]->set_pointers();
}
// free all - no longer needed
m_rails_temp.clear();
sort_terms(m_sort);
/* create a list of non zero elements. */
for (unsigned k = 0; k < iN; k++)
{
terms_for_net_t * t = m_terms[k].get();
/* pretty brutal */
int *other = t->connected_net_idx();
t->m_nz.clear();
for (std::size_t i = 0; i < t->m_railstart; i++)
if (!plib::container::contains(t->m_nz, static_cast<unsigned>(other[i])))
t->m_nz.push_back(static_cast<unsigned>(other[i]));
t->m_nz.push_back(k); // add diagonal
/* and sort */
std::sort(t->m_nz.begin(), t->m_nz.end());
}
/* create a list of non zero elements right of the diagonal
* These list anticipate the population of array elements by
* Gaussian elimination.
*/
for (std::size_t k = 0; k < iN; k++)
{
terms_for_net_t * t = m_terms[k].get();
/* pretty brutal */
int *other = t->connected_net_idx();
if (k==0)
t->m_nzrd.clear();
else
{
t->m_nzrd = m_terms[k-1]->m_nzrd;
for (auto j = t->m_nzrd.begin(); j != t->m_nzrd.end(); )
{
if (*j < k + 1)
j = t->m_nzrd.erase(j);
else
++j;
}
}
for (std::size_t i = 0; i < t->m_railstart; i++)
if (!plib::container::contains(t->m_nzrd, static_cast<unsigned>(other[i])) && other[i] >= static_cast<int>(k + 1))
t->m_nzrd.push_back(static_cast<unsigned>(other[i]));
/* and sort */
std::sort(t->m_nzrd.begin(), t->m_nzrd.end());
}
/* create a list of non zero elements below diagonal k
* This should reduce cache misses ...
*/
std::vector<std::vector<bool>> touched(iN, std::vector<bool>(iN));
for (std::size_t k = 0; k < iN; k++)
{
for (std::size_t j = 0; j < iN; j++)
touched[k][j] = false;
for (std::size_t j = 0; j < m_terms[k]->m_nz.size(); j++)
touched[k][m_terms[k]->m_nz[j]] = true;
}
m_ops = 0;
for (unsigned k = 0; k < iN; k++)
{
m_ops++; // 1/A(k,k)
for (unsigned row = k + 1; row < iN; row++)
{
if (touched[row][k])
{
m_ops++;
if (!plib::container::contains(m_terms[k]->m_nzbd, row))
m_terms[k]->m_nzbd.push_back(row);
for (std::size_t col = k + 1; col < iN; col++)
if (touched[k][col])
{
touched[row][col] = true;
m_ops += 2;
}
}
}
}
log().verbose("Number of mults/adds for {1}: {2}", name(), m_ops);
if ((false))
for (std::size_t k = 0; k < iN; k++)
{
pstring line = plib::pfmt("{1:3}")(k);
for (const auto & nzrd : m_terms[k]->m_nzrd)
line += plib::pfmt(" {1:3}")(nzrd);
log().verbose("{1}", line);
}
/*
* save states
*/
for (std::size_t k = 0; k < iN; k++)
{
pstring num = plib::pfmt("{1}")(k);
state().save(*this, m_terms[k]->m_last_V, this->name(), "lastV." + num);
state().save(*this, m_terms[k]->m_DD_n_m_1, this->name(), "m_DD_n_m_1." + num);
state().save(*this, m_terms[k]->m_h_n_m_1, this->name(), "m_h_n_m_1." + num);
// FIXME: This shouldn't be necessary, recalculate on each entry ...
state().save(*this, m_terms[k]->go(),"GO" + num, this->name(), m_terms[k]->count());
state().save(*this, m_terms[k]->gt(),"GT" + num, this->name(), m_terms[k]->count());
state().save(*this, m_terms[k]->Idr(),"IDR" + num, this->name(), m_terms[k]->count());
}
}
void matrix_solver_t::update_inputs()
{
// avoid recursive calls. Inputs are updated outside this call
for (auto &inp : m_inps)
inp->push(inp->proxied_net()->Q_Analog());
}
void matrix_solver_t::update_dynamic()
{
/* update all non-linear devices */
for (auto &dyn : m_dynamic_devices)
dyn->update_terminals();
}
void matrix_solver_t::reset()
{
m_last_step = netlist_time::zero();
}
void matrix_solver_t::update() NL_NOEXCEPT
{
const netlist_time new_timestep = solve(exec().time());
update_inputs();
if (m_params.m_dynamic_ts && has_timestep_devices() && new_timestep > netlist_time::zero())
{
m_Q_sync.net().toggle_and_push_to_queue(new_timestep);
}
}
/* update_forced is called from within param_update
*
* this should only occur outside of execution and thus
* using time should be safe.
*
*/
void matrix_solver_t::update_forced()
{
const netlist_time new_timestep = solve(exec().time());
plib::unused_var(new_timestep);
update_inputs();
if (m_params.m_dynamic_ts && has_timestep_devices())
{
m_Q_sync.net().toggle_and_push_to_queue(netlist_time::from_double(m_params.m_min_timestep));
}
}
void matrix_solver_t::step(const netlist_time &delta)
{
const nl_double dd = delta.as_double();
for (auto &d : m_step_devices)
d->timestep(dd);
}
void matrix_solver_t::solve_base()
{
++m_stat_vsolver_calls;
if (has_dynamic_devices())
{
std::size_t this_resched;
std::size_t newton_loops = 0;
do
{
update_dynamic();
// Gauss-Seidel will revert to Gaussian elemination if steps exceeded.
this_resched = this->vsolve_non_dynamic(true);
newton_loops++;
} while (this_resched > 1 && newton_loops < m_params.m_nr_loops);
m_stat_newton_raphson += newton_loops;
// reschedule ....
if (this_resched > 1 && !m_Q_sync.net().is_queued())
{
log().warning(MW_1_NEWTON_LOOPS_EXCEEDED_ON_NET_1, this->name());
m_Q_sync.net().toggle_and_push_to_queue(m_params.m_nr_recalc_delay);
}
}
else
{
this->vsolve_non_dynamic(false);
}
}
const netlist_time matrix_solver_t::solve(netlist_time now)
{
const netlist_time delta = now - m_last_step;
// We are already up to date. Avoid oscillations.
// FIXME: Make this a parameter!
if (delta < netlist_time::quantum())
return netlist_time::zero();
/* update all terminals for new time step */
m_last_step = now;
step(delta);
solve_base();
const netlist_time next_time_step = compute_next_timestep(delta.as_double());
return next_time_step;
}
int matrix_solver_t::get_net_idx(detail::net_t *net)
{
for (std::size_t k = 0; k < m_nets.size(); k++)
if (m_nets[k] == net)
return static_cast<int>(k);
return -1;
}
std::pair<int, int> matrix_solver_t::get_left_right_of_diag(std::size_t irow, std::size_t idiag)
{
/*
* return the maximum column left of the diagonal (-1 if no cols found)
* return the minimum column right of the diagonal (999999 if no cols found)
*/
const auto row = static_cast<int>(irow);
const auto diag = static_cast<int>(idiag);
int colmax = -1;
int colmin = 999999;
auto &term = m_terms[irow];
for (std::size_t i = 0; i < term->count(); i++)
{
auto col = get_net_idx(&term->terms()[i]->otherterm()->net());
if (col != -1)
{
if (col==row) col = diag;
else if (col==diag) col = row;
if (col > diag && col < colmin)
colmin = col;
else if (col < diag && col > colmax)
colmax = col;
}
}
return {colmax, colmin};
}
double matrix_solver_t::get_weight_around_diag(std::size_t row, std::size_t diag)
{
{
/*
* return average absolute distance
*/
std::vector<bool> touched(1024, false); // FIXME!
double weight = 0.0;
auto &term = m_terms[row];
for (std::size_t i = 0; i < term->count(); i++)
{
auto col = get_net_idx(&term->terms()[i]->otherterm()->net());
if (col >= 0)
{
auto colu = static_cast<std::size_t>(col);
if (!touched[colu])
{
if (colu==row) colu = static_cast<unsigned>(diag);
else if (colu==diag) colu = static_cast<unsigned>(row);
weight = weight + std::abs(static_cast<double>(colu) - static_cast<double>(diag));
touched[colu] = true;
}
}
}
return weight; // / static_cast<double>(term->m_railstart);
}
}
void matrix_solver_t::add_term(std::size_t k, terminal_t *term)
{
if (term->otherterm()->net().isRailNet())
{
m_rails_temp[k]->add(term, -1, false);
}
else
{
int ot = get_net_idx(&term->otherterm()->net());
if (ot>=0)
{
m_terms[k]->add(term, ot, true);
}
/* Should this be allowed ? */
else // if (ot<0)
{
m_rails_temp[k]->add(term, ot, true);
log().fatal(MF_1_FOUND_TERM_WITH_MISSING_OTHERNET, term->name());
}
}
}
netlist_time matrix_solver_t::compute_next_timestep(const double cur_ts)
{
nl_double new_solver_timestep = m_params.m_max_timestep;
if (m_params.m_dynamic_ts)
{
for (std::size_t k = 0, iN=m_terms.size(); k < iN; k++)
{
analog_net_t *n = m_nets[k];
terms_for_net_t *t = m_terms[k].get();
const nl_double DD_n = (n->Q_Analog() - t->m_last_V);
const nl_double hn = cur_ts;
nl_double DD2 = (DD_n / hn - t->m_DD_n_m_1 / t->m_h_n_m_1) / (hn + t->m_h_n_m_1);
nl_double new_net_timestep;
t->m_h_n_m_1 = hn;
t->m_DD_n_m_1 = DD_n;
if (std::fabs(DD2) > plib::constants<nl_double>::cast(1e-60)) // avoid div-by-zero
new_net_timestep = std::sqrt(m_params.m_dynamic_lte / std::fabs(plib::constants<nl_double>::cast(0.5)*DD2));
else
new_net_timestep = m_params.m_max_timestep;
if (new_net_timestep < new_solver_timestep)
new_solver_timestep = new_net_timestep;
t->m_last_V = n->Q_Analog();
}
if (new_solver_timestep < m_params.m_min_timestep)
{
//log().warning("Dynamic timestep below min timestep. Consider decreasing MIN_TIMESTEP: {1} us", new_solver_timestep*1.0e6);
new_solver_timestep = m_params.m_min_timestep;
}
}
//if (new_solver_timestep > 10.0 * hn)
// new_solver_timestep = 10.0 * hn;
/*
* FIXME: Factor 2 below is important. Without, we get timing issues. This must be a bug elsewhere.
*/
return std::max(netlist_time::from_double(new_solver_timestep), netlist_time::quantum() * 2);
}
void matrix_solver_t::log_stats()
{
if (this->m_stat_calculations != 0 && this->m_stat_vsolver_calls && this->m_params.m_log_stats)
{
log().verbose("==============================================");
log().verbose("Solver {1}", this->name());
log().verbose(" ==> {1} nets", this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name());
log().verbose(" has {1} elements", this->has_dynamic_devices() ? "dynamic" : "no dynamic");
log().verbose(" has {1} elements", this->has_timestep_devices() ? "timestep" : "no timestep");
log().verbose(" {1:6.3} average newton raphson loops",
static_cast<double>(this->m_stat_newton_raphson) / static_cast<double>(this->m_stat_vsolver_calls));
log().verbose(" {1:10} invocations ({2:6.0} Hz) {3:10} gs fails ({4:6.2} %) {5:6.3} average",
this->m_stat_calculations,
static_cast<double>(this->m_stat_calculations) / this->exec().time().as_double(),
this->m_iterative_fail,
100.0 * static_cast<double>(this->m_iterative_fail)
/ static_cast<double>(this->m_stat_calculations),
static_cast<double>(this->m_iterative_total) / static_cast<double>(this->m_stat_calculations));
}
}
} // namespace devices
} // namespace netlist