From f981eb7118bbdbdb49350352f88ed42e6c747d07 Mon Sep 17 00:00:00 2001 From: Couriersud Date: Thu, 19 Jun 2014 11:44:56 +0000 Subject: [PATCH] Moved solver templates into separate header files. --- .gitattributes | 4 + src/emu/netlist/analog/nld_bjt.h | 31 +- src/emu/netlist/analog/nld_ms_direct.h | 464 +++++++++++ src/emu/netlist/analog/nld_ms_direct1.h | 60 ++ src/emu/netlist/analog/nld_ms_direct2.h | 66 ++ src/emu/netlist/analog/nld_ms_gauss_seidel.h | 352 ++++++++ src/emu/netlist/analog/nld_solver.c | 826 +------------------ src/emu/netlist/analog/nld_solver.h | 125 +-- src/emu/netlist/analog/nld_twoterm.h | 6 +- 9 files changed, 1013 insertions(+), 921 deletions(-) create mode 100644 src/emu/netlist/analog/nld_ms_direct.h create mode 100644 src/emu/netlist/analog/nld_ms_direct1.h create mode 100644 src/emu/netlist/analog/nld_ms_direct2.h create mode 100644 src/emu/netlist/analog/nld_ms_gauss_seidel.h diff --git a/.gitattributes b/.gitattributes index dc5eaf41560..e204c61de48 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2925,6 +2925,10 @@ src/emu/netlist/analog/nld_bjt.c svneol=native#text/plain src/emu/netlist/analog/nld_bjt.h svneol=native#text/plain src/emu/netlist/analog/nld_fourterm.c svneol=native#text/plain src/emu/netlist/analog/nld_fourterm.h svneol=native#text/plain +src/emu/netlist/analog/nld_ms_direct.h svneol=native#text/plain +src/emu/netlist/analog/nld_ms_direct1.h svneol=native#text/plain +src/emu/netlist/analog/nld_ms_direct2.h svneol=native#text/plain +src/emu/netlist/analog/nld_ms_gauss_seidel.h svneol=native#text/plain src/emu/netlist/analog/nld_solver.c svneol=native#text/plain src/emu/netlist/analog/nld_solver.h svneol=native#text/plain src/emu/netlist/analog/nld_switches.c svneol=native#text/plain diff --git a/src/emu/netlist/analog/nld_bjt.h b/src/emu/netlist/analog/nld_bjt.h index 24948057faa..082f70d9b17 100644 --- a/src/emu/netlist/analog/nld_bjt.h +++ b/src/emu/netlist/analog/nld_bjt.h @@ -98,11 +98,12 @@ public: NETLIB_UPDATE_TERMINALS() { - double m = (is_qtype( BJT_NPN) ? 1 : -1); + const double m = (is_qtype( BJT_NPN) ? 1 : -1); - int new_state = (m_RB.deltaV() * m > m_V ) ? 1 : 0; + const int new_state = (m_RB.deltaV() * m > m_V ) ? 1 : 0; if (m_state_on ^ new_state) { +#if 0 double gb = m_gB; double gc = m_gC; double v = m_V * m; @@ -113,6 +114,11 @@ public: v = 0; gc = netlist().gmin(); } +#else + const double gb = new_state ? m_gB : netlist().gmin(); + const double gc = new_state ? m_gC : netlist().gmin(); + const double v = new_state ? m_V * m : 0; +#endif m_RB.set(gb, v, 0.0); m_RC.set(gc, 0.0, 0.0); //m_RB.update_dev(); @@ -163,22 +169,19 @@ public: NETLIB_UPDATE_TERMINALS() { - double polarity = (qtype() == BJT_NPN ? 1.0 : -1.0); + const double polarity = (qtype() == BJT_NPN ? 1.0 : -1.0); m_gD_BE.update_diode(-m_D_EB.deltaV() * polarity); m_gD_BC.update_diode(-m_D_CB.deltaV() * polarity); - double gee = m_gD_BE.G(); - double gcc = m_gD_BC.G(); - double gec = m_alpha_r * gcc; - double gce = m_alpha_f * gee; - double sIe = -m_gD_BE.I() + m_alpha_r * m_gD_BC.I(); - double sIc = m_alpha_f * m_gD_BE.I() - m_gD_BC.I(); - double Ie = (sIe + gee * m_gD_BE.Vd() - gec * m_gD_BC.Vd()) * polarity; - double Ic = (sIc - gce * m_gD_BE.Vd() + gcc * m_gD_BC.Vd()) * polarity; - //double Ie = sIe + gee * -m_D_EB.deltaV() - gec * -m_D_CB.deltaV(); - //double Ic = sIc - gce * -m_D_EB.deltaV() + gcc * -m_D_CB.deltaV(); - //printf("EB %f sIe %f sIc %f\n", m_D_BE.deltaV(), sIe, sIc); + const double gee = m_gD_BE.G(); + const double gcc = m_gD_BC.G(); + const double gec = m_alpha_r * gcc; + const double gce = m_alpha_f * gee; + const double sIe = -m_gD_BE.I() + m_alpha_r * m_gD_BC.I(); + const double sIc = m_alpha_f * m_gD_BE.I() - m_gD_BC.I(); + const double Ie = (sIe + gee * m_gD_BE.Vd() - gec * m_gD_BC.Vd()) * polarity; + const double Ic = (sIc - gce * m_gD_BE.Vd() + gcc * m_gD_BC.Vd()) * polarity; m_D_EB.set_mat(gee, gec - gee, gce - gee, gee - gec, Ie, -Ie); m_D_CB.set_mat(gcc, gce - gcc, gec - gcc, gcc - gce, Ic, -Ic); diff --git a/src/emu/netlist/analog/nld_ms_direct.h b/src/emu/netlist/analog/nld_ms_direct.h new file mode 100644 index 00000000000..1d1089f054c --- /dev/null +++ b/src/emu/netlist/analog/nld_ms_direct.h @@ -0,0 +1,464 @@ +/* + * nld_ms_direct.h + * + */ + +#ifndef NLD_MS_DIRECT_H_ +#define NLD_MS_DIRECT_H_ + +#include "nld_solver.h" + +template +class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t +{ +public: + + netlist_matrix_solver_direct_t(const netlist_solver_parameters_t ¶ms, int size); + + virtual ~netlist_matrix_solver_direct_t(); + + ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets); + ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); } + + ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; } + + ATTR_HOT inline int vsolve_non_dynamic(); + +protected: + ATTR_COLD virtual void add_term(int net_idx, netlist_terminal_t *term); + + ATTR_HOT virtual double vsolve(); + + ATTR_HOT int solve_non_dynamic(); + ATTR_HOT void build_LE(); + ATTR_HOT void gauss_LE(double (* RESTRICT x)); + ATTR_HOT double delta(const double (* RESTRICT V)); + ATTR_HOT void store(const double (* RESTRICT V), const bool store_RHS); + + /* bring the whole system to the current time + * Don't schedule a new calculation time. The recalculation has to be + * triggered by the caller after the netlist element was changed. + */ + ATTR_HOT double compute_next_timestep(); + + double m_A[_storage_N][((_storage_N + 7) / 8) * 8]; + double m_RHS[_storage_N]; + double m_last_RHS[_storage_N]; // right hand side - contains currents + double m_Vdelta[_storage_N]; + double m_last_V[_storage_N]; + + terms_t **m_terms; + terms_t *m_rails_temp; + +private: + vector_ops_t *m_row_ops[_storage_N + 1]; + + int m_dim; + double m_lp_fact; +}; + +// ---------------------------------------------------------------------------------------- +// netlist_matrix_solver_direct +// ---------------------------------------------------------------------------------------- + +template +netlist_matrix_solver_direct_t::~netlist_matrix_solver_direct_t() +{ + for (int k=0; k<_storage_N; k++) + { + //delete[] m_A[k]; + } + //delete[] m_last_RHS; + //delete[] m_RHS; + delete[] m_terms; + delete[] m_rails_temp; + //delete[] m_row_ops; + +} + +template +ATTR_HOT double netlist_matrix_solver_direct_t::compute_next_timestep() +{ + double new_solver_timestep = m_params.m_max_timestep; + + if (m_params.m_dynamic) + { + /* + * FIXME: We should extend the logic to use either all nets or + * only output nets. + */ +#if 0 + for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p)) + { + netlist_analog_net_t *n = (*p)->m_proxied_net; +#else + for (int k = 0; k < N(); k++) + { + netlist_analog_net_t *n = m_nets[k]; +#endif + const double DD_n = (n->m_cur_Analog - m_last_V[k]); + const double hn = current_timestep(); + + double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1); + double new_net_timestep; + + n->m_h_n_m_1 = hn; + n->m_DD_n_m_1 = DD_n; + if (fabs(DD2) > 1e-50) // avoid div-by-zero + new_net_timestep = sqrt(m_params.m_lte / fabs(0.5*DD2)); + else + new_net_timestep = m_params.m_max_timestep; + + if (new_net_timestep < new_solver_timestep) + new_solver_timestep = new_net_timestep; + } + if (new_solver_timestep < m_params.m_min_timestep) + new_solver_timestep = m_params.m_min_timestep; + } + //if (new_solver_timestep > 10.0 * hn) + // new_solver_timestep = 10.0 * hn; + return new_solver_timestep; +} + +template +ATTR_COLD void netlist_matrix_solver_direct_t::add_term(int k, netlist_terminal_t *term) +{ + if (term->m_otherterm->net().isRailNet()) + { + m_rails_temp[k].add(term, -1); + } + else + { + int ot = get_net_idx(&term->m_otherterm->net()); + if (ot>=0) + { + m_terms[k]->add(term, ot); + SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go)); + } + /* Should this be allowed ? */ + else // if (ot<0) + { + m_rails_temp[k].add(term, ot); + netlist().error("found term with missing othernet %s\n", term->name().cstr()); + } + } +} + + +template +ATTR_COLD void netlist_matrix_solver_direct_t::vsetup(netlist_analog_net_t::list_t &nets) +{ + + if (m_dim < nets.count()) + netlist().error("Dimension %d less than %d", m_dim, nets.count()); + + for (int k = 0; k < N(); k++) + { + m_terms[k]->clear(); + m_rails_temp[k].clear(); + } + + netlist_matrix_solver_t::setup(nets); + + for (int k = 0; k < N(); k++) + { + m_terms[k]->m_railstart = m_terms[k]->count(); + for (int i = 0; i < m_rails_temp[k].count(); i++) + this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]); + + m_rails_temp[k].clear(); // no longer needed + m_terms[k]->set_pointers(); + } + +#if 1 + + /* Sort in descending order by number of connected matrix voltages. + * The idea is, that for Gauss-Seidel algo the first voltage computed + * depends on the greatest number of previous voltages thus taking into + * account the maximum amout of information. + * + * This actually improves performance on popeye slightly. Average + * GS computations reduce from 2.509 to 2.370 + * + * Smallest to largest : 2.613 + * Unsorted : 2.509 + * Largest to smallest : 2.370 + * + * Sorting as a general matrix pre-conditioning is mentioned in + * literature but I have found no articles about Gauss Seidel. + * + */ + + + for (int k = 0; k < N() / 2; k++) + for (int i = 0; i < N() - 1; i++) + { + if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart) + { + std::swap(m_terms[i],m_terms[i+1]); + m_nets.swap(i, i+1); + } + } + + for (int k = 0; k < N(); k++) + { + int *other = m_terms[k]->net_other(); + for (int i = 0; i < m_terms[k]->count(); i++) + if (other[i] != -1) + other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net()); + } + +#endif + +} + +template +ATTR_HOT void netlist_matrix_solver_direct_t::build_LE() +{ +#if 0 + for (int k=0; k < N(); k++) + for (int i=0; i < N(); i++) + m_A[k][i] = 0.0; +#endif + + for (int k = 0; k < N(); k++) + { + for (int i=0; i < N(); i++) + m_A[k][i] = 0.0; + + double rhsk = 0.0; + double akk = 0.0; + { + const int terms_count = m_terms[k]->count(); + const double * RESTRICT gt = m_terms[k]->gt(); + const double * RESTRICT go = m_terms[k]->go(); + const double * RESTRICT Idr = m_terms[k]->Idr(); +#if VECTALT + + for (int i = 0; i < terms_count; i++) + { + rhsk = rhsk + Idr[i]; + akk = akk + gt[i]; + } +#else + m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk); +#endif + double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog(); + for (int i = m_terms[k]->m_railstart; i < terms_count; i++) + { + //rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog(); + rhsk = rhsk + go[i] * *other_cur_analog[i]; + } + } +#if 0 + /* + * Matrix preconditioning with 1.0 / Akk + * + * will save a number of calculations during elimination + * + */ + akk = 1.0 / akk; + m_RHS[k] = rhsk * akk; + m_A[k][k] += 1.0; + { + const int *net_other = m_terms[k]->net_other(); + const double *go = m_terms[k]->go(); + const int railstart = m_terms[k]->m_railstart; + + for (int i = 0; i < railstart; i++) + { + m_A[k][net_other[i]] += -go[i] * akk; + } + } +#else + m_RHS[k] = rhsk; + m_A[k][k] += akk; + { + const int * RESTRICT net_other = m_terms[k]->net_other(); + const double * RESTRICT go = m_terms[k]->go(); + const int railstart = m_terms[k]->m_railstart; + + for (int i = 0; i < railstart; i++) + { + m_A[k][net_other[i]] += -go[i]; + } + } +#endif + } +} + +template +ATTR_HOT void netlist_matrix_solver_direct_t::gauss_LE( + double (* RESTRICT x)) +{ +#if 0 + for (int i = 0; i < N(); i++) + { + for (int k = 0; k < N(); k++) + printf("%f ", m_A[i][k]); + printf("| %f = %f \n", x[i], m_RHS[i]); + } + printf("\n"); +#endif + + const int kN = N(); + + for (int i = 0; i < kN; i++) { + // FIXME: use a parameter to enable pivoting? + if (USE_PIVOT_SEARCH) + { + /* Find the row with the largest first value */ + int maxrow = i; + for (int j = i + 1; j < kN; j++) + { + if (fabs(m_A[j][i]) > fabs(m_A[maxrow][i])) + maxrow = j; + } + + if (maxrow != i) + { + /* Swap the maxrow and ith row */ + for (int k = i; k < kN; k++) { + std::swap(m_A[i][k], m_A[maxrow][k]); + } + std::swap(m_RHS[i], m_RHS[maxrow]); + } + } + + /* FIXME: Singular matrix? */ + const double f = 1.0 / m_A[i][i]; + + /* Eliminate column i from row j */ + + for (int j = i + 1; j < kN; j++) + { + const double f1 = - m_A[j][i] * f; + if (f1 != 0.0) + { +#if 0 && VECTALT + for (int k = i + 1; k < kN; k++) + m_A[j][k] += m_A[i][k] * f1; +#else + // addmult gives some performance increase here... + m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ; +#endif + m_RHS[j] += m_RHS[i] * f1; + } + } + } + /* back substitution */ + for (int j = kN - 1; j >= 0; j--) + { + double tmp = 0; + + for (int k = j + 1; k < kN; k++) + tmp += m_A[j][k] * x[k]; + + x[j] = (m_RHS[j] - tmp) / m_A[j][j]; + } +#if 0 + printf("Solution:\n"); + for (int i = 0; i < N(); i++) + { + for (int k = 0; k < N(); k++) + printf("%f ", m_A[i][k]); + printf("| %f = %f \n", x[i], m_RHS[i]); + } + printf("\n"); +#endif + +} + +template +ATTR_HOT double netlist_matrix_solver_direct_t::delta( + const double (* RESTRICT V)) +{ + double cerr = 0; + double cerr2 = 0; + for (int i = 0; i < this->N(); i++) + { + const double e = (V[i] - this->m_nets[i]->m_cur_Analog); + const double e2 = (m_RHS[i] - this->m_last_RHS[i]); + cerr = (fabs(e) > cerr ? fabs(e) : cerr); + cerr2 = (fabs(e2) > cerr2 ? fabs(e2) : cerr2); + } + // FIXME: Review + return cerr + cerr2*100000.0; +} + +template +ATTR_HOT void netlist_matrix_solver_direct_t::store( + const double (* RESTRICT V), const bool store_RHS) +{ + for (int i = 0; i < this->N(); i++) + { + this->m_nets[i]->m_cur_Analog = V[i]; + } + if (store_RHS) + { + for (int i = 0; i < this->N(); i++) + { + this->m_last_RHS[i] = m_RHS[i]; + } + } +} + +template +ATTR_HOT double netlist_matrix_solver_direct_t::vsolve() +{ + solve_base(this); + return this->compute_next_timestep(); +} + + +template +ATTR_HOT int netlist_matrix_solver_direct_t::solve_non_dynamic() +{ + double new_v[_storage_N] = { 0.0 }; + + this->gauss_LE(new_v); + + if (this->is_dynamic()) + { + double err = delta(new_v); + + store(new_v, true); + + if (err > this->m_params.m_accuracy) + { + return 2; + } + return 1; + } + store(new_v, false); // ==> No need to store RHS + return 1; +} + +template +ATTR_HOT inline int netlist_matrix_solver_direct_t::vsolve_non_dynamic() +{ + this->build_LE(); + + return this->solve_non_dynamic(); +} + +template +netlist_matrix_solver_direct_t::netlist_matrix_solver_direct_t(const netlist_solver_parameters_t ¶ms, int size) +: netlist_matrix_solver_t(params) +, m_dim(size) +, m_lp_fact(0) +{ + m_terms = new terms_t *[N()]; + m_rails_temp = new terms_t[N()]; + + for (int k = 0; k < N(); k++) + { + m_terms[k] = new terms_t; + m_row_ops[k] = vector_ops_t::create_ops(k); + } + m_row_ops[N()] = vector_ops_t::create_ops(N()); +} + + + +#endif /* NLD_MS_DIRECT_H_ */ diff --git a/src/emu/netlist/analog/nld_ms_direct1.h b/src/emu/netlist/analog/nld_ms_direct1.h new file mode 100644 index 00000000000..f562ea93dff --- /dev/null +++ b/src/emu/netlist/analog/nld_ms_direct1.h @@ -0,0 +1,60 @@ +/* + * nld_ms_direct1.h + * + */ + +#ifndef NLD_MS_DIRECT1_H_ +#define NLD_MS_DIRECT1_H_ + +#include "nld_solver.h" +#include "nld_ms_direct.h" + +class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1> +{ +public: + + netlist_matrix_solver_direct1_t(const netlist_solver_parameters_t ¶ms) + : netlist_matrix_solver_direct_t<1, 1>(params, 1) + {} + ATTR_HOT inline int vsolve_non_dynamic(); +protected: + ATTR_HOT virtual double vsolve(); +private: +}; + +// ---------------------------------------------------------------------------------------- +// netlist_matrix_solver - Direct1 +// ---------------------------------------------------------------------------------------- + +ATTR_HOT double netlist_matrix_solver_direct1_t::vsolve() +{ + solve_base(this); + return this->compute_next_timestep(); +} + +ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic() +{ + + netlist_analog_net_t *net = m_nets[0]; + this->build_LE(); + //NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]); + + double new_val = m_RHS[0] / m_A[0][0]; + + double e = (new_val - net->m_cur_Analog); + double cerr = fabs(e); + + net->m_cur_Analog = new_val; + + if (is_dynamic() && (cerr > m_params.m_accuracy)) + { + return 2; + } + else + return 1; + +} + + + +#endif /* NLD_MS_DIRECT1_H_ */ diff --git a/src/emu/netlist/analog/nld_ms_direct2.h b/src/emu/netlist/analog/nld_ms_direct2.h new file mode 100644 index 00000000000..03bfc2b7429 --- /dev/null +++ b/src/emu/netlist/analog/nld_ms_direct2.h @@ -0,0 +1,66 @@ +/* + * nld_ms_direct1.h + * + */ + +#ifndef NLD_MS_DIRECT2_H_ +#define NLD_MS_DIRECT2_H_ + +#include "nld_solver.h" +#include "nld_ms_direct.h" + + + +class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2> +{ +public: + + netlist_matrix_solver_direct2_t(const netlist_solver_parameters_t ¶ms) + : netlist_matrix_solver_direct_t<2, 2>(params, 2) + {} + ATTR_HOT inline int vsolve_non_dynamic(); +protected: + ATTR_HOT virtual double vsolve(); +private: +}; + +// ---------------------------------------------------------------------------------------- +// netlist_matrix_solver - Direct2 +// ---------------------------------------------------------------------------------------- + +ATTR_HOT double netlist_matrix_solver_direct2_t::vsolve() +{ + solve_base(this); + return this->compute_next_timestep(); +} + +ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic() +{ + + build_LE(); + + const double a = m_A[0][0]; + const double b = m_A[0][1]; + const double c = m_A[1][0]; + const double d = m_A[1][1]; + + double new_val[2]; + new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c); + new_val[0] = (m_RHS[0] - b * new_val[1]) / a; + + if (is_dynamic()) + { + double err = this->delta(new_val); + store(new_val, true); + if (err > m_params.m_accuracy ) + return 2; + else + return 1; + } + store(new_val, false); + return 1; +} + + + +#endif /* NLD_MS_DIRECT2_H_ */ diff --git a/src/emu/netlist/analog/nld_ms_gauss_seidel.h b/src/emu/netlist/analog/nld_ms_gauss_seidel.h new file mode 100644 index 00000000000..fb066c7b0a1 --- /dev/null +++ b/src/emu/netlist/analog/nld_ms_gauss_seidel.h @@ -0,0 +1,352 @@ +/* + * nld_ms_direct1.h + * + */ + +#ifndef NLD_MS_GAUSS_SEIDEL_H_ +#define NLD_MS_GAUSS_SEIDEL_H_ + +#include + +#include "nld_solver.h" +#include "nld_ms_direct.h" + +template +class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matrix_solver_direct_t +{ +public: + + netlist_matrix_solver_gauss_seidel_t(const netlist_solver_parameters_t ¶ms, int size) + : netlist_matrix_solver_direct_t(params, size) + , m_lp_fact(0) + , m_gs_fail(0) + , m_gs_total(0) + {} + + virtual ~netlist_matrix_solver_gauss_seidel_t() {} + + ATTR_COLD virtual void log_stats(); + + ATTR_HOT inline int vsolve_non_dynamic(); +protected: + ATTR_HOT virtual double vsolve(); + +private: + double m_lp_fact; + int m_gs_fail; + int m_gs_total; + +}; + +// ---------------------------------------------------------------------------------------- +// netlist_matrix_solver - Gauss - Seidel +// ---------------------------------------------------------------------------------------- + +template +void netlist_matrix_solver_gauss_seidel_t::log_stats() +{ +#if 1 + printf("==============================================\n"); + printf("Solver %s\n", this->name().cstr()); + printf(" ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr()); + printf(" has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic"); + printf(" has %s elements\n", this->is_timestep() ? "timestep" : "no timestep"); + printf(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average\n", + this->m_calculations, + this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0), + this->m_gs_fail, + 100.0 * (double) this->m_gs_fail / (double) this->m_calculations, + (double) this->m_gs_total / (double) this->m_calculations); +#endif +} + +template +ATTR_HOT double netlist_matrix_solver_gauss_seidel_t::vsolve() +{ + /* + * enable linear prediction on first newton pass + */ + + if (USE_LINEAR_PREDICTION) + for (int k = 0; k < this->N(); k++) + { + this->m_last_V[k] = this->m_nets[k]->m_cur_Analog; + this->m_nets[k]->m_cur_Analog = this->m_nets[k]->m_cur_Analog + this->m_Vdelta[k] * this->current_timestep() * m_lp_fact; + } + else + for (int k = 0; k < this->N(); k++) + { + this->m_last_V[k] = this->m_nets[k]->m_cur_Analog; + } + + this->solve_base(this); + + if (USE_LINEAR_PREDICTION) + { + double sq = 0; + double sqo = 0; + const double rez_cts = 1.0 / this->current_timestep(); + for (int k = 0; k < this->N(); k++) + { + const netlist_analog_net_t *n = this->m_nets[k]; + const double nv = (n->m_cur_Analog - this->m_last_V[k]) * rez_cts ; + sq += nv * nv; + sqo += this->m_Vdelta[k] * this->m_Vdelta[k]; + this->m_Vdelta[k] = nv; + } + if (sqo > 1e-90) + m_lp_fact = std::min(sqrt(sq/sqo), 2.0); + else + m_lp_fact = 0.0; + } + + + return this->compute_next_timestep(); +} + +template +ATTR_HOT inline int netlist_matrix_solver_gauss_seidel_t::vsolve_non_dynamic() +{ + /* The matrix based code looks a lot nicer but actually is 30% slower than + * the optimized code which works directly on the data structures. + * Need something like that for gaussian elimination as well. + */ + +#if 0 || USE_MATRIX_GS + static double ws = 1.0; + ATTR_ALIGN double new_v[_storage_N] = { 0.0 }; + const int iN = this->N(); + + bool resched = false; + + int resched_cnt = 0; + + this->build_LE(); + + { + double frob; + frob = 0; + double rmin = 1e99, rmax = -1e99; + for (int k = 0; k < iN; k++) + { + new_v[k] = this->m_nets[k]->m_cur_Analog; + double s=0.0; + for (int i = 0; i < iN; i++) + { + frob += this->m_A[k][i] * this->m_A[k][i]; + s = s + fabs(this->m_A[k][i]); + } + + if (srmax) + rmax = s; + } + //if (fabs(rmax) > 0.01) + // printf("rmin %f rmax %f\n", rmin, rmax); +#if 0 + double frobA = sqrt(frob /(iN)); + if (1 &&frobA < 1.0) + //ws = 2.0 / (1.0 + sqrt(1.0-frobA)); + ws = 2.0 / (2.0 - frobA); + else + ws = 1.0; + ws = 0.9; +#else + // calculate an estimate for rho. + // This is based on the Perron–Frobenius theorem for positive matrices. + // No mathematical proof here. The following estimates the + // optimal relaxation parameter pretty well. Unfortunately, the + // overhead is bigger than the gain. Consequently the fast GS below + // uses a fixed GS. One can however use this here to determine a + // suitable parameter. + double rm = (rmax + rmin) * 0.5; + if (rm < 1.0) + ws = 2.0 / (1.0 + sqrt(1.0-rm)); + else + ws = 1.0; +#endif + } + + // Frobenius norm for (D-L)^(-1)U + //double frobU; + //double frobL; + //double norm; + do { + resched = false; + double cerr = 0.0; + //frobU = 0; + //frobL = 0; + //norm = 0; + + for (int k = 0; k < iN; k++) + { + double Idrive = 0; + //double norm_t = 0; + // Reduction loops need -ffast-math + for (int i = 0; i < iN; i++) + Idrive += this->m_A[k][i] * new_v[i]; + + for (int i = 0; i < iN; i++) + { + //if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k]; + //if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k]; + //norm_t += fabs(this->m_A[k][i]); + } + + //if (norm_t > norm) norm = norm_t; + const double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k]; + + const double e = fabs(new_val - new_v[k]); + cerr = (e > cerr ? e : cerr); + new_v[k] = new_val; + } + + if (cerr > this->m_params.m_accuracy) + { + resched = true; + } + resched_cnt++; + //ATTR_UNUSED double frobUL = sqrt((frobU + frobL) / (double) (iN) / (double) (iN)); + } while (resched && (resched_cnt < this->m_params.m_gs_loops)); + //printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), frobUL, frobA, norm); + //printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + sqrt(1-frobUL)), 2.0 / (1.0 + sqrt(1-frobA)) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) )); + //printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) )); + + + this->store(new_v, false); + + this->m_gs_total += resched_cnt; + if (resched) + { + //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS"); + this->m_gs_fail++; + int tmp = netlist_matrix_solver_direct_t::solve_non_dynamic(); + this->m_calculations++; + return tmp; + } + else { + this->m_calculations++; + + return resched_cnt; + } + +#else + const int iN = this->N(); + bool resched = false; + int resched_cnt = 0; + + /* ideally, we could get an estimate for the spectral radius of + * Inv(D - L) * U + * + * and estimate using + * + * omega = 2.0 / (1.0 + sqrt(1-rho)) + */ + + const double ws = this->m_params.m_sor; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1))); + //const double ws = 2.0 / (1.0 + sin(3.14159 * 4 / (double) (this->N()))); + + ATTR_ALIGN double w[_storage_N]; + ATTR_ALIGN double one_m_w[_storage_N]; + ATTR_ALIGN double RHS[_storage_N]; + ATTR_ALIGN double new_V[_storage_N]; + + for (int k = 0; k < iN; k++) + { + double gtot_t = 0.0; + double gabs_t = 0.0; + double RHS_t = 0.0; + + new_V[k] = this->m_nets[k]->m_cur_Analog; + + { + const int term_count = this->m_terms[k]->count(); + const double * const RESTRICT gt = this->m_terms[k]->gt(); + const double * const RESTRICT go = this->m_terms[k]->go(); + const double * const RESTRICT Idr = this->m_terms[k]->Idr(); + const double * const *other_cur_analog = this->m_terms[k]->other_curanalog(); +#if VECTALT + for (int i = 0; i < term_count; i++) + { + gtot_t = gtot_t + gt[i]; + RHS_t = RHS_t + Idr[i]; + } + if (USE_GABS) + for (int i = 0; i < term_count; i++) + gabs_t = gabs_t + fabs(go[i]); +#else + if (USE_GABS) + this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t); + else + this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t); +#endif + for (int i = this->m_terms[k]->m_railstart; i < term_count; i++) + RHS_t = RHS_t + go[i] * *other_cur_analog[i]; + } + + RHS[k] = RHS_t; + + //if (fabs(gabs_t - fabs(gtot_t)) > 1e-20) + // printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t); + + gabs_t *= 0.5; // avoid rounding issues + if (!USE_GABS || gabs_t <= gtot_t) + { + w[k] = ws / gtot_t; + one_m_w[k] = 1.0 - ws; + } + else + { + //printf("abs: %f tot: %f\n", gabs_t, gtot_t); + w[k] = 1.0 / (gtot_t + gabs_t); + one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t); + } + + } + + const double accuracy = this->m_params.m_accuracy; + + do { + resched = false; + + for (int k = 0; k < iN; k++) + { + const int * RESTRICT net_other = this->m_terms[k]->net_other(); + const int railstart = this->m_terms[k]->m_railstart; + const double * RESTRICT go = this->m_terms[k]->go(); + + double Idrive = 0.0; + for (int i = 0; i < railstart; i++) + Idrive = Idrive + go[i] * new_V[net_other[i]]; + + //double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]); + const double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k]; + + resched = resched || (std::abs(new_val - new_V[k]) > accuracy); + new_V[k] = new_val; + } + + resched_cnt++; + } while (resched && (resched_cnt < this->m_params.m_gs_loops)); + + for (int k = 0; k < iN; k++) + this->m_nets[k]->m_cur_Analog = new_V[k]; + + this->m_gs_total += resched_cnt; + this->m_calculations++; + + if (resched) + { + //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS"); + this->m_gs_fail++; + return netlist_matrix_solver_direct_t::vsolve_non_dynamic(); + } + else { + return resched_cnt; + } +#endif +} + + +#endif /* NLD_MS_GAUSS_SEIDEL_H_ */ diff --git a/src/emu/netlist/analog/nld_solver.c b/src/emu/netlist/analog/nld_solver.c index b49076332cb..5d729cbb7e6 100644 --- a/src/emu/netlist/analog/nld_solver.c +++ b/src/emu/netlist/analog/nld_solver.c @@ -3,30 +3,8 @@ * */ -#include - -#include "nld_solver.h" -#include "nld_twoterm.h" -#include "../nl_lists.h" - -#if HAS_OPENMP -#include "omp.h" -#endif - -#define USE_PIVOT_SEARCH (0) -#define VECTALT 1 -#define USE_GABS 0 -#define USE_MATRIX_GS 0 -//#define SORP 1.059 -#define SORP 1.059 -// savings are eaten up by effort -#define USE_LINEAR_PREDICTION (0) - -#define SOLVER_VERBOSE_OUT(x) do {} while (0) -//#define SOLVER_VERBOSE_OUT(x) printf x - -/* Commented out for now. Relatively low number of terminals / nes makes - * the vectorizations this enables pretty expensive +/* Commented out for now. Relatively low number of terminals / nets make + * the vectorizations fast-math enables pretty expensive */ #if 0 @@ -35,17 +13,33 @@ #pragma GCC optimize "-funswitch-loops" #pragma GCC optimize "-fvariable-expansion-in-unroller" #pragma GCC optimize "-funsafe-loop-optimizations" +#pragma GCC optimize "-fvect-cost-model" +#pragma GCC optimize "-fvariable-expansion-in-unroller" #pragma GCC optimize "-ftree-loop-if-convert-stores" #pragma GCC optimize "-ftree-loop-distribution" #pragma GCC optimize "-ftree-loop-im" #pragma GCC optimize "-ftree-loop-ivcanon" #pragma GCC optimize "-fivopts" #pragma GCC optimize "-ftree-parallelize-loops=4" -#pragma GCC optimize "-fvect-cost-model" -#pragma GCC optimize "-fvariable-expansion-in-unroller" #endif -static vector_ops_t *create_ops(const int size) +#define SOLVER_VERBOSE_OUT(x) do {} while (0) +//#define SOLVER_VERBOSE_OUT(x) printf x + +#include +#include "nld_solver.h" +#include "nld_ms_direct.h" +#include "nld_ms_direct1.h" +#include "nld_ms_direct2.h" +#include "nld_ms_gauss_seidel.h" +#include "nld_twoterm.h" +#include "../nl_lists.h" + +#if HAS_OPENMP +#include "omp.h" +#endif + +vector_ops_t *vector_ops_t::create_ops(const int size) { switch (size) { @@ -98,15 +92,15 @@ ATTR_COLD void terms_t::set_pointers() m_other_curanalog[i] = &m_term[i]->m_otherterm->net().as_analog().m_cur_Analog; } - m_ops = create_ops(m_gt.count()); + m_ops = vector_ops_t::create_ops(m_gt.count()); } // ---------------------------------------------------------------------------------------- // netlist_matrix_solver // ---------------------------------------------------------------------------------------- -ATTR_COLD netlist_matrix_solver_t::netlist_matrix_solver_t() -: m_calculations(0), m_cur_ts(0) +ATTR_COLD netlist_matrix_solver_t::netlist_matrix_solver_t(const netlist_solver_parameters_t ¶ms) +: m_calculations(0), m_params(params), m_cur_ts(0) { } @@ -197,85 +191,6 @@ ATTR_COLD void netlist_matrix_solver_t::setup(netlist_analog_net_t::list_t &nets } } -// ---------------------------------------------------------------------------------------- -// netlist_matrix_solver_direct -// ---------------------------------------------------------------------------------------- - -template -netlist_matrix_solver_direct_t::netlist_matrix_solver_direct_t(int size) -: netlist_matrix_solver_t() -, m_dim(size) -, m_lp_fact(0) -{ - m_terms = new terms_t *[N()]; - m_rails_temp = new terms_t[N()]; - - for (int k = 0; k < N(); k++) - { - m_terms[k] = new terms_t; - m_row_ops[k] = create_ops(k); - } - m_row_ops[N()] = create_ops(N()); -} - -template -netlist_matrix_solver_direct_t::~netlist_matrix_solver_direct_t() -{ - for (int k=0; k<_storage_N; k++) - { - //delete[] m_A[k]; - } - //delete[] m_last_RHS; - //delete[] m_RHS; - delete[] m_terms; - delete[] m_rails_temp; - //delete[] m_row_ops; - -} - -template -ATTR_HOT double netlist_matrix_solver_direct_t::compute_next_timestep() -{ - double new_solver_timestep = m_params.m_max_timestep; - - if (m_params.m_dynamic) - { - /* - * FIXME: We should extend the logic to use either all nets or - * only output nets. - */ -#if 0 - for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p)) - { - netlist_analog_net_t *n = (*p)->m_proxied_net; -#else - for (int k = 0; k < N(); k++) - { - netlist_analog_net_t *n = m_nets[k]; -#endif - const double DD_n = (n->m_cur_Analog - m_last_V[k]); - const double hn = current_timestep(); - - double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1); - double new_net_timestep; - - n->m_h_n_m_1 = hn; - n->m_DD_n_m_1 = DD_n; - if (fabs(DD2) > 1e-50) // avoid div-by-zero - new_net_timestep = sqrt(m_params.m_lte / fabs(0.5*DD2)); - else - new_net_timestep = m_params.m_max_timestep; - - if (new_net_timestep < new_solver_timestep) - new_solver_timestep = new_net_timestep; - } - if (new_solver_timestep < m_params.m_min_timestep) - new_solver_timestep = m_params.m_min_timestep; - } - //if (new_solver_timestep > 10.0 * hn) - // new_solver_timestep = 10.0 * hn; - return new_solver_timestep; -} ATTR_HOT void netlist_matrix_solver_t::update_inputs() { @@ -386,23 +301,6 @@ ATTR_HOT double netlist_matrix_solver_t::solve() return next_time_step; } -template -void netlist_matrix_solver_gauss_seidel_t::log_stats() -{ -#if 1 - printf("==============================================\n"); - printf("Solver %s\n", this->name().cstr()); - printf(" ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr()); - printf(" has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic"); - printf(" has %s elements\n", this->is_timestep() ? "timestep" : "no timestep"); - printf(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average\n", - this->m_calculations, - this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0), - this->m_gs_fail, - 100.0 * (double) this->m_gs_fail / (double) this->m_calculations, - (double) this->m_gs_total / (double) this->m_calculations); -#endif -} // ---------------------------------------------------------------------------------------- // netlist_matrix_solver - Direct base @@ -416,672 +314,11 @@ ATTR_COLD int netlist_matrix_solver_t::get_net_idx(netlist_net_t *net) return -1; } -template -ATTR_COLD void netlist_matrix_solver_direct_t::add_term(int k, netlist_terminal_t *term) -{ - if (term->m_otherterm->net().isRailNet()) - { - m_rails_temp[k].add(term, -1); - } - else - { - int ot = get_net_idx(&term->m_otherterm->net()); - if (ot>=0) - { - m_terms[k]->add(term, ot); - SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go)); - } - /* Should this be allowed ? */ - else // if (ot<0) - { - m_rails_temp[k].add(term, ot); - netlist().error("found term with missing othernet %s\n", term->name().cstr()); - } - } -} -template -ATTR_COLD void netlist_matrix_solver_direct_t::vsetup(netlist_analog_net_t::list_t &nets) -{ - if (m_dim < nets.count()) - netlist().error("Dimension %d less than %d", m_dim, nets.count()); - for (int k = 0; k < N(); k++) - { - m_terms[k]->clear(); - m_rails_temp[k].clear(); - } - netlist_matrix_solver_t::setup(nets); - - for (int k = 0; k < N(); k++) - { - m_terms[k]->m_railstart = m_terms[k]->count(); - for (int i = 0; i < m_rails_temp[k].count(); i++) - this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]); - - m_rails_temp[k].clear(); // no longer needed - m_terms[k]->set_pointers(); - } - -#if 1 - - /* Sort in descending order by number of connected matrix voltages. - * The idea is, that for Gauss-Seidel algo the first voltage computed - * depends on the greatest number of previous voltages thus taking into - * account the maximum amout of information. - * - * This actually improves performance on popeye slightly. Average - * GS computations reduce from 2.509 to 2.370 - * - * Smallest to largest : 2.613 - * Unsorted : 2.509 - * Largest to smallest : 2.370 - * - * Sorting as a general matrix pre-conditioning is mentioned in - * literature but I have found no articles about Gauss Seidel. - * - */ - - - for (int k = 0; k < N() / 2; k++) - for (int i = 0; i < N() - 1; i++) - { - if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart) - { - std::swap(m_terms[i],m_terms[i+1]); - m_nets.swap(i, i+1); - } - } - - for (int k = 0; k < N(); k++) - { - int *other = m_terms[k]->net_other(); - for (int i = 0; i < m_terms[k]->count(); i++) - if (other[i] != -1) - other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net()); - } - -#endif - -} - -template -ATTR_HOT void netlist_matrix_solver_direct_t::build_LE() -{ -#if 0 - for (int k=0; k < N(); k++) - for (int i=0; i < N(); i++) - m_A[k][i] = 0.0; -#endif - - for (int k = 0; k < N(); k++) - { - for (int i=0; i < N(); i++) - m_A[k][i] = 0.0; - - double rhsk = 0.0; - double akk = 0.0; - { - const int terms_count = m_terms[k]->count(); - const double * RESTRICT gt = m_terms[k]->gt(); - const double * RESTRICT go = m_terms[k]->go(); - const double * RESTRICT Idr = m_terms[k]->Idr(); -#if VECTALT - - for (int i = 0; i < terms_count; i++) - { - rhsk = rhsk + Idr[i]; - akk = akk + gt[i]; - } -#else - m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk); -#endif - double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog(); - for (int i = m_terms[k]->m_railstart; i < terms_count; i++) - { - //rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog(); - rhsk = rhsk + go[i] * *other_cur_analog[i]; - } - } -#if 0 - /* - * Matrix preconditioning with 1.0 / Akk - * - * will save a number of calculations during elimination - * - */ - akk = 1.0 / akk; - m_RHS[k] = rhsk * akk; - m_A[k][k] += 1.0; - { - const int *net_other = m_terms[k]->net_other(); - const double *go = m_terms[k]->go(); - const int railstart = m_terms[k]->m_railstart; - - for (int i = 0; i < railstart; i++) - { - m_A[k][net_other[i]] += -go[i] * akk; - } - } -#else - m_RHS[k] = rhsk; - m_A[k][k] += akk; - { - const int * RESTRICT net_other = m_terms[k]->net_other(); - const double * RESTRICT go = m_terms[k]->go(); - const int railstart = m_terms[k]->m_railstart; - - for (int i = 0; i < railstart; i++) - { - m_A[k][net_other[i]] += -go[i]; - } - } -#endif - } -} - -template -ATTR_HOT void netlist_matrix_solver_direct_t::gauss_LE( - double (* RESTRICT x)) -{ -#if 0 - for (int i = 0; i < N(); i++) - { - for (int k = 0; k < N(); k++) - printf("%f ", m_A[i][k]); - printf("| %f = %f \n", x[i], m_RHS[i]); - } - printf("\n"); -#endif - - const int kN = N(); - - for (int i = 0; i < kN; i++) { - // FIXME: use a parameter to enable pivoting? - if (USE_PIVOT_SEARCH) - { - /* Find the row with the largest first value */ - int maxrow = i; - for (int j = i + 1; j < kN; j++) - { - if (fabs(m_A[j][i]) > fabs(m_A[maxrow][i])) - maxrow = j; - } - - if (maxrow != i) - { - /* Swap the maxrow and ith row */ - for (int k = i; k < kN; k++) { - std::swap(m_A[i][k], m_A[maxrow][k]); - } - std::swap(m_RHS[i], m_RHS[maxrow]); - } - } - - /* FIXME: Singular matrix? */ - const double f = 1.0 / m_A[i][i]; - - /* Eliminate column i from row j */ - - for (int j = i + 1; j < kN; j++) - { - const double f1 = - m_A[j][i] * f; - if (f1 != 0.0) - { -#if 0 && VECTALT - for (int k = i + 1; k < kN; k++) - m_A[j][k] += m_A[i][k] * f1; -#else - // addmult gives some performance increase here... - m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ; -#endif - m_RHS[j] += m_RHS[i] * f1; - } - } - } - /* back substitution */ - for (int j = kN - 1; j >= 0; j--) - { - double tmp = 0; - - for (int k = j + 1; k < kN; k++) - tmp += m_A[j][k] * x[k]; - - x[j] = (m_RHS[j] - tmp) / m_A[j][j]; - } -#if 0 - printf("Solution:\n"); - for (int i = 0; i < N(); i++) - { - for (int k = 0; k < N(); k++) - printf("%f ", m_A[i][k]); - printf("| %f = %f \n", x[i], m_RHS[i]); - } - printf("\n"); -#endif - -} - -template -ATTR_HOT double netlist_matrix_solver_direct_t::delta( - const double (* RESTRICT V)) -{ - double cerr = 0; - double cerr2 = 0; - for (int i = 0; i < this->N(); i++) - { - const double e = (V[i] - this->m_nets[i]->m_cur_Analog); - const double e2 = (m_RHS[i] - this->m_last_RHS[i]); - cerr = (fabs(e) > cerr ? fabs(e) : cerr); - cerr2 = (fabs(e2) > cerr2 ? fabs(e2) : cerr2); - } - // FIXME: Review - return cerr + cerr2*100000.0; -} - -template -ATTR_HOT void netlist_matrix_solver_direct_t::store( - const double (* RESTRICT V), const bool store_RHS) -{ - for (int i = 0; i < this->N(); i++) - { - this->m_nets[i]->m_cur_Analog = V[i]; - } - if (store_RHS) - { - for (int i = 0; i < this->N(); i++) - { - this->m_last_RHS[i] = m_RHS[i]; - } - } -} - -template -ATTR_HOT double netlist_matrix_solver_direct_t::vsolve() -{ - solve_base(this); - return this->compute_next_timestep(); -} - - -template -ATTR_HOT int netlist_matrix_solver_direct_t::solve_non_dynamic() -{ - double new_v[_storage_N] = { 0.0 }; - - this->gauss_LE(new_v); - - if (this->is_dynamic()) - { - double err = delta(new_v); - - store(new_v, true); - - if (err > this->m_params.m_accuracy) - { - return 2; - } - return 1; - } - store(new_v, false); // ==> No need to store RHS - return 1; -} - -template -ATTR_HOT inline int netlist_matrix_solver_direct_t::vsolve_non_dynamic() -{ - this->build_LE(); - - return this->solve_non_dynamic(); -} - - -// ---------------------------------------------------------------------------------------- -// netlist_matrix_solver - Direct1 -// ---------------------------------------------------------------------------------------- - -ATTR_HOT double netlist_matrix_solver_direct1_t::vsolve() -{ - solve_base(this); - return this->compute_next_timestep(); -} - -ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic() -{ - - netlist_analog_net_t *net = m_nets[0]; - this->build_LE(); - //NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]); - - double new_val = m_RHS[0] / m_A[0][0]; - - double e = (new_val - net->m_cur_Analog); - double cerr = fabs(e); - - net->m_cur_Analog = new_val; - - if (is_dynamic() && (cerr > m_params.m_accuracy)) - { - return 2; - } - else - return 1; - -} - - - -// ---------------------------------------------------------------------------------------- -// netlist_matrix_solver - Direct2 -// ---------------------------------------------------------------------------------------- - -ATTR_HOT double netlist_matrix_solver_direct2_t::vsolve() -{ - solve_base(this); - return this->compute_next_timestep(); -} - -ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic() -{ - - build_LE(); - - const double a = m_A[0][0]; - const double b = m_A[0][1]; - const double c = m_A[1][0]; - const double d = m_A[1][1]; - - double new_val[2]; - new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c); - new_val[0] = (m_RHS[0] - b * new_val[1]) / a; - - if (is_dynamic()) - { - double err = this->delta(new_val); - store(new_val, true); - if (err > m_params.m_accuracy ) - return 2; - else - return 1; - } - store(new_val, false); - return 1; -} - -// ---------------------------------------------------------------------------------------- -// netlist_matrix_solver - Gauss - Seidel -// ---------------------------------------------------------------------------------------- - -template -ATTR_HOT double netlist_matrix_solver_gauss_seidel_t::vsolve() -{ - /* - * enable linear prediction on first newton pass - */ - - if (USE_LINEAR_PREDICTION) - for (int k = 0; k < this->N(); k++) - { - this->m_last_V[k] = this->m_nets[k]->m_cur_Analog; - this->m_nets[k]->m_cur_Analog = this->m_nets[k]->m_cur_Analog + this->m_Vdelta[k] * this->current_timestep() * m_lp_fact; - } - else - for (int k = 0; k < this->N(); k++) - { - this->m_last_V[k] = this->m_nets[k]->m_cur_Analog; - } - - this->solve_base(this); - - if (USE_LINEAR_PREDICTION) - { - double sq = 0; - double sqo = 0; - for (int k = 0; k < this->N(); k++) - { - netlist_analog_net_t *n = this->m_nets[k]; - double nv = (n->m_cur_Analog - this->m_last_V[k]) / this->current_timestep(); - sq += nv * nv; - sqo += this->m_Vdelta[k] * this->m_Vdelta[k]; - this->m_Vdelta[k] = nv; - } - if (sqo > 1e-90) - m_lp_fact = sqrt(sq/sqo); - else - m_lp_fact = 0.0; - if (m_lp_fact > 2.0) - m_lp_fact = 2.0; - //printf("fact %f\n", fact); - } - - - return this->compute_next_timestep(); -} - -template -ATTR_HOT inline int netlist_matrix_solver_gauss_seidel_t::vsolve_non_dynamic() -{ - /* The matrix based code looks a lot nicer but actually is 30% slower than - * the optimized code which works directly on the data structures. - * Need something like that for gaussian elimination as well. - */ - -#if USE_MATRIX_GS - static double ws = 1.0; - ATTR_ALIGN double new_v[_storage_N] = { 0.0 }; - const int iN = this->N(); - - bool resched = false; - - int resched_cnt = 0; - - this->build_LE(); - - { - double frob; - frob = 0; - for (int k = 0; k < iN; k++) - { - new_v[k] = this->m_nets[k]->m_cur_Analog; - for (int i = 0; i < iN; i++) - { - frob += this->m_A[k][i] * this->m_A[k][i]; - } - - } - double frobA = sqrt(frob /(iN)); - if (1 &&frobA < 1.0) - //ws = 2.0 / (1.0 + sqrt(1.0-frobA)); - ws = 2.0 / (2.0 - frobA); - else - ws = 1.0; - ws = 0.9; - } - - // Frobenius norm for (D-L)^(-1)U - //double frobU; - //double frobL; - //double norm; - do { - resched = false; - double cerr = 0.0; - //frobU = 0; - //frobL = 0; - //norm = 0; - - for (int k = 0; k < iN; k++) - { - double Idrive = 0; - //double norm_t = 0; - // Reduction loops need -ffast-math - for (int i = 0; i < iN; i++) - Idrive += this->m_A[k][i] * new_v[i]; - - for (int i = 0; i < iN; i++) - { - //if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k]; - //if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k]; - //norm_t += fabs(this->m_A[k][i]); - } - - //if (norm_t > norm) norm = norm_t; - const double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k]; - - const double e = fabs(new_val - new_v[k]); - cerr = (e > cerr ? e : cerr); - new_v[k] = new_val; - } - - if (cerr > this->m_params.m_accuracy) - { - resched = true; - } - resched_cnt++; - //ATTR_UNUSED double frobUL = sqrt((frobU + frobL) / (double) (iN) / (double) (iN)); - } while (resched && (resched_cnt < this->m_params.m_gs_loops)); - //printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), frobUL, frobA, norm); - //printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + sqrt(1-frobUL)), 2.0 / (1.0 + sqrt(1-frobA)) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) )); - //printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) )); - - - this->store(new_v, false); - - this->m_gs_total += resched_cnt; - if (resched) - { - //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS"); - this->m_gs_fail++; - int tmp = netlist_matrix_solver_direct_t::solve_non_dynamic(); - this->m_calculations++; - return tmp; - } - else { - this->m_calculations++; - - return resched_cnt; - } - -#else - const int iN = this->N(); - bool resched = false; - int resched_cnt = 0; - - /* ideally, we could get an estimate for the spectral radius of - * Inv(D - L) * U - * - * and estimate using - * - * omega = 2.0 / (1.0 + sqrt(1-rho)) - */ - - const double ws = SORP; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1))); - - ATTR_ALIGN double w[_storage_N]; - ATTR_ALIGN double one_m_w[_storage_N]; - ATTR_ALIGN double RHS[_storage_N]; - ATTR_ALIGN double new_V[_storage_N]; - - for (int k = 0; k < iN; k++) - { - new_V[k] = this->m_nets[k]->m_cur_Analog; - } - for (int k = 0; k < iN; k++) - { - double gtot_t = 0.0; - double gabs_t = 0.0; - double RHS_t = 0.0; - - { - const int term_count = this->m_terms[k]->count(); - const double * RESTRICT gt = this->m_terms[k]->gt(); - const double * RESTRICT go = this->m_terms[k]->go(); - const double * RESTRICT Idr = this->m_terms[k]->Idr(); -#if VECTALT - for (int i = 0; i < term_count; i++) - { - gtot_t += gt[i]; - if (USE_GABS) gabs_t += fabs(go[i]); - RHS_t += Idr[i]; - } -#else - if (USE_GABS) - this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t); - else - this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t); -#endif - double * const *other_cur_analog = this->m_terms[k]->other_curanalog(); - for (int i = this->m_terms[k]->m_railstart; i < term_count; i++) - //RHS_t += go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog(); - RHS_t += go[i] * *other_cur_analog[i]; - } - - RHS[k] = RHS_t; - - //if (fabs(gabs_t - fabs(gtot_t)) > 1e-20) - // printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t); - - gabs_t *= 0.5; // avoid rounding issues - if (!USE_GABS || gabs_t <= gtot_t) - { - w[k] = ws / gtot_t; - one_m_w[k] = 1.0 - ws; - } - else - { - //printf("abs: %f tot: %f\n", gabs_t, gtot_t); - w[k] = 1.0 / (gtot_t + gabs_t); - one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t); - } - - } - - do { - resched = false; - //double cerr = 0.0; - - for (int k = 0; k < iN; k++) - { - const int * RESTRICT net_other = this->m_terms[k]->net_other(); - const int railstart = this->m_terms[k]->m_railstart; - const double * RESTRICT go = this->m_terms[k]->go(); - - double Idrive = 0.0; - for (int i = 0; i < railstart; i++) - Idrive = Idrive + go[i] * new_V[net_other[i]]; - - //double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]); - const double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k]; - - resched = resched || (fabs(new_val - new_V[k]) > this->m_params.m_accuracy); - new_V[k] = new_val; - } - - resched_cnt++; - } while (resched && (resched_cnt < this->m_params.m_gs_loops)); - - for (int k = 0; k < iN; k++) - this->m_nets[k]->m_cur_Analog = new_V[k]; - - this->m_gs_total += resched_cnt; - - if (resched) - { - //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS"); - this->m_gs_fail++; - int tmp = netlist_matrix_solver_direct_t::vsolve_non_dynamic(); - this->m_calculations++; - return tmp; - } - else { - this->m_calculations++; - - return resched_cnt; - } -#endif -} // ---------------------------------------------------------------------------------------- // solver @@ -1099,9 +336,10 @@ NETLIB_START(solver) register_param("ACCURACY", m_accuracy, 1e-7); register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops - register_param("GS_THRESHOLD", m_gs_threshold, 5); // below this value, gaussian elimination is used + register_param("GS_THRESHOLD", m_gs_threshold, 5); // below this value, gaussian elimination is used register_param("NR_LOOPS", m_nr_loops, 25); // Newton-Raphson loops register_param("PARALLEL", m_parallel, 0); + register_param("SOR_FACTOR", m_sor, 1.059); register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT); register_param("DYNAMIC_TS", m_dynamic, 0); register_param("LTE", m_lte, 5e-5); // diff/timestep @@ -1190,15 +428,15 @@ template netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int gs_threshold, const bool use_specific) { if (use_specific && m_N == 1) - return new netlist_matrix_solver_direct1_t(); + return new netlist_matrix_solver_direct1_t(m_params); else if (use_specific && m_N == 2) - return new netlist_matrix_solver_direct2_t(); + return new netlist_matrix_solver_direct2_t(m_params); else { if (size >= gs_threshold) - return new netlist_matrix_solver_gauss_seidel_t(size); + return new netlist_matrix_solver_gauss_seidel_t(m_params, size); else - return new netlist_matrix_solver_direct_t(size); + return new netlist_matrix_solver_direct_t(m_params, size); } } @@ -1214,6 +452,8 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start() m_params.m_nr_loops = m_nr_loops.Value(); m_params.m_nt_sync_delay = m_sync_delay.Value(); m_params.m_lte = m_lte.Value(); + m_params.m_sor = m_sor.Value(); + m_params.m_min_timestep = m_min_timestep.Value(); m_params.m_dynamic = (m_dynamic.Value() == 1 ? true : false); m_params.m_max_timestep = netlist_time::from_hz(m_freq.Value()).as_double(); @@ -1302,8 +542,6 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start() break; } - ms->m_params = m_params; - register_sub(*ms, pstring::sprintf("Solver %d",m_mat_solvers.count())); ms->vsetup(groups[i]); diff --git a/src/emu/netlist/analog/nld_solver.h b/src/emu/netlist/analog/nld_solver.h index 30edb99f089..52f15cee8f8 100644 --- a/src/emu/netlist/analog/nld_solver.h +++ b/src/emu/netlist/analog/nld_solver.h @@ -11,9 +11,13 @@ //#define ATTR_ALIGNED(N) __attribute__((aligned(N))) #define ATTR_ALIGNED(N) ATTR_ALIGN -//#undef RESTRICT -//#define RESTRICT +#define USE_PIVOT_SEARCH (0) +#define VECTALT 1 +#define USE_GABS 0 +#define USE_MATRIX_GS 0 +// savings are eaten up by effort +#define USE_LINEAR_PREDICTION (0) // ---------------------------------------------------------------------------------------- // Macros @@ -37,6 +41,7 @@ struct netlist_solver_parameters_t double m_lte; double m_min_timestep; double m_max_timestep; + double m_sor; bool m_dynamic; int m_gs_loops; int m_nr_loops; @@ -54,8 +59,6 @@ public: virtual ~vector_ops_t() {} - ATTR_ALIGNED(64) double * RESTRICT m_V; - virtual const double sum(const double * v) = 0; virtual void sum2(const double * RESTRICT v1, const double * RESTRICT v2, double & RESTRICT s1, double & RESTRICT s2) = 0; virtual void addmult(double * RESTRICT v1, const double * RESTRICT v2, const double &mult) = 0; @@ -63,6 +66,8 @@ public: virtual const double sumabs(const double * v) = 0; + static vector_ops_t *create_ops(const int size); + protected: int m_dim; @@ -150,7 +155,8 @@ class ATTR_ALIGNED(64) terms_t NETLIST_PREVENT_COPYING(terms_t) public: - ATTR_COLD terms_t() {} + ATTR_COLD terms_t() : m_railstart(0), m_ops(NULL) + {} ATTR_COLD void clear() { @@ -178,8 +184,8 @@ class ATTR_ALIGNED(64) terms_t private: plinearlist_t m_term; plinearlist_t m_net_other; - plinearlist_t m_gt; plinearlist_t m_go; + plinearlist_t m_gt; plinearlist_t m_Idr; plinearlist_t m_other_curanalog; vector_ops_t * m_ops; @@ -191,7 +197,7 @@ public: typedef plinearlist_t list_t; typedef netlist_core_device_t::list_t dev_list_t; - ATTR_COLD netlist_matrix_solver_t(); + ATTR_COLD netlist_matrix_solver_t(const netlist_solver_parameters_t ¶ms); ATTR_COLD virtual ~netlist_matrix_solver_t(); ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets) = 0; @@ -215,8 +221,6 @@ public: ATTR_COLD virtual void start(); ATTR_COLD virtual void reset(); - netlist_solver_parameters_t m_params; - ATTR_COLD int get_net_idx(netlist_net_t *net); ATTR_COLD virtual void log_stats() {}; @@ -234,6 +238,7 @@ protected: plinearlist_t m_inps; int m_calculations; + const netlist_solver_parameters_t &m_params; ATTR_HOT inline const double current_timestep() { return m_cur_ts; } private: @@ -252,108 +257,7 @@ private: }; -template -class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t -{ -public: - netlist_matrix_solver_direct_t(int size); - - virtual ~netlist_matrix_solver_direct_t(); - - ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets); - ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); } - - ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; } - - ATTR_HOT inline int vsolve_non_dynamic(); - -protected: - ATTR_COLD virtual void add_term(int net_idx, netlist_terminal_t *term); - - ATTR_HOT virtual double vsolve(); - - ATTR_HOT int solve_non_dynamic(); - ATTR_HOT void build_LE(); - ATTR_HOT void gauss_LE(double (* RESTRICT x)); - ATTR_HOT double delta(const double (* RESTRICT V)); - ATTR_HOT void store(const double (* RESTRICT V), const bool store_RHS); - - /* bring the whole system to the current time - * Don't schedule a new calculation time. The recalculation has to be - * triggered by the caller after the netlist element was changed. - */ - ATTR_HOT double compute_next_timestep(); - - double m_A[_storage_N][((_storage_N + 7) / 8) * 8]; - double m_RHS[_storage_N]; - double m_last_RHS[_storage_N]; // right hand side - contains currents - double m_Vdelta[_storage_N]; - double m_last_V[_storage_N]; - - terms_t **m_terms; - - terms_t *m_rails_temp; - -private: - vector_ops_t *m_row_ops[_storage_N + 1]; - - int m_dim; - double m_lp_fact; -}; - -template -class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matrix_solver_direct_t -{ -public: - - netlist_matrix_solver_gauss_seidel_t(int size) - : netlist_matrix_solver_direct_t(size) - , m_lp_fact(0) - , m_gs_fail(0) - , m_gs_total(0) - {} - - virtual ~netlist_matrix_solver_gauss_seidel_t() {} - - ATTR_COLD virtual void log_stats(); - - ATTR_HOT inline int vsolve_non_dynamic(); -protected: - ATTR_HOT virtual double vsolve(); - -private: - double m_lp_fact; - int m_gs_fail; - int m_gs_total; - -}; - -class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1> -{ -public: - - netlist_matrix_solver_direct1_t() - : netlist_matrix_solver_direct_t<1, 1>(1) - {} - ATTR_HOT inline int vsolve_non_dynamic(); -protected: - ATTR_HOT virtual double vsolve(); -private: -}; - -class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2> -{ -public: - - netlist_matrix_solver_direct2_t() - : netlist_matrix_solver_direct_t<2, 2>(2) - {} - ATTR_HOT inline int vsolve_non_dynamic(); -protected: - ATTR_HOT virtual double vsolve(); -private: -}; class ATTR_ALIGNED(64) NETLIB_NAME(solver) : public netlist_device_t { @@ -381,6 +285,7 @@ protected: netlist_param_double_t m_accuracy; netlist_param_double_t m_gmin; netlist_param_double_t m_lte; + netlist_param_double_t m_sor; netlist_param_logic_t m_dynamic; netlist_param_double_t m_min_timestep; diff --git a/src/emu/netlist/analog/nld_twoterm.h b/src/emu/netlist/analog/nld_twoterm.h index 1454d41b0bb..1b2fe9de13f 100644 --- a/src/emu/netlist/analog/nld_twoterm.h +++ b/src/emu/netlist/analog/nld_twoterm.h @@ -103,7 +103,7 @@ public: m_N.set( G, G, ( -V) * G + I); } - ATTR_HOT inline double deltaV() + ATTR_HOT inline double deltaV() const { return m_P.net().as_analog().Q_Analog() - m_N.net().as_analog().Q_Analog(); } @@ -172,8 +172,8 @@ public: ATTR_HOT void step_time(const double st) { - double G = m_C.Value() / st; - double I = -G * deltaV(); + const double G = m_C.Value() / st; + const double I = -G * deltaV(); set(G, 0.0, I); }