From 161cc143a5d0ec03c432bd3f07899de311c75a44 Mon Sep 17 00:00:00 2001 From: couriersud Date: Thu, 10 Oct 2019 00:32:32 +0200 Subject: [PATCH] netlist: code maintenance. (nw) - some readability improvements - some simplifications - kidniki uses frontiers again (speed improvement) --- src/lib/netlist/nl_errstr.h | 2 +- src/lib/netlist/plib/gmres.h | 86 ++++++++++++++++---- src/lib/netlist/plib/mat_cr.h | 79 +++++++++--------- src/lib/netlist/plib/pdynlib.h | 2 +- src/lib/netlist/plib/putil.h | 13 +-- src/lib/netlist/solver/nld_matrix_solver.cpp | 39 ++++----- src/lib/netlist/solver/nld_matrix_solver.h | 83 ++++++++++++++++--- src/lib/netlist/solver/nld_ms_direct.h | 2 +- src/lib/netlist/solver/nld_ms_gcr.h | 64 ++------------- src/lib/netlist/solver/nld_ms_gmres.h | 3 +- src/lib/netlist/solver/nld_ms_sor.h | 2 +- src/lib/netlist/solver/nld_ms_sor_mat.h | 2 +- src/lib/netlist/solver/nld_solver.cpp | 65 ++++++--------- src/mame/audio/nl_kidniki.cpp | 8 +- 14 files changed, 246 insertions(+), 204 deletions(-) diff --git a/src/lib/netlist/nl_errstr.h b/src/lib/netlist/nl_errstr.h index 5e5a7037445..c2464d0ef69 100644 --- a/src/lib/netlist/nl_errstr.h +++ b/src/lib/netlist/nl_errstr.h @@ -125,7 +125,7 @@ namespace netlist // nld_solver.cpp - PERRMSGV(MF_UNKNOWN_SOLVER_TYPE, 1, "Unknown solver type: {1}") + //PERRMSGV(MF_UNKNOWN_SOLVER_TYPE, 1, "Unknown solver type: {1}") PERRMSGV(MF_NETGROUP_SIZE_EXCEEDED_1, 1, "Encountered netgroup with > {1} nets") PERRMSGV(MI_NO_SPECIFIC_SOLVER, 1, "No specific solver found for netlist of size {1}") diff --git a/src/lib/netlist/plib/gmres.h b/src/lib/netlist/plib/gmres.h index c76c257ce21..0ce27faead8 100644 --- a/src/lib/netlist/plib/gmres.h +++ b/src/lib/netlist/plib/gmres.h @@ -72,7 +72,7 @@ namespace plib } template - void solve_LU_inplace(V &v) + void solve_inplace(V &v) { m_LU.solveLU(v); } @@ -91,6 +91,7 @@ namespace plib mat_precondition_diag(std::size_t size, int dummy = 0) : m_mat(size) , m_diag(size) + , nzcol(size) { plib::unused_var(dummy); } @@ -99,6 +100,18 @@ namespace plib void build(M &fill) { m_mat.build_from_fill_mat(fill, 0); + for (std::size_t i = 0; i< m_diag.size(); i++) + { + for (std::size_t j = 0; j< m_diag.size(); j++) + { + std::size_t k=m_mat.row_idx[j]; + while (m_mat.col_idx[k] < i && k < m_mat.row_idx[j+1]) + k++; + if (m_mat.col_idx[k] == i && k < m_mat.row_idx[j+1]) + nzcol[i].push_back(k); + } + nzcol[i].push_back(static_cast(-1)); + } } template @@ -111,12 +124,51 @@ namespace plib { for (std::size_t i = 0; i< m_diag.size(); i++) { - m_diag[i] = 1.0 / m_mat.A[m_mat.diag[i]]; + // ILUT: 265% + FT v(0.0); +#if 0 + // doesn't works, Mame perforamnce drops significantly% + // 136% + for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++) + v += m_mat.A[j] * m_mat.A[j]; + m_diag[i] = 1.0 / std::sqrt(v); +#elif 0 + // works halfway, i.e. Mame perforamnce 50% + // 147% - lowest average solution time with 7.094 + for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++) + v += m_mat.A[j] * m_mat.A[j]; + m_diag[i] = m_mat.A[m_mat.diag[i]] / v; +#elif 0 + // works halfway, i.e. Mame perforamnce 50% + // sum over column i + // 344% - lowest average solution time with 3.06 + std::size_t nzcolp = 0; + const auto &nz = nzcol[i]; + std::size_t j; + + while ((j = nz[nzcolp++])!=static_cast(-1)) // NOLINT(bugprone-infinite-loop) + { + v += m_mat.A[j] * m_mat.A[j]; + } + m_diag[i] = m_mat.A[m_mat.diag[i]] / v; +#elif 0 + // works halfway, i.e. Mame perforamnce 50% + // 151% + for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++) + v += std::abs(m_mat.A[j]); + m_diag[i] = 1.0 / v; +#else + // 124% + for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++) + v = std::max(v, std::abs(m_mat.A[j])); + m_diag[i] = 1.0 / v; +#endif + //m_diag[i] = 1.0 / m_mat.A[m_mat.diag[i]]; } } template - void solve_LU_inplace(V &v) + void solve_inplace(V &v) { for (std::size_t i = 0; i< m_diag.size(); i++) v[i] = v[i] * m_diag[i]; @@ -124,6 +176,7 @@ namespace plib plib::matrix_compressed_rows_t m_mat; plib::parray m_diag; + plib::parray, SIZE > nzcol; }; template @@ -152,7 +205,7 @@ namespace plib } template - void solve_LU_inplace(V &v) + void solve_inplace(V &v) { plib::unused_var(v); } @@ -163,7 +216,7 @@ namespace plib /* FIXME: hardcoding RESTART to 20 becomes an issue on very large * systems. */ - template + template struct gmres_t { public: @@ -191,18 +244,18 @@ namespace plib std::size_t size() const { return (SIZE<=0) ? m_size : static_cast(SIZE); } template - bool do_k(OPS &ops, VT &x, FT rho_delta, bool dummy) + bool do_k(OPS &ops, VT &x, std::size_t &itr_used, FT rho_delta, bool dummy) { plib::unused_var(dummy); //printf("%d\n", k); - if (do_k(ops, x, rho_delta, do_khelper::value)) + if (do_k(ops, x, itr_used, rho_delta, do_khelper::value)) return true; const std::size_t kp1 = k + 1; const std::size_t n = size(); ops.calc_rhs(m_v[kp1], m_v[k]); - ops.solve_LU_inplace(m_v[kp1]); + ops.solve_inplace(m_v[kp1]); for (std::size_t j = 0; j <= k; j++) { @@ -229,7 +282,7 @@ namespace plib FT rho = std::abs(m_g[kp1]); // FIXME .. - //itr_used = itr_used + 1; + itr_used = itr_used + 1; if (rho <= rho_delta || k == RESTART-1) { @@ -252,10 +305,9 @@ namespace plib } template - bool do_k(OPS &ops, VT &x, FT rho_delta, float dummy) + bool do_k(OPS &ops, VT &x, std::size_t &itr_used, FT rho_delta, float dummy) { - plib::unused_var(ops, x, rho_delta, dummy); - //printf("here\n"); + plib::unused_var(ops, x, itr_used, rho_delta, dummy); return false; } @@ -307,7 +359,7 @@ namespace plib vec_set_scalar(n, residual, accuracy); ops.calc_rhs(Ax, residual); - ops.solve_LU_inplace(Ax); + ops.solve_inplace(Ax); const float_type rho_to_accuracy = std::sqrt(vec_mult2(n, Ax)) / accuracy; @@ -320,7 +372,7 @@ namespace plib * Using * * vec_set(n, x, rhs); - * ops.solve_LU_inplace(x); + * ops.solve_inplace(x); * * to get a starting point for x degrades convergence speed compared * to using the last solution for x. @@ -337,7 +389,7 @@ namespace plib vec_sub(n, residual, rhs, Ax); - ops.solve_LU_inplace(residual); + ops.solve_inplace(residual); rho = std::sqrt(vec_mult2(n, residual)); @@ -353,7 +405,7 @@ namespace plib vec_mult_scalar(n, m_v[0], residual, constants::one() / rho); - if (do_k(ops, x, rho_delta, true)) + if (do_k(ops, x, itr_used, rho_delta, true)) // converged break; } @@ -446,7 +498,7 @@ namespace plib for (int i = 0; i < iter_max; i++) { - ops.solve_LU_inplace(residual); + ops.solve_inplace(residual); if (i==0) { vec_set(size(), p, residual); diff --git a/src/lib/netlist/plib/mat_cr.h b/src/lib/netlist/plib/mat_cr.h index 340c6305e96..cfa17f35dc8 100644 --- a/src/lib/netlist/plib/mat_cr.h +++ b/src/lib/netlist/plib/mat_cr.h @@ -207,6 +207,7 @@ namespace plib // proceed to column i std::size_t pj = row_idx[j]; + std::size_t pje = row_idx[j+1]; while (col_idx[pj] < i) pj++; @@ -216,7 +217,7 @@ namespace plib // subtract row i from j // fill-in available assumed, i.e. matrix was prepared - for (std::size_t pii = pi; pii(i); + return -1; + } + template void gaussian_elimination_parallel(V & RHS) { @@ -314,25 +323,17 @@ namespace plib * res = A * x */ #if 0 - parray xi(m_size*m_size); - //std::vector xi(m_size*m_size); - - plib::omp::for_static(0, constants::zero(), row_idx[m_size], [this, &x, &xi](index_type k) - { - xi[k] = x[col_idx[k]]; - }); -#endif -#if 1 //plib::omp::set_num_threads(4); plib::omp::for_static(0, constants::zero(), m_size, [this, &res, &x](index_type row) { - T tmp = 0.0; - const index_type e = row_idx[row+1]; + T tmp(0.0); + const index_type e(row_idx[row+1]); for (index_type k = row_idx[row]; k < e; k++) tmp += A[k] * x[col_idx[k]]; res[row] = tmp; }); #else + // this is a bit faster than the version above std::size_t row = 0; std::size_t k = 0; const std::size_t oe = nz_num; @@ -373,10 +374,10 @@ namespace plib template void reduction_copy_from(LUMAT & src) { - C sp = 0; - for (std::size_t r=0; r void raw_copy_from(LUMAT & src) { - for (std::size_t k = 0; k < nz_num; k++) + for (index_type k = 0; k < nz_num; k++) A[k] = src.A[k]; } @@ -422,32 +423,32 @@ namespace plib */ index_type p(0); - while (std::size_t i = ilu_rows[p++]) // NOLINT(bugprone-infinite-loop) + while (auto i = ilu_rows[p++]) // NOLINT(bugprone-infinite-loop) { - const std::size_t p_i_end = row_idx[i + 1]; + const auto p_i_end = row_idx[i + 1]; // loop over all columns k left of diag in row i //if (row_idx[i] < diag[i]) // printf("occ %d\n", (int)i); - for (std::size_t i_k = row_idx[i]; i_k < diag[i]; i_k++) + for (auto i_k = row_idx[i]; i_k < diag[i]; i_k++) { - const std::size_t k = col_idx[i_k]; - const std::size_t p_k_end = row_idx[k + 1]; + const auto k(col_idx[i_k]); + const auto p_k_end(row_idx[k + 1]); const T LUp_i_k = A[i_k] = A[i_k] / A[diag[k]]; - std::size_t k_j = diag[k] + 1; - std::size_t i_j = i_k + 1; + auto k_j(diag[k] + 1); + auto i_j(i_k + 1); while (i_j < p_i_end && k_j < p_k_end ) // pj = (i, j) { // we can assume that within a row ja increases continuously */ - const std::size_t c_i_j = col_idx[i_j]; // row i, column j - const std::size_t c_k_j = col_idx[k_j]; // row k, column j - if (c_k_j < c_i_j) - k_j++; - else if (c_k_j == c_i_j) - A[i_j++] -= LUp_i_k * A[k_j++]; - else - i_j++; + const auto c_i_j(col_idx[i_j]); // row i, column j + const auto c_k_j(col_idx[k_j]); // row k, column j + + if (c_k_j == c_i_j) + A[i_j] -= LUp_i_k * A[k_j]; + k_j += (c_k_j <= c_i_j ? 1 : 0); + i_j += (c_k_j >= c_i_j ? 1 : 0); + } } } @@ -477,23 +478,23 @@ namespace plib * This can be solved for x using backwards elimination in U. * */ - for (std::size_t i = 1; i < size(); ++i ) + for (index_type i = 1; i < size(); ++i ) { T tmp = 0.0; - const std::size_t j1 = row_idx[i]; - const std::size_t j2 = diag[i]; + const auto j1(row_idx[i]); + const auto j2(diag[i]); - for (std::size_t j = j1; j < j2; ++j ) + for (auto j = j1; j < j2; ++j ) tmp += A[j] * r[col_idx[j]]; r[i] -= tmp; } // i now is equal to n; - for (std::size_t i = size(); i-- > 0; ) + for (auto i = size(); i-- > 0; ) { T tmp = 0.0; - const std::size_t di = diag[i]; - const std::size_t j2 = row_idx[i+1]; - for (std::size_t j = di + 1; j < j2; j++ ) + const auto di(diag[i]); + const auto j2(row_idx[i+1]); + for (auto j = di + 1; j < j2; j++ ) tmp += A[j] * r[col_idx[j]]; r[i] = (r[i] - tmp) / A[di]; } diff --git a/src/lib/netlist/plib/pdynlib.h b/src/lib/netlist/plib/pdynlib.h index 1454c053298..b2fbb1487d2 100644 --- a/src/lib/netlist/plib/pdynlib.h +++ b/src/lib/netlist/plib/pdynlib.h @@ -62,7 +62,7 @@ public: //return m_sym(args...); } - bool resolved() { return m_sym != nullptr; } + bool resolved() const { return m_sym != nullptr; } private: calltype m_sym; }; diff --git a/src/lib/netlist/plib/putil.h b/src/lib/netlist/plib/putil.h index 8fd74ac8b8c..5ff70d941a2 100644 --- a/src/lib/netlist/plib/putil.h +++ b/src/lib/netlist/plib/putil.h @@ -242,18 +242,21 @@ namespace plib ename (E v) : m_v(v) { } \ template explicit ename(T val) { m_v = static_cast(val); } \ bool set_from_string (const pstring &s) { \ - static const pstring strings = # __VA_ARGS__; \ - int f = from_string_int(strings, s); \ + int f = from_string_int(strings(), s); \ if (f>=0) { m_v = static_cast(f); return true; } else { return false; } \ } \ operator E() const {return m_v;} \ bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \ bool operator==(const E &rhs) const {return m_v == rhs;} \ std::string name() const { \ - static const pstring strings = # __VA_ARGS__; \ - return nthstr(static_cast(m_v), strings); \ + return nthstr(static_cast(m_v), strings()); \ } \ - private: E m_v; }; + private: E m_v; \ + static pstring strings() {\ + static const pstring lstrings = # __VA_ARGS__; \ + return lstrings; \ + } \ + }; #endif /* PUTIL_H_ */ diff --git a/src/lib/netlist/solver/nld_matrix_solver.cpp b/src/lib/netlist/solver/nld_matrix_solver.cpp index 3535424b421..0073823dc2c 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.cpp +++ b/src/lib/netlist/solver/nld_matrix_solver.cpp @@ -15,15 +15,16 @@ namespace netlist namespace devices { - terms_for_net_t::terms_for_net_t() + terms_for_net_t::terms_for_net_t(analog_net_t * net) : m_railstart(0) , m_last_V(0.0) , m_DD_n_m_1(0.0) , m_h_n_m_1(1e-12) + , m_net(net) { } - void terms_for_net_t::add(terminal_t *term, int net_other, bool sorted) + void terms_for_net_t::add_terminal(terminal_t *term, int net_other, bool sorted) { if (sorted) for (std::size_t i=0; i < m_connected_net_idx.size(); i++) @@ -65,13 +66,11 @@ namespace devices log().debug("New solver setup\n"); - m_nets.clear(); m_terms.clear(); for (auto & net : nets) { - m_nets.push_back(net); - m_terms.push_back(plib::make_unique()); + m_terms.push_back(plib::make_unique(net)); m_rails_temp.push_back(plib::make_unique()); } @@ -159,7 +158,7 @@ namespace devices * */ - const std::size_t iN = m_nets.size(); + const std::size_t iN = m_terms.size(); switch (sort) { @@ -174,7 +173,6 @@ namespace devices if (pi < pk) { std::swap(m_terms[i], m_terms[k]); - std::swap(m_nets[i], m_nets[k]); pk = get_weight_around_diag(k,k); } } @@ -183,7 +181,7 @@ namespace devices break; case matrix_sort_type_e::PREFER_IDENTITY_TOP_LEFT: { - for (std::size_t k = 0; k < iN - 1; k++) + for (std::size_t k = 0; k < iN - 2; k++) { auto pk = get_left_right_of_diag(k,k); for (std::size_t i = k+1; i < iN; i++) @@ -192,7 +190,6 @@ namespace devices if (pi.first <= pk.first && pi.second >= pk.second) { std::swap(m_terms[i], m_terms[k]); - std::swap(m_nets[i], m_nets[k]); pk = get_left_right_of_diag(k,k); } } @@ -210,7 +207,6 @@ namespace devices if ((static_cast(m_terms[k]->m_railstart) - static_cast(m_terms[i]->m_railstart)) * sort_order < 0) { std::swap(m_terms[i], m_terms[k]); - std::swap(m_nets[i], m_nets[k]); } } } @@ -231,13 +227,13 @@ namespace devices void matrix_solver_t::setup_matrix() { - const std::size_t iN = m_nets.size(); + const std::size_t iN = m_terms.size(); for (std::size_t k = 0; k < iN; k++) { m_terms[k]->m_railstart = m_terms[k]->count(); for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++) - this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false); + this->m_terms[k]->add_terminal(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false); } // free all - no longer needed @@ -464,10 +460,10 @@ namespace devices return next_time_step; } - int matrix_solver_t::get_net_idx(detail::net_t *net) + int matrix_solver_t::get_net_idx(const analog_net_t *net) const { - for (std::size_t k = 0; k < m_nets.size(); k++) - if (m_nets[k] == net) + for (std::size_t k = 0; k < m_terms.size(); k++) + if (m_terms[k]->isNet(net)) return static_cast(k); return -1; } @@ -539,19 +535,19 @@ namespace devices { if (term->connected_terminal()->net().isRailNet()) { - m_rails_temp[k]->add(term, -1, false); + m_rails_temp[k]->add_terminal(term, -1, false); } else { int ot = get_net_idx(&term->connected_terminal()->net()); if (ot>=0) { - m_terms[k]->add(term, ot, true); + m_terms[k]->add_terminal(term, ot, true); } /* Should this be allowed ? */ else // if (ot<0) { - m_rails_temp[k]->add(term, ot, true); + m_rails_temp[k]->add_terminal(term, ot, true); log().fatal(MF_FOUND_TERM_WITH_MISSING_OTHERNET(term->name())); } } @@ -565,12 +561,11 @@ namespace devices { for (std::size_t k = 0, iN=m_terms.size(); k < iN; k++) { - analog_net_t *n = m_nets[k]; terms_for_net_t *t = m_terms[k].get(); //const nl_double DD_n = (n->Q_Analog() - t->m_last_V); // avoid floating point exceptions - const nl_double DD_n = std::max(-1e100, std::min(1e100,(n->Q_Analog() - t->m_last_V))); + const nl_double DD_n = std::max(-1e100, std::min(1e100,(t->getV() - t->m_last_V))); const nl_double hn = cur_ts; //printf("%g %g %g %g\n", DD_n, hn, t->m_DD_n_m_1, t->m_h_n_m_1); @@ -587,7 +582,7 @@ namespace devices if (new_net_timestep < new_solver_timestep) new_solver_timestep = new_net_timestep; - t->m_last_V = n->Q_Analog(); + t->m_last_V = t->getV(); } if (new_solver_timestep < m_params.m_min_timestep) { @@ -608,7 +603,7 @@ namespace devices { log().verbose("=============================================="); log().verbose("Solver {1}", this->name()); - log().verbose(" ==> {1} nets", this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name()); + log().verbose(" ==> {1} nets", this->m_terms.size()); //, (*(*groups[i].first())->m_core_terms.first())->name()); log().verbose(" has {1} elements", this->has_dynamic_devices() ? "dynamic" : "no dynamic"); log().verbose(" has {1} elements", this->has_timestep_devices() ? "timestep" : "no timestep"); log().verbose(" {1:6.3} average newton raphson loops", diff --git a/src/lib/netlist/solver/nld_matrix_solver.h b/src/lib/netlist/solver/nld_matrix_solver.h index 888f2c79309..eb265e7b55a 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.h +++ b/src/lib/netlist/solver/nld_matrix_solver.h @@ -29,6 +29,16 @@ namespace devices PREFER_BAND_MATRIX ) + P_ENUM(matrix_type_e, + SOR_MAT, + MAT_CR, + MAT, + SM, + W, + SOR, + GMRES + ) + struct solver_parameters_t { solver_parameters_t(device_t &parent) @@ -36,7 +46,7 @@ namespace devices /* iteration parameters */ , m_gs_sor(parent, "SOR_FACTOR", 1.059) - , m_method(parent, "METHOD", "MAT_CR") + , m_method(parent, "METHOD", matrix_type_e::MAT_CR) , m_accuracy(parent, "ACCURACY", 1e-7) , m_gs_loops(parent, "GS_LOOPS", 9) // Gauss-Seidel loops @@ -75,7 +85,7 @@ namespace devices param_double_t m_freq; param_double_t m_gs_sor; - param_str_t m_method; + param_enum_t m_method; param_double_t m_accuracy; param_num_t m_gs_loops; param_double_t m_gmin; @@ -99,16 +109,22 @@ namespace devices class terms_for_net_t : plib::nocopyassignmove { public: - terms_for_net_t(); + terms_for_net_t(analog_net_t * net = nullptr); void clear(); - void add(terminal_t *term, int net_other, bool sorted); + void add_terminal(terminal_t *term, int net_other, bool sorted); std::size_t count() const { return m_terms.size(); } terminal_t **terms() { return m_terms.data(); } + nl_double getV() const { return m_net->Q_Analog(); } + + void setV(nl_double v) { m_net->set_Q_Analog(v); } + + bool isNet(const analog_net_t * net) const { return net == m_net; } + std::size_t m_railstart; std::vector m_nz; /* all non zero for multiplication */ @@ -122,6 +138,7 @@ namespace devices std::vector m_connected_net_idx; private: + analog_net_t * m_net; std::vector m_terms; }; @@ -172,7 +189,7 @@ namespace devices NETLIB_RESETI(); public: - int get_net_idx(detail::net_t *net); + int get_net_idx(const analog_net_t *net) const; std::pair get_left_right_of_diag(std::size_t row, std::size_t diag); double get_weight_around_diag(std::size_t row, std::size_t diag); @@ -215,7 +232,7 @@ namespace devices void set_pointers() { - const std::size_t iN = this->m_nets.size(); + const std::size_t iN = this->m_terms.size(); std::size_t max_count = 0; std::size_t max_rail = 0; @@ -287,6 +304,52 @@ namespace devices } + template + void log_fill(const T &fill, M &mat) + { + /* FIXME: move this to the cr matrix class and use computed + * parallel ordering once it makes sense. + */ + + const std::size_t iN = fill.size(); + + std::vector levL(iN, 0); + std::vector levU(iN, 0); + + // parallel scheme for L x = y + for (std::size_t k = 0; k < iN; k++) + { + unsigned lm=0; + for (std::size_t j = 0; j 0; ) + { + unsigned lm=0; + for (std::size_t j = iN; --j > k; ) + if (fill[k][j] < M::FILL_INFINITY) + lm = std::max(lm, levU[j]); + levU[k] = 1+lm; + } + for (std::size_t k = 0; k < iN; k++) + { + unsigned fm = 0; + pstring ml = ""; + for (std::size_t j = 0; j < iN; j++) + { + ml += fill[k][j] == 0 ? 'X' : fill[k][j] < M::FILL_INFINITY ? '+' : '.'; + if (fill[k][j] < M::FILL_INFINITY) + if (fill[k][j] > fm) + fm = fill[k][j]; + } + this->log().verbose("{1:4} {2} {3:4} {4:4} {5:4} {6:4}", k, ml, levL[k], levU[k], mat.get_parallel_level(k), fm); + } + } + template using aligned_alloc = plib::aligned_allocator; @@ -296,10 +359,8 @@ namespace devices plib::pmatrix2d> m_mat_ptr; plib::pmatrix2d> m_connected_net_Vn; - plib::pmatrix2d m_test; - std::vector> m_terms; - std::vector m_nets; + std::vector> m_inps; std::vector> m_rails_temp; @@ -340,7 +401,7 @@ namespace devices const std::size_t iN = this->m_terms.size(); typename std::decay::type cerr = 0; for (std::size_t i = 0; i < iN; i++) - cerr = std::max(cerr, std::abs(V[i] - this->m_nets[i]->Q_Analog())); + cerr = std::max(cerr, std::abs(V[i] - this->m_terms[i]->getV())); return cerr; } @@ -349,7 +410,7 @@ namespace devices { const std::size_t iN = this->m_terms.size(); for (std::size_t i = 0; i < iN; i++) - this->m_nets[i]->set_Q_Analog(V[i]); + this->m_terms[i]->setV(V[i]); } template diff --git a/src/lib/netlist/solver/nld_ms_direct.h b/src/lib/netlist/solver/nld_ms_direct.h index 521fa21fe81..54deef49256 100644 --- a/src/lib/netlist/solver/nld_ms_direct.h +++ b/src/lib/netlist/solver/nld_ms_direct.h @@ -10,8 +10,8 @@ #include "nld_matrix_solver.h" #include "nld_solver.h" -#include "plib/mat_cr.h" #include "plib/vector_ops.h" +#include "plib/parray.h" #include #include diff --git a/src/lib/netlist/solver/nld_ms_gcr.h b/src/lib/netlist/solver/nld_ms_gcr.h index 20ff2314afe..4767168cdce 100644 --- a/src/lib/netlist/solver/nld_ms_gcr.h +++ b/src/lib/netlist/solver/nld_ms_gcr.h @@ -56,9 +56,7 @@ namespace devices using mat_index_type = typename plib::matrix_compressed_rows_t::index_type; - void csc_private(plib::putf8_fmt_writer &strm); - - using extsolver = void (*)(double * m_A, double * RHS, double * V); + void generate_code(plib::putf8_fmt_writer &strm); pstring static_compile_name(); @@ -68,7 +66,6 @@ namespace devices mat_type mat; - //extsolver m_proc; plib::dynproc m_proc; }; @@ -77,16 +74,6 @@ namespace devices // matrix_solver - GCR // ---------------------------------------------------------------------------------------- - // FIXME: namespace or static class member - template - std::size_t inline get_level(const V &v, std::size_t k) - { - for (std::size_t i = 0; i < v.size(); i++) - if (plib::container::contains(v[i], k)) - return i; - throw plib::pexception("Error in get_level"); - } - template void matrix_solver_GCR_t::vsetup(analog_net_t::list_t &nets) { @@ -113,48 +100,7 @@ namespace devices auto gr = mat.gaussian_extend_fill_mat(fill); - /* FIXME: move this to the cr matrix class and use computed - * parallel ordering once it makes sense. - */ - - std::vector levL(iN, 0); - std::vector levU(iN, 0); - - // parallel scheme for L x = y - for (std::size_t k = 0; k < iN; k++) - { - unsigned lm=0; - for (std::size_t j = 0; j 0; ) - { - unsigned lm=0; - for (std::size_t j = iN; --j > k; ) - if (fill[k][j] < decltype(mat)::FILL_INFINITY) - lm = std::max(lm, levU[j]); - levU[k] = 1+lm; - } - - - for (std::size_t k = 0; k < iN; k++) - { - unsigned fm = 0; - pstring ml = ""; - for (std::size_t j = 0; j < iN; j++) - { - ml += fill[k][j] == 0 ? 'X' : fill[k][j] < decltype(mat)::FILL_INFINITY ? '+' : '.'; - if (fill[k][j] < decltype(mat)::FILL_INFINITY) - if (fill[k][j] > fm) - fm = fill[k][j]; - } - this->log().verbose("{1:4} {2} {3:4} {4:4} {5:4} {6:4}", k, ml, levL[k], levU[k], get_level(mat.m_ge_par, k), fm); - } - + log_fill(fill, mat); mat.build_from_fill_mat(fill); @@ -197,7 +143,7 @@ namespace devices } template - void matrix_solver_GCR_t::csc_private(plib::putf8_fmt_writer &strm) + void matrix_solver_GCR_t::generate_code(plib::putf8_fmt_writer &strm) { const std::size_t iN = N(); @@ -265,7 +211,7 @@ namespace devices std::stringstream t; t.imbue(std::locale::classic()); plib::putf8_fmt_writer w(&t); - csc_private(w); + generate_code(w); std::hash::type>::type> h; return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num); @@ -281,7 +227,7 @@ namespace devices strm.writeline(plib::pfmt("extern \"C\" void {1}(double * __restrict m_A, double * __restrict RHS, double * __restrict V)\n")(name)); strm.writeline("{\n"); - csc_private(strm); + generate_code(strm); strm.writeline("}\n"); // some compilers (_WIN32, _WIN64, mac osx) need an explicit cast return std::pair(name, pstring(t.str())); diff --git a/src/lib/netlist/solver/nld_ms_gmres.h b/src/lib/netlist/solver/nld_ms_gmres.h index fc750e6822d..48e0cc9acb4 100644 --- a/src/lib/netlist/solver/nld_ms_gmres.h +++ b/src/lib/netlist/solver/nld_ms_gmres.h @@ -80,6 +80,7 @@ namespace devices } m_ops.build(fill); + this->log_fill(fill, m_ops.m_mat); /* build pointers into the compressed row format matrix for each terminal */ @@ -115,7 +116,7 @@ namespace devices for (std::size_t k = 0; k < iN; k++) { - this->m_new_V[k] = this->m_nets[k]->Q_Analog(); + this->m_new_V[k] = this->m_terms[k]->getV(); } const float_type accuracy = this->m_params.m_accuracy; diff --git a/src/lib/netlist/solver/nld_ms_sor.h b/src/lib/netlist/solver/nld_ms_sor.h index 0f10ca3d426..d886a28c0be 100644 --- a/src/lib/netlist/solver/nld_ms_sor.h +++ b/src/lib/netlist/solver/nld_ms_sor.h @@ -90,7 +90,7 @@ unsigned matrix_solver_SOR_t::vsolve_non_dynamic(const bool newton_rap const float_type * const Idr = this->m_Idrn[k]; auto other_cur_analog = this->m_connected_net_Vn[k]; - this->m_new_V[k] = this->m_nets[k]->Q_Analog(); + this->m_new_V[k] = this->m_terms[k]->getV(); for (std::size_t i = 0; i < term_count; i++) { diff --git a/src/lib/netlist/solver/nld_ms_sor_mat.h b/src/lib/netlist/solver/nld_ms_sor_mat.h index 46fce636b02..d0866a2ad08 100644 --- a/src/lib/netlist/solver/nld_ms_sor_mat.h +++ b/src/lib/netlist/solver/nld_ms_sor_mat.h @@ -166,7 +166,7 @@ namespace devices #endif for (std::size_t k = 0; k < iN; k++) - this->m_new_V[k] = this->m_nets[k]->Q_Analog(); + this->m_new_V[k] = this->m_terms[k]->getV(); do { resched = false; diff --git a/src/lib/netlist/solver/nld_solver.cpp b/src/lib/netlist/solver/nld_solver.cpp index 42bff9e88b4..1bb67e8875e 100644 --- a/src/lib/netlist/solver/nld_solver.cpp +++ b/src/lib/netlist/solver/nld_solver.cpp @@ -119,50 +119,33 @@ namespace devices template plib::unique_ptr NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername) { - if (m_params.m_method() == "SOR_MAT") + switch (m_params.m_method()) { - return create_it>(state(), solvername, m_params, size); - //typedef matrix_solver_SOR_mat_t solver_sor_mat; - //return plib::make_unique(state(), solvername, &m_params, size); - } - else if (m_params.m_method() == "MAT_CR") - { - if (size > 0) // GCR always outperforms MAT solver - { - return create_it>(state(), solvername, m_params, size); - } - else - { + case matrix_type_e::MAT_CR: + if (size > 0) // GCR always outperforms MAT solver + { + return create_it>(state(), solvername, m_params, size); + } + else + { + return create_it>(state(), solvername, m_params, size); + } + case matrix_type_e::SOR_MAT: + return create_it>(state(), solvername, m_params, size); + case matrix_type_e::MAT: return create_it>(state(), solvername, m_params, size); - } - } - else if (m_params.m_method() == "MAT") - { - return create_it>(state(), solvername, m_params, size); - } - else if (m_params.m_method() == "SM") - { - /* Sherman-Morrison Formula */ - return create_it>(state(), solvername, m_params, size); - } - else if (m_params.m_method() == "W") - { - /* Woodbury Formula */ - return create_it>(state(), solvername, m_params, size); - } - else if (m_params.m_method() == "SOR") - { - return create_it>(state(), solvername, m_params, size); - } - else if (m_params.m_method() == "GMRES") - { - return create_it>(state(), solvername, m_params, size); - } - else - { - log().fatal(MF_UNKNOWN_SOLVER_TYPE(m_params.m_method())); - return plib::unique_ptr(); + case matrix_type_e::SM: + /* Sherman-Morrison Formula */ + return create_it>(state(), solvername, m_params, size); + case matrix_type_e::W: + /* Woodbury Formula */ + return create_it>(state(), solvername, m_params, size); + case matrix_type_e::SOR: + return create_it>(state(), solvername, m_params, size); + case matrix_type_e::GMRES: + return create_it>(state(), solvername, m_params, size); } + return plib::unique_ptr(); } struct net_splitter diff --git a/src/mame/audio/nl_kidniki.cpp b/src/mame/audio/nl_kidniki.cpp index 3721796e2ae..2226a8e8063 100644 --- a/src/mame/audio/nl_kidniki.cpp +++ b/src/mame/audio/nl_kidniki.cpp @@ -375,15 +375,15 @@ NETLIST_START(kidniki) PARAM(Solver.DYNAMIC_LTE, 5e-4) PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6) PARAM(Solver.SORT_TYPE, "PREFER_IDENTITY_TOP_LEFT") + //PARAM(Solver.SORT_TYPE, "PREFER_BAND_MATRIX") #else - SOLVER(Solver, 24000) + //PARAM(Solver.SORT_TYPE, "PREFER_BAND_MATRIX") + SOLVER(Solver, 48000) PARAM(Solver.ACCURACY, 1e-7) PARAM(Solver.NR_LOOPS, 10000) - PARAM(Solver.GS_LOOPS, 300000) + PARAM(Solver.GS_LOOPS, 100) //PARAM(Solver.METHOD, "MAT_CR") PARAM(Solver.METHOD, "GMRES") - //PARAM(Solver.SOR_FACTOR, 1.73) - //PARAM(Solver.METHOD, "SOR") #endif #if (USE_FRONTIERS)