From d9df811529f5858a0ae737ad5dee3aecf496bfec Mon Sep 17 00:00:00 2001 From: couriersud Date: Fri, 15 Apr 2016 02:00:15 +0200 Subject: [PATCH] Significant speed improvement: - added a new solver using compressed row format - fixed sorting As a result, netlist performance on kidniki nearly doubled. The performance increase is mainly due to the fact that sorting decreases the number of operations for gaussian elimination of the kidniki matrix from ~7800 to 707. In addition, compressed row format improves L1 usage. [Couriersud] --- nl_examples/kidniki.c | 3 +- src/lib/netlist/nl_util.h | 3 + src/lib/netlist/solver/nld_ms_direct.h | 9 +- src/lib/netlist/solver/nld_ms_gcr.h | 280 ++++++++++++++++++++++++ src/lib/netlist/solver/nld_ms_gmres.h | 41 ++-- src/lib/netlist/solver/nld_ms_sor.h | 2 +- src/lib/netlist/solver/nld_ms_sor_mat.h | 4 +- src/lib/netlist/solver/nld_ms_w.h | 37 ++-- src/lib/netlist/solver/nld_solver.cpp | 22 +- src/lib/netlist/solver/vector_base.h | 20 +- src/mame/audio/irem.cpp | 4 +- 11 files changed, 349 insertions(+), 76 deletions(-) create mode 100644 src/lib/netlist/solver/nld_ms_gcr.h diff --git a/nl_examples/kidniki.c b/nl_examples/kidniki.c index 6616a5c2a5d..6b2a5c66215 100644 --- a/nl_examples/kidniki.c +++ b/nl_examples/kidniki.c @@ -15,7 +15,8 @@ NETLIST_START(dummy) PARAM(Solver.GS_LOOPS, 1) PARAM(Solver.GS_THRESHOLD, 6) //PARAM(Solver.ITERATIVE, "W") - PARAM(Solver.ITERATIVE, "MAT") + PARAM(Solver.ITERATIVE, "MAT_CR") + //PARAM(Solver.ITERATIVE, "MAT") //PARAM(Solver.ITERATIVE, "GMRES") //PARAM(Solver.ITERATIVE, "SOR") PARAM(Solver.DYNAMIC_TS, 0) diff --git a/src/lib/netlist/nl_util.h b/src/lib/netlist/nl_util.h index 50c545c22e3..ab09eac0408 100644 --- a/src/lib/netlist/nl_util.h +++ b/src/lib/netlist/nl_util.h @@ -64,6 +64,9 @@ public: template static T sqrt(const T &x) { return std::sqrt(x); } + template + static T hypot(const T &x, const T &y) { return std::hypot(x, y); } + // this one has an accuracy of better than 5%. That's enough for our purpose // add c3 and it'll be better than 1% diff --git a/src/lib/netlist/solver/nld_ms_direct.h b/src/lib/netlist/solver/nld_ms_direct.h index 211b4919dff..1f72fa081d6 100644 --- a/src/lib/netlist/solver/nld_ms_direct.h +++ b/src/lib/netlist/solver/nld_ms_direct.h @@ -307,7 +307,7 @@ void matrix_solver_direct_t::LE_solve() { x_i[p] = i; x_start[p] = chunks * p; - x_stop[p] = std::min(chunks*(p+1), eb); + x_stop[p] = nl_math::min(chunks*(p+1), eb); if (p::LE_solve() const auto &nzrd = m_terms[i]->m_nzrd; const auto &nzbd = m_terms[i]->m_nzbd; - /* Eliminate column i from row j */ - for (auto & j : nzbd) { - //__builtin_prefetch((const void*)(&A(j+2,0)),0,0); - const nl_double f1 = - A(j,i) * f; + const nl_double f1 = -f * A(j,i); for (auto & k : nzrd) A(j,k) += A(i,k) * f1; //RHS(j) += RHS(i) * f1; - } #endif + } } } } diff --git a/src/lib/netlist/solver/nld_ms_gcr.h b/src/lib/netlist/solver/nld_ms_gcr.h new file mode 100644 index 00000000000..a7a0dd52ba4 --- /dev/null +++ b/src/lib/netlist/solver/nld_ms_gcr.h @@ -0,0 +1,280 @@ +// license:GPL-2.0+ +// copyright-holders:Couriersud +/* + * nld_ms_gcr.h + * + * Gaussian elimination using compressed row format. + * + * Fow w==1 we will do the classic Gauss-Seidel approach + * + */ + +#ifndef NLD_MS_GCR_H_ +#define NLD_MS_GCR_H_ + +#include + +#include "solver/mat_cr.h" +#include "solver/nld_ms_direct.h" +#include "solver/nld_solver.h" +#include "solver/vector_base.h" + +#define NL_USE_SSE 0 + +NETLIB_NAMESPACE_DEVICES_START() + +template +class matrix_solver_GCR_t: public matrix_solver_direct_t +{ +public: + + matrix_solver_GCR_t(const solver_parameters_t *params, int size) + : matrix_solver_direct_t(matrix_solver_t::GAUSSIAN_ELIMINATION, params, size) + { + } + + virtual ~matrix_solver_GCR_t() + { + } + + virtual void vsetup(analog_net_t::list_t &nets) override; + virtual int vsolve_non_dynamic(const bool newton_raphson) override; + +private: + + pvector_t m_term_cr[_storage_N]; + mat_cr_t<_storage_N> mat; + nl_double m_A[_storage_N * _storage_N]; + +}; + +// ---------------------------------------------------------------------------------------- +// matrix_solver - GMRES +// ---------------------------------------------------------------------------------------- + +template +void matrix_solver_GCR_t::vsetup(analog_net_t::list_t &nets) +{ + matrix_solver_direct_t::vsetup(nets); + + unsigned nz = 0; + const unsigned iN = this->N(); + + /* build the final matrix */ + + bool touched[_storage_N][_storage_N] = { { false } }; + for (unsigned k = 0; k < iN; k++) + { + for (auto & j : this->m_terms[k]->m_nz) + touched[k][j] = true; + } + + unsigned ops = 0; + for (unsigned k = 0; k < iN; k++) + { + ops++; // 1/A(k,k) + for (unsigned row = k + 1; row < iN; row++) + { + if (touched[row][k]) + { + ops++; + for (unsigned col = k + 1; col < iN; col++) + if (touched[k][col]) + { + touched[row][col] = true; + ops += 2; + } + } + } + } + + + for (unsigned k=0; km_terms[k]->m_railstart;j++) + { + for (unsigned i = mat.ia[k]; im_terms[k]->net_other()[j] == (int) mat.ja[i]) + { + m_term_cr[k].push_back(i); + break; + } + nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); + } + } + + mat.ia[iN] = nz; + mat.nz_num = nz; + + this->log().verbose("Ops: {1} Occupancy ratio: {2}\n", ops, (double) nz / double (iN * iN)); +} + +template +int matrix_solver_GCR_t::vsolve_non_dynamic(const bool newton_raphson) +{ + const unsigned iN = this->N(); + + ATTR_ALIGN nl_double RHS[_storage_N]; + ATTR_ALIGN nl_double new_V[_storage_N]; + + for (unsigned i=0, e=mat.nz_num; im_terms[k]->count(); + const unsigned railstart = this->m_terms[k]->m_railstart; + const nl_double * const RESTRICT gt = this->m_terms[k]->gt(); + const nl_double * const RESTRICT go = this->m_terms[k]->go(); + const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr(); + const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog(); + +#if (NL_USE_SSE) + __m128d mg = _mm_set_pd(0.0, 0.0); + __m128d mr = _mm_set_pd(0.0, 0.0); + unsigned i = 0; + for (; i < term_count - 1; i+=2) + { + mg = _mm_add_pd(mg, _mm_loadu_pd(>[i])); + mr = _mm_add_pd(mr, _mm_loadu_pd(&Idr[i])); + } + gtot_t = _mm_cvtsd_f64(mg) + _mm_cvtsd_f64(_mm_unpackhi_pd(mg,mg)); + RHS_t = _mm_cvtsd_f64(mr) + _mm_cvtsd_f64(_mm_unpackhi_pd(mr,mr)); + for (; i < term_count; i++) + { + gtot_t += gt[i]; + RHS_t += Idr[i]; + } +#else + for (unsigned i = 0; i < term_count; i++) + { + gtot_t += gt[i]; + RHS_t += Idr[i]; + } +#endif + for (unsigned i = railstart; i < term_count; i++) + RHS_t += go[i] * *other_cur_analog[i]; + + RHS[k] = RHS_t; + + // add diagonal element + m_A[mat.diag[k]] = gtot_t; + + for (unsigned i = 0; i < railstart; i++) + { + const unsigned pi = m_term_cr[k][i]; + m_A[pi] -= go[i]; + } + } + mat.ia[iN] = mat.nz_num; + + /* now solve it */ + + for (unsigned i = 0; i < iN - 1; i++) + { + const auto &nzbd = this->m_terms[i]->m_nzbd; + + if (nzbd.size() > 0) + { + unsigned pi = mat.diag[i]; + const nl_double f = 1.0 / m_A[pi++]; + const unsigned piie = mat.ia[i+1]; + + for (auto & j : nzbd) + { + // proceed to column i + //__builtin_prefetch(&m_A[mat.diag[j+1]], 1); + unsigned pj = mat.ia[j]; + + while (mat.ja[pj] < i) + pj++; + + const nl_double f1 = - m_A[pj++] * f; + + // subtract row i from j */ + for (unsigned pii = pi; pii= 0; j--) + { + //__builtin_prefetch(&new_V[j-1], 1); + //if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0); +#if (NL_USE_SSE) + __m128d tmp = _mm_set_pd1(0.0); + const unsigned e = mat.ia[j+1]; + unsigned pk = mat.diag[j] + 1; + for (; pk < e - 1; pk+=2) + { + //tmp += m_A[pk] * new_V[mat.ja[pk]]; + tmp = _mm_add_pd(tmp, _mm_mul_pd(_mm_set_pd(m_A[pk], m_A[pk+1]), + _mm_set_pd(new_V[mat.ja[pk]], new_V[mat.ja[pk+1]]))); + } + double tmpx = _mm_cvtsd_f64(tmp) + _mm_cvtsd_f64(_mm_unpackhi_pd(tmp,tmp)); + for (; pk < e; pk++) + { + tmpx += m_A[pk] * new_V[mat.ja[pk]]; + } + new_V[j] = (RHS[j] - tmpx) / m_A[mat.diag[j]]; +#else + double tmp = 0; + const unsigned e = mat.ia[j+1]; + for (unsigned pk = mat.diag[j] + 1; pk < e; pk++) + { + tmp += m_A[pk] * new_V[mat.ja[pk]]; + } + new_V[j] = (RHS[j] - tmp) / m_A[mat.diag[j]]; +#endif + } + this->m_stat_calculations++; + + if (newton_raphson) + { + nl_double err = this->delta(new_V); + + this->store(new_V); + + return (err > this->m_params.m_accuracy) ? 2 : 1; + } + else + { + this->store(new_V); + return 1; + } +} + +NETLIB_NAMESPACE_DEVICES_END() + +#endif /* NLD_MS_GCR_H_ */ diff --git a/src/lib/netlist/solver/nld_ms_gmres.h b/src/lib/netlist/solver/nld_ms_gmres.h index 826819f756e..8c47f67e2b4 100644 --- a/src/lib/netlist/solver/nld_ms_gmres.h +++ b/src/lib/netlist/solver/nld_ms_gmres.h @@ -27,7 +27,7 @@ class matrix_solver_GMRES_t: public matrix_solver_direct_t public: matrix_solver_GMRES_t(const solver_parameters_t *params, int size) - : matrix_solver_direct_t(matrix_solver_t::GAUSS_SEIDEL, params, size) + : matrix_solver_direct_t(matrix_solver_t::GAUSSIAN_ELIMINATION, params, size) , m_use_iLU_preconditioning(true) , m_use_more_precise_stop_condition(false) , m_accuracy_mult(1.0) @@ -49,6 +49,7 @@ private: bool m_use_iLU_preconditioning; bool m_use_more_precise_stop_condition; + nl_double m_accuracy_mult; // FXIME: Save state mat_cr_t<_storage_N> mat; @@ -60,9 +61,8 @@ private: nl_double m_ht[_storage_N + 1][_storage_N]; /* (mr + 1), mr */ nl_double m_s[_storage_N + 1]; /* mr + 1 */ nl_double m_v[_storage_N + 1][_storage_N]; /*(mr + 1), n */ - //double m_y[_storage_N]; /* mr + 1 */ + nl_double m_y[_storage_N + 1]; /* mr + 1 */ - nl_double m_accuracy_mult; // FXIME: Save state }; // ---------------------------------------------------------------------------------------- @@ -124,7 +124,6 @@ int matrix_solver_GMRES_t::vsolve_non_dynamic(const bool newton //nz_num = 0; ATTR_ALIGN nl_double RHS[_storage_N]; ATTR_ALIGN nl_double new_V[_storage_N]; - ATTR_ALIGN nl_double l_V[_storage_N]; for (unsigned i=0, e=mat.nz_num; i::vsolve_non_dynamic(const bool newton const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr(); const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog(); - l_V[k] = new_V[k] = this->m_nets[k]->m_cur_Analog; + new_V[k] = this->m_nets[k]->m_cur_Analog; for (unsigned i = 0; i < term_count; i++) { @@ -170,7 +169,7 @@ int matrix_solver_GMRES_t::vsolve_non_dynamic(const bool newton int mr = iN; if (iN > 3 ) mr = (int) sqrt(iN) * 2; - int iter = std::max(1, this->m_params.m_gs_loops); + int iter = nl_math::max(1, this->m_params.m_gs_loops); int gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy); int failed = mr * iter; @@ -185,21 +184,15 @@ int matrix_solver_GMRES_t::vsolve_non_dynamic(const bool newton if (newton_raphson) { - nl_double err = 0; - for (unsigned k = 0; k < iN; k++) - err = std::max(nl_math::abs(l_V[k] - new_V[k]), err); + nl_double err = this->delta(new_V); - for (unsigned k = 0; k < iN; k++) - this->m_nets[k]->m_cur_Analog += 1.0 * (new_V[k] - this->m_nets[k]->m_cur_Analog); - if (err > accuracy) - return 2; - else - return 1; + this->store(new_V); + + return (err > this->m_params.m_accuracy) ? 2 : 1; } else { - for (unsigned k = 0; k < iN; k++) - this->m_nets[k]->m_cur_Analog = new_V[k]; + this->store(new_V); return 1; } } @@ -266,12 +259,12 @@ int matrix_solver_GMRES_t::solve_ilu_gmres (nl_double * RESTRIC mat.mult_vec(m_A, t, Ax); mat.solveLUx(m_LU, Ax); - const nl_double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy; + const nl_double rho_to_accuracy = nl_math::sqrt(vecmult2(n, Ax)) / accuracy; rho_delta = accuracy * rho_to_accuracy; } else - rho_delta = accuracy * std::sqrt((double) n) * m_accuracy_mult; + rho_delta = accuracy * nl_math::sqrt((double) n) * m_accuracy_mult; for (unsigned itr = 0; itr < restart_max; itr++) { @@ -291,7 +284,7 @@ int matrix_solver_GMRES_t::solve_ilu_gmres (nl_double * RESTRIC mat.solveLUx(m_LU, residual); } - rho = std::sqrt(vecmult2(n, residual)); + rho = nl_math::sqrt(vecmult2(n, residual)); vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]); @@ -315,7 +308,7 @@ int matrix_solver_GMRES_t::solve_ilu_gmres (nl_double * RESTRIC m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]); vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]); } - m_ht[k1][k] = std::sqrt(vecmult2(n, m_v[k1])); + m_ht[k1][k] = nl_math::sqrt(vecmult2(n, m_v[k1])); if (m_ht[k1][k] != 0.0) vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]); @@ -323,7 +316,7 @@ int matrix_solver_GMRES_t::solve_ilu_gmres (nl_double * RESTRIC for (unsigned j = 0; j < k; j++) givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]); - mu = std::hypot(m_ht[k][k], m_ht[k1][k]); + mu = nl_math::hypot(m_ht[k][k], m_ht[k1][k]); m_c[k] = m_ht[k][k] / mu; m_s[k] = -m_ht[k1][k] / mu; @@ -332,7 +325,7 @@ int matrix_solver_GMRES_t::solve_ilu_gmres (nl_double * RESTRIC givens_mult(m_c[k], m_s[k], m_g[k], m_g[k1]); - rho = std::abs(m_g[k1]); + rho = nl_math::abs(m_g[k1]); itr_used = itr_used + 1; @@ -347,8 +340,6 @@ int matrix_solver_GMRES_t::solve_ilu_gmres (nl_double * RESTRIC /* didn't converge within accuracy */ last_k = mr - 1; - nl_double m_y[_storage_N + 1]; - /* Solve the system H * y = g */ /* x += m_v[j] * m_y[j] */ for (int i = last_k; i >= 0; i--) diff --git a/src/lib/netlist/solver/nld_ms_sor.h b/src/lib/netlist/solver/nld_ms_sor.h index 1805b23a9d0..801f2efa1a0 100644 --- a/src/lib/netlist/solver/nld_ms_sor.h +++ b/src/lib/netlist/solver/nld_ms_sor.h @@ -146,7 +146,7 @@ int matrix_solver_SOR_t::vsolve_non_dynamic(const bool newton_r const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k]; - err = std::max(nl_math::abs(new_val - new_V[k]), err); + err = nl_math::max(nl_math::abs(new_val - new_V[k]), err); new_V[k] = new_val; } diff --git a/src/lib/netlist/solver/nld_ms_sor_mat.h b/src/lib/netlist/solver/nld_ms_sor_mat.h index 7f615fa4ac4..4c6ceee1eb1 100644 --- a/src/lib/netlist/solver/nld_ms_sor_mat.h +++ b/src/lib/netlist/solver/nld_ms_sor_mat.h @@ -106,7 +106,7 @@ nl_double matrix_solver_SOR_mat_t::vsolve() // FIXME: used to be 1e90, but this would not be compatible with float if (sqo > NL_FCONST(1e-20)) - m_lp_fact = std::min(nl_math::sqrt(sq/sqo), (nl_double) 2.0); + m_lp_fact = nl_math::min(nl_math::sqrt(sq/sqo), (nl_double) 2.0); else m_lp_fact = NL_FCONST(0.0); } @@ -190,7 +190,7 @@ int matrix_solver_SOR_mat_t::vsolve_non_dynamic(const bool newt Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]]; const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k); - cerr = std::max(cerr, nl_math::abs(delta)); + cerr = nl_math::max(cerr, nl_math::abs(delta)); new_v[k] += delta; } diff --git a/src/lib/netlist/solver/nld_ms_w.h b/src/lib/netlist/solver/nld_ms_w.h index cd5af327b25..bfb55989765 100644 --- a/src/lib/netlist/solver/nld_ms_w.h +++ b/src/lib/netlist/solver/nld_ms_w.h @@ -125,9 +125,6 @@ private: template matrix_solver_w_t::~matrix_solver_w_t() { -#if (NL_USE_DYNAMIC_ALLOCATION) - pfree_array(m_A); -#endif } template @@ -276,17 +273,22 @@ int matrix_solver_w_t::solve_non_dynamic(ATTR_UNUSED const bool /* construct w = transform(V) * y * dim: rowcount x iN * */ - nl_double w[_storage_N] = {0}; + nl_double w[_storage_N]; for (unsigned i = 0; i < rowcount; i++) + { + const unsigned r = rows[i]; + double tmp = 0.0; for (unsigned k = 0; k < iN; k++) - w[i] += VT(rows[i],k) * new_V[k]; + tmp += VT(r,k) * new_V[k]; + w[i] = tmp; + } for (unsigned i = 0; i < rowcount; i++) for (unsigned k=0; k< rowcount; k++) H[i][k] = 0.0; for (unsigned i = 0; i < rowcount; i++) - H[i][i] += 1.0; + H[i][i] = 1.0; /* Construct H = (I + VT*Z) */ for (unsigned i = 0; i < rowcount; i++) for (unsigned k=0; k< colcount[i]; k++) @@ -348,16 +350,17 @@ int matrix_solver_w_t::solve_non_dynamic(ATTR_UNUSED const bool } m_cnt++; - for (unsigned i=0; i 1e-6) + printf("%s failed on row %d: %f RHS: %f\n", this->name().cstr(), i, nl_math::abs(tmp-RHS(i)), RHS(i)); } - if (std::fabs(tmp-RHS(i)) > 1e-6) - printf("%s failed on row %d: %f RHS: %f\n", this->name().cstr(), i, std::fabs(tmp-RHS(i)), RHS(i)); - } if (newton_raphson) { nl_double err = delta(new_V); @@ -392,9 +395,6 @@ matrix_solver_w_t::matrix_solver_w_t(const solver_parameters_t ,m_cnt(0) , m_dim(size) { -#if (NL_USE_DYNAMIC_ALLOCATION) - m_A = palloc_array(nl_ext_double, N() * m_pitch); -#endif for (unsigned k = 0; k < N(); k++) { m_last_RHS[k] = 0.0; @@ -407,9 +407,6 @@ matrix_solver_w_t::matrix_solver_w_t(const eSolverType type, co ,m_cnt(0) , m_dim(size) { -#if (NL_USE_DYNAMIC_ALLOCATION) - m_A = palloc_array(nl_ext_double, N() * m_pitch); -#endif for (unsigned k = 0; k < N(); k++) { m_last_RHS[k] = 0.0; diff --git a/src/lib/netlist/solver/nld_solver.cpp b/src/lib/netlist/solver/nld_solver.cpp index 216a2566c05..5924966efa2 100644 --- a/src/lib/netlist/solver/nld_solver.cpp +++ b/src/lib/netlist/solver/nld_solver.cpp @@ -43,7 +43,7 @@ #if 1 #include "nld_ms_direct.h" -//#include "nld_ms_gcr.h" +#include "nld_ms_gcr.h" #else #include "nld_ms_direct_lu.h" #endif @@ -223,7 +223,7 @@ ATTR_COLD void matrix_solver_t::setup_matrix() pfree(m_rails_temp[k]); // no longer needed m_rails_temp.clear(); -#if 0 +#if 1 /* Sort in descending order by number of connected matrix voltages. * The idea is, that for Gauss-Seidel algo the first voltage computed @@ -245,15 +245,15 @@ ATTR_COLD void matrix_solver_t::setup_matrix() * */ - int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1) * -1; + int sort_order = -1;//(type() == GAUSS_SEIDEL ? 1 : -1); - for (unsigned k = 0; k < iN / 2; k++) - for (unsigned i = 0; i < iN - 1; i++) + for (unsigned k = 0; k < iN - 1; k++) + for (unsigned i = k+1; i < iN; i++) { - if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0) + if (((int) m_terms[k]->m_railstart - (int) m_terms[i]->m_railstart) * sort_order < 0) { - std::swap(m_terms[i], m_terms[i+1]); - std::swap(m_nets[i], m_nets[i+1]); + std::swap(m_terms[i], m_terms[k]); + std::swap(m_nets[i], m_nets[k]); } } @@ -720,10 +720,14 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const bool use_sp typedef matrix_solver_SOR_mat_t solver_sor_mat; return palloc(solver_sor_mat(&m_params, size)); } + else if (pstring("MAT_CR").equals(m_iterative_solver)) + { + typedef matrix_solver_GCR_t solver_mat; + return palloc(solver_mat(&m_params, size)); + } else if (pstring("MAT").equals(m_iterative_solver)) { typedef matrix_solver_direct_t solver_mat; - //typedef matrix_solver_GCR_t solver_mat; return palloc(solver_mat(&m_params, size)); } else if (pstring("SM").equals(m_iterative_solver)) diff --git a/src/lib/netlist/solver/vector_base.h b/src/lib/netlist/solver/vector_base.h index c1449aad595..1e8e33388ad 100644 --- a/src/lib/netlist/solver/vector_base.h +++ b/src/lib/netlist/solver/vector_base.h @@ -36,14 +36,14 @@ private: #endif template -inline void vec_set (const std::size_t n, const T &scalar, T * RESTRICT result) +inline void vec_set (const std::size_t & n, const T &scalar, T * RESTRICT result) { for ( std::size_t i = 0; i < n; i++ ) result[i] = scalar; } template -inline T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT a2 ) +inline T vecmult (const std::size_t & n, const T * RESTRICT a1, const T * RESTRICT a2 ) { T value = 0.0; for ( std::size_t i = 0; i < n; i++ ) @@ -52,7 +52,7 @@ inline T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT } template -inline T vecmult2 (const std::size_t n, const T *a1) +inline T vecmult2 (const std::size_t & n, const T *a1) { T value = 0.0; for ( std::size_t i = 0; i < n; i++ ) @@ -64,7 +64,7 @@ inline T vecmult2 (const std::size_t n, const T *a1) } template -inline void vec_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result) +inline void vec_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T & scalar, T * RESTRICT result) { for ( std::size_t i = 0; i < n; i++ ) { @@ -73,37 +73,37 @@ inline void vec_mult_scalar (const std::size_t n, const T * RESTRICT v, const T } template -inline void vec_add_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result) +inline void vec_add_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T scalar, T * RESTRICT result) { for ( std::size_t i = 0; i < n; i++ ) result[i] += scalar * v[i]; } -inline void vec_add_ip(const std::size_t n, const double * RESTRICT v, double * RESTRICT result) +inline void vec_add_ip(const std::size_t & n, const double * RESTRICT v, double * RESTRICT result) { for ( std::size_t i = 0; i < n; i++ ) result[i] += v[i]; } template -inline void vec_sub(const std::size_t n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result) +inline void vec_sub(const std::size_t & n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result) { for ( std::size_t i = 0; i < n; i++ ) result[i] = v1[i] - v2[i]; } template -inline void vec_scale (const std::size_t n, T * RESTRICT v, const T scalar) +inline void vec_scale (const std::size_t & n, T * RESTRICT v, const T scalar) { for ( std::size_t i = 0; i < n; i++ ) v[i] = scalar * v[i]; } -inline double vec_maxabs(const std::size_t n, const double * RESTRICT v) +inline double vec_maxabs(const std::size_t & n, const double * RESTRICT v) { double ret = 0.0; for ( std::size_t i = 0; i < n; i++ ) - ret = std::max(ret, std::abs(v[i])); + ret = nl_math::max(ret, nl_math::abs(v[i])); return ret; } diff --git a/src/mame/audio/irem.cpp b/src/mame/audio/irem.cpp index 0a7469091fc..19306bb804e 100644 --- a/src/mame/audio/irem.cpp +++ b/src/mame/audio/irem.cpp @@ -419,8 +419,8 @@ NETLIST_START(kidniki_interface) PARAM(Solver.NR_LOOPS, 300) PARAM(Solver.GS_LOOPS, 1) PARAM(Solver.GS_THRESHOLD, 6) - PARAM(Solver.ITERATIVE, "SOR") - //PARAM(Solver.ITERATIVE, "MAT") + //PARAM(Solver.ITERATIVE, "SOR") + PARAM(Solver.ITERATIVE, "MAT_CR") //PARAM(Solver.ITERATIVE, "GMRES") PARAM(Solver.PARALLEL, 0) PARAM(Solver.SOR_FACTOR, 1.00)