From f5bcee0db382987888d308ab61f02b43ee3b0169 Mon Sep 17 00:00:00 2001 From: couriersud Date: Fri, 25 Mar 2016 02:17:54 +0100 Subject: [PATCH] netlist: prepare code so that matrices can be allocated in one chunk of memory. --- src/lib/netlist/nl_base.h | 2 +- src/lib/netlist/solver/nld_ms_direct.h | 74 ++++++++++++++++++------- src/lib/netlist/solver/nld_ms_direct1.h | 4 +- src/lib/netlist/solver/nld_ms_direct2.h | 6 +- src/lib/netlist/solver/nld_ms_sor_mat.h | 4 +- 5 files changed, 62 insertions(+), 28 deletions(-) diff --git a/src/lib/netlist/nl_base.h b/src/lib/netlist/nl_base.h index 24f2cde585a..bf7e8e41c41 100644 --- a/src/lib/netlist/nl_base.h +++ b/src/lib/netlist/nl_base.h @@ -586,7 +586,7 @@ namespace netlist virtual void reset() override; private: - ATTR_HOT void set_ptr(nl_double *ptr, const nl_double val) + ATTR_HOT void set_ptr(nl_double *ptr, const nl_double val) { if (ptr != NULL && *ptr != val) { diff --git a/src/lib/netlist/solver/nld_ms_direct.h b/src/lib/netlist/solver/nld_ms_direct.h index bdcec31ad44..ab67795507f 100644 --- a/src/lib/netlist/solver/nld_ms_direct.h +++ b/src/lib/netlist/solver/nld_ms_direct.h @@ -13,6 +13,13 @@ #include "solver/nld_solver.h" #include "solver/vector_base.h" +/* Disabling dynamic allocation gives a ~10% boost in performance + * This flag has been added to support continuous storage for arrays + * going forward in case we implement cuda solvers in the future. + */ +#define NL_USE_DYNAMIC_ALLOCATION (0) + + NETLIB_NAMESPACE_DEVICES_START() //#define nl_ext_double __float128 // slow, very slow @@ -43,7 +50,7 @@ protected: ATTR_HOT int solve_non_dynamic(const bool newton_raphson); ATTR_HOT void build_LE_A(); - ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs); + ATTR_HOT void build_LE_RHS(); ATTR_HOT void LE_solve(); ATTR_HOT void LE_back_subst(nl_double * RESTRICT x); @@ -60,10 +67,19 @@ protected: */ ATTR_HOT nl_double compute_next_timestep(); - template - inline nl_ext_double &A(const T1 r, const T2 c) { return m_A[r][c]; } - ATTR_ALIGN nl_double m_RHS[_storage_N]; +#if (NL_USE_DYNAMIC_ALLOCATION) + template + inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; } + template + inline nl_ext_double &RHS(const T1 &r) { return m_A[r * m_pitch + N()]; } +#else + template + inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; } + template + inline nl_ext_double &RHS(const T1 &r) { return m_A[r][N()]; } + +#endif ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents ATTR_ALIGN nl_double m_last_V[_storage_N]; @@ -71,7 +87,13 @@ protected: terms_t *m_rails_temp; private: - ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8]; + static const unsigned m_pitch = (((_storage_N + 1) + 7) / 8) * 8; +#if (NL_USE_DYNAMIC_ALLOCATION) + ATTR_ALIGN nl_ext_double * RESTRICT m_A; +#else + ATTR_ALIGN nl_ext_double m_A[_storage_N][m_pitch]; +#endif + //ATTR_ALIGN nl_ext_double m_RHSx[_storage_N]; const unsigned m_dim; }; @@ -89,6 +111,9 @@ matrix_solver_direct_t::~matrix_solver_direct_t() } pfree_array(m_terms); pfree_array(m_rails_temp); +#if (NL_USE_DYNAMIC_ALLOCATION) + pfree_array(m_A); +#endif } template @@ -304,7 +329,6 @@ ATTR_COLD void matrix_solver_direct_t::vsetup(analog_net_t::lis /* * save states */ - save(NLNAME(m_RHS)); save(NLNAME(m_last_RHS)); save(NLNAME(m_last_V)); @@ -312,6 +336,8 @@ ATTR_COLD void matrix_solver_direct_t::vsetup(analog_net_t::lis { pstring num = pfmt("{1}")(k); + save(RHS(k), "RHS" + num); + save(m_terms[k]->go(),"GO" + num, m_terms[k]->count()); save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count()); save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count()); @@ -350,7 +376,7 @@ ATTR_HOT void matrix_solver_direct_t::build_LE_A() } template -ATTR_HOT void matrix_solver_direct_t::build_LE_RHS(nl_double * RESTRICT rhs) +ATTR_HOT void matrix_solver_direct_t::build_LE_RHS() { const unsigned iN = N(); for (unsigned k = 0; k < iN; k++) @@ -370,7 +396,7 @@ ATTR_HOT void matrix_solver_direct_t::build_LE_RHS(nl_double * //rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog(); rhsk_b = rhsk_b + go[i] * *other_cur_analog[i]; - rhs[k] = rhsk_a + rhsk_b; + RHS(k) = rhsk_a + rhsk_b; } } @@ -398,7 +424,7 @@ ATTR_HOT void matrix_solver_direct_t::LE_solve() for (unsigned k = 0; k < kN; k++) { std::swap(A(i,k), A(maxrow,k)); } - std::swap(m_RHS[i], m_RHS[maxrow]); + std::swap(RHS(i), RHS(maxrow)); } /* FIXME: Singular matrix? */ const nl_double f = 1.0 / A(i,i); @@ -410,14 +436,18 @@ ATTR_HOT void matrix_solver_direct_t::LE_solve() const nl_double f1 = - A(j,i) * f; if (f1 != NL_FCONST(0.0)) { - nl_double * RESTRICT pi = &m_A[i][i+1]; - nl_double * RESTRICT pj = &m_A[j][i+1]; + nl_double * RESTRICT pi = &A(i,i+1); + nl_double * RESTRICT pj = &A(j,i+1); +#if 1 + vec_add_mult_scalar(kN-i,pj,f1,pi); +#else vec_add_mult_scalar(kN-i-1,pj,f1,pi); //for (unsigned k = i+1; k < kN; k++) // pj[k] = pj[k] + pi[k] * f1; //for (unsigned k = i+1; k < kN; k++) //A(j,k) += A(i,k) * f1; - m_RHS[j] += m_RHS[i] * f1; + RHS(j) += RHS(i) * f1; +#endif } } } @@ -444,7 +474,7 @@ ATTR_HOT void matrix_solver_direct_t::LE_solve() for (unsigned k = 0; k < e; k++) A(j,p[k]) += A(i,p[k]) * f1; #endif - m_RHS[j] += m_RHS[i] * f1; + RHS(j) += RHS(i) * f1; } } } @@ -464,7 +494,7 @@ ATTR_HOT void matrix_solver_direct_t::LE_back_subst( nl_double tmp = 0; for (unsigned k = j+1; k < kN; k++) tmp += A(j,k) * x[k]; - x[j] = (m_RHS[j] - tmp) / A(j,j); + x[j] = (RHS(j) - tmp) / A(j,j); } } else @@ -481,7 +511,7 @@ ATTR_HOT void matrix_solver_direct_t::LE_back_subst( const unsigned pk = p[k]; tmp += A(j,pk) * x[pk]; } - x[j] = (m_RHS[j] - tmp) / A(j,j); + x[j] = (RHS(j) - tmp) / A(j,j); } } } @@ -505,7 +535,7 @@ ATTR_HOT void matrix_solver_direct_t::LE_back_subst_full( //ip=indx[i]; USE_PIVOT_SEARCH //sum=b[ip]; //b[ip]=b[i]; - double sum=m_RHS[i];//x[i]; + double sum=RHS(i);//x[i]; for (int j=0; j < i; j++) sum -= A(i,j) * x[j]; x[i]=sum; @@ -580,10 +610,10 @@ template ATTR_HOT inline int matrix_solver_direct_t::vsolve_non_dynamic(const bool newton_raphson) { this->build_LE_A(); - this->build_LE_RHS(m_last_RHS); + this->build_LE_RHS(); for (unsigned i=0, iN=N(); i < iN; i++) - m_RHS[i] = m_last_RHS[i]; + m_last_RHS[i] = RHS(i); this->LE_solve(); @@ -599,7 +629,9 @@ matrix_solver_direct_t::matrix_solver_direct_t(const solver_par { m_terms = palloc_array(terms_t *, N()); m_rails_temp = palloc_array(terms_t, N()); - +#if (NL_USE_DYNAMIC_ALLOCATION) + m_A = palloc_array(nl_ext_double, N() * m_pitch); +#endif for (unsigned k = 0; k < N(); k++) { m_terms[k] = palloc(terms_t); @@ -615,7 +647,9 @@ matrix_solver_direct_t::matrix_solver_direct_t(const eSolverTyp { m_terms = palloc_array(terms_t *, N()); m_rails_temp = palloc_array(terms_t, N()); - +#if (NL_USE_DYNAMIC_ALLOCATION) + m_A = palloc_array(nl_ext_double, N() * m_pitch); +#endif for (unsigned k = 0; k < N(); k++) { m_terms[k] = palloc(terms_t); diff --git a/src/lib/netlist/solver/nld_ms_direct1.h b/src/lib/netlist/solver/nld_ms_direct1.h index fe5a439a9db..81e8c329edf 100644 --- a/src/lib/netlist/solver/nld_ms_direct1.h +++ b/src/lib/netlist/solver/nld_ms_direct1.h @@ -40,10 +40,10 @@ ATTR_HOT inline int matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED cons { analog_net_t *net = m_nets[0]; this->build_LE_A(); - this->build_LE_RHS(m_RHS); + this->build_LE_RHS(); //NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]); - nl_double new_val = m_RHS[0] / A(0,0); + nl_double new_val = RHS(0) / A(0,0); nl_double e = (new_val - net->Q_Analog()); nl_double cerr = nl_math::abs(e); diff --git a/src/lib/netlist/solver/nld_ms_direct2.h b/src/lib/netlist/solver/nld_ms_direct2.h index 06f00302d3c..4a6d768fe06 100644 --- a/src/lib/netlist/solver/nld_ms_direct2.h +++ b/src/lib/netlist/solver/nld_ms_direct2.h @@ -39,7 +39,7 @@ ATTR_HOT nl_double matrix_solver_direct2_t::vsolve() ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson) { build_LE_A(); - build_LE_RHS(m_RHS); + build_LE_RHS(); const nl_double a = A(0,0); const nl_double b = A(0,1); @@ -47,8 +47,8 @@ ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED cons const nl_double d = A(1,1); nl_double new_val[2]; - new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c); - new_val[0] = (m_RHS[0] - b * new_val[1]) / a; + new_val[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c); + new_val[0] = (RHS(0) - b * new_val[1]) / a; if (is_dynamic()) { diff --git a/src/lib/netlist/solver/nld_ms_sor_mat.h b/src/lib/netlist/solver/nld_ms_sor_mat.h index 4312c3039be..34c40c34d79 100644 --- a/src/lib/netlist/solver/nld_ms_sor_mat.h +++ b/src/lib/netlist/solver/nld_ms_sor_mat.h @@ -129,7 +129,7 @@ ATTR_HOT inline int matrix_solver_SOR_mat_t::vsolve_non_dynamic int resched_cnt = 0; this->build_LE_A(); - this->build_LE_RHS(this->m_RHS); + this->build_LE_RHS(); #if 0 static int ws_cnt = 0; @@ -184,7 +184,7 @@ ATTR_HOT inline int matrix_solver_SOR_mat_t::vsolve_non_dynamic for (unsigned i = 0; i < e; i++) Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]]; - const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / this->A(k,k); + const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k); cerr = std::max(cerr, nl_math::abs(delta)); new_v[k] += delta; }