From 1d875f33fe9ee512644eb069ea59e1b2bd504652 Mon Sep 17 00:00:00 2001 From: couriersud Date: Thu, 20 Aug 2015 00:31:18 +0200 Subject: [PATCH] Increase performance from 53% to 69% for matrix size around 30x30. These matrices are e.g. used in congo bongo without optimisation. (nw) --- nl_examples/congo_bongo.c | 2 +- src/emu/netlist/solver/nld_ms_direct.h | 60 +++++++++++++++----------- src/emu/netlist/solver/nld_solver.h | 3 +- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/nl_examples/congo_bongo.c b/nl_examples/congo_bongo.c index b227d742d44..facd4054c00 100644 --- a/nl_examples/congo_bongo.c +++ b/nl_examples/congo_bongo.c @@ -101,7 +101,7 @@ NETLIST_START(dummy) PARAM(XU13.D.MODEL, "MB3614(TYPE=1)") #endif -#if 0 +#if 1 OPTIMIZE_FRONTIER(C51.1, RES_K(20), 50) OPTIMIZE_FRONTIER(R77.2, RES_K(20), 50) diff --git a/src/emu/netlist/solver/nld_ms_direct.h b/src/emu/netlist/solver/nld_ms_direct.h index 2ee7a939e9d..fa570995f36 100644 --- a/src/emu/netlist/solver/nld_ms_direct.h +++ b/src/emu/netlist/solver/nld_ms_direct.h @@ -265,6 +265,33 @@ ATTR_COLD void matrix_solver_direct_t::vsetup(analog_net_t::lis psort_list(t->m_nz); } + /* create a list of non zero elements below diagonal k + * This should reduce cache misses ... + */ + + bool touched[_storage_N][_storage_N] = { false }; + for (unsigned k = 0; k < N(); k++) + { + m_terms[k]->m_nzbd.clear(); + for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++) + touched[k][m_terms[k]->m_nz[j]] = true; + } + + for (unsigned k = 0; k < N(); k++) + { + for (unsigned row = k + 1; row < N(); row++) + { + if (touched[row][k]) + { + if (!m_terms[k]->m_nzbd.contains(row)) + m_terms[k]->m_nzbd.add(row); + for (unsigned col = k; col < N(); col++) + if (touched[k][col]) + touched[row][col] = true; + } + } + } + if (0) for (unsigned k = 0; k < N(); k++) { @@ -393,37 +420,22 @@ ATTR_HOT void matrix_solver_direct_t::LE_solve() { /* FIXME: Singular matrix? 
*/ const nl_double f = 1.0 / A(i,i); - const unsigned *p = m_terms[i]->m_nzrd.data(); + const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data(); const unsigned e = m_terms[i]->m_nzrd.size(); /* Eliminate column i from row j */ - for (unsigned j = i + 1; j < kN; j++) + const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data(); + const unsigned eb = m_terms[i]->m_nzbd.size(); + for (unsigned jb = 0; jb < eb; jb++) { - if (A(j,i) != NL_FCONST(0.0)) + const unsigned j = pb[jb]; + const nl_double f1 = - A(j,i) * f; + for (unsigned k = 0; k < e; k++) { - const nl_double f1 = - A(j,i) * f; - #if 0 - /* The code below is 30% faster than the original - * implementation which is given here for reference. - * - * for (unsigned k = i + 1; k < kN; k++) - * m_A[j][k] = m_A[j][k] + m_A[i][k] * f1; - */ - double * RESTRICT d = &m_A[j][i+1]; - const double * RESTRICT s = &m_A[i][i+1]; - const int e = kN - i - 1; - for (int k = 0; k < e; k++) - d[k] = d[k] + s[k] * f1; - #else - for (unsigned k = 0; k < e; k++) - { - const unsigned pk = p[k]; - A(j,pk) += A(i,pk) * f1; - } - #endif - m_RHS[j] += m_RHS[i] * f1; + A(j,p[k]) += A(i,p[k]) * f1; } + m_RHS[j] += m_RHS[i] * f1; } } } diff --git a/src/emu/netlist/solver/nld_solver.h b/src/emu/netlist/solver/nld_solver.h index 19dfa87ed11..cf03708582f 100644 --- a/src/emu/netlist/solver/nld_solver.h +++ b/src/emu/netlist/solver/nld_solver.h @@ -84,8 +84,9 @@ class terms_t unsigned m_railstart; - plist_t m_nzrd; /* non zero right of the diagonal for elimination */ plist_t m_nz; /* all non zero for multiplication */ + plist_t m_nzrd; /* non zero right of the diagonal for elimination */ + plist_t m_nzbd; /* non zero below the diagonal for elimination */ private: plist_t m_term; plist_t m_net_other;