mirror of
https://github.com/holub/mame
synced 2025-06-30 16:00:01 +03:00
netlist: prepare code so that matrices can be allocated in one chunk of
memory.
This commit is contained in:
parent
2ac1e54bd9
commit
f5bcee0db3
@ -586,7 +586,7 @@ namespace netlist
|
|||||||
|
|
||||||
virtual void reset() override;
|
virtual void reset() override;
|
||||||
private:
|
private:
|
||||||
ATTR_HOT void set_ptr(nl_double *ptr, const nl_double val)
|
ATTR_HOT void set_ptr(nl_double *ptr, const nl_double val)
|
||||||
{
|
{
|
||||||
if (ptr != NULL && *ptr != val)
|
if (ptr != NULL && *ptr != val)
|
||||||
{
|
{
|
||||||
|
@ -13,6 +13,13 @@
|
|||||||
#include "solver/nld_solver.h"
|
#include "solver/nld_solver.h"
|
||||||
#include "solver/vector_base.h"
|
#include "solver/vector_base.h"
|
||||||
|
|
||||||
|
/* Disabling dynamic allocation gives a ~10% boost in performance
|
||||||
|
* This flag has been added to support continuous storage for arrays
|
||||||
|
* going forward in case we implement cuda solvers in the future.
|
||||||
|
*/
|
||||||
|
#define NL_USE_DYNAMIC_ALLOCATION (0)
|
||||||
|
|
||||||
|
|
||||||
NETLIB_NAMESPACE_DEVICES_START()
|
NETLIB_NAMESPACE_DEVICES_START()
|
||||||
|
|
||||||
//#define nl_ext_double __float128 // slow, very slow
|
//#define nl_ext_double __float128 // slow, very slow
|
||||||
@ -43,7 +50,7 @@ protected:
|
|||||||
|
|
||||||
ATTR_HOT int solve_non_dynamic(const bool newton_raphson);
|
ATTR_HOT int solve_non_dynamic(const bool newton_raphson);
|
||||||
ATTR_HOT void build_LE_A();
|
ATTR_HOT void build_LE_A();
|
||||||
ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
|
ATTR_HOT void build_LE_RHS();
|
||||||
ATTR_HOT void LE_solve();
|
ATTR_HOT void LE_solve();
|
||||||
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
|
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
|
||||||
|
|
||||||
@ -60,10 +67,19 @@ protected:
|
|||||||
*/
|
*/
|
||||||
ATTR_HOT nl_double compute_next_timestep();
|
ATTR_HOT nl_double compute_next_timestep();
|
||||||
|
|
||||||
template <typename T1, typename T2>
|
|
||||||
inline nl_ext_double &A(const T1 r, const T2 c) { return m_A[r][c]; }
|
|
||||||
|
|
||||||
ATTR_ALIGN nl_double m_RHS[_storage_N];
|
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||||
|
template <typename T1, typename T2>
|
||||||
|
inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; }
|
||||||
|
template <typename T1>
|
||||||
|
inline nl_ext_double &RHS(const T1 &r) { return m_A[r * m_pitch + N()]; }
|
||||||
|
#else
|
||||||
|
template <typename T1, typename T2>
|
||||||
|
inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
|
||||||
|
template <typename T1>
|
||||||
|
inline nl_ext_double &RHS(const T1 &r) { return m_A[r][N()]; }
|
||||||
|
|
||||||
|
#endif
|
||||||
ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents
|
ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents
|
||||||
ATTR_ALIGN nl_double m_last_V[_storage_N];
|
ATTR_ALIGN nl_double m_last_V[_storage_N];
|
||||||
|
|
||||||
@ -71,7 +87,13 @@ protected:
|
|||||||
terms_t *m_rails_temp;
|
terms_t *m_rails_temp;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
static const unsigned m_pitch = (((_storage_N + 1) + 7) / 8) * 8;
|
||||||
|
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||||
|
ATTR_ALIGN nl_ext_double * RESTRICT m_A;
|
||||||
|
#else
|
||||||
|
ATTR_ALIGN nl_ext_double m_A[_storage_N][m_pitch];
|
||||||
|
#endif
|
||||||
|
//ATTR_ALIGN nl_ext_double m_RHSx[_storage_N];
|
||||||
|
|
||||||
const unsigned m_dim;
|
const unsigned m_dim;
|
||||||
};
|
};
|
||||||
@ -89,6 +111,9 @@ matrix_solver_direct_t<m_N, _storage_N>::~matrix_solver_direct_t()
|
|||||||
}
|
}
|
||||||
pfree_array(m_terms);
|
pfree_array(m_terms);
|
||||||
pfree_array(m_rails_temp);
|
pfree_array(m_rails_temp);
|
||||||
|
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||||
|
pfree_array(m_A);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned m_N, unsigned _storage_N>
|
template <unsigned m_N, unsigned _storage_N>
|
||||||
@ -304,7 +329,6 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
|
|||||||
/*
|
/*
|
||||||
* save states
|
* save states
|
||||||
*/
|
*/
|
||||||
save(NLNAME(m_RHS));
|
|
||||||
save(NLNAME(m_last_RHS));
|
save(NLNAME(m_last_RHS));
|
||||||
save(NLNAME(m_last_V));
|
save(NLNAME(m_last_V));
|
||||||
|
|
||||||
@ -312,6 +336,8 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
|
|||||||
{
|
{
|
||||||
pstring num = pfmt("{1}")(k);
|
pstring num = pfmt("{1}")(k);
|
||||||
|
|
||||||
|
save(RHS(k), "RHS" + num);
|
||||||
|
|
||||||
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
||||||
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
||||||
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
||||||
@ -350,7 +376,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned m_N, unsigned _storage_N>
|
template <unsigned m_N, unsigned _storage_N>
|
||||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double * RESTRICT rhs)
|
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS()
|
||||||
{
|
{
|
||||||
const unsigned iN = N();
|
const unsigned iN = N();
|
||||||
for (unsigned k = 0; k < iN; k++)
|
for (unsigned k = 0; k < iN; k++)
|
||||||
@ -370,7 +396,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double *
|
|||||||
//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
|
//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||||
rhsk_b = rhsk_b + go[i] * *other_cur_analog[i];
|
rhsk_b = rhsk_b + go[i] * *other_cur_analog[i];
|
||||||
|
|
||||||
rhs[k] = rhsk_a + rhsk_b;
|
RHS(k) = rhsk_a + rhsk_b;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -398,7 +424,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
|||||||
for (unsigned k = 0; k < kN; k++) {
|
for (unsigned k = 0; k < kN; k++) {
|
||||||
std::swap(A(i,k), A(maxrow,k));
|
std::swap(A(i,k), A(maxrow,k));
|
||||||
}
|
}
|
||||||
std::swap(m_RHS[i], m_RHS[maxrow]);
|
std::swap(RHS(i), RHS(maxrow));
|
||||||
}
|
}
|
||||||
/* FIXME: Singular matrix? */
|
/* FIXME: Singular matrix? */
|
||||||
const nl_double f = 1.0 / A(i,i);
|
const nl_double f = 1.0 / A(i,i);
|
||||||
@ -410,14 +436,18 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
|||||||
const nl_double f1 = - A(j,i) * f;
|
const nl_double f1 = - A(j,i) * f;
|
||||||
if (f1 != NL_FCONST(0.0))
|
if (f1 != NL_FCONST(0.0))
|
||||||
{
|
{
|
||||||
nl_double * RESTRICT pi = &m_A[i][i+1];
|
nl_double * RESTRICT pi = &A(i,i+1);
|
||||||
nl_double * RESTRICT pj = &m_A[j][i+1];
|
nl_double * RESTRICT pj = &A(j,i+1);
|
||||||
|
#if 1
|
||||||
|
vec_add_mult_scalar(kN-i,pj,f1,pi);
|
||||||
|
#else
|
||||||
vec_add_mult_scalar(kN-i-1,pj,f1,pi);
|
vec_add_mult_scalar(kN-i-1,pj,f1,pi);
|
||||||
//for (unsigned k = i+1; k < kN; k++)
|
//for (unsigned k = i+1; k < kN; k++)
|
||||||
// pj[k] = pj[k] + pi[k] * f1;
|
// pj[k] = pj[k] + pi[k] * f1;
|
||||||
//for (unsigned k = i+1; k < kN; k++)
|
//for (unsigned k = i+1; k < kN; k++)
|
||||||
//A(j,k) += A(i,k) * f1;
|
//A(j,k) += A(i,k) * f1;
|
||||||
m_RHS[j] += m_RHS[i] * f1;
|
RHS(j) += RHS(i) * f1;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -444,7 +474,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
|||||||
for (unsigned k = 0; k < e; k++)
|
for (unsigned k = 0; k < e; k++)
|
||||||
A(j,p[k]) += A(i,p[k]) * f1;
|
A(j,p[k]) += A(i,p[k]) * f1;
|
||||||
#endif
|
#endif
|
||||||
m_RHS[j] += m_RHS[i] * f1;
|
RHS(j) += RHS(i) * f1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -464,7 +494,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
|
|||||||
nl_double tmp = 0;
|
nl_double tmp = 0;
|
||||||
for (unsigned k = j+1; k < kN; k++)
|
for (unsigned k = j+1; k < kN; k++)
|
||||||
tmp += A(j,k) * x[k];
|
tmp += A(j,k) * x[k];
|
||||||
x[j] = (m_RHS[j] - tmp) / A(j,j);
|
x[j] = (RHS(j) - tmp) / A(j,j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -481,7 +511,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
|
|||||||
const unsigned pk = p[k];
|
const unsigned pk = p[k];
|
||||||
tmp += A(j,pk) * x[pk];
|
tmp += A(j,pk) * x[pk];
|
||||||
}
|
}
|
||||||
x[j] = (m_RHS[j] - tmp) / A(j,j);
|
x[j] = (RHS(j) - tmp) / A(j,j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -505,7 +535,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst_full(
|
|||||||
//ip=indx[i]; USE_PIVOT_SEARCH
|
//ip=indx[i]; USE_PIVOT_SEARCH
|
||||||
//sum=b[ip];
|
//sum=b[ip];
|
||||||
//b[ip]=b[i];
|
//b[ip]=b[i];
|
||||||
double sum=m_RHS[i];//x[i];
|
double sum=RHS(i);//x[i];
|
||||||
for (int j=0; j < i; j++)
|
for (int j=0; j < i; j++)
|
||||||
sum -= A(i,j) * x[j];
|
sum -= A(i,j) * x[j];
|
||||||
x[i]=sum;
|
x[i]=sum;
|
||||||
@ -580,10 +610,10 @@ template <unsigned m_N, unsigned _storage_N>
|
|||||||
ATTR_HOT inline int matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
ATTR_HOT inline int matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||||
{
|
{
|
||||||
this->build_LE_A();
|
this->build_LE_A();
|
||||||
this->build_LE_RHS(m_last_RHS);
|
this->build_LE_RHS();
|
||||||
|
|
||||||
for (unsigned i=0, iN=N(); i < iN; i++)
|
for (unsigned i=0, iN=N(); i < iN; i++)
|
||||||
m_RHS[i] = m_last_RHS[i];
|
m_last_RHS[i] = RHS(i);
|
||||||
|
|
||||||
this->LE_solve();
|
this->LE_solve();
|
||||||
|
|
||||||
@ -599,7 +629,9 @@ matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const solver_par
|
|||||||
{
|
{
|
||||||
m_terms = palloc_array(terms_t *, N());
|
m_terms = palloc_array(terms_t *, N());
|
||||||
m_rails_temp = palloc_array(terms_t, N());
|
m_rails_temp = palloc_array(terms_t, N());
|
||||||
|
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||||
|
m_A = palloc_array(nl_ext_double, N() * m_pitch);
|
||||||
|
#endif
|
||||||
for (unsigned k = 0; k < N(); k++)
|
for (unsigned k = 0; k < N(); k++)
|
||||||
{
|
{
|
||||||
m_terms[k] = palloc(terms_t);
|
m_terms[k] = palloc(terms_t);
|
||||||
@ -615,7 +647,9 @@ matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const eSolverTyp
|
|||||||
{
|
{
|
||||||
m_terms = palloc_array(terms_t *, N());
|
m_terms = palloc_array(terms_t *, N());
|
||||||
m_rails_temp = palloc_array(terms_t, N());
|
m_rails_temp = palloc_array(terms_t, N());
|
||||||
|
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||||
|
m_A = palloc_array(nl_ext_double, N() * m_pitch);
|
||||||
|
#endif
|
||||||
for (unsigned k = 0; k < N(); k++)
|
for (unsigned k = 0; k < N(); k++)
|
||||||
{
|
{
|
||||||
m_terms[k] = palloc(terms_t);
|
m_terms[k] = palloc(terms_t);
|
||||||
|
@ -40,10 +40,10 @@ ATTR_HOT inline int matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED cons
|
|||||||
{
|
{
|
||||||
analog_net_t *net = m_nets[0];
|
analog_net_t *net = m_nets[0];
|
||||||
this->build_LE_A();
|
this->build_LE_A();
|
||||||
this->build_LE_RHS(m_RHS);
|
this->build_LE_RHS();
|
||||||
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
|
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
|
||||||
|
|
||||||
nl_double new_val = m_RHS[0] / A(0,0);
|
nl_double new_val = RHS(0) / A(0,0);
|
||||||
|
|
||||||
nl_double e = (new_val - net->Q_Analog());
|
nl_double e = (new_val - net->Q_Analog());
|
||||||
nl_double cerr = nl_math::abs(e);
|
nl_double cerr = nl_math::abs(e);
|
||||||
|
@ -39,7 +39,7 @@ ATTR_HOT nl_double matrix_solver_direct2_t::vsolve()
|
|||||||
ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
|
ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
|
||||||
{
|
{
|
||||||
build_LE_A();
|
build_LE_A();
|
||||||
build_LE_RHS(m_RHS);
|
build_LE_RHS();
|
||||||
|
|
||||||
const nl_double a = A(0,0);
|
const nl_double a = A(0,0);
|
||||||
const nl_double b = A(0,1);
|
const nl_double b = A(0,1);
|
||||||
@ -47,8 +47,8 @@ ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED cons
|
|||||||
const nl_double d = A(1,1);
|
const nl_double d = A(1,1);
|
||||||
|
|
||||||
nl_double new_val[2];
|
nl_double new_val[2];
|
||||||
new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);
|
new_val[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
|
||||||
new_val[0] = (m_RHS[0] - b * new_val[1]) / a;
|
new_val[0] = (RHS(0) - b * new_val[1]) / a;
|
||||||
|
|
||||||
if (is_dynamic())
|
if (is_dynamic())
|
||||||
{
|
{
|
||||||
|
@ -129,7 +129,7 @@ ATTR_HOT inline int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic
|
|||||||
int resched_cnt = 0;
|
int resched_cnt = 0;
|
||||||
|
|
||||||
this->build_LE_A();
|
this->build_LE_A();
|
||||||
this->build_LE_RHS(this->m_RHS);
|
this->build_LE_RHS();
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static int ws_cnt = 0;
|
static int ws_cnt = 0;
|
||||||
@ -184,7 +184,7 @@ ATTR_HOT inline int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic
|
|||||||
for (unsigned i = 0; i < e; i++)
|
for (unsigned i = 0; i < e; i++)
|
||||||
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
|
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
|
||||||
|
|
||||||
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / this->A(k,k);
|
const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
|
||||||
cerr = std::max(cerr, nl_math::abs(delta));
|
cerr = std::max(cerr, nl_math::abs(delta));
|
||||||
new_v[k] += delta;
|
new_v[k] += delta;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user