mirror of
https://github.com/holub/mame
synced 2025-07-05 09:57:47 +03:00
From 45% to 60% to 99%. That's the improvement achieved for a 89x89
audio matrix mostly solved by elimination. Cleaned up some code as well. [Couriersud]
This commit is contained in:
parent
b2a7d75ef0
commit
dad7a6bab5
@ -9,13 +9,14 @@
|
||||
NETLIST_START(dummy)
|
||||
SOLVER(Solver, 12000)
|
||||
PARAM(Solver.ACCURACY, 1e-8)
|
||||
PARAM(Solver.NR_LOOPS, 200)
|
||||
PARAM(Solver.GS_LOOPS, 4)
|
||||
PARAM(Solver.SOR_FACTOR, 1)
|
||||
PARAM(Solver.NR_LOOPS, 300)
|
||||
PARAM(Solver.GS_LOOPS, 3)
|
||||
//PARAM(Solver.GS_THRESHOLD, 99) // Force Gaussian elimination here
|
||||
PARAM(Solver.SOR_FACTOR, 1.05)
|
||||
#if 0
|
||||
PARAM(Solver.SOR_FACTOR, 1)
|
||||
//PARAM(Solver.SOR_FACTOR, 1)
|
||||
PARAM(Solver.DYNAMIC_TS, 1)
|
||||
PARAM(Solver.LTE, 1e1)
|
||||
PARAM(Solver.LTE, 1e-1)
|
||||
#endif
|
||||
//FIXME proper models!
|
||||
NET_MODEL(".model 2SC945 NPN(Is=2.04f Xti=3 Eg=1.11 Vaf=6 Bf=400 Ikf=20m Xtb=1.5 Br=3.377 Rc=1 Cjc=1p Mjc=.3333 Vjc=.75 Fc=.5 Cje=25p Mje=.3333 Vje=.75 Tr=450n Tf=20n Itf=0 Vtf=0 Xtf=0 VCEO=45V ICrating=150M MFG=Toshiba)")
|
||||
@ -352,13 +353,20 @@ NETLIST_START(opamp)
|
||||
|
||||
VCCS(G1)
|
||||
PARAM(G1.RI, RES_K(1000))
|
||||
#if 1
|
||||
PARAM(G1.G, 100) // typical OP-AMP amplification 100 * 1000 = 100000
|
||||
RES(RP1, 1000)
|
||||
CAP(CP1, 1.59e-5) // <== change to 1.59e-3 for 10Khz bandwidth
|
||||
#else
|
||||
PARAM(G1.G, 1) // typical OP-AMP amplification 100 * 1000 = 100000
|
||||
RES(RP1, 100000)
|
||||
CAP(CP1, 1.59e-7) // <== change to 1.59e-3 for 10Khz bandwidth
|
||||
#endif
|
||||
VCVS(EBUF)
|
||||
PARAM(EBUF.RO, 50)
|
||||
PARAM(EBUF.G, 1)
|
||||
|
||||
// PARAM(EBUF.RI, 1e20)
|
||||
// NET_C(EBUF.ON, GND)
|
||||
|
||||
NET_C(G1.ON, GND)
|
||||
@ -373,7 +381,15 @@ NETLIST_START(opamp)
|
||||
DIODE(DN,"1N914")
|
||||
|
||||
NET_C(DP.K, VCC)
|
||||
#if 1
|
||||
NET_C(DP.A, DN.K, RP1.1)
|
||||
#else
|
||||
RES(RDP, 1000)
|
||||
RES(RDN, 1000)
|
||||
NET_C(RDP.1, DP.A)
|
||||
NET_C(RDN.1, DN.K)
|
||||
NET_C(RDP.2, RDN.2, RP1.1)
|
||||
#endif
|
||||
NET_C(DN.A, GND)
|
||||
|
||||
NET_C(EBUF.IP, RP1.1)
|
||||
|
@ -36,11 +36,11 @@ protected:
|
||||
|
||||
ATTR_HOT int solve_non_dynamic(const bool newton_raphson);
|
||||
ATTR_HOT void build_LE_A();
|
||||
ATTR_HOT void build_LE_RHS();
|
||||
ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
|
||||
ATTR_HOT void LE_solve();
|
||||
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
|
||||
ATTR_HOT nl_double delta(const nl_double * RESTRICT V);
|
||||
ATTR_HOT void store(const nl_double * RESTRICT V, const bool store_RHS);
|
||||
ATTR_HOT void store(const nl_double * RESTRICT V);
|
||||
|
||||
/* bring the whole system to the current time
|
||||
* Don't schedule a new calculation time. The recalculation has to be
|
||||
@ -88,7 +88,7 @@ ATTR_HOT nl_double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next
|
||||
* FIXME: We should extend the logic to use either all nets or
|
||||
* only output nets.
|
||||
*/
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
for (unsigned k = 0, iN=N(); k < iN; k++)
|
||||
{
|
||||
netlist_analog_net_t *n = m_nets[k];
|
||||
|
||||
@ -209,7 +209,61 @@ ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_a
|
||||
|
||||
#endif
|
||||
|
||||
//ATTR_ALIGN nl_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
/* create a list of non zero elements right of the diagonal
|
||||
* These list anticipate the population of array elements by
|
||||
* Gaussian elimination.
|
||||
*/
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
t->m_nz.clear();
|
||||
|
||||
if (k==0)
|
||||
t->m_nzrd.clear();
|
||||
else
|
||||
{
|
||||
t->m_nzrd = m_terms[k-1]->m_nzrd;
|
||||
int j=0;
|
||||
while(j < t->m_nzrd.size())
|
||||
{
|
||||
if (t->m_nzrd[j] < k + 1)
|
||||
t->m_nzrd.remove_at(j);
|
||||
else
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < N(); j++)
|
||||
{
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
{
|
||||
if (!t->m_nzrd.contains(other[i]) && other[i] >= k + 1)
|
||||
t->m_nzrd.add(other[i]);
|
||||
if (!t->m_nz.contains(other[i]))
|
||||
t->m_nz.add(other[i]);
|
||||
}
|
||||
}
|
||||
psort_list(t->m_nzrd);
|
||||
|
||||
t->m_nz.add(k); // add diagonal
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
if(0)
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
printf("%3d: ", k);
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
printf(" %3d", m_terms[k]->m_nzrd[j]);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* save states
|
||||
*/
|
||||
save(NLNAME(m_RHS));
|
||||
save(NLNAME(m_last_RHS));
|
||||
save(NLNAME(m_last_V));
|
||||
@ -229,9 +283,10 @@ ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_a
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
|
||||
{
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
const unsigned iN = N();
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
for (unsigned i=0; i < N(); i++)
|
||||
for (unsigned i=0; i < iN; i++)
|
||||
m_A[k][i] = 0.0;
|
||||
|
||||
nl_double akk = 0.0;
|
||||
@ -247,14 +302,15 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
|
||||
m_A[k][k] += akk;
|
||||
|
||||
for (unsigned i = 0; i < railstart; i++)
|
||||
m_A[k][net_other[i]] += -go[i];
|
||||
m_A[k][net_other[i]] -= go[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS()
|
||||
ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double * RESTRICT rhs)
|
||||
{
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
const unsigned iN = N();
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double rhsk_a = 0.0;
|
||||
nl_double rhsk_b = 0.0;
|
||||
@ -271,7 +327,7 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS()
|
||||
//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
rhsk_b = rhsk_b + go[i] * *other_cur_analog[i];
|
||||
|
||||
m_RHS[k] = rhsk_a + rhsk_b;
|
||||
rhs[k] = rhsk_a + rhsk_b;
|
||||
}
|
||||
}
|
||||
|
||||
@ -314,16 +370,38 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / m_A[i][i];
|
||||
const double * RESTRICT s = &m_A[i][0];
|
||||
const int *p = m_terms[i]->m_nzrd.data();
|
||||
const unsigned e = m_terms[i]->m_nzrd.size();
|
||||
|
||||
/* Eliminate column i from row j */
|
||||
|
||||
for (unsigned j = i + 1; j < kN; j++)
|
||||
{
|
||||
const nl_double f1 = - m_A[j][i] * f;
|
||||
double * RESTRICT d = &m_A[j][0];
|
||||
const nl_double f1 = - d[i] * f;
|
||||
if (f1 != NL_FCONST(0.0))
|
||||
{
|
||||
for (unsigned k = i + 1; k < kN; k++)
|
||||
m_A[j][k] += m_A[i][k] * f1;
|
||||
#if 0
|
||||
/* The code below is 30% faster than the original
|
||||
* implementation which is given here for reference.
|
||||
*
|
||||
* for (unsigned k = i + 1; k < kN; k++)
|
||||
* m_A[j][k] = m_A[j][k] + m_A[i][k] * f1;
|
||||
*/
|
||||
double * RESTRICT d = &m_A[j][i+1];
|
||||
const double * RESTRICT s = &m_A[i][i+1];
|
||||
const int e = kN - i - 1;
|
||||
|
||||
for (int k = 0; k < e; k++)
|
||||
d[k] = d[k] + s[k] * f1;
|
||||
#else
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
{
|
||||
const unsigned pk = p[k];
|
||||
d[pk] += s[pk] * f1;
|
||||
}
|
||||
#endif
|
||||
m_RHS[j] += m_RHS[i] * f1;
|
||||
}
|
||||
}
|
||||
@ -341,9 +419,28 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
|
||||
{
|
||||
nl_double tmp = 0;
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
const double * RESTRICT A = &m_A[j][j+1];
|
||||
const double * RESTRICT xp = &x[j+1];
|
||||
const int e = kN - j - 1;
|
||||
for (int k = 0; k < e; k++)
|
||||
tmp += A[k] * xp[k];
|
||||
#else
|
||||
const double * RESTRICT A = &m_A[j][0];
|
||||
const int *p = m_terms[j]->m_nzrd.data();
|
||||
const unsigned e = m_terms[j]->m_nzrd.size();
|
||||
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
{
|
||||
const unsigned pk = p[k];
|
||||
tmp += A[pk] * x[pk];
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
for (unsigned k = j + 1; k < kN; k++)
|
||||
tmp += m_A[j][k] * x[k];
|
||||
|
||||
#endif
|
||||
x[j] = (m_RHS[j] - tmp) / m_A[j][j];
|
||||
}
|
||||
#if 0
|
||||
@ -363,40 +460,32 @@ template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT nl_double netlist_matrix_solver_direct_t<m_N, _storage_N>::delta(
|
||||
const nl_double * RESTRICT V)
|
||||
{
|
||||
/* FIXME: Ideally we should also include currents (RHS) here. This would
|
||||
* need a revaluation of the right hand side after voltages have been updated
|
||||
* and thus belong into a different calculation. This applies to all solvers.
|
||||
*/
|
||||
|
||||
const unsigned iN = this->N();
|
||||
nl_double cerr = 0;
|
||||
nl_double cerr2 = 0;
|
||||
for (unsigned i = 0; i < this->N(); i++)
|
||||
{
|
||||
const nl_double e = nl_math::abs(V[i] - this->m_nets[i]->m_cur_Analog);
|
||||
const nl_double e2 = nl_math::abs(m_RHS[i] - this->m_last_RHS[i]);
|
||||
cerr = (e > cerr ? e : cerr);
|
||||
cerr2 = (e2 > cerr2 ? e2 : cerr2);
|
||||
}
|
||||
// FIXME: Review
|
||||
return cerr + cerr2*NL_FCONST(100000.0);
|
||||
for (unsigned i = 0; i < iN; i++)
|
||||
cerr = std::max(cerr, nl_math::abs(V[i] - this->m_nets[i]->m_cur_Analog));
|
||||
return cerr;
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::store(
|
||||
const nl_double * RESTRICT V, const bool store_RHS)
|
||||
const nl_double * RESTRICT V)
|
||||
{
|
||||
for (unsigned i = 0; i < this->N(); i++)
|
||||
for (unsigned i = 0, iN=N(); i < iN; i++)
|
||||
{
|
||||
this->m_nets[i]->m_cur_Analog = V[i];
|
||||
}
|
||||
if (store_RHS)
|
||||
{
|
||||
for (unsigned i = 0; i < this->N(); i++)
|
||||
{
|
||||
this->m_last_RHS[i] = m_RHS[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT nl_double netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve()
|
||||
{
|
||||
solve_base<netlist_matrix_solver_direct_t>(this);
|
||||
this->solve_base(this);
|
||||
return this->compute_next_timestep();
|
||||
}
|
||||
|
||||
@ -408,17 +497,17 @@ ATTR_HOT int netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(
|
||||
|
||||
this->LE_back_subst(new_v);
|
||||
|
||||
if (this->is_dynamic())
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = delta(new_v);
|
||||
|
||||
store(new_v, true);
|
||||
store(new_v);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
store(new_v, false); // ==> No need to store RHS
|
||||
store(new_v);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -427,7 +516,11 @@ template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT inline int netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
this->build_LE_A();
|
||||
this->build_LE_RHS();
|
||||
this->build_LE_RHS(m_last_RHS);
|
||||
|
||||
for (unsigned i=0, iN=N(); i < iN; i++)
|
||||
m_RHS[i] = m_last_RHS[i];
|
||||
|
||||
this->LE_solve();
|
||||
|
||||
return this->solve_non_dynamic(newton_raphson);
|
||||
|
@ -38,7 +38,7 @@ ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNU
|
||||
{
|
||||
netlist_analog_net_t *net = m_nets[0];
|
||||
this->build_LE_A();
|
||||
this->build_LE_RHS();
|
||||
this->build_LE_RHS(m_RHS);
|
||||
//NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
|
||||
|
||||
nl_double new_val = m_RHS[0] / m_A[0][0];
|
||||
|
@ -39,7 +39,7 @@ ATTR_HOT nl_double netlist_matrix_solver_direct2_t::vsolve()
|
||||
ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
|
||||
{
|
||||
build_LE_A();
|
||||
build_LE_RHS();
|
||||
build_LE_RHS(m_RHS);
|
||||
|
||||
const nl_double a = m_A[0][0];
|
||||
const nl_double b = m_A[0][1];
|
||||
@ -53,13 +53,13 @@ ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNU
|
||||
if (is_dynamic())
|
||||
{
|
||||
nl_double err = this->delta(new_val);
|
||||
store(new_val, true);
|
||||
store(new_val);
|
||||
if (err > m_params.m_accuracy )
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
store(new_val, false);
|
||||
store(new_val);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -100,8 +100,7 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn
|
||||
* omega = 2.0 / (1.0 + nl_math::sqrt(1-rho))
|
||||
*/
|
||||
|
||||
const nl_double ws = this->m_params.m_sor; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
|
||||
//const nl_double ws = 2.0 / (1.0 + sin(3.14159 * 4 / (double) (this->N())));
|
||||
const nl_double ws = this->m_params.m_sor;
|
||||
|
||||
ATTR_ALIGN nl_double w[_storage_N];
|
||||
ATTR_ALIGN nl_double one_m_w[_storage_N];
|
||||
@ -114,34 +113,31 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn
|
||||
nl_double gabs_t = 0.0;
|
||||
nl_double RHS_t = 0.0;
|
||||
|
||||
const int term_count = this->m_terms[k]->count();
|
||||
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
|
||||
const nl_double * const RESTRICT go = this->m_terms[k]->go();
|
||||
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const nl_double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
|
||||
|
||||
new_V[k] = this->m_nets[k]->m_cur_Analog;
|
||||
|
||||
for (unsigned i = 0; i < term_count; i++)
|
||||
{
|
||||
const int term_count = this->m_terms[k]->count();
|
||||
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
|
||||
const nl_double * const RESTRICT go = this->m_terms[k]->go();
|
||||
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const nl_double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
|
||||
|
||||
for (int i = 0; i < term_count; i++)
|
||||
{
|
||||
gtot_t = gtot_t + gt[i];
|
||||
RHS_t = RHS_t + Idr[i];
|
||||
}
|
||||
|
||||
if (USE_GABS)
|
||||
for (int i = 0; i < term_count; i++)
|
||||
gabs_t = gabs_t + nl_math::abs(go[i]);
|
||||
|
||||
for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
|
||||
RHS_t = RHS_t + go[i] * *other_cur_analog[i];
|
||||
gtot_t = gtot_t + gt[i];
|
||||
RHS_t = RHS_t + Idr[i];
|
||||
}
|
||||
|
||||
for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
|
||||
RHS_t = RHS_t + go[i] * *other_cur_analog[i];
|
||||
|
||||
RHS[k] = RHS_t;
|
||||
|
||||
if (USE_GABS)
|
||||
{
|
||||
gabs_t *= NL_FCONST(0.95); // avoid rounding issues
|
||||
for (int i = 0; i < term_count; i++)
|
||||
gabs_t = gabs_t + nl_math::abs(go[i]);
|
||||
|
||||
gabs_t *= NL_FCONST(0.5); // derived by try and error
|
||||
if (gabs_t <= gtot_t)
|
||||
{
|
||||
w[k] = ws / gtot_t;
|
||||
@ -164,7 +160,7 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn
|
||||
|
||||
do {
|
||||
resched = false;
|
||||
|
||||
double err = 0;
|
||||
for (int k = 0; k < iN; k++)
|
||||
{
|
||||
const int * RESTRICT net_other = this->m_terms[k]->net_other();
|
||||
@ -177,10 +173,13 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn
|
||||
|
||||
const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
|
||||
|
||||
resched = resched || (nl_math::abs(new_val - new_V[k]) > accuracy);
|
||||
err = std::max(nl_math::abs(new_val - new_V[k]), err);
|
||||
new_V[k] = new_val;
|
||||
}
|
||||
|
||||
if (err > accuracy)
|
||||
resched = true;
|
||||
|
||||
resched_cnt++;
|
||||
} while (resched && (resched_cnt < this->m_params.m_gs_loops));
|
||||
|
||||
|
@ -30,11 +30,6 @@ public:
|
||||
, m_gs_fail(0)
|
||||
, m_gs_total(0)
|
||||
{
|
||||
pstring p = nl_util::environment("NETLIST_STATS");
|
||||
if (p != "")
|
||||
m_log_stats = (bool) p.as_long();
|
||||
else
|
||||
m_log_stats = false;
|
||||
}
|
||||
|
||||
virtual ~netlist_matrix_solver_SOR_mat_t() {}
|
||||
@ -53,8 +48,6 @@ private:
|
||||
nl_double m_lp_fact;
|
||||
int m_gs_fail;
|
||||
int m_gs_total;
|
||||
bool m_log_stats;
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -64,15 +57,15 @@ private:
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
void netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::log_stats()
|
||||
{
|
||||
if (this->m_stat_calculations != 0 && m_log_stats)
|
||||
if (this->m_stat_calculations != 0 && this->m_params.m_log_stats)
|
||||
{
|
||||
this->netlist().log("==============================================\n");
|
||||
this->netlist().log("Solver %s\n", this->name().cstr());
|
||||
this->netlist().log(" ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
|
||||
this->netlist().log(" has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
|
||||
this->netlist().log(" has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
|
||||
this->netlist().log(" %6.3f average newton raphson loops\n", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
|
||||
this->netlist().log(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average\n",
|
||||
this->netlist().log("==============================================");
|
||||
this->netlist().log("Solver %s", this->name().cstr());
|
||||
this->netlist().log(" ==> %d nets", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
|
||||
this->netlist().log(" has %s elements", this->is_dynamic() ? "dynamic" : "no dynamic");
|
||||
this->netlist().log(" has %s elements", this->is_timestep() ? "timestep" : "no timestep");
|
||||
this->netlist().log(" %6.3f average newton raphson loops", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
|
||||
this->netlist().log(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average",
|
||||
this->m_stat_calculations,
|
||||
this->m_stat_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
|
||||
this->m_gs_fail,
|
||||
@ -149,20 +142,19 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non
|
||||
|
||||
|
||||
ATTR_ALIGN nl_double new_v[_storage_N] = { 0.0 };
|
||||
const int iN = this->N();
|
||||
const unsigned iN = this->N();
|
||||
|
||||
bool resched = false;
|
||||
|
||||
int resched_cnt = 0;
|
||||
|
||||
this->build_LE_A();
|
||||
this->build_LE_RHS();
|
||||
this->build_LE_RHS(this->m_RHS);
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
static int ws_cnt = 0;
|
||||
ws_cnt++;
|
||||
if (0 && ws_cnt % 100 == 0)
|
||||
if (1 && ws_cnt % 200 == 0)
|
||||
{
|
||||
// update omega
|
||||
nl_double lambdaN = 0;
|
||||
@ -181,8 +173,8 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non
|
||||
for (int i=0; i<iN; i++)
|
||||
s = s + nl_math::abs(this->m_A[k][i]);
|
||||
akk = s / akk - 1.0;
|
||||
//if ( akk > lambdaN)
|
||||
// lambdaN = akk;
|
||||
if ( akk > lambdaN)
|
||||
lambdaN = akk;
|
||||
if (akk < lambda1)
|
||||
lambda1 = akk;
|
||||
#endif
|
||||
@ -191,99 +183,31 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non
|
||||
|
||||
//ws = 2.0 / (2.0 - lambdaN - lambda1);
|
||||
m_omega = 2.0 / (2.0 - lambda1);
|
||||
//printf("%f\n", ws);
|
||||
//printf("%f %f %f\n", m_omega, lambda1, lambdaN);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int k = 0; k < iN; k++)
|
||||
new_v[k] = this->m_nets[k]->m_cur_Analog;
|
||||
|
||||
#else
|
||||
{
|
||||
nl_double frob;
|
||||
frob = 0;
|
||||
nl_double rmin = 1e99, rmax = -1e99;
|
||||
for (int k = 0; k < iN; k++)
|
||||
{
|
||||
new_v[k] = this->m_nets[k]->m_cur_Analog;
|
||||
nl_double s=0.0;
|
||||
for (int i = 0; i < iN; i++)
|
||||
{
|
||||
frob += this->m_A[k][i] * this->m_A[k][i];
|
||||
s = s + nl_math::abs(this->m_A[k][i]);
|
||||
}
|
||||
|
||||
if (s<rmin)
|
||||
rmin = s;
|
||||
if (s>rmax)
|
||||
rmax = s;
|
||||
}
|
||||
#if 0
|
||||
nl_double frobA = nl_math::sqrt(frob /(iN));
|
||||
if (1 &&frobA < 1.0)
|
||||
//ws = 2.0 / (1.0 + nl_math::sqrt(1.0-frobA));
|
||||
ws = 2.0 / (2.0 - frobA);
|
||||
else
|
||||
ws = 1.0;
|
||||
ws = 0.9;
|
||||
#else
|
||||
// calculate an estimate for rho.
|
||||
// This is based on the Perron???Frobenius theorem for positive matrices.
|
||||
// No mathematical proof here. The following estimates the
|
||||
// optimal relaxation parameter pretty well. Unfortunately, the
|
||||
// overhead is bigger than the gain. Consequently the fast GS below
|
||||
// uses a fixed GS. One can however use this here to determine a
|
||||
// suitable parameter.
|
||||
nl_double rm = (rmax + rmin) * 0.5;
|
||||
if (rm < 1.0)
|
||||
ws = 2.0 / (1.0 + nl_math::sqrt(1.0-rm));
|
||||
else
|
||||
ws = 1.0;
|
||||
if (ws > 1.02 && rmax > 1.001)
|
||||
printf("rmin %f rmax %f ws %f\n", rmin, rmax, ws);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
// Frobenius norm for (D-L)^(-1)U
|
||||
//nl_double frobU;
|
||||
//nl_double frobL;
|
||||
//nl_double norm;
|
||||
do {
|
||||
resched = false;
|
||||
nl_double cerr = 0.0;
|
||||
//frobU = 0;
|
||||
//frobL = 0;
|
||||
//norm = 0;
|
||||
|
||||
for (int k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double Idrive = 0;
|
||||
//nl_double norm_t = 0;
|
||||
// Reduction loops need -ffast-math
|
||||
for (int i = 0; i < iN; i++)
|
||||
Idrive += this->m_A[k][i] * new_v[i];
|
||||
|
||||
#if 0
|
||||
for (int i = 0; i < iN; i++)
|
||||
{
|
||||
if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
|
||||
if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
|
||||
norm_t += nl_math::abs(this->m_A[k][i]);
|
||||
}
|
||||
#endif
|
||||
//if (norm_t > norm) norm = norm_t;
|
||||
#if 0
|
||||
const nl_double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
|
||||
const double * RESTRICT A = &this->m_A[k][0];
|
||||
const int *p = this->m_terms[k]->m_nz.data();
|
||||
const unsigned e = this->m_terms[k]->m_nz.size();
|
||||
|
||||
const nl_double e = nl_math::abs(new_val - new_v[k]);
|
||||
cerr = (e > cerr ? e : cerr);
|
||||
new_v[k] = new_val;
|
||||
#else
|
||||
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / this->m_A[k][k];
|
||||
const nl_double adelta = nl_math::abs(delta);
|
||||
cerr = (adelta > cerr ? adelta : cerr);
|
||||
for (unsigned i = 0; i < e; i++)
|
||||
Idrive = Idrive + A[p[i]] * new_v[p[i]];
|
||||
|
||||
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / A[k];
|
||||
cerr = std::max(cerr, nl_math::abs(delta));
|
||||
new_v[k] += delta;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (cerr > this->m_params.m_accuracy)
|
||||
@ -291,14 +215,9 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non
|
||||
resched = true;
|
||||
}
|
||||
resched_cnt++;
|
||||
//ATTR_UNUSED nl_double frobUL = nl_math::sqrt((frobU + frobL) / (double) (iN) / (double) (iN));
|
||||
} while (resched && (resched_cnt < this->m_params.m_gs_loops));
|
||||
//printf("Frobenius %f %f %f %f %f\n", nl_math::sqrt(frobU), nl_math::sqrt(frobL), frobUL, frobA, norm);
|
||||
//printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + nl_math::sqrt(1-frobUL)), 2.0 / (1.0 + nl_math::sqrt(1-frobA)) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
|
||||
//printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
|
||||
|
||||
|
||||
this->store(new_v, false);
|
||||
this->store(new_v);
|
||||
this->m_gs_total += resched_cnt;
|
||||
if (resched)
|
||||
{
|
||||
|
@ -81,6 +81,8 @@ class terms_t
|
||||
|
||||
unsigned m_railstart;
|
||||
|
||||
plist_t<int> m_nzrd; /* non zero right of the diagonal for elimination */
|
||||
plist_t<int> m_nz; /* all non zero for multiplication */
|
||||
private:
|
||||
plist_t<netlist_terminal_t *> m_term;
|
||||
plist_t<int> m_net_other;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#define PLISTS_H_
|
||||
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
#include "palloc.h"
|
||||
#include "pstring.h"
|
||||
@ -582,4 +583,21 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// sort a list ... slow, I am lazy
|
||||
// elements must support ">" operator.
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template<typename Class>
|
||||
static inline void psort_list(Class &sl)
|
||||
{
|
||||
for(int i = 0; i < (int) sl.size() - 1; i++)
|
||||
{
|
||||
for(int j = i + 1; j < sl.size(); j++)
|
||||
if(sl[i] > sl[j])
|
||||
std::swap(sl[i], sl[j]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* PLISTS_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user