Converted USE_PIVOT into runtime option PIVOT. Fixed some issues for nl_double == float. (nw)
couriersud 2015-08-18 01:12:45 +02:00
parent 683ddc8d7d
commit eacd7ef4b2
14 changed files with 884 additions and 174 deletions
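Why the "nl_double == float" fixes matter: when nl_double is typedef'd to float, helpers declared with double * parameters no longer accept the solver's arrays, and bare double literals such as 1.0 promote mixed expressions back to double. The sketch below is self-contained and hypothetical (the NL_FCONST stand-in and the vec_set_d name are assumptions, not the project's actual definitions); it only illustrates the pattern that the templated helpers in this commit enable.

#include <cstddef>

typedef float nl_double;                               // assumption: single-precision build
#define NL_FCONST(x) static_cast<nl_double>(x)         // stand-in for the real macro

// Old style: fixed to double, unusable once the arrays become float.
inline void vec_set_d(const std::size_t n, const double scalar, double * result)
{
	for (std::size_t i = 0; i < n; i++)
		result[i] = scalar;
}

// New style from the diff: works for float and double alike.
template<typename T>
inline void vec_set(const std::size_t n, const T &scalar, T * result)
{
	for (std::size_t i = 0; i < n; i++)
		result[i] = scalar;
}

int main()
{
	nl_double v[4];
	vec_set(4, NL_FCONST(1.0), v);    // T deduced as float, no double promotion
	//vec_set_d(4, 1.0, v);           // would not compile: float* is not double*
	return 0;
}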

View File

@ -38,11 +38,13 @@ NETLIST_START(dummy)
SOLVER(Solver, 24000)
PARAM(Solver.ACCURACY, 1e-8)
PARAM(Solver.NR_LOOPS, 150)
PARAM(Solver.NR_LOOPS, 9000)
PARAM(Solver.SOR_FACTOR, 0.001)
PARAM(Solver.GS_LOOPS, 1)
//PARAM(Solver.GS_THRESHOLD, 99)
PARAM(Solver.ITERATIVE, "SOR")
PARAM(Solver.PARALLEL, 0)
PARAM(Solver.PIVOT, 0)
LOCAL_SOURCE(congob_lib)
INCLUDE(congob_lib)
@ -99,6 +101,7 @@ NETLIST_START(dummy)
PARAM(XU13.D.MODEL, "MB3614(TYPE=1)")
#endif
#if 0
OPTIMIZE_FRONTIER(C51.1, RES_K(20), 50)
OPTIMIZE_FRONTIER(R77.2, RES_K(20), 50)
@ -109,7 +112,7 @@ NETLIST_START(dummy)
OPTIMIZE_FRONTIER(R90.2, RES_K(100), 50)
OPTIMIZE_FRONTIER(R92.2, RES_K(15), 50)
#endif
NETLIST_END()
NETLIST_START(CongoBongo_schematics)

View File

@ -169,7 +169,7 @@ NETLIB_UPDATE_PARAM(POT)
m_R2.update_dev();
m_R1.set_R(std::max(m_R.Value() * v, netlist().gmin()));
m_R2.set_R(std::max(m_R.Value() * (1.0 - v), netlist().gmin()));
m_R2.set_R(std::max(m_R.Value() * (NL_FCONST(1.0) - v), netlist().gmin()));
}

View File

@ -106,7 +106,6 @@
//============================================================
#define USE_MATRIX_GS (0)
#define USE_PIVOT_SEARCH (0)
#define USE_GABS (1)
// savings are eaten up by effort
// FIXME: Convert into solver parameter

View File

@ -21,7 +21,8 @@ struct mat_cr_t
unsigned ja[_storage_N * _storage_N];
unsigned diag[_storage_N]; /* n */
void mult_vec(const double * RESTRICT A, const double * RESTRICT x, double * RESTRICT res)
template<typename T>
void mult_vec(const T * RESTRICT A, const T * RESTRICT x, T * RESTRICT res)
{
/*
* res = A * x
@ -41,7 +42,7 @@ struct mat_cr_t
}
}
void incomplete_LU_factorization(const double * RESTRICT A, double * RESTRICT LU)
void incomplete_LU_factorization(const nl_double * RESTRICT A, nl_double * RESTRICT LU)
{
/*
* incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung
@ -80,7 +81,7 @@ struct mat_cr_t
}
}
void solveLUx (const double * RESTRICT LU, double * RESTRICT r)
void solveLUx (const nl_double * RESTRICT LU, nl_double * RESTRICT r)
{
/*
* Solve a linear equation Ax = r

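For context, mat_cr_t appears to be a compressed-row (CSR) sparse matrix; the hunks above merely template its element type so nl_double builds work. Below is a minimal, self-contained sketch of a CSR matrix-vector product; the ia member, the struct name and main() are assumptions added for illustration, only ja matches the visible fragment.

#include <cstddef>
#include <cstdio>

template <typename T, std::size_t N, std::size_t NZ>
struct csr_sketch
{
	std::size_t ia[N + 1];   // offset where each row's values start in A[]/ja[]
	std::size_t ja[NZ];      // column index of each stored element
	// res = A * x, where A holds the NZ stored values row by row
	void mult_vec(const T *A, const T *x, T *res) const
	{
		for (std::size_t r = 0; r < N; r++)
		{
			T tmp = 0.0;
			for (std::size_t k = ia[r]; k < ia[r + 1]; k++)
				tmp += A[k] * x[ja[k]];
			res[r] = tmp;
		}
	}
};

int main()
{
	// 2x2 matrix [[2, 1], [0, 3]] stored with 3 non-zeros
	csr_sketch<float, 2, 3> m = { {0, 2, 3}, {0, 1, 1} };
	const float A[3] = { 2.0f, 1.0f, 3.0f };
	const float x[2] = { 1.0f, 2.0f };
	float res[2];
	m.mult_vec(A, x, res);
	std::printf("%f %f\n", res[0], res[1]);   // prints 4.000000 6.000000
	return 0;
}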
View File

@ -45,6 +45,11 @@ protected:
ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
ATTR_HOT void LE_solve();
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
/* Full LU back substitution, not used currently, in for future use */
ATTR_HOT void LE_back_subst_full(nl_double * RESTRICT x);
ATTR_HOT nl_double delta(const nl_double * RESTRICT V);
ATTR_HOT void store(const nl_double * RESTRICT V);
@ -54,8 +59,9 @@ protected:
*/
ATTR_HOT nl_double compute_next_timestep();
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
//ATTR_ALIGN nl_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
template <typename T1, typename T2>
inline nl_ext_double &A(const T1 r, const T2 c) { return m_A[r][c]; }
ATTR_ALIGN nl_double m_RHS[_storage_N];
ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents
ATTR_ALIGN nl_double m_last_V[_storage_N];
@ -64,9 +70,9 @@ protected:
terms_t *m_rails_temp;
private:
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
const unsigned m_dim;
nl_double m_lp_fact;
};
// ----------------------------------------------------------------------------------------
@ -259,13 +265,13 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
psort_list(t->m_nz);
}
if(0)
if (0)
for (unsigned k = 0; k < N(); k++)
{
netlist().log("%3d: ", k);
pstring line = pformat("%1")(k, "3");
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
netlist().log(" %3d", m_terms[k]->m_nzrd[j]);
netlist().log("\n");
line += pformat(" %1")(m_terms[k]->m_nzrd[j], "3");
netlist().log("%s", line.cstr());
}
/*
@ -294,22 +300,25 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
for (unsigned k = 0; k < iN; k++)
{
for (unsigned i=0; i < iN; i++)
m_A[k][i] = 0.0;
A(k,i) = 0.0;
nl_double akk = 0.0;
const unsigned terms_count = m_terms[k]->count();
const unsigned railstart = m_terms[k]->m_railstart;
const nl_double * RESTRICT gt = m_terms[k]->gt();
{
nl_double akk = 0.0;
for (unsigned i = 0; i < terms_count; i++)
akk += gt[i];
A(k,k) = akk;
}
const nl_double * RESTRICT go = m_terms[k]->go();
const int * RESTRICT net_other = m_terms[k]->net_other();
for (unsigned i = 0; i < terms_count; i++)
akk = akk + gt[i];
m_A[k][k] += akk;
for (unsigned i = 0; i < railstart; i++)
m_A[k][net_other[i]] -= go[i];
A(k,net_other[i]) -= go[i];
}
}
@ -341,54 +350,41 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double *
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
{
#if 0
for (int i = 0; i < N(); i++)
{
for (int k = 0; k < N(); k++)
printf("%f ", m_A[i][k]);
printf("| %f = %f \n", x[i], m_RHS[i]);
}
printf("\n");
#endif
const unsigned kN = N();
for (unsigned i = 0; i < kN; i++) {
// FIXME: use a parameter to enable pivoting?
if (USE_PIVOT_SEARCH)
// FIXME: use a parameter to enable pivoting? m_pivot
if (m_params.m_pivot)
{
/* Find the row with the largest first value */
unsigned maxrow = i;
for (unsigned j = i + 1; j < kN; j++)
{
//if (std::abs(m_A[j][i]) > std::abs(m_A[maxrow][i]))
if (m_A[j][i] * m_A[j][i] > m_A[maxrow][i] * m_A[maxrow][i])
if (A(j,i) * A(j,i) > A(maxrow,i) * A(maxrow,i))
maxrow = j;
}
if (maxrow != i)
{
/* Swap the maxrow and ith row */
for (unsigned k = i; k < kN; k++) {
std::swap(m_A[i][k], m_A[maxrow][k]);
for (unsigned k = 0; k < kN; k++) {
std::swap(A(i,k), A(maxrow,k));
}
std::swap(m_RHS[i], m_RHS[maxrow]);
}
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / m_A[i][i];
const nl_ext_double * RESTRICT s = &m_A[i][i+1];
const nl_double f = 1.0 / A(i,i);
/* Eliminate column i from row j */
for (unsigned j = i + 1; j < kN; j++)
{
nl_ext_double * RESTRICT d = &m_A[j][i+1];
const nl_double f1 = - m_A[j][i] * f;
const nl_double f1 = - A(j,i) * f;
if (f1 != NL_FCONST(0.0))
{
const unsigned e = kN - i - 1;
for (unsigned k = 0; k < e; k++)
d[k] = d[k] + s[k] * f1;
for (unsigned k = i+1; k < kN; k++)
A(j,k) += A(i,k) * f1;
m_RHS[j] += m_RHS[i] * f1;
}
}
@ -396,8 +392,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
else
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / m_A[i][i];
const nl_ext_double * RESTRICT s = &m_A[i][0];
const nl_double f = 1.0 / A(i,i);
const unsigned *p = m_terms[i]->m_nzrd.data();
const unsigned e = m_terms[i]->m_nzrd.size();
@ -405,10 +400,9 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
for (unsigned j = i + 1; j < kN; j++)
{
nl_ext_double * RESTRICT d = &m_A[j][0];
const nl_double f1 = - d[i] * f;
if (f1 != NL_FCONST(0.0))
if (A(j,i) != NL_FCONST(0.0))
{
const nl_double f1 = - A(j,i) * f;
#if 0
/* The code below is 30% faster than the original
* implementation which is given here for reference.
@ -419,14 +413,13 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
double * RESTRICT d = &m_A[j][i+1];
const double * RESTRICT s = &m_A[i][i+1];
const int e = kN - i - 1;
for (int k = 0; k < e; k++)
d[k] = d[k] + s[k] * f1;
#else
for (unsigned k = 0; k < e; k++)
{
const unsigned pk = p[k];
d[pk] += s[pk] * f1;
A(j,pk) += A(i,pk) * f1;
}
#endif
m_RHS[j] += m_RHS[i] * f1;
@ -443,34 +436,67 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
const unsigned kN = N();
/* back substitution */
for (int j = kN - 1; j >= 0; j--)
if (m_params.m_pivot)
{
nl_double tmp = 0;
#if 1
#if (USE_PIVOT_SEARCH)
const nl_ext_double * RESTRICT A = &m_A[j][j+1];
const nl_double * RESTRICT xp = &x[j+1];
const unsigned e = kN - j - 1;
for (unsigned k = 0; k < e; k++)
tmp += A[k] * xp[k];
#else
const nl_ext_double * RESTRICT A = &m_A[j][0];
const unsigned *p = m_terms[j]->m_nzrd.data();
const unsigned e = m_terms[j]->m_nzrd.size();
for (unsigned k = 0; k < e; k++)
for (int j = kN - 1; j >= 0; j--)
{
const unsigned pk = p[k];
tmp += A[pk] * x[pk];
nl_double tmp = 0;
for (unsigned k = j+1; k < kN; k++)
tmp += A(j,k) * x[k];
x[j] = (m_RHS[j] - tmp) / A(j,j);
}
#endif
#else
for (unsigned k = j + 1; k < kN; k++)
tmp += m_A[j][k] * x[k];
#endif
x[j] = (m_RHS[j] - tmp) / m_A[j][j];
}
else
{
for (int j = kN - 1; j >= 0; j--)
{
nl_double tmp = 0;
const unsigned *p = m_terms[j]->m_nzrd.data();
const unsigned e = m_terms[j]->m_nzrd.size();
for (unsigned k = 0; k < e; k++)
{
const unsigned pk = p[k];
tmp += A(j,pk) * x[pk];
}
x[j] = (m_RHS[j] - tmp) / A(j,j);
}
}
}
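For readers following the PIVOT change: below is a self-contained sketch of what the two branches above implement together, namely Gaussian elimination with optional partial pivoting followed by back substitution. It uses a plain dense array and a hypothetical gauss_solve name; it illustrates the technique the m_params.m_pivot flag toggles, not the solver's actual code path, which additionally exploits the m_nzrd sparsity lists.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <utility>

template <unsigned N>
void gauss_solve(double A[N][N], double rhs[N], double x[N], const bool pivot)
{
	for (unsigned i = 0; i < N; i++)
	{
		if (pivot)
		{
			/* find the row with the largest magnitude in column i */
			unsigned maxrow = i;
			for (unsigned j = i + 1; j < N; j++)
				if (std::fabs(A[j][i]) > std::fabs(A[maxrow][i]))
					maxrow = j;
			if (maxrow != i)
			{
				for (unsigned k = 0; k < N; k++)
					std::swap(A[i][k], A[maxrow][k]);
				std::swap(rhs[i], rhs[maxrow]);
			}
		}
		const double f = 1.0 / A[i][i];      /* assumes A[i][i] != 0 */
		/* eliminate column i from all rows below */
		for (unsigned j = i + 1; j < N; j++)
		{
			const double f1 = -A[j][i] * f;
			if (f1 != 0.0)
			{
				for (unsigned k = i; k < N; k++)
					A[j][k] += A[i][k] * f1;
				rhs[j] += rhs[i] * f1;
			}
		}
	}
	/* back substitution */
	for (int j = N - 1; j >= 0; j--)
	{
		double tmp = 0.0;
		for (unsigned k = j + 1; k < N; k++)
			tmp += A[j][k] * x[k];
		x[j] = (rhs[j] - tmp) / A[j][j];
	}
}

int main()
{
	double A[2][2] = { { 1e-12, 1.0 }, { 1.0, 1.0 } };
	double rhs[2] = { 1.0, 2.0 };
	double x[2];
	gauss_solve<2>(A, rhs, x, true);          /* pivoting keeps this well conditioned */
	std::printf("%f %f\n", x[0], x[1]);       /* roughly 1.000000 1.000000 */
	return 0;
}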
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst_full(
nl_double * RESTRICT x)
{
const unsigned kN = N();
/* back substitution */
// int ip;
// ii=-1
//for (int i=0; i < kN; i++)
// x[i] = m_RHS[i];
for (int i=0; i < kN; i++)
{
//ip=indx[i]; USE_PIVOT_SEARCH
//sum=b[ip];
//b[ip]=b[i];
double sum=m_RHS[i];//x[i];
for (int j=0; j < i; j++)
sum -= A(i,j) * x[j];
x[i]=sum;
}
for (int i=kN-1; i >= 0; i--)
{
double sum=x[i];
for (int j = i+1; j < kN; j++)
sum -= A(i,j)*x[j];
x[i] = sum / A(i,i);
}
#if 0
printf("Solution:\n");
for (unsigned i = 0; i < N(); i++)
@ -527,33 +553,11 @@ ATTR_HOT int matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNU
if (newton_raphson)
{
#if 0
/* limiting just doesn't work. */
const unsigned iN = this->N();
double err = 0;
for (unsigned k = 0; k < iN; k++)
{
const double ov = this->m_nets[k]->m_cur_Analog;
double d = new_V[k] - ov;
err = std::max(nl_math::abs(d), err);
}
double a = 1.05;
for (unsigned k = 0; k < iN; k++)
{
const double ov = this->m_nets[k]->m_cur_Analog;
double d = new_V[k] - ov;
const double nv = ov + a * d;
this->m_nets[k]->m_cur_Analog = nv;
}
return (err > this->m_params.m_accuracy) ? 2 : 1;
#else
nl_double err = delta(new_V);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
#endif
}
else
{
@ -580,7 +584,6 @@ template <unsigned m_N, unsigned _storage_N>
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const solver_parameters_t *params, const int size)
: matrix_solver_t(GAUSSIAN_ELIMINATION, params)
, m_dim(size)
, m_lp_fact(0)
{
m_terms = palloc_array(terms_t *, N());
m_rails_temp = palloc_array(terms_t, N());
@ -597,7 +600,6 @@ template <unsigned m_N, unsigned _storage_N>
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const eSolverType type, const solver_parameters_t *params, const int size)
: matrix_solver_t(type, params)
, m_dim(size)
, m_lp_fact(0)
{
m_terms = palloc_array(terms_t *, N());
m_rails_temp = palloc_array(terms_t, N());

View File

@ -43,7 +43,7 @@ ATTR_HOT inline int matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED cons
this->build_LE_RHS(m_RHS);
//NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
nl_double new_val = m_RHS[0] / m_A[0][0];
nl_double new_val = m_RHS[0] / A(0,0);
nl_double e = (new_val - net->m_cur_Analog);
nl_double cerr = nl_math::abs(e);

View File

@ -41,10 +41,10 @@ ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED cons
build_LE_A();
build_LE_RHS(m_RHS);
const nl_double a = m_A[0][0];
const nl_double b = m_A[0][1];
const nl_double c = m_A[1][0];
const nl_double d = m_A[1][1];
const nl_double a = A(0,0);
const nl_double b = A(0,1);
const nl_double c = A(1,0);
const nl_double d = A(1,1);
nl_double new_val[2];
new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);

View File

@ -0,0 +1,673 @@
// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* nld_ms_direct.h
*
*/
#ifndef NLD_MS_DIRECT_H_
#define NLD_MS_DIRECT_H_
#include <algorithm>
#include "solver/nld_solver.h"
//#define A(_r, _c) m_A[_r][_c]
NETLIB_NAMESPACE_DEVICES_START()
//#define nl_ext_double __float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double double
template <unsigned m_N, unsigned _storage_N>
class matrix_solver_direct_t: public matrix_solver_t
{
public:
matrix_solver_direct_t(const solver_parameters_t *params, const int size);
matrix_solver_direct_t(const eSolverType type, const solver_parameters_t *params, const int size);
virtual ~matrix_solver_direct_t();
virtual void vsetup(analog_net_t::list_t &nets);
virtual void reset() { matrix_solver_t::reset(); }
ATTR_HOT inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; }
ATTR_HOT inline int vsolve_non_dynamic(const bool newton_raphson);
protected:
virtual void add_term(int net_idx, terminal_t *term);
ATTR_HOT virtual nl_double vsolve();
ATTR_HOT int solve_non_dynamic(const bool newton_raphson);
ATTR_HOT void build_LE_A();
ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
template<unsigned k>
void LEk()
{
//const unsigned kN = N();
const double akki = 1.0 / A(k,k);
const unsigned * const p = m_terms[k]->m_nzrd.data();
const unsigned e = m_terms[k]->m_nzrd.size();
for (int i = k+1; i < _storage_N;i++)
{
const double alpha = A(i,k) * akki;
A(i,k) = alpha;
if (alpha != 0.0)
for (int j = 0; j < e; j++)
{
const int pk = p[j];
A(i,pk) -= A(k,pk) * alpha;
}
}
}
ATTR_HOT void LE_solve()
{
const unsigned kN = N();
unsigned sk = 1;
if (1 && kN == _storage_N)
{
if (kN> 0 ) LEk<0>();
if (kN> 1 ) LEk<1>();
if (kN> 2 ) LEk<2>();
if (kN> 3 ) LEk<3>();
if (kN> 4 ) LEk<4>();
if (kN> 5 ) LEk<5>();
if (kN> 6 ) LEk<6>();
if (kN> 7 ) LEk<7>();
if (kN> 8 ) LEk<8>();
if (kN> 9 ) LEk<9>();
if (kN>10 ) LEk<10>();
if (kN>11 ) LEk<11>();
if (kN>12 ) LEk<12>();
if (kN>13 ) LEk<13>();
if (kN>14 ) LEk<14>();
if (kN>15 ) LEk<15>();
if (kN>16 ) LEk<16>();
if (kN>17 ) LEk<17>();
if (kN>18 ) LEk<18>();
if (kN>19 ) LEk<19>();
if (kN>20 ) LEk<20>();
if (kN>21 ) LEk<21>();
if (kN>22 ) LEk<22>();
if (kN>23 ) LEk<23>();
if (kN>24 ) LEk<24>();
if (kN>25 ) LEk<25>();
if (kN>26 ) LEk<26>();
if (kN>27 ) LEk<27>();
if (kN>28 ) LEk<28>();
if (kN>29 ) LEk<29>();
sk = 30;
}
for (int k = sk; k < kN - 1; k++)
{
const double akki = 1.0 / A(k,k);
const unsigned * const p = m_terms[k]->m_nzrd.data();
const unsigned e = m_terms[k]->m_nzrd.size();
for (int i = k+1; i < kN;i++)
{
const double alpha = A(i,k) * akki;
A(i,k) = alpha;
if (alpha != 0.0)
for (int j = 0; j < e; j++)
{
const int pk = p[j];
A(i,pk) -= A(k,pk) * alpha;
}
}
}
}
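The ladder of "if (kN > k) LEk<k>()" calls above unrolls the first 30 elimination steps with a compile-time column index. A minimal sketch of how the same unrolling could be expressed with a recursive template follows; solver_t and its LEk are placeholders for illustration, not the project's types.

#include <cstdio>

struct solver_t
{
	template <unsigned K>
	void LEk() { std::printf("eliminating column %u\n", K); }
};

template <unsigned K, unsigned MAX>
struct unroll
{
	static void run(solver_t &s, const unsigned kN)
	{
		if (kN > K)
		{
			s.LEk<K>();                   // compile-time column index, as in LEk<k>()
			unroll<K + 1, MAX>::run(s, kN);
		}
	}
};

template <unsigned MAX>
struct unroll<MAX, MAX>                   // recursion terminator
{
	static void run(solver_t &, const unsigned) { }
};

int main()
{
	solver_t s;
	unroll<0, 30>::run(s, 5);             // performs LEk<0>() .. LEk<4>()
	return 0;
}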
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
ATTR_HOT nl_double delta(const nl_double * RESTRICT V);
ATTR_HOT void store(const nl_double * RESTRICT V);
/* bring the whole system to the current time
* Don't schedule a new calculation time. The recalculation has to be
* triggered by the caller after the netlist element was changed.
*/
ATTR_HOT nl_double compute_next_timestep();
template <typename T1, typename T2>
inline nl_ext_double &A(const T1 r, const T2 c) { return m_A[r][c]; }
//ATTR_ALIGN nl_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
ATTR_ALIGN nl_double m_RHS[_storage_N];
ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents
ATTR_ALIGN nl_double m_last_V[_storage_N];
terms_t **m_terms;
terms_t *m_rails_temp;
private:
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
const unsigned m_dim;
nl_double m_lp_fact;
};
// ----------------------------------------------------------------------------------------
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned _storage_N>
matrix_solver_direct_t<m_N, _storage_N>::~matrix_solver_direct_t()
{
for (unsigned k = 0; k < N(); k++)
{
pfree(m_terms[k]);
}
pfree_array(m_terms);
pfree_array(m_rails_temp);
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT nl_double matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep()
{
nl_double new_solver_timestep = m_params.m_max_timestep;
if (m_params.m_dynamic)
{
/*
* FIXME: We should extend the logic to use either all nets or
* only output nets.
*/
for (unsigned k = 0, iN=N(); k < iN; k++)
{
analog_net_t *n = m_nets[k];
const nl_double DD_n = (n->m_cur_Analog - m_last_V[k]);
const nl_double hn = current_timestep();
nl_double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1);
nl_double new_net_timestep;
n->m_h_n_m_1 = hn;
n->m_DD_n_m_1 = DD_n;
if (nl_math::abs(DD2) > NL_FCONST(1e-30)) // avoid div-by-zero
new_net_timestep = nl_math::sqrt(m_params.m_lte / nl_math::abs(NL_FCONST(0.5)*DD2));
else
new_net_timestep = m_params.m_max_timestep;
if (new_net_timestep < new_solver_timestep)
new_solver_timestep = new_net_timestep;
}
if (new_solver_timestep < m_params.m_min_timestep)
new_solver_timestep = m_params.m_min_timestep;
}
//if (new_solver_timestep > 10.0 * hn)
// new_solver_timestep = 10.0 * hn;
return new_solver_timestep;
}
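Reading the step-size control above: DD2 approximates the second derivative of each net voltage by divided differences, and the new step is chosen so that the estimated local truncation error stays at m_params.m_lte. In LaTeX form (a reading of the code, not a formula quoted from the source):

\ddot V \approx \frac{\Delta V_n / h_n - \Delta V_{n-1} / h_{n-1}}{h_n + h_{n-1}},
\qquad
h_\text{new} = \sqrt{\frac{\mathrm{LTE}}{\left| \tfrac{1}{2}\,\ddot V \right|}}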
template <unsigned m_N, unsigned _storage_N>
ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::add_term(int k, terminal_t *term)
{
if (term->m_otherterm->net().isRailNet())
{
m_rails_temp[k].add(term, -1, false);
}
else
{
int ot = get_net_idx(&term->m_otherterm->net());
if (ot>=0)
{
m_terms[k]->add(term, ot, true);
SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
}
/* Should this be allowed ? */
else // if (ot<0)
{
m_rails_temp[k].add(term, ot, true);
netlist().error("found term with missing othernet %s\n", term->name().cstr());
}
}
}
template <unsigned m_N, unsigned _storage_N>
ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::list_t &nets)
{
if (m_dim < nets.size())
netlist().error("Dimension %d less than %" SIZETFMT, m_dim, SIZET_PRINTF(nets.size()));
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->clear();
m_rails_temp[k].clear();
}
matrix_solver_t::setup(nets);
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (unsigned i = 0; i < m_rails_temp[k].count(); i++)
this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i], false);
m_rails_temp[k].clear(); // no longer needed
m_terms[k]->set_pointers();
}
#if 1
/* Sort in descending order by number of connected matrix voltages.
* The idea is, that for Gauss-Seidel algo the first voltage computed
* depends on the greatest number of previous voltages thus taking into
* account the maximum amount of information.
*
* This actually improves performance on popeye slightly. Average
* GS computations reduce from 2.509 to 2.370
*
* Smallest to largest : 2.613
* Unsorted : 2.509
* Largest to smallest : 2.370
*
* Sorting as a general matrix pre-conditioning is mentioned in
* literature but I have found no articles about Gauss Seidel.
*
* For Gaussian Elimination however increasing order is better suited.
* FIXME: Even better would be to sort on elements right of the matrix diagonal.
*
*/
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1);
for (unsigned k = 0; k < N() / 2; k++)
for (unsigned i = 0; i < N() - 1; i++)
{
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
{
std::swap(m_terms[i],m_terms[i+1]);
m_nets.swap(i, i+1);
}
}
for (unsigned k = 0; k < N(); k++)
{
int *other = m_terms[k]->net_other();
for (unsigned i = 0; i < m_terms[k]->count(); i++)
if (other[i] != -1)
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
}
#endif
/* create a list of non zero elements right of the diagonal
* These lists anticipate the population of array elements by
* Gaussian elimination.
*/
for (unsigned k = 0; k < N(); k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
t->m_nz.clear();
if (k==0)
t->m_nzrd.clear();
else
{
t->m_nzrd = m_terms[k-1]->m_nzrd;
unsigned j=0;
while(j < t->m_nzrd.size())
{
if (t->m_nzrd[j] < k + 1)
t->m_nzrd.remove_at(j);
else
j++;
}
}
for (unsigned j = 0; j < N(); j++)
{
for (unsigned i = 0; i < t->m_railstart; i++)
{
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
t->m_nzrd.add(other[i]);
if (!t->m_nz.contains(other[i]))
t->m_nz.add(other[i]);
}
}
psort_list(t->m_nzrd);
t->m_nz.add(k); // add diagonal
psort_list(t->m_nz);
}
if(0)
for (unsigned k = 0; k < N(); k++)
{
netlist().log("%3d: ", k);
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
netlist().log(" %3d", m_terms[k]->m_nzrd[j]);
netlist().log("\n");
}
/*
* save states
*/
save(NLNAME(m_RHS));
save(NLNAME(m_last_RHS));
save(NLNAME(m_last_V));
for (unsigned k = 0; k < N(); k++)
{
pstring num = pformat("%1")(k);
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
}
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
{
const unsigned iN = N();
for (unsigned k = 0; k < iN; k++)
{
for (unsigned i=0; i < iN; i++)
A(k,i) = 0.0;
nl_double akk = 0.0;
const unsigned terms_count = m_terms[k]->count();
const unsigned railstart = m_terms[k]->m_railstart;
const nl_double * RESTRICT gt = m_terms[k]->gt();
const nl_double * RESTRICT go = m_terms[k]->go();
const int * RESTRICT net_other = m_terms[k]->net_other();
for (unsigned i = 0; i < terms_count; i++)
akk = akk + gt[i];
A(k,k) += akk;
for (unsigned i = 0; i < railstart; i++)
A(k, net_other[i]) -= go[i];
}
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double * RESTRICT rhs)
{
const unsigned iN = N();
for (unsigned k = 0; k < iN; k++)
{
nl_double rhsk_a = 0.0;
nl_double rhsk_b = 0.0;
const int terms_count = m_terms[k]->count();
const nl_double * RESTRICT go = m_terms[k]->go();
const nl_double * RESTRICT Idr = m_terms[k]->Idr();
const nl_double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
for (int i = 0; i < terms_count; i++)
rhsk_a = rhsk_a + Idr[i];
for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
rhsk_b = rhsk_b + go[i] * *other_cur_analog[i];
rhs[k] = rhsk_a + rhsk_b;
}
}
#if 1
#else
// Crout algo
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
{
#if 0
for (int i = 0; i < N(); i++)
{
for (int k = 0; k < N(); k++)
printf("%f ", m_A[i][k]);
printf("| %f = %f \n", x[i], m_RHS[i]);
}
printf("\n");
#endif
const unsigned kN = N();
ATTR_UNUSED int imax;
ATTR_UNUSED double big,temp;
#if 0
double vv[_storage_N];
for (i=0;i<kN;i++)
{
big=0.0;
for (j=0;j<kN;j++)
if ((temp=fabs(m_A[i][j])) > big)
big=temp;
//if (big == 0.0) nrerror("Singular matrix in routine LUDCMP");
vv[i]=1.0/big;
}
#endif
for (int j = 0; j < kN; j++)
{
#if 1
for (int i=0; i < kN;i++)
{
double sum = 0.0;
const int e = (i<j ? i : j);
for (int k=0; k < e; k++)
sum += A(i,k) * A(k,j);
A(i,j) -= sum;
}
#else
for (int i=0; i < j;i++)
{
double * RESTRICT p = m_A[i];
double sum = 0.0;
for (int k=0; k < i; k++)
sum += p[k] * m_A[k][j];
p[j] -= sum;
}
big=0.0;
for (int i = j; i < kN; i++)
{
double * RESTRICT p = m_A[i];
double sum = 0.0;
for (int k = 0; k < j; k++)
sum += p[k] * m_A[k][j];
p[j] -= sum;
#if 0
if ( (dum=vv[i]*fabs(sum)) >= big) {
big=dum;
imax=i;
}
#endif
}
#endif
#if 0
// USE_PIVOT_SEARCH
// omit pivoting for now
if (j != imax)
{
for (k=0;k<kN;k++)
{
dum=m_A[imax][k];
m_A[imax][k]=m_A[j][k];
m_A[j][k]=dum;
}
//*d = -(*d);
vv[imax]=vv[j];
}
indx[j]=imax;
#endif
//if (m_A[j][j] == 0.0)
// m_A[j][j] = 1e-20;
double dum = 1.0 / A(j,j);
for (int i = j+1; i < kN; i++)
A(i,j) *= dum;
}
}
#endif
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
nl_double * RESTRICT x)
{
const unsigned kN = N();
/* back substitution */
// int ip;
// ii=-1
//for (int i=0; i < kN; i++)
// x[i] = m_RHS[i];
for (int i=0; i < kN; i++)
{
//ip=indx[i]; USE_PIVOT_SEARCH
//sum=b[ip];
//b[ip]=b[i];
double sum=m_RHS[i];//x[i];
for (int j=0; j < i; j++)
sum -= A(i,j) * x[j];
x[i]=sum;
}
for (int i=kN-1; i >= 0; i--)
{
double sum=x[i];
for (int j = i+1; j < kN; j++)
sum -= A(i,j)*x[j];
x[i] = sum / A(i,i);
}
#if 0
printf("Solution:\n");
for (unsigned i = 0; i < N(); i++)
{
for (unsigned k = 0; k < N(); k++)
printf("%f ", m_A[i][k]);
printf("| %f = %f \n", x[i], m_RHS[i]);
}
printf("\n");
#endif
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT nl_double matrix_solver_direct_t<m_N, _storage_N>::delta(
const nl_double * RESTRICT V)
{
/* FIXME: Ideally we should also include currents (RHS) here. This would
* need a re-evaluation of the right hand side after voltages have been updated
* and thus belong in a different calculation. This applies to all solvers.
*/
const unsigned iN = this->N();
nl_double cerr = 0;
for (unsigned i = 0; i < iN; i++)
cerr = std::max(cerr, nl_math::abs(V[i] - this->m_nets[i]->m_cur_Analog));
return cerr;
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::store(
const nl_double * RESTRICT V)
{
for (unsigned i = 0, iN=N(); i < iN; i++)
{
this->m_nets[i]->m_cur_Analog = V[i];
}
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT nl_double matrix_solver_direct_t<m_N, _storage_N>::vsolve()
{
this->solve_base(this);
return this->compute_next_timestep();
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT int matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
{
nl_double new_V[_storage_N]; // = { 0.0 };
this->LE_back_subst(new_V);
if (newton_raphson)
{
nl_double err = delta(new_V);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
else
{
store(new_V);
return 1;
}
}
template <unsigned m_N, unsigned _storage_N>
ATTR_HOT inline int matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
this->build_LE_A();
this->build_LE_RHS(m_last_RHS);
for (unsigned i=0, iN=N(); i < iN; i++)
m_RHS[i] = m_last_RHS[i];
this->LE_solve();
return this->solve_non_dynamic(newton_raphson);
}
template <unsigned m_N, unsigned _storage_N>
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const solver_parameters_t *params, const int size)
: matrix_solver_t(GAUSSIAN_ELIMINATION, params)
, m_dim(size)
, m_lp_fact(0)
{
m_terms = palloc_array(terms_t *, N());
m_rails_temp = palloc_array(terms_t, N());
for (unsigned k = 0; k < N(); k++)
{
m_terms[k] = palloc(terms_t);
m_last_RHS[k] = 0.0;
m_last_V[k] = 0.0;
}
}
template <unsigned m_N, unsigned _storage_N>
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const eSolverType type, const solver_parameters_t *params, const int size)
: matrix_solver_t(type, params)
, m_dim(size)
, m_lp_fact(0)
{
m_terms = palloc_array(terms_t *, N());
m_rails_temp = palloc_array(terms_t, N());
for (unsigned k = 0; k < N(); k++)
{
m_terms[k] = palloc(terms_t);
m_last_RHS[k] = 0.0;
m_last_V[k] = 0.0;
}
}
NETLIB_NAMESPACE_DEVICES_END()
#endif /* NLD_MS_DIRECT_H_ */

View File

@ -35,10 +35,10 @@ public:
unsigned mr=this->N(); /* FIXME: maximum iterations locked in here */
for (unsigned i = 0; i < mr + 1; i++)
m_ht[i] = new double[mr];
m_ht[i] = new nl_double[mr];
for (unsigned i = 0; i < this->N(); i++)
m_v[i] = new double[_storage_N];
m_v[i] = new nl_double[_storage_N];
}
@ -60,7 +60,7 @@ protected:
private:
int solve_ilu_gmres(double * RESTRICT x, double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, double accuracy);
int solve_ilu_gmres(nl_double * RESTRICT x, nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy);
plist_t<int> m_term_cr[_storage_N];
@ -69,17 +69,17 @@ private:
mat_cr_t<_storage_N> mat;
double m_A[_storage_N * _storage_N];
double m_LU[_storage_N * _storage_N];
nl_double m_A[_storage_N * _storage_N];
nl_double m_LU[_storage_N * _storage_N];
double m_c[_storage_N + 1]; /* mr + 1 */
double m_g[_storage_N + 1]; /* mr + 1 */
double * RESTRICT m_ht[_storage_N + 1]; /* (mr + 1), mr */
double m_s[_storage_N]; /* mr + 1 */
double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
nl_double m_c[_storage_N + 1]; /* mr + 1 */
nl_double m_g[_storage_N + 1]; /* mr + 1 */
nl_double * RESTRICT m_ht[_storage_N + 1]; /* (mr + 1), mr */
nl_double m_s[_storage_N]; /* mr + 1 */
nl_double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
//double m_y[_storage_N]; /* mr + 1 */
double m_accuracy_mult; // FXIME: Save state
nl_double m_accuracy_mult; // FIXME: Save state
};
// ----------------------------------------------------------------------------------------
@ -214,7 +214,7 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
if (newton_raphson)
{
double err = 0;
nl_double err = 0;
for (unsigned k = 0; k < iN; k++)
err = std::max(nl_math::abs(l_V[k] - new_V[k]), err);
@ -234,7 +234,7 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
}
}
static inline void givens_mult( const double c, const double s, double * RESTRICT g0, double * RESTRICT g1 )
static inline void givens_mult( const nl_double c, const nl_double s, nl_double * RESTRICT g0, nl_double * RESTRICT g1 )
{
const double tg0 = c * *g0 - s * *g1;
const double tg1 = s * *g0 + c * *g1;
@ -244,7 +244,7 @@ static inline void givens_mult( const double c, const double s, double * RESTRIC
}
template <unsigned m_N, unsigned _storage_N>
int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x, double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, double accuracy)
int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRICT x, nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy)
{
/*-------------------------------------------------------------------------
* The code below was inspired by code published by John Burkardt under
@ -289,13 +289,13 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
* differently: The investment doesn't pay off.
* Therefore we use the approach in the else part.
*/
double t[_storage_N];
double Ax[_storage_N];
nl_double t[_storage_N];
nl_double Ax[_storage_N];
vec_set(n, accuracy, t);
mat.mult_vec(m_A, t, Ax);
mat.solveLUx(m_LU, Ax);
const double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;
const nl_double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;
//printf("rho/accuracy = %f\n", rho_to_accuracy);
@ -307,11 +307,11 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
for (unsigned itr = 0; itr < restart_max; itr++)
{
unsigned last_k = mr;
double mu;
double rho;
nl_double mu;
nl_double rho;
double Ax[_storage_N];
double residual[_storage_N];
nl_double Ax[_storage_N];
nl_double residual[_storage_N];
mat.mult_vec(m_A, x, Ax);
@ -324,13 +324,13 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
rho = std::sqrt(vecmult2(n, residual));
vec_mult_scalar(n, residual, 1.0 / rho, m_v[0]);
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
vec_set(mr+1, 0.0, m_g);
vec_set(mr+1, NL_FCONST(0.0), m_g);
m_g[0] = rho;
for (unsigned i = 0; i < mr; i++)
vec_set(mr + 1, 0.0, m_ht[i]);
vec_set(mr + 1, NL_FCONST(0.0), m_ht[i]);
for (unsigned k = 0; k < mr; k++)
{
@ -349,7 +349,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
m_ht[k1][k] = std::sqrt(vecmult2(n, m_v[k1]));
if (m_ht[k1][k] != 0.0)
vec_scale(n, m_v[k1], 1.0 / m_ht[k1][k]);
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
for (unsigned j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], &m_ht[j][k], &m_ht[j+1][k]);
@ -378,7 +378,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
/* didn't converge within accuracy */
last_k = mr - 1;
double m_y[_storage_N + 1];
nl_double m_y[_storage_N + 1];
/* Solve the system H * y = g */
/* x += m_v[j] * m_y[j] */

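For context on the givens_mult() retyping above: GMRES reduces the Hessenberg matrix with Givens rotations, each of which zeroes one sub-diagonal entry. Below is a self-contained sketch using the same rotation convention as the diff; the standalone helper and main() are assumptions added for illustration.

#include <cmath>
#include <cstdio>

static inline void givens_mult(const double c, const double s,
                               double *g0, double *g1)
{
	const double tg0 = c * *g0 - s * *g1;
	const double tg1 = s * *g0 + c * *g1;
	*g0 = tg0;
	*g1 = tg1;
}

int main()
{
	double h_kk = 3.0, h_k1k = 4.0;                 // column entries to be rotated
	const double mu = std::sqrt(h_kk * h_kk + h_k1k * h_k1k);
	const double c = h_kk / mu;                     // cosine
	const double s = -h_k1k / mu;                   // sine, sign per the diff's convention
	givens_mult(c, s, &h_kk, &h_k1k);
	std::printf("%f %f\n", h_kk, h_k1k);            // prints roughly 5.000000 0.000000
	return 0;
}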
View File

@ -143,7 +143,7 @@ ATTR_HOT inline int matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dynamic(con
do {
resched = false;
double err = 0;
nl_double err = 0;
for (unsigned k = 0; k < iN; k++)
{
const int * RESTRICT net_other = this->m_terms[k]->net_other();

View File

@ -199,14 +199,13 @@ ATTR_HOT inline int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic
{
nl_double Idrive = 0;
const nl_ext_double * RESTRICT A = &this->m_A[k][0];
const unsigned *p = this->m_terms[k]->m_nz.data();
const unsigned e = this->m_terms[k]->m_nz.size();
for (unsigned i = 0; i < e; i++)
Idrive = Idrive + A[p[i]] * new_v[p[i]];
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / A[k];
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / this->A(k,k);
cerr = std::max(cerr, nl_math::abs(delta));
new_v[k] += delta;
}

View File

@ -17,7 +17,6 @@
#endif
//#pragma GCC optimize "-ffast-math"
#if 0
#pragma GCC optimize "-ffast-math"
//#pragma GCC optimize "-ftree-parallelize-loops=4"
@ -37,7 +36,11 @@
#include <iostream>
#include <algorithm>
#include "nld_solver.h"
#if 1
#include "nld_ms_direct.h"
#else
#include "nld_ms_direct_lu.h"
#endif
#include "nld_ms_direct1.h"
#include "nld_ms_direct2.h"
#include "nld_ms_sor.h"
@ -302,11 +305,6 @@ ATTR_HOT nl_double matrix_solver_t::solve()
return next_time_step;
}
// ----------------------------------------------------------------------------------------
// matrix_solver - Direct base
// ----------------------------------------------------------------------------------------
ATTR_COLD int matrix_solver_t::get_net_idx(net_t *net)
{
for (std::size_t k = 0; k < m_nets.size(); k++)
@ -315,6 +313,25 @@ ATTR_COLD int matrix_solver_t::get_net_idx(net_t *net)
return -1;
}
void matrix_solver_t::log_stats()
{
if (this->m_stat_calculations != 0 && this->m_params.m_log_stats)
{
this->netlist().log("==============================================");
this->netlist().log("Solver %s", this->name().cstr());
this->netlist().log(" ==> %d nets", (unsigned) this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
this->netlist().log(" has %s elements", this->is_dynamic() ? "dynamic" : "no dynamic");
this->netlist().log(" has %s elements", this->is_timestep() ? "timestep" : "no timestep");
this->netlist().log(" %6.3f average newton raphson loops", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
this->netlist().log(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average",
this->m_stat_calculations,
this->m_stat_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
this->m_iterative_fail,
100.0 * (double) this->m_iterative_fail / (double) this->m_stat_calculations,
(double) this->m_iterative_total / (double) this->m_stat_calculations);
}
}
@ -335,15 +352,21 @@ NETLIB_START(solver)
register_param("FREQ", m_freq, 48000.0);
register_param("ITERATIVE", m_iterative_solver, "SOR");
/* iteration parameters */
register_param("SOR_FACTOR", m_sor, 1.059);
register_param("ITERATIVE", m_iterative_solver, "SOR");
register_param("ACCURACY", m_accuracy, 1e-7);
register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops
register_param("GS_THRESHOLD", m_gs_threshold, 6); // below this value, gaussian elimination is used
register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops
/* general parameters */
register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT);
register_param("PIVOT", m_pivot, 0); // use pivoting - on supported solvers
register_param("NR_LOOPS", m_nr_loops, 250); // Newton-Raphson loops
register_param("PARALLEL", m_parallel, 0);
register_param("SOR_FACTOR", m_sor, 1.059);
register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT);
/* automatic time step */
register_param("DYNAMIC_TS", m_dynamic, 0);
register_param("LTE", m_lte, 5e-5); // diff/timestep
register_param("MIN_TIMESTEP", m_min_timestep, 1e-6); // nl_double timestep resolution
@ -474,6 +497,7 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
int cur_group = -1;
const bool use_specific = true;
m_params.m_pivot = m_pivot.Value();
m_params.m_accuracy = m_accuracy.Value();
m_params.m_gs_loops = m_gs_loops.Value();
m_params.m_nr_loops = m_nr_loops.Value();
@ -551,13 +575,31 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
case 8:
ms = create_solver<8,8>(8, use_specific);
break;
case 10:
ms = create_solver<10,10>(10, use_specific);
break;
case 11:
ms = create_solver<11,11>(11, use_specific);
break;
case 12:
ms = create_solver<12,12>(12, use_specific);
break;
case 15:
ms = create_solver<15,15>(15, use_specific);
break;
case 31:
ms = create_solver<31,31>(31, use_specific);
break;
case 49:
ms = create_solver<49,49>(49, use_specific);
break;
#if 0
case 87:
ms = create_solver<87,87>(87, use_specific);
break;
#endif
default:
netlist().warning("No specific solver found for netlist of size %d", (unsigned) net_count);
if (net_count <= 16)
{
ms = create_solver<0,16>(net_count, use_specific);

View File

@ -37,6 +37,7 @@ class NETLIB_NAME(solver);
struct solver_parameters_t
{
int m_pivot;
nl_double m_accuracy;
nl_double m_lte;
nl_double m_min_timestep;
@ -134,25 +135,7 @@ public:
inline eSolverType type() const { return m_type; }
virtual void log_stats()
{
if (this->m_stat_calculations != 0 && this->m_params.m_log_stats)
{
this->netlist().log("==============================================");
this->netlist().log("Solver %s", this->name().cstr());
this->netlist().log(" ==> %d nets", (unsigned) this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
this->netlist().log(" has %s elements", this->is_dynamic() ? "dynamic" : "no dynamic");
this->netlist().log(" has %s elements", this->is_timestep() ? "timestep" : "no timestep");
this->netlist().log(" %6.3f average newton raphson loops", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
this->netlist().log(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average",
this->m_stat_calculations,
this->m_stat_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
this->m_iterative_fail,
100.0 * (double) this->m_iterative_fail / (double) this->m_stat_calculations,
(double) this->m_iterative_total / (double) this->m_stat_calculations);
}
}
virtual void log_stats();
protected:
@ -217,6 +200,7 @@ protected:
logic_input_t m_fb_step;
logic_output_t m_Q_step;
param_logic_t m_pivot;
param_double_t m_freq;
param_double_t m_sync_delay;
param_double_t m_accuracy;

View File

@ -35,32 +35,36 @@ private:
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
inline void vec_set (const std::size_t n, const double &scalar, double * RESTRICT result)
template<typename T>
inline void vec_set (const std::size_t n, const T &scalar, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] = scalar;
}
inline double vecmult (const std::size_t n, const double * RESTRICT a1, const double * RESTRICT a2 )
template<typename T>
inline T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT a2 )
{
double value = 0.0;
T value = 0.0;
for ( std::size_t i = 0; i < n; i++ )
value = value + a1[i] * a2[i];
return value;
}
inline double vecmult2 (const std::size_t n, const double *a1)
template<typename T>
inline T vecmult2 (const std::size_t n, const T *a1)
{
double value = 0.0;
T value = 0.0;
for ( std::size_t i = 0; i < n; i++ )
{
const double temp = a1[i];
const T temp = a1[i];
value = value + temp * temp;
}
return value;
}
inline void vec_mult_scalar (const std::size_t n, const double * RESTRICT v, const double scalar, double * RESTRICT result)
template<typename T>
inline void vec_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
{
@ -68,7 +72,8 @@ inline void vec_mult_scalar (const std::size_t n, const double * RESTRICT v, con
}
}
inline void vec_add_mult_scalar (const std::size_t n, const double * RESTRICT v, const double scalar, double * RESTRICT result)
template<typename T>
inline void vec_add_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] += scalar * v[i];
@ -80,13 +85,15 @@ inline void vec_add_ip(const std::size_t n, const double * RESTRICT v, double *
result[i] += v[i];
}
inline void vec_sub(const std::size_t n, const double * RESTRICT v1, const double * RESTRICT v2, double * RESTRICT result)
template<typename T>
inline void vec_sub(const std::size_t n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] = v1[i] - v2[i];
}
inline void vec_scale (const std::size_t n, double * RESTRICT v, const double scalar)
template<typename T>
inline void vec_scale (const std::size_t n, T * RESTRICT v, const T scalar)
{
for ( std::size_t i = 0; i < n; i++ )
v[i] = scalar * v[i];