mirror of
https://github.com/holub/mame
synced 2025-06-04 20:06:28 +03:00
Converted USE_PIVOT into runtime option PIVOT. Fixed some issues for
nl_double == float. (nw)
This commit is contained in:
parent
683ddc8d7d
commit
eacd7ef4b2
@ -38,11 +38,13 @@ NETLIST_START(dummy)
|
||||
|
||||
/* €€ */ SOLVER(Solver, 24000)
|
||||
PARAM(Solver.ACCURACY, 1e-8)
|
||||
PARAM(Solver.NR_LOOPS, 150)
|
||||
PARAM(Solver.NR_LOOPS, 9000)
|
||||
PARAM(Solver.SOR_FACTOR, 0.001)
|
||||
PARAM(Solver.GS_LOOPS, 1)
|
||||
//PARAM(Solver.GS_THRESHOLD, 99)
|
||||
PARAM(Solver.ITERATIVE, "SOR")
|
||||
PARAM(Solver.PARALLEL, 0)
|
||||
PARAM(Solver.PIVOT, 0)
|
||||
|
||||
LOCAL_SOURCE(congob_lib)
|
||||
INCLUDE(congob_lib)
|
||||
@ -99,6 +101,7 @@ NETLIST_START(dummy)
|
||||
PARAM(XU13.D.MODEL, "MB3614(TYPE=1)")
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
OPTIMIZE_FRONTIER(C51.1, RES_K(20), 50)
|
||||
OPTIMIZE_FRONTIER(R77.2, RES_K(20), 50)
|
||||
|
||||
@ -109,7 +112,7 @@ NETLIST_START(dummy)
|
||||
|
||||
OPTIMIZE_FRONTIER(R90.2, RES_K(100), 50)
|
||||
OPTIMIZE_FRONTIER(R92.2, RES_K(15), 50)
|
||||
|
||||
#endif
|
||||
NETLIST_END()
|
||||
|
||||
NETLIST_START(CongoBongo_schematics)
|
||||
|
@ -169,7 +169,7 @@ NETLIB_UPDATE_PARAM(POT)
|
||||
m_R2.update_dev();
|
||||
|
||||
m_R1.set_R(std::max(m_R.Value() * v, netlist().gmin()));
|
||||
m_R2.set_R(std::max(m_R.Value() * (1.0 - v), netlist().gmin()));
|
||||
m_R2.set_R(std::max(m_R.Value() * (NL_FCONST(1.0) - v), netlist().gmin()));
|
||||
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,6 @@
|
||||
//============================================================
|
||||
|
||||
#define USE_MATRIX_GS (0)
|
||||
#define USE_PIVOT_SEARCH (0)
|
||||
#define USE_GABS (1)
|
||||
// savings are eaten up by effort
|
||||
// FIXME: Convert into solver parameter
|
||||
|
@ -21,7 +21,8 @@ struct mat_cr_t
|
||||
unsigned ja[_storage_N * _storage_N];
|
||||
unsigned diag[_storage_N]; /* n */
|
||||
|
||||
void mult_vec(const double * RESTRICT A, const double * RESTRICT x, double * RESTRICT res)
|
||||
template<typename T>
|
||||
void mult_vec(const T * RESTRICT A, const T * RESTRICT x, T * RESTRICT res)
|
||||
{
|
||||
/*
|
||||
* res = A * x
|
||||
@ -41,7 +42,7 @@ struct mat_cr_t
|
||||
}
|
||||
}
|
||||
|
||||
void incomplete_LU_factorization(const double * RESTRICT A, double * RESTRICT LU)
|
||||
void incomplete_LU_factorization(const nl_double * RESTRICT A, nl_double * RESTRICT LU)
|
||||
{
|
||||
/*
|
||||
* incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung
|
||||
@ -80,7 +81,7 @@ struct mat_cr_t
|
||||
}
|
||||
}
|
||||
|
||||
void solveLUx (const double * RESTRICT LU, double * RESTRICT r)
|
||||
void solveLUx (const nl_double * RESTRICT LU, nl_double * RESTRICT r)
|
||||
{
|
||||
/*
|
||||
* Solve a linear equation Ax = r
|
||||
|
@ -45,6 +45,11 @@ protected:
|
||||
ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
|
||||
ATTR_HOT void LE_solve();
|
||||
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
|
||||
|
||||
/* Full LU back substitution, not used currently, in for future use */
|
||||
|
||||
ATTR_HOT void LE_back_subst_full(nl_double * RESTRICT x);
|
||||
|
||||
ATTR_HOT nl_double delta(const nl_double * RESTRICT V);
|
||||
ATTR_HOT void store(const nl_double * RESTRICT V);
|
||||
|
||||
@ -54,8 +59,9 @@ protected:
|
||||
*/
|
||||
ATTR_HOT nl_double compute_next_timestep();
|
||||
|
||||
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
//ATTR_ALIGN nl_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
template <typename T1, typename T2>
|
||||
inline nl_ext_double &A(const T1 r, const T2 c) { return m_A[r][c]; }
|
||||
|
||||
ATTR_ALIGN nl_double m_RHS[_storage_N];
|
||||
ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents
|
||||
ATTR_ALIGN nl_double m_last_V[_storage_N];
|
||||
@ -64,9 +70,9 @@ protected:
|
||||
terms_t *m_rails_temp;
|
||||
|
||||
private:
|
||||
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
|
||||
const unsigned m_dim;
|
||||
nl_double m_lp_fact;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -259,13 +265,13 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
if(0)
|
||||
if (0)
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
netlist().log("%3d: ", k);
|
||||
pstring line = pformat("%1")(k, "3");
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
netlist().log(" %3d", m_terms[k]->m_nzrd[j]);
|
||||
netlist().log("\n");
|
||||
line += pformat(" %1")(m_terms[k]->m_nzrd[j], "3");
|
||||
netlist().log("%s", line.cstr());
|
||||
}
|
||||
|
||||
/*
|
||||
@ -294,22 +300,25 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
for (unsigned i=0; i < iN; i++)
|
||||
m_A[k][i] = 0.0;
|
||||
A(k,i) = 0.0;
|
||||
|
||||
nl_double akk = 0.0;
|
||||
const unsigned terms_count = m_terms[k]->count();
|
||||
const unsigned railstart = m_terms[k]->m_railstart;
|
||||
const nl_double * RESTRICT gt = m_terms[k]->gt();
|
||||
|
||||
{
|
||||
nl_double akk = 0.0;
|
||||
for (unsigned i = 0; i < terms_count; i++)
|
||||
akk += gt[i];
|
||||
|
||||
A(k,k) = akk;
|
||||
}
|
||||
|
||||
const nl_double * RESTRICT go = m_terms[k]->go();
|
||||
const int * RESTRICT net_other = m_terms[k]->net_other();
|
||||
|
||||
for (unsigned i = 0; i < terms_count; i++)
|
||||
akk = akk + gt[i];
|
||||
|
||||
m_A[k][k] += akk;
|
||||
|
||||
for (unsigned i = 0; i < railstart; i++)
|
||||
m_A[k][net_other[i]] -= go[i];
|
||||
A(k,net_other[i]) -= go[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -341,54 +350,41 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double *
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
{
|
||||
#if 0
|
||||
for (int i = 0; i < N(); i++)
|
||||
{
|
||||
for (int k = 0; k < N(); k++)
|
||||
printf("%f ", m_A[i][k]);
|
||||
printf("| %f = %f \n", x[i], m_RHS[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
const unsigned kN = N();
|
||||
|
||||
for (unsigned i = 0; i < kN; i++) {
|
||||
// FIXME: use a parameter to enable pivoting?
|
||||
if (USE_PIVOT_SEARCH)
|
||||
// FIXME: use a parameter to enable pivoting? m_pivot
|
||||
if (m_params.m_pivot)
|
||||
{
|
||||
/* Find the row with the largest first value */
|
||||
unsigned maxrow = i;
|
||||
for (unsigned j = i + 1; j < kN; j++)
|
||||
{
|
||||
//if (std::abs(m_A[j][i]) > std::abs(m_A[maxrow][i]))
|
||||
if (m_A[j][i] * m_A[j][i] > m_A[maxrow][i] * m_A[maxrow][i])
|
||||
if (A(j,i) * A(j,i) > A(maxrow,i) * A(maxrow,i))
|
||||
maxrow = j;
|
||||
}
|
||||
|
||||
if (maxrow != i)
|
||||
{
|
||||
/* Swap the maxrow and ith row */
|
||||
for (unsigned k = i; k < kN; k++) {
|
||||
std::swap(m_A[i][k], m_A[maxrow][k]);
|
||||
for (unsigned k = 0; k < kN; k++) {
|
||||
std::swap(A(i,k), A(maxrow,k));
|
||||
}
|
||||
std::swap(m_RHS[i], m_RHS[maxrow]);
|
||||
}
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / m_A[i][i];
|
||||
const nl_ext_double * RESTRICT s = &m_A[i][i+1];
|
||||
const nl_double f = 1.0 / A(i,i);
|
||||
|
||||
/* Eliminate column i from row j */
|
||||
|
||||
for (unsigned j = i + 1; j < kN; j++)
|
||||
{
|
||||
nl_ext_double * RESTRICT d = &m_A[j][i+1];
|
||||
const nl_double f1 = - m_A[j][i] * f;
|
||||
const nl_double f1 = - A(j,i) * f;
|
||||
if (f1 != NL_FCONST(0.0))
|
||||
{
|
||||
const unsigned e = kN - i - 1;
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
d[k] = d[k] + s[k] * f1;
|
||||
for (unsigned k = i+1; k < kN; k++)
|
||||
A(j,k) += A(i,k) * f1;
|
||||
m_RHS[j] += m_RHS[i] * f1;
|
||||
}
|
||||
}
|
||||
@ -396,8 +392,7 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
else
|
||||
{
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / m_A[i][i];
|
||||
const nl_ext_double * RESTRICT s = &m_A[i][0];
|
||||
const nl_double f = 1.0 / A(i,i);
|
||||
const unsigned *p = m_terms[i]->m_nzrd.data();
|
||||
const unsigned e = m_terms[i]->m_nzrd.size();
|
||||
|
||||
@ -405,10 +400,9 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
|
||||
for (unsigned j = i + 1; j < kN; j++)
|
||||
{
|
||||
nl_ext_double * RESTRICT d = &m_A[j][0];
|
||||
const nl_double f1 = - d[i] * f;
|
||||
if (f1 != NL_FCONST(0.0))
|
||||
if (A(j,i) != NL_FCONST(0.0))
|
||||
{
|
||||
const nl_double f1 = - A(j,i) * f;
|
||||
#if 0
|
||||
/* The code below is 30% faster than the original
|
||||
* implementation which is given here for reference.
|
||||
@ -419,14 +413,13 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
double * RESTRICT d = &m_A[j][i+1];
|
||||
const double * RESTRICT s = &m_A[i][i+1];
|
||||
const int e = kN - i - 1;
|
||||
|
||||
for (int k = 0; k < e; k++)
|
||||
d[k] = d[k] + s[k] * f1;
|
||||
#else
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
{
|
||||
const unsigned pk = p[k];
|
||||
d[pk] += s[pk] * f1;
|
||||
A(j,pk) += A(i,pk) * f1;
|
||||
}
|
||||
#endif
|
||||
m_RHS[j] += m_RHS[i] * f1;
|
||||
@ -443,34 +436,67 @@ ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
|
||||
const unsigned kN = N();
|
||||
|
||||
/* back substitution */
|
||||
for (int j = kN - 1; j >= 0; j--)
|
||||
if (m_params.m_pivot)
|
||||
{
|
||||
nl_double tmp = 0;
|
||||
|
||||
#if 1
|
||||
#if (USE_PIVOT_SEARCH)
|
||||
const nl_ext_double * RESTRICT A = &m_A[j][j+1];
|
||||
const nl_double * RESTRICT xp = &x[j+1];
|
||||
const unsigned e = kN - j - 1;
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
tmp += A[k] * xp[k];
|
||||
#else
|
||||
const nl_ext_double * RESTRICT A = &m_A[j][0];
|
||||
const unsigned *p = m_terms[j]->m_nzrd.data();
|
||||
const unsigned e = m_terms[j]->m_nzrd.size();
|
||||
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
for (int j = kN - 1; j >= 0; j--)
|
||||
{
|
||||
const unsigned pk = p[k];
|
||||
tmp += A[pk] * x[pk];
|
||||
nl_double tmp = 0;
|
||||
for (unsigned k = j+1; k < kN; k++)
|
||||
tmp += A(j,k) * x[k];
|
||||
x[j] = (m_RHS[j] - tmp) / A(j,j);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
for (unsigned k = j + 1; k < kN; k++)
|
||||
tmp += m_A[j][k] * x[k];
|
||||
#endif
|
||||
x[j] = (m_RHS[j] - tmp) / m_A[j][j];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j = kN - 1; j >= 0; j--)
|
||||
{
|
||||
nl_double tmp = 0;
|
||||
|
||||
const unsigned *p = m_terms[j]->m_nzrd.data();
|
||||
const unsigned e = m_terms[j]->m_nzrd.size();
|
||||
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
{
|
||||
const unsigned pk = p[k];
|
||||
tmp += A(j,pk) * x[pk];
|
||||
}
|
||||
x[j] = (m_RHS[j] - tmp) / A(j,j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst_full(
|
||||
nl_double * RESTRICT x)
|
||||
{
|
||||
const unsigned kN = N();
|
||||
|
||||
/* back substitution */
|
||||
|
||||
// int ip;
|
||||
// ii=-1
|
||||
|
||||
//for (int i=0; i < kN; i++)
|
||||
// x[i] = m_RHS[i];
|
||||
|
||||
for (int i=0; i < kN; i++)
|
||||
{
|
||||
//ip=indx[i]; USE_PIVOT_SEARCH
|
||||
//sum=b[ip];
|
||||
//b[ip]=b[i];
|
||||
double sum=m_RHS[i];//x[i];
|
||||
for (int j=0; j < i; j++)
|
||||
sum -= A(i,j) * x[j];
|
||||
x[i]=sum;
|
||||
}
|
||||
for (int i=kN-1; i >= 0; i--)
|
||||
{
|
||||
double sum=x[i];
|
||||
for (int j = i+1; j < kN; j++)
|
||||
sum -= A(i,j)*x[j];
|
||||
x[i] = sum / A(i,i);
|
||||
}
|
||||
|
||||
#if 0
|
||||
printf("Solution:\n");
|
||||
for (unsigned i = 0; i < N(); i++)
|
||||
@ -527,33 +553,11 @@ ATTR_HOT int matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNU
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
#if 0
|
||||
/* limiting just doesn't work. */
|
||||
const unsigned iN = this->N();
|
||||
double err = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
const double ov = this->m_nets[k]->m_cur_Analog;
|
||||
double d = new_V[k] - ov;
|
||||
err = std::max(nl_math::abs(d), err);
|
||||
}
|
||||
double a = 1.05;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
const double ov = this->m_nets[k]->m_cur_Analog;
|
||||
double d = new_V[k] - ov;
|
||||
const double nv = ov + a * d;
|
||||
this->m_nets[k]->m_cur_Analog = nv;
|
||||
}
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
#else
|
||||
nl_double err = delta(new_V);
|
||||
|
||||
store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -580,7 +584,6 @@ template <unsigned m_N, unsigned _storage_N>
|
||||
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const solver_parameters_t *params, const int size)
|
||||
: matrix_solver_t(GAUSSIAN_ELIMINATION, params)
|
||||
, m_dim(size)
|
||||
, m_lp_fact(0)
|
||||
{
|
||||
m_terms = palloc_array(terms_t *, N());
|
||||
m_rails_temp = palloc_array(terms_t, N());
|
||||
@ -597,7 +600,6 @@ template <unsigned m_N, unsigned _storage_N>
|
||||
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const eSolverType type, const solver_parameters_t *params, const int size)
|
||||
: matrix_solver_t(type, params)
|
||||
, m_dim(size)
|
||||
, m_lp_fact(0)
|
||||
{
|
||||
m_terms = palloc_array(terms_t *, N());
|
||||
m_rails_temp = palloc_array(terms_t, N());
|
||||
|
@ -43,7 +43,7 @@ ATTR_HOT inline int matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED cons
|
||||
this->build_LE_RHS(m_RHS);
|
||||
//NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
|
||||
|
||||
nl_double new_val = m_RHS[0] / m_A[0][0];
|
||||
nl_double new_val = m_RHS[0] / A(0,0);
|
||||
|
||||
nl_double e = (new_val - net->m_cur_Analog);
|
||||
nl_double cerr = nl_math::abs(e);
|
||||
|
@ -41,10 +41,10 @@ ATTR_HOT inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED cons
|
||||
build_LE_A();
|
||||
build_LE_RHS(m_RHS);
|
||||
|
||||
const nl_double a = m_A[0][0];
|
||||
const nl_double b = m_A[0][1];
|
||||
const nl_double c = m_A[1][0];
|
||||
const nl_double d = m_A[1][1];
|
||||
const nl_double a = A(0,0);
|
||||
const nl_double b = A(0,1);
|
||||
const nl_double c = A(1,0);
|
||||
const nl_double d = A(1,1);
|
||||
|
||||
nl_double new_val[2];
|
||||
new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);
|
||||
|
673
src/emu/netlist/solver/nld_ms_direct_lu.h
Normal file
673
src/emu/netlist/solver/nld_ms_direct_lu.h
Normal file
@ -0,0 +1,673 @@
|
||||
// license:GPL-2.0+
|
||||
// copyright-holders:Couriersud
|
||||
/*
|
||||
* nld_ms_direct.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NLD_MS_DIRECT_H_
|
||||
#define NLD_MS_DIRECT_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "solver/nld_solver.h"
|
||||
|
||||
//#define A(_r, _c) m_A[_r][_c]
|
||||
|
||||
NETLIB_NAMESPACE_DEVICES_START()
|
||||
|
||||
//#define nl_ext_double __float128 // slow, very slow
|
||||
//#define nl_ext_double long double // slightly slower
|
||||
#define nl_ext_double double
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
class matrix_solver_direct_t: public matrix_solver_t
|
||||
{
|
||||
public:
|
||||
|
||||
matrix_solver_direct_t(const solver_parameters_t *params, const int size);
|
||||
matrix_solver_direct_t(const eSolverType type, const solver_parameters_t *params, const int size);
|
||||
|
||||
virtual ~matrix_solver_direct_t();
|
||||
|
||||
virtual void vsetup(analog_net_t::list_t &nets);
|
||||
virtual void reset() { matrix_solver_t::reset(); }
|
||||
|
||||
ATTR_HOT inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; }
|
||||
|
||||
ATTR_HOT inline int vsolve_non_dynamic(const bool newton_raphson);
|
||||
|
||||
protected:
|
||||
virtual void add_term(int net_idx, terminal_t *term);
|
||||
|
||||
ATTR_HOT virtual nl_double vsolve();
|
||||
|
||||
ATTR_HOT int solve_non_dynamic(const bool newton_raphson);
|
||||
ATTR_HOT void build_LE_A();
|
||||
ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
|
||||
|
||||
template<unsigned k>
|
||||
void LEk()
|
||||
{
|
||||
//const unsigned kN = N();
|
||||
|
||||
const double akki = 1.0 / A(k,k);
|
||||
const unsigned * const p = m_terms[k]->m_nzrd.data();
|
||||
const unsigned e = m_terms[k]->m_nzrd.size();
|
||||
|
||||
for (int i = k+1; i < _storage_N;i++)
|
||||
{
|
||||
const double alpha = A(i,k) * akki;
|
||||
A(i,k) = alpha;
|
||||
if (alpha != 0.0)
|
||||
for (int j = 0; j < e; j++)
|
||||
{
|
||||
const int pk = p[j];
|
||||
A(i,pk) -= A(k,pk) * alpha;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ATTR_HOT void LE_solve()
|
||||
{
|
||||
const unsigned kN = N();
|
||||
unsigned sk = 1;
|
||||
|
||||
if (1 && kN == _storage_N)
|
||||
{
|
||||
if (kN> 0 ) LEk<0>();
|
||||
if (kN> 1 ) LEk<1>();
|
||||
if (kN> 2 ) LEk<2>();
|
||||
if (kN> 3 ) LEk<3>();
|
||||
if (kN> 4 ) LEk<4>();
|
||||
if (kN> 5 ) LEk<5>();
|
||||
if (kN> 6 ) LEk<6>();
|
||||
if (kN> 7 ) LEk<7>();
|
||||
if (kN> 8 ) LEk<8>();
|
||||
if (kN> 9 ) LEk<9>();
|
||||
if (kN>10 ) LEk<10>();
|
||||
if (kN>11 ) LEk<11>();
|
||||
if (kN>12 ) LEk<12>();
|
||||
if (kN>13 ) LEk<13>();
|
||||
if (kN>14 ) LEk<14>();
|
||||
if (kN>15 ) LEk<15>();
|
||||
if (kN>16 ) LEk<16>();
|
||||
if (kN>17 ) LEk<17>();
|
||||
if (kN>18 ) LEk<18>();
|
||||
if (kN>19 ) LEk<19>();
|
||||
if (kN>20 ) LEk<20>();
|
||||
if (kN>21 ) LEk<21>();
|
||||
if (kN>22 ) LEk<22>();
|
||||
if (kN>23 ) LEk<23>();
|
||||
if (kN>24 ) LEk<24>();
|
||||
if (kN>25 ) LEk<25>();
|
||||
if (kN>26 ) LEk<26>();
|
||||
if (kN>27 ) LEk<27>();
|
||||
if (kN>28 ) LEk<28>();
|
||||
if (kN>29 ) LEk<29>();
|
||||
sk = 30;
|
||||
}
|
||||
|
||||
for (int k = sk; k < kN - 1; k++)
|
||||
{
|
||||
const double akki = 1.0 / A(k,k);
|
||||
const unsigned * const p = m_terms[k]->m_nzrd.data();
|
||||
const unsigned e = m_terms[k]->m_nzrd.size();
|
||||
|
||||
for (int i = k+1; i < kN;i++)
|
||||
{
|
||||
const double alpha = A(i,k) * akki;
|
||||
A(i,k) = alpha;
|
||||
if (alpha != 0.0)
|
||||
for (int j = 0; j < e; j++)
|
||||
{
|
||||
const int pk = p[j];
|
||||
A(i,pk) -= A(k,pk) * alpha;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
|
||||
ATTR_HOT nl_double delta(const nl_double * RESTRICT V);
|
||||
ATTR_HOT void store(const nl_double * RESTRICT V);
|
||||
|
||||
/* bring the whole system to the current time
|
||||
* Don't schedule a new calculation time. The recalculation has to be
|
||||
* triggered by the caller after the netlist element was changed.
|
||||
*/
|
||||
ATTR_HOT nl_double compute_next_timestep();
|
||||
|
||||
template <typename T1, typename T2>
|
||||
inline nl_ext_double &A(const T1 r, const T2 c) { return m_A[r][c]; }
|
||||
|
||||
//ATTR_ALIGN nl_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
ATTR_ALIGN nl_double m_RHS[_storage_N];
|
||||
ATTR_ALIGN nl_double m_last_RHS[_storage_N]; // right hand side - contains currents
|
||||
ATTR_ALIGN nl_double m_last_V[_storage_N];
|
||||
|
||||
terms_t **m_terms;
|
||||
terms_t *m_rails_temp;
|
||||
|
||||
private:
|
||||
ATTR_ALIGN nl_ext_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
|
||||
const unsigned m_dim;
|
||||
nl_double m_lp_fact;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// matrix_solver_direct
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
matrix_solver_direct_t<m_N, _storage_N>::~matrix_solver_direct_t()
|
||||
{
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pfree(m_terms[k]);
|
||||
}
|
||||
pfree_array(m_terms);
|
||||
pfree_array(m_rails_temp);
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT nl_double matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep()
|
||||
{
|
||||
nl_double new_solver_timestep = m_params.m_max_timestep;
|
||||
|
||||
if (m_params.m_dynamic)
|
||||
{
|
||||
/*
|
||||
* FIXME: We should extend the logic to use either all nets or
|
||||
* only output nets.
|
||||
*/
|
||||
for (unsigned k = 0, iN=N(); k < iN; k++)
|
||||
{
|
||||
analog_net_t *n = m_nets[k];
|
||||
|
||||
const nl_double DD_n = (n->m_cur_Analog - m_last_V[k]);
|
||||
const nl_double hn = current_timestep();
|
||||
|
||||
nl_double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1);
|
||||
nl_double new_net_timestep;
|
||||
|
||||
n->m_h_n_m_1 = hn;
|
||||
n->m_DD_n_m_1 = DD_n;
|
||||
if (nl_math::abs(DD2) > NL_FCONST(1e-30)) // avoid div-by-zero
|
||||
new_net_timestep = nl_math::sqrt(m_params.m_lte / nl_math::abs(NL_FCONST(0.5)*DD2));
|
||||
else
|
||||
new_net_timestep = m_params.m_max_timestep;
|
||||
|
||||
if (new_net_timestep < new_solver_timestep)
|
||||
new_solver_timestep = new_net_timestep;
|
||||
}
|
||||
if (new_solver_timestep < m_params.m_min_timestep)
|
||||
new_solver_timestep = m_params.m_min_timestep;
|
||||
}
|
||||
//if (new_solver_timestep > 10.0 * hn)
|
||||
// new_solver_timestep = 10.0 * hn;
|
||||
return new_solver_timestep;
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::add_term(int k, terminal_t *term)
|
||||
{
|
||||
if (term->m_otherterm->net().isRailNet())
|
||||
{
|
||||
m_rails_temp[k].add(term, -1, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
int ot = get_net_idx(&term->m_otherterm->net());
|
||||
if (ot>=0)
|
||||
{
|
||||
m_terms[k]->add(term, ot, true);
|
||||
SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
|
||||
}
|
||||
/* Should this be allowed ? */
|
||||
else // if (ot<0)
|
||||
{
|
||||
m_rails_temp[k].add(term, ot, true);
|
||||
netlist().error("found term with missing othernet %s\n", term->name().cstr());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
if (m_dim < nets.size())
|
||||
netlist().error("Dimension %d less than %" SIZETFMT, m_dim, SIZET_PRINTF(nets.size()));
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->clear();
|
||||
m_rails_temp[k].clear();
|
||||
}
|
||||
|
||||
matrix_solver_t::setup(nets);
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (unsigned i = 0; i < m_rails_temp[k].count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i], false);
|
||||
|
||||
m_rails_temp[k].clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
||||
/* Sort in descending order by number of connected matrix voltages.
|
||||
* The idea is, that for Gauss-Seidel algo the first voltage computed
|
||||
* depends on the greatest number of previous voltages thus taking into
|
||||
* account the maximum amout of information.
|
||||
*
|
||||
* This actually improves performance on popeye slightly. Average
|
||||
* GS computations reduce from 2.509 to 2.370
|
||||
*
|
||||
* Smallest to largest : 2.613
|
||||
* Unsorted : 2.509
|
||||
* Largest to smallest : 2.370
|
||||
*
|
||||
* Sorting as a general matrix pre-conditioning is mentioned in
|
||||
* literature but I have found no articles about Gauss Seidel.
|
||||
*
|
||||
* For Gaussian Elimination however increasing order is better suited.
|
||||
* FIXME: Even better would be to sort on elements right of the matrix diagonal.
|
||||
*
|
||||
*/
|
||||
|
||||
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1);
|
||||
|
||||
for (unsigned k = 0; k < N() / 2; k++)
|
||||
for (unsigned i = 0; i < N() - 1; i++)
|
||||
{
|
||||
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
|
||||
{
|
||||
std::swap(m_terms[i],m_terms[i+1]);
|
||||
m_nets.swap(i, i+1);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
int *other = m_terms[k]->net_other();
|
||||
for (unsigned i = 0; i < m_terms[k]->count(); i++)
|
||||
if (other[i] != -1)
|
||||
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* create a list of non zero elements right of the diagonal
|
||||
* These list anticipate the population of array elements by
|
||||
* Gaussian elimination.
|
||||
*/
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
t->m_nz.clear();
|
||||
|
||||
if (k==0)
|
||||
t->m_nzrd.clear();
|
||||
else
|
||||
{
|
||||
t->m_nzrd = m_terms[k-1]->m_nzrd;
|
||||
unsigned j=0;
|
||||
while(j < t->m_nzrd.size())
|
||||
{
|
||||
if (t->m_nzrd[j] < k + 1)
|
||||
t->m_nzrd.remove_at(j);
|
||||
else
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < N(); j++)
|
||||
{
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
{
|
||||
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
|
||||
t->m_nzrd.add(other[i]);
|
||||
if (!t->m_nz.contains(other[i]))
|
||||
t->m_nz.add(other[i]);
|
||||
}
|
||||
}
|
||||
psort_list(t->m_nzrd);
|
||||
|
||||
t->m_nz.add(k); // add diagonal
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
if(0)
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
netlist().log("%3d: ", k);
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
netlist().log(" %3d", m_terms[k]->m_nzrd[j]);
|
||||
netlist().log("\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* save states
|
||||
*/
|
||||
save(NLNAME(m_RHS));
|
||||
save(NLNAME(m_last_RHS));
|
||||
save(NLNAME(m_last_V));
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pstring num = pformat("%1")(k);
|
||||
|
||||
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
|
||||
{
|
||||
const unsigned iN = N();
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
for (unsigned i=0; i < iN; i++)
|
||||
A(k,i) = 0.0;
|
||||
|
||||
nl_double akk = 0.0;
|
||||
const unsigned terms_count = m_terms[k]->count();
|
||||
const unsigned railstart = m_terms[k]->m_railstart;
|
||||
const nl_double * RESTRICT gt = m_terms[k]->gt();
|
||||
const nl_double * RESTRICT go = m_terms[k]->go();
|
||||
const int * RESTRICT net_other = m_terms[k]->net_other();
|
||||
|
||||
for (unsigned i = 0; i < terms_count; i++)
|
||||
akk = akk + gt[i];
|
||||
|
||||
A(k,k) += akk;
|
||||
|
||||
for (unsigned i = 0; i < railstart; i++)
|
||||
A(k, net_other[i]) -= go[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double * RESTRICT rhs)
|
||||
{
|
||||
const unsigned iN = N();
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double rhsk_a = 0.0;
|
||||
nl_double rhsk_b = 0.0;
|
||||
|
||||
const int terms_count = m_terms[k]->count();
|
||||
const nl_double * RESTRICT go = m_terms[k]->go();
|
||||
const nl_double * RESTRICT Idr = m_terms[k]->Idr();
|
||||
const nl_double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
|
||||
|
||||
for (int i = 0; i < terms_count; i++)
|
||||
rhsk_a = rhsk_a + Idr[i];
|
||||
|
||||
for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
|
||||
//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
rhsk_b = rhsk_b + go[i] * *other_cur_analog[i];
|
||||
|
||||
rhs[k] = rhsk_a + rhsk_b;
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
#else
|
||||
// Crout algo
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
{
|
||||
#if 0
|
||||
for (int i = 0; i < N(); i++)
|
||||
{
|
||||
for (int k = 0; k < N(); k++)
|
||||
printf("%f ", m_A[i][k]);
|
||||
printf("| %f = %f \n", x[i], m_RHS[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
const unsigned kN = N();
|
||||
|
||||
ATTR_UNUSED int imax;
|
||||
ATTR_UNUSED double big,temp;
|
||||
|
||||
#if 0
|
||||
double vv[_storage_N];
|
||||
|
||||
for (i=0;i<kN;i++)
|
||||
{
|
||||
big=0.0;
|
||||
for (j=0;j<kN;j++)
|
||||
if ((temp=fabs(m_A[i][j])) > big)
|
||||
big=temp;
|
||||
//if (big == 0.0) nrerror("Singular matrix in routine LUDCMP");
|
||||
vv[i]=1.0/big;
|
||||
}
|
||||
#endif
|
||||
for (int j = 0; j < kN; j++)
|
||||
{
|
||||
#if 1
|
||||
for (int i=0; i < kN;i++)
|
||||
{
|
||||
double sum = 0.0;
|
||||
const int e = (i<j ? i : j);
|
||||
for (int k=0; k < e; k++)
|
||||
sum += A(i,k) * A(k,j);
|
||||
A(i,j) -= sum;
|
||||
}
|
||||
#else
|
||||
for (int i=0; i < j;i++)
|
||||
{
|
||||
double * RESTRICT p = m_A[i];
|
||||
double sum = 0.0;
|
||||
for (int k=0; k < i; k++)
|
||||
sum += p[k] * m_A[k][j];
|
||||
p[j] -= sum;
|
||||
}
|
||||
big=0.0;
|
||||
for (int i = j; i < kN; i++)
|
||||
{
|
||||
double * RESTRICT p = m_A[i];
|
||||
double sum = 0.0;
|
||||
for (int k = 0; k < j; k++)
|
||||
sum += p[k] * m_A[k][j];
|
||||
p[j] -= sum;
|
||||
#if 0
|
||||
if ( (dum=vv[i]*fabs(sum)) >= big) {
|
||||
big=dum;
|
||||
imax=i;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
// USE_PIVOT_SEARCH
|
||||
// omit pivoting for now
|
||||
if (j != imax)
|
||||
{
|
||||
for (k=0;k<kN;k++)
|
||||
{
|
||||
dum=m_A[imax][k];
|
||||
m_A[imax][k]=m_A[j][k];
|
||||
m_A[j][k]=dum;
|
||||
}
|
||||
//*d = -(*d);
|
||||
vv[imax]=vv[j];
|
||||
}
|
||||
indx[j]=imax;
|
||||
#endif
|
||||
//if (m_A[j][j] == 0.0)
|
||||
// m_A[j][j] = 1e-20;
|
||||
double dum = 1.0 / A(j,j);
|
||||
for (int i = j+1; i < kN; i++)
|
||||
A(i,j) *= dum;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
|
||||
nl_double * RESTRICT x)
|
||||
{
|
||||
const unsigned kN = N();
|
||||
|
||||
/* back substitution */
|
||||
|
||||
// int ip;
|
||||
// ii=-1
|
||||
|
||||
//for (int i=0; i < kN; i++)
|
||||
// x[i] = m_RHS[i];
|
||||
|
||||
for (int i=0; i < kN; i++)
|
||||
{
|
||||
//ip=indx[i]; USE_PIVOT_SEARCH
|
||||
//sum=b[ip];
|
||||
//b[ip]=b[i];
|
||||
double sum=m_RHS[i];//x[i];
|
||||
for (int j=0; j < i; j++)
|
||||
sum -= A(i,j) * x[j];
|
||||
x[i]=sum;
|
||||
}
|
||||
for (int i=kN-1; i >= 0; i--)
|
||||
{
|
||||
double sum=x[i];
|
||||
for (int j = i+1; j < kN; j++)
|
||||
sum -= A(i,j)*x[j];
|
||||
x[i] = sum / A(i,i);
|
||||
}
|
||||
|
||||
#if 0
|
||||
printf("Solution:\n");
|
||||
for (unsigned i = 0; i < N(); i++)
|
||||
{
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
printf("%f ", m_A[i][k]);
|
||||
printf("| %f = %f \n", x[i], m_RHS[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT nl_double matrix_solver_direct_t<m_N, _storage_N>::delta(
|
||||
const nl_double * RESTRICT V)
|
||||
{
|
||||
/* FIXME: Ideally we should also include currents (RHS) here. This would
|
||||
* need a revaluation of the right hand side after voltages have been updated
|
||||
* and thus belong into a different calculation. This applies to all solvers.
|
||||
*/
|
||||
|
||||
const unsigned iN = this->N();
|
||||
nl_double cerr = 0;
|
||||
for (unsigned i = 0; i < iN; i++)
|
||||
cerr = std::max(cerr, nl_math::abs(V[i] - this->m_nets[i]->m_cur_Analog));
|
||||
return cerr;
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT void matrix_solver_direct_t<m_N, _storage_N>::store(
|
||||
const nl_double * RESTRICT V)
|
||||
{
|
||||
for (unsigned i = 0, iN=N(); i < iN; i++)
|
||||
{
|
||||
this->m_nets[i]->m_cur_Analog = V[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT nl_double matrix_solver_direct_t<m_N, _storage_N>::vsolve()
|
||||
{
|
||||
this->solve_base(this);
|
||||
return this->compute_next_timestep();
|
||||
}
|
||||
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT int matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
|
||||
{
|
||||
nl_double new_V[_storage_N]; // = { 0.0 };
|
||||
|
||||
this->LE_back_subst(new_V);
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = delta(new_V);
|
||||
|
||||
store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
store(new_V);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
ATTR_HOT inline int matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
this->build_LE_A();
|
||||
this->build_LE_RHS(m_last_RHS);
|
||||
|
||||
for (unsigned i=0, iN=N(); i < iN; i++)
|
||||
m_RHS[i] = m_last_RHS[i];
|
||||
|
||||
this->LE_solve();
|
||||
|
||||
return this->solve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const solver_parameters_t *params, const int size)
|
||||
: matrix_solver_t(GAUSSIAN_ELIMINATION, params)
|
||||
, m_dim(size)
|
||||
, m_lp_fact(0)
|
||||
{
|
||||
m_terms = palloc_array(terms_t *, N());
|
||||
m_rails_temp = palloc_array(terms_t, N());
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k] = palloc(terms_t);
|
||||
m_last_RHS[k] = 0.0;
|
||||
m_last_V[k] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
matrix_solver_direct_t<m_N, _storage_N>::matrix_solver_direct_t(const eSolverType type, const solver_parameters_t *params, const int size)
|
||||
: matrix_solver_t(type, params)
|
||||
, m_dim(size)
|
||||
, m_lp_fact(0)
|
||||
{
|
||||
m_terms = palloc_array(terms_t *, N());
|
||||
m_rails_temp = palloc_array(terms_t, N());
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k] = palloc(terms_t);
|
||||
m_last_RHS[k] = 0.0;
|
||||
m_last_V[k] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
NETLIB_NAMESPACE_DEVICES_END()
|
||||
|
||||
#endif /* NLD_MS_DIRECT_H_ */
|
@ -35,10 +35,10 @@ public:
|
||||
unsigned mr=this->N(); /* FIXME: maximum iterations locked in here */
|
||||
|
||||
for (unsigned i = 0; i < mr + 1; i++)
|
||||
m_ht[i] = new double[mr];
|
||||
m_ht[i] = new nl_double[mr];
|
||||
|
||||
for (unsigned i = 0; i < this->N(); i++)
|
||||
m_v[i] = new double[_storage_N];
|
||||
m_v[i] = new nl_double[_storage_N];
|
||||
|
||||
}
|
||||
|
||||
@ -60,7 +60,7 @@ protected:
|
||||
|
||||
private:
|
||||
|
||||
int solve_ilu_gmres(double * RESTRICT x, double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, double accuracy);
|
||||
int solve_ilu_gmres(nl_double * RESTRICT x, nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy);
|
||||
|
||||
plist_t<int> m_term_cr[_storage_N];
|
||||
|
||||
@ -69,17 +69,17 @@ private:
|
||||
|
||||
mat_cr_t<_storage_N> mat;
|
||||
|
||||
double m_A[_storage_N * _storage_N];
|
||||
double m_LU[_storage_N * _storage_N];
|
||||
nl_double m_A[_storage_N * _storage_N];
|
||||
nl_double m_LU[_storage_N * _storage_N];
|
||||
|
||||
double m_c[_storage_N + 1]; /* mr + 1 */
|
||||
double m_g[_storage_N + 1]; /* mr + 1 */
|
||||
double * RESTRICT m_ht[_storage_N + 1]; /* (mr + 1), mr */
|
||||
double m_s[_storage_N]; /* mr + 1 */
|
||||
double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
|
||||
nl_double m_c[_storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_g[_storage_N + 1]; /* mr + 1 */
|
||||
nl_double * RESTRICT m_ht[_storage_N + 1]; /* (mr + 1), mr */
|
||||
nl_double m_s[_storage_N]; /* mr + 1 */
|
||||
nl_double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
|
||||
//double m_y[_storage_N]; /* mr + 1 */
|
||||
|
||||
double m_accuracy_mult; // FXIME: Save state
|
||||
nl_double m_accuracy_mult; // FXIME: Save state
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -214,7 +214,7 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
double err = 0;
|
||||
nl_double err = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
err = std::max(nl_math::abs(l_V[k] - new_V[k]), err);
|
||||
|
||||
@ -234,7 +234,7 @@ ATTR_HOT inline int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(c
|
||||
}
|
||||
}
|
||||
|
||||
static inline void givens_mult( const double c, const double s, double * RESTRICT g0, double * RESTRICT g1 )
|
||||
static inline void givens_mult( const nl_double c, const nl_double s, nl_double * RESTRICT g0, nl_double * RESTRICT g1 )
|
||||
{
|
||||
const double tg0 = c * *g0 - s * *g1;
|
||||
const double tg1 = s * *g0 + c * *g1;
|
||||
@ -244,7 +244,7 @@ static inline void givens_mult( const double c, const double s, double * RESTRIC
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x, double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, double accuracy)
|
||||
int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRICT x, nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy)
|
||||
{
|
||||
/*-------------------------------------------------------------------------
|
||||
* The code below was inspired by code published by John Burkardt under
|
||||
@ -289,13 +289,13 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
|
||||
* differently: The invest doesn't pay off.
|
||||
* Therefore we use the approach in the else part.
|
||||
*/
|
||||
double t[_storage_N];
|
||||
double Ax[_storage_N];
|
||||
nl_double t[_storage_N];
|
||||
nl_double Ax[_storage_N];
|
||||
vec_set(n, accuracy, t);
|
||||
mat.mult_vec(m_A, t, Ax);
|
||||
mat.solveLUx(m_LU, Ax);
|
||||
|
||||
const double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;
|
||||
const nl_double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;
|
||||
|
||||
//printf("rho/accuracy = %f\n", rho_to_accuracy);
|
||||
|
||||
@ -307,11 +307,11 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
|
||||
for (unsigned itr = 0; itr < restart_max; itr++)
|
||||
{
|
||||
unsigned last_k = mr;
|
||||
double mu;
|
||||
double rho;
|
||||
nl_double mu;
|
||||
nl_double rho;
|
||||
|
||||
double Ax[_storage_N];
|
||||
double residual[_storage_N];
|
||||
nl_double Ax[_storage_N];
|
||||
nl_double residual[_storage_N];
|
||||
|
||||
mat.mult_vec(m_A, x, Ax);
|
||||
|
||||
@ -324,13 +324,13 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
|
||||
|
||||
rho = std::sqrt(vecmult2(n, residual));
|
||||
|
||||
vec_mult_scalar(n, residual, 1.0 / rho, m_v[0]);
|
||||
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
|
||||
|
||||
vec_set(mr+1, 0.0, m_g);
|
||||
vec_set(mr+1, NL_FCONST(0.0), m_g);
|
||||
m_g[0] = rho;
|
||||
|
||||
for (unsigned i = 0; i < mr; i++)
|
||||
vec_set(mr + 1, 0.0, m_ht[i]);
|
||||
vec_set(mr + 1, NL_FCONST(0.0), m_ht[i]);
|
||||
|
||||
for (unsigned k = 0; k < mr; k++)
|
||||
{
|
||||
@ -349,7 +349,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
|
||||
m_ht[k1][k] = std::sqrt(vecmult2(n, m_v[k1]));
|
||||
|
||||
if (m_ht[k1][k] != 0.0)
|
||||
vec_scale(n, m_v[k1], 1.0 / m_ht[k1][k]);
|
||||
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
|
||||
|
||||
for (unsigned j = 0; j < k; j++)
|
||||
givens_mult(m_c[j], m_s[j], &m_ht[j][k], &m_ht[j+1][k]);
|
||||
@ -378,7 +378,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (double * RESTRICT x
|
||||
/* didn't converge within accuracy */
|
||||
last_k = mr - 1;
|
||||
|
||||
double m_y[_storage_N + 1];
|
||||
nl_double m_y[_storage_N + 1];
|
||||
|
||||
/* Solve the system H * y = g */
|
||||
/* x += m_v[j] * m_y[j] */
|
||||
|
@ -143,7 +143,7 @@ ATTR_HOT inline int matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dynamic(con
|
||||
|
||||
do {
|
||||
resched = false;
|
||||
double err = 0;
|
||||
nl_double err = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
const int * RESTRICT net_other = this->m_terms[k]->net_other();
|
||||
|
@ -199,14 +199,13 @@ ATTR_HOT inline int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic
|
||||
{
|
||||
nl_double Idrive = 0;
|
||||
|
||||
const nl_ext_double * RESTRICT A = &this->m_A[k][0];
|
||||
const unsigned *p = this->m_terms[k]->m_nz.data();
|
||||
const unsigned e = this->m_terms[k]->m_nz.size();
|
||||
|
||||
for (unsigned i = 0; i < e; i++)
|
||||
Idrive = Idrive + A[p[i]] * new_v[p[i]];
|
||||
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
|
||||
|
||||
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / A[k];
|
||||
const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / this->A(k,k);
|
||||
cerr = std::max(cerr, nl_math::abs(delta));
|
||||
new_v[k] += delta;
|
||||
}
|
||||
|
@ -17,7 +17,6 @@
|
||||
#endif
|
||||
|
||||
|
||||
//#pragma GCC optimize "-ffast-math"
|
||||
#if 0
|
||||
#pragma GCC optimize "-ffast-math"
|
||||
//#pragma GCC optimize "-ftree-parallelize-loops=4"
|
||||
@ -37,7 +36,11 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include "nld_solver.h"
|
||||
#if 1
|
||||
#include "nld_ms_direct.h"
|
||||
#else
|
||||
#include "nld_ms_direct_lu.h"
|
||||
#endif
|
||||
#include "nld_ms_direct1.h"
|
||||
#include "nld_ms_direct2.h"
|
||||
#include "nld_ms_sor.h"
|
||||
@ -302,11 +305,6 @@ ATTR_HOT nl_double matrix_solver_t::solve()
|
||||
return next_time_step;
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// matrix_solver - Direct base
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
ATTR_COLD int matrix_solver_t::get_net_idx(net_t *net)
|
||||
{
|
||||
for (std::size_t k = 0; k < m_nets.size(); k++)
|
||||
@ -315,6 +313,25 @@ ATTR_COLD int matrix_solver_t::get_net_idx(net_t *net)
|
||||
return -1;
|
||||
}
|
||||
|
||||
void matrix_solver_t::log_stats()
|
||||
{
|
||||
if (this->m_stat_calculations != 0 && this->m_params.m_log_stats)
|
||||
{
|
||||
this->netlist().log("==============================================");
|
||||
this->netlist().log("Solver %s", this->name().cstr());
|
||||
this->netlist().log(" ==> %d nets", (unsigned) this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
|
||||
this->netlist().log(" has %s elements", this->is_dynamic() ? "dynamic" : "no dynamic");
|
||||
this->netlist().log(" has %s elements", this->is_timestep() ? "timestep" : "no timestep");
|
||||
this->netlist().log(" %6.3f average newton raphson loops", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
|
||||
this->netlist().log(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average",
|
||||
this->m_stat_calculations,
|
||||
this->m_stat_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
|
||||
this->m_iterative_fail,
|
||||
100.0 * (double) this->m_iterative_fail / (double) this->m_stat_calculations,
|
||||
(double) this->m_iterative_total / (double) this->m_stat_calculations);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@ -335,15 +352,21 @@ NETLIB_START(solver)
|
||||
|
||||
register_param("FREQ", m_freq, 48000.0);
|
||||
|
||||
register_param("ITERATIVE", m_iterative_solver, "SOR");
|
||||
|
||||
/* iteration parameters */
|
||||
register_param("SOR_FACTOR", m_sor, 1.059);
|
||||
register_param("ITERATIVE", m_iterative_solver, "SOR");
|
||||
register_param("ACCURACY", m_accuracy, 1e-7);
|
||||
register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops
|
||||
register_param("GS_THRESHOLD", m_gs_threshold, 6); // below this value, gaussian elimination is used
|
||||
register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops
|
||||
|
||||
/* general parameters */
|
||||
register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT);
|
||||
register_param("PIVOT", m_pivot, 0); // use pivoting - on supported solvers
|
||||
register_param("NR_LOOPS", m_nr_loops, 250); // Newton-Raphson loops
|
||||
register_param("PARALLEL", m_parallel, 0);
|
||||
register_param("SOR_FACTOR", m_sor, 1.059);
|
||||
register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT);
|
||||
|
||||
/* automatic time step */
|
||||
register_param("DYNAMIC_TS", m_dynamic, 0);
|
||||
register_param("LTE", m_lte, 5e-5); // diff/timestep
|
||||
register_param("MIN_TIMESTEP", m_min_timestep, 1e-6); // nl_double timestep resolution
|
||||
@ -474,6 +497,7 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
|
||||
int cur_group = -1;
|
||||
const bool use_specific = true;
|
||||
|
||||
m_params.m_pivot = m_pivot.Value();
|
||||
m_params.m_accuracy = m_accuracy.Value();
|
||||
m_params.m_gs_loops = m_gs_loops.Value();
|
||||
m_params.m_nr_loops = m_nr_loops.Value();
|
||||
@ -551,13 +575,31 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
|
||||
case 8:
|
||||
ms = create_solver<8,8>(8, use_specific);
|
||||
break;
|
||||
case 10:
|
||||
ms = create_solver<10,10>(10, use_specific);
|
||||
break;
|
||||
case 11:
|
||||
ms = create_solver<11,11>(11, use_specific);
|
||||
break;
|
||||
case 12:
|
||||
ms = create_solver<12,12>(12, use_specific);
|
||||
break;
|
||||
case 15:
|
||||
ms = create_solver<15,15>(15, use_specific);
|
||||
break;
|
||||
case 31:
|
||||
ms = create_solver<31,31>(31, use_specific);
|
||||
break;
|
||||
case 49:
|
||||
ms = create_solver<49,49>(49, use_specific);
|
||||
break;
|
||||
#if 0
|
||||
case 87:
|
||||
ms = create_solver<87,87>(87, use_specific);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
netlist().warning("No specific solver found for netlist of size %d", (unsigned) net_count);
|
||||
if (net_count <= 16)
|
||||
{
|
||||
ms = create_solver<0,16>(net_count, use_specific);
|
||||
|
@ -37,6 +37,7 @@ class NETLIB_NAME(solver);
|
||||
|
||||
struct solver_parameters_t
|
||||
{
|
||||
int m_pivot;
|
||||
nl_double m_accuracy;
|
||||
nl_double m_lte;
|
||||
nl_double m_min_timestep;
|
||||
@ -134,25 +135,7 @@ public:
|
||||
|
||||
inline eSolverType type() const { return m_type; }
|
||||
|
||||
virtual void log_stats()
|
||||
{
|
||||
if (this->m_stat_calculations != 0 && this->m_params.m_log_stats)
|
||||
{
|
||||
this->netlist().log("==============================================");
|
||||
this->netlist().log("Solver %s", this->name().cstr());
|
||||
this->netlist().log(" ==> %d nets", (unsigned) this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
|
||||
this->netlist().log(" has %s elements", this->is_dynamic() ? "dynamic" : "no dynamic");
|
||||
this->netlist().log(" has %s elements", this->is_timestep() ? "timestep" : "no timestep");
|
||||
this->netlist().log(" %6.3f average newton raphson loops", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
|
||||
this->netlist().log(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average",
|
||||
this->m_stat_calculations,
|
||||
this->m_stat_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
|
||||
this->m_iterative_fail,
|
||||
100.0 * (double) this->m_iterative_fail / (double) this->m_stat_calculations,
|
||||
(double) this->m_iterative_total / (double) this->m_stat_calculations);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void log_stats();
|
||||
|
||||
protected:
|
||||
|
||||
@ -217,6 +200,7 @@ protected:
|
||||
logic_input_t m_fb_step;
|
||||
logic_output_t m_Q_step;
|
||||
|
||||
param_logic_t m_pivot;
|
||||
param_double_t m_freq;
|
||||
param_double_t m_sync_delay;
|
||||
param_double_t m_accuracy;
|
||||
|
@ -35,32 +35,36 @@ private:
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
|
||||
inline void vec_set (const std::size_t n, const double &scalar, double * RESTRICT result)
|
||||
template<typename T>
|
||||
inline void vec_set (const std::size_t n, const T &scalar, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = scalar;
|
||||
}
|
||||
inline double vecmult (const std::size_t n, const double * RESTRICT a1, const double * RESTRICT a2 )
|
||||
|
||||
template<typename T>
|
||||
inline T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT a2 )
|
||||
{
|
||||
double value = 0.0;
|
||||
T value = 0.0;
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
value = value + a1[i] * a2[i];
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
inline double vecmult2 (const std::size_t n, const double *a1)
|
||||
template<typename T>
|
||||
inline T vecmult2 (const std::size_t n, const T *a1)
|
||||
{
|
||||
double value = 0.0;
|
||||
T value = 0.0;
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
{
|
||||
const double temp = a1[i];
|
||||
const T temp = a1[i];
|
||||
value = value + temp * temp;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
inline void vec_mult_scalar (const std::size_t n, const double * RESTRICT v, const double scalar, double * RESTRICT result)
|
||||
template<typename T>
|
||||
inline void vec_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
{
|
||||
@ -68,7 +72,8 @@ inline void vec_mult_scalar (const std::size_t n, const double * RESTRICT v, con
|
||||
}
|
||||
}
|
||||
|
||||
inline void vec_add_mult_scalar (const std::size_t n, const double * RESTRICT v, const double scalar, double * RESTRICT result)
|
||||
template<typename T>
|
||||
inline void vec_add_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] += scalar * v[i];
|
||||
@ -80,13 +85,15 @@ inline void vec_add_ip(const std::size_t n, const double * RESTRICT v, double *
|
||||
result[i] += v[i];
|
||||
}
|
||||
|
||||
inline void vec_sub(const std::size_t n, const double * RESTRICT v1, const double * RESTRICT v2, double * RESTRICT result)
|
||||
template<typename T>
|
||||
inline void vec_sub(const std::size_t n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = v1[i] - v2[i];
|
||||
}
|
||||
|
||||
inline void vec_scale (const std::size_t n, double * RESTRICT v, const double scalar)
|
||||
template<typename T>
|
||||
inline void vec_scale (const std::size_t n, T * RESTRICT v, const T scalar)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
v[i] = scalar * v[i];
|
||||
|
Loading…
Reference in New Issue
Block a user