mirror of
https://github.com/holub/mame
synced 2025-06-03 19:36:26 +03:00
Recover from creating solvers by copy paste. Move common code were it
belongs. (nw)
This commit is contained in:
parent
4f1ca77643
commit
cd0441b678
@ -23,7 +23,7 @@ NETLIST_START(dummy)
|
||||
PARAM(Solver.MIN_TIMESTEP, 10e-6)
|
||||
PARAM(Solver.PARALLEL, 0)
|
||||
PARAM(Solver.SOR_FACTOR, 1.00)
|
||||
PARAM(Solver.PIVOT, 1)
|
||||
PARAM(Solver.PIVOT, 0)
|
||||
#else
|
||||
SOLVER(Solver, 12000)
|
||||
PARAM(Solver.ACCURACY, 1e-8)
|
||||
|
@ -60,7 +60,8 @@ struct mat_cr_t
|
||||
for (unsigned i = 1; ia[i] < lnz; i++) // row i
|
||||
{
|
||||
const unsigned iai1 = ia[i + 1];
|
||||
for (unsigned pk = ia[i]; pk < diag[i]; pk++) // all columns left of diag in row i
|
||||
const unsigned pke = diag[i];
|
||||
for (unsigned pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
|
||||
{
|
||||
// pk == (i, k)
|
||||
const unsigned k = ja[pk];
|
||||
|
@ -129,9 +129,9 @@ protected:
|
||||
T delta(const T * RESTRICT V);
|
||||
|
||||
template <typename T>
|
||||
void build_LE_A(T & child);
|
||||
void build_LE_A();
|
||||
template <typename T>
|
||||
void build_LE_RHS(T & child);
|
||||
void build_LE_RHS();
|
||||
|
||||
pvector_t<terms_t *> m_terms;
|
||||
pvector_t<analog_net_t *> m_nets;
|
||||
@ -158,6 +158,9 @@ private:
|
||||
logic_input_t m_fb_sync;
|
||||
logic_output_t m_Q_sync;
|
||||
|
||||
/* calculate matrix */
|
||||
void setup_matrix();
|
||||
|
||||
void step(const netlist_time &delta);
|
||||
|
||||
void update_inputs();
|
||||
@ -169,7 +172,7 @@ template <typename T>
|
||||
T matrix_solver_t::delta(const T * RESTRICT V)
|
||||
{
|
||||
/* FIXME: Ideally we should also include currents (RHS) here. This would
|
||||
* need a revaluation of the right hand side after voltages have been updated
|
||||
* need a reevaluation of the right hand side after voltages have been updated
|
||||
* and thus belong into a different calculation. This applies to all solvers.
|
||||
*/
|
||||
|
||||
@ -188,10 +191,12 @@ void matrix_solver_t::store(const T * RESTRICT V)
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void matrix_solver_t::build_LE_A(T & child)
|
||||
void matrix_solver_t::build_LE_A()
|
||||
{
|
||||
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
|
||||
|
||||
T &child = static_cast<T &>(*this);
|
||||
|
||||
const unsigned iN = child.N();
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
@ -219,8 +224,11 @@ void matrix_solver_t::build_LE_A(T & child)
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void matrix_solver_t::build_LE_RHS(T & child)
|
||||
void matrix_solver_t::build_LE_RHS()
|
||||
{
|
||||
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
|
||||
T &child = static_cast<T &>(*this);
|
||||
|
||||
const unsigned iN = child.N();
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
|
@ -198,157 +198,15 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
|
||||
|
||||
matrix_solver_t::setup_base(nets);
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
|
||||
|
||||
m_rails_temp[k]->clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
pfree(m_rails_temp[k]); // no longer needed
|
||||
|
||||
m_rails_temp.clear();
|
||||
#if 1
|
||||
|
||||
/* Sort in descending order by number of connected matrix voltages.
|
||||
* The idea is, that for Gauss-Seidel algo the first voltage computed
|
||||
* depends on the greatest number of previous voltages thus taking into
|
||||
* account the maximum amout of information.
|
||||
*
|
||||
* This actually improves performance on popeye slightly. Average
|
||||
* GS computations reduce from 2.509 to 2.370
|
||||
*
|
||||
* Smallest to largest : 2.613
|
||||
* Unsorted : 2.509
|
||||
* Largest to smallest : 2.370
|
||||
*
|
||||
* Sorting as a general matrix pre-conditioning is mentioned in
|
||||
* literature but I have found no articles about Gauss Seidel.
|
||||
*
|
||||
* For Gaussian Elimination however increasing order is better suited.
|
||||
* FIXME: Even better would be to sort on elements right of the matrix diagonal.
|
||||
*
|
||||
*/
|
||||
|
||||
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1);
|
||||
|
||||
for (unsigned k = 0; k < N() / 2; k++)
|
||||
for (unsigned i = 0; i < N() - 1; i++)
|
||||
{
|
||||
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
|
||||
{
|
||||
std::swap(m_terms[i], m_terms[i+1]);
|
||||
std::swap(m_nets[i], m_nets[i+1]);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
int *other = m_terms[k]->net_other();
|
||||
for (unsigned i = 0; i < m_terms[k]->count(); i++)
|
||||
if (other[i] != -1)
|
||||
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* create a list of non zero elements right of the diagonal
|
||||
* These list anticipate the population of array elements by
|
||||
* Gaussian elimination.
|
||||
*/
|
||||
/* add RHS element */
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
t->m_nz.clear();
|
||||
|
||||
if (k==0)
|
||||
t->m_nzrd.clear();
|
||||
else
|
||||
{
|
||||
t->m_nzrd = m_terms[k-1]->m_nzrd;
|
||||
unsigned j=0;
|
||||
while(j < t->m_nzrd.size())
|
||||
{
|
||||
if (t->m_nzrd[j] < k + 1)
|
||||
t->m_nzrd.remove_at(j);
|
||||
else
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
{
|
||||
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
|
||||
t->m_nzrd.push_back(other[i]);
|
||||
if (!t->m_nz.contains(other[i]))
|
||||
t->m_nz.push_back(other[i]);
|
||||
}
|
||||
|
||||
/* Add RHS element */
|
||||
if (!t->m_nzrd.contains(N()))
|
||||
t->m_nzrd.push_back(N());
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nzrd);
|
||||
|
||||
t->m_nz.push_back(k); // add diagonal
|
||||
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements below diagonal k
|
||||
* This should reduce cache misses ...
|
||||
*/
|
||||
|
||||
bool touched[_storage_N][_storage_N] = { { false } };
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_nzbd.clear();
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
|
||||
touched[k][m_terms[k]->m_nz[j]] = true;
|
||||
}
|
||||
|
||||
unsigned ops = 0;
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
ops++; // 1/A(k,k)
|
||||
for (unsigned row = k + 1; row < N(); row++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
ops++;
|
||||
if (!m_terms[k]->m_nzbd.contains(row))
|
||||
m_terms[k]->m_nzbd.push_back(row);
|
||||
for (unsigned col = k; col < N(); col++)
|
||||
if (touched[k][col])
|
||||
{
|
||||
touched[row][col] = true;
|
||||
ops += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
|
||||
|
||||
if (0)
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pstring line = pfmt("{1}")(k, "3");
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
|
||||
log().verbose("{1}", line);
|
||||
}
|
||||
|
||||
/*
|
||||
* save states
|
||||
*/
|
||||
save(NLNAME(m_last_RHS));
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
@ -356,15 +214,7 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
|
||||
pstring num = pfmt("{1}")(k);
|
||||
|
||||
save(RHS(k), "RHS." + num);
|
||||
save(m_terms[k]->m_last_V, "lastV." + num);
|
||||
save(m_terms[k]->m_DD_n_m_1, "m_DD_n_m_1." + num);
|
||||
save(m_terms[k]->m_h_n_m_1, "m_h_n_m_1." + num);
|
||||
|
||||
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -556,8 +406,8 @@ int matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
inline int matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
build_LE_A<matrix_solver_direct_t>();
|
||||
build_LE_RHS<matrix_solver_direct_t>();
|
||||
|
||||
for (unsigned i=0, iN=N(); i < iN; i++)
|
||||
m_last_RHS[i] = RHS(i);
|
||||
|
@ -30,8 +30,8 @@ public:
|
||||
|
||||
inline int matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
|
||||
{
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
build_LE_A<matrix_solver_direct1_t>();
|
||||
build_LE_RHS<matrix_solver_direct1_t>();
|
||||
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
|
||||
|
||||
nl_double new_val[1] = { RHS(0) / A(0,0) };
|
||||
|
@ -30,8 +30,8 @@ public:
|
||||
|
||||
inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
|
||||
{
|
||||
build_LE_A(*this);
|
||||
build_LE_RHS(*this);
|
||||
build_LE_A<matrix_solver_direct2_t>();
|
||||
build_LE_RHS<matrix_solver_direct2_t>();
|
||||
|
||||
const nl_double a = A(0,0);
|
||||
const nl_double b = A(0,1);
|
||||
|
@ -32,25 +32,10 @@ public:
|
||||
, m_use_more_precise_stop_condition(false)
|
||||
, m_accuracy_mult(1.0)
|
||||
{
|
||||
unsigned mr=this->N(); /* FIXME: maximum iterations locked in here */
|
||||
|
||||
for (unsigned i = 0; i < mr + 1; i++)
|
||||
m_ht[i] = new nl_double[mr];
|
||||
|
||||
for (unsigned i = 0; i < this->N(); i++)
|
||||
m_v[i] = new nl_double[_storage_N];
|
||||
|
||||
}
|
||||
|
||||
virtual ~matrix_solver_GMRES_t()
|
||||
{
|
||||
unsigned mr=this->N(); /* FIXME: maximum iterations locked in here */
|
||||
|
||||
for (unsigned i = 0; i < mr + 1; i++)
|
||||
delete[] m_ht[i];
|
||||
|
||||
for (unsigned i = 0; i < this->N(); i++)
|
||||
delete[] m_v[i];
|
||||
}
|
||||
|
||||
virtual void vsetup(analog_net_t::list_t &nets) override;
|
||||
@ -72,9 +57,9 @@ private:
|
||||
|
||||
nl_double m_c[_storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_g[_storage_N + 1]; /* mr + 1 */
|
||||
nl_double * RESTRICT m_ht[_storage_N + 1]; /* (mr + 1), mr */
|
||||
nl_double m_s[_storage_N]; /* mr + 1 */
|
||||
nl_double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
|
||||
nl_double m_ht[_storage_N + 1][_storage_N]; /* (mr + 1), mr */
|
||||
nl_double m_s[_storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_v[_storage_N + 1][_storage_N]; /*(mr + 1), n */
|
||||
//double m_y[_storage_N]; /* mr + 1 */
|
||||
|
||||
nl_double m_accuracy_mult; // FXIME: Save state
|
||||
@ -182,11 +167,10 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
|
||||
|
||||
const nl_double accuracy = this->m_params.m_accuracy;
|
||||
|
||||
int mr = _storage_N;
|
||||
if (_storage_N > 3 )
|
||||
mr = (int) sqrt(iN);
|
||||
mr = std::min(mr, this->m_params.m_gs_loops);
|
||||
int iter = 4;
|
||||
int mr = iN;
|
||||
if (iN > 3 )
|
||||
mr = (int) sqrt(iN) * 2;
|
||||
int iter = std::max(1, this->m_params.m_gs_loops);
|
||||
int gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
|
||||
int failed = mr * iter;
|
||||
|
||||
@ -221,7 +205,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void givens_mult( const T c, const T s, T & g0, T & g1 )
|
||||
inline void givens_mult( const T & c, const T & s, T & g0, T & g1 )
|
||||
{
|
||||
const T tg0 = c * g0 - s * g1;
|
||||
const T tg1 = s * g0 + c * g1;
|
||||
@ -339,7 +323,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
for (unsigned j = 0; j < k; j++)
|
||||
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
|
||||
|
||||
mu = std::sqrt(std::pow(m_ht[k][k], 2) + std::pow(m_ht[k1][k], 2));
|
||||
mu = std::hypot(m_ht[k][k], m_ht[k1][k]);
|
||||
|
||||
m_c[k] = m_ht[k][k] / mu;
|
||||
m_s[k] = -m_ht[k1][k] / mu;
|
||||
|
@ -125,121 +125,15 @@ ATTR_COLD void matrix_solver_sm_t<m_N, _storage_N>::vsetup(analog_net_t::list_t
|
||||
|
||||
matrix_solver_t::setup_base(nets);
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
|
||||
|
||||
m_rails_temp[k]->clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
/* create a list of non zero elements. */
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
t->m_nz.clear();
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
if (!t->m_nz.contains(other[i]))
|
||||
t->m_nz.push_back(other[i]);
|
||||
|
||||
t->m_nz.push_back(k); // add diagonal
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements right of the diagonal
|
||||
* These list anticipate the population of array elements by
|
||||
* Gaussian elimination.
|
||||
*/
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
if (k==0)
|
||||
t->m_nzrd.clear();
|
||||
else
|
||||
{
|
||||
t->m_nzrd = m_terms[k-1]->m_nzrd;
|
||||
unsigned j=0;
|
||||
while(j < t->m_nzrd.size())
|
||||
{
|
||||
if (t->m_nzrd[j] < k + 1)
|
||||
t->m_nzrd.remove_at(j);
|
||||
else
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
|
||||
t->m_nzrd.push_back(other[i]);
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nzrd);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements below diagonal k
|
||||
* This should reduce cache misses ...
|
||||
*/
|
||||
|
||||
bool touched[_storage_N][_storage_N] = { { false } };
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_nzbd.clear();
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
|
||||
touched[k][m_terms[k]->m_nz[j]] = true;
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
for (unsigned row = k + 1; row < N(); row++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
if (!m_terms[k]->m_nzbd.contains(row))
|
||||
m_terms[k]->m_nzbd.push_back(row);
|
||||
for (unsigned col = k; col < N(); col++)
|
||||
if (touched[k][col])
|
||||
touched[row][col] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0)
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pstring line = pfmt("{1}")(k, "3");
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
|
||||
log().verbose("{1}", line);
|
||||
}
|
||||
|
||||
/*
|
||||
* save states
|
||||
*/
|
||||
save(NLNAME(m_last_RHS));
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pstring num = pfmt("{1}")(k);
|
||||
|
||||
save(RHS(k), "RHS" + num);
|
||||
save(m_terms[k]->m_last_V, "lastV" + num);
|
||||
|
||||
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
||||
save(RHS(k), "RHS." + num);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -420,8 +314,8 @@ int matrix_solver_sm_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const boo
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
inline int matrix_solver_sm_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
build_LE_A<matrix_solver_sm_t>();
|
||||
build_LE_RHS<matrix_solver_sm_t>();
|
||||
|
||||
for (unsigned i=0, iN=N(); i < iN; i++)
|
||||
m_last_RHS[i] = RHS(i);
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "solver/nld_ms_direct.h"
|
||||
#include "solver/nld_matrix_solver.h"
|
||||
#include "solver/nld_solver.h"
|
||||
|
||||
NETLIB_NAMESPACE_DEVICES_START()
|
||||
@ -22,6 +23,9 @@ NETLIB_NAMESPACE_DEVICES_START()
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, _storage_N>
|
||||
{
|
||||
|
||||
friend class matrix_solver_t;
|
||||
|
||||
public:
|
||||
|
||||
matrix_solver_SOR_mat_t(const solver_parameters_t *params, int size)
|
||||
@ -124,12 +128,13 @@ int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newt
|
||||
ATTR_ALIGN nl_double new_v[_storage_N] = { 0.0 };
|
||||
const unsigned iN = this->N();
|
||||
|
||||
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>();
|
||||
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>();
|
||||
|
||||
bool resched = false;
|
||||
|
||||
int resched_cnt = 0;
|
||||
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
|
||||
#if 0
|
||||
static int ws_cnt = 0;
|
||||
|
@ -138,121 +138,15 @@ ATTR_COLD void matrix_solver_w_t<m_N, _storage_N>::vsetup(analog_net_t::list_t &
|
||||
|
||||
matrix_solver_t::setup_base(nets);
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
|
||||
|
||||
m_rails_temp[k]->clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
/* create a list of non zero elements. */
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
t->m_nz.clear();
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
if (!t->m_nz.contains(other[i]))
|
||||
t->m_nz.push_back(other[i]);
|
||||
|
||||
t->m_nz.push_back(k); // add diagonal
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements right of the diagonal
|
||||
* These list anticipate the population of array elements by
|
||||
* Gaussian elimination.
|
||||
*/
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
if (k==0)
|
||||
t->m_nzrd.clear();
|
||||
else
|
||||
{
|
||||
t->m_nzrd = m_terms[k-1]->m_nzrd;
|
||||
unsigned j=0;
|
||||
while(j < t->m_nzrd.size())
|
||||
{
|
||||
if (t->m_nzrd[j] < k + 1)
|
||||
t->m_nzrd.remove_at(j);
|
||||
else
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
|
||||
t->m_nzrd.push_back(other[i]);
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nzrd);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements below diagonal k
|
||||
* This should reduce cache misses ...
|
||||
*/
|
||||
|
||||
bool touched[_storage_N][_storage_N] = { { false } };
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k]->m_nzbd.clear();
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
|
||||
touched[k][m_terms[k]->m_nz[j]] = true;
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
for (unsigned row = k + 1; row < N(); row++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
if (!m_terms[k]->m_nzbd.contains(row))
|
||||
m_terms[k]->m_nzbd.push_back(row);
|
||||
for (unsigned col = k; col < N(); col++)
|
||||
if (touched[k][col])
|
||||
touched[row][col] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0)
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pstring line = pfmt("{1}")(k, "3");
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
|
||||
log().verbose("{1}", line);
|
||||
}
|
||||
|
||||
/*
|
||||
* save states
|
||||
*/
|
||||
save(NLNAME(m_last_RHS));
|
||||
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
pstring num = pfmt("{1}")(k);
|
||||
|
||||
save(RHS(k), "RHS" + num);
|
||||
save(m_terms[k]->m_last_V, "lastV" + num);
|
||||
|
||||
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
||||
save(RHS(k), "RHS." + num);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -482,8 +376,8 @@ int matrix_solver_w_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
inline int matrix_solver_w_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
build_LE_A<matrix_solver_w_t>();
|
||||
build_LE_RHS<matrix_solver_w_t>();
|
||||
|
||||
for (unsigned i=0, iN=N(); i < iN; i++)
|
||||
m_last_RHS[i] = RHS(i);
|
||||
@ -513,6 +407,9 @@ matrix_solver_w_t<m_N, _storage_N>::matrix_solver_w_t(const eSolverType type, co
|
||||
,m_cnt(0)
|
||||
, m_dim(size)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A = palloc_array(nl_ext_double, N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
|
@ -43,6 +43,7 @@
|
||||
|
||||
#if 1
|
||||
#include "nld_ms_direct.h"
|
||||
//#include "nld_ms_gcr.h"
|
||||
#else
|
||||
#include "nld_ms_direct_lu.h"
|
||||
#endif
|
||||
@ -200,8 +201,191 @@ ATTR_COLD void matrix_solver_t::setup_base(analog_net_t::list_t &nets)
|
||||
log().debug("added net with {1} populated connections\n", net->m_core_terms.size());
|
||||
}
|
||||
|
||||
/* now setup the matrix */
|
||||
setup_matrix();
|
||||
}
|
||||
|
||||
ATTR_COLD void matrix_solver_t::setup_matrix()
|
||||
{
|
||||
const unsigned iN = m_nets.size();
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
|
||||
|
||||
m_rails_temp[k]->clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
pfree(m_rails_temp[k]); // no longer needed
|
||||
|
||||
m_rails_temp.clear();
|
||||
#if 0
|
||||
|
||||
/* Sort in descending order by number of connected matrix voltages.
|
||||
* The idea is, that for Gauss-Seidel algo the first voltage computed
|
||||
* depends on the greatest number of previous voltages thus taking into
|
||||
* account the maximum amout of information.
|
||||
*
|
||||
* This actually improves performance on popeye slightly. Average
|
||||
* GS computations reduce from 2.509 to 2.370
|
||||
*
|
||||
* Smallest to largest : 2.613
|
||||
* Unsorted : 2.509
|
||||
* Largest to smallest : 2.370
|
||||
*
|
||||
* Sorting as a general matrix pre-conditioning is mentioned in
|
||||
* literature but I have found no articles about Gauss Seidel.
|
||||
*
|
||||
* For Gaussian Elimination however increasing order is better suited.
|
||||
* FIXME: Even better would be to sort on elements right of the matrix diagonal.
|
||||
*
|
||||
*/
|
||||
|
||||
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1) * -1;
|
||||
|
||||
for (unsigned k = 0; k < iN / 2; k++)
|
||||
for (unsigned i = 0; i < iN - 1; i++)
|
||||
{
|
||||
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
|
||||
{
|
||||
std::swap(m_terms[i], m_terms[i+1]);
|
||||
std::swap(m_nets[i], m_nets[i+1]);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
int *other = m_terms[k]->net_other();
|
||||
for (unsigned i = 0; i < m_terms[k]->count(); i++)
|
||||
if (other[i] != -1)
|
||||
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* create a list of non zero elements. */
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
t->m_nz.clear();
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
if (!t->m_nz.contains(other[i]))
|
||||
t->m_nz.push_back(other[i]);
|
||||
|
||||
t->m_nz.push_back(k); // add diagonal
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nz);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements right of the diagonal
|
||||
* These list anticipate the population of array elements by
|
||||
* Gaussian elimination.
|
||||
*/
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
terms_t * t = m_terms[k];
|
||||
/* pretty brutal */
|
||||
int *other = t->net_other();
|
||||
|
||||
if (k==0)
|
||||
t->m_nzrd.clear();
|
||||
else
|
||||
{
|
||||
t->m_nzrd = m_terms[k-1]->m_nzrd;
|
||||
unsigned j=0;
|
||||
while(j < t->m_nzrd.size())
|
||||
{
|
||||
if (t->m_nzrd[j] < k + 1)
|
||||
t->m_nzrd.remove_at(j);
|
||||
else
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < t->m_railstart; i++)
|
||||
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
|
||||
t->m_nzrd.push_back(other[i]);
|
||||
|
||||
/* and sort */
|
||||
psort_list(t->m_nzrd);
|
||||
}
|
||||
|
||||
/* create a list of non zero elements below diagonal k
|
||||
* This should reduce cache misses ...
|
||||
*/
|
||||
|
||||
bool **touched = new bool*[iN];
|
||||
for (unsigned k=0; k<iN; k++)
|
||||
touched[k] = new bool[iN];
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
for (unsigned j = 0; j < iN; j++)
|
||||
touched[k][j] = false;
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
|
||||
touched[k][m_terms[k]->m_nz[j]] = true;
|
||||
}
|
||||
|
||||
unsigned ops = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
ops++; // 1/A(k,k)
|
||||
for (unsigned row = k + 1; row < iN; row++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
ops++;
|
||||
if (!m_terms[k]->m_nzbd.contains(row))
|
||||
m_terms[k]->m_nzbd.push_back(row);
|
||||
for (unsigned col = k + 1; col < iN; col++)
|
||||
if (touched[k][col])
|
||||
{
|
||||
touched[row][col] = true;
|
||||
ops += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
|
||||
|
||||
if (0)
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
pstring line = pfmt("{1}")(k, "3");
|
||||
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
|
||||
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
|
||||
log().verbose("{1}", line);
|
||||
}
|
||||
|
||||
/*
|
||||
* save states
|
||||
*/
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
pstring num = pfmt("{1}")(k);
|
||||
|
||||
save(m_terms[k]->m_last_V, "lastV." + num);
|
||||
save(m_terms[k]->m_DD_n_m_1, "m_DD_n_m_1." + num);
|
||||
save(m_terms[k]->m_h_n_m_1, "m_h_n_m_1." + num);
|
||||
|
||||
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
|
||||
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
|
||||
}
|
||||
|
||||
for (unsigned k=0; k<iN; k++)
|
||||
delete [] touched[k];
|
||||
delete [] touched;
|
||||
}
|
||||
|
||||
void matrix_solver_t::update_inputs()
|
||||
{
|
||||
@ -539,6 +723,7 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const bool use_sp
|
||||
else if (pstring("MAT").equals(m_iterative_solver))
|
||||
{
|
||||
typedef matrix_solver_direct_t<m_N,_storage_N> solver_mat;
|
||||
//typedef matrix_solver_GCR_t<m_N,_storage_N> solver_mat;
|
||||
return palloc(solver_mat(&m_params, size));
|
||||
}
|
||||
else if (pstring("SM").equals(m_iterative_solver))
|
||||
|
Loading…
Reference in New Issue
Block a user