Recover from creating solvers by copy paste. Move common code were it

belongs. (nw)
This commit is contained in:
couriersud 2016-04-13 21:23:55 +02:00
parent 4f1ca77643
commit cd0441b678
11 changed files with 233 additions and 409 deletions

View File

@ -23,7 +23,7 @@ NETLIST_START(dummy)
PARAM(Solver.MIN_TIMESTEP, 10e-6)
PARAM(Solver.PARALLEL, 0)
PARAM(Solver.SOR_FACTOR, 1.00)
PARAM(Solver.PIVOT, 1)
PARAM(Solver.PIVOT, 0)
#else
SOLVER(Solver, 12000)
PARAM(Solver.ACCURACY, 1e-8)

View File

@ -60,7 +60,8 @@ struct mat_cr_t
for (unsigned i = 1; ia[i] < lnz; i++) // row i
{
const unsigned iai1 = ia[i + 1];
for (unsigned pk = ia[i]; pk < diag[i]; pk++) // all columns left of diag in row i
const unsigned pke = diag[i];
for (unsigned pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
{
// pk == (i, k)
const unsigned k = ja[pk];

View File

@ -129,9 +129,9 @@ protected:
T delta(const T * RESTRICT V);
template <typename T>
void build_LE_A(T & child);
void build_LE_A();
template <typename T>
void build_LE_RHS(T & child);
void build_LE_RHS();
pvector_t<terms_t *> m_terms;
pvector_t<analog_net_t *> m_nets;
@ -158,6 +158,9 @@ private:
logic_input_t m_fb_sync;
logic_output_t m_Q_sync;
/* calculate matrix */
void setup_matrix();
void step(const netlist_time &delta);
void update_inputs();
@ -169,7 +172,7 @@ template <typename T>
T matrix_solver_t::delta(const T * RESTRICT V)
{
/* FIXME: Ideally we should also include currents (RHS) here. This would
* need a revaluation of the right hand side after voltages have been updated
* need a reevaluation of the right hand side after voltages have been updated
* and thus belong into a different calculation. This applies to all solvers.
*/
@ -188,10 +191,12 @@ void matrix_solver_t::store(const T * RESTRICT V)
}
template <typename T>
void matrix_solver_t::build_LE_A(T & child)
void matrix_solver_t::build_LE_A()
{
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this);
const unsigned iN = child.N();
for (unsigned k = 0; k < iN; k++)
{
@ -219,8 +224,11 @@ void matrix_solver_t::build_LE_A(T & child)
}
template <typename T>
void matrix_solver_t::build_LE_RHS(T & child)
void matrix_solver_t::build_LE_RHS()
{
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this);
const unsigned iN = child.N();
for (unsigned k = 0; k < iN; k++)
{

View File

@ -198,157 +198,15 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
matrix_solver_t::setup_base(nets);
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
m_rails_temp[k]->clear(); // no longer needed
m_terms[k]->set_pointers();
}
for (unsigned k = 0; k < N(); k++)
pfree(m_rails_temp[k]); // no longer needed
m_rails_temp.clear();
#if 1
/* Sort in descending order by number of connected matrix voltages.
* The idea is, that for Gauss-Seidel algo the first voltage computed
* depends on the greatest number of previous voltages thus taking into
* account the maximum amout of information.
*
* This actually improves performance on popeye slightly. Average
* GS computations reduce from 2.509 to 2.370
*
* Smallest to largest : 2.613
* Unsorted : 2.509
* Largest to smallest : 2.370
*
* Sorting as a general matrix pre-conditioning is mentioned in
* literature but I have found no articles about Gauss Seidel.
*
* For Gaussian Elimination however increasing order is better suited.
* FIXME: Even better would be to sort on elements right of the matrix diagonal.
*
*/
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1);
for (unsigned k = 0; k < N() / 2; k++)
for (unsigned i = 0; i < N() - 1; i++)
{
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
{
std::swap(m_terms[i], m_terms[i+1]);
std::swap(m_nets[i], m_nets[i+1]);
}
}
for (unsigned k = 0; k < N(); k++)
{
int *other = m_terms[k]->net_other();
for (unsigned i = 0; i < m_terms[k]->count(); i++)
if (other[i] != -1)
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
}
#endif
/* create a list of non zero elements right of the diagonal
* These list anticipate the population of array elements by
* Gaussian elimination.
*/
/* add RHS element */
for (unsigned k = 0; k < N(); k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
t->m_nz.clear();
if (k==0)
t->m_nzrd.clear();
else
{
t->m_nzrd = m_terms[k-1]->m_nzrd;
unsigned j=0;
while(j < t->m_nzrd.size())
{
if (t->m_nzrd[j] < k + 1)
t->m_nzrd.remove_at(j);
else
j++;
}
}
for (unsigned i = 0; i < t->m_railstart; i++)
{
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
t->m_nzrd.push_back(other[i]);
if (!t->m_nz.contains(other[i]))
t->m_nz.push_back(other[i]);
}
/* Add RHS element */
if (!t->m_nzrd.contains(N()))
t->m_nzrd.push_back(N());
/* and sort */
psort_list(t->m_nzrd);
t->m_nz.push_back(k); // add diagonal
psort_list(t->m_nz);
}
/* create a list of non zero elements below diagonal k
* This should reduce cache misses ...
*/
bool touched[_storage_N][_storage_N] = { { false } };
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_nzbd.clear();
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
touched[k][m_terms[k]->m_nz[j]] = true;
}
unsigned ops = 0;
for (unsigned k = 0; k < N(); k++)
{
ops++; // 1/A(k,k)
for (unsigned row = k + 1; row < N(); row++)
{
if (touched[row][k])
{
ops++;
if (!m_terms[k]->m_nzbd.contains(row))
m_terms[k]->m_nzbd.push_back(row);
for (unsigned col = k; col < N(); col++)
if (touched[k][col])
{
touched[row][col] = true;
ops += 2;
}
}
}
}
log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
if (0)
for (unsigned k = 0; k < N(); k++)
{
pstring line = pfmt("{1}")(k, "3");
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
log().verbose("{1}", line);
}
/*
* save states
*/
save(NLNAME(m_last_RHS));
for (unsigned k = 0; k < N(); k++)
@ -356,15 +214,7 @@ ATTR_COLD void matrix_solver_direct_t<m_N, _storage_N>::vsetup(analog_net_t::lis
pstring num = pfmt("{1}")(k);
save(RHS(k), "RHS." + num);
save(m_terms[k]->m_last_V, "lastV." + num);
save(m_terms[k]->m_DD_n_m_1, "m_DD_n_m_1." + num);
save(m_terms[k]->m_h_n_m_1, "m_h_n_m_1." + num);
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
}
}
@ -556,8 +406,8 @@ int matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const
template <unsigned m_N, unsigned _storage_N>
inline int matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
build_LE_A<matrix_solver_direct_t>();
build_LE_RHS<matrix_solver_direct_t>();
for (unsigned i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);

View File

@ -30,8 +30,8 @@ public:
inline int matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
build_LE_A<matrix_solver_direct1_t>();
build_LE_RHS<matrix_solver_direct1_t>();
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
nl_double new_val[1] = { RHS(0) / A(0,0) };

View File

@ -30,8 +30,8 @@ public:
inline int matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
{
build_LE_A(*this);
build_LE_RHS(*this);
build_LE_A<matrix_solver_direct2_t>();
build_LE_RHS<matrix_solver_direct2_t>();
const nl_double a = A(0,0);
const nl_double b = A(0,1);

View File

@ -32,25 +32,10 @@ public:
, m_use_more_precise_stop_condition(false)
, m_accuracy_mult(1.0)
{
unsigned mr=this->N(); /* FIXME: maximum iterations locked in here */
for (unsigned i = 0; i < mr + 1; i++)
m_ht[i] = new nl_double[mr];
for (unsigned i = 0; i < this->N(); i++)
m_v[i] = new nl_double[_storage_N];
}
virtual ~matrix_solver_GMRES_t()
{
unsigned mr=this->N(); /* FIXME: maximum iterations locked in here */
for (unsigned i = 0; i < mr + 1; i++)
delete[] m_ht[i];
for (unsigned i = 0; i < this->N(); i++)
delete[] m_v[i];
}
virtual void vsetup(analog_net_t::list_t &nets) override;
@ -72,9 +57,9 @@ private:
nl_double m_c[_storage_N + 1]; /* mr + 1 */
nl_double m_g[_storage_N + 1]; /* mr + 1 */
nl_double * RESTRICT m_ht[_storage_N + 1]; /* (mr + 1), mr */
nl_double m_s[_storage_N]; /* mr + 1 */
nl_double * RESTRICT m_v[_storage_N + 1]; /*(mr + 1), n */
nl_double m_ht[_storage_N + 1][_storage_N]; /* (mr + 1), mr */
nl_double m_s[_storage_N + 1]; /* mr + 1 */
nl_double m_v[_storage_N + 1][_storage_N]; /*(mr + 1), n */
//double m_y[_storage_N]; /* mr + 1 */
nl_double m_accuracy_mult; // FXIME: Save state
@ -182,11 +167,10 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
const nl_double accuracy = this->m_params.m_accuracy;
int mr = _storage_N;
if (_storage_N > 3 )
mr = (int) sqrt(iN);
mr = std::min(mr, this->m_params.m_gs_loops);
int iter = 4;
int mr = iN;
if (iN > 3 )
mr = (int) sqrt(iN) * 2;
int iter = std::max(1, this->m_params.m_gs_loops);
int gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
int failed = mr * iter;
@ -221,7 +205,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
}
template <typename T>
inline void givens_mult( const T c, const T s, T & g0, T & g1 )
inline void givens_mult( const T & c, const T & s, T & g0, T & g1 )
{
const T tg0 = c * g0 - s * g1;
const T tg1 = s * g0 + c * g1;
@ -339,7 +323,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
for (unsigned j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
mu = std::sqrt(std::pow(m_ht[k][k], 2) + std::pow(m_ht[k1][k], 2));
mu = std::hypot(m_ht[k][k], m_ht[k1][k]);
m_c[k] = m_ht[k][k] / mu;
m_s[k] = -m_ht[k1][k] / mu;

View File

@ -125,121 +125,15 @@ ATTR_COLD void matrix_solver_sm_t<m_N, _storage_N>::vsetup(analog_net_t::list_t
matrix_solver_t::setup_base(nets);
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
m_rails_temp[k]->clear(); // no longer needed
m_terms[k]->set_pointers();
}
/* create a list of non zero elements. */
for (unsigned k = 0; k < N(); k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
t->m_nz.clear();
for (unsigned i = 0; i < t->m_railstart; i++)
if (!t->m_nz.contains(other[i]))
t->m_nz.push_back(other[i]);
t->m_nz.push_back(k); // add diagonal
/* and sort */
psort_list(t->m_nz);
}
/* create a list of non zero elements right of the diagonal
* These list anticipate the population of array elements by
* Gaussian elimination.
*/
for (unsigned k = 0; k < N(); k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
if (k==0)
t->m_nzrd.clear();
else
{
t->m_nzrd = m_terms[k-1]->m_nzrd;
unsigned j=0;
while(j < t->m_nzrd.size())
{
if (t->m_nzrd[j] < k + 1)
t->m_nzrd.remove_at(j);
else
j++;
}
}
for (unsigned i = 0; i < t->m_railstart; i++)
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
t->m_nzrd.push_back(other[i]);
/* and sort */
psort_list(t->m_nzrd);
}
/* create a list of non zero elements below diagonal k
* This should reduce cache misses ...
*/
bool touched[_storage_N][_storage_N] = { { false } };
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_nzbd.clear();
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
touched[k][m_terms[k]->m_nz[j]] = true;
}
for (unsigned k = 0; k < N(); k++)
{
for (unsigned row = k + 1; row < N(); row++)
{
if (touched[row][k])
{
if (!m_terms[k]->m_nzbd.contains(row))
m_terms[k]->m_nzbd.push_back(row);
for (unsigned col = k; col < N(); col++)
if (touched[k][col])
touched[row][col] = true;
}
}
}
if (0)
for (unsigned k = 0; k < N(); k++)
{
pstring line = pfmt("{1}")(k, "3");
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
log().verbose("{1}", line);
}
/*
* save states
*/
save(NLNAME(m_last_RHS));
for (unsigned k = 0; k < N(); k++)
{
pstring num = pfmt("{1}")(k);
save(RHS(k), "RHS" + num);
save(m_terms[k]->m_last_V, "lastV" + num);
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
save(RHS(k), "RHS." + num);
}
}
@ -420,8 +314,8 @@ int matrix_solver_sm_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const boo
template <unsigned m_N, unsigned _storage_N>
inline int matrix_solver_sm_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
build_LE_A<matrix_solver_sm_t>();
build_LE_RHS<matrix_solver_sm_t>();
for (unsigned i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);

View File

@ -15,6 +15,7 @@
#include <algorithm>
#include "solver/nld_ms_direct.h"
#include "solver/nld_matrix_solver.h"
#include "solver/nld_solver.h"
NETLIB_NAMESPACE_DEVICES_START()
@ -22,6 +23,9 @@ NETLIB_NAMESPACE_DEVICES_START()
template <unsigned m_N, unsigned _storage_N>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, _storage_N>
{
friend class matrix_solver_t;
public:
matrix_solver_SOR_mat_t(const solver_parameters_t *params, int size)
@ -124,12 +128,13 @@ int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newt
ATTR_ALIGN nl_double new_v[_storage_N] = { 0.0 };
const unsigned iN = this->N();
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>();
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>();
bool resched = false;
int resched_cnt = 0;
this->build_LE_A(*this);
this->build_LE_RHS(*this);
#if 0
static int ws_cnt = 0;

View File

@ -138,121 +138,15 @@ ATTR_COLD void matrix_solver_w_t<m_N, _storage_N>::vsetup(analog_net_t::list_t &
matrix_solver_t::setup_base(nets);
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
m_rails_temp[k]->clear(); // no longer needed
m_terms[k]->set_pointers();
}
/* create a list of non zero elements. */
for (unsigned k = 0; k < N(); k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
t->m_nz.clear();
for (unsigned i = 0; i < t->m_railstart; i++)
if (!t->m_nz.contains(other[i]))
t->m_nz.push_back(other[i]);
t->m_nz.push_back(k); // add diagonal
/* and sort */
psort_list(t->m_nz);
}
/* create a list of non zero elements right of the diagonal
* These list anticipate the population of array elements by
* Gaussian elimination.
*/
for (unsigned k = 0; k < N(); k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
if (k==0)
t->m_nzrd.clear();
else
{
t->m_nzrd = m_terms[k-1]->m_nzrd;
unsigned j=0;
while(j < t->m_nzrd.size())
{
if (t->m_nzrd[j] < k + 1)
t->m_nzrd.remove_at(j);
else
j++;
}
}
for (unsigned i = 0; i < t->m_railstart; i++)
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
t->m_nzrd.push_back(other[i]);
/* and sort */
psort_list(t->m_nzrd);
}
/* create a list of non zero elements below diagonal k
* This should reduce cache misses ...
*/
bool touched[_storage_N][_storage_N] = { { false } };
for (unsigned k = 0; k < N(); k++)
{
m_terms[k]->m_nzbd.clear();
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
touched[k][m_terms[k]->m_nz[j]] = true;
}
for (unsigned k = 0; k < N(); k++)
{
for (unsigned row = k + 1; row < N(); row++)
{
if (touched[row][k])
{
if (!m_terms[k]->m_nzbd.contains(row))
m_terms[k]->m_nzbd.push_back(row);
for (unsigned col = k; col < N(); col++)
if (touched[k][col])
touched[row][col] = true;
}
}
}
if (0)
for (unsigned k = 0; k < N(); k++)
{
pstring line = pfmt("{1}")(k, "3");
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
log().verbose("{1}", line);
}
/*
* save states
*/
save(NLNAME(m_last_RHS));
for (unsigned k = 0; k < N(); k++)
{
pstring num = pfmt("{1}")(k);
save(RHS(k), "RHS" + num);
save(m_terms[k]->m_last_V, "lastV" + num);
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
save(RHS(k), "RHS." + num);
}
}
@ -482,8 +376,8 @@ int matrix_solver_w_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool
template <unsigned m_N, unsigned _storage_N>
inline int matrix_solver_w_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
build_LE_A<matrix_solver_w_t>();
build_LE_RHS<matrix_solver_w_t>();
for (unsigned i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
@ -513,6 +407,9 @@ matrix_solver_w_t<m_N, _storage_N>::matrix_solver_w_t(const eSolverType type, co
,m_cnt(0)
, m_dim(size)
{
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A = palloc_array(nl_ext_double, N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;

View File

@ -43,6 +43,7 @@
#if 1
#include "nld_ms_direct.h"
//#include "nld_ms_gcr.h"
#else
#include "nld_ms_direct_lu.h"
#endif
@ -200,8 +201,191 @@ ATTR_COLD void matrix_solver_t::setup_base(analog_net_t::list_t &nets)
log().debug("added net with {1} populated connections\n", net->m_core_terms.size());
}
/* now setup the matrix */
setup_matrix();
}
ATTR_COLD void matrix_solver_t::setup_matrix()
{
const unsigned iN = m_nets.size();
for (unsigned k = 0; k < iN; k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (unsigned i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->net_other()[i], false);
m_rails_temp[k]->clear(); // no longer needed
m_terms[k]->set_pointers();
}
for (unsigned k = 0; k < iN; k++)
pfree(m_rails_temp[k]); // no longer needed
m_rails_temp.clear();
#if 0
/* Sort in descending order by number of connected matrix voltages.
* The idea is, that for Gauss-Seidel algo the first voltage computed
* depends on the greatest number of previous voltages thus taking into
* account the maximum amout of information.
*
* This actually improves performance on popeye slightly. Average
* GS computations reduce from 2.509 to 2.370
*
* Smallest to largest : 2.613
* Unsorted : 2.509
* Largest to smallest : 2.370
*
* Sorting as a general matrix pre-conditioning is mentioned in
* literature but I have found no articles about Gauss Seidel.
*
* For Gaussian Elimination however increasing order is better suited.
* FIXME: Even better would be to sort on elements right of the matrix diagonal.
*
*/
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1) * -1;
for (unsigned k = 0; k < iN / 2; k++)
for (unsigned i = 0; i < iN - 1; i++)
{
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
{
std::swap(m_terms[i], m_terms[i+1]);
std::swap(m_nets[i], m_nets[i+1]);
}
}
for (unsigned k = 0; k < iN; k++)
{
int *other = m_terms[k]->net_other();
for (unsigned i = 0; i < m_terms[k]->count(); i++)
if (other[i] != -1)
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
}
#endif
/* create a list of non zero elements. */
for (unsigned k = 0; k < iN; k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
t->m_nz.clear();
for (unsigned i = 0; i < t->m_railstart; i++)
if (!t->m_nz.contains(other[i]))
t->m_nz.push_back(other[i]);
t->m_nz.push_back(k); // add diagonal
/* and sort */
psort_list(t->m_nz);
}
/* create a list of non zero elements right of the diagonal
* These list anticipate the population of array elements by
* Gaussian elimination.
*/
for (unsigned k = 0; k < iN; k++)
{
terms_t * t = m_terms[k];
/* pretty brutal */
int *other = t->net_other();
if (k==0)
t->m_nzrd.clear();
else
{
t->m_nzrd = m_terms[k-1]->m_nzrd;
unsigned j=0;
while(j < t->m_nzrd.size())
{
if (t->m_nzrd[j] < k + 1)
t->m_nzrd.remove_at(j);
else
j++;
}
}
for (unsigned i = 0; i < t->m_railstart; i++)
if (!t->m_nzrd.contains(other[i]) && other[i] >= (int) (k + 1))
t->m_nzrd.push_back(other[i]);
/* and sort */
psort_list(t->m_nzrd);
}
/* create a list of non zero elements below diagonal k
* This should reduce cache misses ...
*/
bool **touched = new bool*[iN];
for (unsigned k=0; k<iN; k++)
touched[k] = new bool[iN];
for (unsigned k = 0; k < iN; k++)
{
for (unsigned j = 0; j < iN; j++)
touched[k][j] = false;
for (unsigned j = 0; j < m_terms[k]->m_nz.size(); j++)
touched[k][m_terms[k]->m_nz[j]] = true;
}
unsigned ops = 0;
for (unsigned k = 0; k < iN; k++)
{
ops++; // 1/A(k,k)
for (unsigned row = k + 1; row < iN; row++)
{
if (touched[row][k])
{
ops++;
if (!m_terms[k]->m_nzbd.contains(row))
m_terms[k]->m_nzbd.push_back(row);
for (unsigned col = k + 1; col < iN; col++)
if (touched[k][col])
{
touched[row][col] = true;
ops += 2;
}
}
}
}
log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
if (0)
for (unsigned k = 0; k < iN; k++)
{
pstring line = pfmt("{1}")(k, "3");
for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
line += pfmt(" {1}")(m_terms[k]->m_nzrd[j], "3");
log().verbose("{1}", line);
}
/*
* save states
*/
for (unsigned k = 0; k < iN; k++)
{
pstring num = pfmt("{1}")(k);
save(m_terms[k]->m_last_V, "lastV." + num);
save(m_terms[k]->m_DD_n_m_1, "m_DD_n_m_1." + num);
save(m_terms[k]->m_h_n_m_1, "m_h_n_m_1." + num);
save(m_terms[k]->go(),"GO" + num, m_terms[k]->count());
save(m_terms[k]->gt(),"GT" + num, m_terms[k]->count());
save(m_terms[k]->Idr(),"IDR" + num , m_terms[k]->count());
}
for (unsigned k=0; k<iN; k++)
delete [] touched[k];
delete [] touched;
}
void matrix_solver_t::update_inputs()
{
@ -539,6 +723,7 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const bool use_sp
else if (pstring("MAT").equals(m_iterative_solver))
{
typedef matrix_solver_direct_t<m_N,_storage_N> solver_mat;
//typedef matrix_solver_GCR_t<m_N,_storage_N> solver_mat;
return palloc(solver_mat(&m_params, size));
}
else if (pstring("SM").equals(m_iterative_solver))