mirror of
https://github.com/holub/mame
synced 2025-04-25 09:50:04 +03:00
Significant speed improvement:
- added a new solver using compressed row format - fixed sorting As a result, netlist performance on kidniki nearly doubled. The performance increase is mainly due to the fact that sorting decreases the number of operations for gaussian elimination of the kidniki matrix from ~7800 to 707. In addition, compressed row format improves L1 usage. [Couriersud]
This commit is contained in:
parent
cd0441b678
commit
d9df811529
@ -15,7 +15,8 @@ NETLIST_START(dummy)
|
||||
PARAM(Solver.GS_LOOPS, 1)
|
||||
PARAM(Solver.GS_THRESHOLD, 6)
|
||||
//PARAM(Solver.ITERATIVE, "W")
|
||||
PARAM(Solver.ITERATIVE, "MAT")
|
||||
PARAM(Solver.ITERATIVE, "MAT_CR")
|
||||
//PARAM(Solver.ITERATIVE, "MAT")
|
||||
//PARAM(Solver.ITERATIVE, "GMRES")
|
||||
//PARAM(Solver.ITERATIVE, "SOR")
|
||||
PARAM(Solver.DYNAMIC_TS, 0)
|
||||
|
@ -64,6 +64,9 @@ public:
|
||||
template <typename T>
|
||||
static T sqrt(const T &x) { return std::sqrt(x); }
|
||||
|
||||
template <typename T>
|
||||
static T hypot(const T &x, const T &y) { return std::hypot(x, y); }
|
||||
|
||||
// this one has an accuracy of better than 5%. That's enough for our purpose
|
||||
// add c3 and it'll be better than 1%
|
||||
|
||||
|
@ -307,7 +307,7 @@ void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
{
|
||||
x_i[p] = i;
|
||||
x_start[p] = chunks * p;
|
||||
x_stop[p] = std::min(chunks*(p+1), eb);
|
||||
x_stop[p] = nl_math::min(chunks*(p+1), eb);
|
||||
if (p<num_thr && x_start[p] < x_stop[p]) thr_process(p, this, NULL);
|
||||
}
|
||||
if (x_start[num_thr] < x_stop[num_thr])
|
||||
@ -327,17 +327,14 @@ void matrix_solver_direct_t<m_N, _storage_N>::LE_solve()
|
||||
const auto &nzrd = m_terms[i]->m_nzrd;
|
||||
const auto &nzbd = m_terms[i]->m_nzbd;
|
||||
|
||||
/* Eliminate column i from row j */
|
||||
|
||||
for (auto & j : nzbd)
|
||||
{
|
||||
//__builtin_prefetch((const void*)(&A(j+2,0)),0,0);
|
||||
const nl_double f1 = - A(j,i) * f;
|
||||
const nl_double f1 = -f * A(j,i);
|
||||
for (auto & k : nzrd)
|
||||
A(j,k) += A(i,k) * f1;
|
||||
//RHS(j) += RHS(i) * f1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
280
src/lib/netlist/solver/nld_ms_gcr.h
Normal file
280
src/lib/netlist/solver/nld_ms_gcr.h
Normal file
@ -0,0 +1,280 @@
|
||||
// license:GPL-2.0+
|
||||
// copyright-holders:Couriersud
|
||||
/*
|
||||
* nld_ms_gcr.h
|
||||
*
|
||||
* Gaussian elimination using compressed row format.
|
||||
*
|
||||
* Fow w==1 we will do the classic Gauss-Seidel approach
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef NLD_MS_GCR_H_
|
||||
#define NLD_MS_GCR_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "solver/mat_cr.h"
|
||||
#include "solver/nld_ms_direct.h"
|
||||
#include "solver/nld_solver.h"
|
||||
#include "solver/vector_base.h"
|
||||
|
||||
#define NL_USE_SSE 0
|
||||
|
||||
NETLIB_NAMESPACE_DEVICES_START()
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
class matrix_solver_GCR_t: public matrix_solver_direct_t<m_N, _storage_N>
|
||||
{
|
||||
public:
|
||||
|
||||
matrix_solver_GCR_t(const solver_parameters_t *params, int size)
|
||||
: matrix_solver_direct_t<m_N, _storage_N>(matrix_solver_t::GAUSSIAN_ELIMINATION, params, size)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~matrix_solver_GCR_t()
|
||||
{
|
||||
}
|
||||
|
||||
virtual void vsetup(analog_net_t::list_t &nets) override;
|
||||
virtual int vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
|
||||
private:
|
||||
|
||||
pvector_t<int> m_term_cr[_storage_N];
|
||||
mat_cr_t<_storage_N> mat;
|
||||
nl_double m_A[_storage_N * _storage_N];
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// matrix_solver - GMRES
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
void matrix_solver_GCR_t<m_N, _storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_direct_t<m_N, _storage_N>::vsetup(nets);
|
||||
|
||||
unsigned nz = 0;
|
||||
const unsigned iN = this->N();
|
||||
|
||||
/* build the final matrix */
|
||||
|
||||
bool touched[_storage_N][_storage_N] = { { false } };
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
for (auto & j : this->m_terms[k]->m_nz)
|
||||
touched[k][j] = true;
|
||||
}
|
||||
|
||||
unsigned ops = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
ops++; // 1/A(k,k)
|
||||
for (unsigned row = k + 1; row < iN; row++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
ops++;
|
||||
for (unsigned col = k + 1; col < iN; col++)
|
||||
if (touched[k][col])
|
||||
{
|
||||
touched[row][col] = true;
|
||||
ops += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (unsigned k=0; k<iN; k++)
|
||||
{
|
||||
mat.ia[k] = nz;
|
||||
|
||||
for (unsigned j=0; j<iN; j++)
|
||||
{
|
||||
if (touched[k][j])
|
||||
{
|
||||
mat.ja[nz] = j;
|
||||
if (j == k)
|
||||
mat.diag[k] = nz;
|
||||
nz++;
|
||||
}
|
||||
}
|
||||
|
||||
m_term_cr[k].clear();
|
||||
/* build pointers into the compressed row format matrix for each terminal */
|
||||
for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++)
|
||||
{
|
||||
for (unsigned i = mat.ia[k]; i<nz; i++)
|
||||
if (this->m_terms[k]->net_other()[j] == (int) mat.ja[i])
|
||||
{
|
||||
m_term_cr[k].push_back(i);
|
||||
break;
|
||||
}
|
||||
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
|
||||
}
|
||||
}
|
||||
|
||||
mat.ia[iN] = nz;
|
||||
mat.nz_num = nz;
|
||||
|
||||
this->log().verbose("Ops: {1} Occupancy ratio: {2}\n", ops, (double) nz / double (iN * iN));
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
int matrix_solver_GCR_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
const unsigned iN = this->N();
|
||||
|
||||
ATTR_ALIGN nl_double RHS[_storage_N];
|
||||
ATTR_ALIGN nl_double new_V[_storage_N];
|
||||
|
||||
for (unsigned i=0, e=mat.nz_num; i<e; i++)
|
||||
m_A[i] = 0.0;
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double gtot_t = 0.0;
|
||||
nl_double RHS_t = 0.0;
|
||||
|
||||
const unsigned term_count = this->m_terms[k]->count();
|
||||
const unsigned railstart = this->m_terms[k]->m_railstart;
|
||||
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
|
||||
const nl_double * const RESTRICT go = this->m_terms[k]->go();
|
||||
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog();
|
||||
|
||||
#if (NL_USE_SSE)
|
||||
__m128d mg = _mm_set_pd(0.0, 0.0);
|
||||
__m128d mr = _mm_set_pd(0.0, 0.0);
|
||||
unsigned i = 0;
|
||||
for (; i < term_count - 1; i+=2)
|
||||
{
|
||||
mg = _mm_add_pd(mg, _mm_loadu_pd(>[i]));
|
||||
mr = _mm_add_pd(mr, _mm_loadu_pd(&Idr[i]));
|
||||
}
|
||||
gtot_t = _mm_cvtsd_f64(mg) + _mm_cvtsd_f64(_mm_unpackhi_pd(mg,mg));
|
||||
RHS_t = _mm_cvtsd_f64(mr) + _mm_cvtsd_f64(_mm_unpackhi_pd(mr,mr));
|
||||
for (; i < term_count; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
RHS_t += Idr[i];
|
||||
}
|
||||
#else
|
||||
for (unsigned i = 0; i < term_count; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
RHS_t += Idr[i];
|
||||
}
|
||||
#endif
|
||||
for (unsigned i = railstart; i < term_count; i++)
|
||||
RHS_t += go[i] * *other_cur_analog[i];
|
||||
|
||||
RHS[k] = RHS_t;
|
||||
|
||||
// add diagonal element
|
||||
m_A[mat.diag[k]] = gtot_t;
|
||||
|
||||
for (unsigned i = 0; i < railstart; i++)
|
||||
{
|
||||
const unsigned pi = m_term_cr[k][i];
|
||||
m_A[pi] -= go[i];
|
||||
}
|
||||
}
|
||||
mat.ia[iN] = mat.nz_num;
|
||||
|
||||
/* now solve it */
|
||||
|
||||
for (unsigned i = 0; i < iN - 1; i++)
|
||||
{
|
||||
const auto &nzbd = this->m_terms[i]->m_nzbd;
|
||||
|
||||
if (nzbd.size() > 0)
|
||||
{
|
||||
unsigned pi = mat.diag[i];
|
||||
const nl_double f = 1.0 / m_A[pi++];
|
||||
const unsigned piie = mat.ia[i+1];
|
||||
|
||||
for (auto & j : nzbd)
|
||||
{
|
||||
// proceed to column i
|
||||
//__builtin_prefetch(&m_A[mat.diag[j+1]], 1);
|
||||
unsigned pj = mat.ia[j];
|
||||
|
||||
while (mat.ja[pj] < i)
|
||||
pj++;
|
||||
|
||||
const nl_double f1 = - m_A[pj++] * f;
|
||||
|
||||
// subtract row i from j */
|
||||
for (unsigned pii = pi; pii<piie; )
|
||||
{
|
||||
while (mat.ja[pj] < mat.ja[pii])
|
||||
pj++;
|
||||
m_A[pj++] += m_A[pii++] * f1;
|
||||
}
|
||||
RHS[j] += f1 * RHS[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* backward substitution
|
||||
*
|
||||
*/
|
||||
|
||||
/* row n-1 */
|
||||
new_V[iN - 1] = RHS[iN - 1] / m_A[mat.diag[iN - 1]];
|
||||
|
||||
for (int j = iN - 2; j >= 0; j--)
|
||||
{
|
||||
//__builtin_prefetch(&new_V[j-1], 1);
|
||||
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
|
||||
#if (NL_USE_SSE)
|
||||
__m128d tmp = _mm_set_pd1(0.0);
|
||||
const unsigned e = mat.ia[j+1];
|
||||
unsigned pk = mat.diag[j] + 1;
|
||||
for (; pk < e - 1; pk+=2)
|
||||
{
|
||||
//tmp += m_A[pk] * new_V[mat.ja[pk]];
|
||||
tmp = _mm_add_pd(tmp, _mm_mul_pd(_mm_set_pd(m_A[pk], m_A[pk+1]),
|
||||
_mm_set_pd(new_V[mat.ja[pk]], new_V[mat.ja[pk+1]])));
|
||||
}
|
||||
double tmpx = _mm_cvtsd_f64(tmp) + _mm_cvtsd_f64(_mm_unpackhi_pd(tmp,tmp));
|
||||
for (; pk < e; pk++)
|
||||
{
|
||||
tmpx += m_A[pk] * new_V[mat.ja[pk]];
|
||||
}
|
||||
new_V[j] = (RHS[j] - tmpx) / m_A[mat.diag[j]];
|
||||
#else
|
||||
double tmp = 0;
|
||||
const unsigned e = mat.ia[j+1];
|
||||
for (unsigned pk = mat.diag[j] + 1; pk < e; pk++)
|
||||
{
|
||||
tmp += m_A[pk] * new_V[mat.ja[pk]];
|
||||
}
|
||||
new_V[j] = (RHS[j] - tmp) / m_A[mat.diag[j]];
|
||||
#endif
|
||||
}
|
||||
this->m_stat_calculations++;
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = this->delta(new_V);
|
||||
|
||||
this->store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->store(new_V);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
NETLIB_NAMESPACE_DEVICES_END()
|
||||
|
||||
#endif /* NLD_MS_GCR_H_ */
|
@ -27,7 +27,7 @@ class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, _storage_N>
|
||||
public:
|
||||
|
||||
matrix_solver_GMRES_t(const solver_parameters_t *params, int size)
|
||||
: matrix_solver_direct_t<m_N, _storage_N>(matrix_solver_t::GAUSS_SEIDEL, params, size)
|
||||
: matrix_solver_direct_t<m_N, _storage_N>(matrix_solver_t::GAUSSIAN_ELIMINATION, params, size)
|
||||
, m_use_iLU_preconditioning(true)
|
||||
, m_use_more_precise_stop_condition(false)
|
||||
, m_accuracy_mult(1.0)
|
||||
@ -49,6 +49,7 @@ private:
|
||||
|
||||
bool m_use_iLU_preconditioning;
|
||||
bool m_use_more_precise_stop_condition;
|
||||
nl_double m_accuracy_mult; // FXIME: Save state
|
||||
|
||||
mat_cr_t<_storage_N> mat;
|
||||
|
||||
@ -60,9 +61,8 @@ private:
|
||||
nl_double m_ht[_storage_N + 1][_storage_N]; /* (mr + 1), mr */
|
||||
nl_double m_s[_storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_v[_storage_N + 1][_storage_N]; /*(mr + 1), n */
|
||||
//double m_y[_storage_N]; /* mr + 1 */
|
||||
nl_double m_y[_storage_N + 1]; /* mr + 1 */
|
||||
|
||||
nl_double m_accuracy_mult; // FXIME: Save state
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -124,7 +124,6 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
|
||||
//nz_num = 0;
|
||||
ATTR_ALIGN nl_double RHS[_storage_N];
|
||||
ATTR_ALIGN nl_double new_V[_storage_N];
|
||||
ATTR_ALIGN nl_double l_V[_storage_N];
|
||||
|
||||
for (unsigned i=0, e=mat.nz_num; i<e; i++)
|
||||
m_A[i] = 0.0;
|
||||
@ -141,7 +140,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
|
||||
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog();
|
||||
|
||||
l_V[k] = new_V[k] = this->m_nets[k]->m_cur_Analog;
|
||||
new_V[k] = this->m_nets[k]->m_cur_Analog;
|
||||
|
||||
for (unsigned i = 0; i < term_count; i++)
|
||||
{
|
||||
@ -170,7 +169,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
|
||||
int mr = iN;
|
||||
if (iN > 3 )
|
||||
mr = (int) sqrt(iN) * 2;
|
||||
int iter = std::max(1, this->m_params.m_gs_loops);
|
||||
int iter = nl_math::max(1, this->m_params.m_gs_loops);
|
||||
int gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
|
||||
int failed = mr * iter;
|
||||
|
||||
@ -185,21 +184,15 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
err = std::max(nl_math::abs(l_V[k] - new_V[k]), err);
|
||||
nl_double err = this->delta(new_V);
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
this->m_nets[k]->m_cur_Analog += 1.0 * (new_V[k] - this->m_nets[k]->m_cur_Analog);
|
||||
if (err > accuracy)
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
this->store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
this->m_nets[k]->m_cur_Analog = new_V[k];
|
||||
this->store(new_V);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -266,12 +259,12 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
mat.mult_vec(m_A, t, Ax);
|
||||
mat.solveLUx(m_LU, Ax);
|
||||
|
||||
const nl_double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;
|
||||
const nl_double rho_to_accuracy = nl_math::sqrt(vecmult2(n, Ax)) / accuracy;
|
||||
|
||||
rho_delta = accuracy * rho_to_accuracy;
|
||||
}
|
||||
else
|
||||
rho_delta = accuracy * std::sqrt((double) n) * m_accuracy_mult;
|
||||
rho_delta = accuracy * nl_math::sqrt((double) n) * m_accuracy_mult;
|
||||
|
||||
for (unsigned itr = 0; itr < restart_max; itr++)
|
||||
{
|
||||
@ -291,7 +284,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
mat.solveLUx(m_LU, residual);
|
||||
}
|
||||
|
||||
rho = std::sqrt(vecmult2(n, residual));
|
||||
rho = nl_math::sqrt(vecmult2(n, residual));
|
||||
|
||||
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
|
||||
|
||||
@ -315,7 +308,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]);
|
||||
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
|
||||
}
|
||||
m_ht[k1][k] = std::sqrt(vecmult2(n, m_v[k1]));
|
||||
m_ht[k1][k] = nl_math::sqrt(vecmult2(n, m_v[k1]));
|
||||
|
||||
if (m_ht[k1][k] != 0.0)
|
||||
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
|
||||
@ -323,7 +316,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
for (unsigned j = 0; j < k; j++)
|
||||
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
|
||||
|
||||
mu = std::hypot(m_ht[k][k], m_ht[k1][k]);
|
||||
mu = nl_math::hypot(m_ht[k][k], m_ht[k1][k]);
|
||||
|
||||
m_c[k] = m_ht[k][k] / mu;
|
||||
m_s[k] = -m_ht[k1][k] / mu;
|
||||
@ -332,7 +325,7 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
|
||||
givens_mult(m_c[k], m_s[k], m_g[k], m_g[k1]);
|
||||
|
||||
rho = std::abs(m_g[k1]);
|
||||
rho = nl_math::abs(m_g[k1]);
|
||||
|
||||
itr_used = itr_used + 1;
|
||||
|
||||
@ -347,8 +340,6 @@ int matrix_solver_GMRES_t<m_N, _storage_N>::solve_ilu_gmres (nl_double * RESTRIC
|
||||
/* didn't converge within accuracy */
|
||||
last_k = mr - 1;
|
||||
|
||||
nl_double m_y[_storage_N + 1];
|
||||
|
||||
/* Solve the system H * y = g */
|
||||
/* x += m_v[j] * m_y[j] */
|
||||
for (int i = last_k; i >= 0; i--)
|
||||
|
@ -146,7 +146,7 @@ int matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_r
|
||||
|
||||
const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
|
||||
|
||||
err = std::max(nl_math::abs(new_val - new_V[k]), err);
|
||||
err = nl_math::max(nl_math::abs(new_val - new_V[k]), err);
|
||||
new_V[k] = new_val;
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ nl_double matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve()
|
||||
|
||||
// FIXME: used to be 1e90, but this would not be compatible with float
|
||||
if (sqo > NL_FCONST(1e-20))
|
||||
m_lp_fact = std::min(nl_math::sqrt(sq/sqo), (nl_double) 2.0);
|
||||
m_lp_fact = nl_math::min(nl_math::sqrt(sq/sqo), (nl_double) 2.0);
|
||||
else
|
||||
m_lp_fact = NL_FCONST(0.0);
|
||||
}
|
||||
@ -190,7 +190,7 @@ int matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newt
|
||||
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
|
||||
|
||||
const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
|
||||
cerr = std::max(cerr, nl_math::abs(delta));
|
||||
cerr = nl_math::max(cerr, nl_math::abs(delta));
|
||||
new_v[k] += delta;
|
||||
}
|
||||
|
||||
|
@ -125,9 +125,6 @@ private:
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
matrix_solver_w_t<m_N, _storage_N>::~matrix_solver_w_t()
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
pfree_array(m_A);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <unsigned m_N, unsigned _storage_N>
|
||||
@ -276,17 +273,22 @@ int matrix_solver_w_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool
|
||||
/* construct w = transform(V) * y
|
||||
* dim: rowcount x iN
|
||||
* */
|
||||
nl_double w[_storage_N] = {0};
|
||||
nl_double w[_storage_N];
|
||||
for (unsigned i = 0; i < rowcount; i++)
|
||||
{
|
||||
const unsigned r = rows[i];
|
||||
double tmp = 0.0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
w[i] += VT(rows[i],k) * new_V[k];
|
||||
tmp += VT(r,k) * new_V[k];
|
||||
w[i] = tmp;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < rowcount; i++)
|
||||
for (unsigned k=0; k< rowcount; k++)
|
||||
H[i][k] = 0.0;
|
||||
|
||||
for (unsigned i = 0; i < rowcount; i++)
|
||||
H[i][i] += 1.0;
|
||||
H[i][i] = 1.0;
|
||||
/* Construct H = (I + VT*Z) */
|
||||
for (unsigned i = 0; i < rowcount; i++)
|
||||
for (unsigned k=0; k< colcount[i]; k++)
|
||||
@ -348,16 +350,17 @@ int matrix_solver_w_t<m_N, _storage_N>::solve_non_dynamic(ATTR_UNUSED const bool
|
||||
}
|
||||
m_cnt++;
|
||||
|
||||
for (unsigned i=0; i<iN; i++)
|
||||
{
|
||||
nl_double tmp = 0.0;
|
||||
for (unsigned j=0; j<iN; j++)
|
||||
if (0)
|
||||
for (unsigned i=0; i<iN; i++)
|
||||
{
|
||||
tmp += A(i,j) * new_V[j];
|
||||
nl_double tmp = 0.0;
|
||||
for (unsigned j=0; j<iN; j++)
|
||||
{
|
||||
tmp += A(i,j) * new_V[j];
|
||||
}
|
||||
if (nl_math::abs(tmp-RHS(i)) > 1e-6)
|
||||
printf("%s failed on row %d: %f RHS: %f\n", this->name().cstr(), i, nl_math::abs(tmp-RHS(i)), RHS(i));
|
||||
}
|
||||
if (std::fabs(tmp-RHS(i)) > 1e-6)
|
||||
printf("%s failed on row %d: %f RHS: %f\n", this->name().cstr(), i, std::fabs(tmp-RHS(i)), RHS(i));
|
||||
}
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = delta(new_V);
|
||||
@ -392,9 +395,6 @@ matrix_solver_w_t<m_N, _storage_N>::matrix_solver_w_t(const solver_parameters_t
|
||||
,m_cnt(0)
|
||||
, m_dim(size)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A = palloc_array(nl_ext_double, N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
@ -407,9 +407,6 @@ matrix_solver_w_t<m_N, _storage_N>::matrix_solver_w_t(const eSolverType type, co
|
||||
,m_cnt(0)
|
||||
, m_dim(size)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A = palloc_array(nl_ext_double, N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
|
@ -43,7 +43,7 @@
|
||||
|
||||
#if 1
|
||||
#include "nld_ms_direct.h"
|
||||
//#include "nld_ms_gcr.h"
|
||||
#include "nld_ms_gcr.h"
|
||||
#else
|
||||
#include "nld_ms_direct_lu.h"
|
||||
#endif
|
||||
@ -223,7 +223,7 @@ ATTR_COLD void matrix_solver_t::setup_matrix()
|
||||
pfree(m_rails_temp[k]); // no longer needed
|
||||
|
||||
m_rails_temp.clear();
|
||||
#if 0
|
||||
#if 1
|
||||
|
||||
/* Sort in descending order by number of connected matrix voltages.
|
||||
* The idea is, that for Gauss-Seidel algo the first voltage computed
|
||||
@ -245,15 +245,15 @@ ATTR_COLD void matrix_solver_t::setup_matrix()
|
||||
*
|
||||
*/
|
||||
|
||||
int sort_order = (type() == GAUSS_SEIDEL ? 1 : -1) * -1;
|
||||
int sort_order = -1;//(type() == GAUSS_SEIDEL ? 1 : -1);
|
||||
|
||||
for (unsigned k = 0; k < iN / 2; k++)
|
||||
for (unsigned i = 0; i < iN - 1; i++)
|
||||
for (unsigned k = 0; k < iN - 1; k++)
|
||||
for (unsigned i = k+1; i < iN; i++)
|
||||
{
|
||||
if ((m_terms[i]->m_railstart - m_terms[i+1]->m_railstart) * sort_order < 0)
|
||||
if (((int) m_terms[k]->m_railstart - (int) m_terms[i]->m_railstart) * sort_order < 0)
|
||||
{
|
||||
std::swap(m_terms[i], m_terms[i+1]);
|
||||
std::swap(m_nets[i], m_nets[i+1]);
|
||||
std::swap(m_terms[i], m_terms[k]);
|
||||
std::swap(m_nets[i], m_nets[k]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -720,10 +720,14 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const bool use_sp
|
||||
typedef matrix_solver_SOR_mat_t<m_N,_storage_N> solver_sor_mat;
|
||||
return palloc(solver_sor_mat(&m_params, size));
|
||||
}
|
||||
else if (pstring("MAT_CR").equals(m_iterative_solver))
|
||||
{
|
||||
typedef matrix_solver_GCR_t<m_N,_storage_N> solver_mat;
|
||||
return palloc(solver_mat(&m_params, size));
|
||||
}
|
||||
else if (pstring("MAT").equals(m_iterative_solver))
|
||||
{
|
||||
typedef matrix_solver_direct_t<m_N,_storage_N> solver_mat;
|
||||
//typedef matrix_solver_GCR_t<m_N,_storage_N> solver_mat;
|
||||
return palloc(solver_mat(&m_params, size));
|
||||
}
|
||||
else if (pstring("SM").equals(m_iterative_solver))
|
||||
|
@ -36,14 +36,14 @@ private:
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
inline void vec_set (const std::size_t n, const T &scalar, T * RESTRICT result)
|
||||
inline void vec_set (const std::size_t & n, const T &scalar, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = scalar;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT a2 )
|
||||
inline T vecmult (const std::size_t & n, const T * RESTRICT a1, const T * RESTRICT a2 )
|
||||
{
|
||||
T value = 0.0;
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
@ -52,7 +52,7 @@ inline T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline T vecmult2 (const std::size_t n, const T *a1)
|
||||
inline T vecmult2 (const std::size_t & n, const T *a1)
|
||||
{
|
||||
T value = 0.0;
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
@ -64,7 +64,7 @@ inline T vecmult2 (const std::size_t n, const T *a1)
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline void vec_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
|
||||
inline void vec_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T & scalar, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
{
|
||||
@ -73,37 +73,37 @@ inline void vec_mult_scalar (const std::size_t n, const T * RESTRICT v, const T
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline void vec_add_mult_scalar (const std::size_t n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
|
||||
inline void vec_add_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] += scalar * v[i];
|
||||
}
|
||||
|
||||
inline void vec_add_ip(const std::size_t n, const double * RESTRICT v, double * RESTRICT result)
|
||||
inline void vec_add_ip(const std::size_t & n, const double * RESTRICT v, double * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] += v[i];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline void vec_sub(const std::size_t n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
|
||||
inline void vec_sub(const std::size_t & n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = v1[i] - v2[i];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline void vec_scale (const std::size_t n, T * RESTRICT v, const T scalar)
|
||||
inline void vec_scale (const std::size_t & n, T * RESTRICT v, const T scalar)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
v[i] = scalar * v[i];
|
||||
}
|
||||
|
||||
inline double vec_maxabs(const std::size_t n, const double * RESTRICT v)
|
||||
inline double vec_maxabs(const std::size_t & n, const double * RESTRICT v)
|
||||
{
|
||||
double ret = 0.0;
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
ret = std::max(ret, std::abs(v[i]));
|
||||
ret = nl_math::max(ret, nl_math::abs(v[i]));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -419,8 +419,8 @@ NETLIST_START(kidniki_interface)
|
||||
PARAM(Solver.NR_LOOPS, 300)
|
||||
PARAM(Solver.GS_LOOPS, 1)
|
||||
PARAM(Solver.GS_THRESHOLD, 6)
|
||||
PARAM(Solver.ITERATIVE, "SOR")
|
||||
//PARAM(Solver.ITERATIVE, "MAT")
|
||||
//PARAM(Solver.ITERATIVE, "SOR")
|
||||
PARAM(Solver.ITERATIVE, "MAT_CR")
|
||||
//PARAM(Solver.ITERATIVE, "GMRES")
|
||||
PARAM(Solver.PARALLEL, 0)
|
||||
PARAM(Solver.SOR_FACTOR, 1.00)
|
||||
|
Loading…
Reference in New Issue
Block a user