// license:GPL-2.0+
// copyright-holders:Couriersud
/*
 * nld_ms_gmres.h
 *
 * Iterative GMRES solver with incomplete LU (ILU) preconditioning.
 *
 */

#ifndef NLD_MS_GMRES_H_
#define NLD_MS_GMRES_H_

#include <algorithm>
#include <cmath>
#include <vector>

#include "solver/mat_cr.h"
#include "solver/nld_ms_direct.h"
#include "solver/nld_solver.h"
#include "solver/vector_base.h"

namespace netlist
{
namespace devices
{

template <unsigned m_N, unsigned storage_N>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N>
{
public:

	matrix_solver_GMRES_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, int size)
		: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
		, m_use_iLU_preconditioning(true)
		, m_use_more_precise_stop_condition(false)
		, m_accuracy_mult(1.0)
	{
	}

	virtual ~matrix_solver_GMRES_t()
	{
	}

	virtual void vsetup(analog_net_t::list_t &nets) override;
	virtual int vsolve_non_dynamic(const bool newton_raphson) override;

private:

	int solve_ilu_gmres(nl_double * RESTRICT x, const nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy);

	std::vector<int> m_term_cr[storage_N];

	bool m_use_iLU_preconditioning;
	bool m_use_more_precise_stop_condition;
	nl_double m_accuracy_mult; // FIXME: Save state

	mat_cr_t<storage_N> mat;

	nl_double m_A[storage_N * storage_N];
	nl_double m_LU[storage_N * storage_N];

	nl_double m_c[storage_N + 1];              /* mr + 1 */
	nl_double m_g[storage_N + 1];              /* mr + 1 */
	nl_double m_ht[storage_N + 1][storage_N];  /* (mr + 1), mr */
	nl_double m_s[storage_N + 1];              /* mr + 1 */
	nl_double m_v[storage_N + 1][storage_N];   /* (mr + 1), n */
	nl_double m_y[storage_N + 1];              /* mr + 1 */

};
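
/*
 * Illustrative sketch only (not from the original file): the solver is
 * presumably created by the netlist solver front-end along these lines,
 * with hypothetical names and sizes shown for concreteness:
 *
 *   // m_N == 0 appears to select the runtime-sized variant, bounded by storage_N
 *   auto *s = new matrix_solver_GMRES_t<0, 100>(anetlist, "gmres", &params, net_count);
 *   s->vsetup(nets);
 */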

// ----------------------------------------------------------------------------------------
// matrix_solver - GMRES
// ----------------------------------------------------------------------------------------

template <unsigned m_N, unsigned storage_N>
void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
	matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);

	unsigned nz = 0;
	const unsigned iN = this->N();

	for (unsigned k = 0; k < iN; k++)
	{
		terms_t * RESTRICT row = this->m_terms[k];
		mat.ia[k] = nz;

		for (unsigned j = 0; j < row->m_nz.size(); j++)
		{
			mat.ja[nz] = row->m_nz[j];
			if (row->m_nz[j] == k)
				mat.diag[k] = nz;
			nz++;
		}

		/* build pointers into the compressed row format matrix for each terminal */

		for (unsigned j = 0; j < this->m_terms[k]->m_railstart; j++)
		{
			for (unsigned i = mat.ia[k]; i < nz; i++)
				if (this->m_terms[k]->net_other()[j] == (int) mat.ja[i])
				{
					m_term_cr[k].push_back(i);
					break;
				}
		}
		/* every matrix-coupled terminal must have found its entry */
		nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
	}

	mat.ia[iN] = nz;
	mat.nz_num = nz;
}
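
/*
 * For illustration (not part of the original file): for a 3x3 system with
 * non-zero pattern
 *
 *          | a00 a01  0  |
 *      A = |  0  a11 a12 |
 *          | a20  0  a22 |
 *
 * vsetup() fills the compressed-row bookkeeping as
 *
 *      ja   = { 0, 1,  1, 2,  0, 2 }   // column index of each stored entry
 *      ia   = { 0, 2, 4, 6 }           // start of each row within ja / m_A
 *      diag = { 0, 2, 5 }              // position of each diagonal entry
 *
 * In addition, m_term_cr[k] records, for each terminal of row k that couples
 * to another solved net (index < m_railstart), the position of the matching
 * off-diagonal entry, so that vsolve_non_dynamic() can subtract the go[]
 * conductances without searching.
 */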

template <unsigned m_N, unsigned storage_N>
int matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
	const unsigned iN = this->N();

	/* ideally, we could get an estimate for the spectral radius of
	 *
	 *    Inv(D - L) * U
	 *
	 * and estimate the relaxation parameter using
	 *
	 *    omega = 2.0 / (1.0 + std::sqrt(1-rho))
	 */

	//nz_num = 0;
	nl_double RHS[storage_N];
	nl_double new_V[storage_N];

	for (unsigned i=0, e=mat.nz_num; i<e; i++)
		m_A[i] = 0.0;

	for (unsigned k = 0; k < iN; k++)
	{
		nl_double gtot_t = 0.0;
		nl_double RHS_t = 0.0;

		const unsigned term_count = this->m_terms[k]->count();
		const unsigned railstart = this->m_terms[k]->m_railstart;
		const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
		const nl_double * const RESTRICT go = this->m_terms[k]->go();
		const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
		const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->other_curanalog();

		new_V[k] = this->m_nets[k]->m_cur_Analog;

		for (unsigned i = 0; i < term_count; i++)
		{
			gtot_t = gtot_t + gt[i];
			RHS_t = RHS_t + Idr[i];
		}

		for (unsigned i = railstart; i < term_count; i++)
			RHS_t = RHS_t + go[i] * *other_cur_analog[i];

		RHS[k] = RHS_t;

		// add diagonal element
		m_A[mat.diag[k]] = gtot_t;

		for (unsigned i = 0; i < railstart; i++)
		{
			const unsigned pi = m_term_cr[k][i];
			m_A[pi] -= go[i];
		}
	}
	mat.ia[iN] = mat.nz_num;

	const nl_double accuracy = this->m_params.m_accuracy;

	int mr = iN;
	if (iN > 3)
		mr = (int) std::sqrt(iN) * 2;
	int iter = std::max(1, this->m_params.m_gs_loops);
	int gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
	int failed = mr * iter;

	this->m_iterative_total += gsl;
	this->m_stat_calculations++;

	if (gsl >= failed)
	{
		this->m_iterative_fail++;
		return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
	}

	if (newton_raphson)
	{
		nl_double err = this->delta(new_V);

		this->store(new_V);

		return (err > this->m_params.m_accuracy) ? 2 : 1;
	}
	else
	{
		this->store(new_V);
		return 1;
	}
}
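
/*
 * Apply a plane (Givens) rotation [ c -s ; s c ] to the pair (g0, g1).
 * In the GMRES loop below this is used to zero the sub-diagonal entry of the
 * Hessenberg matrix column by column, which keeps the small least-squares
 * problem upper triangular and lets the residual norm be read off as |g[k1]|.
 */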
template <typename T>
inline void givens_mult( const T & c, const T & s, T & g0, T & g1 )
{
	const T tg0 = c * g0 - s * g1;
	const T tg1 = s * g0 + c * g1;

	g0 = tg0;
	g1 = tg1;
}
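
/*
 * solve_ilu_gmres(): restarted, ILU-preconditioned GMRES.
 *
 *   x           in: current solution estimate, out: improved estimate
 *   rhs         right-hand side of A x = rhs
 *   restart_max maximum number of outer (restart) cycles
 *   mr          dimension of the Krylov subspace built per cycle
 *   accuracy    requested accuracy, used to derive the residual target
 *
 * The return value is the total number of inner iterations performed;
 * vsolve_non_dynamic() above treats a value of restart_max * mr or more as
 * "not converged" and falls back to the direct solver.
 */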
template <unsigned m_N, unsigned storage_N>
int matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RESTRICT x, const nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy)
{
	/*-------------------------------------------------------------------------
	 * The code below was inspired by code published by John Burkardt under
	 * the LGPL here:
	 *
	 *    http://people.sc.fsu.edu/~jburkardt/cpp_src/mgmres/mgmres.html
	 *
	 * The code below was completely written from scratch based on the pseudo code
	 * found here:
	 *
	 *    http://de.wikipedia.org/wiki/GMRES-Verfahren
	 *
	 * The algorithm itself is described in
	 *
	 *    Yousef Saad,
	 *    Iterative Methods for Sparse Linear Systems,
	 *    Second Edition,
	 *    SIAM, 2003,
	 *    ISBN: 0898715342,
	 *    LC: QA188.S17.
	 *
	 *------------------------------------------------------------------------*/

	unsigned itr_used = 0;
	double rho_delta = 0.0;

	const unsigned n = this->N();

	if (m_use_iLU_preconditioning)
		mat.incomplete_LU_factorization(m_A, m_LU);

	if (m_use_more_precise_stop_condition)
	{
		/* derive the residual for a given delta x
		 *
		 *    LU y = A dx
		 *
		 *    ==> rho / accuracy = sqrt(y * y)
		 *
		 * This approach approximates the iterative stop condition based on
		 * |xnew - xold| pretty precisely. But it is slow, or expressed
		 * differently: the additional effort doesn't pay off.
		 * Therefore we use the approach in the else branch.
		 */
		nl_double t[storage_N];
		nl_double Ax[storage_N];
		vec_set(n, accuracy, t);
		mat.mult_vec(m_A, t, Ax);
		mat.solveLUx(m_LU, Ax);

		const nl_double rho_to_accuracy = std::sqrt(vecmult2(n, Ax)) / accuracy;

		rho_delta = accuracy * rho_to_accuracy;
	}
	else
		rho_delta = accuracy * std::sqrt((double) n) * m_accuracy_mult;
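
	/* A note on the simpler criterion above: the GMRES loop below tracks the
	 * 2-norm of the (preconditioned) residual, so scaling the requested
	 * accuracy by sqrt(n) targets an RMS per-component residual of roughly
	 * `accuracy`. m_accuracy_mult is an adaptive factor; with the alternative
	 * code path at the end of the restart loop disabled (#if 1), it keeps its
	 * initial value of 1.0.
	 */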

	for (unsigned itr = 0; itr < restart_max; itr++)
	{
		unsigned last_k = mr;
		nl_double mu;
		nl_double rho;

		nl_double Ax[storage_N];
		nl_double residual[storage_N];

		mat.mult_vec(m_A, x, Ax);

		vec_sub(n, rhs, Ax, residual);

		if (m_use_iLU_preconditioning)
		{
			mat.solveLUx(m_LU, residual);
		}

		rho = std::sqrt(vecmult2(n, residual));

		vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);

		vec_set(mr+1, NL_FCONST(0.0), m_g);
		m_g[0] = rho;

		for (unsigned i = 0; i < mr; i++)
			vec_set(mr + 1, NL_FCONST(0.0), m_ht[i]);

		for (unsigned k = 0; k < mr; k++)
		{
			const unsigned k1 = k + 1;

			mat.mult_vec(m_A, m_v[k], m_v[k1]);

			if (m_use_iLU_preconditioning)
				mat.solveLUx(m_LU, m_v[k1]);

			for (unsigned j = 0; j <= k; j++)
			{
				m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]);
				vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
			}
			m_ht[k1][k] = std::sqrt(vecmult2(n, m_v[k1]));

			if (m_ht[k1][k] != 0.0)
				vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);

			for (unsigned j = 0; j < k; j++)
				givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);

			mu = std::hypot(m_ht[k][k], m_ht[k1][k]);

			m_c[k] = m_ht[k][k] / mu;
			m_s[k] = -m_ht[k1][k] / mu;
			m_ht[k][k] = m_c[k] * m_ht[k][k] - m_s[k] * m_ht[k1][k];
			m_ht[k1][k] = 0.0;

			givens_mult(m_c[k], m_s[k], m_g[k], m_g[k1]);

			rho = std::abs(m_g[k1]);

			itr_used = itr_used + 1;

			if (rho <= rho_delta)
			{
				last_k = k;
				break;
			}
		}

		if (last_k >= mr)
			/* didn't converge within accuracy */
			last_k = mr - 1;

		/* Solve the system H * y = g */
		/* x += m_v[j] * m_y[j] */
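		/* After the Givens rotations above, the active part of m_ht is upper
		 * triangular, so m_y can be obtained by simple back substitution; the
		 * update to x is then the combination of the Krylov basis vectors
		 * m_v[0..last_k] weighted by m_y.
		 */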
		for (int i = last_k; i >= 0; i--)
		{
			double tmp = m_g[i];
			for (unsigned j = i + 1; j <= last_k; j++)
			{
				tmp -= m_ht[i][j] * m_y[j];
			}
			m_y[i] = tmp / m_ht[i][i];
		}

		for (unsigned i = 0; i <= last_k; i++)
			vec_add_mult_scalar(n, m_v[i], m_y[i], x);

#if 1
		if (rho <= rho_delta)
		{
			break;
		}
#else
		/* we try to approximate the x difference between two steps using m_v[last_k] */

		double xdelta = m_y[last_k] * vec_maxabs(n, m_v[last_k]);
		if (xdelta < accuracy)
		{
			if (m_accuracy_mult < 16384.0)
				m_accuracy_mult = m_accuracy_mult * 2.0;
			break;
		}
		else
			m_accuracy_mult = m_accuracy_mult / 2.0;

#endif
	}

	return itr_used;
}

} //namespace devices
} // namespace netlist

#endif /* NLD_MS_GMRES_H_ */