Fix a hidden bug in the GMRES solver and more optimization. (nw)

This commit is contained in:
couriersud 2017-01-27 23:29:04 +01:00
parent 563b60a8ab
commit 4eee6b09a9
14 changed files with 273 additions and 234 deletions

View File

@ -124,8 +124,6 @@ NETLIB_UPDATE_TERMINALS(QBJT_switch)
m_RB.set(gb, v, 0.0);
m_RC.set(gc, 0.0, 0.0);
//m_RB.update_dev();
//m_RC.update_dev();
m_state_on = new_state;
}
}

View File

@ -13,13 +13,16 @@
#include <algorithm>
#include "plib/pconfig.h"
template<int storage_N>
//template<std::size_t storage_N, typename C = std::size_t>
template<std::size_t storage_N, typename C = uint16_t>
struct mat_cr_t
{
unsigned nz_num;
unsigned ia[storage_N + 1];
unsigned ja[storage_N * storage_N];
unsigned diag[storage_N]; /* n */
typedef C type;
C nz_num;
C ia[storage_N + 1];
C ja[storage_N * storage_N];
C diag[storage_N]; /* n */
template<typename T>
void mult_vec(const T * RESTRICT A, const T * RESTRICT x, T * RESTRICT res)
@ -28,14 +31,14 @@ struct mat_cr_t
* res = A * x
*/
unsigned i = 0;
unsigned k = 0;
const unsigned oe = nz_num;
std::size_t i = 0;
std::size_t k = 0;
const std::size_t oe = nz_num;
while (k < oe)
{
T tmp = 0.0;
const unsigned e = ia[i+1];
const std::size_t e = ia[i+1];
for (; k < e; k++)
tmp += A[k] * x[ja[k]];
res[i++] = tmp;
@ -52,28 +55,28 @@ struct mat_cr_t
*
*/
const unsigned lnz = nz_num;
const std::size_t lnz = nz_num;
for (unsigned k = 0; k < lnz; k++)
for (std::size_t k = 0; k < lnz; k++)
LU[k] = A[k];
for (unsigned i = 1; ia[i] < lnz; i++) // row i
for (std::size_t i = 1; ia[i] < lnz; i++) // row i
{
const unsigned iai1 = ia[i + 1];
const unsigned pke = diag[i];
for (unsigned pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
const std::size_t iai1 = ia[i + 1];
const std::size_t pke = diag[i];
for (std::size_t pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
{
// pk == (i, k)
const unsigned k = ja[pk];
const unsigned iak1 = ia[k + 1];
const std::size_t k = ja[pk];
const std::size_t iak1 = ia[k + 1];
const T LUpk = LU[pk] = LU[pk] / LU[diag[k]];
unsigned pt = ia[k];
std::size_t pt = ia[k];
for (unsigned pj = pk + 1; pj < iai1; pj++) // pj = (i, j)
for (std::size_t pj = pk + 1; pj < iai1; pj++) // pj = (i, j)
{
// we can assume that within a row ja increases continuously */
const unsigned ej = ja[pj];
const std::size_t ej = ja[pj];
while (ja[pt] < ej && pt < iak1)
pt++;
if (pt < iak1 && ja[pt] == ej)
@ -108,15 +111,15 @@ struct mat_cr_t
*
*/
unsigned i;
std::size_t i;
for (i = 1; ia[i] < nz_num; i++ )
{
T tmp = 0.0;
const unsigned j1 = ia[i];
const unsigned j2 = diag[i];
const std::size_t j1 = ia[i];
const std::size_t j2 = diag[i];
for (unsigned j = j1; j < j2; j++ )
for (std::size_t j = j1; j < j2; j++ )
tmp += LU[j] * r[ja[j]];
r[i] -= tmp;
@ -124,11 +127,11 @@ struct mat_cr_t
// i now is equal to n;
for (; 0 < i; i-- )
{
const unsigned im1 = i - 1;
const std::size_t im1 = i - 1;
T tmp = 0.0;
const unsigned j1 = diag[im1] + 1;
const unsigned j2 = ia[im1+1];
for (unsigned j = j1; j < j2; j++ )
const std::size_t j1 = diag[im1] + 1;
const std::size_t j2 = ia[im1+1];
for (std::size_t j = j1; j < j2; j++ )
tmp += LU[j] * r[ja[j]];
r[im1] = (r[im1] - tmp) / LU[diag[im1]];
}

View File

@ -376,6 +376,7 @@ void matrix_solver_t::reset()
void matrix_solver_t::update() NL_NOEXCEPT
{
const netlist_time new_timestep = solve();
update_inputs();
if (m_params.m_dynamic_ts && has_timestep_devices() && new_timestep > netlist_time::zero())
{
@ -387,6 +388,7 @@ void matrix_solver_t::update() NL_NOEXCEPT
void matrix_solver_t::update_forced()
{
ATTR_UNUSED const netlist_time new_timestep = solve();
update_inputs();
if (m_params.m_dynamic_ts && has_timestep_devices())
{
@ -447,7 +449,6 @@ const netlist_time matrix_solver_t::solve()
step(delta);
solve_base();
const netlist_time next_time_step = compute_next_timestep(delta.as_double());
update_inputs();
return next_time_step;
}

View File

@ -113,7 +113,11 @@ public:
void solve_base();
/* after every call to solve, update inputs must be called.
* this can be done as well as a batch to ease parallel processing.
*/
const netlist_time solve();
void update_inputs();
inline bool has_dynamic_devices() const { return m_dynamic_devices.size() > 0; }
inline bool has_timestep_devices() const { return m_step_devices.size() > 0; }
@ -188,8 +192,6 @@ private:
void step(const netlist_time &delta);
void update_inputs();
const eSortType m_sort;
};
@ -201,9 +203,9 @@ T matrix_solver_t::delta(const T * RESTRICT V)
* and thus belong into a different calculation. This applies to all solvers.
*/
std::size_t iN = this->m_terms.size();
const std::size_t iN = this->m_terms.size();
T cerr = 0;
for (unsigned i = 0; i < iN; i++)
for (std::size_t i = 0; i < iN; i++)
cerr = std::max(cerr, std::abs(V[i] - static_cast<T>(this->m_nets[i]->m_cur_Analog)));
return cerr;
}
@ -211,7 +213,8 @@ T matrix_solver_t::delta(const T * RESTRICT V)
template <typename T>
void matrix_solver_t::store(const T * RESTRICT V)
{
for (std::size_t i = 0, iN=m_terms.size(); i < iN; i++)
const std::size_t iN = this->m_terms.size();
for (std::size_t i = 0; i < iN; i++)
this->m_nets[i]->m_cur_Analog = V[i];
}
@ -222,12 +225,12 @@ void matrix_solver_t::build_LE_A()
T &child = static_cast<T &>(*this);
const unsigned iN = child.N();
for (unsigned k = 0; k < iN; k++)
const std::size_t iN = child.N();
for (std::size_t k = 0; k < iN; k++)
{
terms_for_net_t *terms = m_terms[k].get();
for (unsigned i=0; i < iN; i++)
for (std::size_t i=0; i < iN; i++)
child.A(k,i) = 0.0;
const std::size_t terms_count = terms->count();
@ -236,7 +239,7 @@ void matrix_solver_t::build_LE_A()
{
nl_double akk = 0.0;
for (unsigned i = 0; i < terms_count; i++)
for (std::size_t i = 0; i < terms_count; i++)
akk += gt[i];
child.A(k,k) = akk;
@ -256,8 +259,8 @@ void matrix_solver_t::build_LE_RHS()
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this);
const unsigned iN = child.N();
for (unsigned k = 0; k < iN; k++)
const std::size_t iN = child.N();
for (std::size_t k = 0; k < iN; k++)
{
nl_double rhsk_a = 0.0;
nl_double rhsk_b = 0.0;

View File

@ -113,7 +113,7 @@ static void thr_dispose()
}
#endif
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
#if TEST_PARALLEL
class matrix_solver_direct_t: public matrix_solver_t, public thr_intf
#else
@ -123,8 +123,8 @@ class matrix_solver_direct_t: public matrix_solver_t
friend class matrix_solver_t;
public:
matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size);
matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const unsigned size);
matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const std::size_t size);
virtual ~matrix_solver_direct_t();
@ -135,7 +135,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson);
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; }
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
void LE_solve();
@ -173,7 +173,7 @@ private:
#endif
//nl_ext_double m_RHSx[storage_N];
const unsigned m_dim;
const std::size_t m_dim;
};
@ -181,7 +181,7 @@ private:
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
{
#if (NL_USE_DYNAMIC_ALLOCATION)
@ -192,23 +192,23 @@ matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
#endif
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_t::setup_base(nets);
/* add RHS element */
for (unsigned k = 0; k < N(); k++)
for (std::size_t k = 0; k < N(); k++)
{
terms_for_net_t * t = m_terms[k].get();
if (!plib::container::contains(t->m_nzrd, N()))
t->m_nzrd.push_back(N());
if (!plib::container::contains(t->m_nzrd, static_cast<unsigned>(N())))
t->m_nzrd.push_back(static_cast<unsigned>(N()));
}
netlist().save(*this, m_last_RHS, "m_last_RHS");
for (unsigned k = 0; k < N(); k++)
for (std::size_t k = 0; k < N(); k++)
netlist().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k));
}
@ -239,18 +239,18 @@ void matrix_solver_direct_t<m_N, storage_N>::do_work(const int id, void *param)
}
#endif
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
{
const unsigned kN = N();
const std::size_t kN = N();
for (unsigned i = 0; i < kN; i++) {
for (std::size_t i = 0; i < kN; i++) {
// FIXME: use a parameter to enable pivoting? m_pivot
if (!TEST_PARALLEL && m_params.m_pivot)
{
/* Find the row with the largest first value */
unsigned maxrow = i;
for (unsigned j = i + 1; j < kN; j++)
std::size_t maxrow = i;
for (std::size_t j = i + 1; j < kN; j++)
{
//if (std::abs(m_A[j][i]) > std::abs(m_A[maxrow][i]))
if (A(j,i) * A(j,i) > A(maxrow,i) * A(maxrow,i))
@ -260,7 +260,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
if (maxrow != i)
{
/* Swap the maxrow and ith row */
for (unsigned k = 0; k < kN + 1; k++) {
for (std::size_t k = 0; k < kN + 1; k++) {
std::swap(A(i,k), A(maxrow,k));
}
//std::swap(RHS(i), RHS(maxrow));
@ -270,7 +270,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
/* Eliminate column i from row j */
for (unsigned j = i + 1; j < kN; j++)
for (std::size_t j = i + 1; j < kN; j++)
{
const nl_double f1 = - A(j,i) * f;
if (f1 != NL_FCONST(0.0))
@ -335,17 +335,17 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
template <typename T>
void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
T * RESTRICT x)
{
const unsigned kN = N();
const std::size_t kN = N();
/* back substitution */
if (m_params.m_pivot)
{
for (unsigned j = kN; j-- > 0; )
for (std::size_t j = kN; j-- > 0; )
{
T tmp = 0;
for (std::size_t k = j+1; k < kN; k++)
@ -355,14 +355,14 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
}
else
{
for (unsigned j = kN; j-- > 0; )
for (std::size_t j = kN; j-- > 0; )
{
T tmp = 0;
const auto *p = m_terms[j]->m_nzrd.data();
const auto e = m_terms[j]->m_nzrd.size() - 1; /* exclude RHS element */
for (unsigned k = 0; k < e; k++)
for (std::size_t k = 0; k < e; k++)
{
const auto pk = p[k];
tmp += A(j,pk) * x[pk];
@ -373,7 +373,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
{
nl_double new_V[storage_N]; // = { 0.0 };
@ -396,22 +396,22 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool ne
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
inline unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_direct_t>();
build_LE_RHS<matrix_solver_direct_t>();
for (unsigned i=0, iN=N(); i < iN; i++)
for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
this->m_stat_calculations++;
return this->solve_non_dynamic(newton_raphson);
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size)
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, ASCENDING, params)
, m_dim(size)
{
@ -427,9 +427,9 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
#endif
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetlist, const pstring &name,
const eSortType sort, const solver_parameters_t *params, const unsigned size)
const eSortType sort, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, sort, params)
, m_dim(size)
{

View File

@ -30,13 +30,13 @@ namespace netlist
{
namespace devices
{
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_GCR_t: public matrix_solver_t
{
public:
matrix_solver_GCR_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size)
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params)
, m_dim(size)
, m_proc(nullptr)
@ -47,7 +47,7 @@ public:
{
}
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; }
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
virtual void vsetup(analog_net_t::list_t &nets) override;
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
@ -56,13 +56,15 @@ public:
private:
typedef typename mat_cr_t<storage_N>::type mattype;
void csc_private(plib::putf8_fmt_writer &strm);
using extsolver = void (*)(double * RESTRICT m_A, double * RESTRICT RHS);
pstring static_compile_name();
unsigned m_dim;
const std::size_t m_dim;
std::vector<unsigned> m_term_cr[storage_N];
mat_cr_t<storage_N> mat;
nl_double m_A[storage_N * storage_N];
@ -75,18 +77,18 @@ private:
// matrix_solver - GMRES
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
setup_base(nets);
unsigned nz = 0;
const unsigned iN = this->N();
mattype nz = 0;
const std::size_t iN = this->N();
/* build the final matrix */
bool touched[storage_N][storage_N] = { { false } };
for (unsigned k = 0; k < iN; k++)
for (std::size_t k = 0; k < iN; k++)
{
for (auto &j : this->m_terms[k]->m_nz)
touched[k][j] = true;
@ -96,16 +98,16 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
unsigned ops = 0;
for (unsigned k = 0; k < iN; k++)
for (std::size_t k = 0; k < iN; k++)
{
ops++; // 1/A(k,k)
for (unsigned row = k + 1; row < iN; row++)
for (std::size_t row = k + 1; row < iN; row++)
{
if (touched[row][k])
{
ops++;
fc++;
for (unsigned col = k + 1; col < iN; col++)
for (std::size_t col = k + 1; col < iN; col++)
if (touched[k][col])
{
touched[row][col] = true;
@ -116,11 +118,11 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
}
for (unsigned k=0; k<iN; k++)
for (mattype k=0; k<iN; k++)
{
mat.ia[k] = nz;
for (unsigned j=0; j<iN; j++)
for (mattype j=0; j<iN; j++)
{
if (touched[k][j])
{
@ -133,10 +135,10 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
m_term_cr[k].clear();
/* build pointers into the compressed row format matrix for each terminal */
for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++)
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
{
int other = this->m_terms[k]->connected_net_idx()[j];
for (unsigned i = mat.ia[k]; i < nz; i++)
for (auto i = mat.ia[k]; i < nz; i++)
if (other == static_cast<int>(mat.ja[i]))
{
m_term_cr[k].push_back(i);
@ -166,27 +168,27 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
{
const unsigned iN = N();
for (unsigned i = 0; i < iN - 1; i++)
const std::size_t iN = N();
for (std::size_t i = 0; i < iN - 1; i++)
{
const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0)
{
unsigned pi = mat.diag[i];
std::size_t pi = mat.diag[i];
//const nl_double f = 1.0 / m_A[pi++];
strm("const double f{1} = 1.0 / m_A[{2}];\n", i, pi);
pi++;
const unsigned piie = mat.ia[i+1];
const std::size_t piie = mat.ia[i+1];
for (auto & j : nzbd)
{
// proceed to column i
unsigned pj = mat.ia[j];
std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i)
pj++;
@ -196,7 +198,7 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
pj++;
// subtract row i from j */
for (unsigned pii = pi; pii<piie; )
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
pj++;
@ -212,7 +214,7 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
{
plib::postringstream t;
@ -223,7 +225,7 @@ pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_code()
{
plib::postringstream t;
@ -238,18 +240,18 @@ std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_c
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
const unsigned iN = this->N();
const std::size_t iN = this->N();
nl_double RHS[storage_N];
nl_double new_V[storage_N];
for (unsigned i=0, e=mat.nz_num; i<e; i++)
for (std::size_t i=0, e=mat.nz_num; i<e; i++)
m_A[i] = 0.0;
for (unsigned k = 0; k < iN; k++)
for (std::size_t k = 0; k < iN; k++)
{
terms_for_net_t *t = this->m_terms[k].get();
nl_double gtot_t = 0.0;
@ -295,7 +297,7 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
// add diagonal element
m_A[mat.diag[k]] = gtot_t;
for (unsigned i = 0; i < railstart; i++)
for (std::size_t i = 0; i < railstart; i++)
m_A[tcr[i]] -= go[i];
}
#else
@ -327,21 +329,21 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
}
else
{
for (unsigned i = 0; i < iN - 1; i++)
for (std::size_t i = 0; i < iN - 1; i++)
{
const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0)
{
unsigned pi = mat.diag[i];
std::size_t pi = mat.diag[i];
const nl_double f = 1.0 / m_A[pi++];
const unsigned piie = mat.ia[i+1];
const std::size_t piie = mat.ia[i+1];
for (auto & j : nzbd)
{
// proceed to column i
//__builtin_prefetch(&m_A[mat.diag[j+1]], 1);
unsigned pj = mat.ia[j];
std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i)
pj++;
@ -349,7 +351,7 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
const nl_double f1 = - m_A[pj++] * f;
// subtract row i from j */
for (unsigned pii = pi; pii<piie; )
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
pj++;
@ -368,7 +370,7 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
/* row n-1 */
new_V[iN - 1] = RHS[iN - 1] / m_A[mat.diag[iN - 1]];
for (unsigned j = iN - 1; j-- > 0;)
for (std::size_t j = iN - 1; j-- > 0;)
{
//__builtin_prefetch(&new_V[j-1], 1);
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
@ -390,8 +392,8 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
new_V[j] = (RHS[j] - tmpx) / m_A[mat.diag[j]];
#else
double tmp = 0;
const unsigned e = mat.ia[j+1];
for (unsigned pk = mat.diag[j] + 1; pk < e; pk++)
const std::size_t e = mat.ia[j+1];
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
{
tmp += m_A[pk] * new_V[mat.ja[pk]];
}

View File

@ -23,12 +23,12 @@ namespace netlist
{
namespace devices
{
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N>
{
public:
matrix_solver_GMRES_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size)
matrix_solver_GMRES_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_use_iLU_preconditioning(true)
, m_use_more_precise_stop_condition(false)
@ -45,7 +45,9 @@ public:
private:
unsigned solve_ilu_gmres(nl_double * RESTRICT x, const nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy);
typedef typename mat_cr_t<storage_N>::type mattype;
unsigned solve_ilu_gmres(nl_double (& RESTRICT x)[storage_N], const nl_double * RESTRICT rhs, const unsigned restart_max, const std::size_t mr, nl_double accuracy);
std::vector<unsigned> m_term_cr[storage_N];
@ -71,22 +73,22 @@ private:
// matrix_solver - GMRES
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
unsigned nz = 0;
const unsigned iN = this->N();
mattype nz = 0;
const std::size_t iN = this->N();
for (unsigned k=0; k<iN; k++)
for (std::size_t k=0; k<iN; k++)
{
terms_for_net_t * RESTRICT row = this->m_terms[k].get();
mat.ia[k] = nz;
for (unsigned j=0; j<row->m_nz.size(); j++)
for (std::size_t j=0; j<row->m_nz.size(); j++)
{
mat.ja[nz] = row->m_nz[j];
mat.ja[nz] = static_cast<mattype>(row->m_nz[j]);
if (row->m_nz[j] == k)
mat.diag[k] = nz;
nz++;
@ -110,10 +112,10 @@ void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
mat.nz_num = nz;
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
const unsigned iN = this->N();
const std::size_t iN = this->N();
/* ideally, we could get an estimate for the spectral radius of
* Inv(D - L) * U
@ -130,7 +132,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
for (unsigned i=0, e=mat.nz_num; i<e; i++)
m_A[i] = 0.0;
for (unsigned k = 0; k < iN; k++)
for (std::size_t k = 0; k < iN; k++)
{
nl_double gtot_t = 0.0;
nl_double RHS_t = 0.0;
@ -168,12 +170,12 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
const nl_double accuracy = this->m_params.m_accuracy;
unsigned mr = iN;
std::size_t mr = iN;
if (iN > 3 )
mr = static_cast<unsigned>(std::sqrt(iN) * 2.0);
unsigned iter = std::max(1u, this->m_params.m_gs_loops);
unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
unsigned failed = mr * iter;
unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, static_cast<unsigned>(mr), accuracy);
unsigned failed = static_cast<unsigned>(mr) * iter;
this->m_iterative_total += gsl;
this->m_stat_calculations++;
@ -200,7 +202,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
}
template <typename T>
inline void givens_mult( const T & c, const T & s, T & g0, T & g1 )
inline static void givens_mult( const T c, const T s, T & g0, T & g1 )
{
const T tg0 = c * g0 - s * g1;
const T tg1 = s * g0 + c * g1;
@ -209,8 +211,8 @@ inline void givens_mult( const T & c, const T & s, T & g0, T & g1 )
g1 = tg1;
}
template <unsigned m_N, unsigned storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RESTRICT x, const nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy)
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RESTRICT x)[storage_N], const nl_double * RESTRICT rhs, const unsigned restart_max, const std::size_t mr, nl_double accuracy)
{
/*-------------------------------------------------------------------------
* The code below was inspired by code published by John Burkardt under
@ -237,7 +239,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
unsigned itr_used = 0;
double rho_delta = 0.0;
const unsigned n = this->N();
const std::size_t n = this->N();
if (m_use_iLU_preconditioning)
mat.incomplete_LU_factorization(m_A, m_LU);
@ -270,8 +272,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
for (unsigned itr = 0; itr < restart_max; itr++)
{
unsigned last_k = mr;
nl_double mu;
std::size_t last_k = mr;
nl_double rho;
nl_double Ax[storage_N];
@ -288,24 +289,27 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
rho = std::sqrt(vecmult2(n, residual));
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
if (rho < rho_delta)
return itr_used + 1;
vec_set(mr+1, NL_FCONST(0.0), m_g);
m_g[0] = rho;
for (unsigned i = 0; i < mr; i++)
for (std::size_t i = 0; i < mr; i++)
vec_set(mr + 1, NL_FCONST(0.0), m_ht[i]);
for (unsigned k = 0; k < mr; k++)
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
for (std::size_t k = 0; k < mr; k++)
{
const unsigned k1 = k + 1;
const std::size_t k1 = k + 1;
mat.mult_vec(m_A, m_v[k], m_v[k1]);
if (m_use_iLU_preconditioning)
mat.solveLUx(m_LU, m_v[k1]);
for (unsigned j = 0; j <= k; j++)
for (std::size_t j = 0; j <= k; j++)
{
m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]);
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
@ -315,13 +319,13 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
if (m_ht[k1][k] != 0.0)
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
for (unsigned j = 0; j < k; j++)
for (std::size_t j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
mu = std::hypot(m_ht[k][k], m_ht[k1][k]);
const nl_double mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
m_c[k] = m_ht[k][k] / mu;
m_s[k] = -m_ht[k1][k] / mu;
m_c[k] = m_ht[k][k] * mu;
m_s[k] = -m_ht[k1][k] * mu;
m_ht[k][k] = m_c[k] * m_ht[k][k] - m_s[k] * m_ht[k1][k];
m_ht[k1][k] = 0.0;
@ -344,17 +348,17 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
/* Solve the system H * y = g */
/* x += m_v[j] * m_y[j] */
for (unsigned i = last_k + 1; i-- > 0;)
for (std::size_t i = last_k + 1; i-- > 0;)
{
double tmp = m_g[i];
for (unsigned j = i + 1; j <= last_k; j++)
for (std::size_t j = i + 1; j <= last_k; j++)
{
tmp -= m_ht[i][j] * m_y[j];
}
m_y[i] = tmp / m_ht[i][i];
}
for (unsigned i = 0; i <= last_k; i++)
for (std::size_t i = 0; i <= last_k; i++)
vec_add_mult_scalar(n, m_v[i], m_y[i], x);
#if 1

View File

@ -47,7 +47,7 @@ namespace netlist
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_sm_t: public matrix_solver_t
{
friend class matrix_solver_t;
@ -55,7 +55,7 @@ class matrix_solver_sm_t: public matrix_solver_t
public:
matrix_solver_sm_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size);
const solver_parameters_t *params, const std::size_t size);
virtual ~matrix_solver_sm_t();
@ -66,7 +66,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson);
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; }
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
void LE_invert();
@ -103,7 +103,7 @@ private:
//nl_ext_double m_RHSx[storage_N];
const unsigned m_dim;
const std::size_t m_dim;
};
@ -111,7 +111,7 @@ private:
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
{
#if (NL_USE_DYNAMIC_ALLOCATION)
@ -119,7 +119,7 @@ matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
#endif
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_t::setup_base(nets);
@ -132,14 +132,14 @@ void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
{
const unsigned kN = N();
const std::size_t kN = N();
for (unsigned i = 0; i < kN; i++)
for (std::size_t i = 0; i < kN; i++)
{
for (unsigned j = 0; j < kN; j++)
for (std::size_t j = 0; j < kN; j++)
{
W(i,j) = lA(i,j) = A(i,j);
Ainv(i,j) = 0.0;
@ -147,7 +147,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
Ainv(i,i) = 1.0;
}
/* down */
for (unsigned i = 0; i < kN; i++)
for (std::size_t i = 0; i < kN; i++)
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
@ -166,28 +166,28 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
{
for (std::size_t k = 0; k < e; k++)
W(j,p[k]) += W(i,p[k]) * f1;
for (unsigned k = 0; k <= i; k ++)
for (std::size_t k = 0; k <= i; k ++)
Ainv(j,k) += Ainv(i,k) * f1;
}
}
}
/* up */
for (unsigned i = kN; i-- > 0; )
for (std::size_t i = kN; i-- > 0; )
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
for (unsigned j = i; j-- > 0; )
for (std::size_t j = i; j-- > 0; )
{
const nl_double f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (unsigned k = i; k < kN; k++)
for (std::size_t k = i; k < kN; k++)
W(j,k) += W(i,k) * f1;
for (unsigned k = 0; k < kN; k++)
for (std::size_t k = 0; k < kN; k++)
Ainv(j,k) += Ainv(i,k) * f1;
}
}
for (unsigned k = 0; k < kN; k++)
for (std::size_t k = 0; k < kN; k++)
{
Ainv(i,k) *= f;
lAinv(i,k) = Ainv(i,k);
@ -195,27 +195,27 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
template <typename T>
void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
T * RESTRICT x)
{
const unsigned kN = N();
const std::size_t kN = N();
for (unsigned i=0; i<kN; i++)
for (std::size_t i=0; i<kN; i++)
x[i] = 0.0;
for (unsigned k=0; k<kN; k++)
for (std::size_t k=0; k<kN; k++)
{
const nl_double f = RHS(k);
for (unsigned i=0; i<kN; i++)
for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f;
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
{
static const bool incremental = true;
@ -305,29 +305,29 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
inline unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_sm_t>();
build_LE_RHS<matrix_solver_sm_t>();
for (unsigned i=0, iN=N(); i < iN; i++)
for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
this->m_stat_calculations++;
return this->solve_non_dynamic(newton_raphson);
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::matrix_solver_sm_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size)
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params)
, m_dim(size)
{
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A = palloc_array(nl_ext_double, N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
for (std::size_t k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;
}

View File

@ -21,12 +21,12 @@ namespace netlist
{
namespace devices
{
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_t: public matrix_solver_direct_t<m_N, storage_N>
{
public:
matrix_solver_SOR_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size)
matrix_solver_SOR_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_lp_fact(*this, "m_lp_fact", 0)
{
@ -46,16 +46,16 @@ private:
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_SOR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
const unsigned iN = this->N();
const std::size_t iN = this->N();
bool resched = false;
unsigned resched_cnt = 0;

View File

@ -22,14 +22,14 @@ namespace netlist
{
namespace devices
{
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, storage_N>
{
friend class matrix_solver_t;
public:
matrix_solver_SOR_mat_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size)
matrix_solver_SOR_mat_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
, m_Vdelta(*this, "m_Vdelta", 0.0)
, m_omega(*this, "m_omega", params->m_gs_sor)
@ -58,7 +58,7 @@ private:
// matrix_solver - Gauss - Seidel
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_SOR_mat_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
@ -113,7 +113,7 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
}
#endif
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
/* The matrix based code looks a lot nicer but actually is 30% slower than
@ -123,7 +123,7 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
nl_double new_v[storage_N] = { 0.0 };
const unsigned iN = this->N();
const std::size_t iN = this->N();
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>();
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>();
@ -169,14 +169,14 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
}
#endif
for (unsigned k = 0; k < iN; k++)
for (std::size_t k = 0; k < iN; k++)
new_v[k] = this->m_nets[k]->m_cur_Analog;
do {
resched = false;
nl_double cerr = 0.0;
for (unsigned k = 0; k < iN; k++)
for (std::size_t k = 0; k < iN; k++)
{
nl_double Idrive = 0;

View File

@ -54,13 +54,13 @@ namespace netlist
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_w_t: public matrix_solver_t
{
friend class matrix_solver_t;
public:
matrix_solver_w_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size);
matrix_solver_w_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
virtual ~matrix_solver_w_t();
@ -71,7 +71,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson);
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; }
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
void LE_invert();
@ -115,7 +115,7 @@ private:
//nl_ext_double m_RHSx[storage_N];
const unsigned m_dim;
const std::size_t m_dim;
};
@ -123,12 +123,12 @@ private:
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_w_t<m_N, storage_N>::~matrix_solver_w_t()
{
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_t::setup_base(nets);
@ -141,14 +141,14 @@ void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_w_t<m_N, storage_N>::LE_invert()
{
const unsigned kN = N();
const std::size_t kN = N();
for (unsigned i = 0; i < kN; i++)
for (std::size_t i = 0; i < kN; i++)
{
for (unsigned j = 0; j < kN; j++)
for (std::size_t j = 0; j < kN; j++)
{
W(i,j) = lA(i,j) = A(i,j);
Ainv(i,j) = 0.0;
@ -156,7 +156,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
Ainv(i,i) = 1.0;
}
/* down */
for (unsigned i = 0; i < kN; i++)
for (std::size_t i = 0; i < kN; i++)
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
@ -167,63 +167,63 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
const auto * RESTRICT const pb = m_terms[i]->m_nzbd.data();
const size_t eb = m_terms[i]->m_nzbd.size();
for (unsigned jb = 0; jb < eb; jb++)
for (std::size_t jb = 0; jb < eb; jb++)
{
const auto j = pb[jb];
const nl_double f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (unsigned k = 0; k < e; k++)
for (std::size_t k = 0; k < e; k++)
W(j,p[k]) += W(i,p[k]) * f1;
for (unsigned k = 0; k <= i; k ++)
for (std::size_t k = 0; k <= i; k ++)
Ainv(j,k) += Ainv(i,k) * f1;
}
}
}
/* up */
for (unsigned i = kN; i-- > 0; )
for (std::size_t i = kN; i-- > 0; )
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
for (unsigned j = i; j-- > 0; )
for (std::size_t j = i; j-- > 0; )
{
const nl_double f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (unsigned k = i; k < kN; k++)
for (std::size_t k = i; k < kN; k++)
W(j,k) += W(i,k) * f1;
for (unsigned k = 0; k < kN; k++)
for (std::size_t k = 0; k < kN; k++)
Ainv(j,k) += Ainv(i,k) * f1;
}
}
for (unsigned k = 0; k < kN; k++)
for (std::size_t k = 0; k < kN; k++)
{
Ainv(i,k) *= f;
}
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
template <typename T>
void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
T * RESTRICT x)
{
const unsigned kN = N();
const std::size_t kN = N();
for (unsigned i=0; i<kN; i++)
for (std::size_t i=0; i<kN; i++)
x[i] = 0.0;
for (unsigned k=0; k<kN; k++)
for (std::size_t k=0; k<kN; k++)
{
const nl_double f = RHS(k);
for (unsigned i=0; i<kN; i++)
for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f;
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
{
const auto iN = N();
@ -369,27 +369,27 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
}
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
inline unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_w_t>();
build_LE_RHS<matrix_solver_w_t>();
for (unsigned i=0, iN=N(); i < iN; i++)
for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
this->m_stat_calculations++;
return this->solve_non_dynamic(newton_raphson);
}
template <unsigned m_N, unsigned storage_N>
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_w_t<m_N, storage_N>::matrix_solver_w_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size)
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params)
,m_cnt(0)
, m_dim(size)
{
for (unsigned k = 0; k < N(); k++)
for (std::size_t k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;
}

View File

@ -108,6 +108,10 @@ NETLIB_UPDATE(solver)
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
}
}
for (auto & solver : m_mat_solvers)
if (solver->has_timestep_devices() || force_solve)
solver->update_inputs();
}
else
for (int i = 0; i < t_cnt; i++)
@ -115,12 +119,16 @@ NETLIB_UPDATE(solver)
{
// Ignore return value
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
solver->update_inputs();
}
#else
for (auto & solver : m_mat_solvers)
if (solver->has_timestep_devices() || force_solve)
{
// Ignore return value
ATTR_UNUSED const netlist_time ts = solver->solve();
solver->update_inputs();
}
#endif
/* step circuit */
@ -132,14 +140,14 @@ NETLIB_UPDATE(solver)
}
template <class C>
std::unique_ptr<matrix_solver_t> create_it(netlist_t &nl, pstring name, solver_parameters_t &params, unsigned size)
std::unique_ptr<matrix_solver_t> create_it(netlist_t &nl, pstring name, solver_parameters_t &params, std::size_t size)
{
typedef C solver;
return plib::make_unique<solver>(nl, name, &params, size);
}
template <int m_N, int storage_N>
std::unique_ptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver(unsigned size, const bool use_specific)
template <std::size_t m_N, std::size_t storage_N>
std::unique_ptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver(std::size_t size, const bool use_specific)
{
pstring solvername = plib::pfmt("Solver_{1}")(m_mat_solvers.size());
if (use_specific && m_N == 1)
@ -294,11 +302,11 @@ void NETLIB_NAME(solver)::post_start()
for (auto & grp : splitter.groups)
{
std::unique_ptr<matrix_solver_t> ms;
unsigned net_count = static_cast<unsigned>(grp.size());
std::size_t net_count = grp.size();
switch (net_count)
{
#if 0
#if 1
case 1:
ms = create_solver<1,1>(1, false);
break;
@ -341,6 +349,9 @@ void NETLIB_NAME(solver)::post_start()
case 31:
ms = create_solver<31,31>(31, use_specific);
break;
case 35:
ms = create_solver<31,31>(31, use_specific);
break;
case 49:
ms = create_solver<49,49>(49, use_specific);
break;
@ -352,7 +363,11 @@ void NETLIB_NAME(solver)::post_start()
#endif
default:
log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count);
if (net_count <= 16)
if (net_count <= 8)
{
ms = create_solver<0, 8>(net_count, use_specific);
}
else if (net_count <= 16)
{
ms = create_solver<0,16>(net_count, use_specific);
}

View File

@ -112,8 +112,8 @@ private:
solver_parameters_t m_params;
template <int m_N, int storage_N>
std::unique_ptr<matrix_solver_t> create_solver(unsigned size, bool use_specific);
template <std::size_t m_N, std::size_t storage_N>
std::unique_ptr<matrix_solver_t> create_solver(std::size_t size, bool use_specific);
};
} //namespace devices

View File

@ -36,14 +36,14 @@ private:
#endif
template<typename T>
inline void vec_set (const std::size_t & n, const T &scalar, T * RESTRICT result)
inline static void vec_set (const std::size_t n, const T scalar, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] = scalar;
}
template<typename T>
inline T vecmult (const std::size_t & n, const T * RESTRICT a1, const T * RESTRICT a2 )
inline static T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT a2 )
{
T value = 0.0;
for ( std::size_t i = 0; i < n; i++ )
@ -52,7 +52,7 @@ inline T vecmult (const std::size_t & n, const T * RESTRICT a1, const T * RESTRI
}
template<typename T>
inline T vecmult2 (const std::size_t & n, const T *a1)
inline static T vecmult2 (const std::size_t n, const T *a1)
{
T value = 0.0;
for ( std::size_t i = 0; i < n; i++ )
@ -63,43 +63,56 @@ inline T vecmult2 (const std::size_t & n, const T *a1)
return value;
}
template<typename T>
inline void vec_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T & scalar, T * RESTRICT result)
template<typename T, std::size_t N>
inline static void vec_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T & scalar, T (& RESTRICT result)[N])
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
{
result[i] = scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = scalar * v[i];
}
template<typename T, std::size_t N>
inline static void vec_add_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T scalar, T (& RESTRICT result)[N])
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
result[i] = result[i] + scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = result[i] + scalar * v[i];
}
template<typename T>
inline void vec_add_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
inline static void vec_add_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] += scalar * v[i];
}
inline void vec_add_ip(const std::size_t & n, const double * RESTRICT v, double * RESTRICT result)
inline static void vec_add_ip(const std::size_t n, const double * RESTRICT v, double * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] += v[i];
}
template<typename T>
inline void vec_sub(const std::size_t & n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
inline void vec_sub(const std::size_t n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] = v1[i] - v2[i];
}
template<typename T>
inline void vec_scale (const std::size_t & n, T * RESTRICT v, const T scalar)
inline void vec_scale (const std::size_t n, T * RESTRICT v, const T scalar)
{
for ( std::size_t i = 0; i < n; i++ )
v[i] = scalar * v[i];
}
inline double vec_maxabs(const std::size_t & n, const double * RESTRICT v)
inline double vec_maxabs(const std::size_t n, const double * RESTRICT v)
{
double ret = 0.0;
for ( std::size_t i = 0; i < n; i++ )