Fix a hidden bug in the GMRES solver and more optimization. (nw)

This commit is contained in:
couriersud 2017-01-27 23:29:04 +01:00
parent 563b60a8ab
commit 4eee6b09a9
14 changed files with 273 additions and 234 deletions

View File

@ -124,8 +124,6 @@ NETLIB_UPDATE_TERMINALS(QBJT_switch)
m_RB.set(gb, v, 0.0); m_RB.set(gb, v, 0.0);
m_RC.set(gc, 0.0, 0.0); m_RC.set(gc, 0.0, 0.0);
//m_RB.update_dev();
//m_RC.update_dev();
m_state_on = new_state; m_state_on = new_state;
} }
} }

View File

@ -13,13 +13,16 @@
#include <algorithm> #include <algorithm>
#include "plib/pconfig.h" #include "plib/pconfig.h"
template<int storage_N> //template<std::size_t storage_N, typename C = std::size_t>
template<std::size_t storage_N, typename C = uint16_t>
struct mat_cr_t struct mat_cr_t
{ {
unsigned nz_num; typedef C type;
unsigned ia[storage_N + 1];
unsigned ja[storage_N * storage_N]; C nz_num;
unsigned diag[storage_N]; /* n */ C ia[storage_N + 1];
C ja[storage_N * storage_N];
C diag[storage_N]; /* n */
template<typename T> template<typename T>
void mult_vec(const T * RESTRICT A, const T * RESTRICT x, T * RESTRICT res) void mult_vec(const T * RESTRICT A, const T * RESTRICT x, T * RESTRICT res)
@ -28,14 +31,14 @@ struct mat_cr_t
* res = A * x * res = A * x
*/ */
unsigned i = 0; std::size_t i = 0;
unsigned k = 0; std::size_t k = 0;
const unsigned oe = nz_num; const std::size_t oe = nz_num;
while (k < oe) while (k < oe)
{ {
T tmp = 0.0; T tmp = 0.0;
const unsigned e = ia[i+1]; const std::size_t e = ia[i+1];
for (; k < e; k++) for (; k < e; k++)
tmp += A[k] * x[ja[k]]; tmp += A[k] * x[ja[k]];
res[i++] = tmp; res[i++] = tmp;
@ -52,28 +55,28 @@ struct mat_cr_t
* *
*/ */
const unsigned lnz = nz_num; const std::size_t lnz = nz_num;
for (unsigned k = 0; k < lnz; k++) for (std::size_t k = 0; k < lnz; k++)
LU[k] = A[k]; LU[k] = A[k];
for (unsigned i = 1; ia[i] < lnz; i++) // row i for (std::size_t i = 1; ia[i] < lnz; i++) // row i
{ {
const unsigned iai1 = ia[i + 1]; const std::size_t iai1 = ia[i + 1];
const unsigned pke = diag[i]; const std::size_t pke = diag[i];
for (unsigned pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i for (std::size_t pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
{ {
// pk == (i, k) // pk == (i, k)
const unsigned k = ja[pk]; const std::size_t k = ja[pk];
const unsigned iak1 = ia[k + 1]; const std::size_t iak1 = ia[k + 1];
const T LUpk = LU[pk] = LU[pk] / LU[diag[k]]; const T LUpk = LU[pk] = LU[pk] / LU[diag[k]];
unsigned pt = ia[k]; std::size_t pt = ia[k];
for (unsigned pj = pk + 1; pj < iai1; pj++) // pj = (i, j) for (std::size_t pj = pk + 1; pj < iai1; pj++) // pj = (i, j)
{ {
// we can assume that within a row ja increases continuously */ // we can assume that within a row ja increases continuously */
const unsigned ej = ja[pj]; const std::size_t ej = ja[pj];
while (ja[pt] < ej && pt < iak1) while (ja[pt] < ej && pt < iak1)
pt++; pt++;
if (pt < iak1 && ja[pt] == ej) if (pt < iak1 && ja[pt] == ej)
@ -108,15 +111,15 @@ struct mat_cr_t
* *
*/ */
unsigned i; std::size_t i;
for (i = 1; ia[i] < nz_num; i++ ) for (i = 1; ia[i] < nz_num; i++ )
{ {
T tmp = 0.0; T tmp = 0.0;
const unsigned j1 = ia[i]; const std::size_t j1 = ia[i];
const unsigned j2 = diag[i]; const std::size_t j2 = diag[i];
for (unsigned j = j1; j < j2; j++ ) for (std::size_t j = j1; j < j2; j++ )
tmp += LU[j] * r[ja[j]]; tmp += LU[j] * r[ja[j]];
r[i] -= tmp; r[i] -= tmp;
@ -124,11 +127,11 @@ struct mat_cr_t
// i now is equal to n; // i now is equal to n;
for (; 0 < i; i-- ) for (; 0 < i; i-- )
{ {
const unsigned im1 = i - 1; const std::size_t im1 = i - 1;
T tmp = 0.0; T tmp = 0.0;
const unsigned j1 = diag[im1] + 1; const std::size_t j1 = diag[im1] + 1;
const unsigned j2 = ia[im1+1]; const std::size_t j2 = ia[im1+1];
for (unsigned j = j1; j < j2; j++ ) for (std::size_t j = j1; j < j2; j++ )
tmp += LU[j] * r[ja[j]]; tmp += LU[j] * r[ja[j]];
r[im1] = (r[im1] - tmp) / LU[diag[im1]]; r[im1] = (r[im1] - tmp) / LU[diag[im1]];
} }

View File

@ -376,6 +376,7 @@ void matrix_solver_t::reset()
void matrix_solver_t::update() NL_NOEXCEPT void matrix_solver_t::update() NL_NOEXCEPT
{ {
const netlist_time new_timestep = solve(); const netlist_time new_timestep = solve();
update_inputs();
if (m_params.m_dynamic_ts && has_timestep_devices() && new_timestep > netlist_time::zero()) if (m_params.m_dynamic_ts && has_timestep_devices() && new_timestep > netlist_time::zero())
{ {
@ -387,6 +388,7 @@ void matrix_solver_t::update() NL_NOEXCEPT
void matrix_solver_t::update_forced() void matrix_solver_t::update_forced()
{ {
ATTR_UNUSED const netlist_time new_timestep = solve(); ATTR_UNUSED const netlist_time new_timestep = solve();
update_inputs();
if (m_params.m_dynamic_ts && has_timestep_devices()) if (m_params.m_dynamic_ts && has_timestep_devices())
{ {
@ -447,7 +449,6 @@ const netlist_time matrix_solver_t::solve()
step(delta); step(delta);
solve_base(); solve_base();
const netlist_time next_time_step = compute_next_timestep(delta.as_double()); const netlist_time next_time_step = compute_next_timestep(delta.as_double());
update_inputs();
return next_time_step; return next_time_step;
} }

View File

@ -113,7 +113,11 @@ public:
void solve_base(); void solve_base();
/* after every call to solve, update inputs must be called.
* this can be done as well as a batch to ease parallel processing.
*/
const netlist_time solve(); const netlist_time solve();
void update_inputs();
inline bool has_dynamic_devices() const { return m_dynamic_devices.size() > 0; } inline bool has_dynamic_devices() const { return m_dynamic_devices.size() > 0; }
inline bool has_timestep_devices() const { return m_step_devices.size() > 0; } inline bool has_timestep_devices() const { return m_step_devices.size() > 0; }
@ -188,8 +192,6 @@ private:
void step(const netlist_time &delta); void step(const netlist_time &delta);
void update_inputs();
const eSortType m_sort; const eSortType m_sort;
}; };
@ -201,9 +203,9 @@ T matrix_solver_t::delta(const T * RESTRICT V)
* and thus belong into a different calculation. This applies to all solvers. * and thus belong into a different calculation. This applies to all solvers.
*/ */
std::size_t iN = this->m_terms.size(); const std::size_t iN = this->m_terms.size();
T cerr = 0; T cerr = 0;
for (unsigned i = 0; i < iN; i++) for (std::size_t i = 0; i < iN; i++)
cerr = std::max(cerr, std::abs(V[i] - static_cast<T>(this->m_nets[i]->m_cur_Analog))); cerr = std::max(cerr, std::abs(V[i] - static_cast<T>(this->m_nets[i]->m_cur_Analog)));
return cerr; return cerr;
} }
@ -211,7 +213,8 @@ T matrix_solver_t::delta(const T * RESTRICT V)
template <typename T> template <typename T>
void matrix_solver_t::store(const T * RESTRICT V) void matrix_solver_t::store(const T * RESTRICT V)
{ {
for (std::size_t i = 0, iN=m_terms.size(); i < iN; i++) const std::size_t iN = this->m_terms.size();
for (std::size_t i = 0; i < iN; i++)
this->m_nets[i]->m_cur_Analog = V[i]; this->m_nets[i]->m_cur_Analog = V[i];
} }
@ -222,12 +225,12 @@ void matrix_solver_t::build_LE_A()
T &child = static_cast<T &>(*this); T &child = static_cast<T &>(*this);
const unsigned iN = child.N(); const std::size_t iN = child.N();
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
terms_for_net_t *terms = m_terms[k].get(); terms_for_net_t *terms = m_terms[k].get();
for (unsigned i=0; i < iN; i++) for (std::size_t i=0; i < iN; i++)
child.A(k,i) = 0.0; child.A(k,i) = 0.0;
const std::size_t terms_count = terms->count(); const std::size_t terms_count = terms->count();
@ -236,7 +239,7 @@ void matrix_solver_t::build_LE_A()
{ {
nl_double akk = 0.0; nl_double akk = 0.0;
for (unsigned i = 0; i < terms_count; i++) for (std::size_t i = 0; i < terms_count; i++)
akk += gt[i]; akk += gt[i];
child.A(k,k) = akk; child.A(k,k) = akk;
@ -256,8 +259,8 @@ void matrix_solver_t::build_LE_RHS()
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t"); static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this); T &child = static_cast<T &>(*this);
const unsigned iN = child.N(); const std::size_t iN = child.N();
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double rhsk_a = 0.0; nl_double rhsk_a = 0.0;
nl_double rhsk_b = 0.0; nl_double rhsk_b = 0.0;

View File

@ -113,7 +113,7 @@ static void thr_dispose()
} }
#endif #endif
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
#if TEST_PARALLEL #if TEST_PARALLEL
class matrix_solver_direct_t: public matrix_solver_t, public thr_intf class matrix_solver_direct_t: public matrix_solver_t, public thr_intf
#else #else
@ -123,8 +123,8 @@ class matrix_solver_direct_t: public matrix_solver_t
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size); matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const unsigned size); matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const std::size_t size);
virtual ~matrix_solver_direct_t(); virtual ~matrix_solver_direct_t();
@ -135,7 +135,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson); unsigned solve_non_dynamic(const bool newton_raphson);
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; } constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
void LE_solve(); void LE_solve();
@ -173,7 +173,7 @@ private:
#endif #endif
//nl_ext_double m_RHSx[storage_N]; //nl_ext_double m_RHSx[storage_N];
const unsigned m_dim; const std::size_t m_dim;
}; };
@ -181,7 +181,7 @@ private:
// matrix_solver_direct // matrix_solver_direct
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t() matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
{ {
#if (NL_USE_DYNAMIC_ALLOCATION) #if (NL_USE_DYNAMIC_ALLOCATION)
@ -192,23 +192,23 @@ matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
#endif #endif
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_t::setup_base(nets); matrix_solver_t::setup_base(nets);
/* add RHS element */ /* add RHS element */
for (unsigned k = 0; k < N(); k++) for (std::size_t k = 0; k < N(); k++)
{ {
terms_for_net_t * t = m_terms[k].get(); terms_for_net_t * t = m_terms[k].get();
if (!plib::container::contains(t->m_nzrd, N())) if (!plib::container::contains(t->m_nzrd, static_cast<unsigned>(N())))
t->m_nzrd.push_back(N()); t->m_nzrd.push_back(static_cast<unsigned>(N()));
} }
netlist().save(*this, m_last_RHS, "m_last_RHS"); netlist().save(*this, m_last_RHS, "m_last_RHS");
for (unsigned k = 0; k < N(); k++) for (std::size_t k = 0; k < N(); k++)
netlist().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k)); netlist().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k));
} }
@ -239,18 +239,18 @@ void matrix_solver_direct_t<m_N, storage_N>::do_work(const int id, void *param)
} }
#endif #endif
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::LE_solve() void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
{ {
const unsigned kN = N(); const std::size_t kN = N();
for (unsigned i = 0; i < kN; i++) { for (std::size_t i = 0; i < kN; i++) {
// FIXME: use a parameter to enable pivoting? m_pivot // FIXME: use a parameter to enable pivoting? m_pivot
if (!TEST_PARALLEL && m_params.m_pivot) if (!TEST_PARALLEL && m_params.m_pivot)
{ {
/* Find the row with the largest first value */ /* Find the row with the largest first value */
unsigned maxrow = i; std::size_t maxrow = i;
for (unsigned j = i + 1; j < kN; j++) for (std::size_t j = i + 1; j < kN; j++)
{ {
//if (std::abs(m_A[j][i]) > std::abs(m_A[maxrow][i])) //if (std::abs(m_A[j][i]) > std::abs(m_A[maxrow][i]))
if (A(j,i) * A(j,i) > A(maxrow,i) * A(maxrow,i)) if (A(j,i) * A(j,i) > A(maxrow,i) * A(maxrow,i))
@ -260,7 +260,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
if (maxrow != i) if (maxrow != i)
{ {
/* Swap the maxrow and ith row */ /* Swap the maxrow and ith row */
for (unsigned k = 0; k < kN + 1; k++) { for (std::size_t k = 0; k < kN + 1; k++) {
std::swap(A(i,k), A(maxrow,k)); std::swap(A(i,k), A(maxrow,k));
} }
//std::swap(RHS(i), RHS(maxrow)); //std::swap(RHS(i), RHS(maxrow));
@ -270,7 +270,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
/* Eliminate column i from row j */ /* Eliminate column i from row j */
for (unsigned j = i + 1; j < kN; j++) for (std::size_t j = i + 1; j < kN; j++)
{ {
const nl_double f1 = - A(j,i) * f; const nl_double f1 = - A(j,i) * f;
if (f1 != NL_FCONST(0.0)) if (f1 != NL_FCONST(0.0))
@ -335,17 +335,17 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
template <typename T> template <typename T>
void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst( void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
T * RESTRICT x) T * RESTRICT x)
{ {
const unsigned kN = N(); const std::size_t kN = N();
/* back substitution */ /* back substitution */
if (m_params.m_pivot) if (m_params.m_pivot)
{ {
for (unsigned j = kN; j-- > 0; ) for (std::size_t j = kN; j-- > 0; )
{ {
T tmp = 0; T tmp = 0;
for (std::size_t k = j+1; k < kN; k++) for (std::size_t k = j+1; k < kN; k++)
@ -355,14 +355,14 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
} }
else else
{ {
for (unsigned j = kN; j-- > 0; ) for (std::size_t j = kN; j-- > 0; )
{ {
T tmp = 0; T tmp = 0;
const auto *p = m_terms[j]->m_nzrd.data(); const auto *p = m_terms[j]->m_nzrd.data();
const auto e = m_terms[j]->m_nzrd.size() - 1; /* exclude RHS element */ const auto e = m_terms[j]->m_nzrd.size() - 1; /* exclude RHS element */
for (unsigned k = 0; k < e; k++) for (std::size_t k = 0; k < e; k++)
{ {
const auto pk = p[k]; const auto pk = p[k];
tmp += A(j,pk) * x[pk]; tmp += A(j,pk) * x[pk];
@ -373,7 +373,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
{ {
nl_double new_V[storage_N]; // = { 0.0 }; nl_double new_V[storage_N]; // = { 0.0 };
@ -396,22 +396,22 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool ne
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
inline unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) inline unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
build_LE_A<matrix_solver_direct_t>(); build_LE_A<matrix_solver_direct_t>();
build_LE_RHS<matrix_solver_direct_t>(); build_LE_RHS<matrix_solver_direct_t>();
for (unsigned i=0, iN=N(); i < iN; i++) for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i); m_last_RHS[i] = RHS(i);
this->m_stat_calculations++; this->m_stat_calculations++;
return this->solve_non_dynamic(newton_raphson); return this->solve_non_dynamic(newton_raphson);
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, ASCENDING, params) : matrix_solver_t(anetlist, name, ASCENDING, params)
, m_dim(size) , m_dim(size)
{ {
@ -427,9 +427,9 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
#endif #endif
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetlist, const pstring &name, matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetlist, const pstring &name,
const eSortType sort, const solver_parameters_t *params, const unsigned size) const eSortType sort, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, sort, params) : matrix_solver_t(anetlist, name, sort, params)
, m_dim(size) , m_dim(size)
{ {

View File

@ -30,13 +30,13 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_GCR_t: public matrix_solver_t class matrix_solver_GCR_t: public matrix_solver_t
{ {
public: public:
matrix_solver_GCR_t(netlist_t &anetlist, const pstring &name, matrix_solver_GCR_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params) : matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params)
, m_dim(size) , m_dim(size)
, m_proc(nullptr) , m_proc(nullptr)
@ -47,7 +47,7 @@ public:
{ {
} }
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; } constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
virtual void vsetup(analog_net_t::list_t &nets) override; virtual void vsetup(analog_net_t::list_t &nets) override;
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
@ -56,13 +56,15 @@ public:
private: private:
typedef typename mat_cr_t<storage_N>::type mattype;
void csc_private(plib::putf8_fmt_writer &strm); void csc_private(plib::putf8_fmt_writer &strm);
using extsolver = void (*)(double * RESTRICT m_A, double * RESTRICT RHS); using extsolver = void (*)(double * RESTRICT m_A, double * RESTRICT RHS);
pstring static_compile_name(); pstring static_compile_name();
unsigned m_dim; const std::size_t m_dim;
std::vector<unsigned> m_term_cr[storage_N]; std::vector<unsigned> m_term_cr[storage_N];
mat_cr_t<storage_N> mat; mat_cr_t<storage_N> mat;
nl_double m_A[storage_N * storage_N]; nl_double m_A[storage_N * storage_N];
@ -75,18 +77,18 @@ private:
// matrix_solver - GMRES // matrix_solver - GMRES
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
setup_base(nets); setup_base(nets);
unsigned nz = 0; mattype nz = 0;
const unsigned iN = this->N(); const std::size_t iN = this->N();
/* build the final matrix */ /* build the final matrix */
bool touched[storage_N][storage_N] = { { false } }; bool touched[storage_N][storage_N] = { { false } };
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
for (auto &j : this->m_terms[k]->m_nz) for (auto &j : this->m_terms[k]->m_nz)
touched[k][j] = true; touched[k][j] = true;
@ -96,16 +98,16 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
unsigned ops = 0; unsigned ops = 0;
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
ops++; // 1/A(k,k) ops++; // 1/A(k,k)
for (unsigned row = k + 1; row < iN; row++) for (std::size_t row = k + 1; row < iN; row++)
{ {
if (touched[row][k]) if (touched[row][k])
{ {
ops++; ops++;
fc++; fc++;
for (unsigned col = k + 1; col < iN; col++) for (std::size_t col = k + 1; col < iN; col++)
if (touched[k][col]) if (touched[k][col])
{ {
touched[row][col] = true; touched[row][col] = true;
@ -116,11 +118,11 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
} }
for (unsigned k=0; k<iN; k++) for (mattype k=0; k<iN; k++)
{ {
mat.ia[k] = nz; mat.ia[k] = nz;
for (unsigned j=0; j<iN; j++) for (mattype j=0; j<iN; j++)
{ {
if (touched[k][j]) if (touched[k][j])
{ {
@ -133,10 +135,10 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
m_term_cr[k].clear(); m_term_cr[k].clear();
/* build pointers into the compressed row format matrix for each terminal */ /* build pointers into the compressed row format matrix for each terminal */
for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++) for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
{ {
int other = this->m_terms[k]->connected_net_idx()[j]; int other = this->m_terms[k]->connected_net_idx()[j];
for (unsigned i = mat.ia[k]; i < nz; i++) for (auto i = mat.ia[k]; i < nz; i++)
if (other == static_cast<int>(mat.ja[i])) if (other == static_cast<int>(mat.ja[i]))
{ {
m_term_cr[k].push_back(i); m_term_cr[k].push_back(i);
@ -166,27 +168,27 @@ void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm) void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
{ {
const unsigned iN = N(); const std::size_t iN = N();
for (unsigned i = 0; i < iN - 1; i++) for (std::size_t i = 0; i < iN - 1; i++)
{ {
const auto &nzbd = this->m_terms[i]->m_nzbd; const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0) if (nzbd.size() > 0)
{ {
unsigned pi = mat.diag[i]; std::size_t pi = mat.diag[i];
//const nl_double f = 1.0 / m_A[pi++]; //const nl_double f = 1.0 / m_A[pi++];
strm("const double f{1} = 1.0 / m_A[{2}];\n", i, pi); strm("const double f{1} = 1.0 / m_A[{2}];\n", i, pi);
pi++; pi++;
const unsigned piie = mat.ia[i+1]; const std::size_t piie = mat.ia[i+1];
for (auto & j : nzbd) for (auto & j : nzbd)
{ {
// proceed to column i // proceed to column i
unsigned pj = mat.ia[j]; std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i) while (mat.ja[pj] < i)
pj++; pj++;
@ -196,7 +198,7 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
pj++; pj++;
// subtract row i from j */ // subtract row i from j */
for (unsigned pii = pi; pii<piie; ) for (std::size_t pii = pi; pii<piie; )
{ {
while (mat.ja[pj] < mat.ja[pii]) while (mat.ja[pj] < mat.ja[pii])
pj++; pj++;
@ -212,7 +214,7 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name() pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
{ {
plib::postringstream t; plib::postringstream t;
@ -223,7 +225,7 @@ pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num); return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_code() std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_code()
{ {
plib::postringstream t; plib::postringstream t;
@ -238,18 +240,18 @@ std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_c
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
const unsigned iN = this->N(); const std::size_t iN = this->N();
nl_double RHS[storage_N]; nl_double RHS[storage_N];
nl_double new_V[storage_N]; nl_double new_V[storage_N];
for (unsigned i=0, e=mat.nz_num; i<e; i++) for (std::size_t i=0, e=mat.nz_num; i<e; i++)
m_A[i] = 0.0; m_A[i] = 0.0;
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
terms_for_net_t *t = this->m_terms[k].get(); terms_for_net_t *t = this->m_terms[k].get();
nl_double gtot_t = 0.0; nl_double gtot_t = 0.0;
@ -295,7 +297,7 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
// add diagonal element // add diagonal element
m_A[mat.diag[k]] = gtot_t; m_A[mat.diag[k]] = gtot_t;
for (unsigned i = 0; i < railstart; i++) for (std::size_t i = 0; i < railstart; i++)
m_A[tcr[i]] -= go[i]; m_A[tcr[i]] -= go[i];
} }
#else #else
@ -327,21 +329,21 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
} }
else else
{ {
for (unsigned i = 0; i < iN - 1; i++) for (std::size_t i = 0; i < iN - 1; i++)
{ {
const auto &nzbd = this->m_terms[i]->m_nzbd; const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0) if (nzbd.size() > 0)
{ {
unsigned pi = mat.diag[i]; std::size_t pi = mat.diag[i];
const nl_double f = 1.0 / m_A[pi++]; const nl_double f = 1.0 / m_A[pi++];
const unsigned piie = mat.ia[i+1]; const std::size_t piie = mat.ia[i+1];
for (auto & j : nzbd) for (auto & j : nzbd)
{ {
// proceed to column i // proceed to column i
//__builtin_prefetch(&m_A[mat.diag[j+1]], 1); //__builtin_prefetch(&m_A[mat.diag[j+1]], 1);
unsigned pj = mat.ia[j]; std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i) while (mat.ja[pj] < i)
pj++; pj++;
@ -349,7 +351,7 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
const nl_double f1 = - m_A[pj++] * f; const nl_double f1 = - m_A[pj++] * f;
// subtract row i from j */ // subtract row i from j */
for (unsigned pii = pi; pii<piie; ) for (std::size_t pii = pi; pii<piie; )
{ {
while (mat.ja[pj] < mat.ja[pii]) while (mat.ja[pj] < mat.ja[pii])
pj++; pj++;
@ -368,7 +370,7 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
/* row n-1 */ /* row n-1 */
new_V[iN - 1] = RHS[iN - 1] / m_A[mat.diag[iN - 1]]; new_V[iN - 1] = RHS[iN - 1] / m_A[mat.diag[iN - 1]];
for (unsigned j = iN - 1; j-- > 0;) for (std::size_t j = iN - 1; j-- > 0;)
{ {
//__builtin_prefetch(&new_V[j-1], 1); //__builtin_prefetch(&new_V[j-1], 1);
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0); //if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
@ -390,8 +392,8 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
new_V[j] = (RHS[j] - tmpx) / m_A[mat.diag[j]]; new_V[j] = (RHS[j] - tmpx) / m_A[mat.diag[j]];
#else #else
double tmp = 0; double tmp = 0;
const unsigned e = mat.ia[j+1]; const std::size_t e = mat.ia[j+1];
for (unsigned pk = mat.diag[j] + 1; pk < e; pk++) for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
{ {
tmp += m_A[pk] * new_V[mat.ja[pk]]; tmp += m_A[pk] * new_V[mat.ja[pk]];
} }

View File

@ -23,12 +23,12 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N> class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N>
{ {
public: public:
matrix_solver_GMRES_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size) matrix_solver_GMRES_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size) : matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_use_iLU_preconditioning(true) , m_use_iLU_preconditioning(true)
, m_use_more_precise_stop_condition(false) , m_use_more_precise_stop_condition(false)
@ -45,7 +45,9 @@ public:
private: private:
unsigned solve_ilu_gmres(nl_double * RESTRICT x, const nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy); typedef typename mat_cr_t<storage_N>::type mattype;
unsigned solve_ilu_gmres(nl_double (& RESTRICT x)[storage_N], const nl_double * RESTRICT rhs, const unsigned restart_max, const std::size_t mr, nl_double accuracy);
std::vector<unsigned> m_term_cr[storage_N]; std::vector<unsigned> m_term_cr[storage_N];
@ -71,22 +73,22 @@ private:
// matrix_solver - GMRES // matrix_solver - GMRES
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets); matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
unsigned nz = 0; mattype nz = 0;
const unsigned iN = this->N(); const std::size_t iN = this->N();
for (unsigned k=0; k<iN; k++) for (std::size_t k=0; k<iN; k++)
{ {
terms_for_net_t * RESTRICT row = this->m_terms[k].get(); terms_for_net_t * RESTRICT row = this->m_terms[k].get();
mat.ia[k] = nz; mat.ia[k] = nz;
for (unsigned j=0; j<row->m_nz.size(); j++) for (std::size_t j=0; j<row->m_nz.size(); j++)
{ {
mat.ja[nz] = row->m_nz[j]; mat.ja[nz] = static_cast<mattype>(row->m_nz[j]);
if (row->m_nz[j] == k) if (row->m_nz[j] == k)
mat.diag[k] = nz; mat.diag[k] = nz;
nz++; nz++;
@ -110,10 +112,10 @@ void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
mat.nz_num = nz; mat.nz_num = nz;
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
const unsigned iN = this->N(); const std::size_t iN = this->N();
/* ideally, we could get an estimate for the spectral radius of /* ideally, we could get an estimate for the spectral radius of
* Inv(D - L) * U * Inv(D - L) * U
@ -130,7 +132,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
for (unsigned i=0, e=mat.nz_num; i<e; i++) for (unsigned i=0, e=mat.nz_num; i<e; i++)
m_A[i] = 0.0; m_A[i] = 0.0;
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double gtot_t = 0.0; nl_double gtot_t = 0.0;
nl_double RHS_t = 0.0; nl_double RHS_t = 0.0;
@ -168,12 +170,12 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
const nl_double accuracy = this->m_params.m_accuracy; const nl_double accuracy = this->m_params.m_accuracy;
unsigned mr = iN; std::size_t mr = iN;
if (iN > 3 ) if (iN > 3 )
mr = static_cast<unsigned>(std::sqrt(iN) * 2.0); mr = static_cast<unsigned>(std::sqrt(iN) * 2.0);
unsigned iter = std::max(1u, this->m_params.m_gs_loops); unsigned iter = std::max(1u, this->m_params.m_gs_loops);
unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy); unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, static_cast<unsigned>(mr), accuracy);
unsigned failed = mr * iter; unsigned failed = static_cast<unsigned>(mr) * iter;
this->m_iterative_total += gsl; this->m_iterative_total += gsl;
this->m_stat_calculations++; this->m_stat_calculations++;
@ -200,7 +202,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
} }
template <typename T> template <typename T>
inline void givens_mult( const T & c, const T & s, T & g0, T & g1 ) inline static void givens_mult( const T c, const T s, T & g0, T & g1 )
{ {
const T tg0 = c * g0 - s * g1; const T tg0 = c * g0 - s * g1;
const T tg1 = s * g0 + c * g1; const T tg1 = s * g0 + c * g1;
@ -209,8 +211,8 @@ inline void givens_mult( const T & c, const T & s, T & g0, T & g1 )
g1 = tg1; g1 = tg1;
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RESTRICT x, const nl_double * RESTRICT rhs, const unsigned restart_max, const unsigned mr, nl_double accuracy) unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RESTRICT x)[storage_N], const nl_double * RESTRICT rhs, const unsigned restart_max, const std::size_t mr, nl_double accuracy)
{ {
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* The code below was inspired by code published by John Burkardt under * The code below was inspired by code published by John Burkardt under
@ -237,7 +239,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
unsigned itr_used = 0; unsigned itr_used = 0;
double rho_delta = 0.0; double rho_delta = 0.0;
const unsigned n = this->N(); const std::size_t n = this->N();
if (m_use_iLU_preconditioning) if (m_use_iLU_preconditioning)
mat.incomplete_LU_factorization(m_A, m_LU); mat.incomplete_LU_factorization(m_A, m_LU);
@ -270,8 +272,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
for (unsigned itr = 0; itr < restart_max; itr++) for (unsigned itr = 0; itr < restart_max; itr++)
{ {
unsigned last_k = mr; std::size_t last_k = mr;
nl_double mu;
nl_double rho; nl_double rho;
nl_double Ax[storage_N]; nl_double Ax[storage_N];
@ -288,24 +289,27 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
rho = std::sqrt(vecmult2(n, residual)); rho = std::sqrt(vecmult2(n, residual));
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]); if (rho < rho_delta)
return itr_used + 1;
vec_set(mr+1, NL_FCONST(0.0), m_g); vec_set(mr+1, NL_FCONST(0.0), m_g);
m_g[0] = rho; m_g[0] = rho;
for (unsigned i = 0; i < mr; i++) for (std::size_t i = 0; i < mr; i++)
vec_set(mr + 1, NL_FCONST(0.0), m_ht[i]); vec_set(mr + 1, NL_FCONST(0.0), m_ht[i]);
for (unsigned k = 0; k < mr; k++) vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
for (std::size_t k = 0; k < mr; k++)
{ {
const unsigned k1 = k + 1; const std::size_t k1 = k + 1;
mat.mult_vec(m_A, m_v[k], m_v[k1]); mat.mult_vec(m_A, m_v[k], m_v[k1]);
if (m_use_iLU_preconditioning) if (m_use_iLU_preconditioning)
mat.solveLUx(m_LU, m_v[k1]); mat.solveLUx(m_LU, m_v[k1]);
for (unsigned j = 0; j <= k; j++) for (std::size_t j = 0; j <= k; j++)
{ {
m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]); m_ht[j][k] = vecmult(n, m_v[k1], m_v[j]);
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]); vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
@ -315,13 +319,13 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
if (m_ht[k1][k] != 0.0) if (m_ht[k1][k] != 0.0)
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]); vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
for (unsigned j = 0; j < k; j++) for (std::size_t j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]); givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
mu = std::hypot(m_ht[k][k], m_ht[k1][k]); const nl_double mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
m_c[k] = m_ht[k][k] / mu; m_c[k] = m_ht[k][k] * mu;
m_s[k] = -m_ht[k1][k] / mu; m_s[k] = -m_ht[k1][k] * mu;
m_ht[k][k] = m_c[k] * m_ht[k][k] - m_s[k] * m_ht[k1][k]; m_ht[k][k] = m_c[k] * m_ht[k][k] - m_s[k] * m_ht[k1][k];
m_ht[k1][k] = 0.0; m_ht[k1][k] = 0.0;
@ -344,17 +348,17 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double * RES
/* Solve the system H * y = g */ /* Solve the system H * y = g */
/* x += m_v[j] * m_y[j] */ /* x += m_v[j] * m_y[j] */
for (unsigned i = last_k + 1; i-- > 0;) for (std::size_t i = last_k + 1; i-- > 0;)
{ {
double tmp = m_g[i]; double tmp = m_g[i];
for (unsigned j = i + 1; j <= last_k; j++) for (std::size_t j = i + 1; j <= last_k; j++)
{ {
tmp -= m_ht[i][j] * m_y[j]; tmp -= m_ht[i][j] * m_y[j];
} }
m_y[i] = tmp / m_ht[i][i]; m_y[i] = tmp / m_ht[i][i];
} }
for (unsigned i = 0; i <= last_k; i++) for (std::size_t i = 0; i <= last_k; i++)
vec_add_mult_scalar(n, m_v[i], m_y[i], x); vec_add_mult_scalar(n, m_v[i], m_y[i], x);
#if 1 #if 1

View File

@ -47,7 +47,7 @@ namespace netlist
//#define nl_ext_double long double // slightly slower //#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double #define nl_ext_double nl_double
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_sm_t: public matrix_solver_t class matrix_solver_sm_t: public matrix_solver_t
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
@ -55,7 +55,7 @@ class matrix_solver_sm_t: public matrix_solver_t
public: public:
matrix_solver_sm_t(netlist_t &anetlist, const pstring &name, matrix_solver_sm_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size); const solver_parameters_t *params, const std::size_t size);
virtual ~matrix_solver_sm_t(); virtual ~matrix_solver_sm_t();
@ -66,7 +66,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson); unsigned solve_non_dynamic(const bool newton_raphson);
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; } constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
void LE_invert(); void LE_invert();
@ -103,7 +103,7 @@ private:
//nl_ext_double m_RHSx[storage_N]; //nl_ext_double m_RHSx[storage_N];
const unsigned m_dim; const std::size_t m_dim;
}; };
@ -111,7 +111,7 @@ private:
// matrix_solver_direct // matrix_solver_direct
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t() matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
{ {
#if (NL_USE_DYNAMIC_ALLOCATION) #if (NL_USE_DYNAMIC_ALLOCATION)
@ -119,7 +119,7 @@ matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
#endif #endif
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_t::setup_base(nets); matrix_solver_t::setup_base(nets);
@ -132,14 +132,14 @@ void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_sm_t<m_N, storage_N>::LE_invert() void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
{ {
const unsigned kN = N(); const std::size_t kN = N();
for (unsigned i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
for (unsigned j = 0; j < kN; j++) for (std::size_t j = 0; j < kN; j++)
{ {
W(i,j) = lA(i,j) = A(i,j); W(i,j) = lA(i,j) = A(i,j);
Ainv(i,j) = 0.0; Ainv(i,j) = 0.0;
@ -147,7 +147,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
Ainv(i,i) = 1.0; Ainv(i,i) = 1.0;
} }
/* down */ /* down */
for (unsigned i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const nl_double f = 1.0 / W(i,i);
@ -166,28 +166,28 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
{ {
for (std::size_t k = 0; k < e; k++) for (std::size_t k = 0; k < e; k++)
W(j,p[k]) += W(i,p[k]) * f1; W(j,p[k]) += W(i,p[k]) * f1;
for (unsigned k = 0; k <= i; k ++) for (std::size_t k = 0; k <= i; k ++)
Ainv(j,k) += Ainv(i,k) * f1; Ainv(j,k) += Ainv(i,k) * f1;
} }
} }
} }
/* up */ /* up */
for (unsigned i = kN; i-- > 0; ) for (std::size_t i = kN; i-- > 0; )
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const nl_double f = 1.0 / W(i,i);
for (unsigned j = i; j-- > 0; ) for (std::size_t j = i; j-- > 0; )
{ {
const nl_double f1 = - W(j,i) * f; const nl_double f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (unsigned k = i; k < kN; k++) for (std::size_t k = i; k < kN; k++)
W(j,k) += W(i,k) * f1; W(j,k) += W(i,k) * f1;
for (unsigned k = 0; k < kN; k++) for (std::size_t k = 0; k < kN; k++)
Ainv(j,k) += Ainv(i,k) * f1; Ainv(j,k) += Ainv(i,k) * f1;
} }
} }
for (unsigned k = 0; k < kN; k++) for (std::size_t k = 0; k < kN; k++)
{ {
Ainv(i,k) *= f; Ainv(i,k) *= f;
lAinv(i,k) = Ainv(i,k); lAinv(i,k) = Ainv(i,k);
@ -195,27 +195,27 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
template <typename T> template <typename T>
void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x( void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
T * RESTRICT x) T * RESTRICT x)
{ {
const unsigned kN = N(); const std::size_t kN = N();
for (unsigned i=0; i<kN; i++) for (std::size_t i=0; i<kN; i++)
x[i] = 0.0; x[i] = 0.0;
for (unsigned k=0; k<kN; k++) for (std::size_t k=0; k<kN; k++)
{ {
const nl_double f = RHS(k); const nl_double f = RHS(k);
for (unsigned i=0; i<kN; i++) for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f; x[i] += Ainv(i,k) * f;
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
{ {
static const bool incremental = true; static const bool incremental = true;
@ -305,29 +305,29 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
inline unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) inline unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
build_LE_A<matrix_solver_sm_t>(); build_LE_A<matrix_solver_sm_t>();
build_LE_RHS<matrix_solver_sm_t>(); build_LE_RHS<matrix_solver_sm_t>();
for (unsigned i=0, iN=N(); i < iN; i++) for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i); m_last_RHS[i] = RHS(i);
this->m_stat_calculations++; this->m_stat_calculations++;
return this->solve_non_dynamic(newton_raphson); return this->solve_non_dynamic(newton_raphson);
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::matrix_solver_sm_t(netlist_t &anetlist, const pstring &name, matrix_solver_sm_t<m_N, storage_N>::matrix_solver_sm_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params) : matrix_solver_t(anetlist, name, NOSORT, params)
, m_dim(size) , m_dim(size)
{ {
#if (NL_USE_DYNAMIC_ALLOCATION) #if (NL_USE_DYNAMIC_ALLOCATION)
m_A = palloc_array(nl_ext_double, N() * m_pitch); m_A = palloc_array(nl_ext_double, N() * m_pitch);
#endif #endif
for (unsigned k = 0; k < N(); k++) for (std::size_t k = 0; k < N(); k++)
{ {
m_last_RHS[k] = 0.0; m_last_RHS[k] = 0.0;
} }

View File

@ -21,12 +21,12 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_t: public matrix_solver_direct_t<m_N, storage_N> class matrix_solver_SOR_t: public matrix_solver_direct_t<m_N, storage_N>
{ {
public: public:
matrix_solver_SOR_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size) matrix_solver_SOR_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size) : matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_lp_fact(*this, "m_lp_fact", 0) , m_lp_fact(*this, "m_lp_fact", 0)
{ {
@ -46,16 +46,16 @@ private:
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_SOR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_SOR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets); matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
const unsigned iN = this->N(); const std::size_t iN = this->N();
bool resched = false; bool resched = false;
unsigned resched_cnt = 0; unsigned resched_cnt = 0;

View File

@ -22,14 +22,14 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, storage_N> class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, storage_N>
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
matrix_solver_SOR_mat_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size) matrix_solver_SOR_mat_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::DESCENDING, params, size) : matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
, m_Vdelta(*this, "m_Vdelta", 0.0) , m_Vdelta(*this, "m_Vdelta", 0.0)
, m_omega(*this, "m_omega", params->m_gs_sor) , m_omega(*this, "m_omega", params->m_gs_sor)
@ -58,7 +58,7 @@ private:
// matrix_solver - Gauss - Seidel // matrix_solver - Gauss - Seidel
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_SOR_mat_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_SOR_mat_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets); matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
@ -113,7 +113,7 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
} }
#endif #endif
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
/* The matrix based code looks a lot nicer but actually is 30% slower than /* The matrix based code looks a lot nicer but actually is 30% slower than
@ -123,7 +123,7 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
nl_double new_v[storage_N] = { 0.0 }; nl_double new_v[storage_N] = { 0.0 };
const unsigned iN = this->N(); const std::size_t iN = this->N();
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>(); matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>();
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>(); matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>();
@ -169,14 +169,14 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
} }
#endif #endif
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
new_v[k] = this->m_nets[k]->m_cur_Analog; new_v[k] = this->m_nets[k]->m_cur_Analog;
do { do {
resched = false; resched = false;
nl_double cerr = 0.0; nl_double cerr = 0.0;
for (unsigned k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double Idrive = 0; nl_double Idrive = 0;

View File

@ -54,13 +54,13 @@ namespace netlist
//#define nl_ext_double long double // slightly slower //#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double #define nl_ext_double nl_double
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_w_t: public matrix_solver_t class matrix_solver_w_t: public matrix_solver_t
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
matrix_solver_w_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const unsigned size); matrix_solver_w_t(netlist_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
virtual ~matrix_solver_w_t(); virtual ~matrix_solver_w_t();
@ -71,7 +71,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson); unsigned solve_non_dynamic(const bool newton_raphson);
inline unsigned N() const { if (m_N == 0) return m_dim; else return m_N; } constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
void LE_invert(); void LE_invert();
@ -115,7 +115,7 @@ private:
//nl_ext_double m_RHSx[storage_N]; //nl_ext_double m_RHSx[storage_N];
const unsigned m_dim; const std::size_t m_dim;
}; };
@ -123,12 +123,12 @@ private:
// matrix_solver_direct // matrix_solver_direct
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_w_t<m_N, storage_N>::~matrix_solver_w_t() matrix_solver_w_t<m_N, storage_N>::~matrix_solver_w_t()
{ {
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_t::setup_base(nets); matrix_solver_t::setup_base(nets);
@ -141,14 +141,14 @@ void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_w_t<m_N, storage_N>::LE_invert() void matrix_solver_w_t<m_N, storage_N>::LE_invert()
{ {
const unsigned kN = N(); const std::size_t kN = N();
for (unsigned i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
for (unsigned j = 0; j < kN; j++) for (std::size_t j = 0; j < kN; j++)
{ {
W(i,j) = lA(i,j) = A(i,j); W(i,j) = lA(i,j) = A(i,j);
Ainv(i,j) = 0.0; Ainv(i,j) = 0.0;
@ -156,7 +156,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
Ainv(i,i) = 1.0; Ainv(i,i) = 1.0;
} }
/* down */ /* down */
for (unsigned i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const nl_double f = 1.0 / W(i,i);
@ -167,63 +167,63 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
const auto * RESTRICT const pb = m_terms[i]->m_nzbd.data(); const auto * RESTRICT const pb = m_terms[i]->m_nzbd.data();
const size_t eb = m_terms[i]->m_nzbd.size(); const size_t eb = m_terms[i]->m_nzbd.size();
for (unsigned jb = 0; jb < eb; jb++) for (std::size_t jb = 0; jb < eb; jb++)
{ {
const auto j = pb[jb]; const auto j = pb[jb];
const nl_double f1 = - W(j,i) * f; const nl_double f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (unsigned k = 0; k < e; k++) for (std::size_t k = 0; k < e; k++)
W(j,p[k]) += W(i,p[k]) * f1; W(j,p[k]) += W(i,p[k]) * f1;
for (unsigned k = 0; k <= i; k ++) for (std::size_t k = 0; k <= i; k ++)
Ainv(j,k) += Ainv(i,k) * f1; Ainv(j,k) += Ainv(i,k) * f1;
} }
} }
} }
/* up */ /* up */
for (unsigned i = kN; i-- > 0; ) for (std::size_t i = kN; i-- > 0; )
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const nl_double f = 1.0 / W(i,i);
for (unsigned j = i; j-- > 0; ) for (std::size_t j = i; j-- > 0; )
{ {
const nl_double f1 = - W(j,i) * f; const nl_double f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (unsigned k = i; k < kN; k++) for (std::size_t k = i; k < kN; k++)
W(j,k) += W(i,k) * f1; W(j,k) += W(i,k) * f1;
for (unsigned k = 0; k < kN; k++) for (std::size_t k = 0; k < kN; k++)
Ainv(j,k) += Ainv(i,k) * f1; Ainv(j,k) += Ainv(i,k) * f1;
} }
} }
for (unsigned k = 0; k < kN; k++) for (std::size_t k = 0; k < kN; k++)
{ {
Ainv(i,k) *= f; Ainv(i,k) *= f;
} }
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
template <typename T> template <typename T>
void matrix_solver_w_t<m_N, storage_N>::LE_compute_x( void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
T * RESTRICT x) T * RESTRICT x)
{ {
const unsigned kN = N(); const std::size_t kN = N();
for (unsigned i=0; i<kN; i++) for (std::size_t i=0; i<kN; i++)
x[i] = 0.0; x[i] = 0.0;
for (unsigned k=0; k<kN; k++) for (std::size_t k=0; k<kN; k++)
{ {
const nl_double f = RHS(k); const nl_double f = RHS(k);
for (unsigned i=0; i<kN; i++) for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f; x[i] += Ainv(i,k) * f;
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
{ {
const auto iN = N(); const auto iN = N();
@ -369,27 +369,27 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
} }
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
inline unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) inline unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
{ {
build_LE_A<matrix_solver_w_t>(); build_LE_A<matrix_solver_w_t>();
build_LE_RHS<matrix_solver_w_t>(); build_LE_RHS<matrix_solver_w_t>();
for (unsigned i=0, iN=N(); i < iN; i++) for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i); m_last_RHS[i] = RHS(i);
this->m_stat_calculations++; this->m_stat_calculations++;
return this->solve_non_dynamic(newton_raphson); return this->solve_non_dynamic(newton_raphson);
} }
template <unsigned m_N, unsigned storage_N> template <std::size_t m_N, std::size_t storage_N>
matrix_solver_w_t<m_N, storage_N>::matrix_solver_w_t(netlist_t &anetlist, const pstring &name, matrix_solver_w_t<m_N, storage_N>::matrix_solver_w_t(netlist_t &anetlist, const pstring &name,
const solver_parameters_t *params, const unsigned size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params) : matrix_solver_t(anetlist, name, NOSORT, params)
,m_cnt(0) ,m_cnt(0)
, m_dim(size) , m_dim(size)
{ {
for (unsigned k = 0; k < N(); k++) for (std::size_t k = 0; k < N(); k++)
{ {
m_last_RHS[k] = 0.0; m_last_RHS[k] = 0.0;
} }

View File

@ -108,6 +108,10 @@ NETLIB_UPDATE(solver)
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve(); ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
} }
} }
for (auto & solver : m_mat_solvers)
if (solver->has_timestep_devices() || force_solve)
solver->update_inputs();
} }
else else
for (int i = 0; i < t_cnt; i++) for (int i = 0; i < t_cnt; i++)
@ -115,12 +119,16 @@ NETLIB_UPDATE(solver)
{ {
// Ignore return value // Ignore return value
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve(); ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
solver->update_inputs();
} }
#else #else
for (auto & solver : m_mat_solvers) for (auto & solver : m_mat_solvers)
if (solver->has_timestep_devices() || force_solve) if (solver->has_timestep_devices() || force_solve)
{
// Ignore return value // Ignore return value
ATTR_UNUSED const netlist_time ts = solver->solve(); ATTR_UNUSED const netlist_time ts = solver->solve();
solver->update_inputs();
}
#endif #endif
/* step circuit */ /* step circuit */
@ -132,14 +140,14 @@ NETLIB_UPDATE(solver)
} }
template <class C> template <class C>
std::unique_ptr<matrix_solver_t> create_it(netlist_t &nl, pstring name, solver_parameters_t &params, unsigned size) std::unique_ptr<matrix_solver_t> create_it(netlist_t &nl, pstring name, solver_parameters_t &params, std::size_t size)
{ {
typedef C solver; typedef C solver;
return plib::make_unique<solver>(nl, name, &params, size); return plib::make_unique<solver>(nl, name, &params, size);
} }
template <int m_N, int storage_N> template <std::size_t m_N, std::size_t storage_N>
std::unique_ptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver(unsigned size, const bool use_specific) std::unique_ptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver(std::size_t size, const bool use_specific)
{ {
pstring solvername = plib::pfmt("Solver_{1}")(m_mat_solvers.size()); pstring solvername = plib::pfmt("Solver_{1}")(m_mat_solvers.size());
if (use_specific && m_N == 1) if (use_specific && m_N == 1)
@ -294,11 +302,11 @@ void NETLIB_NAME(solver)::post_start()
for (auto & grp : splitter.groups) for (auto & grp : splitter.groups)
{ {
std::unique_ptr<matrix_solver_t> ms; std::unique_ptr<matrix_solver_t> ms;
unsigned net_count = static_cast<unsigned>(grp.size()); std::size_t net_count = grp.size();
switch (net_count) switch (net_count)
{ {
#if 0 #if 1
case 1: case 1:
ms = create_solver<1,1>(1, false); ms = create_solver<1,1>(1, false);
break; break;
@ -341,6 +349,9 @@ void NETLIB_NAME(solver)::post_start()
case 31: case 31:
ms = create_solver<31,31>(31, use_specific); ms = create_solver<31,31>(31, use_specific);
break; break;
case 35:
ms = create_solver<31,31>(31, use_specific);
break;
case 49: case 49:
ms = create_solver<49,49>(49, use_specific); ms = create_solver<49,49>(49, use_specific);
break; break;
@ -352,7 +363,11 @@ void NETLIB_NAME(solver)::post_start()
#endif #endif
default: default:
log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count); log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count);
if (net_count <= 16) if (net_count <= 8)
{
ms = create_solver<0, 8>(net_count, use_specific);
}
else if (net_count <= 16)
{ {
ms = create_solver<0,16>(net_count, use_specific); ms = create_solver<0,16>(net_count, use_specific);
} }

View File

@ -112,8 +112,8 @@ private:
solver_parameters_t m_params; solver_parameters_t m_params;
template <int m_N, int storage_N> template <std::size_t m_N, std::size_t storage_N>
std::unique_ptr<matrix_solver_t> create_solver(unsigned size, bool use_specific); std::unique_ptr<matrix_solver_t> create_solver(std::size_t size, bool use_specific);
}; };
} //namespace devices } //namespace devices

View File

@ -36,14 +36,14 @@ private:
#endif #endif
template<typename T> template<typename T>
inline void vec_set (const std::size_t & n, const T &scalar, T * RESTRICT result) inline static void vec_set (const std::size_t n, const T scalar, T * RESTRICT result)
{ {
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
result[i] = scalar; result[i] = scalar;
} }
template<typename T> template<typename T>
inline T vecmult (const std::size_t & n, const T * RESTRICT a1, const T * RESTRICT a2 ) inline static T vecmult (const std::size_t n, const T * RESTRICT a1, const T * RESTRICT a2 )
{ {
T value = 0.0; T value = 0.0;
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
@ -52,7 +52,7 @@ inline T vecmult (const std::size_t & n, const T * RESTRICT a1, const T * RESTRI
} }
template<typename T> template<typename T>
inline T vecmult2 (const std::size_t & n, const T *a1) inline static T vecmult2 (const std::size_t n, const T *a1)
{ {
T value = 0.0; T value = 0.0;
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
@ -63,43 +63,56 @@ inline T vecmult2 (const std::size_t & n, const T *a1)
return value; return value;
} }
template<typename T> template<typename T, std::size_t N>
inline void vec_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T & scalar, T * RESTRICT result) inline static void vec_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T & scalar, T (& RESTRICT result)[N])
{ {
if (n != N)
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
{
result[i] = scalar * v[i]; result[i] = scalar * v[i];
} else
for ( std::size_t i = 0; i < N; i++ )
result[i] = scalar * v[i];
}
template<typename T, std::size_t N>
inline static void vec_add_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T scalar, T (& RESTRICT result)[N])
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
result[i] = result[i] + scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = result[i] + scalar * v[i];
} }
template<typename T> template<typename T>
inline void vec_add_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T scalar, T * RESTRICT result) inline static void vec_add_mult_scalar (const std::size_t & n, const T * RESTRICT v, const T scalar, T * RESTRICT result)
{ {
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
result[i] += scalar * v[i]; result[i] += scalar * v[i];
} }
inline void vec_add_ip(const std::size_t & n, const double * RESTRICT v, double * RESTRICT result) inline static void vec_add_ip(const std::size_t n, const double * RESTRICT v, double * RESTRICT result)
{ {
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
result[i] += v[i]; result[i] += v[i];
} }
template<typename T> template<typename T>
inline void vec_sub(const std::size_t & n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result) inline void vec_sub(const std::size_t n, const T * RESTRICT v1, const T * RESTRICT v2, T * RESTRICT result)
{ {
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
result[i] = v1[i] - v2[i]; result[i] = v1[i] - v2[i];
} }
template<typename T> template<typename T>
inline void vec_scale (const std::size_t & n, T * RESTRICT v, const T scalar) inline void vec_scale (const std::size_t n, T * RESTRICT v, const T scalar)
{ {
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
v[i] = scalar * v[i]; v[i] = scalar * v[i];
} }
inline double vec_maxabs(const std::size_t & n, const double * RESTRICT v) inline double vec_maxabs(const std::size_t n, const double * RESTRICT v)
{ {
double ret = 0.0; double ret = 0.0;
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )