netlist: code maintenance. (nw)

- some readability improvements
- some simplifications
- kidniki uses frontiers again (speed improvement)
This commit is contained in:
couriersud 2019-10-10 00:32:32 +02:00
parent 67621f11d9
commit 161cc143a5
14 changed files with 246 additions and 204 deletions

View File

@ -125,7 +125,7 @@ namespace netlist
// nld_solver.cpp
PERRMSGV(MF_UNKNOWN_SOLVER_TYPE, 1, "Unknown solver type: {1}")
//PERRMSGV(MF_UNKNOWN_SOLVER_TYPE, 1, "Unknown solver type: {1}")
PERRMSGV(MF_NETGROUP_SIZE_EXCEEDED_1, 1, "Encountered netgroup with > {1} nets")
PERRMSGV(MI_NO_SPECIFIC_SOLVER, 1, "No specific solver found for netlist of size {1}")

View File

@ -72,7 +72,7 @@ namespace plib
}
template<typename V>
void solve_LU_inplace(V &v)
void solve_inplace(V &v)
{
m_LU.solveLU(v);
}
@ -91,6 +91,7 @@ namespace plib
mat_precondition_diag(std::size_t size, int dummy = 0)
: m_mat(size)
, m_diag(size)
, nzcol(size)
{
plib::unused_var(dummy);
}
@ -99,6 +100,18 @@ namespace plib
void build(M &fill)
{
m_mat.build_from_fill_mat(fill, 0);
for (std::size_t i = 0; i< m_diag.size(); i++)
{
for (std::size_t j = 0; j< m_diag.size(); j++)
{
std::size_t k=m_mat.row_idx[j];
while (m_mat.col_idx[k] < i && k < m_mat.row_idx[j+1])
k++;
if (m_mat.col_idx[k] == i && k < m_mat.row_idx[j+1])
nzcol[i].push_back(k);
}
nzcol[i].push_back(static_cast<std::size_t>(-1));
}
}
template<typename R, typename V>
@ -111,12 +124,51 @@ namespace plib
{
for (std::size_t i = 0; i< m_diag.size(); i++)
{
m_diag[i] = 1.0 / m_mat.A[m_mat.diag[i]];
// ILUT: 265%
FT v(0.0);
#if 0
// doesn't works, Mame perforamnce drops significantly%
// 136%
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
v += m_mat.A[j] * m_mat.A[j];
m_diag[i] = 1.0 / std::sqrt(v);
#elif 0
// works halfway, i.e. Mame perforamnce 50%
// 147% - lowest average solution time with 7.094
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
v += m_mat.A[j] * m_mat.A[j];
m_diag[i] = m_mat.A[m_mat.diag[i]] / v;
#elif 0
// works halfway, i.e. Mame perforamnce 50%
// sum over column i
// 344% - lowest average solution time with 3.06
std::size_t nzcolp = 0;
const auto &nz = nzcol[i];
std::size_t j;
while ((j = nz[nzcolp++])!=static_cast<std::size_t>(-1)) // NOLINT(bugprone-infinite-loop)
{
v += m_mat.A[j] * m_mat.A[j];
}
m_diag[i] = m_mat.A[m_mat.diag[i]] / v;
#elif 0
// works halfway, i.e. Mame perforamnce 50%
// 151%
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
v += std::abs(m_mat.A[j]);
m_diag[i] = 1.0 / v;
#else
// 124%
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
v = std::max(v, std::abs(m_mat.A[j]));
m_diag[i] = 1.0 / v;
#endif
//m_diag[i] = 1.0 / m_mat.A[m_mat.diag[i]];
}
}
template<typename V>
void solve_LU_inplace(V &v)
void solve_inplace(V &v)
{
for (std::size_t i = 0; i< m_diag.size(); i++)
v[i] = v[i] * m_diag[i];
@ -124,6 +176,7 @@ namespace plib
plib::matrix_compressed_rows_t<FT, SIZE> m_mat;
plib::parray<FT, SIZE> m_diag;
plib::parray<std::vector<std::size_t>, SIZE > nzcol;
};
template <typename FT, int SIZE>
@ -152,7 +205,7 @@ namespace plib
}
template<typename V>
void solve_LU_inplace(V &v)
void solve_inplace(V &v)
{
plib::unused_var(v);
}
@ -163,7 +216,7 @@ namespace plib
/* FIXME: hardcoding RESTART to 20 becomes an issue on very large
* systems.
*/
template <typename FT, int SIZE, int RESTART = 20>
template <typename FT, int SIZE, int RESTART = 80>
struct gmres_t
{
public:
@ -191,18 +244,18 @@ namespace plib
std::size_t size() const { return (SIZE<=0) ? m_size : static_cast<std::size_t>(SIZE); }
template <int k, typename OPS, typename VT>
bool do_k(OPS &ops, VT &x, FT rho_delta, bool dummy)
bool do_k(OPS &ops, VT &x, std::size_t &itr_used, FT rho_delta, bool dummy)
{
plib::unused_var(dummy);
//printf("%d\n", k);
if (do_k<k-1, OPS>(ops, x, rho_delta, do_khelper<k-1>::value))
if (do_k<k-1, OPS>(ops, x, itr_used, rho_delta, do_khelper<k-1>::value))
return true;
const std::size_t kp1 = k + 1;
const std::size_t n = size();
ops.calc_rhs(m_v[kp1], m_v[k]);
ops.solve_LU_inplace(m_v[kp1]);
ops.solve_inplace(m_v[kp1]);
for (std::size_t j = 0; j <= k; j++)
{
@ -229,7 +282,7 @@ namespace plib
FT rho = std::abs(m_g[kp1]);
// FIXME ..
//itr_used = itr_used + 1;
itr_used = itr_used + 1;
if (rho <= rho_delta || k == RESTART-1)
{
@ -252,10 +305,9 @@ namespace plib
}
template <int k, typename OPS, typename VT>
bool do_k(OPS &ops, VT &x, FT rho_delta, float dummy)
bool do_k(OPS &ops, VT &x, std::size_t &itr_used, FT rho_delta, float dummy)
{
plib::unused_var(ops, x, rho_delta, dummy);
//printf("here\n");
plib::unused_var(ops, x, itr_used, rho_delta, dummy);
return false;
}
@ -307,7 +359,7 @@ namespace plib
vec_set_scalar(n, residual, accuracy);
ops.calc_rhs(Ax, residual);
ops.solve_LU_inplace(Ax);
ops.solve_inplace(Ax);
const float_type rho_to_accuracy = std::sqrt(vec_mult2<FT>(n, Ax)) / accuracy;
@ -320,7 +372,7 @@ namespace plib
* Using
*
* vec_set(n, x, rhs);
* ops.solve_LU_inplace(x);
* ops.solve_inplace(x);
*
* to get a starting point for x degrades convergence speed compared
* to using the last solution for x.
@ -337,7 +389,7 @@ namespace plib
vec_sub(n, residual, rhs, Ax);
ops.solve_LU_inplace(residual);
ops.solve_inplace(residual);
rho = std::sqrt(vec_mult2<FT>(n, residual));
@ -353,7 +405,7 @@ namespace plib
vec_mult_scalar(n, m_v[0], residual, constants<FT>::one() / rho);
if (do_k<RESTART-1>(ops, x, rho_delta, true))
if (do_k<RESTART-1>(ops, x, itr_used, rho_delta, true))
// converged
break;
}
@ -446,7 +498,7 @@ namespace plib
for (int i = 0; i < iter_max; i++)
{
ops.solve_LU_inplace(residual);
ops.solve_inplace(residual);
if (i==0)
{
vec_set(size(), p, residual);

View File

@ -207,6 +207,7 @@ namespace plib
// proceed to column i
std::size_t pj = row_idx[j];
std::size_t pje = row_idx[j+1];
while (col_idx[pj] < i)
pj++;
@ -216,7 +217,7 @@ namespace plib
// subtract row i from j
// fill-in available assumed, i.e. matrix was prepared
for (std::size_t pii = pi; pii<piie; pii++)
for (std::size_t pii = pi; pii<piie && pj < pje; pii++)
{
while (col_idx[pj] < col_idx[pii])
pj++;
@ -229,6 +230,14 @@ namespace plib
}
}
int get_parallel_level(std::size_t k) const
{
for (std::size_t i = 0; i < m_ge_par.size(); i++)
if (plib::container::contains( m_ge_par[i], k))
return static_cast<int>(i);
return -1;
}
template <typename V>
void gaussian_elimination_parallel(V & RHS)
{
@ -314,25 +323,17 @@ namespace plib
* res = A * x
*/
#if 0
parray<value_type, N < 0 ? -N * N : N *N> xi(m_size*m_size);
//std::vector<value_type> xi(m_size*m_size);
plib::omp::for_static(0, constants<index_type>::zero(), row_idx[m_size], [this, &x, &xi](index_type k)
{
xi[k] = x[col_idx[k]];
});
#endif
#if 1
//plib::omp::set_num_threads(4);
plib::omp::for_static(0, constants<index_type>::zero(), m_size, [this, &res, &x](index_type row)
{
T tmp = 0.0;
const index_type e = row_idx[row+1];
T tmp(0.0);
const index_type e(row_idx[row+1]);
for (index_type k = row_idx[row]; k < e; k++)
tmp += A[k] * x[col_idx[k]];
res[row] = tmp;
});
#else
// this is a bit faster than the version above
std::size_t row = 0;
std::size_t k = 0;
const std::size_t oe = nz_num;
@ -373,10 +374,10 @@ namespace plib
template <typename LUMAT>
void reduction_copy_from(LUMAT & src)
{
C sp = 0;
for (std::size_t r=0; r<src.size(); r++)
C sp(0);
for (index_type r=0; r<src.size(); r++)
{
C dp = row_idx[r];
C dp(row_idx[r]);
while(sp < src.row_idx[r+1])
{
/* advance dp to source column and fill 0s if necessary */
@ -397,7 +398,7 @@ namespace plib
template <typename LUMAT>
void raw_copy_from(LUMAT & src)
{
for (std::size_t k = 0; k < nz_num; k++)
for (index_type k = 0; k < nz_num; k++)
A[k] = src.A[k];
}
@ -422,32 +423,32 @@ namespace plib
*/
index_type p(0);
while (std::size_t i = ilu_rows[p++]) // NOLINT(bugprone-infinite-loop)
while (auto i = ilu_rows[p++]) // NOLINT(bugprone-infinite-loop)
{
const std::size_t p_i_end = row_idx[i + 1];
const auto p_i_end = row_idx[i + 1];
// loop over all columns k left of diag in row i
//if (row_idx[i] < diag[i])
// printf("occ %d\n", (int)i);
for (std::size_t i_k = row_idx[i]; i_k < diag[i]; i_k++)
for (auto i_k = row_idx[i]; i_k < diag[i]; i_k++)
{
const std::size_t k = col_idx[i_k];
const std::size_t p_k_end = row_idx[k + 1];
const auto k(col_idx[i_k]);
const auto p_k_end(row_idx[k + 1]);
const T LUp_i_k = A[i_k] = A[i_k] / A[diag[k]];
std::size_t k_j = diag[k] + 1;
std::size_t i_j = i_k + 1;
auto k_j(diag[k] + 1);
auto i_j(i_k + 1);
while (i_j < p_i_end && k_j < p_k_end ) // pj = (i, j)
{
// we can assume that within a row ja increases continuously */
const std::size_t c_i_j = col_idx[i_j]; // row i, column j
const std::size_t c_k_j = col_idx[k_j]; // row k, column j
if (c_k_j < c_i_j)
k_j++;
else if (c_k_j == c_i_j)
A[i_j++] -= LUp_i_k * A[k_j++];
else
i_j++;
const auto c_i_j(col_idx[i_j]); // row i, column j
const auto c_k_j(col_idx[k_j]); // row k, column j
if (c_k_j == c_i_j)
A[i_j] -= LUp_i_k * A[k_j];
k_j += (c_k_j <= c_i_j ? 1 : 0);
i_j += (c_k_j >= c_i_j ? 1 : 0);
}
}
}
@ -477,23 +478,23 @@ namespace plib
* This can be solved for x using backwards elimination in U.
*
*/
for (std::size_t i = 1; i < size(); ++i )
for (index_type i = 1; i < size(); ++i )
{
T tmp = 0.0;
const std::size_t j1 = row_idx[i];
const std::size_t j2 = diag[i];
const auto j1(row_idx[i]);
const auto j2(diag[i]);
for (std::size_t j = j1; j < j2; ++j )
for (auto j = j1; j < j2; ++j )
tmp += A[j] * r[col_idx[j]];
r[i] -= tmp;
}
// i now is equal to n;
for (std::size_t i = size(); i-- > 0; )
for (auto i = size(); i-- > 0; )
{
T tmp = 0.0;
const std::size_t di = diag[i];
const std::size_t j2 = row_idx[i+1];
for (std::size_t j = di + 1; j < j2; j++ )
const auto di(diag[i]);
const auto j2(row_idx[i+1]);
for (auto j = di + 1; j < j2; j++ )
tmp += A[j] * r[col_idx[j]];
r[i] = (r[i] - tmp) / A[di];
}

View File

@ -62,7 +62,7 @@ public:
//return m_sym(args...);
}
bool resolved() { return m_sym != nullptr; }
bool resolved() const { return m_sym != nullptr; }
private:
calltype m_sym;
};

View File

@ -242,18 +242,21 @@ namespace plib
ename (E v) : m_v(v) { } \
template <typename T> explicit ename(T val) { m_v = static_cast<E>(val); } \
bool set_from_string (const pstring &s) { \
static const pstring strings = # __VA_ARGS__; \
int f = from_string_int(strings, s); \
int f = from_string_int(strings(), s); \
if (f>=0) { m_v = static_cast<E>(f); return true; } else { return false; } \
} \
operator E() const {return m_v;} \
bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \
bool operator==(const E &rhs) const {return m_v == rhs;} \
std::string name() const { \
static const pstring strings = # __VA_ARGS__; \
return nthstr(static_cast<int>(m_v), strings); \
return nthstr(static_cast<int>(m_v), strings()); \
} \
private: E m_v; };
private: E m_v; \
static pstring strings() {\
static const pstring lstrings = # __VA_ARGS__; \
return lstrings; \
} \
};
#endif /* PUTIL_H_ */

View File

@ -15,15 +15,16 @@ namespace netlist
namespace devices
{
terms_for_net_t::terms_for_net_t()
terms_for_net_t::terms_for_net_t(analog_net_t * net)
: m_railstart(0)
, m_last_V(0.0)
, m_DD_n_m_1(0.0)
, m_h_n_m_1(1e-12)
, m_net(net)
{
}
void terms_for_net_t::add(terminal_t *term, int net_other, bool sorted)
void terms_for_net_t::add_terminal(terminal_t *term, int net_other, bool sorted)
{
if (sorted)
for (std::size_t i=0; i < m_connected_net_idx.size(); i++)
@ -65,13 +66,11 @@ namespace devices
log().debug("New solver setup\n");
m_nets.clear();
m_terms.clear();
for (auto & net : nets)
{
m_nets.push_back(net);
m_terms.push_back(plib::make_unique<terms_for_net_t>());
m_terms.push_back(plib::make_unique<terms_for_net_t>(net));
m_rails_temp.push_back(plib::make_unique<terms_for_net_t>());
}
@ -159,7 +158,7 @@ namespace devices
*
*/
const std::size_t iN = m_nets.size();
const std::size_t iN = m_terms.size();
switch (sort)
{
@ -174,7 +173,6 @@ namespace devices
if (pi < pk)
{
std::swap(m_terms[i], m_terms[k]);
std::swap(m_nets[i], m_nets[k]);
pk = get_weight_around_diag(k,k);
}
}
@ -183,7 +181,7 @@ namespace devices
break;
case matrix_sort_type_e::PREFER_IDENTITY_TOP_LEFT:
{
for (std::size_t k = 0; k < iN - 1; k++)
for (std::size_t k = 0; k < iN - 2; k++)
{
auto pk = get_left_right_of_diag(k,k);
for (std::size_t i = k+1; i < iN; i++)
@ -192,7 +190,6 @@ namespace devices
if (pi.first <= pk.first && pi.second >= pk.second)
{
std::swap(m_terms[i], m_terms[k]);
std::swap(m_nets[i], m_nets[k]);
pk = get_left_right_of_diag(k,k);
}
}
@ -210,7 +207,6 @@ namespace devices
if ((static_cast<int>(m_terms[k]->m_railstart) - static_cast<int>(m_terms[i]->m_railstart)) * sort_order < 0)
{
std::swap(m_terms[i], m_terms[k]);
std::swap(m_nets[i], m_nets[k]);
}
}
}
@ -231,13 +227,13 @@ namespace devices
void matrix_solver_t::setup_matrix()
{
const std::size_t iN = m_nets.size();
const std::size_t iN = m_terms.size();
for (std::size_t k = 0; k < iN; k++)
{
m_terms[k]->m_railstart = m_terms[k]->count();
for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false);
this->m_terms[k]->add_terminal(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false);
}
// free all - no longer needed
@ -464,10 +460,10 @@ namespace devices
return next_time_step;
}
int matrix_solver_t::get_net_idx(detail::net_t *net)
int matrix_solver_t::get_net_idx(const analog_net_t *net) const
{
for (std::size_t k = 0; k < m_nets.size(); k++)
if (m_nets[k] == net)
for (std::size_t k = 0; k < m_terms.size(); k++)
if (m_terms[k]->isNet(net))
return static_cast<int>(k);
return -1;
}
@ -539,19 +535,19 @@ namespace devices
{
if (term->connected_terminal()->net().isRailNet())
{
m_rails_temp[k]->add(term, -1, false);
m_rails_temp[k]->add_terminal(term, -1, false);
}
else
{
int ot = get_net_idx(&term->connected_terminal()->net());
if (ot>=0)
{
m_terms[k]->add(term, ot, true);
m_terms[k]->add_terminal(term, ot, true);
}
/* Should this be allowed ? */
else // if (ot<0)
{
m_rails_temp[k]->add(term, ot, true);
m_rails_temp[k]->add_terminal(term, ot, true);
log().fatal(MF_FOUND_TERM_WITH_MISSING_OTHERNET(term->name()));
}
}
@ -565,12 +561,11 @@ namespace devices
{
for (std::size_t k = 0, iN=m_terms.size(); k < iN; k++)
{
analog_net_t *n = m_nets[k];
terms_for_net_t *t = m_terms[k].get();
//const nl_double DD_n = (n->Q_Analog() - t->m_last_V);
// avoid floating point exceptions
const nl_double DD_n = std::max(-1e100, std::min(1e100,(n->Q_Analog() - t->m_last_V)));
const nl_double DD_n = std::max(-1e100, std::min(1e100,(t->getV() - t->m_last_V)));
const nl_double hn = cur_ts;
//printf("%g %g %g %g\n", DD_n, hn, t->m_DD_n_m_1, t->m_h_n_m_1);
@ -587,7 +582,7 @@ namespace devices
if (new_net_timestep < new_solver_timestep)
new_solver_timestep = new_net_timestep;
t->m_last_V = n->Q_Analog();
t->m_last_V = t->getV();
}
if (new_solver_timestep < m_params.m_min_timestep)
{
@ -608,7 +603,7 @@ namespace devices
{
log().verbose("==============================================");
log().verbose("Solver {1}", this->name());
log().verbose(" ==> {1} nets", this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name());
log().verbose(" ==> {1} nets", this->m_terms.size()); //, (*(*groups[i].first())->m_core_terms.first())->name());
log().verbose(" has {1} elements", this->has_dynamic_devices() ? "dynamic" : "no dynamic");
log().verbose(" has {1} elements", this->has_timestep_devices() ? "timestep" : "no timestep");
log().verbose(" {1:6.3} average newton raphson loops",

View File

@ -29,6 +29,16 @@ namespace devices
PREFER_BAND_MATRIX
)
P_ENUM(matrix_type_e,
SOR_MAT,
MAT_CR,
MAT,
SM,
W,
SOR,
GMRES
)
struct solver_parameters_t
{
solver_parameters_t(device_t &parent)
@ -36,7 +46,7 @@ namespace devices
/* iteration parameters */
, m_gs_sor(parent, "SOR_FACTOR", 1.059)
, m_method(parent, "METHOD", "MAT_CR")
, m_method(parent, "METHOD", matrix_type_e::MAT_CR)
, m_accuracy(parent, "ACCURACY", 1e-7)
, m_gs_loops(parent, "GS_LOOPS", 9) // Gauss-Seidel loops
@ -75,7 +85,7 @@ namespace devices
param_double_t m_freq;
param_double_t m_gs_sor;
param_str_t m_method;
param_enum_t<matrix_type_e> m_method;
param_double_t m_accuracy;
param_num_t<std::size_t> m_gs_loops;
param_double_t m_gmin;
@ -99,16 +109,22 @@ namespace devices
class terms_for_net_t : plib::nocopyassignmove
{
public:
terms_for_net_t();
terms_for_net_t(analog_net_t * net = nullptr);
void clear();
void add(terminal_t *term, int net_other, bool sorted);
void add_terminal(terminal_t *term, int net_other, bool sorted);
std::size_t count() const { return m_terms.size(); }
terminal_t **terms() { return m_terms.data(); }
nl_double getV() const { return m_net->Q_Analog(); }
void setV(nl_double v) { m_net->set_Q_Analog(v); }
bool isNet(const analog_net_t * net) const { return net == m_net; }
std::size_t m_railstart;
std::vector<unsigned> m_nz; /* all non zero for multiplication */
@ -122,6 +138,7 @@ namespace devices
std::vector<int> m_connected_net_idx;
private:
analog_net_t * m_net;
std::vector<terminal_t *> m_terms;
};
@ -172,7 +189,7 @@ namespace devices
NETLIB_RESETI();
public:
int get_net_idx(detail::net_t *net);
int get_net_idx(const analog_net_t *net) const;
std::pair<int, int> get_left_right_of_diag(std::size_t row, std::size_t diag);
double get_weight_around_diag(std::size_t row, std::size_t diag);
@ -215,7 +232,7 @@ namespace devices
void set_pointers()
{
const std::size_t iN = this->m_nets.size();
const std::size_t iN = this->m_terms.size();
std::size_t max_count = 0;
std::size_t max_rail = 0;
@ -287,6 +304,52 @@ namespace devices
}
template <typename T, typename M>
void log_fill(const T &fill, M &mat)
{
/* FIXME: move this to the cr matrix class and use computed
* parallel ordering once it makes sense.
*/
const std::size_t iN = fill.size();
std::vector<unsigned> levL(iN, 0);
std::vector<unsigned> levU(iN, 0);
// parallel scheme for L x = y
for (std::size_t k = 0; k < iN; k++)
{
unsigned lm=0;
for (std::size_t j = 0; j<k; j++)
if (fill[k][j] < M::FILL_INFINITY)
lm = std::max(lm, levL[j]);
levL[k] = 1+lm;
}
// parallel scheme for U x = y
for (std::size_t k = iN; k-- > 0; )
{
unsigned lm=0;
for (std::size_t j = iN; --j > k; )
if (fill[k][j] < M::FILL_INFINITY)
lm = std::max(lm, levU[j]);
levU[k] = 1+lm;
}
for (std::size_t k = 0; k < iN; k++)
{
unsigned fm = 0;
pstring ml = "";
for (std::size_t j = 0; j < iN; j++)
{
ml += fill[k][j] == 0 ? 'X' : fill[k][j] < M::FILL_INFINITY ? '+' : '.';
if (fill[k][j] < M::FILL_INFINITY)
if (fill[k][j] > fm)
fm = fill[k][j];
}
this->log().verbose("{1:4} {2} {3:4} {4:4} {5:4} {6:4}", k, ml, levL[k], levU[k], mat.get_parallel_level(k), fm);
}
}
template <typename T>
using aligned_alloc = plib::aligned_allocator<T, PALIGN_VECTOROPT>;
@ -296,10 +359,8 @@ namespace devices
plib::pmatrix2d<nl_double *, aligned_alloc<nl_double *>> m_mat_ptr;
plib::pmatrix2d<nl_double *, aligned_alloc<nl_double *>> m_connected_net_Vn;
plib::pmatrix2d<nl_double> m_test;
std::vector<plib::unique_ptr<terms_for_net_t>> m_terms;
std::vector<analog_net_t *> m_nets;
std::vector<pool_owned_ptr<proxied_analog_output_t>> m_inps;
std::vector<plib::unique_ptr<terms_for_net_t>> m_rails_temp;
@ -340,7 +401,7 @@ namespace devices
const std::size_t iN = this->m_terms.size();
typename std::decay<decltype(V[0])>::type cerr = 0;
for (std::size_t i = 0; i < iN; i++)
cerr = std::max(cerr, std::abs(V[i] - this->m_nets[i]->Q_Analog()));
cerr = std::max(cerr, std::abs(V[i] - this->m_terms[i]->getV()));
return cerr;
}
@ -349,7 +410,7 @@ namespace devices
{
const std::size_t iN = this->m_terms.size();
for (std::size_t i = 0; i < iN; i++)
this->m_nets[i]->set_Q_Analog(V[i]);
this->m_terms[i]->setV(V[i]);
}
template <typename T>

View File

@ -10,8 +10,8 @@
#include "nld_matrix_solver.h"
#include "nld_solver.h"
#include "plib/mat_cr.h"
#include "plib/vector_ops.h"
#include "plib/parray.h"
#include <algorithm>
#include <cmath>

View File

@ -56,9 +56,7 @@ namespace devices
using mat_index_type = typename plib::matrix_compressed_rows_t<FT, SIZE>::index_type;
void csc_private(plib::putf8_fmt_writer &strm);
using extsolver = void (*)(double * m_A, double * RHS, double * V);
void generate_code(plib::putf8_fmt_writer &strm);
pstring static_compile_name();
@ -68,7 +66,6 @@ namespace devices
mat_type mat;
//extsolver m_proc;
plib::dynproc<void, double * , double * , double * > m_proc;
};
@ -77,16 +74,6 @@ namespace devices
// matrix_solver - GCR
// ----------------------------------------------------------------------------------------
// FIXME: namespace or static class member
template <typename V>
std::size_t inline get_level(const V &v, std::size_t k)
{
for (std::size_t i = 0; i < v.size(); i++)
if (plib::container::contains(v[i], k))
return i;
throw plib::pexception("Error in get_level");
}
template <typename FT, int SIZE>
void matrix_solver_GCR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
@ -113,48 +100,7 @@ namespace devices
auto gr = mat.gaussian_extend_fill_mat(fill);
/* FIXME: move this to the cr matrix class and use computed
* parallel ordering once it makes sense.
*/
std::vector<unsigned> levL(iN, 0);
std::vector<unsigned> levU(iN, 0);
// parallel scheme for L x = y
for (std::size_t k = 0; k < iN; k++)
{
unsigned lm=0;
for (std::size_t j = 0; j<k; j++)
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
lm = std::max(lm, levL[j]);
levL[k] = 1+lm;
}
// parallel scheme for U x = y
for (std::size_t k = iN; k-- > 0; )
{
unsigned lm=0;
for (std::size_t j = iN; --j > k; )
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
lm = std::max(lm, levU[j]);
levU[k] = 1+lm;
}
for (std::size_t k = 0; k < iN; k++)
{
unsigned fm = 0;
pstring ml = "";
for (std::size_t j = 0; j < iN; j++)
{
ml += fill[k][j] == 0 ? 'X' : fill[k][j] < decltype(mat)::FILL_INFINITY ? '+' : '.';
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
if (fill[k][j] > fm)
fm = fill[k][j];
}
this->log().verbose("{1:4} {2} {3:4} {4:4} {5:4} {6:4}", k, ml, levL[k], levU[k], get_level(mat.m_ge_par, k), fm);
}
log_fill(fill, mat);
mat.build_from_fill_mat(fill);
@ -197,7 +143,7 @@ namespace devices
}
template <typename FT, int SIZE>
void matrix_solver_GCR_t<FT, SIZE>::csc_private(plib::putf8_fmt_writer &strm)
void matrix_solver_GCR_t<FT, SIZE>::generate_code(plib::putf8_fmt_writer &strm)
{
const std::size_t iN = N();
@ -265,7 +211,7 @@ namespace devices
std::stringstream t;
t.imbue(std::locale::classic());
plib::putf8_fmt_writer w(&t);
csc_private(w);
generate_code(w);
std::hash<typename std::remove_const<std::remove_reference<decltype(t.str())>::type>::type> h;
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
@ -281,7 +227,7 @@ namespace devices
strm.writeline(plib::pfmt("extern \"C\" void {1}(double * __restrict m_A, double * __restrict RHS, double * __restrict V)\n")(name));
strm.writeline("{\n");
csc_private(strm);
generate_code(strm);
strm.writeline("}\n");
// some compilers (_WIN32, _WIN64, mac osx) need an explicit cast
return std::pair<pstring, pstring>(name, pstring(t.str()));

View File

@ -80,6 +80,7 @@ namespace devices
}
m_ops.build(fill);
this->log_fill(fill, m_ops.m_mat);
/* build pointers into the compressed row format matrix for each terminal */
@ -115,7 +116,7 @@ namespace devices
for (std::size_t k = 0; k < iN; k++)
{
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
this->m_new_V[k] = this->m_terms[k]->getV();
}
const float_type accuracy = this->m_params.m_accuracy;

View File

@ -90,7 +90,7 @@ unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_rap
const float_type * const Idr = this->m_Idrn[k];
auto other_cur_analog = this->m_connected_net_Vn[k];
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
this->m_new_V[k] = this->m_terms[k]->getV();
for (std::size_t i = 0; i < term_count; i++)
{

View File

@ -166,7 +166,7 @@ namespace devices
#endif
for (std::size_t k = 0; k < iN; k++)
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
this->m_new_V[k] = this->m_terms[k]->getV();
do {
resched = false;

View File

@ -119,50 +119,33 @@ namespace devices
template <typename FT, int SIZE>
plib::unique_ptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername)
{
if (m_params.m_method() == "SOR_MAT")
switch (m_params.m_method())
{
return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
//typedef matrix_solver_SOR_mat_t<m_N,storage_N> solver_sor_mat;
//return plib::make_unique<solver_sor_mat>(state(), solvername, &m_params, size);
}
else if (m_params.m_method() == "MAT_CR")
{
if (size > 0) // GCR always outperforms MAT solver
{
return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else
{
case matrix_type_e::MAT_CR:
if (size > 0) // GCR always outperforms MAT solver
{
return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else
{
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
}
case matrix_type_e::SOR_MAT:
return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
case matrix_type_e::MAT:
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
}
}
else if (m_params.m_method() == "MAT")
{
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_params.m_method() == "SM")
{
/* Sherman-Morrison Formula */
return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_params.m_method() == "W")
{
/* Woodbury Formula */
return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_params.m_method() == "SOR")
{
return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_params.m_method() == "GMRES")
{
return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else
{
log().fatal(MF_UNKNOWN_SOLVER_TYPE(m_params.m_method()));
return plib::unique_ptr<matrix_solver_t>();
case matrix_type_e::SM:
/* Sherman-Morrison Formula */
return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
case matrix_type_e::W:
/* Woodbury Formula */
return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
case matrix_type_e::SOR:
return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
case matrix_type_e::GMRES:
return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
}
return plib::unique_ptr<matrix_solver_t>();
}
struct net_splitter

View File

@ -375,15 +375,15 @@ NETLIST_START(kidniki)
PARAM(Solver.DYNAMIC_LTE, 5e-4)
PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6)
PARAM(Solver.SORT_TYPE, "PREFER_IDENTITY_TOP_LEFT")
//PARAM(Solver.SORT_TYPE, "PREFER_BAND_MATRIX")
#else
SOLVER(Solver, 24000)
//PARAM(Solver.SORT_TYPE, "PREFER_BAND_MATRIX")
SOLVER(Solver, 48000)
PARAM(Solver.ACCURACY, 1e-7)
PARAM(Solver.NR_LOOPS, 10000)
PARAM(Solver.GS_LOOPS, 300000)
PARAM(Solver.GS_LOOPS, 100)
//PARAM(Solver.METHOD, "MAT_CR")
PARAM(Solver.METHOD, "GMRES")
//PARAM(Solver.SOR_FACTOR, 1.73)
//PARAM(Solver.METHOD, "SOR")
#endif
#if (USE_FRONTIERS)