mirror of
https://github.com/holub/mame
synced 2025-04-21 07:52:35 +03:00
netlist: code maintenance. (nw)
- some readability improvements - some simplifications - kidniki uses frontiers again (speed improvement)
This commit is contained in:
parent
67621f11d9
commit
161cc143a5
@ -125,7 +125,7 @@ namespace netlist
|
||||
|
||||
// nld_solver.cpp
|
||||
|
||||
PERRMSGV(MF_UNKNOWN_SOLVER_TYPE, 1, "Unknown solver type: {1}")
|
||||
//PERRMSGV(MF_UNKNOWN_SOLVER_TYPE, 1, "Unknown solver type: {1}")
|
||||
PERRMSGV(MF_NETGROUP_SIZE_EXCEEDED_1, 1, "Encountered netgroup with > {1} nets")
|
||||
|
||||
PERRMSGV(MI_NO_SPECIFIC_SOLVER, 1, "No specific solver found for netlist of size {1}")
|
||||
|
@ -72,7 +72,7 @@ namespace plib
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
void solve_LU_inplace(V &v)
|
||||
void solve_inplace(V &v)
|
||||
{
|
||||
m_LU.solveLU(v);
|
||||
}
|
||||
@ -91,6 +91,7 @@ namespace plib
|
||||
mat_precondition_diag(std::size_t size, int dummy = 0)
|
||||
: m_mat(size)
|
||||
, m_diag(size)
|
||||
, nzcol(size)
|
||||
{
|
||||
plib::unused_var(dummy);
|
||||
}
|
||||
@ -99,6 +100,18 @@ namespace plib
|
||||
void build(M &fill)
|
||||
{
|
||||
m_mat.build_from_fill_mat(fill, 0);
|
||||
for (std::size_t i = 0; i< m_diag.size(); i++)
|
||||
{
|
||||
for (std::size_t j = 0; j< m_diag.size(); j++)
|
||||
{
|
||||
std::size_t k=m_mat.row_idx[j];
|
||||
while (m_mat.col_idx[k] < i && k < m_mat.row_idx[j+1])
|
||||
k++;
|
||||
if (m_mat.col_idx[k] == i && k < m_mat.row_idx[j+1])
|
||||
nzcol[i].push_back(k);
|
||||
}
|
||||
nzcol[i].push_back(static_cast<std::size_t>(-1));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename R, typename V>
|
||||
@ -111,12 +124,51 @@ namespace plib
|
||||
{
|
||||
for (std::size_t i = 0; i< m_diag.size(); i++)
|
||||
{
|
||||
m_diag[i] = 1.0 / m_mat.A[m_mat.diag[i]];
|
||||
// ILUT: 265%
|
||||
FT v(0.0);
|
||||
#if 0
|
||||
// doesn't works, Mame perforamnce drops significantly%
|
||||
// 136%
|
||||
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
|
||||
v += m_mat.A[j] * m_mat.A[j];
|
||||
m_diag[i] = 1.0 / std::sqrt(v);
|
||||
#elif 0
|
||||
// works halfway, i.e. Mame perforamnce 50%
|
||||
// 147% - lowest average solution time with 7.094
|
||||
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
|
||||
v += m_mat.A[j] * m_mat.A[j];
|
||||
m_diag[i] = m_mat.A[m_mat.diag[i]] / v;
|
||||
#elif 0
|
||||
// works halfway, i.e. Mame perforamnce 50%
|
||||
// sum over column i
|
||||
// 344% - lowest average solution time with 3.06
|
||||
std::size_t nzcolp = 0;
|
||||
const auto &nz = nzcol[i];
|
||||
std::size_t j;
|
||||
|
||||
while ((j = nz[nzcolp++])!=static_cast<std::size_t>(-1)) // NOLINT(bugprone-infinite-loop)
|
||||
{
|
||||
v += m_mat.A[j] * m_mat.A[j];
|
||||
}
|
||||
m_diag[i] = m_mat.A[m_mat.diag[i]] / v;
|
||||
#elif 0
|
||||
// works halfway, i.e. Mame perforamnce 50%
|
||||
// 151%
|
||||
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
|
||||
v += std::abs(m_mat.A[j]);
|
||||
m_diag[i] = 1.0 / v;
|
||||
#else
|
||||
// 124%
|
||||
for (std::size_t j = m_mat.row_idx[i]; j< m_mat.row_idx[i+1]; j++)
|
||||
v = std::max(v, std::abs(m_mat.A[j]));
|
||||
m_diag[i] = 1.0 / v;
|
||||
#endif
|
||||
//m_diag[i] = 1.0 / m_mat.A[m_mat.diag[i]];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
void solve_LU_inplace(V &v)
|
||||
void solve_inplace(V &v)
|
||||
{
|
||||
for (std::size_t i = 0; i< m_diag.size(); i++)
|
||||
v[i] = v[i] * m_diag[i];
|
||||
@ -124,6 +176,7 @@ namespace plib
|
||||
|
||||
plib::matrix_compressed_rows_t<FT, SIZE> m_mat;
|
||||
plib::parray<FT, SIZE> m_diag;
|
||||
plib::parray<std::vector<std::size_t>, SIZE > nzcol;
|
||||
};
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
@ -152,7 +205,7 @@ namespace plib
|
||||
}
|
||||
|
||||
template<typename V>
|
||||
void solve_LU_inplace(V &v)
|
||||
void solve_inplace(V &v)
|
||||
{
|
||||
plib::unused_var(v);
|
||||
}
|
||||
@ -163,7 +216,7 @@ namespace plib
|
||||
/* FIXME: hardcoding RESTART to 20 becomes an issue on very large
|
||||
* systems.
|
||||
*/
|
||||
template <typename FT, int SIZE, int RESTART = 20>
|
||||
template <typename FT, int SIZE, int RESTART = 80>
|
||||
struct gmres_t
|
||||
{
|
||||
public:
|
||||
@ -191,18 +244,18 @@ namespace plib
|
||||
std::size_t size() const { return (SIZE<=0) ? m_size : static_cast<std::size_t>(SIZE); }
|
||||
|
||||
template <int k, typename OPS, typename VT>
|
||||
bool do_k(OPS &ops, VT &x, FT rho_delta, bool dummy)
|
||||
bool do_k(OPS &ops, VT &x, std::size_t &itr_used, FT rho_delta, bool dummy)
|
||||
{
|
||||
plib::unused_var(dummy);
|
||||
//printf("%d\n", k);
|
||||
if (do_k<k-1, OPS>(ops, x, rho_delta, do_khelper<k-1>::value))
|
||||
if (do_k<k-1, OPS>(ops, x, itr_used, rho_delta, do_khelper<k-1>::value))
|
||||
return true;
|
||||
|
||||
const std::size_t kp1 = k + 1;
|
||||
const std::size_t n = size();
|
||||
|
||||
ops.calc_rhs(m_v[kp1], m_v[k]);
|
||||
ops.solve_LU_inplace(m_v[kp1]);
|
||||
ops.solve_inplace(m_v[kp1]);
|
||||
|
||||
for (std::size_t j = 0; j <= k; j++)
|
||||
{
|
||||
@ -229,7 +282,7 @@ namespace plib
|
||||
FT rho = std::abs(m_g[kp1]);
|
||||
|
||||
// FIXME ..
|
||||
//itr_used = itr_used + 1;
|
||||
itr_used = itr_used + 1;
|
||||
|
||||
if (rho <= rho_delta || k == RESTART-1)
|
||||
{
|
||||
@ -252,10 +305,9 @@ namespace plib
|
||||
}
|
||||
|
||||
template <int k, typename OPS, typename VT>
|
||||
bool do_k(OPS &ops, VT &x, FT rho_delta, float dummy)
|
||||
bool do_k(OPS &ops, VT &x, std::size_t &itr_used, FT rho_delta, float dummy)
|
||||
{
|
||||
plib::unused_var(ops, x, rho_delta, dummy);
|
||||
//printf("here\n");
|
||||
plib::unused_var(ops, x, itr_used, rho_delta, dummy);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -307,7 +359,7 @@ namespace plib
|
||||
vec_set_scalar(n, residual, accuracy);
|
||||
ops.calc_rhs(Ax, residual);
|
||||
|
||||
ops.solve_LU_inplace(Ax);
|
||||
ops.solve_inplace(Ax);
|
||||
|
||||
const float_type rho_to_accuracy = std::sqrt(vec_mult2<FT>(n, Ax)) / accuracy;
|
||||
|
||||
@ -320,7 +372,7 @@ namespace plib
|
||||
* Using
|
||||
*
|
||||
* vec_set(n, x, rhs);
|
||||
* ops.solve_LU_inplace(x);
|
||||
* ops.solve_inplace(x);
|
||||
*
|
||||
* to get a starting point for x degrades convergence speed compared
|
||||
* to using the last solution for x.
|
||||
@ -337,7 +389,7 @@ namespace plib
|
||||
|
||||
vec_sub(n, residual, rhs, Ax);
|
||||
|
||||
ops.solve_LU_inplace(residual);
|
||||
ops.solve_inplace(residual);
|
||||
|
||||
rho = std::sqrt(vec_mult2<FT>(n, residual));
|
||||
|
||||
@ -353,7 +405,7 @@ namespace plib
|
||||
|
||||
vec_mult_scalar(n, m_v[0], residual, constants<FT>::one() / rho);
|
||||
|
||||
if (do_k<RESTART-1>(ops, x, rho_delta, true))
|
||||
if (do_k<RESTART-1>(ops, x, itr_used, rho_delta, true))
|
||||
// converged
|
||||
break;
|
||||
}
|
||||
@ -446,7 +498,7 @@ namespace plib
|
||||
|
||||
for (int i = 0; i < iter_max; i++)
|
||||
{
|
||||
ops.solve_LU_inplace(residual);
|
||||
ops.solve_inplace(residual);
|
||||
if (i==0)
|
||||
{
|
||||
vec_set(size(), p, residual);
|
||||
|
@ -207,6 +207,7 @@ namespace plib
|
||||
// proceed to column i
|
||||
|
||||
std::size_t pj = row_idx[j];
|
||||
std::size_t pje = row_idx[j+1];
|
||||
|
||||
while (col_idx[pj] < i)
|
||||
pj++;
|
||||
@ -216,7 +217,7 @@ namespace plib
|
||||
// subtract row i from j
|
||||
// fill-in available assumed, i.e. matrix was prepared
|
||||
|
||||
for (std::size_t pii = pi; pii<piie; pii++)
|
||||
for (std::size_t pii = pi; pii<piie && pj < pje; pii++)
|
||||
{
|
||||
while (col_idx[pj] < col_idx[pii])
|
||||
pj++;
|
||||
@ -229,6 +230,14 @@ namespace plib
|
||||
}
|
||||
}
|
||||
|
||||
int get_parallel_level(std::size_t k) const
|
||||
{
|
||||
for (std::size_t i = 0; i < m_ge_par.size(); i++)
|
||||
if (plib::container::contains( m_ge_par[i], k))
|
||||
return static_cast<int>(i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <typename V>
|
||||
void gaussian_elimination_parallel(V & RHS)
|
||||
{
|
||||
@ -314,25 +323,17 @@ namespace plib
|
||||
* res = A * x
|
||||
*/
|
||||
#if 0
|
||||
parray<value_type, N < 0 ? -N * N : N *N> xi(m_size*m_size);
|
||||
//std::vector<value_type> xi(m_size*m_size);
|
||||
|
||||
plib::omp::for_static(0, constants<index_type>::zero(), row_idx[m_size], [this, &x, &xi](index_type k)
|
||||
{
|
||||
xi[k] = x[col_idx[k]];
|
||||
});
|
||||
#endif
|
||||
#if 1
|
||||
//plib::omp::set_num_threads(4);
|
||||
plib::omp::for_static(0, constants<index_type>::zero(), m_size, [this, &res, &x](index_type row)
|
||||
{
|
||||
T tmp = 0.0;
|
||||
const index_type e = row_idx[row+1];
|
||||
T tmp(0.0);
|
||||
const index_type e(row_idx[row+1]);
|
||||
for (index_type k = row_idx[row]; k < e; k++)
|
||||
tmp += A[k] * x[col_idx[k]];
|
||||
res[row] = tmp;
|
||||
});
|
||||
#else
|
||||
// this is a bit faster than the version above
|
||||
std::size_t row = 0;
|
||||
std::size_t k = 0;
|
||||
const std::size_t oe = nz_num;
|
||||
@ -373,10 +374,10 @@ namespace plib
|
||||
template <typename LUMAT>
|
||||
void reduction_copy_from(LUMAT & src)
|
||||
{
|
||||
C sp = 0;
|
||||
for (std::size_t r=0; r<src.size(); r++)
|
||||
C sp(0);
|
||||
for (index_type r=0; r<src.size(); r++)
|
||||
{
|
||||
C dp = row_idx[r];
|
||||
C dp(row_idx[r]);
|
||||
while(sp < src.row_idx[r+1])
|
||||
{
|
||||
/* advance dp to source column and fill 0s if necessary */
|
||||
@ -397,7 +398,7 @@ namespace plib
|
||||
template <typename LUMAT>
|
||||
void raw_copy_from(LUMAT & src)
|
||||
{
|
||||
for (std::size_t k = 0; k < nz_num; k++)
|
||||
for (index_type k = 0; k < nz_num; k++)
|
||||
A[k] = src.A[k];
|
||||
}
|
||||
|
||||
@ -422,32 +423,32 @@ namespace plib
|
||||
*/
|
||||
|
||||
index_type p(0);
|
||||
while (std::size_t i = ilu_rows[p++]) // NOLINT(bugprone-infinite-loop)
|
||||
while (auto i = ilu_rows[p++]) // NOLINT(bugprone-infinite-loop)
|
||||
{
|
||||
const std::size_t p_i_end = row_idx[i + 1];
|
||||
const auto p_i_end = row_idx[i + 1];
|
||||
// loop over all columns k left of diag in row i
|
||||
//if (row_idx[i] < diag[i])
|
||||
// printf("occ %d\n", (int)i);
|
||||
for (std::size_t i_k = row_idx[i]; i_k < diag[i]; i_k++)
|
||||
for (auto i_k = row_idx[i]; i_k < diag[i]; i_k++)
|
||||
{
|
||||
const std::size_t k = col_idx[i_k];
|
||||
const std::size_t p_k_end = row_idx[k + 1];
|
||||
const auto k(col_idx[i_k]);
|
||||
const auto p_k_end(row_idx[k + 1]);
|
||||
const T LUp_i_k = A[i_k] = A[i_k] / A[diag[k]];
|
||||
|
||||
std::size_t k_j = diag[k] + 1;
|
||||
std::size_t i_j = i_k + 1;
|
||||
auto k_j(diag[k] + 1);
|
||||
auto i_j(i_k + 1);
|
||||
|
||||
while (i_j < p_i_end && k_j < p_k_end ) // pj = (i, j)
|
||||
{
|
||||
// we can assume that within a row ja increases continuously */
|
||||
const std::size_t c_i_j = col_idx[i_j]; // row i, column j
|
||||
const std::size_t c_k_j = col_idx[k_j]; // row k, column j
|
||||
if (c_k_j < c_i_j)
|
||||
k_j++;
|
||||
else if (c_k_j == c_i_j)
|
||||
A[i_j++] -= LUp_i_k * A[k_j++];
|
||||
else
|
||||
i_j++;
|
||||
const auto c_i_j(col_idx[i_j]); // row i, column j
|
||||
const auto c_k_j(col_idx[k_j]); // row k, column j
|
||||
|
||||
if (c_k_j == c_i_j)
|
||||
A[i_j] -= LUp_i_k * A[k_j];
|
||||
k_j += (c_k_j <= c_i_j ? 1 : 0);
|
||||
i_j += (c_k_j >= c_i_j ? 1 : 0);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -477,23 +478,23 @@ namespace plib
|
||||
* This can be solved for x using backwards elimination in U.
|
||||
*
|
||||
*/
|
||||
for (std::size_t i = 1; i < size(); ++i )
|
||||
for (index_type i = 1; i < size(); ++i )
|
||||
{
|
||||
T tmp = 0.0;
|
||||
const std::size_t j1 = row_idx[i];
|
||||
const std::size_t j2 = diag[i];
|
||||
const auto j1(row_idx[i]);
|
||||
const auto j2(diag[i]);
|
||||
|
||||
for (std::size_t j = j1; j < j2; ++j )
|
||||
for (auto j = j1; j < j2; ++j )
|
||||
tmp += A[j] * r[col_idx[j]];
|
||||
r[i] -= tmp;
|
||||
}
|
||||
// i now is equal to n;
|
||||
for (std::size_t i = size(); i-- > 0; )
|
||||
for (auto i = size(); i-- > 0; )
|
||||
{
|
||||
T tmp = 0.0;
|
||||
const std::size_t di = diag[i];
|
||||
const std::size_t j2 = row_idx[i+1];
|
||||
for (std::size_t j = di + 1; j < j2; j++ )
|
||||
const auto di(diag[i]);
|
||||
const auto j2(row_idx[i+1]);
|
||||
for (auto j = di + 1; j < j2; j++ )
|
||||
tmp += A[j] * r[col_idx[j]];
|
||||
r[i] = (r[i] - tmp) / A[di];
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ public:
|
||||
//return m_sym(args...);
|
||||
}
|
||||
|
||||
bool resolved() { return m_sym != nullptr; }
|
||||
bool resolved() const { return m_sym != nullptr; }
|
||||
private:
|
||||
calltype m_sym;
|
||||
};
|
||||
|
@ -242,18 +242,21 @@ namespace plib
|
||||
ename (E v) : m_v(v) { } \
|
||||
template <typename T> explicit ename(T val) { m_v = static_cast<E>(val); } \
|
||||
bool set_from_string (const pstring &s) { \
|
||||
static const pstring strings = # __VA_ARGS__; \
|
||||
int f = from_string_int(strings, s); \
|
||||
int f = from_string_int(strings(), s); \
|
||||
if (f>=0) { m_v = static_cast<E>(f); return true; } else { return false; } \
|
||||
} \
|
||||
operator E() const {return m_v;} \
|
||||
bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \
|
||||
bool operator==(const E &rhs) const {return m_v == rhs;} \
|
||||
std::string name() const { \
|
||||
static const pstring strings = # __VA_ARGS__; \
|
||||
return nthstr(static_cast<int>(m_v), strings); \
|
||||
return nthstr(static_cast<int>(m_v), strings()); \
|
||||
} \
|
||||
private: E m_v; };
|
||||
private: E m_v; \
|
||||
static pstring strings() {\
|
||||
static const pstring lstrings = # __VA_ARGS__; \
|
||||
return lstrings; \
|
||||
} \
|
||||
};
|
||||
|
||||
|
||||
#endif /* PUTIL_H_ */
|
||||
|
@ -15,15 +15,16 @@ namespace netlist
|
||||
namespace devices
|
||||
{
|
||||
|
||||
terms_for_net_t::terms_for_net_t()
|
||||
terms_for_net_t::terms_for_net_t(analog_net_t * net)
|
||||
: m_railstart(0)
|
||||
, m_last_V(0.0)
|
||||
, m_DD_n_m_1(0.0)
|
||||
, m_h_n_m_1(1e-12)
|
||||
, m_net(net)
|
||||
{
|
||||
}
|
||||
|
||||
void terms_for_net_t::add(terminal_t *term, int net_other, bool sorted)
|
||||
void terms_for_net_t::add_terminal(terminal_t *term, int net_other, bool sorted)
|
||||
{
|
||||
if (sorted)
|
||||
for (std::size_t i=0; i < m_connected_net_idx.size(); i++)
|
||||
@ -65,13 +66,11 @@ namespace devices
|
||||
|
||||
log().debug("New solver setup\n");
|
||||
|
||||
m_nets.clear();
|
||||
m_terms.clear();
|
||||
|
||||
for (auto & net : nets)
|
||||
{
|
||||
m_nets.push_back(net);
|
||||
m_terms.push_back(plib::make_unique<terms_for_net_t>());
|
||||
m_terms.push_back(plib::make_unique<terms_for_net_t>(net));
|
||||
m_rails_temp.push_back(plib::make_unique<terms_for_net_t>());
|
||||
}
|
||||
|
||||
@ -159,7 +158,7 @@ namespace devices
|
||||
*
|
||||
*/
|
||||
|
||||
const std::size_t iN = m_nets.size();
|
||||
const std::size_t iN = m_terms.size();
|
||||
|
||||
switch (sort)
|
||||
{
|
||||
@ -174,7 +173,6 @@ namespace devices
|
||||
if (pi < pk)
|
||||
{
|
||||
std::swap(m_terms[i], m_terms[k]);
|
||||
std::swap(m_nets[i], m_nets[k]);
|
||||
pk = get_weight_around_diag(k,k);
|
||||
}
|
||||
}
|
||||
@ -183,7 +181,7 @@ namespace devices
|
||||
break;
|
||||
case matrix_sort_type_e::PREFER_IDENTITY_TOP_LEFT:
|
||||
{
|
||||
for (std::size_t k = 0; k < iN - 1; k++)
|
||||
for (std::size_t k = 0; k < iN - 2; k++)
|
||||
{
|
||||
auto pk = get_left_right_of_diag(k,k);
|
||||
for (std::size_t i = k+1; i < iN; i++)
|
||||
@ -192,7 +190,6 @@ namespace devices
|
||||
if (pi.first <= pk.first && pi.second >= pk.second)
|
||||
{
|
||||
std::swap(m_terms[i], m_terms[k]);
|
||||
std::swap(m_nets[i], m_nets[k]);
|
||||
pk = get_left_right_of_diag(k,k);
|
||||
}
|
||||
}
|
||||
@ -210,7 +207,6 @@ namespace devices
|
||||
if ((static_cast<int>(m_terms[k]->m_railstart) - static_cast<int>(m_terms[i]->m_railstart)) * sort_order < 0)
|
||||
{
|
||||
std::swap(m_terms[i], m_terms[k]);
|
||||
std::swap(m_nets[i], m_nets[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -231,13 +227,13 @@ namespace devices
|
||||
|
||||
void matrix_solver_t::setup_matrix()
|
||||
{
|
||||
const std::size_t iN = m_nets.size();
|
||||
const std::size_t iN = m_terms.size();
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false);
|
||||
this->m_terms[k]->add_terminal(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false);
|
||||
}
|
||||
|
||||
// free all - no longer needed
|
||||
@ -464,10 +460,10 @@ namespace devices
|
||||
return next_time_step;
|
||||
}
|
||||
|
||||
int matrix_solver_t::get_net_idx(detail::net_t *net)
|
||||
int matrix_solver_t::get_net_idx(const analog_net_t *net) const
|
||||
{
|
||||
for (std::size_t k = 0; k < m_nets.size(); k++)
|
||||
if (m_nets[k] == net)
|
||||
for (std::size_t k = 0; k < m_terms.size(); k++)
|
||||
if (m_terms[k]->isNet(net))
|
||||
return static_cast<int>(k);
|
||||
return -1;
|
||||
}
|
||||
@ -539,19 +535,19 @@ namespace devices
|
||||
{
|
||||
if (term->connected_terminal()->net().isRailNet())
|
||||
{
|
||||
m_rails_temp[k]->add(term, -1, false);
|
||||
m_rails_temp[k]->add_terminal(term, -1, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
int ot = get_net_idx(&term->connected_terminal()->net());
|
||||
if (ot>=0)
|
||||
{
|
||||
m_terms[k]->add(term, ot, true);
|
||||
m_terms[k]->add_terminal(term, ot, true);
|
||||
}
|
||||
/* Should this be allowed ? */
|
||||
else // if (ot<0)
|
||||
{
|
||||
m_rails_temp[k]->add(term, ot, true);
|
||||
m_rails_temp[k]->add_terminal(term, ot, true);
|
||||
log().fatal(MF_FOUND_TERM_WITH_MISSING_OTHERNET(term->name()));
|
||||
}
|
||||
}
|
||||
@ -565,12 +561,11 @@ namespace devices
|
||||
{
|
||||
for (std::size_t k = 0, iN=m_terms.size(); k < iN; k++)
|
||||
{
|
||||
analog_net_t *n = m_nets[k];
|
||||
terms_for_net_t *t = m_terms[k].get();
|
||||
|
||||
//const nl_double DD_n = (n->Q_Analog() - t->m_last_V);
|
||||
// avoid floating point exceptions
|
||||
const nl_double DD_n = std::max(-1e100, std::min(1e100,(n->Q_Analog() - t->m_last_V)));
|
||||
const nl_double DD_n = std::max(-1e100, std::min(1e100,(t->getV() - t->m_last_V)));
|
||||
const nl_double hn = cur_ts;
|
||||
|
||||
//printf("%g %g %g %g\n", DD_n, hn, t->m_DD_n_m_1, t->m_h_n_m_1);
|
||||
@ -587,7 +582,7 @@ namespace devices
|
||||
if (new_net_timestep < new_solver_timestep)
|
||||
new_solver_timestep = new_net_timestep;
|
||||
|
||||
t->m_last_V = n->Q_Analog();
|
||||
t->m_last_V = t->getV();
|
||||
}
|
||||
if (new_solver_timestep < m_params.m_min_timestep)
|
||||
{
|
||||
@ -608,7 +603,7 @@ namespace devices
|
||||
{
|
||||
log().verbose("==============================================");
|
||||
log().verbose("Solver {1}", this->name());
|
||||
log().verbose(" ==> {1} nets", this->m_nets.size()); //, (*(*groups[i].first())->m_core_terms.first())->name());
|
||||
log().verbose(" ==> {1} nets", this->m_terms.size()); //, (*(*groups[i].first())->m_core_terms.first())->name());
|
||||
log().verbose(" has {1} elements", this->has_dynamic_devices() ? "dynamic" : "no dynamic");
|
||||
log().verbose(" has {1} elements", this->has_timestep_devices() ? "timestep" : "no timestep");
|
||||
log().verbose(" {1:6.3} average newton raphson loops",
|
||||
|
@ -29,6 +29,16 @@ namespace devices
|
||||
PREFER_BAND_MATRIX
|
||||
)
|
||||
|
||||
P_ENUM(matrix_type_e,
|
||||
SOR_MAT,
|
||||
MAT_CR,
|
||||
MAT,
|
||||
SM,
|
||||
W,
|
||||
SOR,
|
||||
GMRES
|
||||
)
|
||||
|
||||
struct solver_parameters_t
|
||||
{
|
||||
solver_parameters_t(device_t &parent)
|
||||
@ -36,7 +46,7 @@ namespace devices
|
||||
|
||||
/* iteration parameters */
|
||||
, m_gs_sor(parent, "SOR_FACTOR", 1.059)
|
||||
, m_method(parent, "METHOD", "MAT_CR")
|
||||
, m_method(parent, "METHOD", matrix_type_e::MAT_CR)
|
||||
, m_accuracy(parent, "ACCURACY", 1e-7)
|
||||
, m_gs_loops(parent, "GS_LOOPS", 9) // Gauss-Seidel loops
|
||||
|
||||
@ -75,7 +85,7 @@ namespace devices
|
||||
|
||||
param_double_t m_freq;
|
||||
param_double_t m_gs_sor;
|
||||
param_str_t m_method;
|
||||
param_enum_t<matrix_type_e> m_method;
|
||||
param_double_t m_accuracy;
|
||||
param_num_t<std::size_t> m_gs_loops;
|
||||
param_double_t m_gmin;
|
||||
@ -99,16 +109,22 @@ namespace devices
|
||||
class terms_for_net_t : plib::nocopyassignmove
|
||||
{
|
||||
public:
|
||||
terms_for_net_t();
|
||||
terms_for_net_t(analog_net_t * net = nullptr);
|
||||
|
||||
void clear();
|
||||
|
||||
void add(terminal_t *term, int net_other, bool sorted);
|
||||
void add_terminal(terminal_t *term, int net_other, bool sorted);
|
||||
|
||||
std::size_t count() const { return m_terms.size(); }
|
||||
|
||||
terminal_t **terms() { return m_terms.data(); }
|
||||
|
||||
nl_double getV() const { return m_net->Q_Analog(); }
|
||||
|
||||
void setV(nl_double v) { m_net->set_Q_Analog(v); }
|
||||
|
||||
bool isNet(const analog_net_t * net) const { return net == m_net; }
|
||||
|
||||
std::size_t m_railstart;
|
||||
|
||||
std::vector<unsigned> m_nz; /* all non zero for multiplication */
|
||||
@ -122,6 +138,7 @@ namespace devices
|
||||
|
||||
std::vector<int> m_connected_net_idx;
|
||||
private:
|
||||
analog_net_t * m_net;
|
||||
std::vector<terminal_t *> m_terms;
|
||||
|
||||
};
|
||||
@ -172,7 +189,7 @@ namespace devices
|
||||
NETLIB_RESETI();
|
||||
|
||||
public:
|
||||
int get_net_idx(detail::net_t *net);
|
||||
int get_net_idx(const analog_net_t *net) const;
|
||||
std::pair<int, int> get_left_right_of_diag(std::size_t row, std::size_t diag);
|
||||
double get_weight_around_diag(std::size_t row, std::size_t diag);
|
||||
|
||||
@ -215,7 +232,7 @@ namespace devices
|
||||
|
||||
void set_pointers()
|
||||
{
|
||||
const std::size_t iN = this->m_nets.size();
|
||||
const std::size_t iN = this->m_terms.size();
|
||||
|
||||
std::size_t max_count = 0;
|
||||
std::size_t max_rail = 0;
|
||||
@ -287,6 +304,52 @@ namespace devices
|
||||
|
||||
}
|
||||
|
||||
template <typename T, typename M>
|
||||
void log_fill(const T &fill, M &mat)
|
||||
{
|
||||
/* FIXME: move this to the cr matrix class and use computed
|
||||
* parallel ordering once it makes sense.
|
||||
*/
|
||||
|
||||
const std::size_t iN = fill.size();
|
||||
|
||||
std::vector<unsigned> levL(iN, 0);
|
||||
std::vector<unsigned> levU(iN, 0);
|
||||
|
||||
// parallel scheme for L x = y
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
unsigned lm=0;
|
||||
for (std::size_t j = 0; j<k; j++)
|
||||
if (fill[k][j] < M::FILL_INFINITY)
|
||||
lm = std::max(lm, levL[j]);
|
||||
levL[k] = 1+lm;
|
||||
}
|
||||
|
||||
// parallel scheme for U x = y
|
||||
for (std::size_t k = iN; k-- > 0; )
|
||||
{
|
||||
unsigned lm=0;
|
||||
for (std::size_t j = iN; --j > k; )
|
||||
if (fill[k][j] < M::FILL_INFINITY)
|
||||
lm = std::max(lm, levU[j]);
|
||||
levU[k] = 1+lm;
|
||||
}
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
unsigned fm = 0;
|
||||
pstring ml = "";
|
||||
for (std::size_t j = 0; j < iN; j++)
|
||||
{
|
||||
ml += fill[k][j] == 0 ? 'X' : fill[k][j] < M::FILL_INFINITY ? '+' : '.';
|
||||
if (fill[k][j] < M::FILL_INFINITY)
|
||||
if (fill[k][j] > fm)
|
||||
fm = fill[k][j];
|
||||
}
|
||||
this->log().verbose("{1:4} {2} {3:4} {4:4} {5:4} {6:4}", k, ml, levL[k], levU[k], mat.get_parallel_level(k), fm);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using aligned_alloc = plib::aligned_allocator<T, PALIGN_VECTOROPT>;
|
||||
|
||||
@ -296,10 +359,8 @@ namespace devices
|
||||
plib::pmatrix2d<nl_double *, aligned_alloc<nl_double *>> m_mat_ptr;
|
||||
plib::pmatrix2d<nl_double *, aligned_alloc<nl_double *>> m_connected_net_Vn;
|
||||
|
||||
plib::pmatrix2d<nl_double> m_test;
|
||||
|
||||
std::vector<plib::unique_ptr<terms_for_net_t>> m_terms;
|
||||
std::vector<analog_net_t *> m_nets;
|
||||
|
||||
std::vector<pool_owned_ptr<proxied_analog_output_t>> m_inps;
|
||||
|
||||
std::vector<plib::unique_ptr<terms_for_net_t>> m_rails_temp;
|
||||
@ -340,7 +401,7 @@ namespace devices
|
||||
const std::size_t iN = this->m_terms.size();
|
||||
typename std::decay<decltype(V[0])>::type cerr = 0;
|
||||
for (std::size_t i = 0; i < iN; i++)
|
||||
cerr = std::max(cerr, std::abs(V[i] - this->m_nets[i]->Q_Analog()));
|
||||
cerr = std::max(cerr, std::abs(V[i] - this->m_terms[i]->getV()));
|
||||
return cerr;
|
||||
}
|
||||
|
||||
@ -349,7 +410,7 @@ namespace devices
|
||||
{
|
||||
const std::size_t iN = this->m_terms.size();
|
||||
for (std::size_t i = 0; i < iN; i++)
|
||||
this->m_nets[i]->set_Q_Analog(V[i]);
|
||||
this->m_terms[i]->setV(V[i]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
#include "nld_matrix_solver.h"
|
||||
#include "nld_solver.h"
|
||||
#include "plib/mat_cr.h"
|
||||
#include "plib/vector_ops.h"
|
||||
#include "plib/parray.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
@ -56,9 +56,7 @@ namespace devices
|
||||
|
||||
using mat_index_type = typename plib::matrix_compressed_rows_t<FT, SIZE>::index_type;
|
||||
|
||||
void csc_private(plib::putf8_fmt_writer &strm);
|
||||
|
||||
using extsolver = void (*)(double * m_A, double * RHS, double * V);
|
||||
void generate_code(plib::putf8_fmt_writer &strm);
|
||||
|
||||
pstring static_compile_name();
|
||||
|
||||
@ -68,7 +66,6 @@ namespace devices
|
||||
|
||||
mat_type mat;
|
||||
|
||||
//extsolver m_proc;
|
||||
plib::dynproc<void, double * , double * , double * > m_proc;
|
||||
|
||||
};
|
||||
@ -77,16 +74,6 @@ namespace devices
|
||||
// matrix_solver - GCR
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// FIXME: namespace or static class member
|
||||
template <typename V>
|
||||
std::size_t inline get_level(const V &v, std::size_t k)
|
||||
{
|
||||
for (std::size_t i = 0; i < v.size(); i++)
|
||||
if (plib::container::contains(v[i], k))
|
||||
return i;
|
||||
throw plib::pexception("Error in get_level");
|
||||
}
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_GCR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
@ -113,48 +100,7 @@ namespace devices
|
||||
|
||||
auto gr = mat.gaussian_extend_fill_mat(fill);
|
||||
|
||||
/* FIXME: move this to the cr matrix class and use computed
|
||||
* parallel ordering once it makes sense.
|
||||
*/
|
||||
|
||||
std::vector<unsigned> levL(iN, 0);
|
||||
std::vector<unsigned> levU(iN, 0);
|
||||
|
||||
// parallel scheme for L x = y
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
unsigned lm=0;
|
||||
for (std::size_t j = 0; j<k; j++)
|
||||
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
|
||||
lm = std::max(lm, levL[j]);
|
||||
levL[k] = 1+lm;
|
||||
}
|
||||
|
||||
// parallel scheme for U x = y
|
||||
for (std::size_t k = iN; k-- > 0; )
|
||||
{
|
||||
unsigned lm=0;
|
||||
for (std::size_t j = iN; --j > k; )
|
||||
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
|
||||
lm = std::max(lm, levU[j]);
|
||||
levU[k] = 1+lm;
|
||||
}
|
||||
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
unsigned fm = 0;
|
||||
pstring ml = "";
|
||||
for (std::size_t j = 0; j < iN; j++)
|
||||
{
|
||||
ml += fill[k][j] == 0 ? 'X' : fill[k][j] < decltype(mat)::FILL_INFINITY ? '+' : '.';
|
||||
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
|
||||
if (fill[k][j] > fm)
|
||||
fm = fill[k][j];
|
||||
}
|
||||
this->log().verbose("{1:4} {2} {3:4} {4:4} {5:4} {6:4}", k, ml, levL[k], levU[k], get_level(mat.m_ge_par, k), fm);
|
||||
}
|
||||
|
||||
log_fill(fill, mat);
|
||||
|
||||
mat.build_from_fill_mat(fill);
|
||||
|
||||
@ -197,7 +143,7 @@ namespace devices
|
||||
}
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_GCR_t<FT, SIZE>::csc_private(plib::putf8_fmt_writer &strm)
|
||||
void matrix_solver_GCR_t<FT, SIZE>::generate_code(plib::putf8_fmt_writer &strm)
|
||||
{
|
||||
const std::size_t iN = N();
|
||||
|
||||
@ -265,7 +211,7 @@ namespace devices
|
||||
std::stringstream t;
|
||||
t.imbue(std::locale::classic());
|
||||
plib::putf8_fmt_writer w(&t);
|
||||
csc_private(w);
|
||||
generate_code(w);
|
||||
std::hash<typename std::remove_const<std::remove_reference<decltype(t.str())>::type>::type> h;
|
||||
|
||||
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
|
||||
@ -281,7 +227,7 @@ namespace devices
|
||||
|
||||
strm.writeline(plib::pfmt("extern \"C\" void {1}(double * __restrict m_A, double * __restrict RHS, double * __restrict V)\n")(name));
|
||||
strm.writeline("{\n");
|
||||
csc_private(strm);
|
||||
generate_code(strm);
|
||||
strm.writeline("}\n");
|
||||
// some compilers (_WIN32, _WIN64, mac osx) need an explicit cast
|
||||
return std::pair<pstring, pstring>(name, pstring(t.str()));
|
||||
|
@ -80,6 +80,7 @@ namespace devices
|
||||
}
|
||||
|
||||
m_ops.build(fill);
|
||||
this->log_fill(fill, m_ops.m_mat);
|
||||
|
||||
/* build pointers into the compressed row format matrix for each terminal */
|
||||
|
||||
@ -115,7 +116,7 @@ namespace devices
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
this->m_new_V[k] = this->m_terms[k]->getV();
|
||||
}
|
||||
|
||||
const float_type accuracy = this->m_params.m_accuracy;
|
||||
|
@ -90,7 +90,7 @@ unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_rap
|
||||
const float_type * const Idr = this->m_Idrn[k];
|
||||
auto other_cur_analog = this->m_connected_net_Vn[k];
|
||||
|
||||
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
this->m_new_V[k] = this->m_terms[k]->getV();
|
||||
|
||||
for (std::size_t i = 0; i < term_count; i++)
|
||||
{
|
||||
|
@ -166,7 +166,7 @@ namespace devices
|
||||
#endif
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
this->m_new_V[k] = this->m_terms[k]->getV();
|
||||
|
||||
do {
|
||||
resched = false;
|
||||
|
@ -119,50 +119,33 @@ namespace devices
|
||||
template <typename FT, int SIZE>
|
||||
plib::unique_ptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername)
|
||||
{
|
||||
if (m_params.m_method() == "SOR_MAT")
|
||||
switch (m_params.m_method())
|
||||
{
|
||||
return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
//typedef matrix_solver_SOR_mat_t<m_N,storage_N> solver_sor_mat;
|
||||
//return plib::make_unique<solver_sor_mat>(state(), solvername, &m_params, size);
|
||||
}
|
||||
else if (m_params.m_method() == "MAT_CR")
|
||||
{
|
||||
if (size > 0) // GCR always outperforms MAT solver
|
||||
{
|
||||
return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
case matrix_type_e::MAT_CR:
|
||||
if (size > 0) // GCR always outperforms MAT solver
|
||||
{
|
||||
return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
case matrix_type_e::SOR_MAT:
|
||||
return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
case matrix_type_e::MAT:
|
||||
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
}
|
||||
else if (m_params.m_method() == "MAT")
|
||||
{
|
||||
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_params.m_method() == "SM")
|
||||
{
|
||||
/* Sherman-Morrison Formula */
|
||||
return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_params.m_method() == "W")
|
||||
{
|
||||
/* Woodbury Formula */
|
||||
return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_params.m_method() == "SOR")
|
||||
{
|
||||
return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_params.m_method() == "GMRES")
|
||||
{
|
||||
return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
log().fatal(MF_UNKNOWN_SOLVER_TYPE(m_params.m_method()));
|
||||
return plib::unique_ptr<matrix_solver_t>();
|
||||
case matrix_type_e::SM:
|
||||
/* Sherman-Morrison Formula */
|
||||
return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
case matrix_type_e::W:
|
||||
/* Woodbury Formula */
|
||||
return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
case matrix_type_e::SOR:
|
||||
return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
case matrix_type_e::GMRES:
|
||||
return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
return plib::unique_ptr<matrix_solver_t>();
|
||||
}
|
||||
|
||||
struct net_splitter
|
||||
|
@ -375,15 +375,15 @@ NETLIST_START(kidniki)
|
||||
PARAM(Solver.DYNAMIC_LTE, 5e-4)
|
||||
PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6)
|
||||
PARAM(Solver.SORT_TYPE, "PREFER_IDENTITY_TOP_LEFT")
|
||||
//PARAM(Solver.SORT_TYPE, "PREFER_BAND_MATRIX")
|
||||
#else
|
||||
SOLVER(Solver, 24000)
|
||||
//PARAM(Solver.SORT_TYPE, "PREFER_BAND_MATRIX")
|
||||
SOLVER(Solver, 48000)
|
||||
PARAM(Solver.ACCURACY, 1e-7)
|
||||
PARAM(Solver.NR_LOOPS, 10000)
|
||||
PARAM(Solver.GS_LOOPS, 300000)
|
||||
PARAM(Solver.GS_LOOPS, 100)
|
||||
//PARAM(Solver.METHOD, "MAT_CR")
|
||||
PARAM(Solver.METHOD, "GMRES")
|
||||
//PARAM(Solver.SOR_FACTOR, 1.73)
|
||||
//PARAM(Solver.METHOD, "SOR")
|
||||
#endif
|
||||
|
||||
#if (USE_FRONTIERS)
|
||||
|
Loading…
Reference in New Issue
Block a user