Cleanup of solver code. (nw)

This commit is contained in:
couriersud 2017-05-08 18:44:54 +02:00
parent a27f10c4a7
commit 6dfe04c620
9 changed files with 46 additions and 289 deletions

View File

@ -86,6 +86,7 @@ matrix_solver_t::matrix_solver_t(netlist_t &anetlist, const pstring &name,
, m_last_step(*this, "m_last_step", netlist_time::zero())
, m_fb_sync(*this, "FB_sync")
, m_Q_sync(*this, "Q_sync")
, m_ops(0)
, m_sort(sort)
{
connect_post_start(m_fb_sync, m_Q_sync);
@ -184,12 +185,14 @@ void matrix_solver_t::setup_matrix()
for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->connected_net_idx()[i], false);
m_rails_temp[k]->clear(); // no longer needed
m_terms[k]->set_pointers();
}
for (unsigned k = 0; k < iN; k++)
plib::pfree(m_rails_temp[k]); // no longer needed
for (terms_for_net_t *rt : m_rails_temp)
{
rt->clear(); // no longer needed
plib::pfree(rt); // no longer needed
}
m_rails_temp.clear();
@ -227,12 +230,12 @@ void matrix_solver_t::setup_matrix()
}
}
for (unsigned k = 0; k < iN; k++)
for (auto &term : m_terms)
{
int *other = m_terms[k]->connected_net_idx();
for (unsigned i = 0; i < m_terms[k]->count(); i++)
int *other = term->connected_net_idx();
for (unsigned i = 0; i < term->count(); i++)
if (other[i] != -1)
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
other[i] = get_net_idx(&term->terms()[i]->m_otherterm->net());
}
}
@ -303,27 +306,27 @@ void matrix_solver_t::setup_matrix()
touched[k][m_terms[k]->m_nz[j]] = true;
}
unsigned ops = 0;
m_ops = 0;
for (unsigned k = 0; k < iN; k++)
{
ops++; // 1/A(k,k)
m_ops++; // 1/A(k,k)
for (unsigned row = k + 1; row < iN; row++)
{
if (touched[row][k])
{
ops++;
m_ops++;
if (!plib::container::contains(m_terms[k]->m_nzbd, row))
m_terms[k]->m_nzbd.push_back(row);
for (unsigned col = k + 1; col < iN; col++)
if (touched[k][col])
{
touched[row][col] = true;
ops += 2;
m_ops += 2;
}
}
}
}
log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
log().verbose("Number of mults/adds for {1}: {2}", name(), m_ops);
if ((0))
for (unsigned k = 0; k < iN; k++)

View File

@ -139,6 +139,9 @@ public:
return std::pair<pstring, pstring>("", plib::pfmt("/* solver doesn't support static compile */\n\n"));
}
/* return number of floating point operations for solve */
std::size_t ops() { return m_ops; }
protected:
matrix_solver_t(netlist_t &anetlist, const pstring &name,
@ -191,6 +194,7 @@ private:
void step(const netlist_time &delta);
std::size_t m_ops;
const eSortType m_sort;
};

View File

@ -14,18 +14,6 @@
#include "nld_matrix_solver.h"
#include "vector_base.h"
/* Disabling dynamic allocation gives a ~10% boost in performance
* This flag has been added to support continuous storage for arrays
* going forward in case we implement cuda solvers in the future.
*/
#define NL_USE_DYNAMIC_ALLOCATION (0)
#define TEST_PARALLEL (0 )
#if TEST_PARALLEL
#include <thread>
#include <atomic>
#endif
namespace netlist
{
namespace devices
@ -34,92 +22,9 @@ namespace netlist
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
#if TEST_PARALLEL
#define MAXTHR 10
static constexpr int num_thr = 3;
struct thr_intf
{
virtual ~thr_intf() = default;
virtual void do_work(const int id, void *param) = 0;
};
struct ti_t
{
/*volatile */std::atomic<int> lo;
thr_intf *intf;
void *params;
// int block[29]; /* make it 256 bytes */
};
static ti_t ti[MAXTHR];
static std::thread thr[MAXTHR];
int thr_init = 0;
static void thr_process_proc(int id)
{
while (true)
{
while (ti[id].lo.load() == 0)
;
if (ti[id].lo.load() == 2)
return;
ti[id].intf->do_work(id, ti[id].params);
ti[id].lo.store(0);
}
}
static void thr_process(int id, thr_intf *intf, void *params)
{
ti[id].intf = intf;
ti[id].params = params;
ti[id].lo.store(1);
}
static void thr_wait()
{
int c=1;
while (c > 0)
{
c=0;
for (int i=0; i<num_thr; i++)
c += ti[i].lo.load();
}
}
static void thr_initialize()
{
thr_init++;
if (thr_init == 1)
{
for (int i=0; i<num_thr; i++)
{
ti[i].lo = 0;
thr[i] = std::thread(thr_process_proc, i);
}
}
}
static void thr_dispose()
{
thr_init--;
if (thr_init == 0)
{
for (int i=0; i<num_thr; i++)
ti[i].lo = 2;
for (int i=0; i<num_thr; i++)
thr[i].join();
}
}
#endif
template <std::size_t m_N, std::size_t storage_N>
#if TEST_PARALLEL
class matrix_solver_direct_t: public matrix_solver_t, public thr_intf
#else
class matrix_solver_direct_t: public matrix_solver_t
#endif
{
friend class matrix_solver_t;
public:
@ -143,24 +48,10 @@ protected:
template <typename T>
void LE_back_subst(T * RESTRICT x);
#if TEST_PARALLEL
int x_i[10];
int x_start[10];
int x_stop[10];
virtual void do_work(const int id, void *param) override;
#endif
#if (NL_USE_DYNAMIC_ALLOCATION)
template <typename T1, typename T2>
inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; }
template <typename T1>
inline nl_ext_double &RHS(const T1 &r) { return m_A[r * m_pitch + N()]; }
#else
template <typename T1, typename T2>
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
template <typename T1>
nl_ext_double &RHS(const T1 &r) { return m_A[r][N()]; }
#endif
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
private:
@ -168,11 +59,8 @@ private:
static constexpr std::size_t m_pitch = (((storage_N + 1) + 7) / 8) * 8;
//static const std::size_t m_pitch = (((storage_N + 1) + 15) / 16) * 16;
//static const std::size_t m_pitch = (((storage_N + 1) + 31) / 32) * 32;
#if (NL_USE_DYNAMIC_ALLOCATION)
nl_ext_double * RESTRICT m_A;
#else
nl_ext_double m_A[storage_N][m_pitch];
#endif
//nl_ext_double m_RHSx[storage_N];
const std::size_t m_dim;
@ -186,12 +74,6 @@ private:
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
{
#if (NL_USE_DYNAMIC_ALLOCATION)
plib::pfree_array(m_A);
#endif
#if TEST_PARALLEL
thr_dispose();
#endif
}
template <std::size_t m_N, std::size_t storage_N>
@ -215,65 +97,14 @@ void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
}
#if TEST_PARALLEL
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::do_work(const int id, void *param)
{
const int i = x_i[id];
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i);
const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data();
const unsigned e = m_terms[i]->m_nzrd.size();
/* Eliminate column i from row j */
const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data();
const unsigned sj = x_start[id];
const unsigned se = x_stop[id];
for (unsigned jb = sj; jb < se; jb++)
{
const unsigned j = pb[jb];
const nl_double f1 = - A(j,i) * f;
for (unsigned k = 0; k < e; k++)
A(j,p[k]) += A(i,p[k]) * f1;
}
}
#endif
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
{
const std::size_t kN = N();
if (!(!TEST_PARALLEL && m_params.m_pivot))
if (!m_params.m_pivot)
{
for (std::size_t i = 0; i < kN; i++)
{
#if TEST_PARALLEL
const unsigned eb = m_terms[i]->m_nzbd.size();
if (eb > 0)
{
//printf("here %d\n", eb);
unsigned chunks = (eb + num_thr) / (num_thr + 1);
for (int p=0; p < num_thr + 1; p++)
{
x_i[p] = i;
x_start[p] = chunks * p;
x_stop[p] = std::min(chunks*(p+1), eb);
if (p<num_thr && x_start[p] < x_stop[p]) thr_process(p, this, nullptr);
}
if (x_start[num_thr] < x_stop[num_thr])
do_work(num_thr, nullptr);
thr_wait();
}
else if (eb > 0)
{
x_i[0] = i;
x_start[0] = 0;
x_stop[0] = eb;
do_work(0, nullptr);
}
#else
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i);
@ -287,7 +118,6 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
A(j,k) += A(i,k) * f1;
//RHS(j) += RHS(i) * f1;
}
#endif
}
}
else
@ -385,19 +215,9 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool ne
this->LE_solve();
this->LE_back_subst(new_V);
if (newton_raphson)
{
nl_double err = delta(new_V);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
else
{
store(new_V);
return 1;
}
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <std::size_t m_N, std::size_t storage_N>
@ -419,16 +239,10 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
: matrix_solver_t(anetlist, name, ASCENDING, params)
, m_dim(size)
{
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;
}
#if TEST_PARALLEL
thr_initialize();
#endif
}
template <std::size_t m_N, std::size_t storage_N>
@ -437,16 +251,10 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
: matrix_solver_t(anetlist, name, sort, params)
, m_dim(size)
{
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;
}
#if TEST_PARALLEL
thr_initialize();
#endif
}
} //namespace devices

View File

@ -36,19 +36,11 @@ inline unsigned matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED const bo
build_LE_RHS<matrix_solver_direct1_t>();
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
nl_double new_val[1] = { RHS(0) / A(0,0) };
nl_double new_V[1] = { RHS(0) / A(0,0) };
if (has_dynamic_devices())
{
nl_double err = this->delta(new_val);
store(new_val);
if (err > m_params.m_accuracy )
return 2;
else
return 1;
}
store(new_val);
return 1;
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
} //namespace devices

View File

@ -40,21 +40,13 @@ inline unsigned matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bo
const nl_double c = A(1,0);
const nl_double d = A(1,1);
nl_double new_val[2];
new_val[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
new_val[0] = (RHS(0) - b * new_val[1]) / a;
nl_double new_V[2];
new_V[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
new_V[0] = (RHS(0) - b * new_V[1]) / a;
if (has_dynamic_devices())
{
nl_double err = this->delta(new_val);
store(new_val);
if (err > m_params.m_accuracy )
return 2;
else
return 1;
}
store(new_val);
return 1;
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
} //namespace devices

View File

@ -448,19 +448,9 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
this->m_stat_calculations++;
if (newton_raphson)
{
nl_double err = this->delta(new_V);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
else
{
this->store(new_V);
return 1;
}
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
} //namespace devices

View File

@ -186,19 +186,9 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
}
if (newton_raphson)
{
nl_double err = this->delta(new_V);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
else
{
this->store(new_V);
return 1;
}
const nl_double err = (newton_raphson ? this->delta(new_V) : 0.0);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <typename T>

View File

@ -114,9 +114,6 @@ private:
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
{
#if (NL_USE_DYNAMIC_ALLOCATION)
plib::pfree_array(m_A);
#endif
}
template <std::size_t m_N, std::size_t storage_N>
@ -290,19 +287,9 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
this->LE_compute_x(new_V);
if (newton_raphson)
{
nl_double err = delta(new_V);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
else
{
store(new_V);
return 1;
}
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <std::size_t m_N, std::size_t storage_N>

View File

@ -354,19 +354,10 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
if (std::abs(tmp-RHS(i)) > 1e-6)
printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i));
}
if (newton_raphson)
{
nl_double err = delta(new_V);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
else
{
store(new_V);
return 1;
}
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <std::size_t m_N, std::size_t storage_N>