mirror of
https://github.com/holub/mame
synced 2025-04-23 08:49:55 +03:00
Cleanup of solver code. (nw)
This commit is contained in:
parent
a27f10c4a7
commit
6dfe04c620
@ -86,6 +86,7 @@ matrix_solver_t::matrix_solver_t(netlist_t &anetlist, const pstring &name,
|
||||
, m_last_step(*this, "m_last_step", netlist_time::zero())
|
||||
, m_fb_sync(*this, "FB_sync")
|
||||
, m_Q_sync(*this, "Q_sync")
|
||||
, m_ops(0)
|
||||
, m_sort(sort)
|
||||
{
|
||||
connect_post_start(m_fb_sync, m_Q_sync);
|
||||
@ -184,12 +185,14 @@ void matrix_solver_t::setup_matrix()
|
||||
for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->connected_net_idx()[i], false);
|
||||
|
||||
m_rails_temp[k]->clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
plib::pfree(m_rails_temp[k]); // no longer needed
|
||||
for (terms_for_net_t *rt : m_rails_temp)
|
||||
{
|
||||
rt->clear(); // no longer needed
|
||||
plib::pfree(rt); // no longer needed
|
||||
}
|
||||
|
||||
m_rails_temp.clear();
|
||||
|
||||
@ -227,12 +230,12 @@ void matrix_solver_t::setup_matrix()
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
for (auto &term : m_terms)
|
||||
{
|
||||
int *other = m_terms[k]->connected_net_idx();
|
||||
for (unsigned i = 0; i < m_terms[k]->count(); i++)
|
||||
int *other = term->connected_net_idx();
|
||||
for (unsigned i = 0; i < term->count(); i++)
|
||||
if (other[i] != -1)
|
||||
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
|
||||
other[i] = get_net_idx(&term->terms()[i]->m_otherterm->net());
|
||||
}
|
||||
}
|
||||
|
||||
@ -303,27 +306,27 @@ void matrix_solver_t::setup_matrix()
|
||||
touched[k][m_terms[k]->m_nz[j]] = true;
|
||||
}
|
||||
|
||||
unsigned ops = 0;
|
||||
m_ops = 0;
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
{
|
||||
ops++; // 1/A(k,k)
|
||||
m_ops++; // 1/A(k,k)
|
||||
for (unsigned row = k + 1; row < iN; row++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
ops++;
|
||||
m_ops++;
|
||||
if (!plib::container::contains(m_terms[k]->m_nzbd, row))
|
||||
m_terms[k]->m_nzbd.push_back(row);
|
||||
for (unsigned col = k + 1; col < iN; col++)
|
||||
if (touched[k][col])
|
||||
{
|
||||
touched[row][col] = true;
|
||||
ops += 2;
|
||||
m_ops += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
|
||||
log().verbose("Number of mults/adds for {1}: {2}", name(), m_ops);
|
||||
|
||||
if ((0))
|
||||
for (unsigned k = 0; k < iN; k++)
|
||||
|
@ -139,6 +139,9 @@ public:
|
||||
return std::pair<pstring, pstring>("", plib::pfmt("/* solver doesn't support static compile */\n\n"));
|
||||
}
|
||||
|
||||
/* return number of floating point operations for solve */
|
||||
std::size_t ops() { return m_ops; }
|
||||
|
||||
protected:
|
||||
|
||||
matrix_solver_t(netlist_t &anetlist, const pstring &name,
|
||||
@ -191,6 +194,7 @@ private:
|
||||
|
||||
void step(const netlist_time &delta);
|
||||
|
||||
std::size_t m_ops;
|
||||
const eSortType m_sort;
|
||||
};
|
||||
|
||||
|
@ -14,18 +14,6 @@
|
||||
#include "nld_matrix_solver.h"
|
||||
#include "vector_base.h"
|
||||
|
||||
/* Disabling dynamic allocation gives a ~10% boost in performance
|
||||
* This flag has been added to support continuous storage for arrays
|
||||
* going forward in case we implement cuda solvers in the future.
|
||||
*/
|
||||
#define NL_USE_DYNAMIC_ALLOCATION (0)
|
||||
#define TEST_PARALLEL (0 )
|
||||
|
||||
#if TEST_PARALLEL
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#endif
|
||||
|
||||
namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
@ -34,92 +22,9 @@ namespace netlist
|
||||
//#define nl_ext_double long double // slightly slower
|
||||
#define nl_ext_double nl_double
|
||||
|
||||
#if TEST_PARALLEL
|
||||
#define MAXTHR 10
|
||||
static constexpr int num_thr = 3;
|
||||
|
||||
struct thr_intf
|
||||
{
|
||||
virtual ~thr_intf() = default;
|
||||
virtual void do_work(const int id, void *param) = 0;
|
||||
};
|
||||
|
||||
struct ti_t
|
||||
{
|
||||
/*volatile */std::atomic<int> lo;
|
||||
thr_intf *intf;
|
||||
void *params;
|
||||
// int block[29]; /* make it 256 bytes */
|
||||
};
|
||||
|
||||
static ti_t ti[MAXTHR];
|
||||
static std::thread thr[MAXTHR];
|
||||
|
||||
int thr_init = 0;
|
||||
|
||||
static void thr_process_proc(int id)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
while (ti[id].lo.load() == 0)
|
||||
;
|
||||
if (ti[id].lo.load() == 2)
|
||||
return;
|
||||
ti[id].intf->do_work(id, ti[id].params);
|
||||
ti[id].lo.store(0);
|
||||
}
|
||||
}
|
||||
|
||||
static void thr_process(int id, thr_intf *intf, void *params)
|
||||
{
|
||||
ti[id].intf = intf;
|
||||
ti[id].params = params;
|
||||
ti[id].lo.store(1);
|
||||
}
|
||||
|
||||
static void thr_wait()
|
||||
{
|
||||
int c=1;
|
||||
while (c > 0)
|
||||
{
|
||||
c=0;
|
||||
for (int i=0; i<num_thr; i++)
|
||||
c += ti[i].lo.load();
|
||||
}
|
||||
}
|
||||
|
||||
static void thr_initialize()
|
||||
{
|
||||
thr_init++;
|
||||
if (thr_init == 1)
|
||||
{
|
||||
for (int i=0; i<num_thr; i++)
|
||||
{
|
||||
ti[i].lo = 0;
|
||||
thr[i] = std::thread(thr_process_proc, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void thr_dispose()
|
||||
{
|
||||
thr_init--;
|
||||
if (thr_init == 0)
|
||||
{
|
||||
for (int i=0; i<num_thr; i++)
|
||||
ti[i].lo = 2;
|
||||
for (int i=0; i<num_thr; i++)
|
||||
thr[i].join();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
#if TEST_PARALLEL
|
||||
class matrix_solver_direct_t: public matrix_solver_t, public thr_intf
|
||||
#else
|
||||
class matrix_solver_direct_t: public matrix_solver_t
|
||||
#endif
|
||||
{
|
||||
friend class matrix_solver_t;
|
||||
public:
|
||||
@ -143,24 +48,10 @@ protected:
|
||||
template <typename T>
|
||||
void LE_back_subst(T * RESTRICT x);
|
||||
|
||||
#if TEST_PARALLEL
|
||||
int x_i[10];
|
||||
int x_start[10];
|
||||
int x_stop[10];
|
||||
virtual void do_work(const int id, void *param) override;
|
||||
#endif
|
||||
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
template <typename T1, typename T2>
|
||||
inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; }
|
||||
template <typename T1>
|
||||
inline nl_ext_double &RHS(const T1 &r) { return m_A[r * m_pitch + N()]; }
|
||||
#else
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
|
||||
template <typename T1>
|
||||
nl_ext_double &RHS(const T1 &r) { return m_A[r][N()]; }
|
||||
#endif
|
||||
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
|
||||
|
||||
private:
|
||||
@ -168,11 +59,8 @@ private:
|
||||
static constexpr std::size_t m_pitch = (((storage_N + 1) + 7) / 8) * 8;
|
||||
//static const std::size_t m_pitch = (((storage_N + 1) + 15) / 16) * 16;
|
||||
//static const std::size_t m_pitch = (((storage_N + 1) + 31) / 32) * 32;
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
nl_ext_double * RESTRICT m_A;
|
||||
#else
|
||||
nl_ext_double m_A[storage_N][m_pitch];
|
||||
#endif
|
||||
|
||||
//nl_ext_double m_RHSx[storage_N];
|
||||
|
||||
const std::size_t m_dim;
|
||||
@ -186,12 +74,6 @@ private:
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
plib::pfree_array(m_A);
|
||||
#endif
|
||||
#if TEST_PARALLEL
|
||||
thr_dispose();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
@ -215,65 +97,14 @@ void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if TEST_PARALLEL
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_direct_t<m_N, storage_N>::do_work(const int id, void *param)
|
||||
{
|
||||
const int i = x_i[id];
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / A(i,i);
|
||||
const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data();
|
||||
const unsigned e = m_terms[i]->m_nzrd.size();
|
||||
|
||||
/* Eliminate column i from row j */
|
||||
|
||||
const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data();
|
||||
const unsigned sj = x_start[id];
|
||||
const unsigned se = x_stop[id];
|
||||
for (unsigned jb = sj; jb < se; jb++)
|
||||
{
|
||||
const unsigned j = pb[jb];
|
||||
const nl_double f1 = - A(j,i) * f;
|
||||
for (unsigned k = 0; k < e; k++)
|
||||
A(j,p[k]) += A(i,p[k]) * f1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
if (!(!TEST_PARALLEL && m_params.m_pivot))
|
||||
if (!m_params.m_pivot)
|
||||
{
|
||||
for (std::size_t i = 0; i < kN; i++)
|
||||
{
|
||||
#if TEST_PARALLEL
|
||||
const unsigned eb = m_terms[i]->m_nzbd.size();
|
||||
if (eb > 0)
|
||||
{
|
||||
//printf("here %d\n", eb);
|
||||
unsigned chunks = (eb + num_thr) / (num_thr + 1);
|
||||
for (int p=0; p < num_thr + 1; p++)
|
||||
{
|
||||
x_i[p] = i;
|
||||
x_start[p] = chunks * p;
|
||||
x_stop[p] = std::min(chunks*(p+1), eb);
|
||||
if (p<num_thr && x_start[p] < x_stop[p]) thr_process(p, this, nullptr);
|
||||
}
|
||||
if (x_start[num_thr] < x_stop[num_thr])
|
||||
do_work(num_thr, nullptr);
|
||||
thr_wait();
|
||||
}
|
||||
else if (eb > 0)
|
||||
{
|
||||
x_i[0] = i;
|
||||
x_start[0] = 0;
|
||||
x_stop[0] = eb;
|
||||
do_work(0, nullptr);
|
||||
}
|
||||
#else
|
||||
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / A(i,i);
|
||||
@ -287,7 +118,6 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
|
||||
A(j,k) += A(i,k) * f1;
|
||||
//RHS(j) += RHS(i) * f1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -385,19 +215,9 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool ne
|
||||
this->LE_solve();
|
||||
this->LE_back_subst(new_V);
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = delta(new_V);
|
||||
|
||||
store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
store(new_V);
|
||||
return 1;
|
||||
}
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
@ -419,16 +239,10 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
|
||||
: matrix_solver_t(anetlist, name, ASCENDING, params)
|
||||
, m_dim(size)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
}
|
||||
#if TEST_PARALLEL
|
||||
thr_initialize();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
@ -437,16 +251,10 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
|
||||
: matrix_solver_t(anetlist, name, sort, params)
|
||||
, m_dim(size)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
}
|
||||
#if TEST_PARALLEL
|
||||
thr_initialize();
|
||||
#endif
|
||||
}
|
||||
|
||||
} //namespace devices
|
||||
|
@ -36,19 +36,11 @@ inline unsigned matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED const bo
|
||||
build_LE_RHS<matrix_solver_direct1_t>();
|
||||
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
|
||||
|
||||
nl_double new_val[1] = { RHS(0) / A(0,0) };
|
||||
nl_double new_V[1] = { RHS(0) / A(0,0) };
|
||||
|
||||
if (has_dynamic_devices())
|
||||
{
|
||||
nl_double err = this->delta(new_val);
|
||||
store(new_val);
|
||||
if (err > m_params.m_accuracy )
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
store(new_val);
|
||||
return 1;
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
} //namespace devices
|
||||
|
@ -40,21 +40,13 @@ inline unsigned matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bo
|
||||
const nl_double c = A(1,0);
|
||||
const nl_double d = A(1,1);
|
||||
|
||||
nl_double new_val[2];
|
||||
new_val[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
|
||||
new_val[0] = (RHS(0) - b * new_val[1]) / a;
|
||||
nl_double new_V[2];
|
||||
new_V[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
|
||||
new_V[0] = (RHS(0) - b * new_V[1]) / a;
|
||||
|
||||
if (has_dynamic_devices())
|
||||
{
|
||||
nl_double err = this->delta(new_val);
|
||||
store(new_val);
|
||||
if (err > m_params.m_accuracy )
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
store(new_val);
|
||||
return 1;
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
} //namespace devices
|
||||
|
@ -448,19 +448,9 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
|
||||
this->m_stat_calculations++;
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = this->delta(new_V);
|
||||
|
||||
this->store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->store(new_V);
|
||||
return 1;
|
||||
}
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
} //namespace devices
|
||||
|
@ -186,19 +186,9 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
|
||||
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = this->delta(new_V);
|
||||
|
||||
this->store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->store(new_V);
|
||||
return 1;
|
||||
}
|
||||
const nl_double err = (newton_raphson ? this->delta(new_V) : 0.0);
|
||||
this->store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -114,9 +114,6 @@ private:
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
plib::pfree_array(m_A);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
@ -290,19 +287,9 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
|
||||
|
||||
this->LE_compute_x(new_V);
|
||||
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = delta(new_V);
|
||||
|
||||
store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
store(new_V);
|
||||
return 1;
|
||||
}
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
|
@ -354,19 +354,10 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
if (std::abs(tmp-RHS(i)) > 1e-6)
|
||||
printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i));
|
||||
}
|
||||
if (newton_raphson)
|
||||
{
|
||||
nl_double err = delta(new_V);
|
||||
|
||||
store(new_V);
|
||||
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
store(new_V);
|
||||
return 1;
|
||||
}
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
|
Loading…
Reference in New Issue
Block a user