Cleanup of solver code. (nw)

2025-04-23 08:49:55 +03:00 · 2017-05-08 18:44:54 +02:00 · 2017-05-08 18:44:54 +02:00 · 6dfe04c620
commit 6dfe04c620
parent a27f10c4a7
9 changed files with 46 additions and 289 deletions
--- a/src/lib/netlist/solver/nld_matrix_solver.cpp
+++ b/src/lib/netlist/solver/nld_matrix_solver.cpp
@ -86,6 +86,7 @@ matrix_solver_t::matrix_solver_t(netlist_t &anetlist, const pstring &name,
 	, m_last_step(*this, "m_last_step", netlist_time::zero())
 	, m_fb_sync(*this, "FB_sync")
 	, m_Q_sync(*this, "Q_sync")
+	, m_ops(0)
 	, m_sort(sort)
 {
 	connect_post_start(m_fb_sync, m_Q_sync);
@ -184,12 +185,14 @@ void matrix_solver_t::setup_matrix()
 		for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
 			this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->connected_net_idx()[i], false);

-		m_rails_temp[k]->clear(); // no longer needed
 		m_terms[k]->set_pointers();
 	}

-	for (unsigned k = 0; k < iN; k++)
-		plib::pfree(m_rails_temp[k]); // no longer needed
+	for (terms_for_net_t *rt : m_rails_temp)
+	{
+		rt->clear(); // no longer needed
+		plib::pfree(rt); // no longer needed
+	}

 	m_rails_temp.clear();

@ -227,12 +230,12 @@ void matrix_solver_t::setup_matrix()
 				}
 			}

-		for (unsigned k = 0; k < iN; k++)
+		for (auto &term : m_terms)
 		{
-			int *other = m_terms[k]->connected_net_idx();
-			for (unsigned i = 0; i < m_terms[k]->count(); i++)
+			int *other = term->connected_net_idx();
+			for (unsigned i = 0; i < term->count(); i++)
 				if (other[i] != -1)
-					other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
+					other[i] = get_net_idx(&term->terms()[i]->m_otherterm->net());
 		}
 	}

@ -303,27 +306,27 @@ void matrix_solver_t::setup_matrix()
 			touched[k][m_terms[k]->m_nz[j]] = true;
 	}

-	unsigned ops = 0;
+	m_ops = 0;
 	for (unsigned k = 0; k < iN; k++)
 	{
-		ops++; // 1/A(k,k)
+		m_ops++; // 1/A(k,k)
 		for (unsigned row = k + 1; row < iN; row++)
 		{
 			if (touched[row][k])
 			{
-				ops++;
+				m_ops++;
 				if (!plib::container::contains(m_terms[k]->m_nzbd, row))
 					m_terms[k]->m_nzbd.push_back(row);
 				for (unsigned col = k + 1; col < iN; col++)
 					if (touched[k][col])
 					{
 						touched[row][col] = true;
-						ops += 2;
+						m_ops += 2;
 					}
 			}
 		}
 	}
-	log().verbose("Number of mults/adds for {1}: {2}", name(), ops);
+	log().verbose("Number of mults/adds for {1}: {2}", name(), m_ops);

 	if ((0))
 		for (unsigned k = 0; k < iN; k++)
--- a/src/lib/netlist/solver/nld_matrix_solver.h
+++ b/src/lib/netlist/solver/nld_matrix_solver.h
@ -139,6 +139,9 @@ public:
 		return std::pair<pstring, pstring>("", plib::pfmt("/* solver doesn't support static compile */\n\n"));
 	}

+	/* return number of floating point operations for solve */
+	std::size_t ops() { return m_ops; }
+
 protected:

 	matrix_solver_t(netlist_t &anetlist, const pstring &name,
@ -191,6 +194,7 @@ private:

 	void step(const netlist_time &delta);

+	std::size_t m_ops;
 	const eSortType m_sort;
 };

--- a/src/lib/netlist/solver/nld_ms_direct.h
+++ b/src/lib/netlist/solver/nld_ms_direct.h
@ -14,18 +14,6 @@
 #include "nld_matrix_solver.h"
 #include "vector_base.h"

-/* Disabling dynamic allocation gives a ~10% boost in performance
- * This flag has been added to support continuous storage for arrays
- * going forward in case we implement cuda solvers in the future.
- */
-#define NL_USE_DYNAMIC_ALLOCATION (0)
-#define TEST_PARALLEL (0    )
-
-#if TEST_PARALLEL
-#include <thread>
-#include <atomic>
-#endif
-
 namespace netlist
 {
 	namespace devices
@ -34,92 +22,9 @@ namespace netlist
 //#define nl_ext_double long double // slightly slower
 #define nl_ext_double nl_double

-#if TEST_PARALLEL
-#define MAXTHR 10
-static constexpr int num_thr = 3;
-
-struct thr_intf
-{
-	virtual ~thr_intf() = default;
-	virtual void do_work(const int id, void *param) = 0;
-};
-
-struct ti_t
-{
-	/*volatile */std::atomic<int> lo;
-	thr_intf *intf;
-	void *params;
-//  int block[29]; /* make it 256 bytes */
-};
-
-static ti_t ti[MAXTHR];
-static std::thread thr[MAXTHR];
-
-int thr_init = 0;
-
-static void thr_process_proc(int id)
-{
-	while (true)
-	{
-		while (ti[id].lo.load() == 0)
-			;
-		if (ti[id].lo.load() == 2)
-			return;
-		ti[id].intf->do_work(id, ti[id].params);
-		ti[id].lo.store(0);
-	}
-}
-
-static void thr_process(int id, thr_intf *intf, void *params)
-{
-	ti[id].intf = intf;
-	ti[id].params = params;
-	ti[id].lo.store(1);
-}
-
-static void thr_wait()
-{
-	int c=1;
-	while (c > 0)
-	{
-		c=0;
-		for (int i=0; i<num_thr; i++)
-			c += ti[i].lo.load();
-	}
-}
-
-static void thr_initialize()
-{
-	thr_init++;
-	if (thr_init == 1)
-	{
-		for (int i=0; i<num_thr; i++)
-		{
-			ti[i].lo = 0;
-			thr[i] = std::thread(thr_process_proc, i);
-		}
-	}
-}
-
-static void thr_dispose()
-{
-	thr_init--;
-	if (thr_init == 0)
-	{
-		for (int i=0; i<num_thr; i++)
-			ti[i].lo = 2;
-		for (int i=0; i<num_thr; i++)
-			thr[i].join();
-	}
-}
-#endif

 template <std::size_t m_N, std::size_t storage_N>
-#if TEST_PARALLEL
-class matrix_solver_direct_t: public matrix_solver_t, public thr_intf
-#else
 class matrix_solver_direct_t: public matrix_solver_t
-#endif
 {
 	friend class matrix_solver_t;
 public:
@ -143,24 +48,10 @@ protected:
 	template <typename T>
 	void LE_back_subst(T * RESTRICT x);

-#if TEST_PARALLEL
-	int x_i[10];
-	int x_start[10];
-	int x_stop[10];
-	virtual void do_work(const int id, void *param) override;
-#endif
-
-#if (NL_USE_DYNAMIC_ALLOCATION)
-	template <typename T1, typename T2>
-	inline nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r * m_pitch + c]; }
-	template <typename T1>
-	inline nl_ext_double &RHS(const T1 &r) { return m_A[r * m_pitch + N()]; }
-#else
 	template <typename T1, typename T2>
 	nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
 	template <typename T1>
 	nl_ext_double &RHS(const T1 &r) { return m_A[r][N()]; }
-#endif
 	nl_double m_last_RHS[storage_N]; // right hand side - contains currents

 private:
@ -168,11 +59,8 @@ private:
 	static constexpr std::size_t m_pitch = (((storage_N + 1) + 7) / 8) * 8;
 	//static const std::size_t m_pitch = (((storage_N + 1) + 15) / 16) * 16;
 	//static const std::size_t m_pitch = (((storage_N + 1) + 31) / 32) * 32;
-#if (NL_USE_DYNAMIC_ALLOCATION)
-	nl_ext_double * RESTRICT m_A;
-#else
 	nl_ext_double m_A[storage_N][m_pitch];
-#endif
+
 	//nl_ext_double m_RHSx[storage_N];

 	const std::size_t m_dim;
@ -186,12 +74,6 @@ private:
 template <std::size_t m_N, std::size_t storage_N>
 matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
 {
-#if (NL_USE_DYNAMIC_ALLOCATION)
-	plib::pfree_array(m_A);
-#endif
-#if TEST_PARALLEL
-	thr_dispose();
-#endif
 }

 template <std::size_t m_N, std::size_t storage_N>
@ -215,65 +97,14 @@ void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
 }


-
-#if TEST_PARALLEL
-template <std::size_t m_N, std::size_t storage_N>
-void matrix_solver_direct_t<m_N, storage_N>::do_work(const int id, void *param)
-{
-	const int i = x_i[id];
-	/* FIXME: Singular matrix? */
-	const nl_double f = 1.0 / A(i,i);
-	const unsigned * RESTRICT const p = m_terms[i]->m_nzrd.data();
-	const unsigned e = m_terms[i]->m_nzrd.size();
-
-	/* Eliminate column i from row j */
-
-	const unsigned * RESTRICT const pb = m_terms[i]->m_nzbd.data();
-	const unsigned sj = x_start[id];
-	const unsigned se = x_stop[id];
-	for (unsigned jb = sj; jb < se; jb++)
-	{
-		const unsigned j = pb[jb];
-		const nl_double f1 = - A(j,i) * f;
-		for (unsigned k = 0; k < e; k++)
-			A(j,p[k]) += A(i,p[k]) * f1;
-	}
-}
-#endif
-
 template <std::size_t m_N, std::size_t storage_N>
 void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
 {
 	const std::size_t kN = N();
-	if (!(!TEST_PARALLEL && m_params.m_pivot))
+	if (!m_params.m_pivot)
 	{
 		for (std::size_t i = 0; i < kN; i++)
 		{
-#if TEST_PARALLEL
-			const unsigned eb = m_terms[i]->m_nzbd.size();
-			if (eb > 0)
-			{
-				//printf("here %d\n", eb);
-				unsigned chunks = (eb + num_thr) / (num_thr + 1);
-				for (int p=0; p < num_thr + 1; p++)
-				{
-					x_i[p] = i;
-					x_start[p] = chunks * p;
-					x_stop[p] = std::min(chunks*(p+1), eb);
-					if (p<num_thr && x_start[p] < x_stop[p]) thr_process(p, this, nullptr);
-				}
-				if (x_start[num_thr] < x_stop[num_thr])
-					do_work(num_thr, nullptr);
-				thr_wait();
-			}
-			else if (eb > 0)
-			{
-				x_i[0] = i;
-				x_start[0] = 0;
-				x_stop[0] = eb;
-				do_work(0, nullptr);
-			}
-#else

 			/* FIXME: Singular matrix? */
 			const nl_double f = 1.0 / A(i,i);
@ -287,7 +118,6 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
 					A(j,k) += A(i,k) * f1;
 				//RHS(j) += RHS(i) * f1;
 			}
-#endif
 		}
 	}
 	else
@ -385,19 +215,9 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool ne
 	this->LE_solve();
 	this->LE_back_subst(new_V);

-	if (newton_raphson)
-	{
-		nl_double err = delta(new_V);
-
-		store(new_V);
-
-		return (err > this->m_params.m_accuracy) ? 2 : 1;
-	}
-	else
-	{
-		store(new_V);
-		return 1;
-	}
+	const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
+	store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 template <std::size_t m_N, std::size_t storage_N>
@ -419,16 +239,10 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
 : matrix_solver_t(anetlist, name, ASCENDING, params)
 , m_dim(size)
 {
-#if (NL_USE_DYNAMIC_ALLOCATION)
-	m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
-#endif
 	for (unsigned k = 0; k < N(); k++)
 	{
 		m_last_RHS[k] = 0.0;
 	}
-#if TEST_PARALLEL
-	thr_initialize();
-#endif
 }

 template <std::size_t m_N, std::size_t storage_N>
@ -437,16 +251,10 @@ matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_t &anetli
 : matrix_solver_t(anetlist, name, sort, params)
 , m_dim(size)
 {
-#if (NL_USE_DYNAMIC_ALLOCATION)
-	m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
-#endif
 	for (unsigned k = 0; k < N(); k++)
 	{
 		m_last_RHS[k] = 0.0;
 	}
-#if TEST_PARALLEL
-	thr_initialize();
-#endif
 }

 	} //namespace devices
--- a/src/lib/netlist/solver/nld_ms_direct1.h
+++ b/src/lib/netlist/solver/nld_ms_direct1.h
@ -36,19 +36,11 @@ inline unsigned matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNUSED const bo
 	build_LE_RHS<matrix_solver_direct1_t>();
 	//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);

-	nl_double new_val[1] = { RHS(0) / A(0,0) };
+	nl_double new_V[1] = { RHS(0) / A(0,0) };

-	if (has_dynamic_devices())
-	{
-		nl_double err = this->delta(new_val);
-		store(new_val);
-		if (err > m_params.m_accuracy )
-			return 2;
-		else
-			return 1;
-	}
-	store(new_val);
-	return 1;
+	const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
+	store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 	} //namespace devices
--- a/src/lib/netlist/solver/nld_ms_direct2.h
+++ b/src/lib/netlist/solver/nld_ms_direct2.h
@ -40,21 +40,13 @@ inline unsigned matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bo
 	const nl_double c = A(1,0);
 	const nl_double d = A(1,1);

-	nl_double new_val[2];
-	new_val[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
-	new_val[0] = (RHS(0) - b * new_val[1]) / a;
+	nl_double new_V[2];
+	new_V[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
+	new_V[0] = (RHS(0) - b * new_V[1]) / a;

-	if (has_dynamic_devices())
-	{
-		nl_double err = this->delta(new_val);
-		store(new_val);
-		if (err > m_params.m_accuracy )
-			return 2;
-		else
-			return 1;
-	}
-	store(new_val);
-	return 1;
+	const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
+	store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 	} //namespace devices
--- a/src/lib/netlist/solver/nld_ms_gcr.h
+++ b/src/lib/netlist/solver/nld_ms_gcr.h
@ -448,19 +448,9 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt

 	this->m_stat_calculations++;

-	if (newton_raphson)
-	{
-		nl_double err = this->delta(new_V);
-
-		this->store(new_V);
-
-		return (err > this->m_params.m_accuracy) ? 2 : 1;
-	}
-	else
-	{
-		this->store(new_V);
-		return 1;
-	}
+	const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
+	store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 	} //namespace devices
--- a/src/lib/netlist/solver/nld_ms_gmres.h
+++ b/src/lib/netlist/solver/nld_ms_gmres.h
@ -186,19 +186,9 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
 		return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
 	}

-	if (newton_raphson)
-	{
-		nl_double err = this->delta(new_V);
-
-		this->store(new_V);
-
-		return (err > this->m_params.m_accuracy) ? 2 : 1;
-	}
-	else
-	{
-		this->store(new_V);
-		return 1;
-	}
+	const nl_double err = (newton_raphson ? this->delta(new_V) : 0.0);
+	this->store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 template <typename T>
--- a/src/lib/netlist/solver/nld_ms_sm.h
+++ b/src/lib/netlist/solver/nld_ms_sm.h
@ -114,9 +114,6 @@ private:
 template <std::size_t m_N, std::size_t storage_N>
 matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
 {
-#if (NL_USE_DYNAMIC_ALLOCATION)
-	plib::pfree_array(m_A);
-#endif
 }

 template <std::size_t m_N, std::size_t storage_N>
@ -290,19 +287,9 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton

 	this->LE_compute_x(new_V);

-	if (newton_raphson)
-	{
-		nl_double err = delta(new_V);
-
-		store(new_V);
-
-		return (err > this->m_params.m_accuracy) ? 2 : 1;
-	}
-	else
-	{
-		store(new_V);
-		return 1;
-	}
+	const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
+	store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 template <std::size_t m_N, std::size_t storage_N>
--- a/src/lib/netlist/solver/nld_ms_w.h
+++ b/src/lib/netlist/solver/nld_ms_w.h
@ -354,19 +354,10 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
 			if (std::abs(tmp-RHS(i)) > 1e-6)
 				printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i));
 		}
-	if (newton_raphson)
-	{
-		nl_double err = delta(new_V);

-		store(new_V);
-
-		return (err > this->m_params.m_accuracy) ? 2 : 1;
-	}
-	else
-	{
-		store(new_V);
-		return 1;
-	}
+	const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
+	store(new_V);
+	return (err > this->m_params.m_accuracy) ? 2 : 1;
 }

 template <std::size_t m_N, std::size_t storage_N>