From 45% to 60% to 99%. That's the improvement achieved for a 89x89

audio matrix mostly solved by elimination. Cleaned up some code as well. [Couriersud]
2025-07-07 10:58:41 +03:00 · 2015-06-10 21:49:52 +02:00 · 2015-06-10 21:49:52 +02:00 · dad7a6bab5
commit dad7a6bab5
parent b2a7d75ef0
8 changed files with 221 additions and 174 deletions
--- a/nl_examples/kidniki.c
+++ b/nl_examples/kidniki.c
@ -9,13 +9,14 @@
 NETLIST_START(dummy)
 	SOLVER(Solver, 12000)
 	PARAM(Solver.ACCURACY, 1e-8)
-	PARAM(Solver.NR_LOOPS, 200)
-	PARAM(Solver.GS_LOOPS, 4)
-	PARAM(Solver.SOR_FACTOR, 1)
+	PARAM(Solver.NR_LOOPS, 300)
+	PARAM(Solver.GS_LOOPS, 3)
+	//PARAM(Solver.GS_THRESHOLD, 99) // Force Gaussian elimination here
+	PARAM(Solver.SOR_FACTOR, 1.05)
 	#if 0
-	PARAM(Solver.SOR_FACTOR, 1)
+	//PARAM(Solver.SOR_FACTOR, 1)
 	PARAM(Solver.DYNAMIC_TS, 1)
-	PARAM(Solver.LTE, 1e1)
+	PARAM(Solver.LTE, 1e-1)
 	#endif
 	//FIXME proper models!
 	NET_MODEL(".model 2SC945 NPN(Is=2.04f Xti=3 Eg=1.11 Vaf=6 Bf=400 Ikf=20m Xtb=1.5 Br=3.377 Rc=1 Cjc=1p Mjc=.3333 Vjc=.75 Fc=.5 Cje=25p Mje=.3333 Vje=.75 Tr=450n Tf=20n Itf=0 Vtf=0 Xtf=0 VCEO=45V ICrating=150M MFG=Toshiba)")
@ -352,13 +353,20 @@ NETLIST_START(opamp)

    VCCS(G1)
    PARAM(G1.RI, RES_K(1000))
+#if 1
    PARAM(G1.G, 100)  // typical OP-AMP amplification 100 * 1000 = 100000
    RES(RP1, 1000)
    CAP(CP1, 1.59e-5)   // <== change to 1.59e-3 for 10Khz bandwidth
+#else
+    PARAM(G1.G, 1)  // typical OP-AMP amplification 100 * 1000 = 100000
+    RES(RP1, 100000)
+    CAP(CP1, 1.59e-7)   // <== change to 1.59e-3 for 10Khz bandwidth
+#endif
    VCVS(EBUF)
    PARAM(EBUF.RO, 50)
    PARAM(EBUF.G, 1)

+//    PARAM(EBUF.RI, 1e20)
 //    NET_C(EBUF.ON, GND)

    NET_C(G1.ON, GND)
@ -373,7 +381,15 @@ NETLIST_START(opamp)
    DIODE(DN,"1N914")

    NET_C(DP.K, VCC)
+#if 1
    NET_C(DP.A, DN.K, RP1.1)
+#else
+    RES(RDP, 1000)
+    RES(RDN, 1000)
+    NET_C(RDP.1, DP.A)
+    NET_C(RDN.1, DN.K)
+    NET_C(RDP.2, RDN.2, RP1.1)
+#endif
    NET_C(DN.A, GND)

    NET_C(EBUF.IP, RP1.1)
--- a/src/emu/netlist/analog/nld_ms_direct.h
+++ b/src/emu/netlist/analog/nld_ms_direct.h
@ -36,11 +36,11 @@ protected:

 	ATTR_HOT int solve_non_dynamic(const bool newton_raphson);
 	ATTR_HOT void build_LE_A();
-	ATTR_HOT void build_LE_RHS();
+	ATTR_HOT void build_LE_RHS(nl_double * RESTRICT rhs);
 	ATTR_HOT void LE_solve();
 	ATTR_HOT void LE_back_subst(nl_double * RESTRICT x);
 	ATTR_HOT nl_double delta(const nl_double * RESTRICT V);
-	ATTR_HOT void store(const nl_double * RESTRICT V, const bool store_RHS);
+	ATTR_HOT void store(const nl_double * RESTRICT V);

 	/* bring the whole system to the current time
 	 * Don't schedule a new calculation time. The recalculation has to be
@ -88,7 +88,7 @@ ATTR_HOT nl_double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next
 		 * FIXME: We should extend the logic to use either all nets or
 		 *        only output nets.
 		 */
-		for (unsigned k = 0; k < N(); k++)
+		for (unsigned k = 0, iN=N(); k < iN; k++)
 		{
 			netlist_analog_net_t *n = m_nets[k];

@ -209,7 +209,61 @@ ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_a

 #endif

-	//ATTR_ALIGN nl_double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
+	/* create a list of non zero elements right of the diagonal
+	 * These list anticipate the population of array elements by
+	 * Gaussian elimination.
+	 */
+	for (unsigned k = 0; k < N(); k++)
+	{
+		terms_t * t = m_terms[k];
+		/* pretty brutal */
+		int *other = t->net_other();
+
+		t->m_nz.clear();
+
+		if (k==0)
+			t->m_nzrd.clear();
+		else
+		{
+			t->m_nzrd = m_terms[k-1]->m_nzrd;
+			int j=0;
+			while(j < t->m_nzrd.size())
+			{
+				if (t->m_nzrd[j] < k + 1)
+					t->m_nzrd.remove_at(j);
+				else
+					j++;
+			}
+		}
+
+		for (unsigned j = 0; j < N(); j++)
+		{
+			for (unsigned i = 0; i < t->m_railstart; i++)
+			{
+				if (!t->m_nzrd.contains(other[i]) && other[i] >= k + 1)
+					t->m_nzrd.add(other[i]);
+				if (!t->m_nz.contains(other[i]))
+					t->m_nz.add(other[i]);
+			}
+		}
+		psort_list(t->m_nzrd);
+
+		t->m_nz.add(k);		// add diagonal
+		psort_list(t->m_nz);
+	}
+
+	if(0)
+		for (unsigned k = 0; k < N(); k++)
+		{
+			printf("%3d: ", k);
+			for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
+				printf(" %3d", m_terms[k]->m_nzrd[j]);
+			printf("\n");
+		}
+
+	/*
+	 * save states
+	 */
 	save(NLNAME(m_RHS));
 	save(NLNAME(m_last_RHS));
 	save(NLNAME(m_last_V));
@ -229,9 +283,10 @@ ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_a
 template <unsigned m_N, unsigned _storage_N>
 ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
 {
-	for (unsigned k = 0; k < N(); k++)
+	const unsigned iN = N();
+	for (unsigned k = 0; k < iN; k++)
 	{
-		for (unsigned i=0; i < N(); i++)
+		for (unsigned i=0; i < iN; i++)
 			m_A[k][i] = 0.0;

 		nl_double akk  = 0.0;
@ -247,14 +302,15 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_A()
 		m_A[k][k] += akk;

 		for (unsigned i = 0; i < railstart; i++)
-			m_A[k][net_other[i]] += -go[i];
+			m_A[k][net_other[i]] -= go[i];
 	}
 }

 template <unsigned m_N, unsigned _storage_N>
-ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS()
+ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS(nl_double * RESTRICT rhs)
 {
-	for (unsigned k = 0; k < N(); k++)
+	const unsigned iN = N();
+	for (unsigned k = 0; k < iN; k++)
 	{
 		nl_double rhsk_a = 0.0;
 		nl_double rhsk_b = 0.0;
@ -271,7 +327,7 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE_RHS()
 			//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
 			rhsk_b = rhsk_b + go[i] * *other_cur_analog[i];

-		m_RHS[k] = rhsk_a + rhsk_b;
+		rhs[k] = rhsk_a + rhsk_b;
 	}
 }

@ -314,16 +370,38 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::LE_solve()

 		/* FIXME: Singular matrix? */
 		const nl_double f = 1.0 / m_A[i][i];
+		const double * RESTRICT s = &m_A[i][0];
+		const int *p = m_terms[i]->m_nzrd.data();
+		const unsigned e = m_terms[i]->m_nzrd.size();

 		/* Eliminate column i from row j */

 		for (unsigned j = i + 1; j < kN; j++)
 		{
-			const nl_double f1 = - m_A[j][i] * f;
+			double * RESTRICT d = &m_A[j][0];
+			const nl_double f1 = - d[i] * f;
 			if (f1 != NL_FCONST(0.0))
 			{
-				for (unsigned k = i + 1; k < kN; k++)
-					m_A[j][k] += m_A[i][k] * f1;
+#if 0
+				/*  The code below is 30% faster than the original
+				 *  implementation which is given here for reference.
+				 *
+				 *	for (unsigned k = i + 1; k < kN; k++)
+				 *		m_A[j][k] = m_A[j][k] + m_A[i][k] * f1;
+				 */
+				double * RESTRICT d = &m_A[j][i+1];
+				const double * RESTRICT s = &m_A[i][i+1];
+				const int e = kN - i - 1;
+
+				for (int k = 0; k < e; k++)
+					d[k] = d[k] + s[k] * f1;
+#else
+				for (unsigned k = 0; k < e; k++)
+				{
+					const unsigned pk = p[k];
+					d[pk] += s[pk] * f1;
+				}
+#endif
 				m_RHS[j] += m_RHS[i] * f1;
 			}
 		}
@ -341,9 +419,28 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::LE_back_subst(
 	{
 		nl_double tmp = 0;

+#if 1
+#if 0
+		const double * RESTRICT A = &m_A[j][j+1];
+		const double * RESTRICT xp = &x[j+1];
+		const int e = kN - j - 1;
+		for (int k = 0; k < e; k++)
+			tmp += A[k] * xp[k];
+#else
+		const double * RESTRICT A = &m_A[j][0];
+		const int *p = m_terms[j]->m_nzrd.data();
+		const unsigned e = m_terms[j]->m_nzrd.size();
+
+		for (unsigned k = 0; k < e; k++)
+		{
+			const unsigned pk = p[k];
+			tmp += A[pk] * x[pk];
+		}
+#endif
+#else
 		for (unsigned k = j + 1; k < kN; k++)
 			tmp += m_A[j][k] * x[k];
-
+#endif
 		x[j] = (m_RHS[j] - tmp) / m_A[j][j];
 	}
 #if 0
@ -363,40 +460,32 @@ template <unsigned m_N, unsigned _storage_N>
 ATTR_HOT nl_double netlist_matrix_solver_direct_t<m_N, _storage_N>::delta(
 		const nl_double * RESTRICT V)
 {
+	/* FIXME: Ideally we should also include currents (RHS) here. This would
+	 * need a revaluation of the right hand side after voltages have been updated
+	 * and thus belong into a different calculation. This applies to all solvers.
+	 */
+
+	const unsigned iN = this->N();
 	nl_double cerr = 0;
-	nl_double cerr2 = 0;
-	for (unsigned i = 0; i < this->N(); i++)
-	{
-		const nl_double e = nl_math::abs(V[i] - this->m_nets[i]->m_cur_Analog);
-		const nl_double e2 = nl_math::abs(m_RHS[i] - this->m_last_RHS[i]);
-		cerr = (e > cerr ? e : cerr);
-		cerr2 = (e2 > cerr2 ? e2 : cerr2);
-	}
-	// FIXME: Review
-	return cerr + cerr2*NL_FCONST(100000.0);
+	for (unsigned i = 0; i < iN; i++)
+		cerr = std::max(cerr, nl_math::abs(V[i] - this->m_nets[i]->m_cur_Analog));
+	return cerr;
 }

 template <unsigned m_N, unsigned _storage_N>
 ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::store(
-		const nl_double * RESTRICT V, const bool store_RHS)
+		const nl_double * RESTRICT V)
 {
-	for (unsigned i = 0; i < this->N(); i++)
+	for (unsigned i = 0, iN=N(); i < iN; i++)
 	{
 		this->m_nets[i]->m_cur_Analog = V[i];
 	}
-	if (store_RHS)
-	{
-		for (unsigned i = 0; i < this->N(); i++)
-		{
-			this->m_last_RHS[i] = m_RHS[i];
-		}
-	}
 }

 template <unsigned m_N, unsigned _storage_N>
 ATTR_HOT nl_double netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve()
 {
-	solve_base<netlist_matrix_solver_direct_t>(this);
+	this->solve_base(this);
 	return this->compute_next_timestep();
 }

@ -408,17 +497,17 @@ ATTR_HOT int netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic(

 	this->LE_back_subst(new_v);

-	if (this->is_dynamic())
+	if (newton_raphson)
 	{
 		nl_double err = delta(new_v);

-		store(new_v, true);
+		store(new_v);

 		return (err > this->m_params.m_accuracy) ? 2 : 1;
 	}
 	else
 	{
-		store(new_v, false);  // ==> No need to store RHS
+		store(new_v);
 		return 1;
 	}
 }
@ -427,7 +516,11 @@ template <unsigned m_N, unsigned _storage_N>
 ATTR_HOT inline int netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic(const bool newton_raphson)
 {
 	this->build_LE_A();
-	this->build_LE_RHS();
+	this->build_LE_RHS(m_last_RHS);
+
+	for (unsigned i=0, iN=N(); i < iN; i++)
+		m_RHS[i] = m_last_RHS[i];
+
 	this->LE_solve();

 	return this->solve_non_dynamic(newton_raphson);
--- a/src/emu/netlist/analog/nld_ms_direct1.h
+++ b/src/emu/netlist/analog/nld_ms_direct1.h
@ -38,7 +38,7 @@ ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic(ATTR_UNU
 {
 	netlist_analog_net_t *net = m_nets[0];
 	this->build_LE_A();
-	this->build_LE_RHS();
+	this->build_LE_RHS(m_RHS);
 	//NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);

 	nl_double new_val =  m_RHS[0] / m_A[0][0];
--- a/src/emu/netlist/analog/nld_ms_direct2.h
+++ b/src/emu/netlist/analog/nld_ms_direct2.h
@ -39,7 +39,7 @@ ATTR_HOT nl_double netlist_matrix_solver_direct2_t::vsolve()
 ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNUSED const bool newton_raphson)
 {
 	build_LE_A();
-	build_LE_RHS();
+	build_LE_RHS(m_RHS);

 	const nl_double a = m_A[0][0];
 	const nl_double b = m_A[0][1];
@ -53,13 +53,13 @@ ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic(ATTR_UNU
 	if (is_dynamic())
 	{
 		nl_double err = this->delta(new_val);
-		store(new_val, true);
+		store(new_val);
 		if (err > m_params.m_accuracy )
 			return 2;
 		else
 			return 1;
 	}
-	store(new_val, false);
+	store(new_val);
 	return 1;
 }

--- a/src/emu/netlist/analog/nld_ms_sor.h
+++ b/src/emu/netlist/analog/nld_ms_sor.h
@ -100,8 +100,7 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn
 	 * omega = 2.0 / (1.0 + nl_math::sqrt(1-rho))
 	 */

-	const nl_double ws = this->m_params.m_sor; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
-	//const nl_double ws = 2.0 / (1.0 + sin(3.14159 * 4 / (double) (this->N())));
+	const nl_double ws = this->m_params.m_sor;

 	ATTR_ALIGN nl_double w[_storage_N];
 	ATTR_ALIGN nl_double one_m_w[_storage_N];
@ -114,34 +113,31 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn
 		nl_double gabs_t = 0.0;
 		nl_double RHS_t = 0.0;

+		const int term_count = this->m_terms[k]->count();
+		const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
+		const nl_double * const RESTRICT go = this->m_terms[k]->go();
+		const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
+		const nl_double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
+
 		new_V[k] = this->m_nets[k]->m_cur_Analog;

+		for (unsigned i = 0; i < term_count; i++)
 		{
-			const int term_count = this->m_terms[k]->count();
-			const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
-			const nl_double * const RESTRICT go = this->m_terms[k]->go();
-			const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
-			const nl_double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
-
-			for (int i = 0; i < term_count; i++)
-			{
-				gtot_t = gtot_t + gt[i];
-				RHS_t = RHS_t + Idr[i];
-			}
-
-			if (USE_GABS)
-				for (int i = 0; i < term_count; i++)
-					gabs_t = gabs_t + nl_math::abs(go[i]);
-
-			for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
-				RHS_t = RHS_t  + go[i] * *other_cur_analog[i];
+			gtot_t = gtot_t + gt[i];
+			RHS_t = RHS_t + Idr[i];
 		}

+		for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
+			RHS_t = RHS_t  + go[i] * *other_cur_analog[i];
+
 		RHS[k] = RHS_t;

 		if (USE_GABS)
 		{
-			gabs_t *= NL_FCONST(0.95); // avoid rounding issues
+			for (int i = 0; i < term_count; i++)
+				gabs_t = gabs_t + nl_math::abs(go[i]);
+
+			gabs_t *= NL_FCONST(0.5); // derived by try and error
 			if (gabs_t <= gtot_t)
 			{
 				w[k] = ws / gtot_t;
@ -164,7 +160,7 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn

 	do {
 		resched = false;
-
+		double err = 0;
 		for (int k = 0; k < iN; k++)
 		{
 			const int * RESTRICT net_other = this->m_terms[k]->net_other();
@ -177,10 +173,13 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_t<m_N, _storage_N>::vsolve_non_dyn

 			const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];

-			resched = resched || (nl_math::abs(new_val - new_V[k]) > accuracy);
+			err = std::max(nl_math::abs(new_val - new_V[k]), err);
 			new_V[k] = new_val;
 		}

+		if (err > accuracy)
+			resched = true;
+
 		resched_cnt++;
 	} while (resched && (resched_cnt < this->m_params.m_gs_loops));

--- a/src/emu/netlist/analog/nld_ms_sor_mat.h
+++ b/src/emu/netlist/analog/nld_ms_sor_mat.h
@ -30,11 +30,6 @@ public:
 		, m_gs_fail(0)
 		, m_gs_total(0)
 		{
-			pstring p = nl_util::environment("NETLIST_STATS");
-			if (p != "")
-				m_log_stats = (bool) p.as_long();
-			else
-				m_log_stats = false;
 		}

 	virtual ~netlist_matrix_solver_SOR_mat_t() {}
@ -53,8 +48,6 @@ private:
 	nl_double m_lp_fact;
 	int m_gs_fail;
 	int m_gs_total;
-	bool m_log_stats;
-
 };

 // ----------------------------------------------------------------------------------------
@ -64,15 +57,15 @@ private:
 template <unsigned m_N, unsigned _storage_N>
 void netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::log_stats()
 {
-	if (this->m_stat_calculations != 0 && m_log_stats)
+	if (this->m_stat_calculations != 0 && this->m_params.m_log_stats)
 	{
-		this->netlist().log("==============================================\n");
-		this->netlist().log("Solver %s\n", this->name().cstr());
-		this->netlist().log("       ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
-		this->netlist().log("       has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
-		this->netlist().log("       has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
-		this->netlist().log("       %6.3f average newton raphson loops\n", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
-		this->netlist().log("       %10d invocations (%6d Hz)  %10d gs fails (%6.2f%%) %6.3f average\n",
+		this->netlist().log("==============================================");
+		this->netlist().log("Solver %s", this->name().cstr());
+		this->netlist().log("       ==> %d nets", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
+		this->netlist().log("       has %s elements", this->is_dynamic() ? "dynamic" : "no dynamic");
+		this->netlist().log("       has %s elements", this->is_timestep() ? "timestep" : "no timestep");
+		this->netlist().log("       %6.3f average newton raphson loops", (double) this->m_stat_newton_raphson / (double) this->m_stat_vsolver_calls);
+		this->netlist().log("       %10d invocations (%6d Hz)  %10d gs fails (%6.2f%%) %6.3f average",
 				this->m_stat_calculations,
 				this->m_stat_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
 				this->m_gs_fail,
@ -149,20 +142,19 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non


 	ATTR_ALIGN nl_double new_v[_storage_N] = { 0.0 };
-	const int iN = this->N();
+	const unsigned iN = this->N();

 	bool resched = false;

 	int  resched_cnt = 0;

 	this->build_LE_A();
-	this->build_LE_RHS();
+	this->build_LE_RHS(this->m_RHS);

-#if 1
 #if 0
 	static int ws_cnt = 0;
 	ws_cnt++;
-	if (0 && ws_cnt % 100 == 0)
+	if (1 && ws_cnt % 200 == 0)
 	{
 		// update omega
 		nl_double lambdaN = 0;
@ -181,8 +173,8 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non
 			for (int i=0; i<iN; i++)
 				s = s + nl_math::abs(this->m_A[k][i]);
 			akk = s / akk - 1.0;
-			//if ( akk > lambdaN)
-			//  lambdaN = akk;
+			if ( akk > lambdaN)
+			    lambdaN = akk;
 			if (akk < lambda1)
 				lambda1 = akk;
 	#endif
@ -191,99 +183,31 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non

 		//ws = 2.0 / (2.0 - lambdaN - lambda1);
 		m_omega = 2.0 / (2.0 - lambda1);
-		//printf("%f\n", ws);
+		//printf("%f %f %f\n", m_omega, lambda1, lambdaN);
 	}
 #endif

 	for (int k = 0; k < iN; k++)
 		new_v[k] = this->m_nets[k]->m_cur_Analog;

-#else
-	{
-		nl_double frob;
-		frob = 0;
-		nl_double rmin = 1e99, rmax = -1e99;
-		for (int k = 0; k < iN; k++)
-		{
-			new_v[k] = this->m_nets[k]->m_cur_Analog;
-			nl_double s=0.0;
-			for (int i = 0; i < iN; i++)
-			{
-				frob += this->m_A[k][i] * this->m_A[k][i];
-				s = s + nl_math::abs(this->m_A[k][i]);
-			}
-
-			if (s<rmin)
-				rmin = s;
-			if (s>rmax)
-				rmax = s;
-		}
-#if 0
-		nl_double frobA = nl_math::sqrt(frob /(iN));
-		if (1 &&frobA < 1.0)
-			//ws = 2.0 / (1.0 + nl_math::sqrt(1.0-frobA));
-			ws = 2.0 / (2.0 - frobA);
-		else
-			ws = 1.0;
-		ws = 0.9;
-#else
-		// calculate an estimate for rho.
-		// This is based on the Perron???Frobenius theorem for positive matrices.
-		// No mathematical proof here. The following estimates the
-		// optimal relaxation parameter pretty well. Unfortunately, the
-		// overhead is bigger than the gain. Consequently the fast GS below
-		// uses a fixed GS. One can however use this here to determine a
-		// suitable parameter.
-		nl_double rm = (rmax + rmin) * 0.5;
-		if (rm < 1.0)
-			ws = 2.0 / (1.0 + nl_math::sqrt(1.0-rm));
-		else
-			ws = 1.0;
-		if (ws > 1.02 && rmax > 1.001)
-			printf("rmin %f rmax %f ws %f\n", rmin, rmax, ws);
-#endif
-	}
-#endif
-	// Frobenius norm for (D-L)^(-1)U
-	//nl_double frobU;
-	//nl_double frobL;
-	//nl_double norm;
 	do {
 		resched = false;
 		nl_double cerr = 0.0;
-		//frobU = 0;
-		//frobL = 0;
-		//norm = 0;

 		for (int k = 0; k < iN; k++)
 		{
 			nl_double Idrive = 0;
-			//nl_double norm_t = 0;
-			// Reduction loops need -ffast-math
-			for (int i = 0; i < iN; i++)
-				Idrive += this->m_A[k][i] * new_v[i];

-#if 0
-			for (int i = 0; i < iN; i++)
-			{
-				if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
-				if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
-				norm_t += nl_math::abs(this->m_A[k][i]);
-			}
-#endif
-			//if (norm_t > norm) norm = norm_t;
-#if 0
-			const nl_double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
+			const double * RESTRICT A = &this->m_A[k][0];
+			const int *p = this->m_terms[k]->m_nz.data();
+			const unsigned e = this->m_terms[k]->m_nz.size();

-			const nl_double e = nl_math::abs(new_val - new_v[k]);
-			cerr = (e > cerr ? e : cerr);
-			new_v[k] = new_val;
-#else
-			const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / this->m_A[k][k];
-			const nl_double adelta = nl_math::abs(delta);
-			cerr = (adelta > cerr ? adelta : cerr);
+			for (unsigned i = 0; i < e; i++)
+				Idrive = Idrive + A[p[i]] * new_v[p[i]];
+
+			const nl_double delta = m_omega * (this->m_RHS[k] - Idrive) / A[k];
+			cerr = std::max(cerr, nl_math::abs(delta));
 			new_v[k] += delta;
-#endif
 		}

 		if (cerr > this->m_params.m_accuracy)
@ -291,14 +215,9 @@ ATTR_HOT inline int netlist_matrix_solver_SOR_mat_t<m_N, _storage_N>::vsolve_non
 			resched = true;
 		}
 		resched_cnt++;
-		//ATTR_UNUSED nl_double frobUL = nl_math::sqrt((frobU + frobL) / (double) (iN) / (double) (iN));
 	} while (resched && (resched_cnt < this->m_params.m_gs_loops));
-	//printf("Frobenius %f %f %f %f %f\n", nl_math::sqrt(frobU), nl_math::sqrt(frobL), frobUL, frobA, norm);
-	//printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + nl_math::sqrt(1-frobUL)), 2.0 / (1.0 + nl_math::sqrt(1-frobA)) ); //        printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
-	//printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); //        printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));

-
-	this->store(new_v, false);
+	this->store(new_v);
 	this->m_gs_total += resched_cnt;
 	if (resched)
 	{
--- a/src/emu/netlist/analog/nld_solver.h
+++ b/src/emu/netlist/analog/nld_solver.h
@ -81,6 +81,8 @@ class terms_t

 	unsigned m_railstart;

+	plist_t<int> m_nzrd; /* non zero right of the diagonal for elimination */
+	plist_t<int> m_nz;   /* all non zero for multiplication */
 private:
 	plist_t<netlist_terminal_t *> m_term;
 	plist_t<int> m_net_other;
--- a/src/emu/netlist/plib/plists.h
+++ b/src/emu/netlist/plib/plists.h
@ -11,6 +11,7 @@
 #define PLISTS_H_

 #include <cstring>
+#include <algorithm>

 #include "palloc.h"
 #include "pstring.h"
@ -582,4 +583,21 @@ public:
 	}
 };

+// ----------------------------------------------------------------------------------------
+// sort a list ... slow, I am lazy
+// elements must support ">" operator.
+// ----------------------------------------------------------------------------------------
+
+template<typename Class>
+static inline void psort_list(Class &sl)
+{
+	for(int i = 0; i < (int) sl.size() - 1; i++)
+	{
+        for(int j = i + 1; j < sl.size(); j++)
+            if(sl[i] > sl[j])
+            	std::swap(sl[i], sl[j]);
+
+	}
+}
+
 #endif /* PLISTS_H_ */