netlist: fix a bug and add some performance tweaks. (nw)

2025-07-15 06:27:46 +03:00 · 2019-02-20 20:16:08 +01:00 · 2019-02-20 20:16:08 +01:00 · a5f3787058
commit a5f3787058
parent 085ddc5daf
6 changed files with 74 additions and 33 deletions
--- a/src/lib/netlist/nl_config.h
+++ b/src/lib/netlist/nl_config.h
@ -28,7 +28,7 @@
 * Your mileage may vary.
 *
 */
-#define USE_MEMPOOL                 (1)
+#define USE_MEMPOOL                 (0)

 /*! Store input values in logic_terminal_t.
 *
--- a/src/lib/netlist/plib/gmres.h
+++ b/src/lib/netlist/plib/gmres.h
@ -338,7 +338,7 @@ namespace plib
 		plib::parray<float_type, RESTART + 1> m_y;       		/* mr + 1 */

 		//plib::parray<float_type, SIZE> m_v[RESTART + 1];  /* mr + 1, n */
-		std::array<std::array<float_type, storage_N>, RESTART + 1> m_v;  /* mr + 1, n */
+		plib::parray<plib::parray<float_type, storage_N>, RESTART + 1> m_v;  /* mr + 1, n */

 		std::size_t m_size;

--- a/src/lib/netlist/plib/pmempool.h
+++ b/src/lib/netlist/plib/pmempool.h
@ -48,17 +48,18 @@ namespace plib {
 	private:
 		struct block
 		{
-			block(mempool *mp)
+			block(mempool *mp, std::size_t min_bytes)
 			: m_num_alloc(0)
-			, m_free(mp->m_min_alloc)
 			, m_cur(0)
 			, m_data(nullptr)
 			, m_mempool(mp)
 			{
-				std::size_t alloc_bytes = (mp->m_min_alloc + mp->m_min_align - 1) & ~(mp->m_min_align - 1);
+				min_bytes = std::max(mp->m_min_alloc, min_bytes);
+				m_free = min_bytes;
+				std::size_t alloc_bytes = (min_bytes + mp->m_min_align - 1) & ~(mp->m_min_align - 1);
 				m_data_allocated = static_cast<char *>(::operator new(alloc_bytes));
 				void *r = m_data_allocated;
-				std::align(mp->m_min_align, mp->m_min_alloc, r, alloc_bytes);
+				std::align(mp->m_min_align, min_bytes, r, alloc_bytes);
 				m_data  = reinterpret_cast<char *>(r);
 			}
 			std::size_t m_num_alloc;
@ -80,9 +81,9 @@ namespace plib {
 		};


-		block * new_block()
+		block * new_block(std::size_t min_bytes)
 		{
-			auto *b = new block(this);
+			auto *b = new block(this, min_bytes);
 			m_blocks.push_back(b);
 			return b;
 		}
@ -144,7 +145,7 @@ namespace plib {
 				}
 			}
 			{
-				block *b = new_block();
+				block *b = new_block(rs);
 				b->m_num_alloc = 1;
 				b->m_free = m_min_alloc - rs;
 				auto ret = reinterpret_cast<void *>(b->m_data + b->m_cur);
--- a/src/lib/netlist/plib/vector_ops.h
+++ b/src/lib/netlist/plib/vector_ops.h
@ -28,7 +28,7 @@ namespace plib
 	template<typename VT, typename T>
 	void vec_set_scalar (const std::size_t n, VT &v, T && scalar)
 	{
-		const T s(std::forward<T>(scalar));
+		const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
 		for ( std::size_t i = 0; i < n; i++ )
 			v[i] = s;
 	}
@ -43,25 +43,50 @@ namespace plib
 	template<typename T, typename V1, typename V2>
 	T vec_mult (const std::size_t n, const V1 & v1, const V2 & v2 )
 	{
-		T value = 0.0;
-		for ( std::size_t i = 0; i < n; i++ )
-			value += v1[i] * v2[i];
-		return value;
+		PALIGNAS_VECTOROPT() T value[8] = {0};
+		for (std::size_t i = 0; i < n ; i++ )
+		{
+			value[i & 7] += v1[i] * v2[i];
+		}
+		return value[0] + value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7];
 	}

 	template<typename T, typename VT>
 	T vec_mult2 (const std::size_t n, const VT &v)
 	{
-		T value = 0.0;
-		for ( std::size_t i = 0; i < n; i++ )
-			value += v[i] * v[i];
-		return value;
+		PALIGNAS_VECTOROPT() T value[8] = {0};
+		for (std::size_t i = 0; i < n ; i++ )
+		{
+			value[i & 7] += v[i] * v[i];
+		}
+		return value[0] + value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7];
+	}
+
+	template<typename T, typename VT>
+	T vec_sum (const std::size_t n, const VT &v)
+	{
+		if (n<8)
+		{
+			PALIGNAS_VECTOROPT() T value(0);
+			for (std::size_t i = 0; i < n ; i++ )
+				value += v[i];
+
+			return value;
+		}
+		else
+		{
+			PALIGNAS_VECTOROPT() T value[8] = {0};
+			for (std::size_t i = 0; i < n ; i++ )
+				value[i & 7] += v[i];
+
+			return ((value[0] + value[1]) + (value[2] + value[3])) + ((value[4] + value[5]) + (value[6] + value[7]));
+		}
 	}

 	template<typename VV, typename T, typename VR>
 	void vec_mult_scalar (const std::size_t n, const VV & v, T && scalar, VR & result)
 	{
-		const T s(std::forward<T>(scalar));
+		const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
 		for ( std::size_t i = 0; i < n; i++ )
 			result[i] = s * v[i];
 	}
@ -69,9 +94,9 @@ namespace plib
 	template<typename VV, typename T, typename VR>
 	void vec_add_mult_scalar (const std::size_t n, const VV & v, T && scalar, VR & result)
 	{
-		const T s(std::forward<T>(scalar));
+		const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
 		for ( std::size_t i = 0; i < n; i++ )
-			result[i] = result[i] + s * v[i];
+			result[i] += s * v[i];
 	}

 	template<typename T>
@ -98,9 +123,9 @@ namespace plib
 	template<typename V, typename T>
 	void vec_scale(const std::size_t n, V & v, T &&scalar)
 	{
-		const T s(std::forward<T>(scalar));
+		const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
 		for ( std::size_t i = 0; i < n; i++ )
-			v[i] = s * v[i];
+			v[i] *= s;
 	}

 	template<typename T, typename V>
--- a/src/lib/netlist/solver/nld_matrix_solver.h
+++ b/src/lib/netlist/solver/nld_matrix_solver.h
@ -11,6 +11,7 @@
 #include "netlist/nl_base.h"
 #include "netlist/nl_errstr.h"
 #include "netlist/plib/putil.h"
+#include "netlist/plib/vector_ops.h"

 namespace netlist
 {
@ -54,27 +55,41 @@ public:

 	void set_pointers();

+	/* FIXME: this variant of fill_matrix works a bit better for larger matrices */
 	template <typename AP, typename FT>
-	void fill_matrix(AP &tcr, FT &RHS)
+	void fill_matrix/*_larger*/(AP &tcr, FT &RHS)
 	{
-		FT gtot_t = 0.0;
-		FT RHS_t = 0.0;

 		const std::size_t term_count = this->count();
 		const std::size_t railstart = this->m_railstart;
-		const FT * const * other_cur_analog = this->connected_net_V();
+		const FT * const * other_cur_analog = m_connected_net_V.data();
+		const FT * p_go = m_go.data();
+		const FT * p_gt = m_gt.data();
+		const FT * p_Idr = m_Idr.data();

 		for (std::size_t i = 0; i < railstart; i++)
 		{
-			*tcr[i]       -= m_go[i];
-			gtot_t        += m_gt[i];
-			RHS_t         += m_Idr[i];
+			*tcr[i]       -= p_go[i];
 		}

+#if 1
+		FT gtot_t = 0.0;
+		FT RHS_t = 0.0;
+
+		for (std::size_t i = 0; i < term_count; i++)
+		{
+			gtot_t        += p_gt[i];
+			RHS_t         += p_Idr[i];
+		}
+		// FIXME: The explicit loop above is faster than plib::vec_sum - check why.
+#else
+		auto gtot_t = plib::vec_sum<FT>(term_count, p_gt);
+		auto RHS_t = plib::vec_sum<FT>(term_count, p_Idr);
+#endif
+
 		for (std::size_t i = railstart; i < term_count; i++)
 		{
-			RHS_t += (m_Idr[i] + m_go[i] * *other_cur_analog[i]);
-			gtot_t += m_gt[i];
+			RHS_t += (/*m_Idr[i]*/ + p_go[i] * *other_cur_analog[i]);
 		}

 		RHS = RHS_t;
--- a/src/lib/netlist/solver/nld_ms_sor.h
+++ b/src/lib/netlist/solver/nld_ms_sor.h
@ -88,7 +88,7 @@ unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_rap
 		const float_type * const gt = this->m_terms[k]->gt();
 		const float_type * const go = this->m_terms[k]->go();
 		const float_type * const Idr = this->m_terms[k]->Idr();
-		const float_type * const *other_cur_analog = this->m_terms[k]->connected_net_V();
+		auto other_cur_analog = this->m_terms[k]->connected_net_V();

 		this->m_new_V[k] = this->m_nets[k]->Q_Analog();