From a5f37870584dc8bd5588ee32964f56462228a714 Mon Sep 17 00:00:00 2001 From: couriersud Date: Wed, 20 Feb 2019 20:16:08 +0100 Subject: [PATCH] netlist: fix a bug and some performance tweaks. (nw) --- src/lib/netlist/nl_config.h | 2 +- src/lib/netlist/plib/gmres.h | 2 +- src/lib/netlist/plib/pmempool.h | 15 +++--- src/lib/netlist/plib/vector_ops.h | 53 ++++++++++++++++------ src/lib/netlist/solver/nld_matrix_solver.h | 33 ++++++++++---- src/lib/netlist/solver/nld_ms_sor.h | 2 +- 6 files changed, 74 insertions(+), 33 deletions(-) diff --git a/src/lib/netlist/nl_config.h b/src/lib/netlist/nl_config.h index efe74002354..de9a370db2f 100644 --- a/src/lib/netlist/nl_config.h +++ b/src/lib/netlist/nl_config.h @@ -28,7 +28,7 @@ * Your mileage may vary. * */ -#define USE_MEMPOOL (1) +#define USE_MEMPOOL (0) /*! Store input values in logic_terminal_t. * diff --git a/src/lib/netlist/plib/gmres.h b/src/lib/netlist/plib/gmres.h index edd15a49380..345c96b5c63 100644 --- a/src/lib/netlist/plib/gmres.h +++ b/src/lib/netlist/plib/gmres.h @@ -338,7 +338,7 @@ namespace plib plib::parray m_y; /* mr + 1 */ //plib::parray m_v[RESTART + 1]; /* mr + 1, n */ - std::array, RESTART + 1> m_v; /* mr + 1, n */ + plib::parray, RESTART + 1> m_v; /* mr + 1, n */ std::size_t m_size; diff --git a/src/lib/netlist/plib/pmempool.h b/src/lib/netlist/plib/pmempool.h index 91fa7d55589..80e30ada324 100644 --- a/src/lib/netlist/plib/pmempool.h +++ b/src/lib/netlist/plib/pmempool.h @@ -48,17 +48,18 @@ namespace plib { private: struct block { - block(mempool *mp) + block(mempool *mp, std::size_t min_bytes) : m_num_alloc(0) - , m_free(mp->m_min_alloc) , m_cur(0) , m_data(nullptr) , m_mempool(mp) { - std::size_t alloc_bytes = (mp->m_min_alloc + mp->m_min_align - 1) & ~(mp->m_min_align - 1); + min_bytes = std::max(mp->m_min_alloc, min_bytes); + m_free = min_bytes; + std::size_t alloc_bytes = (min_bytes + mp->m_min_align - 1) & ~(mp->m_min_align - 1); m_data_allocated = static_cast(::operator new(alloc_bytes)); void *r = m_data_allocated; - std::align(mp->m_min_align, mp->m_min_alloc, r, alloc_bytes); + std::align(mp->m_min_align, min_bytes, r, alloc_bytes); m_data = reinterpret_cast(r); } std::size_t m_num_alloc; @@ -80,9 +81,9 @@ namespace plib { }; - block * new_block() + block * new_block(std::size_t min_bytes) { - auto *b = new block(this); + auto *b = new block(this, min_bytes); m_blocks.push_back(b); return b; } @@ -144,7 +145,7 @@ namespace plib { } } { - block *b = new_block(); + block *b = new_block(rs); b->m_num_alloc = 1; b->m_free = m_min_alloc - rs; auto ret = reinterpret_cast(b->m_data + b->m_cur); diff --git a/src/lib/netlist/plib/vector_ops.h b/src/lib/netlist/plib/vector_ops.h index dde6c7c4049..f5a9e336d0d 100644 --- a/src/lib/netlist/plib/vector_ops.h +++ b/src/lib/netlist/plib/vector_ops.h @@ -28,7 +28,7 @@ namespace plib template void vec_set_scalar (const std::size_t n, VT &v, T && scalar) { - const T s(std::forward(scalar)); + const typename std::remove_reference::type s(std::forward(scalar)); for ( std::size_t i = 0; i < n; i++ ) v[i] = s; } @@ -43,25 +43,50 @@ namespace plib template T vec_mult (const std::size_t n, const V1 & v1, const V2 & v2 ) { - T value = 0.0; - for ( std::size_t i = 0; i < n; i++ ) - value += v1[i] * v2[i]; - return value; + PALIGNAS_VECTOROPT() T value[8] = {0}; + for (std::size_t i = 0; i < n ; i++ ) + { + value[i & 7] += v1[i] * v2[i]; + } + return value[0] + value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7]; } template T vec_mult2 (const std::size_t n, const VT &v) { - T value = 0.0; - for ( std::size_t i = 0; i < n; i++ ) - value += v[i] * v[i]; - return value; + PALIGNAS_VECTOROPT() T value[8] = {0}; + for (std::size_t i = 0; i < n ; i++ ) + { + value[i & 7] += v[i] * v[i]; + } + return value[0] + value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7]; + } + + template + T vec_sum (const std::size_t n, const VT &v) + { + if (n<8) + { + PALIGNAS_VECTOROPT() T value(0); + for (std::size_t i = 0; i < n ; i++ ) + value += v[i]; + + return value; + } + else + { + PALIGNAS_VECTOROPT() T value[8] = {0}; + for (std::size_t i = 0; i < n ; i++ ) + value[i & 7] += v[i]; + + return ((value[0] + value[1]) + (value[2] + value[3])) + ((value[4] + value[5]) + (value[6] + value[7])); + } } template void vec_mult_scalar (const std::size_t n, const VV & v, T && scalar, VR & result) { - const T s(std::forward(scalar)); + const typename std::remove_reference::type s(std::forward(scalar)); for ( std::size_t i = 0; i < n; i++ ) result[i] = s * v[i]; } @@ -69,9 +94,9 @@ namespace plib template void vec_add_mult_scalar (const std::size_t n, const VV & v, T && scalar, VR & result) { - const T s(std::forward(scalar)); + const typename std::remove_reference::type s(std::forward(scalar)); for ( std::size_t i = 0; i < n; i++ ) - result[i] = result[i] + s * v[i]; + result[i] += s * v[i]; } template @@ -98,9 +123,9 @@ namespace plib template void vec_scale(const std::size_t n, V & v, T &&scalar) { - const T s(std::forward(scalar)); + const typename std::remove_reference::type s(std::forward(scalar)); for ( std::size_t i = 0; i < n; i++ ) - v[i] = s * v[i]; + v[i] *= s; } template diff --git a/src/lib/netlist/solver/nld_matrix_solver.h b/src/lib/netlist/solver/nld_matrix_solver.h index 97ca20448f9..803437d76e2 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.h +++ b/src/lib/netlist/solver/nld_matrix_solver.h @@ -11,6 +11,7 @@ #include "netlist/nl_base.h" #include "netlist/nl_errstr.h" #include "netlist/plib/putil.h" +#include "netlist/plib/vector_ops.h" namespace netlist { @@ -54,27 +55,41 @@ public: void set_pointers(); + /* FIXME: this works a bit better for larger matrices */ template - void fill_matrix(AP &tcr, FT &RHS) + void fill_matrix/*_larger*/(AP &tcr, FT &RHS) { - FT gtot_t = 0.0; - FT RHS_t = 0.0; const std::size_t term_count = this->count(); const std::size_t railstart = this->m_railstart; - const FT * const * other_cur_analog = this->connected_net_V(); + const FT * const * other_cur_analog = m_connected_net_V.data(); + const FT * p_go = m_go.data(); + const FT * p_gt = m_gt.data(); + const FT * p_Idr = m_Idr.data(); for (std::size_t i = 0; i < railstart; i++) { - *tcr[i] -= m_go[i]; - gtot_t += m_gt[i]; - RHS_t += m_Idr[i]; + *tcr[i] -= p_go[i]; } +#if 1 + FT gtot_t = 0.0; + FT RHS_t = 0.0; + + for (std::size_t i = 0; i < term_count; i++) + { + gtot_t += p_gt[i]; + RHS_t += p_Idr[i]; + } + // FIXME: Code above is faster than vec_sum - Check this +#else + auto gtot_t = plib::vec_sum(term_count, p_gt); + auto RHS_t = plib::vec_sum(term_count, p_Idr); +#endif + for (std::size_t i = railstart; i < term_count; i++) { - RHS_t += (m_Idr[i] + m_go[i] * *other_cur_analog[i]); - gtot_t += m_gt[i]; + RHS_t += (/*m_Idr[i]*/ + p_go[i] * *other_cur_analog[i]); } RHS = RHS_t; diff --git a/src/lib/netlist/solver/nld_ms_sor.h b/src/lib/netlist/solver/nld_ms_sor.h index 5261524cba1..5dfc395963a 100644 --- a/src/lib/netlist/solver/nld_ms_sor.h +++ b/src/lib/netlist/solver/nld_ms_sor.h @@ -88,7 +88,7 @@ unsigned matrix_solver_SOR_t::vsolve_non_dynamic(const bool newton_rap const float_type * const gt = this->m_terms[k]->gt(); const float_type * const go = this->m_terms[k]->go(); const float_type * const Idr = this->m_terms[k]->Idr(); - const float_type * const *other_cur_analog = this->m_terms[k]->connected_net_V(); + auto other_cur_analog = this->m_terms[k]->connected_net_V(); this->m_new_V[k] = this->m_nets[k]->Q_Analog();