From 6a3efe52d6dbd2b8dfb4a5fa38e2a855b6353fb9 Mon Sep 17 00:00:00 2001 From: couriersud Date: Mon, 25 Feb 2019 21:17:59 +0100 Subject: [PATCH] netlist: fix bugs in the alignment code. (nw) --- src/lib/netlist/nl_base.h | 6 +-- src/lib/netlist/plib/palloc.h | 52 +++++++++------------ src/lib/netlist/plib/pconfig.h | 2 +- src/lib/netlist/plib/pmatrix2d.h | 6 ++- src/lib/netlist/plib/ptypes.h | 53 ++++++++++++++++++++++ src/lib/netlist/solver/nld_matrix_solver.h | 20 +++++--- src/lib/netlist/solver/nld_ms_gcr.h | 14 +++--- src/lib/netlist/solver/nld_ms_gmres.h | 13 +++--- 8 files changed, 107 insertions(+), 59 deletions(-) diff --git a/src/lib/netlist/nl_base.h b/src/lib/netlist/nl_base.h index e54d3b1abae..3d2b8cd1364 100644 --- a/src/lib/netlist/nl_base.h +++ b/src/lib/netlist/nl_base.h @@ -13,9 +13,6 @@ #error "nl_base.h included. Please correct." #endif -#include -#include - #include "plib/palloc.h" // owned_ptr #include "plib/pdynlib.h" #include "plib/pfmtlog.h" @@ -29,6 +26,9 @@ #include "nltypes.h" #include "plib/ptime.h" +#include +#include + //============================================================ // MACROS / New Syntax //============================================================ diff --git a/src/lib/netlist/plib/palloc.h b/src/lib/netlist/plib/palloc.h index d8e4ffbb5d7..d7b00262f7f 100644 --- a/src/lib/netlist/plib/palloc.h +++ b/src/lib/netlist/plib/palloc.h @@ -28,8 +28,6 @@ namespace plib { // Memory allocation //============================================================ - static constexpr bool is_pow2(std::size_t v) noexcept { return !(v & (v-1)); } - #if (USE_ALIGNED_ALLOCATION) static inline void *paligned_alloc( size_t alignment, size_t size ) { @@ -52,24 +50,6 @@ namespace plib { free(ptr); } - static constexpr bool is_pow2(std::size_t v) noexcept { return !(v & (v-1)); } - - template - inline C14CONSTEXPR T *assume_aligned_ptr(T *p) noexcept - { - static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)"); - static_assert(is_pow2(ALIGN), "Alignment must be a power of 2"); - //auto t = reinterpret_cast(p); - //if (t & (ALIGN-1)) - // printf("alignment error!"); - return reinterpret_cast(__builtin_assume_aligned(p, ALIGN)); - } - - template - inline C14CONSTEXPR const T *assume_aligned_ptr(const T *p) noexcept - { - return reinterpret_cast(__builtin_assume_aligned(p, ALIGN)); - } #else static inline void *paligned_alloc( size_t alignment, size_t size ) { @@ -84,7 +64,7 @@ namespace plib { #endif template - inline C14CONSTEXPR T *assume_aligned_ptr(T *p) noexcept + /*inline */ C14CONSTEXPR T *assume_aligned_ptr(T *p) noexcept { static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)"); static_assert(is_pow2(ALIGN), "Alignment must be a power of 2"); @@ -99,7 +79,7 @@ namespace plib { } template - inline C14CONSTEXPR const T *assume_aligned_ptr(const T *p) noexcept + constexpr const T *assume_aligned_ptr(const T *p) noexcept { static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)"); static_assert(is_pow2(ALIGN), "Alignment must be a power of 2"); @@ -272,7 +252,7 @@ namespace plib { { public: using value_type = T; - static constexpr const std::size_t align_size = (USE_ALIGNED_ALLOCATION) ? ALIGN : alignof(std::max_align_t); + static constexpr const std::size_t align_size = ALIGN; static_assert(align_size >= alignof(T) && (align_size % alignof(T)) == 0, "ALIGN must be greater than alignof(T) and a multiple"); @@ -340,20 +320,30 @@ namespace plib { struct align_traits { static constexpr const std::size_t align_size = alignof(std::max_align_t); + static constexpr const std::size_t value_size = sizeof(typename T::value_type); +#if 0 static constexpr const std::size_t stride_size = - (sizeof(T) % align_size == 0 ? 1 //T is a multiple of align_size - : (align_size % sizeof(T) != 0 ? align_size // align_size is not a multiple of T - : align_size / sizeof(T))); + ((value_size % align_size) == 0 ? 1 //T is a multiple of align_size + : ((align_size % value_size) != 0 ? align_size // align_size is not a multiple of T + : align_size / value_size)); +#else + static constexpr const std::size_t stride_size = lcm(align_size, value_size) / value_size; +#endif }; template struct align_traits::value, void>::type> { static constexpr const std::size_t align_size = T::align_size; + static constexpr const std::size_t value_size = sizeof(typename T::value_type); +#if 0 static constexpr const std::size_t stride_size = - (sizeof(T) % align_size == 0 ? 1 //T is a multiple of align_size - : (align_size % sizeof(T) != 0 ? align_size // align_size is not a multiple of T - : align_size / sizeof(T))); + ((value_size % align_size) == 0 ? 1 //T is a multiple of align_size + : ((align_size % value_size) != 0 ? align_size // align_size is not a multiple of T + : align_size / value_size)); +#else + static constexpr const std::size_t stride_size = lcm(align_size, value_size) / value_size; +#endif }; //============================================================ @@ -377,11 +367,11 @@ namespace plib { C14CONSTEXPR reference operator[](size_type i) noexcept { - return assume_aligned_ptr(&((*this)[0]))[i]; + return assume_aligned_ptr(&(base::operator[](0)))[i]; } constexpr const_reference operator[](size_type i) const noexcept { - return assume_aligned_ptr(&((*this)[0]))[i]; + return assume_aligned_ptr(&(base::operator[](0)))[i]; } pointer data() noexcept { return assume_aligned_ptr(base::data()); } diff --git a/src/lib/netlist/plib/pconfig.h b/src/lib/netlist/plib/pconfig.h index 158fb3faae7..c521f24e5f8 100644 --- a/src/lib/netlist/plib/pconfig.h +++ b/src/lib/netlist/plib/pconfig.h @@ -48,7 +48,7 @@ */ #define PALIGN_CACHELINE (64) -#define PALIGN_VECTOROPT (8) +#define PALIGN_VECTOROPT (32) #define PALIGNAS_CACHELINE() PALIGNAS(PALIGN_CACHELINE) #define PALIGNAS_VECTOROPT() PALIGNAS(PALIGN_VECTOROPT) diff --git a/src/lib/netlist/plib/pmatrix2d.h b/src/lib/netlist/plib/pmatrix2d.h index f0439105e5c..52228548cbf 100644 --- a/src/lib/netlist/plib/pmatrix2d.h +++ b/src/lib/netlist/plib/pmatrix2d.h @@ -36,15 +36,17 @@ namespace plib } pmatrix2d(std::size_t N, std::size_t M) - : m_N(N), m_M(M), m_stride((M+stride_size-1) & ~(stride_size-1)), m_v(N * m_stride) + : m_N(N), m_M(M) { + m_stride = ((M + stride_size-1) / stride_size) * stride_size; + m_v.resize(N * m_stride); } void resize(std::size_t N, std::size_t M) { m_N = N; m_M = M; - m_stride = (M+stride_size-1) & ~(stride_size-1); + m_stride = ((M + stride_size-1) / stride_size) * stride_size; m_v.resize(N * m_stride); } diff --git a/src/lib/netlist/plib/ptypes.h b/src/lib/netlist/plib/ptypes.h index 99bc5715a05..6b4a4af2f75 100644 --- a/src/lib/netlist/plib/ptypes.h +++ b/src/lib/netlist/plib/ptypes.h @@ -81,6 +81,59 @@ namespace plib template inline void unused_var(Ts&&...) {} + //============================================================ + // is_pow2 + //============================================================ + template + constexpr bool is_pow2(T v) noexcept + { + static_assert(is_integral::value, "is_pow2 needs integer arguments"); + return !(v & (v-1)); + } + + + //============================================================ + // abs, lcd, gcm + //============================================================ + + template + constexpr + typename std::enable_if::value && std::is_signed::value, T>::type + abs(T v) + { + return v < 0 ? -v : v; + } + + template + constexpr + typename std::enable_if::value && std::is_unsigned::value, T>::type + abs(T v) + { + return v; + } + + template + constexpr typename std::common_type::type + gcd(M m, N n) + { + static_assert(std::is_integral::value, "gcd: M must be an integer"); + static_assert(std::is_integral::value, "gcd: N must be an integer"); + + return m == 0 ? plib::abs(n) + : n == 0 ? plib::abs(m) + : gcd(n, m % n); + } + + template + constexpr typename std::common_type::type + lcm(M m, N n) + { + static_assert(std::is_integral::value, "lcm: M must be an integer"); + static_assert(std::is_integral::value, "lcm: N must be an integer"); + + return (m != 0 && n != 0) ? (plib::abs(m) / gcd(m, n)) * plib::abs(n) : 0; + } + } // namespace plib //============================================================ diff --git a/src/lib/netlist/solver/nld_matrix_solver.h b/src/lib/netlist/solver/nld_matrix_solver.h index 88d4dbb4bb0..efd60a0c5fa 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.h +++ b/src/lib/netlist/solver/nld_matrix_solver.h @@ -172,14 +172,19 @@ namespace devices { const std::size_t iN = this->m_nets.size(); - std::size_t max_col = 0; + std::size_t max_count = 0; + std::size_t max_rail = 0; for (std::size_t k = 0; k < iN; k++) - max_col = std::max(max_col, m_terms[k]->count()); + { + max_count = std::max(max_count, m_terms[k]->count()); + max_rail = std::max(max_rail, m_terms[k]->m_railstart); + } - m_gtn.resize(iN, max_col); - m_gon.resize(iN, max_col); - m_Idrn.resize(iN, max_col); - m_connected_net_Vn.resize(iN, max_col); + m_mat_ptr.resize(iN, max_rail+1); + m_gtn.resize(iN, max_count); + m_gon.resize(iN, max_count); + m_Idrn.resize(iN, max_count); + m_connected_net_Vn.resize(iN, max_count); for (std::size_t k = 0; k < iN; k++) { @@ -199,7 +204,7 @@ namespace devices for (std::size_t k = 0; k < N; k++) { auto *net = m_terms[k].get(); - auto **tcr_r = tcr[k].data(); + auto **tcr_r = &(tcr[k][0]); const std::size_t term_count = net->count(); const std::size_t railstart = net->m_railstart; @@ -243,6 +248,7 @@ namespace devices plib::pmatrix2d> m_gon; plib::pmatrix2d> m_gtn; plib::pmatrix2d> m_Idrn; + plib::pmatrix2d> m_mat_ptr; plib::pmatrix2d> m_connected_net_Vn; plib::pmatrix2d m_test; diff --git a/src/lib/netlist/solver/nld_ms_gcr.h b/src/lib/netlist/solver/nld_ms_gcr.h index c9ab4a535a9..19dea107bc9 100644 --- a/src/lib/netlist/solver/nld_ms_gcr.h +++ b/src/lib/netlist/solver/nld_ms_gcr.h @@ -66,9 +66,6 @@ private: plib::parray RHS; plib::parray new_V; - std::array, storage_N> m_term_cr; - // std::array, storage_N> m_term_cr; - mat_type mat; //extsolver m_proc; @@ -163,7 +160,7 @@ void matrix_solver_GCR_t::vsetup(analog_net_t::list_t &nets) for (mat_index_type k=0; km_terms[k]->m_railstart;j++) { @@ -171,12 +168,13 @@ void matrix_solver_GCR_t::vsetup(analog_net_t::list_t &nets) for (auto i = mat.row_idx[k]; i < mat.row_idx[k+1]; i++) if (other == static_cast(mat.col_idx[i])) { - m_term_cr[k].push_back(&mat.A[i]); + m_mat_ptr[k][j] = &mat.A[i]; + cnt++; break; } } - nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); - m_term_cr[k].push_back(&mat.A[mat.diag[k]]); + nl_assert(cnt == this->m_terms[k]->m_railstart); + m_mat_ptr[k][this->m_terms[k]->m_railstart] = &mat.A[mat.diag[k]]; } this->log().verbose("maximum fill: {1}", gr.first); @@ -297,7 +295,7 @@ unsigned matrix_solver_GCR_t::vsolve_non_dynamic(const bool newton_rap /* populate matrix */ - this->fill_matrix(iN, m_term_cr, RHS); + this->fill_matrix(iN, m_mat_ptr, RHS); /* now solve it */ diff --git a/src/lib/netlist/solver/nld_ms_gmres.h b/src/lib/netlist/solver/nld_ms_gmres.h index f3d894881e2..2ff515ebda7 100644 --- a/src/lib/netlist/solver/nld_ms_gmres.h +++ b/src/lib/netlist/solver/nld_ms_gmres.h @@ -37,7 +37,6 @@ namespace devices */ matrix_solver_GMRES_t(netlist_state_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size) : matrix_solver_direct_t(anetlist, name, matrix_solver_t::PREFER_BAND_MATRIX, params, size) - , m_term_cr(size) //, m_ops(size, 2) , m_ops(size, 4) , m_gmres(size) @@ -51,9 +50,7 @@ namespace devices using mattype = typename plib::matrix_compressed_rows_t::index_type; - plib::parray, SIZE> m_term_cr; plib::mat_precondition_ILU m_ops; - //plib::mat_precondition_diag m_ops; plib::gmres_t m_gmres; }; @@ -86,17 +83,19 @@ namespace devices for (std::size_t k=0; km_terms[k]->m_railstart;j++) { for (std::size_t i = m_ops.m_mat.row_idx[k]; im_terms[k]->m_connected_net_idx[j] == static_cast(m_ops.m_mat.col_idx[i])) { - m_term_cr[k].push_back(&m_ops.m_mat.A[i]); + this->m_mat_ptr[k][j] = &m_ops.m_mat.A[i]; + cnt++; break; } } - nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); - m_term_cr[k].push_back(&m_ops.m_mat.A[m_ops.m_mat.diag[k]]); + nl_assert(cnt == this->m_terms[k]->m_railstart); + this->m_mat_ptr[k][this->m_terms[k]->m_railstart] = &m_ops.m_mat.A[m_ops.m_mat.diag[k]]; } } @@ -111,7 +110,7 @@ namespace devices m_ops.m_mat.set_scalar(0.0); /* populate matrix and V for first estimate */ - this->fill_matrix(iN, m_term_cr, RHS); + this->fill_matrix(iN, this->m_mat_ptr, RHS); for (std::size_t k = 0; k < iN; k++) {