diff --git a/src/lib/netlist/build/makefile b/src/lib/netlist/build/makefile index f628e9eaed6..17dfe2f8edc 100644 --- a/src/lib/netlist/build/makefile +++ b/src/lib/netlist/build/makefile @@ -232,7 +232,7 @@ maketree: $(sort $(OBJDIRS)) .PHONY: clang clang-5 mingw doc native native: - $(MAKE) CEXTRAFLAGS="-march=native -Wall -Wpedantic -Wsign-compare -Wextra " + $(MAKE) CEXTRAFLAGS="-march=native -msse4.2 -Wall -Wpedantic -Wsign-compare -Wextra " clang: $(MAKE) CC=clang++-9 LD=clang++-9 CEXTRAFLAGS="-march=native -Weverything -Werror -Wno-unused-template -Wno-non-virtual-dtor -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors" diff --git a/src/lib/netlist/devices/nlid_system.h b/src/lib/netlist/devices/nlid_system.h index bc77f7ffd13..b5febc6ae3b 100644 --- a/src/lib/netlist/devices/nlid_system.h +++ b/src/lib/netlist/devices/nlid_system.h @@ -139,12 +139,13 @@ namespace netlist total += pati[i]; } netlist_time ttotal = netlist_time::zero(); - for (unsigned i=0; i(m_size - 1); + for (unsigned i=0; i < sm1; i++) { m_inc[i] = base * pati[i]; ttotal += m_inc[i]; } - m_inc[m_size - 1] = base * total - ttotal; + m_inc[sm1] = base * total - ttotal; } } NETLIB_UPDATEI(); diff --git a/src/lib/netlist/nl_config.h b/src/lib/netlist/nl_config.h index d5d195367ec..faa5008ff56 100644 --- a/src/lib/netlist/nl_config.h +++ b/src/lib/netlist/nl_config.h @@ -28,7 +28,7 @@ * Your mileage may vary. * */ -#define USE_MEMPOOL (0) +#define USE_MEMPOOL (1) /*! Store input values in logic_terminal_t. * diff --git a/src/lib/netlist/plib/palloc.h b/src/lib/netlist/plib/palloc.h index 44f3bdb395e..c7aad9372d3 100644 --- a/src/lib/netlist/plib/palloc.h +++ b/src/lib/netlist/plib/palloc.h @@ -28,19 +28,21 @@ namespace plib { // Memory allocation //============================================================ -#if (USE_ALIGNED_OPTIMIZATIONS) + static constexpr bool is_pow2(std::size_t v) noexcept { return !(v & (v-1)); } + +#if (USE_ALIGNED_ALLOCATION) static inline void *paligned_alloc( size_t alignment, size_t size ) { #if defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER) return _aligned_malloc(size, alignment); #elif defined(__APPLE__) - void* p; - if (::posix_memalign(&p, alignment, size) != 0) { - p = nullptr; - } - return p; + void* p; + if (::posix_memalign(&p, alignment, size) != 0) { + p = nullptr; + } + return p; #else - return aligned_alloc(alignment, size); + return aligned_alloc(alignment, size); #endif } @@ -79,19 +81,35 @@ namespace plib { { ::operator delete(ptr); } +#endif template inline C14CONSTEXPR T *assume_aligned_ptr(T *p) noexcept { + static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)"); + static_assert(is_pow2(ALIGN), "Alignment must be a power of 2"); + //auto t = reinterpret_cast(p); + //if (t & (ALIGN-1)) + // printf("alignment error!"); +#if (USE_ALIGNED_HINTS) + return reinterpret_cast(__builtin_assume_aligned(p, ALIGN)); +#else return p; +#endif } template inline C14CONSTEXPR const T *assume_aligned_ptr(const T *p) noexcept { + static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)"); + static_assert(is_pow2(ALIGN), "Alignment must be a power of 2"); +#if (USE_ALIGNED_HINTS) + return reinterpret_cast(__builtin_assume_aligned(p, ALIGN)); +#else return p; - } #endif + } + template inline T *pnew(Args&&... args) { @@ -124,8 +142,8 @@ namespace plib { constexpr pdefault_deleter() noexcept = default; template::value>::type> - pdefault_deleter(const pdefault_deleter&) noexcept { } + std::enable_if::value>::type> + pdefault_deleter(const pdefault_deleter&) noexcept { } void operator()(T *p) const { @@ -245,33 +263,38 @@ namespace plib { return std::move(a); } + //============================================================ + // Aligned allocator for use with containers + //============================================================ + template class aligned_allocator { public: - using value_type = T; + using value_type = T; + static constexpr const std::size_t align_size = (USE_ALIGNED_ALLOCATION) ? ALIGN : alignof(std::max_align_t); - static_assert(ALIGN >= alignof(T) && (ALIGN % alignof(T)) == 0, - "ALIGN must be greater than alignof(T) and a multiple"); + static_assert(align_size >= alignof(T) && (align_size % alignof(T)) == 0, + "ALIGN must be greater than alignof(T) and a multiple"); - aligned_allocator() noexcept = default; - ~aligned_allocator() noexcept = default; + aligned_allocator() noexcept = default; + ~aligned_allocator() noexcept = default; - aligned_allocator(const aligned_allocator&) noexcept = default; - aligned_allocator& operator=(const aligned_allocator&) noexcept = delete; + aligned_allocator(const aligned_allocator&) noexcept = default; + aligned_allocator& operator=(const aligned_allocator&) noexcept = delete; - aligned_allocator(aligned_allocator&&) noexcept = default; - aligned_allocator& operator=(aligned_allocator&&) = delete; + aligned_allocator(aligned_allocator&&) noexcept = default; + aligned_allocator& operator=(aligned_allocator&&) = delete; - template - aligned_allocator(const aligned_allocator& rhs) noexcept + template + aligned_allocator(const aligned_allocator& rhs) noexcept + { + unused_var(rhs); + } + + template struct rebind { - unused_var(rhs); - } - - template struct rebind - { - using other = aligned_allocator; + using other = aligned_allocator; }; T* allocate(std::size_t n) @@ -306,12 +329,38 @@ namespace plib { return !(lhs == rhs); } - // FIXME: needs to be somewhere else -#if 0 - template - using aligned_vector = std::vector>; - //using aligned_vector = std::vector>; -#else + //============================================================ + // traits to determine alignment size and stride size + // from types supporting alignment + //============================================================ + + PDEFINE_HAS_MEMBER(has_align, align_size); + + template + struct align_traits + { + static constexpr const std::size_t align_size = alignof(std::max_align_t); + static constexpr const std::size_t stride_size = + (sizeof(T) % align_size == 0 ? 1 //T is a multiple of align_size + : (align_size % sizeof(T) != 0 ? align_size // align_size is not a multiple of T + : align_size / sizeof(T))); + }; + + template + struct align_traits::value, void>::type> + { + static constexpr const std::size_t align_size = T::align_size; + static constexpr const std::size_t stride_size = + (sizeof(T) % align_size == 0 ? 1 //T is a multiple of align_size + : (align_size % sizeof(T) != 0 ? align_size // align_size is not a multiple of T + : align_size / sizeof(T))); + }; + + //============================================================ + // Aligned vector + //============================================================ + + // FIXME: needs a separate file template class aligned_vector : public std::vector> { @@ -326,13 +375,13 @@ namespace plib { using base::base; - reference operator[](size_type i) noexcept + C14CONSTEXPR reference operator[](size_type i) noexcept { - return assume_aligned_ptr(this->data())[i]; + return assume_aligned_ptr(&((*this)[0]))[i]; } constexpr const_reference operator[](size_type i) const noexcept { - return assume_aligned_ptr(this->data())[i]; + return assume_aligned_ptr(&((*this)[0]))[i]; } pointer data() noexcept { return assume_aligned_ptr(base::data()); } @@ -340,11 +389,6 @@ namespace plib { }; - -#endif - - - } // namespace plib #endif /* PALLOC_H_ */ diff --git a/src/lib/netlist/plib/parray.h b/src/lib/netlist/plib/parray.h index 9b64f5343d2..1be37b908cd 100644 --- a/src/lib/netlist/plib/parray.h +++ b/src/lib/netlist/plib/parray.h @@ -104,7 +104,7 @@ namespace plib { return m_a[i]; } #else - reference operator[](size_type i) noexcept + C14CONSTEXPR reference operator[](size_type i) noexcept { return assume_aligned_ptr(&m_a[0])[i]; } diff --git a/src/lib/netlist/plib/pconfig.h b/src/lib/netlist/plib/pconfig.h index 1a411fe1aad..ecb9cc3511b 100644 --- a/src/lib/netlist/plib/pconfig.h +++ b/src/lib/netlist/plib/pconfig.h @@ -41,6 +41,8 @@ #define USE_ALIGNED_OPTIMIZATIONS (0) #endif +#define USE_ALIGNED_ALLOCATION (USE_ALIGNED_OPTIMIZATIONS) +#define USE_ALIGNED_HINTS (USE_ALIGNED_OPTIMIZATIONS) /* * Standard alignment macros */ diff --git a/src/lib/netlist/plib/pfmtlog.h b/src/lib/netlist/plib/pfmtlog.h index 4c01f96dba7..50acf457114 100644 --- a/src/lib/netlist/plib/pfmtlog.h +++ b/src/lib/netlist/plib/pfmtlog.h @@ -9,6 +9,7 @@ #include "pstring.h" #include "ptypes.h" +#include "putil.h" #include diff --git a/src/lib/netlist/plib/pmatrix2d.h b/src/lib/netlist/plib/pmatrix2d.h new file mode 100644 index 00000000000..f0439105e5c --- /dev/null +++ b/src/lib/netlist/plib/pmatrix2d.h @@ -0,0 +1,82 @@ +// license:GPL-2.0+ +// copyright-holders:Couriersud +/* + * pmatrix2d.h + * + * NxM regular matrix + * + */ + +#ifndef PMATRIX2D_H_ +#define PMATRIX2D_H_ + +#include "palloc.h" + +#include +#include +#include +#include +#include + +namespace plib +{ + + + template> + class pmatrix2d + { + public: + using value_type = T; + + static constexpr const std::size_t align_size = align_traits::align_size; + static constexpr const std::size_t stride_size = align_traits::stride_size; + pmatrix2d() + : m_N(0), m_M(0), m_stride(8) + { + } + + pmatrix2d(std::size_t N, std::size_t M) + : m_N(N), m_M(M), m_stride((M+stride_size-1) & ~(stride_size-1)), m_v(N * m_stride) + { + } + + void resize(std::size_t N, std::size_t M) + { + m_N = N; + m_M = M; + m_stride = (M+stride_size-1) & ~(stride_size-1); + m_v.resize(N * m_stride); + } + + C14CONSTEXPR T * operator[] (std::size_t row) noexcept + { + return assume_aligned_ptr(&m_v[m_stride * row]); + } + + constexpr const T * operator[] (std::size_t row) const noexcept + { + return assume_aligned_ptr(&m_v[m_stride * row]); + } + + T & operator()(std::size_t r, std::size_t c) noexcept + { + return (*this)[r][c]; + } + + const T & operator()(std::size_t r, std::size_t c) const noexcept + { + return (*this)[r][c]; + } + + private: + + std::size_t m_N; + std::size_t m_M; + std::size_t m_stride; + + std::vector m_v; + }; + +} // namespace plib + +#endif /* MAT_CR_H_ */ diff --git a/src/lib/netlist/plib/ptypes.h b/src/lib/netlist/plib/ptypes.h index a3b11b65e74..84f6453e8f2 100644 --- a/src/lib/netlist/plib/ptypes.h +++ b/src/lib/netlist/plib/ptypes.h @@ -81,38 +81,19 @@ namespace plib template inline void unused_var(Ts&&...) {} - - - //============================================================ - // penum - strongly typed enumeration - //============================================================ - - struct penum_base - { - protected: - static int from_string_int(const char *str, const char *x); - static std::string nthstr(int n, const char *str); - }; - } // namespace plib -#define P_ENUM(ename, ...) \ - struct ename : public plib::penum_base { \ - enum E { __VA_ARGS__ }; \ - ename (E v) : m_v(v) { } \ - bool set_from_string (const std::string &s) { \ - static char const *const strings = # __VA_ARGS__; \ - int f = from_string_int(strings, s.c_str()); \ - if (f>=0) { m_v = static_cast(f); return true; } else { return false; } \ - } \ - operator E() const {return m_v;} \ - bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \ - bool operator==(const E &rhs) const {return m_v == rhs;} \ - std::string name() const { \ - static char const *const strings = # __VA_ARGS__; \ - return nthstr(static_cast(m_v), strings); \ - } \ - private: E m_v; }; +//============================================================ +// Define a "has member" trait. +//============================================================ +#define PDEFINE_HAS_MEMBER(name, member) \ + template class name \ + { \ + template static long test(decltype(&U:: member)); \ + template static char test(...); \ + public: \ + static constexpr const bool value = sizeof(test(nullptr)) == sizeof(long); \ + } #endif /* PTYPES_H_ */ diff --git a/src/lib/netlist/plib/putil.h b/src/lib/netlist/plib/putil.h index cd55be63185..fbf6f2af26d 100644 --- a/src/lib/netlist/plib/putil.h +++ b/src/lib/netlist/plib/putil.h @@ -93,6 +93,37 @@ namespace plib const std::string &token, const std::size_t maxsplit); + + //============================================================ + // penum - strongly typed enumeration + //============================================================ + + struct penum_base + { + protected: + static int from_string_int(const char *str, const char *x); + static std::string nthstr(int n, const char *str); + }; + } // namespace plib +#define P_ENUM(ename, ...) \ + struct ename : public plib::penum_base { \ + enum E { __VA_ARGS__ }; \ + ename (E v) : m_v(v) { } \ + bool set_from_string (const std::string &s) { \ + static char const *const strings = # __VA_ARGS__; \ + int f = from_string_int(strings, s.c_str()); \ + if (f>=0) { m_v = static_cast(f); return true; } else { return false; } \ + } \ + operator E() const {return m_v;} \ + bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \ + bool operator==(const E &rhs) const {return m_v == rhs;} \ + std::string name() const { \ + static char const *const strings = # __VA_ARGS__; \ + return nthstr(static_cast(m_v), strings); \ + } \ + private: E m_v; }; + + #endif /* PUTIL_H_ */ diff --git a/src/lib/netlist/solver/nld_matrix_solver.cpp b/src/lib/netlist/solver/nld_matrix_solver.cpp index cb905251df3..3fcdd591e83 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.cpp +++ b/src/lib/netlist/solver/nld_matrix_solver.cpp @@ -39,20 +39,6 @@ namespace devices m_connected_net_idx.push_back(net_other); } - void terms_for_net_t::set_pointers() - { - m_gt.resize(count(), 0.0); - m_go.resize(count(), 0.0); - m_Idr.resize(count(), 0.0); - m_connected_net_V.resize(count(), nullptr); - - for (std::size_t i = 0; i < count(); i++) - { - m_terms[i]->set_ptrs(&m_gt[i], &m_go[i], &m_Idr[i]); - m_connected_net_V[i] = m_terms[i]->otherterm()->net().Q_Analog_state_ptr(); - } - } - // ---------------------------------------------------------------------------------------- // matrix_solver // ---------------------------------------------------------------------------------------- @@ -254,8 +240,6 @@ namespace devices m_terms[k]->m_railstart = m_terms[k]->count(); for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++) this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false); - - m_terms[k]->set_pointers(); } // free all - no longer needed @@ -263,6 +247,8 @@ namespace devices sort_terms(m_sort); + this->set_pointers(); + /* create a list of non zero elements. */ for (unsigned k = 0; k < iN; k++) { @@ -371,9 +357,9 @@ namespace devices state().save(*this, m_terms[k]->m_h_n_m_1, this->name(), "m_h_n_m_1." + num); // FIXME: This shouldn't be necessary, recalculate on each entry ... - state().save(*this, m_terms[k]->m_go.data(),"GO" + num, this->name(), m_terms[k]->count()); - state().save(*this, m_terms[k]->m_gt.data(),"GT" + num, this->name(), m_terms[k]->count()); - state().save(*this, m_terms[k]->m_Idr.data(),"IDR" + num, this->name(), m_terms[k]->count()); + state().save(*this, m_gon[k],"GO" + num, this->name(), m_terms[k]->count()); + state().save(*this, m_gtn[k],"GT" + num, this->name(), m_terms[k]->count()); + state().save(*this, m_Idrn[k],"IDR" + num, this->name(), m_terms[k]->count()); } } diff --git a/src/lib/netlist/solver/nld_matrix_solver.h b/src/lib/netlist/solver/nld_matrix_solver.h index 903c39257d0..ac5a2a464f7 100644 --- a/src/lib/netlist/solver/nld_matrix_solver.h +++ b/src/lib/netlist/solver/nld_matrix_solver.h @@ -10,9 +10,12 @@ #include "netlist/nl_base.h" #include "netlist/nl_errstr.h" -#include "netlist/plib/palloc.h" -#include "netlist/plib/putil.h" -#include "netlist/plib/vector_ops.h" +#include "plib/palloc.h" +#include "plib/putil.h" +#include "plib/vector_ops.h" +#include "plib/pmatrix2d.h" + +#include namespace netlist { @@ -51,8 +54,6 @@ namespace devices terminal_t **terms() { return m_terms.data(); } - void set_pointers(); - std::size_t m_railstart; std::vector m_nz; /* all non zero for multiplication */ @@ -65,10 +66,6 @@ namespace devices nl_double m_h_n_m_1; std::vector m_connected_net_idx; - plib::aligned_vector m_go; - plib::aligned_vector m_gt; - plib::aligned_vector m_Idr; - plib::aligned_vector m_connected_net_V; private: std::vector m_terms; @@ -171,6 +168,31 @@ namespace devices template void build_LE_RHS(T &child); + void set_pointers() + { + const std::size_t iN = this->m_nets.size(); + + std::size_t max_col = 0; + for (std::size_t k = 0; k < iN; k++) + max_col = std::max(max_col, m_terms[k]->count()); + + m_gtn.resize(iN, max_col); + m_gon.resize(iN, max_col); + m_Idrn.resize(iN, max_col); + m_connected_net_Vn.resize(iN, max_col); + + for (std::size_t k = 0; k < iN; k++) + { + auto count = m_terms[k]->count(); + + for (std::size_t i = 0; i < count; i++) + { + m_terms[k]->terms()[i]->set_ptrs(&m_gtn[k][i], &m_gon[k][i], &m_Idrn[k][i]); + m_connected_net_Vn[k][i] = m_terms[k]->terms()[i]->otherterm()->net().Q_Analog_state_ptr(); + } + } + } + template void fill_matrix(std::size_t N, AP &tcr, FT &RHS) { @@ -181,17 +203,21 @@ namespace devices const std::size_t term_count = net->count(); const std::size_t railstart = net->m_railstart; + const auto &go = m_gon[k]; + const auto > = m_gtn[k]; + const auto &Idr = m_Idrn[k]; + const auto &cnV = m_connected_net_Vn[k]; for (std::size_t i = 0; i < railstart; i++) - *tcr_r[i] -= net->m_go[i]; + *tcr_r[i] -= go[i]; typename FT::value_type gtot_t = 0.0; typename FT::value_type RHS_t = 0.0; for (std::size_t i = 0; i < term_count; i++) { - gtot_t += net->m_gt[i]; - RHS_t += net->m_Idr[i]; + gtot_t += gt[i]; + RHS_t += Idr[i]; } // FIXME: Code above is faster than vec_sum - Check this #if 0 @@ -201,7 +227,7 @@ namespace devices for (std::size_t i = railstart; i < term_count; i++) { - RHS_t += (/*m_Idr[i]*/ + net->m_go[i] * *net->m_connected_net_V[i]); + RHS_t += (/*m_Idr[i]*/ + go[i] * *cnV[i]); } RHS[k] = RHS_t; @@ -211,6 +237,16 @@ namespace devices } + template + using aligned_alloc = plib::aligned_allocator; + + plib::pmatrix2d> m_gon; + plib::pmatrix2d> m_gtn; + plib::pmatrix2d> m_Idrn; + plib::pmatrix2d> m_connected_net_Vn; + + plib::pmatrix2d m_test; + std::vector> m_terms; std::vector m_nets; std::vector> m_inps; @@ -283,7 +319,7 @@ namespace devices const std::size_t terms_count = terms->count(); const std::size_t railstart = terms->m_railstart; - const float_type * const gt = terms->m_gt.data(); + const float_type * const gt = m_gtn[k]; { float_type akk = 0.0; @@ -293,7 +329,7 @@ namespace devices Ak[k] = akk; } - const float_type * const go = terms->m_go.data(); + const float_type * const go = m_gon[k]; int * net_other = terms->m_connected_net_idx.data(); for (std::size_t i = 0; i < railstart; i++) @@ -314,9 +350,9 @@ namespace devices float_type rhsk_b = 0.0; const std::size_t terms_count = m_terms[k]->count(); - const float_type * const go = m_terms[k]->m_go.data(); - const float_type * const Idr = m_terms[k]->m_Idr.data(); - const float_type * const * other_cur_analog = m_terms[k]->m_connected_net_V.data(); + const float_type * const go = m_gon[k]; + const float_type * const Idr = m_Idrn[k]; + const float_type * const * other_cur_analog = m_connected_net_Vn[k]; for (std::size_t i = 0; i < terms_count; i++) rhsk_a = rhsk_a + Idr[i]; diff --git a/src/lib/netlist/solver/nld_ms_sor.h b/src/lib/netlist/solver/nld_ms_sor.h index 450803e25d8..9e11cb67d75 100644 --- a/src/lib/netlist/solver/nld_ms_sor.h +++ b/src/lib/netlist/solver/nld_ms_sor.h @@ -85,10 +85,10 @@ unsigned matrix_solver_SOR_t::vsolve_non_dynamic(const bool newton_rap float_type RHS_t = 0.0; const std::size_t term_count = this->m_terms[k]->count(); - const float_type * const gt = this->m_terms[k]->m_gt.data(); - const float_type * const go = this->m_terms[k]->m_go.data(); - const float_type * const Idr = this->m_terms[k]->m_Idr.data(); - auto other_cur_analog = this->m_terms[k]->m_connected_net_V.data(); + const float_type * const gt = this->m_gtn[k]; + const float_type * const go = this->m_gon[k]; + const float_type * const Idr = this->m_Idrn[k]; + auto other_cur_analog = this->m_connected_net_Vn[k]; this->m_new_V[k] = this->m_nets[k]->Q_Analog(); @@ -136,7 +136,7 @@ unsigned matrix_solver_SOR_t::vsolve_non_dynamic(const bool newton_rap { const int * net_other = this->m_terms[k]->m_connected_net_idx.data(); const std::size_t railstart = this->m_terms[k]->m_railstart; - const float_type * go = this->m_terms[k]->m_go.data(); + const float_type * go = this->m_gon[k]; float_type Idrive = 0.0; for (std::size_t i = 0; i < railstart; i++) diff --git a/src/lib/netlist/solver/nld_solver.cpp b/src/lib/netlist/solver/nld_solver.cpp index 77bf710c9c9..d9685f24ae0 100644 --- a/src/lib/netlist/solver/nld_solver.cpp +++ b/src/lib/netlist/solver/nld_solver.cpp @@ -192,7 +192,7 @@ poolptr NETLIB_NAME(solver)::create_solver_x(std::size_t size, else return this->create_solver_x(size, solvername); } -}; +} struct net_splitter {