netlist: more alignment related refactoring. (nw)

This commit is contained in:
couriersud 2019-02-24 13:58:47 +01:00
parent a7a8186283
commit 0e07f9ac34
14 changed files with 283 additions and 119 deletions

View File

@ -232,7 +232,7 @@ maketree: $(sort $(OBJDIRS))
.PHONY: clang clang-5 mingw doc native
native:
$(MAKE) CEXTRAFLAGS="-march=native -Wall -Wpedantic -Wsign-compare -Wextra "
$(MAKE) CEXTRAFLAGS="-march=native -msse4.2 -Wall -Wpedantic -Wsign-compare -Wextra "
clang:
$(MAKE) CC=clang++-9 LD=clang++-9 CEXTRAFLAGS="-march=native -Weverything -Werror -Wno-unused-template -Wno-non-virtual-dtor -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors"

View File

@ -139,12 +139,13 @@ namespace netlist
total += pati[i];
}
netlist_time ttotal = netlist_time::zero();
for (unsigned i=0; i<m_size - 1; i++)
auto sm1 = static_cast<uint8_t>(m_size - 1);
for (unsigned i=0; i < sm1; i++)
{
m_inc[i] = base * pati[i];
ttotal += m_inc[i];
}
m_inc[m_size - 1] = base * total - ttotal;
m_inc[sm1] = base * total - ttotal;
}
}
NETLIB_UPDATEI();

View File

@ -28,7 +28,7 @@
* Your mileage may vary.
*
*/
#define USE_MEMPOOL (0)
#define USE_MEMPOOL (1)
/*! Store input values in logic_terminal_t.
*

View File

@ -28,19 +28,21 @@ namespace plib {
// Memory allocation
//============================================================
#if (USE_ALIGNED_OPTIMIZATIONS)
// True iff v is a power of two, i.e. exactly one bit of v is set.
// Zero has no bit set and is therefore rejected; the plain
// !(v & (v-1)) test would wrongly accept it.
static constexpr bool is_pow2(std::size_t v) noexcept { return v != 0 && (v & (v - 1)) == 0; }
#if (USE_ALIGNED_ALLOCATION)
// Allocate at least `size` bytes starting at an address that is a multiple
// of `alignment`. Returns nullptr when the allocation fails.
//
// alignment - requested alignment in bytes; the platform allocators used
//             below expect a power of two.
// size      - number of bytes requested.
static inline void *paligned_alloc( size_t alignment, size_t size )
{
#if defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)
	return _aligned_malloc(size, alignment);
#elif defined(__APPLE__)
	void* p = nullptr;
	// posix_memalign reports failure through its return value.
	if (::posix_memalign(&p, alignment, size) != 0)
		p = nullptr;
	return p;
#else
	// aligned_alloc is specified for sizes that are an integral multiple of
	// the alignment, so round the request up to the next multiple.
	const size_t rounded = (size + alignment - 1) & ~(alignment - 1);
	// A wrapped-around sum would silently allocate too little memory.
	if (rounded < size)
		return nullptr;
	return aligned_alloc(alignment, rounded);
#endif
}
@ -79,19 +81,35 @@ namespace plib {
{
::operator delete(ptr);
}
#endif
// Return p unchanged in value, optionally annotated for the optimizer as
// being aligned to ALIGN bytes.
//
// ALIGN must be a power of two and at least alignof(T); both conditions are
// enforced at compile time. The pointer itself is never checked at run time.
// With USE_ALIGNED_HINTS disabled the function is a plain pass-through.
template <typename T, std::size_t ALIGN>
inline C14CONSTEXPR T *assume_aligned_ptr(T *p) noexcept
{
	static_assert(is_pow2(ALIGN), "Alignment must be a power of 2");
	static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)");
#if !(USE_ALIGNED_HINTS)
	// Hints are switched off: hand the pointer back untouched.
	return p;
#else
	// The builtin yields void *, hence the cast back to T *.
	return reinterpret_cast<T *>(__builtin_assume_aligned(p, ALIGN));
#endif
}
// Const overload: return p unchanged in value, optionally annotated for the
// optimizer as being aligned to ALIGN bytes.
//
// ALIGN must be a power of two and at least alignof(T); both conditions are
// enforced at compile time. With USE_ALIGNED_HINTS disabled the function is
// a plain pass-through.
template <typename T, std::size_t ALIGN>
inline C14CONSTEXPR const T *assume_aligned_ptr(const T *p) noexcept
{
	static_assert(ALIGN >= alignof(T), "Alignment must be greater or equal to alignof(T)");
	static_assert(is_pow2(ALIGN), "Alignment must be a power of 2");
#if (USE_ALIGNED_HINTS)
	// The builtin yields void *, hence the cast back to const T *.
	return reinterpret_cast<const T *>(__builtin_assume_aligned(p, ALIGN));
#else
	return p;
#endif
	// The function body is closed exactly once, after the conditional
	// section, so both preprocessor branches leave the scopes balanced.
}
template<typename T, typename... Args>
inline T *pnew(Args&&... args)
{
@ -124,8 +142,8 @@ namespace plib {
constexpr pdefault_deleter() noexcept = default;
template<typename U, typename = typename
std::enable_if<std::is_convertible< U*, T*>::value>::type>
pdefault_deleter(const pdefault_deleter<U>&) noexcept { }
std::enable_if<std::is_convertible< U*, T*>::value>::type>
pdefault_deleter(const pdefault_deleter<U>&) noexcept { }
void operator()(T *p) const
{
@ -245,33 +263,38 @@ namespace plib {
return std::move(a);
}
//============================================================
// Aligned allocator for use with containers
//============================================================
template <class T, std::size_t ALIGN = alignof(T)>
class aligned_allocator
{
public:
using value_type = T;
using value_type = T;
static constexpr const std::size_t align_size = (USE_ALIGNED_ALLOCATION) ? ALIGN : alignof(std::max_align_t);
static_assert(ALIGN >= alignof(T) && (ALIGN % alignof(T)) == 0,
"ALIGN must be greater than alignof(T) and a multiple");
static_assert(align_size >= alignof(T) && (align_size % alignof(T)) == 0,
"ALIGN must be greater than alignof(T) and a multiple");
aligned_allocator() noexcept = default;
~aligned_allocator() noexcept = default;
aligned_allocator() noexcept = default;
~aligned_allocator() noexcept = default;
aligned_allocator(const aligned_allocator&) noexcept = default;
aligned_allocator& operator=(const aligned_allocator&) noexcept = delete;
aligned_allocator(const aligned_allocator&) noexcept = default;
aligned_allocator& operator=(const aligned_allocator&) noexcept = delete;
aligned_allocator(aligned_allocator&&) noexcept = default;
aligned_allocator& operator=(aligned_allocator&&) = delete;
aligned_allocator(aligned_allocator&&) noexcept = default;
aligned_allocator& operator=(aligned_allocator&&) = delete;
template <class U>
aligned_allocator(const aligned_allocator<U, ALIGN>& rhs) noexcept
template <class U>
aligned_allocator(const aligned_allocator<U, ALIGN>& rhs) noexcept
{
unused_var(rhs);
}
template <class U> struct rebind
{
unused_var(rhs);
}
template <class U> struct rebind
{
using other = aligned_allocator<U, ALIGN>;
using other = aligned_allocator<U, ALIGN>;
};
T* allocate(std::size_t n)
@ -306,12 +329,38 @@ namespace plib {
return !(lhs == rhs);
}
// FIXME: needs to be somewhere else
#if 0
template <class T, std::size_t ALIGN = alignof(T)>
using aligned_vector = std::vector<T, aligned_allocator<T, alignof(T)>>;
//using aligned_vector = std::vector<T, aligned_allocator<T, ALIGN>>;
#else
//============================================================
// traits to determine alignment size and stride size
// from types supporting alignment
//============================================================
PDEFINE_HAS_MEMBER(has_align, align_size);
// Fallback traits for a type T that does not publish its own align_size.
//
// align_size  - alignof(std::max_align_t).
// stride_size - derived from sizeof(T) itself (not from any element type
//               T might manage):
//                 1                       if sizeof(T) is a multiple of align_size,
//                 align_size / sizeof(T)  if sizeof(T) divides align_size,
//                 align_size              otherwise.
template <typename T, typename X = void>
struct align_traits
{
	static constexpr const std::size_t align_size = alignof(std::max_align_t);
	static constexpr const std::size_t stride_size =
		((sizeof(T) % align_size) == 0)
			? 1
			: (((align_size % sizeof(T)) == 0)
				? (align_size / sizeof(T))
				: align_size);
};
// Traits for a type T that publishes a static align_size member (detected
// through has_align<T>).
//
// align_size  - taken verbatim from T::align_size.
// stride_size - derived from sizeof(T) itself (not from any element type
//               T might manage):
//                 1                       if sizeof(T) is a multiple of align_size,
//                 align_size / sizeof(T)  if sizeof(T) divides align_size,
//                 align_size              otherwise.
template <typename T>
struct align_traits<T, typename std::enable_if<has_align<T>::value, void>::type>
{
	static constexpr const std::size_t align_size = T::align_size;
	static constexpr const std::size_t stride_size =
		((sizeof(T) % align_size) == 0)
			? 1
			: (((align_size % sizeof(T)) == 0)
				? (align_size / sizeof(T))
				: align_size);
};
//============================================================
// Aligned vector
//============================================================
// FIXME: needs a separate file
template <class T, std::size_t ALIGN = alignof(T)>
class aligned_vector : public std::vector<T, aligned_allocator<T, ALIGN>>
{
@ -326,13 +375,13 @@ namespace plib {
using base::base;
reference operator[](size_type i) noexcept
C14CONSTEXPR reference operator[](size_type i) noexcept
{
return assume_aligned_ptr<T, ALIGN>(this->data())[i];
return assume_aligned_ptr<T, ALIGN>(&((*this)[0]))[i];
}
// Read-only element access through a pointer annotated as ALIGN-byte
// aligned. No bounds checking is performed.
//
// The storage pointer is fetched from the base class: spelling it as
// &((*this)[0]) would resolve to this very operator and never terminate.
constexpr const_reference operator[](size_type i) const noexcept
{
	return assume_aligned_ptr<T, ALIGN>(base::data())[i];
}
pointer data() noexcept { return assume_aligned_ptr<T, ALIGN>(base::data()); }
@ -340,11 +389,6 @@ namespace plib {
};
#endif
} // namespace plib
#endif /* PALLOC_H_ */

View File

@ -104,7 +104,7 @@ namespace plib {
return m_a[i];
}
#else
reference operator[](size_type i) noexcept
C14CONSTEXPR reference operator[](size_type i) noexcept
{
return assume_aligned_ptr<FT, PALIGN_VECTOROPT>(&m_a[0])[i];
}

View File

@ -41,6 +41,8 @@
#define USE_ALIGNED_OPTIMIZATIONS (0)
#endif
#define USE_ALIGNED_ALLOCATION (USE_ALIGNED_OPTIMIZATIONS)
#define USE_ALIGNED_HINTS (USE_ALIGNED_OPTIMIZATIONS)
/*
* Standard alignment macros
*/

View File

@ -9,6 +9,7 @@
#include "pstring.h"
#include "ptypes.h"
#include "putil.h"
#include <limits>

View File

@ -0,0 +1,82 @@
// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* pmatrix2d.h
*
* NxM regular matrix
*
*/
#ifndef PMATRIX2D_H_
#define PMATRIX2D_H_
#include "palloc.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <type_traits>
#include <vector>
namespace plib
{
// Regular N x M matrix stored row by row in one std::vector<T, A>.
//
// Every row occupies m_stride elements, where m_stride is the column count
// rounded up to a multiple of stride_size. With that padding the byte offset
// of each row from the start of the buffer is a multiple of align_size,
// which is what operator[] promises to assume_aligned_ptr.
//
// T - element type.
// A - allocator; its alignment is obtained through align_traits<A>.
template<typename T, typename A = std::allocator<T>>
class pmatrix2d
{
public:
	using value_type = T;

	// Alignment in bytes that row pointers are declared to have.
	static constexpr const std::size_t align_size = align_traits<A>::align_size;

	// Row-length granularity in elements of T. It is derived from sizeof(T),
	// the element size, because align_traits<A>::stride_size is computed
	// from the size of the allocator object and would over-pad every row.
	static constexpr const std::size_t stride_size =
		(sizeof(T) % align_size == 0) ? 1
		: ((align_size % sizeof(T) == 0) ? align_size / sizeof(T) : align_size);

	// Empty matrix; the stride matches what resize(0, 0) would produce.
	pmatrix2d()
	: m_N(0), m_M(0), m_stride(padded_columns(0))
	{
	}

	// Matrix with N rows and M columns of value-initialized elements.
	pmatrix2d(std::size_t N, std::size_t M)
	: m_N(N), m_M(M), m_stride(padded_columns(M)), m_v(N * m_stride)
	{
	}

	// Change the dimensions to N rows and M columns.
	// The underlying vector is resized linearly, so when the padded row
	// length changes, existing elements do not keep their (row, column)
	// position.
	void resize(std::size_t N, std::size_t M)
	{
		m_N = N;
		m_M = M;
		m_stride = padded_columns(M);
		m_v.resize(N * m_stride);
	}

	// Pointer to the first element of `row`. No bounds checking.
	// data() + offset is used instead of &m_v[offset] so that forming the
	// pointer stays valid for a matrix without columns.
	C14CONSTEXPR T * operator[] (std::size_t row) noexcept
	{
		return assume_aligned_ptr<T, align_size>(m_v.data() + m_stride * row);
	}

	constexpr const T * operator[] (std::size_t row) const noexcept
	{
		return assume_aligned_ptr<T, align_size>(m_v.data() + m_stride * row);
	}

	// Element at row r, column c. No bounds checking.
	T & operator()(std::size_t r, std::size_t c) noexcept
	{
		return (*this)[r][c];
	}

	const T & operator()(std::size_t r, std::size_t c) const noexcept
	{
		return (*this)[r][c];
	}

private:
	// Round a column count up to the next multiple of stride_size.
	// Division is used instead of a bit mask so the result is also correct
	// when stride_size is not a power of two.
	static constexpr std::size_t padded_columns(std::size_t M) noexcept
	{
		return ((M + stride_size - 1) / stride_size) * stride_size;
	}

	std::size_t m_N;        // number of rows
	std::size_t m_M;        // number of columns requested by the caller
	std::size_t m_stride;   // elements per stored row (columns plus padding)
	std::vector<T, A> m_v;  // row-major storage of m_N * m_stride elements
};
} // namespace plib
#endif /* PMATRIX2D_H_ */

View File

@ -81,38 +81,19 @@ namespace plib
// Accepts any number of arguments of any type and does nothing with them.
// Used to mark variables as deliberately unused.
template <class... Args>
inline void unused_var(Args&&...)
{
	// intentionally empty
}
//============================================================
// penum - strongly typed enumeration
//============================================================
struct penum_base
{
protected:
static int from_string_int(const char *str, const char *x);
static std::string nthstr(int n, const char *str);
};
} // namespace plib
#define P_ENUM(ename, ...) \
struct ename : public plib::penum_base { \
enum E { __VA_ARGS__ }; \
ename (E v) : m_v(v) { } \
bool set_from_string (const std::string &s) { \
static char const *const strings = # __VA_ARGS__; \
int f = from_string_int(strings, s.c_str()); \
if (f>=0) { m_v = static_cast<E>(f); return true; } else { return false; } \
} \
operator E() const {return m_v;} \
bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \
bool operator==(const E &rhs) const {return m_v == rhs;} \
std::string name() const { \
static char const *const strings = # __VA_ARGS__; \
return nthstr(static_cast<int>(m_v), strings); \
} \
private: E m_v; };
//============================================================
// Define a "has member" trait.
//============================================================
/* PDEFINE_HAS_MEMBER(name, member) defines a class template `name<T>`
 * whose static constant `value` tells whether `&T::member` is a valid
 * expression.
 *
 * Two overloads of `test` are declared: one taking decltype(&U::member),
 * which only exists when that expression is well-formed, and a variadic
 * catch-all. `test<T>(nullptr)` picks the first one when it exists; the
 * two overloads return types of different size (long vs. char), and
 * `value` compares the size of the selected return type with sizeof(long).
 */
#define PDEFINE_HAS_MEMBER(name, member) \
template <typename T> class name \
{ \
template <typename U> static long test(decltype(&U:: member)); \
template <typename U> static char test(...); \
public: \
static constexpr const bool value = sizeof(test<T>(nullptr)) == sizeof(long); \
}
#endif /* PTYPES_H_ */

View File

@ -93,6 +93,37 @@ namespace plib
const std::string &token,
const std::size_t maxsplit);
//============================================================
// penum - strongly typed enumeration
//============================================================
// Common base of the enumeration wrappers generated by P_ENUM.
// It only declares two protected static helpers; their definitions are not
// part of this header section.
struct penum_base
{
protected:
// P_ENUM calls this with the stringified enumerator list as `str` and the
// name to look up as `x`; a negative result is treated as "not found".
// NOTE(review): a non-negative result is cast to the enum type, so it is
// presumably the position of `x` within `str` - confirm in the definition.
static int from_string_int(const char *str, const char *x);
// P_ENUM calls this with the numeric enum value as `n` and the stringified
// enumerator list as `str` to obtain the value's name.
static std::string nthstr(int n, const char *str);
};
} // namespace plib
/* P_ENUM(ename, A, B, ...) defines a struct `ename` wrapping a plain
 * enum `E { A, B, ... }` together with string conversion:
 *
 *  - implicitly constructible from E and implicitly convertible back to E
 *  - set_from_string(s): looks `s` up via from_string_int in the
 *    stringified enumerator list; on a non-negative result stores the
 *    corresponding value and returns true, otherwise leaves the value
 *    unchanged and returns false
 *  - name(): returns nthstr(value, stringified enumerator list)
 *  - operator== against another `ename` or against a raw E
 *
 * The enumerator list is stringified with the preprocessor (# __VA_ARGS__),
 * so both helpers receive the text exactly as written at the macro call.
 */
#define P_ENUM(ename, ...) \
struct ename : public plib::penum_base { \
enum E { __VA_ARGS__ }; \
ename (E v) : m_v(v) { } \
bool set_from_string (const std::string &s) { \
static char const *const strings = # __VA_ARGS__; \
int f = from_string_int(strings, s.c_str()); \
if (f>=0) { m_v = static_cast<E>(f); return true; } else { return false; } \
} \
operator E() const {return m_v;} \
bool operator==(const ename &rhs) const {return m_v == rhs.m_v;} \
bool operator==(const E &rhs) const {return m_v == rhs;} \
std::string name() const { \
static char const *const strings = # __VA_ARGS__; \
return nthstr(static_cast<int>(m_v), strings); \
} \
private: E m_v; };
#endif /* PUTIL_H_ */

View File

@ -39,20 +39,6 @@ namespace devices
m_connected_net_idx.push_back(net_other);
}
void terms_for_net_t::set_pointers()
{
m_gt.resize(count(), 0.0);
m_go.resize(count(), 0.0);
m_Idr.resize(count(), 0.0);
m_connected_net_V.resize(count(), nullptr);
for (std::size_t i = 0; i < count(); i++)
{
m_terms[i]->set_ptrs(&m_gt[i], &m_go[i], &m_Idr[i]);
m_connected_net_V[i] = m_terms[i]->otherterm()->net().Q_Analog_state_ptr();
}
}
// ----------------------------------------------------------------------------------------
// matrix_solver
// ----------------------------------------------------------------------------------------
@ -254,8 +240,6 @@ namespace devices
m_terms[k]->m_railstart = m_terms[k]->count();
for (std::size_t i = 0; i < m_rails_temp[k]->count(); i++)
this->m_terms[k]->add(m_rails_temp[k]->terms()[i], m_rails_temp[k]->m_connected_net_idx.data()[i], false);
m_terms[k]->set_pointers();
}
// free all - no longer needed
@ -263,6 +247,8 @@ namespace devices
sort_terms(m_sort);
this->set_pointers();
/* create a list of non zero elements. */
for (unsigned k = 0; k < iN; k++)
{
@ -371,9 +357,9 @@ namespace devices
state().save(*this, m_terms[k]->m_h_n_m_1, this->name(), "m_h_n_m_1." + num);
// FIXME: This shouldn't be necessary, recalculate on each entry ...
state().save(*this, m_terms[k]->m_go.data(),"GO" + num, this->name(), m_terms[k]->count());
state().save(*this, m_terms[k]->m_gt.data(),"GT" + num, this->name(), m_terms[k]->count());
state().save(*this, m_terms[k]->m_Idr.data(),"IDR" + num, this->name(), m_terms[k]->count());
state().save(*this, m_gon[k],"GO" + num, this->name(), m_terms[k]->count());
state().save(*this, m_gtn[k],"GT" + num, this->name(), m_terms[k]->count());
state().save(*this, m_Idrn[k],"IDR" + num, this->name(), m_terms[k]->count());
}
}

View File

@ -10,9 +10,12 @@
#include "netlist/nl_base.h"
#include "netlist/nl_errstr.h"
#include "netlist/plib/palloc.h"
#include "netlist/plib/putil.h"
#include "netlist/plib/vector_ops.h"
#include "plib/palloc.h"
#include "plib/putil.h"
#include "plib/vector_ops.h"
#include "plib/pmatrix2d.h"
#include <cmath>
namespace netlist
{
@ -51,8 +54,6 @@ namespace devices
terminal_t **terms() { return m_terms.data(); }
void set_pointers();
std::size_t m_railstart;
std::vector<unsigned> m_nz; /* all non zero for multiplication */
@ -65,10 +66,6 @@ namespace devices
nl_double m_h_n_m_1;
std::vector<int> m_connected_net_idx;
plib::aligned_vector<nl_double, PALIGN_VECTOROPT> m_go;
plib::aligned_vector<nl_double, PALIGN_VECTOROPT> m_gt;
plib::aligned_vector<nl_double, PALIGN_VECTOROPT> m_Idr;
plib::aligned_vector<nl_double *, PALIGN_VECTOROPT> m_connected_net_V;
private:
std::vector<terminal_t *> m_terms;
@ -171,6 +168,31 @@ namespace devices
template <typename T>
void build_LE_RHS(T &child);
void set_pointers()
{
const std::size_t iN = this->m_nets.size();
std::size_t max_col = 0;
for (std::size_t k = 0; k < iN; k++)
max_col = std::max(max_col, m_terms[k]->count());
m_gtn.resize(iN, max_col);
m_gon.resize(iN, max_col);
m_Idrn.resize(iN, max_col);
m_connected_net_Vn.resize(iN, max_col);
for (std::size_t k = 0; k < iN; k++)
{
auto count = m_terms[k]->count();
for (std::size_t i = 0; i < count; i++)
{
m_terms[k]->terms()[i]->set_ptrs(&m_gtn[k][i], &m_gon[k][i], &m_Idrn[k][i]);
m_connected_net_Vn[k][i] = m_terms[k]->terms()[i]->otherterm()->net().Q_Analog_state_ptr();
}
}
}
template <typename AP, typename FT>
void fill_matrix(std::size_t N, AP &tcr, FT &RHS)
{
@ -181,17 +203,21 @@ namespace devices
const std::size_t term_count = net->count();
const std::size_t railstart = net->m_railstart;
const auto &go = m_gon[k];
const auto &gt = m_gtn[k];
const auto &Idr = m_Idrn[k];
const auto &cnV = m_connected_net_Vn[k];
for (std::size_t i = 0; i < railstart; i++)
*tcr_r[i] -= net->m_go[i];
*tcr_r[i] -= go[i];
typename FT::value_type gtot_t = 0.0;
typename FT::value_type RHS_t = 0.0;
for (std::size_t i = 0; i < term_count; i++)
{
gtot_t += net->m_gt[i];
RHS_t += net->m_Idr[i];
gtot_t += gt[i];
RHS_t += Idr[i];
}
// FIXME: Code above is faster than vec_sum - Check this
#if 0
@ -201,7 +227,7 @@ namespace devices
for (std::size_t i = railstart; i < term_count; i++)
{
RHS_t += (/*m_Idr[i]*/ + net->m_go[i] * *net->m_connected_net_V[i]);
RHS_t += (/*m_Idr[i]*/ + go[i] * *cnV[i]);
}
RHS[k] = RHS_t;
@ -211,6 +237,16 @@ namespace devices
}
template <typename T>
using aligned_alloc = plib::aligned_allocator<T, PALIGN_VECTOROPT>;
plib::pmatrix2d<nl_double, aligned_alloc<nl_double>> m_gon;
plib::pmatrix2d<nl_double, aligned_alloc<nl_double>> m_gtn;
plib::pmatrix2d<nl_double, aligned_alloc<nl_double>> m_Idrn;
plib::pmatrix2d<nl_double *, aligned_alloc<nl_double *>> m_connected_net_Vn;
plib::pmatrix2d<nl_double> m_test;
std::vector<plib::unique_ptr<terms_for_net_t>> m_terms;
std::vector<analog_net_t *> m_nets;
std::vector<poolptr<proxied_analog_output_t>> m_inps;
@ -283,7 +319,7 @@ namespace devices
const std::size_t terms_count = terms->count();
const std::size_t railstart = terms->m_railstart;
const float_type * const gt = terms->m_gt.data();
const float_type * const gt = m_gtn[k];
{
float_type akk = 0.0;
@ -293,7 +329,7 @@ namespace devices
Ak[k] = akk;
}
const float_type * const go = terms->m_go.data();
const float_type * const go = m_gon[k];
int * net_other = terms->m_connected_net_idx.data();
for (std::size_t i = 0; i < railstart; i++)
@ -314,9 +350,9 @@ namespace devices
float_type rhsk_b = 0.0;
const std::size_t terms_count = m_terms[k]->count();
const float_type * const go = m_terms[k]->m_go.data();
const float_type * const Idr = m_terms[k]->m_Idr.data();
const float_type * const * other_cur_analog = m_terms[k]->m_connected_net_V.data();
const float_type * const go = m_gon[k];
const float_type * const Idr = m_Idrn[k];
const float_type * const * other_cur_analog = m_connected_net_Vn[k];
for (std::size_t i = 0; i < terms_count; i++)
rhsk_a = rhsk_a + Idr[i];

View File

@ -85,10 +85,10 @@ unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_rap
float_type RHS_t = 0.0;
const std::size_t term_count = this->m_terms[k]->count();
const float_type * const gt = this->m_terms[k]->m_gt.data();
const float_type * const go = this->m_terms[k]->m_go.data();
const float_type * const Idr = this->m_terms[k]->m_Idr.data();
auto other_cur_analog = this->m_terms[k]->m_connected_net_V.data();
const float_type * const gt = this->m_gtn[k];
const float_type * const go = this->m_gon[k];
const float_type * const Idr = this->m_Idrn[k];
auto other_cur_analog = this->m_connected_net_Vn[k];
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
@ -136,7 +136,7 @@ unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_rap
{
const int * net_other = this->m_terms[k]->m_connected_net_idx.data();
const std::size_t railstart = this->m_terms[k]->m_railstart;
const float_type * go = this->m_terms[k]->m_go.data();
const float_type * go = this->m_gon[k];
float_type Idrive = 0.0;
for (std::size_t i = 0; i < railstart; i++)

View File

@ -192,7 +192,7 @@ poolptr<matrix_solver_t> NETLIB_NAME(solver)::create_solver_x(std::size_t size,
else
return this->create_solver_x<FT, SIZE / 2>(size, solvername);
}
};
}
struct net_splitter
{