netlist: fix a bug and some performance tweaks. (nw)

This commit is contained in:
couriersud 2019-02-20 20:16:08 +01:00
parent 085ddc5daf
commit a5f3787058
6 changed files with 74 additions and 33 deletions

View File

@ -28,7 +28,7 @@
* Your mileage may vary.
*
*/
#define USE_MEMPOOL (1)
#define USE_MEMPOOL (0)
/*! Store input values in logic_terminal_t.
*

View File

@ -338,7 +338,7 @@ namespace plib
plib::parray<float_type, RESTART + 1> m_y; /* mr + 1 */
//plib::parray<float_type, SIZE> m_v[RESTART + 1]; /* mr + 1, n */
std::array<std::array<float_type, storage_N>, RESTART + 1> m_v; /* mr + 1, n */
plib::parray<plib::parray<float_type, storage_N>, RESTART + 1> m_v; /* mr + 1, n */
std::size_t m_size;

View File

@ -48,17 +48,18 @@ namespace plib {
private:
struct block
{
/* Block constructor: obtains the raw storage for one pool block and records
 * an aligned start address in m_data.
 *
 * NOTE(review): this span is a diff rendering without +/- markers. The hunk
 * header (-48,17 +48,18) over the 22 lines shown implies four removed and
 * five added lines; the "old"/"new" labels below are inferred from that and
 * should be confirmed against the repository.
 *
 * mp        - mempool this block belongs to; m_min_alloc and m_min_align are
 *             read from it.
 * min_bytes - (new signature only) requested capacity in bytes; raised to at
 *             least mp->m_min_alloc before it is used.
 */
/* old signature */
block(mempool *mp)
/* new signature */
block(mempool *mp, std::size_t min_bytes)
: m_num_alloc(0)
/* presumably removed by the commit: the new body assigns m_free itself */
, m_free(mp->m_min_alloc)
, m_cur(0)
, m_data(nullptr)
, m_mempool(mp)
{
/* old: allocation size derived from mp->m_min_alloc only */
std::size_t alloc_bytes = (mp->m_min_alloc + mp->m_min_align - 1) & ~(mp->m_min_align - 1);
/* new: never allocate less than the pool minimum, and record the real capacity */
min_bytes = std::max(mp->m_min_alloc, min_bytes);
m_free = min_bytes;
/* new: round min_bytes up to a multiple of m_min_align (the mask form only
 * rounds correctly when m_min_align is a power of two -- TODO confirm) */
std::size_t alloc_bytes = (min_bytes + mp->m_min_align - 1) & ~(mp->m_min_align - 1);
m_data_allocated = static_cast<char *>(::operator new(alloc_bytes));
void *r = m_data_allocated;
/* std::align moves r forward to the first address aligned to m_min_align
 * (old call first, new call second).
 * NOTE(review): the return value is ignored. std::align returns nullptr and
 * leaves r unchanged when the aligned region does not fit in alloc_bytes,
 * and alloc_bytes carries no extra slack for the adjustment -- verify this
 * cannot happen for the alignments in use. */
std::align(mp->m_min_align, mp->m_min_alloc, r, alloc_bytes);
std::align(mp->m_min_align, min_bytes, r, alloc_bytes);
/* m_data is the (possibly adjusted) start; m_data_allocated keeps the pointer
 * returned by ::operator new */
m_data = reinterpret_cast<char *>(r);
}
std::size_t m_num_alloc;
@ -80,9 +81,9 @@ namespace plib {
};
/* Creates a block with a raw `new`, appends the pointer to m_blocks and
 * returns it. Where the block is deleted is not visible in this span.
 *
 * NOTE(review): diff rendering without +/- markers; of each adjacent pair the
 * first line is presumably the pre-commit version and the second its
 * replacement (hunk header -80,9 +81,9).
 *
 * min_bytes - (new signature only) forwarded unchanged to the block
 *             constructor.
 *
 * NOTE(review): the call site visible further down does
 * `b->m_free = m_min_alloc - rs;` right after `new_block(rs)`. If rs can
 * exceed m_min_alloc -- which is what passing rs here appears to allow --
 * that subtraction would wrap for unsigned operands. Verify.
 */
/* old signature */
block * new_block()
/* new signature */
block * new_block(std::size_t min_bytes)
{
/* old construction */
auto *b = new block(this);
/* new construction */
auto *b = new block(this, min_bytes);
m_blocks.push_back(b);
return b;
}
@ -144,7 +145,7 @@ namespace plib {
}
}
{
block *b = new_block();
block *b = new_block(rs);
b->m_num_alloc = 1;
b->m_free = m_min_alloc - rs;
auto ret = reinterpret_cast<void *>(b->m_data + b->m_cur);

View File

@ -28,7 +28,7 @@ namespace plib
/* Assigns `scalar` to v[0] .. v[n-1].
 *
 * n      - number of elements written.
 * v      - destination; only operator[] is used.
 * scalar - value to store, taken by forwarding reference.
 *
 * NOTE(review): diff rendering without +/- markers. The hunk header
 * (-28,7 +28,7) implies one replaced line; the two declarations of `s` are
 * presumably the pre-commit line followed by its replacement.
 */
template<typename VT, typename T>
void vec_set_scalar (const std::size_t n, VT &v, T && scalar)
{
/* old: s has type `const T`; when the argument is an lvalue, T is deduced as
 * an lvalue reference, so s is then a reference rather than a copy */
const T s(std::forward<T>(scalar));
/* new: s is a value of the type v[0] yields with the reference removed, so
 * any conversion from the argument type happens once, before the loop */
const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
for ( std::size_t i = 0; i < n; i++ )
v[i] = s;
}
@ -43,25 +43,50 @@ namespace plib
/* Returns the sum of v1[i] * v2[i] for i in [0, n), as a T.
 *
 * NOTE(review): diff rendering without +/- markers. Two bodies appear back to
 * back; going by the hunk header (-43,25 +43,50) the first is presumably the
 * pre-commit body and the second its replacement.
 */
template<typename T, typename V1, typename V2>
T vec_mult (const std::size_t n, const V1 & v1, const V2 & v2 )
{
/* first body: a single running sum, products added in index order */
T value = 0.0;
for ( std::size_t i = 0; i < n; i++ )
value += v1[i] * v2[i];
return value;
/* second body: product i is added to partial sum i & 7 (i modulo 8), and the
 * eight partial sums are then added left to right. For floating-point T this
 * different summation order can round differently from the first body.
 * PALIGNAS_VECTOROPT() is defined outside this span -- presumably an
 * alignment specifier. */
PALIGNAS_VECTOROPT() T value[8] = {0};
for (std::size_t i = 0; i < n ; i++ )
{
value[i & 7] += v1[i] * v2[i];
}
return value[0] + value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7];
}
/* Returns the sum of v[i] * v[i] for i in [0, n), as a T.
 *
 * NOTE(review): diff rendering without +/- markers. Two bodies appear back to
 * back; going by the hunk header (-43,25 +43,50) the first is presumably the
 * pre-commit body and the second its replacement.
 */
template<typename T, typename VT>
T vec_mult2 (const std::size_t n, const VT &v)
{
/* first body: a single running sum, squares added in index order */
T value = 0.0;
for ( std::size_t i = 0; i < n; i++ )
value += v[i] * v[i];
return value;
/* second body: square i is added to partial sum i & 7 (i modulo 8), and the
 * eight partial sums are then added left to right. For floating-point T this
 * different summation order can round differently from the first body.
 * PALIGNAS_VECTOROPT() is defined outside this span -- presumably an
 * alignment specifier. */
PALIGNAS_VECTOROPT() T value[8] = {0};
for (std::size_t i = 0; i < n ; i++ )
{
value[i & 7] += v[i] * v[i];
}
return value[0] + value[1] + value[2] + value[3] + value[4] + value[5] + value[6] + value[7];
}
template<typename T, typename VT>
T vec_sum (const std::size_t n, const VT &v)
{
if (n<8)
{
PALIGNAS_VECTOROPT() T value(0);
for (std::size_t i = 0; i < n ; i++ )
value += v[i];
return value;
}
else
{
PALIGNAS_VECTOROPT() T value[8] = {0};
for (std::size_t i = 0; i < n ; i++ )
value[i & 7] += v[i];
return ((value[0] + value[1]) + (value[2] + value[3])) + ((value[4] + value[5]) + (value[6] + value[7]));
}
}
/* Writes scalar * v[i] to result[i] for i in [0, n).
 *
 * n      - number of elements processed.
 * v      - source; only operator[] is used.
 * scalar - multiplier, taken by forwarding reference.
 * result - destination; only operator[] is used.
 *
 * NOTE(review): diff rendering without +/- markers; the two declarations of
 * `s` are presumably the pre-commit line followed by its replacement.
 */
template<typename VV, typename T, typename VR>
void vec_mult_scalar (const std::size_t n, const VV & v, T && scalar, VR & result)
{
/* old: s has type `const T` (a reference when the argument is an lvalue) */
const T s(std::forward<T>(scalar));
/* new: s is a value of the type v[0] yields with the reference removed */
const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
for ( std::size_t i = 0; i < n; i++ )
result[i] = s * v[i];
}
@ -69,9 +94,9 @@ namespace plib
/* Adds scalar * v[i] to result[i] for i in [0, n).
 *
 * n      - number of elements processed.
 * v      - source; only operator[] is used.
 * scalar - multiplier, taken by forwarding reference.
 * result - accumulated into in place.
 *
 * NOTE(review): diff rendering without +/- markers. The hunk header
 * (-69,9 +94,9) is consistent with two replaced lines; in each adjacent pair
 * the first line is presumably the pre-commit version.
 */
template<typename VV, typename T, typename VR>
void vec_add_mult_scalar (const std::size_t n, const VV & v, T && scalar, VR & result)
{
/* old: s has type `const T` (a reference when the argument is an lvalue) */
const T s(std::forward<T>(scalar));
/* new: s is a value of the type v[0] yields with the reference removed */
const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
for ( std::size_t i = 0; i < n; i++ )
/* old form of the update */
result[i] = result[i] + s * v[i];
/* new form of the update, written as a compound assignment */
result[i] += s * v[i];
}
template<typename T>
@ -98,9 +123,9 @@ namespace plib
/* Multiplies v[i] by scalar in place for i in [0, n).
 *
 * n      - number of elements processed.
 * v      - scaled in place; only operator[] is used.
 * scalar - multiplier, taken by forwarding reference.
 *
 * NOTE(review): diff rendering without +/- markers. The hunk header
 * (-98,9 +123,9) is consistent with two replaced lines; in each adjacent pair
 * the first line is presumably the pre-commit version.
 */
template<typename V, typename T>
void vec_scale(const std::size_t n, V & v, T &&scalar)
{
/* old: s has type `const T` (a reference when the argument is an lvalue) */
const T s(std::forward<T>(scalar));
/* new: s is a value of the type v[0] yields with the reference removed */
const typename std::remove_reference<decltype(v[0])>::type s(std::forward<T>(scalar));
for ( std::size_t i = 0; i < n; i++ )
/* old form of the update */
v[i] = s * v[i];
/* new form of the update, written as a compound assignment */
v[i] *= s;
}
template<typename T, typename V>

View File

@ -11,6 +11,7 @@
#include "netlist/nl_base.h"
#include "netlist/nl_errstr.h"
#include "netlist/plib/putil.h"
#include "netlist/plib/vector_ops.h"
namespace netlist
{
@ -54,27 +55,41 @@ public:
void set_pointers();
/* FIXME: this works a bit better for larger matrices */
template <typename AP, typename FT>
void fill_matrix(AP &tcr, FT &RHS)
void fill_matrix/*_larger*/(AP &tcr, FT &RHS)
{
FT gtot_t = 0.0;
FT RHS_t = 0.0;
const std::size_t term_count = this->count();
const std::size_t railstart = this->m_railstart;
const FT * const * other_cur_analog = this->connected_net_V();
const FT * const * other_cur_analog = m_connected_net_V.data();
const FT * p_go = m_go.data();
const FT * p_gt = m_gt.data();
const FT * p_Idr = m_Idr.data();
for (std::size_t i = 0; i < railstart; i++)
{
*tcr[i] -= m_go[i];
gtot_t += m_gt[i];
RHS_t += m_Idr[i];
*tcr[i] -= p_go[i];
}
#if 1
FT gtot_t = 0.0;
FT RHS_t = 0.0;
for (std::size_t i = 0; i < term_count; i++)
{
gtot_t += p_gt[i];
RHS_t += p_Idr[i];
}
// FIXME: Code above is faster than vec_sum - Check this
#else
auto gtot_t = plib::vec_sum<FT>(term_count, p_gt);
auto RHS_t = plib::vec_sum<FT>(term_count, p_Idr);
#endif
for (std::size_t i = railstart; i < term_count; i++)
{
RHS_t += (m_Idr[i] + m_go[i] * *other_cur_analog[i]);
gtot_t += m_gt[i];
RHS_t += (/*m_Idr[i]*/ + p_go[i] * *other_cur_analog[i]);
}
RHS = RHS_t;

View File

@ -88,7 +88,7 @@ unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_rap
const float_type * const gt = this->m_terms[k]->gt();
const float_type * const go = this->m_terms[k]->go();
const float_type * const Idr = this->m_terms[k]->Idr();
const float_type * const *other_cur_analog = this->m_terms[k]->connected_net_V();
auto other_cur_analog = this->m_terms[k]->connected_net_V();
this->m_new_V[k] = this->m_nets[k]->Q_Analog();