netlist: refactor code for better scalability and flexibility. (nw)

These changes aim to remove some of the duplication of code in the
various solvers.
Tested with gcc-7 clang-8 and nvcc-9.2
This commit is contained in:
couriersud 2019-01-21 00:36:57 +01:00
parent 6d5e014a6f
commit 32b0442c73
19 changed files with 1054 additions and 868 deletions

View File

@ -207,7 +207,7 @@ native:
$(MAKE) CEXTRAFLAGS="-march=native -Wall -Wpedantic -Wsign-compare -Wextra -Wno-unused-parameter"
clang:
$(MAKE) CC=clang++ LD=clang++ CEXTRAFLAGS="-march=native -Wno-unused-parameter -Weverything -Werror -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors"
$(MAKE) CC=clang++ LD=clang++ CEXTRAFLAGS="-march=native -Wno-c++11-narrowing -Wno-unused-parameter -Weverything -Werror -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors"
clang-5:
$(MAKE) CC=clang++-5.0 LD=clang++-5.0 CEXTRAFLAGS="-march=native -Weverything -Werror -Wno-inconsistent-missing-destructor-override -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wno-weak-template-vtables -Wno-exit-time-destructors"
@ -217,6 +217,7 @@ nvcc:
CEXTRAFLAGS="-x cu -DNVCCBUILD=1 --expt-extended-lambda --expt-relaxed-constexpr --default-stream per-thread --restrict"
#
# -Wno-c++11-narrowing : seems a bit broken
# Mostly done: -Wno-weak-vtables -Wno-cast-align
# FIXME: -Wno-weak-vtables -Wno-missing-variable-declarations -Wno-conversion -Wno-exit-time-destructors
# FIXME: -Winconsistent-missing-destructor-override : c++ community has diverging opinions on this https://github.com/isocpp/CppCoreGuidelines/issues/721

View File

@ -322,11 +322,11 @@ namespace netlist
const T &value //!< Initial value after construction
);
//! Copy Constructor.
constexpr state_var(const state_var &rhs) noexcept = default;
constexpr state_var(const state_var &rhs) = default;
//! Move Constructor.
constexpr state_var(state_var &&rhs) noexcept = default;
//! Assignment operator to assign value of a state var.
C14CONSTEXPR state_var &operator=(const state_var &rhs) noexcept = default;
C14CONSTEXPR state_var &operator=(const state_var &rhs) = default;
//! Assignment move operator to assign value of a state var.
C14CONSTEXPR state_var &operator=(state_var &&rhs) noexcept = default;
//! Assignment operator to assign value of type T.

View File

@ -0,0 +1,117 @@
// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* parray.h
*
*/
#ifndef PARRAY_H_
#define PARRAY_H_
#include "pconfig.h"
#include "pexception.h"
#include <memory>
#include <utility>
#include <vector>
#include <array>
#include <type_traits>
namespace plib {
template <typename FT, int SIZE>
struct sizeabs
{
static constexpr std::size_t ABS() { return (SIZE < 0) ? static_cast<std::size_t>(0 - SIZE) : static_cast<std::size_t>(SIZE); }
typedef typename std::array<FT, ABS()> container;
};
template <typename FT>
struct sizeabs<FT, 0>
{
static constexpr const std::size_t ABS = 0;
typedef typename std::vector<FT> container;
};
/**
* \brief Array with preallocated or dynamic allocation
*
* Passing SIZE > 0 has the same functionality as a std::array.
* SIZE = 0 is pure dynamic allocation, the actual array size is passed to the
* constructor.
* SIZE < 0 reserves std::abs(SIZE) elements statically in place allocated. The
* actual size is passed in by the constructor.
* This array is purely intended for HPC application where depending on the
* architecture a preference dynamic/static has to be made.
*
* This struct is not intended to be a full replacement to std::array.
* It is a subset to enable switching between dynamic and static allocation.
* I consider > 10% performance difference to be a use case.
*/
template <typename FT, int SIZE>
struct parray
{
public:
static constexpr std::size_t SIZEABS() { return sizeabs<FT, SIZE>::ABS(); }
typedef typename sizeabs<FT, SIZE>::container base_type;
typedef typename base_type::size_type size_type;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
template <int X = SIZE >
parray(size_type size, typename std::enable_if<X==0, int>::type = 0)
: m_a(size), m_size(size)
{
}
#if 0
/* allow construction in fixed size arrays */
template <int X = SIZE >
parray(typename std::enable_if<X==0, int>::type = 0)
: m_size(0)
{
}
#endif
template <int X = SIZE >
parray(size_type size, typename std::enable_if<X!=0, int>::type = 0)
: m_size(size)
{
if (SIZE < 0 && size > SIZEABS())
throw plib::pexception("parray: size error " + plib::to_string(size) + ">" + plib::to_string(SIZEABS()));
else if (SIZE > 0 && size != SIZEABS())
throw plib::pexception("parray: size error");
}
inline size_type size() const noexcept { return SIZE <= 0 ? m_size : SIZEABS(); }
constexpr size_type max_size() const noexcept { return base_type::max_size(); }
bool empty() const noexcept { return size() == 0; }
#if 0
reference operator[](size_type i) /*noexcept*/
{
if (i >= m_size) throw plib::pexception("limits error " + to_string(i) + ">=" + to_string(m_size));
return m_a[i];
}
const_reference operator[](size_type i) const /*noexcept*/
{
if (i >= m_size) throw plib::pexception("limits error " + to_string(i) + ">=" + to_string(m_size));
return m_a[i];
}
#else
reference operator[](size_type i) noexcept { return m_a[i]; }
constexpr const_reference operator[](size_type i) const noexcept { return m_a[i]; }
#endif
FT * data() noexcept { return m_a.data(); }
const FT * data() const noexcept { return m_a.data(); }
private:
base_type m_a;
size_type m_size;
};
}
#endif /* PARRAY_H_ */

View File

@ -103,7 +103,7 @@ public:
template<typename C>
void save_item(const void *owner, std::vector<C> &v, const pstring &stname)
{
save_state(v.data(), owner, stname, v.size());
save_state_ptr(owner, stname, datatype_f<C>::f(), v.size(), v.data());
}
void pre_save();

View File

@ -11,60 +11,232 @@
#define MAT_CR_H_
#include <algorithm>
#include <type_traits>
#include <array>
#include <vector>
#include <cmath>
#include "../plib/pconfig.h"
#include "../plib/palloc.h"
#include "../plib/pstate.h"
#include "../plib/parray.h"
template<std::size_t N, typename C = uint16_t, typename T = double>
namespace plib
{
template<typename T, int N, typename C = uint16_t>
struct mat_cr_t
{
typedef C index_type;
typedef T value_type;
C diag[N]; // diagonal index pointer n
C ia[N+1]; // row index pointer n + 1
C ja[N*N]; // column index array nz_num, initially (n * n)
T A[N*N]; // Matrix elements nz_num, initially (n * n)
parray<C, N> diag; // diagonal index pointer n
parray<C, (N == 0) ? 0 : (N < 0 ? N - 1 : N + 1)> row_idx; // row index pointer n + 1
parray<C, N < 0 ? -N * N : N *N> col_idx; // column index array nz_num, initially (n * n)
parray<T, N < 0 ? -N * N : N *N> A; // Matrix elements nz_num, initially (n * n)
//parray<C, N < 0 ? -N * N / 2 : N * N / 2> nzbd; // Support for gaussian elimination
parray<C, N < 0 ? -N * (N-1) / 2 : N * (N+1) / 2 > nzbd; // Support for gaussian elimination
// contains elimination rows below the diagonal
std::size_t size;
std::size_t m_size;
std::size_t nz_num;
explicit mat_cr_t(const std::size_t n)
: size(n)
: diag(n)
, row_idx(n+1)
, col_idx(n*n)
, A(n*n)
, nzbd(n * (n+1) / 2)
, m_size(n)
, nz_num(0)
{
#if 0
#if 0
ia = plib::palloc_array<C>(n + 1);
ja = plib::palloc_array<C>(n * n);
diag = plib::palloc_array<C>(n);
#else
diag = plib::palloc_array<C>(n + (n + 1) + n * n);
ia = diag + n;
ja = ia + (n+1);
A = plib::palloc_array<T>(n * n);
#endif
#endif
for (std::size_t i=0; i<n+1; i++)
A[i] = 0;
}
~mat_cr_t()
{
#if 0
plib::pfree_array(diag);
#if 0
plib::pfree_array(ia);
plib::pfree_array(ja);
#endif
plib::pfree_array(A);
#endif
}
std::size_t size() const { return m_size; }
void set_scalar(const T scalar)
{
for (std::size_t i=0, e=nz_num; i<e; i++)
A[i] = scalar;
}
void mult_vec(const T * RESTRICT x, T * RESTRICT res)
void set(C r, C c, T val)
{
C ri = row_idx[r];
while (ri < row_idx[r+1] && col_idx[ri] < c)
ri++;
// we have the position now;
if (nz_num > 0 && col_idx[ri] == c)
A[ri] = val;
else
{
for (C i = nz_num; i>ri; i--)
{
A[i] = A[i-1];
col_idx[i] = col_idx[i-1];
}
A[ri] = val;
col_idx[ri] = c;
for (C i = row_idx[r]; i < size()+1;i++)
row_idx[i]++;
nz_num++;
if (c==r)
diag[r] = ri;
}
}
enum constants_e
{
FILL_INFINITY = 9999999
};
template <typename M>
std::pair<std::size_t, std::size_t> gaussian_extend_fill_mat(M &fill)
{
std::size_t ops = 0;
std::size_t fill_max = 0;
for (std::size_t k = 0; k < fill.size(); k++)
{
ops++; // 1/A(k,k)
for (std::size_t row = k + 1; row < fill.size(); row++)
{
if (fill[row][k] < FILL_INFINITY)
{
ops++;
for (std::size_t col = k + 1; col < fill[row].size(); col++)
//if (fill[k][col] < FILL_INFINITY)
{
auto f = std::min(fill[row][col], 1 + fill[row][k] + fill[k][col]);
if (f < FILL_INFINITY)
{
if (f > fill_max)
fill_max = f;
ops += 2;
}
fill[row][col] = f;
}
}
}
}
return { fill_max, ops };
}
template <typename M>
void build_from_fill_mat(const M &f, std::size_t max_fill = FILL_INFINITY - 1,
unsigned band_width = FILL_INFINITY)
{
C nz = 0;
if (nz_num != 0)
throw pexception("build_from_mat only allowed on empty CR matrix");
for (std::size_t k=0; k < size(); k++)
{
row_idx[k] = nz;
for (std::size_t j=0; j < size(); j++)
if (f[k][j] <= max_fill && std::abs(static_cast<int>(k)-static_cast<int>(j)) <= static_cast<int>(band_width))
{
col_idx[nz] = static_cast<C>(j);
if (j == k)
diag[k] = nz;
nz++;
}
}
row_idx[size()] = nz;
nz_num = nz;
/* build nzbd */
std::size_t p=0;
for (std::size_t k=0; k < size(); k++)
{
for (std::size_t j=k + 1; j < size(); j++)
if (f[j][k] < FILL_INFINITY)
nzbd[p++] = static_cast<C>(j);
nzbd[p++] = 0; // end of sequence
}
}
template <typename V>
void gaussian_elimination(V & RHS)
{
std::size_t nzbdp = 0;
const std::size_t iN = size();
for (std::size_t i = 0; i < iN - 1; i++)
{
std::size_t pi = diag[i];
const value_type f = 1.0 / A[pi++];
const std::size_t piie = row_idx[i+1];
while (auto j = nzbd[nzbdp++])
{
// proceed to column i
std::size_t pj = row_idx[j];
while (col_idx[pj] < i)
pj++;
const value_type f1 = - A[pj++] * f;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; pii++)
{
while (col_idx[pj] < col_idx[pii])
pj++;
if (col_idx[pj] == col_idx[pii])
A[pj++] += A[pii] * f1;
}
RHS[j] += f1 * RHS[i];
}
}
}
template <typename V1, typename V2>
void gaussian_back_substitution(V1 &V, const V2 &RHS)
{
const std::size_t iN = size();
/* row n-1 */
V[iN - 1] = RHS[iN - 1] / A[diag[iN - 1]];
for (std::size_t j = iN - 1; j-- > 0;)
{
value_type tmp = 0;
const auto jdiag = diag[j];
const std::size_t e = row_idx[j+1];
for (std::size_t pk = jdiag + 1; pk < e; pk++)
tmp += A[pk] * V[col_idx[pk]];
V[j] = (RHS[j] - tmp) / A[jdiag];
}
}
template <typename V1>
void gaussian_back_substitution(V1 &V)
{
const std::size_t iN = size();
/* row n-1 */
V[iN - 1] = V[iN - 1] / A[diag[iN - 1]];
for (std::size_t j = iN - 1; j-- > 0;)
{
value_type tmp = 0;
const auto jdiag = diag[j];
const std::size_t e = row_idx[j+1];
for (std::size_t pk = jdiag + 1; pk < e; pk++)
tmp += A[pk] * V[col_idx[pk]];
V[j] = (V[j] - tmp) / A[jdiag];
}
}
template <typename VTV, typename VTR>
void mult_vec(const VTV & RESTRICT x, VTR & RESTRICT res)
{
/*
* res = A * x
@ -77,14 +249,68 @@ struct mat_cr_t
while (k < oe)
{
T tmp = 0.0;
const std::size_t e = ia[i+1];
const std::size_t e = row_idx[i+1];
for (; k < e; k++)
tmp += A[k] * x[ja[k]];
tmp += A[k] * x[col_idx[k]];
res[i++] = tmp;
}
}
void incomplete_LU_factorization(T * RESTRICT LU)
/* throws error if P(source)>P(destination) */
template <typename LUMAT>
void slim_copy_from(LUMAT & src)
{
for (std::size_t r=0; r<src.size(); r++)
{
C dp = row_idx[r];
for (C sp = src.row_idx[r]; sp < src.row_idx[r+1]; sp++)
{
/* advance dp to source column and fill 0s if necessary */
while (col_idx[dp] < src.col_idx[sp])
A[dp++] = 0;
if (row_idx[r+1] <= dp || col_idx[dp] != src.col_idx[sp])
throw plib::pexception("slim_copy_from error");
A[dp++] = src.A[sp];
}
/* fill remaining elements in row */
while (dp < row_idx[r+1])
A[dp++] = 0;
}
}
/* only copies common elements */
template <typename LUMAT>
void reduction_copy_from(LUMAT & src)
{
C sp = 0;
for (std::size_t r=0; r<src.size(); r++)
{
C dp = row_idx[r];
while(sp < src.row_idx[r+1])
{
/* advance dp to source column and fill 0s if necessary */
if (col_idx[dp] < src.col_idx[sp])
A[dp++] = 0;
else if (col_idx[dp] == src.col_idx[sp])
A[dp++] = src.A[sp++];
else
sp++;
}
/* fill remaining elements in row */
while (dp < row_idx[r+1])
A[dp++] = 0;
}
}
/* checks at all - may crash */
template <typename LUMAT>
void raw_copy_from(LUMAT & src)
{
for (std::size_t k = 0; k < nz_num; k++)
A[k] = src.A[k];
}
void incomplete_LU_factorization()
{
/*
* incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung
@ -93,38 +319,67 @@ struct mat_cr_t
*
*/
#if 0
const std::size_t lnz = nz_num;
for (std::size_t k = 0; k < lnz; k++)
LU[k] = A[k];
for (std::size_t i = 1; ia[i] < lnz; i++) // row i
for (std::size_t i = 1; row_idx[i] < lnz; i++) // row i
{
const std::size_t iai1 = ia[i + 1];
const std::size_t pke = diag[i];
for (std::size_t pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
const std::size_t p_i_end = row_idx[i + 1];
// loop over all columns left of diag in row i
for (std::size_t p_i_k = row_idx[i]; p_i_k < diag[i]; p_i_k++)
{
// pk == (i, k)
const std::size_t k = ja[pk];
const std::size_t iak1 = ia[k + 1];
const T LUpk = LU[pk] = LU[pk] / LU[diag[k]];
const std::size_t k = col_idx[p_i_k];
// get start of row k
const std::size_t p_k_end = row_idx[k + 1];
std::size_t pt = ia[k];
const T LUp_i_k = A[p_i_k] = A[p_i_k] / A[diag[k]];
for (std::size_t pj = pk + 1; pj < iai1; pj++) // pj = (i, j)
std::size_t p_k_j = row_idx[k];
for (std::size_t p_i_j = p_i_k + 1; p_i_j < p_i_end; p_i_j++) // pj = (i, j)
{
// we can assume that within a row ja increases continuously */
const std::size_t ej = ja[pj];
while (ja[pt] < ej && pt < iak1)
pt++;
if (pt < iak1 && ja[pt] == ej)
LU[pj] = LU[pj] - LUpk * LU[pt];
}
const std::size_t j = col_idx[p_i_j]; // row i, column j
while (col_idx[p_k_j] < j && p_k_j < p_k_end)
p_k_j++;
if (p_k_j < p_k_end && col_idx[p_k_j] == j)
A[p_i_j] = A[p_i_j] - LUp_i_k * A[p_k_j];
}
}
}
#else
for (std::size_t i = 1; i < m_size; i++) // row i
{
const std::size_t p_i_end = row_idx[i + 1];
// loop over all columns k left of diag in row i
for (std::size_t i_k = row_idx[i]; i_k < diag[i]; i_k++)
{
const std::size_t k = col_idx[i_k];
const std::size_t p_k_end = row_idx[k + 1];
const T LUp_i_k = A[i_k] = A[i_k] / A[diag[k]];
void solveLUx (const T * RESTRICT LU, T * RESTRICT r)
// get start of row k
//std::size_t k_j = row_idx[k];
std::size_t k_j = diag[k];
for (std::size_t i_j = i_k + 1; i_j < p_i_end; i_j++) // pj = (i, j)
{
// we can assume that within a row ja increases continuously */
const std::size_t j = col_idx[i_j]; // row i, column j
while (col_idx[k_j] < j && k_j < p_k_end)
k_j++;
if (k_j >= p_k_end)
break;
if (col_idx[k_j] == j)
A[i_j] = A[i_j] - LUp_i_k * A[k_j];
}
}
}
#endif
}
template <typename R>
void solveLUx (R &r)
{
/*
* Solve a linear equation Ax = r
@ -147,29 +402,30 @@ struct mat_cr_t
* This can be solved for x using backwards elimination in U.
*
*/
for (std::size_t i = 1; ia[i] < nz_num; ++i )
for (std::size_t i = 1; i < m_size; ++i )
{
T tmp = 0.0;
const std::size_t j1 = ia[i];
const std::size_t j1 = row_idx[i];
const std::size_t j2 = diag[i];
for (std::size_t j = j1; j < j2; ++j )
tmp += LU[j] * r[ja[j]];
tmp += A[j] * r[col_idx[j]];
r[i] -= tmp;
}
// i now is equal to n;
for (std::size_t i = size; i-- > 0; )
for (std::size_t i = m_size; i-- > 0; )
{
T tmp = 0.0;
const std::size_t di = diag[i];
const std::size_t j2 = ia[i+1];
const std::size_t j2 = row_idx[i+1];
for (std::size_t j = di + 1; j < j2; j++ )
tmp += LU[j] * r[ja[j]];
r[i] = (r[i] - tmp) / LU[di];
tmp += A[j] * r[col_idx[j]];
r[i] = (r[i] - tmp) / A[di];
}
}
};
}
#endif /* MAT_CR_H_ */

View File

@ -54,12 +54,43 @@ public:
void set_pointers();
template <typename AP, typename FT>
void fill_matrix(AP &tcr, FT &RHS)
{
FT gtot_t = 0.0;
FT RHS_t = 0.0;
const std::size_t term_count = this->count();
const std::size_t railstart = this->m_railstart;
const FT * const * RESTRICT other_cur_analog = this->connected_net_V();
for (std::size_t i = 0; i < railstart; i++)
{
*tcr[i] -= m_go[i];
gtot_t += m_gt[i];
RHS_t += m_Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
{
RHS_t += (m_Idr[i] + m_go[i] * *other_cur_analog[i]);
gtot_t += m_gt[i];
}
RHS = RHS_t;
// update diagonal element ...
*tcr[railstart] += gtot_t; //mat.A[mat.diag[k]] += gtot_t;
}
std::size_t m_railstart;
std::vector<unsigned> m_nz; /* all non zero for multiplication */
std::vector<unsigned> m_nzrd; /* non zero right of the diagonal for elimination, may include RHS element */
std::vector<unsigned> m_nzbd; /* non zero below of the diagonal for elimination */
/* state */
nl_double m_last_V;
nl_double m_DD_n_m_1;
@ -157,14 +188,15 @@ protected:
/* virtual */ void add_term(std::size_t net_idx, terminal_t *term);
template <typename T>
void store(const T * RESTRICT V);
template <typename T>
T delta(const T * RESTRICT V);
void store(const T & RESTRICT V);
template <typename T>
void build_LE_A();
auto delta(const T & RESTRICT V) -> typename std::decay<decltype(V[0])>::type;
template <typename T>
void build_LE_RHS();
void build_LE_A(T &child);
template <typename T>
void build_LE_RHS(T &child);
std::vector<std::unique_ptr<terms_for_net_t>> m_terms;
std::vector<analog_net_t *> m_nets;
@ -199,7 +231,7 @@ private:
};
template <typename T>
T matrix_solver_t::delta(const T * RESTRICT V)
auto matrix_solver_t::delta(const T & RESTRICT V) -> typename std::decay<decltype(V[0])>::type
{
/* NOTE: Ideally we should also include currents (RHS) here. This would
* need a reevaluation of the right hand side after voltages have been updated
@ -207,14 +239,14 @@ T matrix_solver_t::delta(const T * RESTRICT V)
*/
const std::size_t iN = this->m_terms.size();
T cerr = 0;
typename std::decay<decltype(V[0])>::type cerr = 0;
for (std::size_t i = 0; i < iN; i++)
cerr = std::max(cerr, std::abs(V[i] - static_cast<T>(this->m_nets[i]->Q_Analog())));
cerr = std::max(cerr, std::abs(V[i] - this->m_nets[i]->Q_Analog()));
return cerr;
}
template <typename T>
void matrix_solver_t::store(const T * RESTRICT V)
void matrix_solver_t::store(const T & RESTRICT V)
{
const std::size_t iN = this->m_terms.size();
for (std::size_t i = 0; i < iN; i++)
@ -222,34 +254,33 @@ void matrix_solver_t::store(const T * RESTRICT V)
}
template <typename T>
void matrix_solver_t::build_LE_A()
void matrix_solver_t::build_LE_A(T &child)
{
typedef typename T::float_type float_type;
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this);
const std::size_t iN = child.N();
for (std::size_t k = 0; k < iN; k++)
{
terms_for_net_t *terms = m_terms[k].get();
nl_double * Ak = &child.A(k, 0ul);
float_type * Ak = &child.A(k, 0ul);
for (std::size_t i=0; i < iN; i++)
Ak[i] = 0.0;
const std::size_t terms_count = terms->count();
const std::size_t railstart = terms->m_railstart;
const nl_double * const RESTRICT gt = terms->gt();
const float_type * const RESTRICT gt = terms->gt();
{
nl_double akk = 0.0;
float_type akk = 0.0;
for (std::size_t i = 0; i < terms_count; i++)
akk += gt[i];
Ak[k] = akk;
}
const nl_double * const RESTRICT go = terms->go();
const float_type * const RESTRICT go = terms->go();
int * RESTRICT net_other = terms->connected_net_idx();
for (std::size_t i = 0; i < railstart; i++)
@ -258,21 +289,21 @@ void matrix_solver_t::build_LE_A()
}
template <typename T>
void matrix_solver_t::build_LE_RHS()
void matrix_solver_t::build_LE_RHS(T &child)
{
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this);
typedef typename T::float_type float_type;
const std::size_t iN = child.N();
for (std::size_t k = 0; k < iN; k++)
{
nl_double rhsk_a = 0.0;
nl_double rhsk_b = 0.0;
float_type rhsk_a = 0.0;
float_type rhsk_b = 0.0;
const std::size_t terms_count = m_terms[k]->count();
const nl_double * const RESTRICT go = m_terms[k]->go();
const nl_double * const RESTRICT Idr = m_terms[k]->Idr();
const nl_double * const * RESTRICT other_cur_analog = m_terms[k]->connected_net_V();
const float_type * const RESTRICT go = m_terms[k]->go();
const float_type * const RESTRICT Idr = m_terms[k]->Idr();
const float_type * const * RESTRICT other_cur_analog = m_terms[k]->connected_net_V();
for (std::size_t i = 0; i < terms_count; i++)
rhsk_a = rhsk_a + Idr[i];

View File

@ -14,28 +14,21 @@
#include "nld_solver.h"
#include "nld_matrix_solver.h"
#include "vector_base.h"
/* Disabling dynamic allocation gives a ~10% boost in performance
* This flag has been added to support continuous storage for arrays
* going forward in case we implement cuda solvers in the future.
*/
#define NL_USE_DYNAMIC_ALLOCATION (1)
#include "mat_cr.h"
namespace netlist
{
namespace devices
{
//#define nl_ext_double _float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
namespace devices
{
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
class matrix_solver_direct_t: public matrix_solver_t
{
friend class matrix_solver_t;
public:
typedef FT float_type;
matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const std::size_t size);
@ -48,36 +41,25 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson);
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
constexpr std::size_t N() const { return (SIZE > 0) ? static_cast<std::size_t>(SIZE) : m_dim; }
void LE_solve();
template <typename T>
void LE_back_subst(T * RESTRICT x);
void LE_back_subst(T & RESTRICT x);
#if (NL_USE_DYNAMIC_ALLOCATION)
nl_ext_double &A(const std::size_t r, const std::size_t c) { return m_A[r * m_pitch + c]; }
nl_ext_double &RHS(const std::size_t r) { return m_A[r * m_pitch + N()]; }
#else
nl_ext_double &A(const std::size_t r, const std::size_t c) { return m_A[r][c]; }
nl_ext_double &RHS(const std::size_t r) { return m_A[r][N()]; }
#endif
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
FT &A(std::size_t r, std::size_t c) { return m_A[r * m_pitch + c]; }
FT &RHS(std::size_t r) { return m_A[r * m_pitch + N()]; }
plib::parray<FT, SIZE> m_last_RHS;
plib::parray<FT, SIZE> m_new_V;
private:
//static const std::size_t m_pitch = (((storage_N + 1) + 0) / 1) * 1;
static constexpr std::size_t m_pitch = (((storage_N + 1) + 7) / 8) * 8;
//static const std::size_t m_pitch = (((storage_N + 1) + 15) / 16) * 16;
//static const std::size_t m_pitch = (((storage_N + 1) + 31) / 32) * 32;
#if (NL_USE_DYNAMIC_ALLOCATION)
//nl_ext_double * RESTRICT m_A;
std::vector<nl_ext_double> m_A;
#else
nl_ext_double m_A[storage_N][m_pitch];
#endif
//nl_ext_double m_RHSx[storage_N];
static constexpr const std::size_t SIZEABS = plib::parray<FT, SIZE>::SIZEABS();
static constexpr const std::size_t m_pitch_ABS = (((SIZEABS + 1) + 7) / 8) * 8;
const std::size_t m_dim;
const std::size_t m_pitch;
plib::parray<FT, SIZE * m_pitch_ABS> m_A;
};
@ -85,16 +67,13 @@ private:
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
template <typename FT, int SIZE>
matrix_solver_direct_t<FT, SIZE>::~matrix_solver_direct_t()
{
#if (NL_USE_DYNAMIC_ALLOCATION)
//plib::pfree_array(m_A);
#endif
}
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_direct_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_t::setup_base(nets);
@ -107,34 +86,31 @@ void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
t->m_nzrd.push_back(static_cast<unsigned>(N()));
}
state().save(*this, m_last_RHS, "m_last_RHS");
state().save(*this, m_last_RHS.data(), "m_last_RHS", m_last_RHS.size());
for (std::size_t k = 0; k < N(); k++)
state().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k));
}
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
template <typename FT, int SIZE>
void matrix_solver_direct_t<FT, SIZE>::LE_solve()
{
const std::size_t kN = N();
if (!m_params.m_pivot)
{
for (std::size_t i = 0; i < kN; i++)
{
/* FIXME: Singular matrix? */
nl_double *Ai = &A(i, 0);
const nl_double f = 1.0 / A(i,i);
const FT f = 1.0 / A(i,i);
const auto &nzrd = m_terms[i]->m_nzrd;
const auto &nzbd = m_terms[i]->m_nzbd;
for (std::size_t j : nzbd)
{
nl_double *Aj = &A(j, 0);
const nl_double f1 = -f * Aj[i];
const FT f1 = -f * A(j, i);
for (std::size_t k : nzrd)
Aj[k] += Ai[k] * f1;
A(j, k) += A(i, k) * f1;
//RHS(j) += RHS(i) * f1;
}
}
@ -161,17 +137,17 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
//std::swap(RHS(i), RHS(maxrow));
}
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i);
const FT f = 1.0 / A(i,i);
/* Eliminate column i from row j */
for (std::size_t j = i + 1; j < kN; j++)
{
const nl_double f1 = - A(j,i) * f;
const FT f1 = - A(j,i) * f;
if (f1 != NL_FCONST(0.0))
{
const nl_double * RESTRICT pi = &A(i,i+1);
nl_double * RESTRICT pj = &A(j,i+1);
const FT * RESTRICT pi = &A(i,i+1);
FT * RESTRICT pj = &A(j,i+1);
#if 1
vec_add_mult_scalar_p(kN-i,pi,f1,pj);
#else
@ -188,10 +164,10 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
}
}
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
template <typename T>
void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
T * RESTRICT x)
void matrix_solver_direct_t<FT, SIZE>::LE_back_subst(
T & RESTRICT x)
{
const std::size_t kN = N();
@ -200,7 +176,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
{
for (std::size_t j = kN; j-- > 0; )
{
T tmp = 0;
FT tmp = 0;
for (std::size_t k = j+1; k < kN; k++)
tmp += A(j,k) * x[k];
x[j] = (RHS(j) - tmp) / A(j,j);
@ -210,40 +186,33 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
{
for (std::size_t j = kN; j-- > 0; )
{
T tmp = 0;
const auto *p = m_terms[j]->m_nzrd.data();
const auto e = m_terms[j]->m_nzrd.size() - 1; /* exclude RHS element */
T * Aj = &A(j,0);
for (std::size_t k = 0; k < e; k++)
{
const auto pk = p[k];
tmp += Aj[pk] * x[pk];
}
FT tmp = 0;
const auto &nzrd = m_terms[j]->m_nzrd;
const auto e = nzrd.size() - 1; /* exclude RHS element */
for ( std::size_t k = 0; k < e; k++)
tmp += A(j, nzrd[k]) * x[nzrd[k]];
x[j] = (RHS(j) - tmp) / A(j,j);
}
}
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_direct_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
{
nl_double new_V[storage_N]; // = { 0.0 };
this->LE_solve();
this->LE_back_subst(new_V);
this->LE_back_subst(m_new_V);
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
const FT err = (newton_raphson ? delta(m_new_V) : 0.0);
store(m_new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_direct_t>();
build_LE_RHS<matrix_solver_direct_t>();
this->build_LE_A(*this);
this->build_LE_RHS(*this);
for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
@ -252,33 +221,33 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool n
return this->solve_non_dynamic(newton_raphson);
}
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
template <typename FT, int SIZE>
matrix_solver_direct_t<FT, SIZE>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, ASCENDING, params)
, m_last_RHS(size)
, m_new_V(size)
, m_dim(size)
, m_pitch(m_pitch_ABS ? m_pitch_ABS : (((m_dim + 1) + 7) / 8) * 8)
, m_A(size * m_pitch)
{
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A.resize(N() * m_pitch);
//m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;
}
}
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
template <typename FT, int SIZE>
matrix_solver_direct_t<FT, SIZE>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
const eSortType sort, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, sort, params)
, m_last_RHS(size)
, m_new_V(size)
, m_dim(size)
, m_pitch(m_pitch_ABS ? m_pitch_ABS : (((m_dim + 1) + 7) / 8) * 8)
, m_A(size * m_pitch)
{
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A.resize(N() * m_pitch);
//m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
for (std::size_t k = 0; k < N(); k++)
{
m_last_RHS[k] = 0.0;
}

View File

@ -13,37 +13,41 @@
namespace netlist
{
namespace devices
{
class matrix_solver_direct1_t: public matrix_solver_direct_t<1,1>
namespace devices
{
public:
template <typename FT>
class matrix_solver_direct1_t: public matrix_solver_direct_t<FT, 1>
{
public:
typedef FT float_type;
typedef matrix_solver_direct_t<FT, 1> base_type;
matrix_solver_direct1_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
: matrix_solver_direct_t<1, 1>(anetlist, name, params, 1)
: matrix_solver_direct_t<FT, 1>(anetlist, name, params, 1)
{}
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
};
// ----------------------------------------------------------------------------------------
// matrix_solver - Direct1
// ----------------------------------------------------------------------------------------
inline unsigned matrix_solver_direct1_t::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_direct1_t>();
build_LE_RHS<matrix_solver_direct1_t>();
// ----------------------------------------------------------------------------------------
// matrix_solver - Direct1
// ----------------------------------------------------------------------------------------
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
nl_double new_V[1] = { RHS(0) / A(0,0) };
FT new_V[1] = { this->RHS(0) / this->A(0,0) };
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
const FT err = (newton_raphson ? this->delta(new_V) : 0.0);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
}
} //namespace devices
};
} //namespace devices
} // namespace netlist

View File

@ -13,43 +13,48 @@
namespace netlist
{
namespace devices
{
class matrix_solver_direct2_t: public matrix_solver_direct_t<2,2>
namespace devices
{
public:
template <typename FT>
class matrix_solver_direct2_t: public matrix_solver_direct_t<FT, 2>
{
public:
typedef FT float_type;
matrix_solver_direct2_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
: matrix_solver_direct_t<2, 2>(anetlist, name, params, 2)
: matrix_solver_direct_t<double, 2>(anetlist, name, params, 2)
{}
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
};
};
// ----------------------------------------------------------------------------------------
// matrix_solver - Direct2
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// matrix_solver - Direct2
// ----------------------------------------------------------------------------------------
inline unsigned matrix_solver_direct2_t::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_direct2_t>();
build_LE_RHS<matrix_solver_direct2_t>();
template <typename FT>
inline unsigned matrix_solver_direct2_t<FT>::vsolve_non_dynamic(const bool newton_raphson)
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
const nl_double a = A(0,0);
const nl_double b = A(0,1);
const nl_double c = A(1,0);
const nl_double d = A(1,1);
const float_type a = this->A(0,0);
const float_type b = this->A(0,1);
const float_type c = this->A(1,0);
const float_type d = this->A(1,1);
nl_double new_V[2];
new_V[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
new_V[0] = (RHS(0) - b * new_V[1]) / a;
float_type new_V[2];
new_V[1] = (a * this->RHS(1) - c * this->RHS(0)) / (a * d - b * c);
new_V[0] = (this->RHS(0) - b * new_V[1]) / a;
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
const float_type err = (newton_raphson ? this->delta(new_V) : 0.0);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
}
} //namespace devices
} //namespace devices
} // namespace netlist
#endif /* NLD_MS_DIRECT2_H_ */

View File

@ -21,17 +21,23 @@
namespace netlist
{
namespace devices
{
template <std::size_t m_N, std::size_t storage_N>
namespace devices
{
template <typename FT, int SIZE>
class matrix_solver_GCR_t: public matrix_solver_t
{
public:
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = 100;
matrix_solver_GCR_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params)
, m_dim(size)
, RHS(size)
, new_V(size)
, mat(size)
, m_proc()
{
@ -41,7 +47,7 @@ public:
{
}
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
constexpr std::size_t N() const { return m_dim; }
virtual void vsetup(analog_net_t::list_t &nets) override;
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
@ -51,7 +57,7 @@ public:
private:
//typedef typename mat_cr_t<storage_N>::type mattype;
typedef typename mat_cr_t<storage_N>::index_type mattype;
typedef typename plib::mat_cr_t<FT, SIZE>::index_type mat_index_type;
void csc_private(plib::putf8_fmt_writer &strm);
@ -60,8 +66,12 @@ private:
pstring static_compile_name();
const std::size_t m_dim;
std::vector<unsigned> m_term_cr[storage_N];
mat_cr_t<storage_N> mat;
plib::parray<FT, SIZE> RHS;
plib::parray<FT, SIZE> new_V;
std::vector<FT *> m_term_cr[storage_N];
plib::mat_cr_t<FT, SIZE> mat;
//extsolver m_proc;
plib::dynproc<void, double * RESTRICT, double * RESTRICT, double * RESTRICT> m_proc;
@ -72,166 +82,89 @@ private:
// matrix_solver - GCR
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_GCR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
setup_base(nets);
mattype nz = 0;
const std::size_t iN = this->N();
/* build the final matrix */
bool touched[storage_N][storage_N] = { { false } };
std::vector<std::vector<unsigned>> fill(iN);
std::size_t raw_elements = 0;
for (std::size_t k = 0; k < iN; k++)
{
fill[k].resize(iN, decltype(mat)::FILL_INFINITY);
for (auto &j : this->m_terms[k]->m_nz)
touched[k][j] = true;
{
fill[k][j] = 0;
raw_elements++;
}
unsigned fc = 0;
}
unsigned ops = 0;
auto gr = mat.gaussian_extend_fill_mat(fill);
for (std::size_t k = 0; k < iN; k++)
{
ops++; // 1/A(k,k)
for (std::size_t row = k + 1; row < iN; row++)
unsigned fm = 0;
pstring ml = "";
for (std::size_t j = 0; j < iN; j++)
{
if (touched[row][k])
{
ops++;
fc++;
for (std::size_t col = k + 1; col < iN; col++)
if (touched[k][col])
{
touched[row][col] = true;
ops += 2;
}
}
ml += fill[k][j] < decltype(mat)::FILL_INFINITY ? "X" : "_";
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
if (fill[k][j] > fm)
fm = fill[k][j];
}
this->log().verbose("{1:4} {2} {3:4}", k, ml, fm);
}
for (mattype k=0; k<iN; k++)
{
mat.ia[k] = nz;
mat.build_from_fill_mat(fill);
for (mattype j=0; j<iN; j++)
for (mat_index_type k=0; k<iN; k++)
{
if (touched[k][j])
{
mat.ja[nz] = j;
if (j == k)
mat.diag[k] = nz;
nz++;
}
}
m_term_cr[k].clear();
/* build pointers into the compressed row format matrix for each terminal */
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
{
int other = this->m_terms[k]->connected_net_idx()[j];
for (auto i = mat.ia[k]; i < nz; i++)
if (other == static_cast<int>(mat.ja[i]))
for (auto i = mat.row_idx[k]; i < mat.row_idx[k+1]; i++)
if (other == static_cast<int>(mat.col_idx[i]))
{
m_term_cr[k].push_back(i);
m_term_cr[k].push_back(&mat.A[i]);
break;
}
}
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
m_term_cr[k].push_back(&mat.A[mat.diag[k]]);
}
mat.ia[iN] = nz;
mat.nz_num = nz;
this->log().verbose("Ops: {1} Occupancy ratio: {2}\n", ops,
static_cast<double>(nz) / static_cast<double>(iN * iN));
this->log().verbose("maximum fill: {1}", gr.first);
this->log().verbose("Post elimination occupancy ratio: {2} Ops: {1}", gr.second,
static_cast<double>(mat.nz_num) / static_cast<double>(iN * iN));
this->log().verbose(" Pre elimination occupancy ratio: {2}",
static_cast<double>(raw_elements) / static_cast<double>(iN * iN));
// FIXME: Move me
if (state().lib().isLoaded())
{
pstring symname = static_compile_name();
#if 0
m_proc = this->netlist().lib().template getsym<extsolver>(symname);
if (m_proc != nullptr)
this->log().verbose("External static solver {1} found ...", symname);
else
this->log().warning("External static solver {1} not found ...", symname);
#else
m_proc.load(this->state().lib(), symname);
if (m_proc.resolved())
this->log().warning("External static solver {1} found ...", symname);
else
this->log().warning("External static solver {1} not found ...", symname);
#endif
}
}
#if 0
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
{
const std::size_t iN = N();
for (std::size_t i = 0; i < iN - 1; i++)
{
const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0)
{
std::size_t pi = mat.diag[i];
//const nl_double f = 1.0 / m_A[pi++];
strm("const double f{1} = 1.0 / m_A[{2}];\n", i, pi);
pi++;
const std::size_t piie = mat.ia[i+1];
//for (auto & j : nzbd)
for (std::size_t j : nzbd)
{
// proceed to column i
std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i)
pj++;
//const nl_double f1 = - m_A[pj++] * f;
strm("\tconst double f{1}_{2} = -f{3} * m_A[{4}];\n", i, j, i, pj);
pj++;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
pj++;
//m_A[pj++] += m_A[pii++] * f1;
strm("\tm_A[{1}] += m_A[{2}] * f{3}_{4};\n", pj, pii, i, j);
pj++; pii++;
}
//RHS[j] += f1 * RHS[i];
strm("\tRHS[{1}] += f{2}_{3} * RHS[{4}];\n", j, i, j, i);
}
}
}
//new_V[iN - 1] = RHS[iN - 1] / mat.A[mat.diag[iN - 1]];
strm("\tV[{1}] = RHS[{2}] / m_A[{3}];\n", iN - 1, iN - 1, mat.diag[iN - 1]);
for (std::size_t j = iN - 1; j-- > 0;)
{
strm("\tdouble tmp{1} = 0.0;\n", j);
const std::size_t e = mat.ia[j+1];
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
{
strm("\ttmp{1} += m_A[{2}] * V[{3}];\n", j, pk, mat.ja[pk]);
}
strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A[{4}];\n", j, j, j, mat.diag[j]);
}
}
#else
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
template <typename FT, int SIZE>
void matrix_solver_GCR_t<FT, SIZE>::csc_private(plib::putf8_fmt_writer &strm)
{
const std::size_t iN = N();
@ -246,28 +179,28 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
{
std::size_t pi = mat.diag[i];
//const nl_double f = 1.0 / m_A[pi++];
//const FT f = 1.0 / m_A[pi++];
strm("const double f{1} = 1.0 / m_A{2};\n", i, pi);
pi++;
const std::size_t piie = mat.ia[i+1];
const std::size_t piie = mat.row_idx[i+1];
//for (auto & j : nzbd)
for (std::size_t j : nzbd)
{
// proceed to column i
std::size_t pj = mat.ia[j];
std::size_t pj = mat.row_idx[j];
while (mat.ja[pj] < i)
while (mat.col_idx[pj] < i)
pj++;
//const nl_double f1 = - m_A[pj++] * f;
//const FT f1 = - m_A[pj++] * f;
strm("\tconst double f{1}_{2} = -f{3} * m_A{4};\n", i, j, i, pj);
pj++;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
while (mat.col_idx[pj] < mat.col_idx[pii])
pj++;
//m_A[pj++] += m_A[pii++] * f1;
strm("\tm_A{1} += m_A{2} * f{3}_{4};\n", pj, pii, i, j);
@ -284,18 +217,17 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
for (std::size_t j = iN - 1; j-- > 0;)
{
strm("\tdouble tmp{1} = 0.0;\n", j);
const std::size_t e = mat.ia[j+1];
const std::size_t e = mat.row_idx[j+1];
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
{
strm("\ttmp{1} += m_A{2} * V[{3}];\n", j, pk, mat.ja[pk]);
strm("\ttmp{1} += m_A{2} * V[{3}];\n", j, pk, mat.col_idx[pk]);
}
strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A{4};\n", j, j, j, mat.diag[j]);
}
}
#endif
template <std::size_t m_N, std::size_t storage_N>
pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
template <typename FT, int SIZE>
pstring matrix_solver_GCR_t<FT, SIZE>::static_compile_name()
{
plib::postringstream t;
plib::putf8_fmt_writer w(&t);
@ -305,8 +237,8 @@ pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
}
template <std::size_t m_N, std::size_t storage_N>
std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_code()
template <typename FT, int SIZE>
std::pair<pstring, pstring> matrix_solver_GCR_t<FT, SIZE>::create_solver_code()
{
plib::postringstream t;
plib::putf8_fmt_writer strm(&t);
@ -320,69 +252,16 @@ std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_c
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_GCR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
const std::size_t iN = this->N();
nl_double RHS[storage_N];
nl_double new_V[storage_N];
mat.set_scalar(0.0);
/* populate matrix */
for (std::size_t k = 0; k < iN; k++)
{
terms_for_net_t *t = this->m_terms[k].get();
nl_double gtot_t = 0.0;
nl_double RHS_t = 0.0;
const std::size_t term_count = t->count();
const std::size_t railstart = t->m_railstart;
const nl_double * const RESTRICT gt = t->gt();
const nl_double * const RESTRICT go = t->go();
const nl_double * const RESTRICT Idr = t->Idr();
const nl_double * const * RESTRICT other_cur_analog = t->connected_net_V();
const unsigned * const RESTRICT tcr = m_term_cr[k].data();
#if 0
for (std::size_t i = 0; i < term_count; i++)
{
gtot_t += gt[i];
RHS_t += Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
RHS_t += go[i] * *other_cur_analog[i];
RHS[k] = RHS_t;
// add diagonal element
mat.A[mat.diag[k]] = gtot_t;
for (std::size_t i = 0; i < railstart; i++)
mat.A[tcr[i]] -= go[i];
}
#else
for (std::size_t i = 0; i < railstart; i++)
mat.A[tcr[i]] -= go[i];
for (std::size_t i = 0; i < railstart; i++)
{
gtot_t += gt[i];
RHS_t += Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
{
RHS_t += (Idr[i] + go[i] * *other_cur_analog[i]);
gtot_t += gt[i];
}
RHS[k] = RHS_t;
mat.A[mat.diag[k]] += gtot_t;
}
#endif
mat.ia[iN] = static_cast<mattype>(mat.nz_num);
this->m_terms[k]->fill_matrix(m_term_cr[k], RHS[k]);
/* now solve it */
@ -394,63 +273,14 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
}
else
{
for (std::size_t i = 0; i < iN - 1; i++)
{
const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0)
{
std::size_t pi = mat.diag[i];
const nl_double f = 1.0 / mat.A[pi++];
const std::size_t piie = mat.ia[i+1];
for (std::size_t j : nzbd) // for (std::size_t j = i + 1; j < iN; j++)
{
// proceed to column i
//__builtin_prefetch(&m_A[mat.diag[j+1]], 1);
std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i)
pj++;
const nl_double f1 = - mat.A[pj++] * f;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
pj++;
mat.A[pj++] += mat.A[pii++] * f1;
}
RHS[j] += f1 * RHS[i];
}
}
}
/* backward substitution
*
*/
/* row n-1 */
new_V[iN - 1] = RHS[iN - 1] / mat.A[mat.diag[iN - 1]];
for (std::size_t j = iN - 1; j-- > 0;)
{
//__builtin_prefetch(&new_V[j-1], 1);
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
double tmp = 0;
auto jdiag = mat.diag[j];
const std::size_t e = mat.ia[j+1];
for (std::size_t pk = jdiag + 1; pk < e; pk++)
{
tmp += mat.A[pk] * new_V[mat.ja[pk]];
}
new_V[j] = (RHS[j] - tmp) / mat.A[jdiag];
}
mat.gaussian_elimination(RHS);
/* backward substitution */
mat.gaussian_back_substitution(new_V, RHS);
}
this->m_stat_calculations++;
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
const FT err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}

View File

@ -15,6 +15,7 @@
#include <algorithm>
#include <cmath>
#include "../plib/parray.h"
#include "mat_cr.h"
#include "nld_ms_direct.h"
#include "nld_solver.h"
@ -24,17 +25,34 @@ namespace netlist
{
namespace devices
{
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N>
template <typename FT, int SIZE>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<FT, SIZE>
{
public:
typedef FT float_type;
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = plib::sizeabs<FT, SIZE>::ABS();
// Maximum iterations before a restart ...
static constexpr const std::size_t restart_N = (storage_N > 0 ? 20 : 0);
/* Sort rows in ascending order. This should minimize fill-in and thus
* maximize the efficiency of the incomplete LUT.
*/
matrix_solver_GMRES_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
: matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_use_iLU_preconditioning(true)
, m_use_more_precise_stop_condition(false)
, m_use_more_precise_stop_condition(true)
, m_ILU_scale(0)
, m_accuracy_mult(1.0)
, m_term_cr(size)
, mat(size)
, residual(size)
, Ax(size)
, m_LU(size)
//, m_v(size)
{
}
@ -48,26 +66,32 @@ public:
private:
//typedef typename mat_cr_t<storage_N>::type mattype;
typedef typename mat_cr_t<storage_N>::index_type mattype;
typedef typename plib::mat_cr_t<FT, SIZE>::index_type mattype;
unsigned solve_ilu_gmres(nl_double (& RESTRICT x)[storage_N], const nl_double (& RESTRICT rhs)[storage_N], const unsigned restart_max, std::size_t mr, nl_double accuracy);
template <typename VT, typename VRHS>
std::size_t solve_ilu_gmres(VT &x, const VRHS & rhs, const std::size_t restart_max, std::size_t mr, float_type accuracy);
std::vector<unsigned> m_term_cr[storage_N];
bool m_use_iLU_preconditioning;
bool m_use_more_precise_stop_condition;
nl_double m_accuracy_mult; // FXIME: Save state
std::size_t m_ILU_scale;
float_type m_accuracy_mult; // FXIME: Save state
mat_cr_t<storage_N> mat;
plib::parray<std::vector<FT *>, SIZE> m_term_cr;
plib::mat_cr_t<float_type, SIZE> mat;
plib::parray<float_type, SIZE> residual;
plib::parray<float_type, SIZE> Ax;
nl_double m_LU[storage_N * storage_N];
plib::mat_cr_t<float_type, SIZE> m_LU;
nl_double m_c[storage_N + 1]; /* mr + 1 */
nl_double m_g[storage_N + 1]; /* mr + 1 */
nl_double m_ht[storage_N + 1][storage_N]; /* (mr + 1), mr */
nl_double m_s[storage_N + 1]; /* mr + 1 */
nl_double m_v[storage_N + 1][storage_N]; /*(mr + 1), n */
nl_double m_y[storage_N + 1]; /* mr + 1 */
float_type m_c[restart_N + 1]; /* mr + 1 */
float_type m_g[restart_N + 1]; /* mr + 1 */
float_type m_ht[restart_N + 1][restart_N]; /* (mr + 1), mr */
float_type m_s[restart_N + 1]; /* mr + 1 */
float_type m_y[restart_N + 1]; /* mr + 1 */
//plib::parray<float_type, SIZE> m_v[restart_N + 1]; /* mr + 1, n */
float_type m_v[restart_N + 1][storage_N]; /* mr + 1, n */
};
@ -75,106 +99,72 @@ private:
// matrix_solver - GMRES
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_GMRES_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
mattype nz = 0;
const std::size_t iN = this->N();
std::vector<std::vector<unsigned>> fill(iN);
for (std::size_t k=0; k<iN; k++)
{
fill[k].resize(iN, decltype(mat)::FILL_INFINITY);
terms_for_net_t * RESTRICT row = this->m_terms[k].get();
mat.ia[k] = nz;
for (std::size_t j=0; j<row->m_nz.size(); j++)
{
mat.ja[nz] = static_cast<mattype>(row->m_nz[j]);
if (row->m_nz[j] == k)
mat.diag[k] = nz;
nz++;
fill[k][static_cast<mattype>(row->m_nz[j])] = 0;
}
}
mat.build_from_fill_mat(fill, 0);
m_LU.gaussian_extend_fill_mat(fill);
m_LU.build_from_fill_mat(fill, m_ILU_scale); // ILU(2)
//m_LU.build_from_fill_mat(fill, 9999, 20); // Band matrix width 20
/* build pointers into the compressed row format matrix for each terminal */
for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++)
for (std::size_t k=0; k<iN; k++)
{
for (unsigned i = mat.ia[k]; i<nz; i++)
if (this->m_terms[k]->connected_net_idx()[j] == static_cast<int>(mat.ja[i]))
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
{
m_term_cr[k].push_back(i);
for (std::size_t i = mat.row_idx[k]; i<mat.row_idx[k+1]; i++)
if (this->m_terms[k]->connected_net_idx()[j] == static_cast<int>(mat.col_idx[i]))
{
m_term_cr[k].push_back(&mat.A[i]);
break;
}
}
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
m_term_cr[k].push_back(&mat.A[mat.diag[k]]);
}
mat.ia[iN] = nz;
mat.nz_num = nz;
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_GMRES_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
const std::size_t iN = this->N();
/* ideally, we could get an estimate for the spectral radius of
* Inv(D - L) * U
*
* and estimate using
*
* omega = 2.0 / (1.0 + std::sqrt(1-rho))
*/
//nz_num = 0;
nl_double RHS[storage_N];
nl_double new_V[storage_N];
plib::parray<FT, SIZE> RHS(iN);
//float_type new_V[storage_N];
mat.set_scalar(0.0);
/* populate matrix and V for first estimate */
for (std::size_t k = 0; k < iN; k++)
{
nl_double gtot_t = 0.0;
nl_double RHS_t = 0.0;
const std::size_t term_count = this->m_terms[k]->count();
const std::size_t railstart = this->m_terms[k]->m_railstart;
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
const nl_double * const RESTRICT go = this->m_terms[k]->go();
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->connected_net_V();
for (std::size_t i = 0; i < term_count; i++)
{
gtot_t = gtot_t + gt[i];
RHS_t = RHS_t + Idr[i];
this->m_terms[k]->fill_matrix(m_term_cr[k], RHS[k]);
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
}
for (std::size_t i = railstart; i < term_count; i++)
RHS_t = RHS_t + go[i] * *other_cur_analog[i];
RHS[k] = RHS_t;
// add diagonal element
mat.A[mat.diag[k]] = gtot_t;
for (std::size_t i = 0; i < railstart; i++)
{
const std::size_t pi = m_term_cr[k][i];
mat.A[pi] -= go[i];
}
new_V[k] = this->m_nets[k]->Q_Analog();
}
mat.ia[iN] = static_cast<mattype>(mat.nz_num);
const nl_double accuracy = this->m_params.m_accuracy;
//mat.row_idx[iN] = static_cast<mattype>(mat.nz_num);
const float_type accuracy = this->m_params.m_accuracy;
const std::size_t mr = (iN > 3 ) ? static_cast<std::size_t>(std::sqrt(iN) * 2.0) : iN;
unsigned iter = std::max(1u, this->m_params.m_gs_loops);
unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
std::size_t iter = std::max(1u, this->m_params.m_gs_loops);
std::size_t gsl = solve_ilu_gmres(this->m_new_V, RHS, iter, mr, accuracy);
const std::size_t failed = mr * iter;
this->m_iterative_total += gsl;
@ -183,26 +173,29 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
if (gsl >= failed)
{
this->m_iterative_fail++;
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
return matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(newton_raphson);
}
const nl_double err = (newton_raphson ? this->delta(new_V) : 0.0);
this->store(new_V);
//if (newton_raphson)
// printf("%e %e\n", this->delta(this->m_new_V), this->m_params.m_accuracy);
const float_type err = (newton_raphson ? this->delta(this->m_new_V) : 0.0);
this->store(this->m_new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <typename T>
inline void givens_mult( const T c, const T s, T & g0, T & g1 )
{
const T tg0 = c * g0 - s * g1;
const T tg1 = s * g0 + c * g1;
const T g0_last(g0);
g0 = tg0;
g1 = tg1;
g0 = c * g0 - s * g1;
g1 = s * g0_last + c * g1;
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RESTRICT x)[storage_N], const nl_double (& RESTRICT rhs)[storage_N], const unsigned restart_max, std::size_t mr, nl_double accuracy)
template <typename FT, int SIZE>
template <typename VT, typename VRHS>
std::size_t matrix_solver_GMRES_t<FT, SIZE>::solve_ilu_gmres (VT &x, const VRHS &rhs, const std::size_t restart_max, std::size_t mr, float_type accuracy)
{
/*-------------------------------------------------------------------------
* The code below was inspired by code published by John Burkardt under
@ -226,15 +219,22 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
*
*------------------------------------------------------------------------*/
unsigned itr_used = 0;
std::size_t itr_used = 0;
double rho_delta = 0.0;
const std::size_t n = this->N();
if (mr > restart_N) mr = restart_N;
if (mr > n) mr = n;
if (m_use_iLU_preconditioning)
mat.incomplete_LU_factorization(m_LU);
{
if (m_ILU_scale < 1)
m_LU.raw_copy_from(mat);
else
m_LU.reduction_copy_from(mat);
m_LU.incomplete_LU_factorization();
}
if (m_use_more_precise_stop_condition)
{
@ -249,27 +249,22 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
* differently: The invest doesn't pay off.
* Therefore we use the approach in the else part.
*/
nl_double t[storage_N];
nl_double Ax[storage_N];
vec_set(n, accuracy, t);
mat.mult_vec(t, Ax);
vec_set_scalar(n, residual, accuracy);
mat.mult_vec(residual, Ax);
mat.solveLUx(m_LU, Ax);
m_LU.solveLUx(Ax);
const nl_double rho_to_accuracy = std::sqrt(vec_mult2(n, Ax)) / accuracy;
const float_type rho_to_accuracy = std::sqrt(vec_mult2<FT>(n, Ax)) / accuracy;
rho_delta = accuracy * rho_to_accuracy;
}
else
rho_delta = accuracy * std::sqrt(n) * m_accuracy_mult;
rho_delta = accuracy * std::sqrt(static_cast<FT>(n)) * m_accuracy_mult;
for (unsigned itr = 0; itr < restart_max; itr++)
for (std::size_t itr = 0; itr < restart_max; itr++)
{
std::size_t last_k = mr;
nl_double rho;
nl_double Ax[storage_N];
nl_double residual[storage_N];
float_type rho;
mat.mult_vec(x, Ax);
@ -277,19 +272,19 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
if (m_use_iLU_preconditioning)
{
mat.solveLUx(m_LU, residual);
m_LU.solveLUx(residual);
}
rho = std::sqrt(vec_mult2(n, residual));
rho = std::sqrt(vec_mult2<FT>(n, residual));
if (rho < rho_delta)
return itr_used + 1;
vec_set(mr+1, NL_FCONST(0.0), m_g);
vec_set_scalar(mr+1, m_g, NL_FCONST(0.0));
m_g[0] = rho;
for (std::size_t i = 0; i < mr + 1; i++)
vec_set(mr, NL_FCONST(0.0), m_ht[i]);
vec_set_scalar(mr, m_ht[i], NL_FCONST(0.0));
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
@ -300,14 +295,14 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
mat.mult_vec(m_v[k], m_v[k1]);
if (m_use_iLU_preconditioning)
mat.solveLUx(m_LU, m_v[k1]);
m_LU.solveLUx(m_v[k1]);
for (std::size_t j = 0; j <= k; j++)
{
m_ht[j][k] = vec_mult(n, m_v[k1], m_v[j]);
m_ht[j][k] = vec_mult<float_type>(n, m_v[k1], m_v[j]);
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
}
m_ht[k1][k] = std::sqrt(vec_mult2(n, m_v[k1]));
m_ht[k1][k] = std::sqrt(vec_mult2<FT>(n, m_v[k1]));
if (m_ht[k1][k] != 0.0)
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
@ -315,7 +310,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
for (std::size_t j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
const nl_double mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
const float_type mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
m_c[k] = m_ht[k][k] * mu;
m_s[k] = -m_ht[k1][k] * mu;
@ -345,36 +340,17 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
{
double tmp = m_g[i];
for (std::size_t j = i + 1; j <= last_k; j++)
{
tmp -= m_ht[i][j] * m_y[j];
}
m_y[i] = tmp / m_ht[i][i];
}
for (std::size_t i = 0; i <= last_k; i++)
vec_add_mult_scalar(n, m_v[i], m_y[i], x);
#if 1
if (rho <= rho_delta)
{
break;
}
#else
/* we try to approximate the x difference between to steps using m_v[last_k] */
double xdelta = m_y[last_k] * vec_maxabs(n, m_v[last_k]);
if (xdelta < accuracy)
{
if (m_accuracy_mult < 16384.0)
m_accuracy_mult = m_accuracy_mult * 2.0;
break;
}
else
m_accuracy_mult = m_accuracy_mult / 2.0;
#endif
}
return itr_used;
}

View File

@ -41,19 +41,21 @@
namespace netlist
{
namespace devices
{
//#define nl_ext_double _float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
namespace devices
{
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
class matrix_solver_sm_t: public matrix_solver_t
{
friend class matrix_solver_t;
public:
typedef FT float_ext_type;
typedef FT float_type;
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = 100;
matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size);
@ -66,7 +68,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson);
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
constexpr std::size_t N() const { return m_dim; }
void LE_invert();
@ -75,33 +77,33 @@ protected:
template <typename T1, typename T2>
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
template <typename T1, typename T2>
nl_ext_double &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
template <typename T1, typename T2>
nl_ext_double &Ainv(const T1 &r, const T2 &c) { return m_Ainv[r][c]; }
float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[r][c]; }
template <typename T1>
nl_ext_double &RHS(const T1 &r) { return m_RHS[r]; }
float_ext_type &RHS(const T1 &r) { return m_RHS[r]; }
template <typename T1, typename T2>
nl_ext_double &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
template <typename T1, typename T2>
nl_ext_double &lAinv(const T1 &r, const T2 &c) { return m_lAinv[r][c]; }
float_ext_type &lAinv(const T1 &r, const T2 &c) { return m_lAinv[r][c]; }
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
float_type m_last_RHS[storage_N]; // right hand side - contains currents
private:
static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8;
nl_ext_double m_A[storage_N][m_pitch];
nl_ext_double m_Ainv[storage_N][m_pitch];
nl_ext_double m_W[storage_N][m_pitch];
nl_ext_double m_RHS[storage_N]; // right hand side - contains currents
float_ext_type m_A[storage_N][m_pitch];
float_ext_type m_Ainv[storage_N][m_pitch];
float_ext_type m_W[storage_N][m_pitch];
float_ext_type m_RHS[storage_N]; // right hand side - contains currents
nl_ext_double m_lA[storage_N][m_pitch];
nl_ext_double m_lAinv[storage_N][m_pitch];
float_ext_type m_lA[storage_N][m_pitch];
float_ext_type m_lAinv[storage_N][m_pitch];
//nl_ext_double m_RHSx[storage_N];
//float_ext_type m_RHSx[storage_N];
const std::size_t m_dim;
std::size_t m_cnt;
@ -112,13 +114,13 @@ private:
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
template <typename FT, int SIZE>
matrix_solver_sm_t<FT, SIZE>::~matrix_solver_sm_t()
{
}
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_sm_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_t::setup_base(nets);
@ -130,8 +132,8 @@ void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
template <typename FT, int SIZE>
void matrix_solver_sm_t<FT, SIZE>::LE_invert()
{
const std::size_t kN = N();
@ -148,7 +150,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
for (std::size_t i = 0; i < kN; i++)
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
const float_type f = 1.0 / W(i,i);
const auto * RESTRICT const p = m_terms[i]->m_nzrd.data();
const std::size_t e = m_terms[i]->m_nzrd.size();
@ -159,7 +161,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
for (std::size_t jb = 0; jb < eb; jb++)
{
const unsigned j = pb[jb];
const nl_double f1 = - W(j,i) * f;
const float_type f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (std::size_t k = 0; k < e; k++)
@ -173,10 +175,10 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
for (std::size_t i = kN; i-- > 0; )
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
const float_type f = 1.0 / W(i,i);
for (std::size_t j = i; j-- > 0; )
{
const nl_double f1 = - W(j,i) * f;
const float_type f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (std::size_t k = i; k < kN; k++)
@ -193,9 +195,9 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
}
}
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
template <typename T>
void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
void matrix_solver_sm_t<FT, SIZE>::LE_compute_x(
T * RESTRICT x)
{
const std::size_t kN = N();
@ -205,7 +207,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
for (std::size_t k=0; k<kN; k++)
{
const nl_double f = RHS(k);
const float_type f = RHS(k);
for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f;
@ -213,13 +215,13 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_sm_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
{
static constexpr const bool incremental = true;
const std::size_t iN = N();
nl_double new_V[storage_N]; // = { 0.0 };
float_type new_V[storage_N]; // = { 0.0 };
if ((m_cnt % 50) == 0)
{
@ -236,7 +238,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
}
for (std::size_t row = 0; row < iN; row ++)
{
nl_double v[m_pitch] = {0};
float_type v[m_pitch] = {0};
std::size_t cols[m_pitch];
std::size_t colcount = 0;
@ -252,10 +254,10 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
if (colcount > 0)
{
nl_double lamba = 0.0;
nl_double w[m_pitch] = {0};
float_type lamba = 0.0;
float_type w[m_pitch] = {0};
nl_double z[m_pitch];
float_type z[m_pitch];
/* compute w and lamba */
for (std::size_t i = 0; i < iN; i++)
z[i] = Ainv(i, row); /* u is row'th column */
@ -266,7 +268,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
for (std::size_t j=0; j<colcount; j++)
{
std::size_t col = cols[j];
nl_double f = v[col];
float_type f = v[col];
for (std::size_t k = 0; k < iN; k++)
w[k] += Ainv(col,k) * f; /* Transpose(Ainv) * v */
}
@ -274,7 +276,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
lamba = -1.0 / (1.0 + lamba);
for (std::size_t i=0; i<iN; i++)
{
const nl_double f = lamba * z[i];
const float_type f = lamba * z[i];
if (f != 0.0)
for (std::size_t k = 0; k < iN; k++)
Ainv(i,k) += f * w[k];
@ -288,16 +290,16 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
this->LE_compute_x(new_V);
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
const float_type err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_sm_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_sm_t>();
build_LE_RHS<matrix_solver_sm_t>();
this->build_LE_A(*this);
this->build_LE_RHS(*this);
for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
@ -306,8 +308,8 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newto
return this->solve_non_dynamic(newton_raphson);
}
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_sm_t<m_N, storage_N>::matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
template <typename FT, int SIZE>
matrix_solver_sm_t<FT, SIZE>::matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params)
, m_dim(size)

View File

@ -20,15 +20,22 @@
namespace netlist
{
namespace devices
{
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_t: public matrix_solver_direct_t<m_N, storage_N>
{
template <typename FT, int SIZE>
class matrix_solver_SOR_t: public matrix_solver_direct_t<FT, SIZE>
{
public:
typedef FT float_type;
matrix_solver_SOR_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
: matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_lp_fact(*this, "m_lp_fact", 0)
, w(size, 0.0)
, one_m_w(size, 0.0)
, RHS(size, 0.0)
, new_V(size, 0.0)
{
}
@ -38,7 +45,11 @@ public:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
private:
state_var<nl_double> m_lp_fact;
state_var<float_type> m_lp_fact;
std::vector<float_type> w;
std::vector<float_type> one_m_w;
std::vector<float_type> RHS;
std::vector<float_type> new_V;
};
// ----------------------------------------------------------------------------------------
@ -46,14 +57,14 @@ private:
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_SOR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_SOR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
const std::size_t iN = this->N();
bool resched = false;
@ -67,24 +78,19 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
* omega = 2.0 / (1.0 + std::sqrt(1-rho))
*/
const nl_double ws = this->m_params.m_gs_sor;
nl_double w[storage_N];
nl_double one_m_w[storage_N];
nl_double RHS[storage_N];
nl_double new_V[storage_N];
const float_type ws = this->m_params.m_gs_sor;
for (std::size_t k = 0; k < iN; k++)
{
nl_double gtot_t = 0.0;
nl_double gabs_t = 0.0;
nl_double RHS_t = 0.0;
float_type gtot_t = 0.0;
float_type gabs_t = 0.0;
float_type RHS_t = 0.0;
const std::size_t term_count = this->m_terms[k]->count();
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
const nl_double * const RESTRICT go = this->m_terms[k]->go();
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
const nl_double * const *other_cur_analog = this->m_terms[k]->connected_net_V();
const float_type * const RESTRICT gt = this->m_terms[k]->gt();
const float_type * const RESTRICT go = this->m_terms[k]->go();
const float_type * const RESTRICT Idr = this->m_terms[k]->Idr();
const float_type * const *other_cur_analog = this->m_terms[k]->connected_net_V();
new_V[k] = this->m_nets[k]->Q_Analog();
@ -123,22 +129,22 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
}
}
const nl_double accuracy = this->m_params.m_accuracy;
const float_type accuracy = this->m_params.m_accuracy;
do {
resched = false;
nl_double err = 0;
float_type err = 0;
for (std::size_t k = 0; k < iN; k++)
{
const int * RESTRICT net_other = this->m_terms[k]->connected_net_idx();
const std::size_t railstart = this->m_terms[k]->m_railstart;
const nl_double * RESTRICT go = this->m_terms[k]->go();
const float_type * RESTRICT go = this->m_terms[k]->go();
nl_double Idrive = 0.0;
float_type Idrive = 0.0;
for (std::size_t i = 0; i < railstart; i++)
Idrive = Idrive + go[i] * new_V[net_other[i]];
Idrive = Idrive + go[i] * new_V[static_cast<std::size_t>(net_other[i])];
const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
const float_type new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
err = std::max(std::abs(new_val - new_V[k]), err);
new_V[k] = new_val;
@ -158,7 +164,7 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
{
// Fallback to direct solver ...
this->m_iterative_fail++;
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
return matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(newton_raphson);
}
for (std::size_t k = 0; k < iN; k++)

View File

@ -22,20 +22,24 @@ namespace netlist
{
namespace devices
{
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, storage_N>
template <typename FT, int SIZE>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<FT, SIZE>
{
friend class matrix_solver_t;
public:
typedef FT float_type;
matrix_solver_SOR_mat_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
, m_Vdelta(*this, "m_Vdelta", 0.0)
: matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
, m_Vdelta(*this, "m_Vdelta", std::vector<float_type>(size))
, m_omega(*this, "m_omega", params->m_gs_sor)
, m_lp_fact(*this, "m_lp_fact", 0)
, m_gs_fail(*this, "m_gs_fail", 0)
, m_gs_total(*this, "m_gs_total", 0)
, new_V(size, 0.0)
{
}
@ -46,28 +50,31 @@ public:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
private:
state_var<nl_double[storage_N]> m_Vdelta;
//state_var<float_type[storage_N]> m_Vdelta;
state_var<std::vector<float_type>> m_Vdelta;
state_var<nl_double> m_omega;
state_var<nl_double> m_lp_fact;
state_var<float_type> m_omega;
state_var<float_type> m_lp_fact;
state_var<int> m_gs_fail;
state_var<int> m_gs_total;
std::vector<float_type> new_V;
};
// ----------------------------------------------------------------------------------------
// matrix_solver - Gauss - Seidel
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_SOR_mat_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_SOR_mat_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
}
#if 0
//FIXME: move to solve_base
template <unsigned m_N, unsigned storage_N>
nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
float_type matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
{
/*
* enable linear prediction on first newton pass
@ -89,13 +96,13 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
if (USE_LINEAR_PREDICTION)
{
nl_double sq = 0;
nl_double sqo = 0;
const nl_double rez_cts = 1.0 / this->current_timestep();
float_type sq = 0;
float_type sqo = 0;
const float_type rez_cts = 1.0 / this->current_timestep();
for (unsigned k = 0; k < this->N(); k++)
{
const analog_net_t *n = this->m_nets[k];
const nl_double nv = (n->Q_Analog() - this->m_last_V[k]) * rez_cts ;
const float_type nv = (n->Q_Analog() - this->m_last_V[k]) * rez_cts ;
sq += nv * nv;
sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
this->m_Vdelta[k] = nv;
@ -103,7 +110,7 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
// FIXME: used to be 1e90, but this would not be compatible with float
if (sqo > NL_FCONST(1e-20))
m_lp_fact = std::min(std::sqrt(sq/sqo), (nl_double) 2.0);
m_lp_fact = std::min(std::sqrt(sq/sqo), (float_type) 2.0);
else
m_lp_fact = NL_FCONST(0.0);
}
@ -113,8 +120,8 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
}
#endif
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_SOR_mat_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
/* The matrix based code looks a lot nicer but actually is 30% slower than
* the optimized code which works directly on the data structures.
@ -122,11 +129,10 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
*/
nl_double new_v[storage_N] = { 0.0 };
const std::size_t iN = this->N();
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>();
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>();
this->build_LE_A(*this);
this->build_LE_RHS(*this);
bool resched = false;
@ -139,19 +145,19 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
if (1 && ws_cnt % 200 == 0)
{
// update omega
nl_double lambdaN = 0;
nl_double lambda1 = 1e9;
float_type lambdaN = 0;
float_type lambda1 = 1e9;
for (int k = 0; k < iN; k++)
{
#if 0
nl_double akk = std::abs(this->m_A[k][k]);
float_type akk = std::abs(this->m_A[k][k]);
if ( akk > lambdaN)
lambdaN = akk;
if (akk < lambda1)
lambda1 = akk;
#else
nl_double akk = std::abs(this->m_A[k][k]);
nl_double s = 0.0;
float_type akk = std::abs(this->m_A[k][k]);
float_type s = 0.0;
for (int i=0; i<iN; i++)
s = s + std::abs(this->m_A[k][i]);
akk = s / akk - 1.0;
@ -170,25 +176,25 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
#endif
for (std::size_t k = 0; k < iN; k++)
new_v[k] = this->m_nets[k]->Q_Analog();
new_V[k] = this->m_nets[k]->Q_Analog();
do {
resched = false;
nl_double cerr = 0.0;
float_type cerr = 0.0;
for (std::size_t k = 0; k < iN; k++)
{
nl_double Idrive = 0;
float_type Idrive = 0;
const auto *p = this->m_terms[k]->m_nz.data();
const std::size_t e = this->m_terms[k]->m_nz.size();
for (std::size_t i = 0; i < e; i++)
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
Idrive = Idrive + this->A(k,p[i]) * new_V[p[i]];
const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
const float_type delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
cerr = std::max(cerr, std::abs(delta));
new_v[k] += delta;
new_V[k] += delta;
}
if (cerr > this->m_params.m_accuracy)
@ -208,10 +214,10 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
this->m_gs_fail++;
return matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(newton_raphson);
return matrix_solver_direct_t<FT, SIZE>::solve_non_dynamic(newton_raphson);
}
else {
this->store(new_v);
this->store(new_V.data());
return resched_cnt;
}

View File

@ -50,15 +50,18 @@ namespace netlist
{
namespace devices
{
//#define nl_ext_double _float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
class matrix_solver_w_t: public matrix_solver_t
{
friend class matrix_solver_t;
public:
typedef FT float_ext_type;
typedef FT float_type;
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = 100;
matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
@ -71,7 +74,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson);
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
constexpr std::size_t N() const { return m_dim; }
void LE_invert();
@ -80,40 +83,40 @@ protected:
template <typename T1, typename T2>
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
template <typename T1, typename T2>
nl_ext_double &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
/* access to Ainv for fixed columns over row, there store transposed */
template <typename T1, typename T2>
nl_ext_double &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; }
float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; }
template <typename T1>
nl_ext_double &RHS(const T1 &r) { return m_RHS[r]; }
float_ext_type &RHS(const T1 &r) { return m_RHS[r]; }
template <typename T1, typename T2>
nl_ext_double &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
float_type m_last_RHS[storage_N]; // right hand side - contains currents
private:
static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8;
nl_ext_double m_A[storage_N][m_pitch];
nl_ext_double m_Ainv[storage_N][m_pitch];
nl_ext_double m_W[storage_N][m_pitch];
nl_ext_double m_RHS[storage_N]; // right hand side - contains currents
float_ext_type m_A[storage_N][m_pitch];
float_ext_type m_Ainv[storage_N][m_pitch];
float_ext_type m_W[storage_N][m_pitch];
float_ext_type m_RHS[storage_N]; // right hand side - contains currents
nl_ext_double m_lA[storage_N][m_pitch];
float_ext_type m_lA[storage_N][m_pitch];
/* temporary */
nl_double H[storage_N][m_pitch] ;
float_type H[storage_N][m_pitch] ;
unsigned rows[storage_N];
unsigned cols[storage_N][m_pitch];
unsigned colcount[storage_N];
unsigned m_cnt;
//nl_ext_double m_RHSx[storage_N];
//float_ext_type m_RHSx[storage_N];
const std::size_t m_dim;
@ -123,13 +126,13 @@ private:
// matrix_solver_direct
// ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_w_t<m_N, storage_N>::~matrix_solver_w_t()
template <typename FT, int SIZE>
matrix_solver_w_t<FT, SIZE>::~matrix_solver_w_t()
{
}
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <typename FT, int SIZE>
void matrix_solver_w_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{
matrix_solver_t::setup_base(nets);
@ -141,8 +144,8 @@ void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_w_t<m_N, storage_N>::LE_invert()
template <typename FT, int SIZE>
void matrix_solver_w_t<FT, SIZE>::LE_invert()
{
const std::size_t kN = N();
@ -159,7 +162,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
for (std::size_t i = 0; i < kN; i++)
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
const float_type f = 1.0 / W(i,i);
const auto * RESTRICT const p = m_terms[i]->m_nzrd.data();
const size_t e = m_terms[i]->m_nzrd.size();
@ -170,7 +173,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
for (std::size_t jb = 0; jb < eb; jb++)
{
const auto j = pb[jb];
const nl_double f1 = - W(j,i) * f;
const float_type f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (std::size_t k = 0; k < e; k++)
@ -184,10 +187,10 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
for (std::size_t i = kN; i-- > 0; )
{
/* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i);
const float_type f = 1.0 / W(i,i);
for (std::size_t j = i; j-- > 0; )
{
const nl_double f1 = - W(j,i) * f;
const float_type f1 = - W(j,i) * f;
if (f1 != 0.0)
{
for (std::size_t k = i; k < kN; k++)
@ -203,9 +206,9 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
}
}
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
template <typename T>
void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
void matrix_solver_w_t<FT, SIZE>::LE_compute_x(
T * RESTRICT x)
{
const std::size_t kN = N();
@ -215,7 +218,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
for (std::size_t k=0; k<kN; k++)
{
const nl_double f = RHS(k);
const float_type f = RHS(k);
for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f;
@ -223,12 +226,12 @@ void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_w_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
{
const auto iN = N();
nl_double new_V[storage_N]; // = { 0.0 };
float_type new_V[storage_N]; // = { 0.0 };
if ((m_cnt % 100) == 0)
{
@ -266,7 +269,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
/* construct w = transform(V) * y
* dim: rowcount x iN
* */
nl_double w[storage_N];
float_type w[storage_N];
for (unsigned i = 0; i < rowcount; i++)
{
const unsigned r = rows[i];
@ -287,7 +290,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
for (unsigned k=0; k< colcount[i]; k++)
{
const unsigned col = cols[i][k];
nl_double f = VT(rows[i],col);
float_type f = VT(rows[i],col);
if (f!=0.0)
for (unsigned j= 0; j < rowcount; j++)
H[i][j] += f * Ainv(col,rows[j]);
@ -298,15 +301,15 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
{
if (H[i][i] == 0.0)
printf("%s H singular\n", this->name().c_str());
const nl_double f = 1.0 / H[i][i];
const float_type f = 1.0 / H[i][i];
for (unsigned j = i+1; j < rowcount; j++)
{
const nl_double f1 = - f * H[j][i];
const float_type f1 = - f * H[j][i];
if (f1!=0.0)
{
nl_double *pj = &H[j][i+1];
const nl_double *pi = &H[i][i+1];
float_type *pj = &H[j][i+1];
const float_type *pi = &H[i][i+1];
for (unsigned k = 0; k < rowcount-i-1; k++)
pj[k] += f1 * pi[k];
//H[j][k] += f1 * H[i][k];
@ -316,12 +319,12 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
}
/* Back substitution */
//inv(H) w = t w = H t
nl_double t[storage_N]; // FIXME: convert to member
float_type t[storage_N]; // FIXME: convert to member
for (unsigned j = rowcount; j-- > 0; )
{
nl_double tmp = 0;
const nl_double *pj = &H[j][j+1];
const nl_double *tj = &t[j+1];
float_type tmp = 0;
const float_type *pj = &H[j][j+1];
const float_type *tj = &t[j+1];
for (unsigned k = 0; k < rowcount-j-1; k++)
tmp += pj[k] * tj[k];
//tmp += H[j][k] * t[k];
@ -331,7 +334,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
/* x = y - Zt */
for (unsigned i=0; i<iN; i++)
{
nl_double tmp = 0.0;
float_type tmp = 0.0;
for (unsigned j=0; j<rowcount;j++)
{
const unsigned row = rows[j];
@ -346,7 +349,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
if (0)
for (unsigned i=0; i<iN; i++)
{
nl_double tmp = 0.0;
float_type tmp = 0.0;
for (unsigned j=0; j<iN; j++)
{
tmp += A(i,j) * new_V[j];
@ -355,16 +358,16 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i));
}
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
const float_type err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
template <std::size_t m_N, std::size_t storage_N>
unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
template <typename FT, int SIZE>
unsigned matrix_solver_w_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{
build_LE_A<matrix_solver_w_t>();
build_LE_RHS<matrix_solver_w_t>();
this->build_LE_A(*this);
this->build_LE_RHS(*this);
for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i);
@ -373,8 +376,8 @@ unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton
return this->solve_non_dynamic(newton_raphson);
}
template <std::size_t m_N, std::size_t storage_N>
matrix_solver_w_t<m_N, storage_N>::matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name,
template <typename FT, int SIZE>
matrix_solver_w_t<FT, SIZE>::matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params)
,m_cnt(0)

View File

@ -133,12 +133,12 @@ matrix_solver_t * create_it(netlist_base_t &nl, pstring name, solver_parameters_
return plib::palloc<C>(nl, name, &params, size);
}
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername)
{
if (m_method() == "SOR_MAT")
{
return create_it<matrix_solver_SOR_mat_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
//typedef matrix_solver_SOR_mat_t<m_N,storage_N> solver_sor_mat;
//return plib::make_unique<solver_sor_mat>(state(), solvername, &m_params, size);
}
@ -146,34 +146,34 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pst
{
if (size > 0) // GCR always outperforms MAT solver
{
return create_it<matrix_solver_GCR_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else
{
return create_it<matrix_solver_direct_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
}
}
else if (m_method() == "MAT")
{
return create_it<matrix_solver_direct_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_method() == "SM")
{
/* Sherman-Morrison Formula */
return create_it<matrix_solver_sm_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_method() == "W")
{
/* Woodbury Formula */
return create_it<matrix_solver_w_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_method() == "SOR")
{
return create_it<matrix_solver_SOR_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else if (m_method() == "GMRES")
{
return create_it<matrix_solver_GMRES_t<m_N, storage_N>>(state(), solvername, m_params, size);
return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
}
else
{
@ -288,64 +288,64 @@ void NETLIB_NAME(solver)::post_start()
switch (net_count)
{
#if 0
#if 1
case 1:
if (use_specific)
ms = plib::palloc<matrix_solver_direct1_t>(state(), sname, &m_params);
ms = plib::palloc<matrix_solver_direct1_t<double>>(state(), sname, &m_params);
else
ms = create_solver<1,1>(1, sname);
ms = create_solver<double, 1>(1, sname);
break;
case 2:
if (use_specific)
ms = plib::palloc<matrix_solver_direct2_t>(state(), sname, &m_params);
ms = plib::palloc<matrix_solver_direct2_t<double>>(state(), sname, &m_params);
else
ms = create_solver<2,2>(2, sname);
ms = create_solver<double, 2>(2, sname);
break;
#if 1
case 3:
ms = create_solver<3,3>(3, sname);
ms = create_solver<double, 3>(3, sname);
break;
case 4:
ms = create_solver<4,4>(4, sname);
ms = create_solver<double, 4>(4, sname);
break;
case 5:
ms = create_solver<5,5>(5, sname);
ms = create_solver<double, 5>(5, sname);
break;
case 6:
ms = create_solver<6,6>(6, sname);
ms = create_solver<double, 6>(6, sname);
break;
case 7:
ms = create_solver<7,7>(7, sname);
ms = create_solver<double, 7>(7, sname);
break;
case 8:
ms = create_solver<8,8>(8, sname);
ms = create_solver<double, 8>(8, sname);
break;
case 9:
ms = create_solver<9,9>(9, sname);
ms = create_solver<double, 9>(9, sname);
break;
case 10:
ms = create_solver<10,10>(10, sname);
ms = create_solver<double, 10>(10, sname);
break;
case 11:
ms = create_solver<11,11>(11, sname);
ms = create_solver<double, 11>(11, sname);
break;
case 12:
ms = create_solver<12,12>(12, sname);
ms = create_solver<double, 12>(12, sname);
break;
case 15:
ms = create_solver<15,15>(15, sname);
ms = create_solver<double, 15>(15, sname);
break;
case 31:
ms = create_solver<31,31>(31, sname);
ms = create_solver<double, 31>(31, sname);
break;
case 35:
ms = create_solver<35,35>(35, sname);
ms = create_solver<double, 35>(35, sname);
break;
case 43:
ms = create_solver<43,43>(43, sname);
ms = create_solver<double, 43>(43, sname);
break;
case 49:
ms = create_solver<49,49>(49, sname);
ms = create_solver<double, 49>(49, sname);
break;
#endif
#if 0
@ -358,32 +358,31 @@ void NETLIB_NAME(solver)::post_start()
log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count);
if (net_count <= 8)
{
ms = create_solver<0, 8>(net_count, sname);
ms = create_solver<double, -8>(net_count, sname);
}
else if (net_count <= 16)
{
ms = create_solver<0,16>(net_count, sname);
ms = create_solver<double, -16>(net_count, sname);
}
else if (net_count <= 32)
{
ms = create_solver<0,32>(net_count, sname);
ms = create_solver<double, -32>(net_count, sname);
}
else
if (net_count <= 64)
{
ms = create_solver<0,64>(net_count, sname);
ms = create_solver<double, -64>(net_count, sname);
}
else
if (net_count <= 128)
{
ms = create_solver<0,128>(net_count, sname);
ms = create_solver<double, -128>(net_count, sname);
}
else
{
log().fatal(MF_1_NETGROUP_SIZE_EXCEEDED_1, 128);
ms = nullptr; /* tease compilers */
}
break;
}

View File

@ -102,7 +102,7 @@ private:
solver_parameters_t m_params;
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
matrix_solver_t * create_solver(std::size_t size, const pstring &solvername);
};

View File

@ -12,6 +12,7 @@
#include <algorithm>
#include <cmath>
#include <type_traits>
#include "../plib/pconfig.h"
#if 0
@ -36,63 +37,50 @@ private:
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
template<typename T, std::size_t N>
void vec_set (const std::size_t n, const T scalar, T (& RESTRICT v)[N])
template<typename VT, typename T>
void vec_set_scalar (const std::size_t n, VT &v, const T & scalar)
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
v[i] = scalar;
else
for ( std::size_t i = 0; i < N; i++ )
v[i] = scalar;
}
template<typename T, std::size_t N>
T vec_mult (const std::size_t n, const T (& RESTRICT v1)[N], const T (& RESTRICT v2)[N] )
template<typename VT, typename VS>
void vec_set (const std::size_t n, VT &v, const VS & source)
{
for ( std::size_t i = 0; i < n; i++ )
v[i] = source [i];
}
template<typename T, typename V1, typename V2>
T vec_mult (const std::size_t n, const V1 & v1, const V2 & v2 )
{
T value = 0.0;
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
value += v1[i] * v2[i];
else
for ( std::size_t i = 0; i < N; i++ )
value += v1[i] * v2[i];
return value;
}
template<typename T, std::size_t N>
T vec_mult2 (const std::size_t n, const T (& RESTRICT v)[N])
template<typename T, typename VT>
T vec_mult2 (const std::size_t n, const VT &v)
{
T value = 0.0;
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
value += v[i] * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
value += v[i] * v[i];
return value;
}
template<typename T, std::size_t N>
void vec_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T & scalar, T (& RESTRICT result)[N])
template<typename VV, typename T, typename VR>
void vec_mult_scalar (const std::size_t n, const VV & v, const T & scalar, VR & result)
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
result[i] = scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = scalar * v[i];
}
template<typename T, std::size_t N>
void vec_add_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T scalar, T (& RESTRICT result)[N])
template<typename VV, typename T, typename VR>
void vec_add_mult_scalar (const std::size_t n, const VV & v, const T scalar, VR & result)
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
result[i] = result[i] + scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = result[i] + scalar * v[i];
}
template<typename T>
@ -102,37 +90,29 @@ void vec_add_mult_scalar_p(const std::size_t & n, const T * RESTRICT v, const T
result[i] += scalar * v[i];
}
template<typename T>
void vec_add_ip(const std::size_t n, const T * RESTRICT v, T * RESTRICT result)
template<typename V, typename R>
void vec_add_ip(const std::size_t n, const V & v, R & result)
{
for ( std::size_t i = 0; i < n; i++ )
result[i] += v[i];
}
template<typename T, std::size_t N>
void vec_sub(const std::size_t n, const T (& RESTRICT v1)[N], const T (& RESTRICT v2)[N], T (& RESTRICT result)[N])
template<typename V1, typename V2, typename VR>
void vec_sub(const std::size_t n, const V1 &v1, const V2 & v2, VR & result)
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
result[i] = v1[i] - v2[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = v1[i] - v2[i];
}
template<typename T, std::size_t N>
void vec_scale(const std::size_t n, T (& RESTRICT v)[N], const T scalar)
template<typename V, typename T>
void vec_scale(const std::size_t n, V & v, const T scalar)
{
if (n != N)
for ( std::size_t i = 0; i < n; i++ )
v[i] = scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
v[i] = scalar * v[i];
}
template<typename T>
T vec_maxabs(const std::size_t n, const T * RESTRICT v)
template<typename T, typename V>
T vec_maxabs(const std::size_t n, const V & v)
{
T ret = 0.0;
for ( std::size_t i = 0; i < n; i++ )

View File

@ -317,15 +317,16 @@ NETLIST_START(kidniki)
PARAM(Solver.METHOD, "MAT_CR")
//PARAM(Solver.METHOD, "MAT")
//PARAM(Solver.METHOD, "GMRES")
PARAM(Solver.SOR_FACTOR, 1.00)
PARAM(Solver.SOR_FACTOR, 1.0)
PARAM(Solver.DYNAMIC_TS, 0)
PARAM(Solver.DYNAMIC_LTE, 5e-4)
PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6)
#else
SOLVER(Solver, 18000)
PARAM(Solver.ACCURACY, 1e-6)
PARAM(Solver.ACCURACY, 1e-7)
PARAM(Solver.NR_LOOPS, 100)
PARAM(Solver.GS_LOOPS, 20)
//PARAM(Solver.METHOD, "MAT_CR")
PARAM(Solver.METHOD, "GMRES")
#endif