netlist: refactor code for better scalability and flexibility. (nw)

These changes aim to remove some of the duplication of code in the
various solvers.
Tested with gcc-7 clang-8 and nvcc-9.2
This commit is contained in:
couriersud 2019-01-21 00:36:57 +01:00
parent 6d5e014a6f
commit 32b0442c73
19 changed files with 1054 additions and 868 deletions

View File

@ -207,7 +207,7 @@ native:
$(MAKE) CEXTRAFLAGS="-march=native -Wall -Wpedantic -Wsign-compare -Wextra -Wno-unused-parameter" $(MAKE) CEXTRAFLAGS="-march=native -Wall -Wpedantic -Wsign-compare -Wextra -Wno-unused-parameter"
clang: clang:
$(MAKE) CC=clang++ LD=clang++ CEXTRAFLAGS="-march=native -Wno-unused-parameter -Weverything -Werror -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors" $(MAKE) CC=clang++ LD=clang++ CEXTRAFLAGS="-march=native -Wno-c++11-narrowing -Wno-unused-parameter -Weverything -Werror -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors"
clang-5: clang-5:
$(MAKE) CC=clang++-5.0 LD=clang++-5.0 CEXTRAFLAGS="-march=native -Weverything -Werror -Wno-inconsistent-missing-destructor-override -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wno-weak-template-vtables -Wno-exit-time-destructors" $(MAKE) CC=clang++-5.0 LD=clang++-5.0 CEXTRAFLAGS="-march=native -Weverything -Werror -Wno-inconsistent-missing-destructor-override -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wno-weak-template-vtables -Wno-exit-time-destructors"
@ -217,6 +217,7 @@ nvcc:
CEXTRAFLAGS="-x cu -DNVCCBUILD=1 --expt-extended-lambda --expt-relaxed-constexpr --default-stream per-thread --restrict" CEXTRAFLAGS="-x cu -DNVCCBUILD=1 --expt-extended-lambda --expt-relaxed-constexpr --default-stream per-thread --restrict"
# #
# -Wno-c++11-narrowing : seems a bit broken
# Mostly done: -Wno-weak-vtables -Wno-cast-align # Mostly done: -Wno-weak-vtables -Wno-cast-align
# FIXME: -Wno-weak-vtables -Wno-missing-variable-declarations -Wno-conversion -Wno-exit-time-destructors # FIXME: -Wno-weak-vtables -Wno-missing-variable-declarations -Wno-conversion -Wno-exit-time-destructors
# FIXME: -Winconsistent-missing-destructor-override : c++ community has diverging opinions on this https://github.com/isocpp/CppCoreGuidelines/issues/721 # FIXME: -Winconsistent-missing-destructor-override : c++ community has diverging opinions on this https://github.com/isocpp/CppCoreGuidelines/issues/721

View File

@ -322,11 +322,11 @@ namespace netlist
const T &value //!< Initial value after construction const T &value //!< Initial value after construction
); );
//! Copy Constructor. //! Copy Constructor.
constexpr state_var(const state_var &rhs) noexcept = default; constexpr state_var(const state_var &rhs) = default;
//! Move Constructor. //! Move Constructor.
constexpr state_var(state_var &&rhs) noexcept = default; constexpr state_var(state_var &&rhs) noexcept = default;
//! Assignment operator to assign value of a state var. //! Assignment operator to assign value of a state var.
C14CONSTEXPR state_var &operator=(const state_var &rhs) noexcept = default; C14CONSTEXPR state_var &operator=(const state_var &rhs) = default;
//! Assignment move operator to assign value of a state var. //! Assignment move operator to assign value of a state var.
C14CONSTEXPR state_var &operator=(state_var &&rhs) noexcept = default; C14CONSTEXPR state_var &operator=(state_var &&rhs) noexcept = default;
//! Assignment operator to assign value of type T. //! Assignment operator to assign value of type T.

View File

@ -0,0 +1,117 @@
// license:GPL-2.0+
// copyright-holders:Couriersud
/*
* parray.h
*
*/
#ifndef PARRAY_H_
#define PARRAY_H_
#include "pconfig.h"
#include "pexception.h"
#include <memory>
#include <utility>
#include <vector>
#include <array>
#include <type_traits>
namespace plib {
template <typename FT, int SIZE>
struct sizeabs
{
static constexpr std::size_t ABS() { return (SIZE < 0) ? static_cast<std::size_t>(0 - SIZE) : static_cast<std::size_t>(SIZE); }
typedef typename std::array<FT, ABS()> container;
};
template <typename FT>
struct sizeabs<FT, 0>
{
static constexpr const std::size_t ABS = 0;
typedef typename std::vector<FT> container;
};
/**
* \brief Array with preallocated or dynamic allocation
*
* Passing SIZE > 0 has the same functionality as a std::array.
* SIZE = 0 is pure dynamic allocation, the actual array size is passed to the
* constructor.
* SIZE < 0 reserves std::abs(SIZE) elements statically in place allocated. The
* actual size is passed in by the constructor.
* This array is purely intended for HPC application where depending on the
* architecture a preference dynamic/static has to be made.
*
* This struct is not intended to be a full replacement to std::array.
* It is a subset to enable switching between dynamic and static allocation.
* I consider > 10% performance difference to be a use case.
*/
template <typename FT, int SIZE>
struct parray
{
public:
static constexpr std::size_t SIZEABS() { return sizeabs<FT, SIZE>::ABS(); }
typedef typename sizeabs<FT, SIZE>::container base_type;
typedef typename base_type::size_type size_type;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
template <int X = SIZE >
parray(size_type size, typename std::enable_if<X==0, int>::type = 0)
: m_a(size), m_size(size)
{
}
#if 0
/* allow construction in fixed size arrays */
template <int X = SIZE >
parray(typename std::enable_if<X==0, int>::type = 0)
: m_size(0)
{
}
#endif
template <int X = SIZE >
parray(size_type size, typename std::enable_if<X!=0, int>::type = 0)
: m_size(size)
{
if (SIZE < 0 && size > SIZEABS())
throw plib::pexception("parray: size error " + plib::to_string(size) + ">" + plib::to_string(SIZEABS()));
else if (SIZE > 0 && size != SIZEABS())
throw plib::pexception("parray: size error");
}
inline size_type size() const noexcept { return SIZE <= 0 ? m_size : SIZEABS(); }
constexpr size_type max_size() const noexcept { return base_type::max_size(); }
bool empty() const noexcept { return size() == 0; }
#if 0
reference operator[](size_type i) /*noexcept*/
{
if (i >= m_size) throw plib::pexception("limits error " + to_string(i) + ">=" + to_string(m_size));
return m_a[i];
}
const_reference operator[](size_type i) const /*noexcept*/
{
if (i >= m_size) throw plib::pexception("limits error " + to_string(i) + ">=" + to_string(m_size));
return m_a[i];
}
#else
reference operator[](size_type i) noexcept { return m_a[i]; }
constexpr const_reference operator[](size_type i) const noexcept { return m_a[i]; }
#endif
FT * data() noexcept { return m_a.data(); }
const FT * data() const noexcept { return m_a.data(); }
private:
base_type m_a;
size_type m_size;
};
}
#endif /* PARRAY_H_ */

View File

@ -103,7 +103,7 @@ public:
template<typename C> template<typename C>
void save_item(const void *owner, std::vector<C> &v, const pstring &stname) void save_item(const void *owner, std::vector<C> &v, const pstring &stname)
{ {
save_state(v.data(), owner, stname, v.size()); save_state_ptr(owner, stname, datatype_f<C>::f(), v.size(), v.data());
} }
void pre_save(); void pre_save();

View File

@ -11,60 +11,232 @@
#define MAT_CR_H_ #define MAT_CR_H_
#include <algorithm> #include <algorithm>
#include <type_traits>
#include <array>
#include <vector>
#include <cmath>
#include "../plib/pconfig.h" #include "../plib/pconfig.h"
#include "../plib/palloc.h" #include "../plib/palloc.h"
#include "../plib/pstate.h"
#include "../plib/parray.h"
template<std::size_t N, typename C = uint16_t, typename T = double> namespace plib
{
template<typename T, int N, typename C = uint16_t>
struct mat_cr_t struct mat_cr_t
{ {
typedef C index_type; typedef C index_type;
typedef T value_type; typedef T value_type;
C diag[N]; // diagonal index pointer n parray<C, N> diag; // diagonal index pointer n
C ia[N+1]; // row index pointer n + 1 parray<C, (N == 0) ? 0 : (N < 0 ? N - 1 : N + 1)> row_idx; // row index pointer n + 1
C ja[N*N]; // column index array nz_num, initially (n * n) parray<C, N < 0 ? -N * N : N *N> col_idx; // column index array nz_num, initially (n * n)
T A[N*N]; // Matrix elements nz_num, initially (n * n) parray<T, N < 0 ? -N * N : N *N> A; // Matrix elements nz_num, initially (n * n)
//parray<C, N < 0 ? -N * N / 2 : N * N / 2> nzbd; // Support for gaussian elimination
parray<C, N < 0 ? -N * (N-1) / 2 : N * (N+1) / 2 > nzbd; // Support for gaussian elimination
// contains elimination rows below the diagonal
std::size_t size; std::size_t m_size;
std::size_t nz_num; std::size_t nz_num;
explicit mat_cr_t(const std::size_t n) explicit mat_cr_t(const std::size_t n)
: size(n) : diag(n)
, row_idx(n+1)
, col_idx(n*n)
, A(n*n)
, nzbd(n * (n+1) / 2)
, m_size(n)
, nz_num(0) , nz_num(0)
{ {
#if 0 for (std::size_t i=0; i<n+1; i++)
#if 0 A[i] = 0;
ia = plib::palloc_array<C>(n + 1);
ja = plib::palloc_array<C>(n * n);
diag = plib::palloc_array<C>(n);
#else
diag = plib::palloc_array<C>(n + (n + 1) + n * n);
ia = diag + n;
ja = ia + (n+1);
A = plib::palloc_array<T>(n * n);
#endif
#endif
} }
~mat_cr_t() ~mat_cr_t()
{ {
#if 0
plib::pfree_array(diag);
#if 0
plib::pfree_array(ia);
plib::pfree_array(ja);
#endif
plib::pfree_array(A);
#endif
} }
std::size_t size() const { return m_size; }
void set_scalar(const T scalar) void set_scalar(const T scalar)
{ {
for (std::size_t i=0, e=nz_num; i<e; i++) for (std::size_t i=0, e=nz_num; i<e; i++)
A[i] = scalar; A[i] = scalar;
} }
void mult_vec(const T * RESTRICT x, T * RESTRICT res) void set(C r, C c, T val)
{
C ri = row_idx[r];
while (ri < row_idx[r+1] && col_idx[ri] < c)
ri++;
// we have the position now;
if (nz_num > 0 && col_idx[ri] == c)
A[ri] = val;
else
{
for (C i = nz_num; i>ri; i--)
{
A[i] = A[i-1];
col_idx[i] = col_idx[i-1];
}
A[ri] = val;
col_idx[ri] = c;
for (C i = row_idx[r]; i < size()+1;i++)
row_idx[i]++;
nz_num++;
if (c==r)
diag[r] = ri;
}
}
enum constants_e
{
FILL_INFINITY = 9999999
};
template <typename M>
std::pair<std::size_t, std::size_t> gaussian_extend_fill_mat(M &fill)
{
std::size_t ops = 0;
std::size_t fill_max = 0;
for (std::size_t k = 0; k < fill.size(); k++)
{
ops++; // 1/A(k,k)
for (std::size_t row = k + 1; row < fill.size(); row++)
{
if (fill[row][k] < FILL_INFINITY)
{
ops++;
for (std::size_t col = k + 1; col < fill[row].size(); col++)
//if (fill[k][col] < FILL_INFINITY)
{
auto f = std::min(fill[row][col], 1 + fill[row][k] + fill[k][col]);
if (f < FILL_INFINITY)
{
if (f > fill_max)
fill_max = f;
ops += 2;
}
fill[row][col] = f;
}
}
}
}
return { fill_max, ops };
}
template <typename M>
void build_from_fill_mat(const M &f, std::size_t max_fill = FILL_INFINITY - 1,
unsigned band_width = FILL_INFINITY)
{
C nz = 0;
if (nz_num != 0)
throw pexception("build_from_mat only allowed on empty CR matrix");
for (std::size_t k=0; k < size(); k++)
{
row_idx[k] = nz;
for (std::size_t j=0; j < size(); j++)
if (f[k][j] <= max_fill && std::abs(static_cast<int>(k)-static_cast<int>(j)) <= static_cast<int>(band_width))
{
col_idx[nz] = static_cast<C>(j);
if (j == k)
diag[k] = nz;
nz++;
}
}
row_idx[size()] = nz;
nz_num = nz;
/* build nzbd */
std::size_t p=0;
for (std::size_t k=0; k < size(); k++)
{
for (std::size_t j=k + 1; j < size(); j++)
if (f[j][k] < FILL_INFINITY)
nzbd[p++] = static_cast<C>(j);
nzbd[p++] = 0; // end of sequence
}
}
template <typename V>
void gaussian_elimination(V & RHS)
{
std::size_t nzbdp = 0;
const std::size_t iN = size();
for (std::size_t i = 0; i < iN - 1; i++)
{
std::size_t pi = diag[i];
const value_type f = 1.0 / A[pi++];
const std::size_t piie = row_idx[i+1];
while (auto j = nzbd[nzbdp++])
{
// proceed to column i
std::size_t pj = row_idx[j];
while (col_idx[pj] < i)
pj++;
const value_type f1 = - A[pj++] * f;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; pii++)
{
while (col_idx[pj] < col_idx[pii])
pj++;
if (col_idx[pj] == col_idx[pii])
A[pj++] += A[pii] * f1;
}
RHS[j] += f1 * RHS[i];
}
}
}
template <typename V1, typename V2>
void gaussian_back_substitution(V1 &V, const V2 &RHS)
{
const std::size_t iN = size();
/* row n-1 */
V[iN - 1] = RHS[iN - 1] / A[diag[iN - 1]];
for (std::size_t j = iN - 1; j-- > 0;)
{
value_type tmp = 0;
const auto jdiag = diag[j];
const std::size_t e = row_idx[j+1];
for (std::size_t pk = jdiag + 1; pk < e; pk++)
tmp += A[pk] * V[col_idx[pk]];
V[j] = (RHS[j] - tmp) / A[jdiag];
}
}
template <typename V1>
void gaussian_back_substitution(V1 &V)
{
const std::size_t iN = size();
/* row n-1 */
V[iN - 1] = V[iN - 1] / A[diag[iN - 1]];
for (std::size_t j = iN - 1; j-- > 0;)
{
value_type tmp = 0;
const auto jdiag = diag[j];
const std::size_t e = row_idx[j+1];
for (std::size_t pk = jdiag + 1; pk < e; pk++)
tmp += A[pk] * V[col_idx[pk]];
V[j] = (V[j] - tmp) / A[jdiag];
}
}
template <typename VTV, typename VTR>
void mult_vec(const VTV & RESTRICT x, VTR & RESTRICT res)
{ {
/* /*
* res = A * x * res = A * x
@ -77,14 +249,68 @@ struct mat_cr_t
while (k < oe) while (k < oe)
{ {
T tmp = 0.0; T tmp = 0.0;
const std::size_t e = ia[i+1]; const std::size_t e = row_idx[i+1];
for (; k < e; k++) for (; k < e; k++)
tmp += A[k] * x[ja[k]]; tmp += A[k] * x[col_idx[k]];
res[i++] = tmp; res[i++] = tmp;
} }
} }
void incomplete_LU_factorization(T * RESTRICT LU) /* throws error if P(source)>P(destination) */
template <typename LUMAT>
void slim_copy_from(LUMAT & src)
{
for (std::size_t r=0; r<src.size(); r++)
{
C dp = row_idx[r];
for (C sp = src.row_idx[r]; sp < src.row_idx[r+1]; sp++)
{
/* advance dp to source column and fill 0s if necessary */
while (col_idx[dp] < src.col_idx[sp])
A[dp++] = 0;
if (row_idx[r+1] <= dp || col_idx[dp] != src.col_idx[sp])
throw plib::pexception("slim_copy_from error");
A[dp++] = src.A[sp];
}
/* fill remaining elements in row */
while (dp < row_idx[r+1])
A[dp++] = 0;
}
}
/* only copies common elements */
template <typename LUMAT>
void reduction_copy_from(LUMAT & src)
{
C sp = 0;
for (std::size_t r=0; r<src.size(); r++)
{
C dp = row_idx[r];
while(sp < src.row_idx[r+1])
{
/* advance dp to source column and fill 0s if necessary */
if (col_idx[dp] < src.col_idx[sp])
A[dp++] = 0;
else if (col_idx[dp] == src.col_idx[sp])
A[dp++] = src.A[sp++];
else
sp++;
}
/* fill remaining elements in row */
while (dp < row_idx[r+1])
A[dp++] = 0;
}
}
/* checks at all - may crash */
template <typename LUMAT>
void raw_copy_from(LUMAT & src)
{
for (std::size_t k = 0; k < nz_num; k++)
A[k] = src.A[k];
}
void incomplete_LU_factorization()
{ {
/* /*
* incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung * incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung
@ -93,38 +319,67 @@ struct mat_cr_t
* *
*/ */
#if 0
const std::size_t lnz = nz_num; const std::size_t lnz = nz_num;
for (std::size_t k = 0; k < lnz; k++) for (std::size_t i = 1; row_idx[i] < lnz; i++) // row i
LU[k] = A[k];
for (std::size_t i = 1; ia[i] < lnz; i++) // row i
{ {
const std::size_t iai1 = ia[i + 1]; const std::size_t p_i_end = row_idx[i + 1];
const std::size_t pke = diag[i]; // loop over all columns left of diag in row i
for (std::size_t pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i for (std::size_t p_i_k = row_idx[i]; p_i_k < diag[i]; p_i_k++)
{ {
// pk == (i, k) // pk == (i, k)
const std::size_t k = ja[pk]; const std::size_t k = col_idx[p_i_k];
const std::size_t iak1 = ia[k + 1]; // get start of row k
const T LUpk = LU[pk] = LU[pk] / LU[diag[k]]; const std::size_t p_k_end = row_idx[k + 1];
std::size_t pt = ia[k]; const T LUp_i_k = A[p_i_k] = A[p_i_k] / A[diag[k]];
for (std::size_t pj = pk + 1; pj < iai1; pj++) // pj = (i, j) std::size_t p_k_j = row_idx[k];
for (std::size_t p_i_j = p_i_k + 1; p_i_j < p_i_end; p_i_j++) // pj = (i, j)
{ {
// we can assume that within a row ja increases continuously */ // we can assume that within a row ja increases continuously */
const std::size_t ej = ja[pj]; const std::size_t j = col_idx[p_i_j]; // row i, column j
while (ja[pt] < ej && pt < iak1) while (col_idx[p_k_j] < j && p_k_j < p_k_end)
pt++; p_k_j++;
if (pt < iak1 && ja[pt] == ej) if (p_k_j < p_k_end && col_idx[p_k_j] == j)
LU[pj] = LU[pj] - LUpk * LU[pt]; A[p_i_j] = A[p_i_j] - LUp_i_k * A[p_k_j];
} }
} }
} }
} #else
for (std::size_t i = 1; i < m_size; i++) // row i
{
const std::size_t p_i_end = row_idx[i + 1];
// loop over all columns k left of diag in row i
for (std::size_t i_k = row_idx[i]; i_k < diag[i]; i_k++)
{
const std::size_t k = col_idx[i_k];
const std::size_t p_k_end = row_idx[k + 1];
const T LUp_i_k = A[i_k] = A[i_k] / A[diag[k]];
void solveLUx (const T * RESTRICT LU, T * RESTRICT r) // get start of row k
//std::size_t k_j = row_idx[k];
std::size_t k_j = diag[k];
for (std::size_t i_j = i_k + 1; i_j < p_i_end; i_j++) // pj = (i, j)
{
// we can assume that within a row ja increases continuously */
const std::size_t j = col_idx[i_j]; // row i, column j
while (col_idx[k_j] < j && k_j < p_k_end)
k_j++;
if (k_j >= p_k_end)
break;
if (col_idx[k_j] == j)
A[i_j] = A[i_j] - LUp_i_k * A[k_j];
}
}
}
#endif
}
template <typename R>
void solveLUx (R &r)
{ {
/* /*
* Solve a linear equation Ax = r * Solve a linear equation Ax = r
@ -147,29 +402,30 @@ struct mat_cr_t
* This can be solved for x using backwards elimination in U. * This can be solved for x using backwards elimination in U.
* *
*/ */
for (std::size_t i = 1; i < m_size; ++i )
for (std::size_t i = 1; ia[i] < nz_num; ++i )
{ {
T tmp = 0.0; T tmp = 0.0;
const std::size_t j1 = ia[i]; const std::size_t j1 = row_idx[i];
const std::size_t j2 = diag[i]; const std::size_t j2 = diag[i];
for (std::size_t j = j1; j < j2; ++j ) for (std::size_t j = j1; j < j2; ++j )
tmp += LU[j] * r[ja[j]]; tmp += A[j] * r[col_idx[j]];
r[i] -= tmp; r[i] -= tmp;
} }
// i now is equal to n; // i now is equal to n;
for (std::size_t i = size; i-- > 0; ) for (std::size_t i = m_size; i-- > 0; )
{ {
T tmp = 0.0; T tmp = 0.0;
const std::size_t di = diag[i]; const std::size_t di = diag[i];
const std::size_t j2 = ia[i+1]; const std::size_t j2 = row_idx[i+1];
for (std::size_t j = di + 1; j < j2; j++ ) for (std::size_t j = di + 1; j < j2; j++ )
tmp += LU[j] * r[ja[j]]; tmp += A[j] * r[col_idx[j]];
r[i] = (r[i] - tmp) / LU[di]; r[i] = (r[i] - tmp) / A[di];
} }
} }
}; };
}
#endif /* MAT_CR_H_ */ #endif /* MAT_CR_H_ */

View File

@ -54,12 +54,43 @@ public:
void set_pointers(); void set_pointers();
template <typename AP, typename FT>
void fill_matrix(AP &tcr, FT &RHS)
{
FT gtot_t = 0.0;
FT RHS_t = 0.0;
const std::size_t term_count = this->count();
const std::size_t railstart = this->m_railstart;
const FT * const * RESTRICT other_cur_analog = this->connected_net_V();
for (std::size_t i = 0; i < railstart; i++)
{
*tcr[i] -= m_go[i];
gtot_t += m_gt[i];
RHS_t += m_Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
{
RHS_t += (m_Idr[i] + m_go[i] * *other_cur_analog[i]);
gtot_t += m_gt[i];
}
RHS = RHS_t;
// update diagonal element ...
*tcr[railstart] += gtot_t; //mat.A[mat.diag[k]] += gtot_t;
}
std::size_t m_railstart; std::size_t m_railstart;
std::vector<unsigned> m_nz; /* all non zero for multiplication */ std::vector<unsigned> m_nz; /* all non zero for multiplication */
std::vector<unsigned> m_nzrd; /* non zero right of the diagonal for elimination, may include RHS element */ std::vector<unsigned> m_nzrd; /* non zero right of the diagonal for elimination, may include RHS element */
std::vector<unsigned> m_nzbd; /* non zero below of the diagonal for elimination */ std::vector<unsigned> m_nzbd; /* non zero below of the diagonal for elimination */
/* state */ /* state */
nl_double m_last_V; nl_double m_last_V;
nl_double m_DD_n_m_1; nl_double m_DD_n_m_1;
@ -157,14 +188,15 @@ protected:
/* virtual */ void add_term(std::size_t net_idx, terminal_t *term); /* virtual */ void add_term(std::size_t net_idx, terminal_t *term);
template <typename T> template <typename T>
void store(const T * RESTRICT V); void store(const T & RESTRICT V);
template <typename T>
T delta(const T * RESTRICT V);
template <typename T> template <typename T>
void build_LE_A(); auto delta(const T & RESTRICT V) -> typename std::decay<decltype(V[0])>::type;
template <typename T> template <typename T>
void build_LE_RHS(); void build_LE_A(T &child);
template <typename T>
void build_LE_RHS(T &child);
std::vector<std::unique_ptr<terms_for_net_t>> m_terms; std::vector<std::unique_ptr<terms_for_net_t>> m_terms;
std::vector<analog_net_t *> m_nets; std::vector<analog_net_t *> m_nets;
@ -199,7 +231,7 @@ private:
}; };
template <typename T> template <typename T>
T matrix_solver_t::delta(const T * RESTRICT V) auto matrix_solver_t::delta(const T & RESTRICT V) -> typename std::decay<decltype(V[0])>::type
{ {
/* NOTE: Ideally we should also include currents (RHS) here. This would /* NOTE: Ideally we should also include currents (RHS) here. This would
* need a reevaluation of the right hand side after voltages have been updated * need a reevaluation of the right hand side after voltages have been updated
@ -207,14 +239,14 @@ T matrix_solver_t::delta(const T * RESTRICT V)
*/ */
const std::size_t iN = this->m_terms.size(); const std::size_t iN = this->m_terms.size();
T cerr = 0; typename std::decay<decltype(V[0])>::type cerr = 0;
for (std::size_t i = 0; i < iN; i++) for (std::size_t i = 0; i < iN; i++)
cerr = std::max(cerr, std::abs(V[i] - static_cast<T>(this->m_nets[i]->Q_Analog()))); cerr = std::max(cerr, std::abs(V[i] - this->m_nets[i]->Q_Analog()));
return cerr; return cerr;
} }
template <typename T> template <typename T>
void matrix_solver_t::store(const T * RESTRICT V) void matrix_solver_t::store(const T & RESTRICT V)
{ {
const std::size_t iN = this->m_terms.size(); const std::size_t iN = this->m_terms.size();
for (std::size_t i = 0; i < iN; i++) for (std::size_t i = 0; i < iN; i++)
@ -222,34 +254,33 @@ void matrix_solver_t::store(const T * RESTRICT V)
} }
template <typename T> template <typename T>
void matrix_solver_t::build_LE_A() void matrix_solver_t::build_LE_A(T &child)
{ {
typedef typename T::float_type float_type;
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t"); static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this);
const std::size_t iN = child.N(); const std::size_t iN = child.N();
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
terms_for_net_t *terms = m_terms[k].get(); terms_for_net_t *terms = m_terms[k].get();
nl_double * Ak = &child.A(k, 0ul); float_type * Ak = &child.A(k, 0ul);
for (std::size_t i=0; i < iN; i++) for (std::size_t i=0; i < iN; i++)
Ak[i] = 0.0; Ak[i] = 0.0;
const std::size_t terms_count = terms->count(); const std::size_t terms_count = terms->count();
const std::size_t railstart = terms->m_railstart; const std::size_t railstart = terms->m_railstart;
const nl_double * const RESTRICT gt = terms->gt(); const float_type * const RESTRICT gt = terms->gt();
{ {
nl_double akk = 0.0; float_type akk = 0.0;
for (std::size_t i = 0; i < terms_count; i++) for (std::size_t i = 0; i < terms_count; i++)
akk += gt[i]; akk += gt[i];
Ak[k] = akk; Ak[k] = akk;
} }
const nl_double * const RESTRICT go = terms->go(); const float_type * const RESTRICT go = terms->go();
int * RESTRICT net_other = terms->connected_net_idx(); int * RESTRICT net_other = terms->connected_net_idx();
for (std::size_t i = 0; i < railstart; i++) for (std::size_t i = 0; i < railstart; i++)
@ -258,21 +289,21 @@ void matrix_solver_t::build_LE_A()
} }
template <typename T> template <typename T>
void matrix_solver_t::build_LE_RHS() void matrix_solver_t::build_LE_RHS(T &child)
{ {
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t"); static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
T &child = static_cast<T &>(*this); typedef typename T::float_type float_type;
const std::size_t iN = child.N(); const std::size_t iN = child.N();
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double rhsk_a = 0.0; float_type rhsk_a = 0.0;
nl_double rhsk_b = 0.0; float_type rhsk_b = 0.0;
const std::size_t terms_count = m_terms[k]->count(); const std::size_t terms_count = m_terms[k]->count();
const nl_double * const RESTRICT go = m_terms[k]->go(); const float_type * const RESTRICT go = m_terms[k]->go();
const nl_double * const RESTRICT Idr = m_terms[k]->Idr(); const float_type * const RESTRICT Idr = m_terms[k]->Idr();
const nl_double * const * RESTRICT other_cur_analog = m_terms[k]->connected_net_V(); const float_type * const * RESTRICT other_cur_analog = m_terms[k]->connected_net_V();
for (std::size_t i = 0; i < terms_count; i++) for (std::size_t i = 0; i < terms_count; i++)
rhsk_a = rhsk_a + Idr[i]; rhsk_a = rhsk_a + Idr[i];

View File

@ -14,28 +14,21 @@
#include "nld_solver.h" #include "nld_solver.h"
#include "nld_matrix_solver.h" #include "nld_matrix_solver.h"
#include "vector_base.h" #include "vector_base.h"
#include "mat_cr.h"
/* Disabling dynamic allocation gives a ~10% boost in performance
* This flag has been added to support continuous storage for arrays
* going forward in case we implement cuda solvers in the future.
*/
#define NL_USE_DYNAMIC_ALLOCATION (1)
namespace netlist namespace netlist
{ {
namespace devices namespace devices
{ {
//#define nl_ext_double _float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
template <typename FT, int SIZE>
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_direct_t: public matrix_solver_t class matrix_solver_direct_t: public matrix_solver_t
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
typedef FT float_type;
matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size); matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const std::size_t size); matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const std::size_t size);
@ -48,36 +41,25 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson); unsigned solve_non_dynamic(const bool newton_raphson);
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; } constexpr std::size_t N() const { return (SIZE > 0) ? static_cast<std::size_t>(SIZE) : m_dim; }
void LE_solve(); void LE_solve();
template <typename T> template <typename T>
void LE_back_subst(T * RESTRICT x); void LE_back_subst(T & RESTRICT x);
#if (NL_USE_DYNAMIC_ALLOCATION) FT &A(std::size_t r, std::size_t c) { return m_A[r * m_pitch + c]; }
nl_ext_double &A(const std::size_t r, const std::size_t c) { return m_A[r * m_pitch + c]; } FT &RHS(std::size_t r) { return m_A[r * m_pitch + N()]; }
nl_ext_double &RHS(const std::size_t r) { return m_A[r * m_pitch + N()]; } plib::parray<FT, SIZE> m_last_RHS;
#else plib::parray<FT, SIZE> m_new_V;
nl_ext_double &A(const std::size_t r, const std::size_t c) { return m_A[r][c]; }
nl_ext_double &RHS(const std::size_t r) { return m_A[r][N()]; }
#endif
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
private: private:
//static const std::size_t m_pitch = (((storage_N + 1) + 0) / 1) * 1; static constexpr const std::size_t SIZEABS = plib::parray<FT, SIZE>::SIZEABS();
static constexpr std::size_t m_pitch = (((storage_N + 1) + 7) / 8) * 8; static constexpr const std::size_t m_pitch_ABS = (((SIZEABS + 1) + 7) / 8) * 8;
//static const std::size_t m_pitch = (((storage_N + 1) + 15) / 16) * 16;
//static const std::size_t m_pitch = (((storage_N + 1) + 31) / 32) * 32;
#if (NL_USE_DYNAMIC_ALLOCATION)
//nl_ext_double * RESTRICT m_A;
std::vector<nl_ext_double> m_A;
#else
nl_ext_double m_A[storage_N][m_pitch];
#endif
//nl_ext_double m_RHSx[storage_N];
const std::size_t m_dim; const std::size_t m_dim;
const std::size_t m_pitch;
plib::parray<FT, SIZE * m_pitch_ABS> m_A;
}; };
@ -85,16 +67,13 @@ private:
// matrix_solver_direct // matrix_solver_direct
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t() matrix_solver_direct_t<FT, SIZE>::~matrix_solver_direct_t()
{ {
#if (NL_USE_DYNAMIC_ALLOCATION)
//plib::pfree_array(m_A);
#endif
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_direct_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_t::setup_base(nets); matrix_solver_t::setup_base(nets);
@ -107,34 +86,31 @@ void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
t->m_nzrd.push_back(static_cast<unsigned>(N())); t->m_nzrd.push_back(static_cast<unsigned>(N()));
} }
state().save(*this, m_last_RHS, "m_last_RHS"); state().save(*this, m_last_RHS.data(), "m_last_RHS", m_last_RHS.size());
for (std::size_t k = 0; k < N(); k++) for (std::size_t k = 0; k < N(); k++)
state().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k)); state().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k));
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_direct_t<m_N, storage_N>::LE_solve() void matrix_solver_direct_t<FT, SIZE>::LE_solve()
{ {
const std::size_t kN = N(); const std::size_t kN = N();
if (!m_params.m_pivot) if (!m_params.m_pivot)
{ {
for (std::size_t i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
nl_double *Ai = &A(i, 0); const FT f = 1.0 / A(i,i);
const nl_double f = 1.0 / A(i,i);
const auto &nzrd = m_terms[i]->m_nzrd; const auto &nzrd = m_terms[i]->m_nzrd;
const auto &nzbd = m_terms[i]->m_nzbd; const auto &nzbd = m_terms[i]->m_nzbd;
for (std::size_t j : nzbd) for (std::size_t j : nzbd)
{ {
nl_double *Aj = &A(j, 0); const FT f1 = -f * A(j, i);
const nl_double f1 = -f * Aj[i];
for (std::size_t k : nzrd) for (std::size_t k : nzrd)
Aj[k] += Ai[k] * f1; A(j, k) += A(i, k) * f1;
//RHS(j) += RHS(i) * f1; //RHS(j) += RHS(i) * f1;
} }
} }
@ -161,17 +137,17 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
//std::swap(RHS(i), RHS(maxrow)); //std::swap(RHS(i), RHS(maxrow));
} }
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / A(i,i); const FT f = 1.0 / A(i,i);
/* Eliminate column i from row j */ /* Eliminate column i from row j */
for (std::size_t j = i + 1; j < kN; j++) for (std::size_t j = i + 1; j < kN; j++)
{ {
const nl_double f1 = - A(j,i) * f; const FT f1 = - A(j,i) * f;
if (f1 != NL_FCONST(0.0)) if (f1 != NL_FCONST(0.0))
{ {
const nl_double * RESTRICT pi = &A(i,i+1); const FT * RESTRICT pi = &A(i,i+1);
nl_double * RESTRICT pj = &A(j,i+1); FT * RESTRICT pj = &A(j,i+1);
#if 1 #if 1
vec_add_mult_scalar_p(kN-i,pi,f1,pj); vec_add_mult_scalar_p(kN-i,pi,f1,pj);
#else #else
@ -188,10 +164,10 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
} }
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
template <typename T> template <typename T>
void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst( void matrix_solver_direct_t<FT, SIZE>::LE_back_subst(
T * RESTRICT x) T & RESTRICT x)
{ {
const std::size_t kN = N(); const std::size_t kN = N();
@ -200,7 +176,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
{ {
for (std::size_t j = kN; j-- > 0; ) for (std::size_t j = kN; j-- > 0; )
{ {
T tmp = 0; FT tmp = 0;
for (std::size_t k = j+1; k < kN; k++) for (std::size_t k = j+1; k < kN; k++)
tmp += A(j,k) * x[k]; tmp += A(j,k) * x[k];
x[j] = (RHS(j) - tmp) / A(j,j); x[j] = (RHS(j) - tmp) / A(j,j);
@ -210,40 +186,33 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
{ {
for (std::size_t j = kN; j-- > 0; ) for (std::size_t j = kN; j-- > 0; )
{ {
T tmp = 0; FT tmp = 0;
const auto &nzrd = m_terms[j]->m_nzrd;
const auto *p = m_terms[j]->m_nzrd.data(); const auto e = nzrd.size() - 1; /* exclude RHS element */
const auto e = m_terms[j]->m_nzrd.size() - 1; /* exclude RHS element */ for ( std::size_t k = 0; k < e; k++)
T * Aj = &A(j,0); tmp += A(j, nzrd[k]) * x[nzrd[k]];
for (std::size_t k = 0; k < e; k++)
{
const auto pk = p[k];
tmp += Aj[pk] * x[pk];
}
x[j] = (RHS(j) - tmp) / A(j,j); x[j] = (RHS(j) - tmp) / A(j,j);
} }
} }
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_direct_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
{ {
nl_double new_V[storage_N]; // = { 0.0 };
this->LE_solve(); this->LE_solve();
this->LE_back_subst(new_V); this->LE_back_subst(m_new_V);
const nl_double err = (newton_raphson ? delta(new_V) : 0.0); const FT err = (newton_raphson ? delta(m_new_V) : 0.0);
store(new_V); store(m_new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1; return (err > this->m_params.m_accuracy) ? 2 : 1;
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
build_LE_A<matrix_solver_direct_t>(); this->build_LE_A(*this);
build_LE_RHS<matrix_solver_direct_t>(); this->build_LE_RHS(*this);
for (std::size_t i=0, iN=N(); i < iN; i++) for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i); m_last_RHS[i] = RHS(i);
@ -252,33 +221,33 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool n
return this->solve_non_dynamic(newton_raphson); return this->solve_non_dynamic(newton_raphson);
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, matrix_solver_direct_t<FT, SIZE>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, ASCENDING, params) : matrix_solver_t(anetlist, name, ASCENDING, params)
, m_last_RHS(size)
, m_new_V(size)
, m_dim(size) , m_dim(size)
, m_pitch(m_pitch_ABS ? m_pitch_ABS : (((m_dim + 1) + 7) / 8) * 8)
, m_A(size * m_pitch)
{ {
#if (NL_USE_DYNAMIC_ALLOCATION)
m_A.resize(N() * m_pitch);
//m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++) for (unsigned k = 0; k < N(); k++)
{ {
m_last_RHS[k] = 0.0; m_last_RHS[k] = 0.0;
} }
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, matrix_solver_direct_t<FT, SIZE>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
const eSortType sort, const solver_parameters_t *params, const std::size_t size) const eSortType sort, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, sort, params) : matrix_solver_t(anetlist, name, sort, params)
, m_last_RHS(size)
, m_new_V(size)
, m_dim(size) , m_dim(size)
, m_pitch(m_pitch_ABS ? m_pitch_ABS : (((m_dim + 1) + 7) / 8) * 8)
, m_A(size * m_pitch)
{ {
#if (NL_USE_DYNAMIC_ALLOCATION) for (std::size_t k = 0; k < N(); k++)
m_A.resize(N() * m_pitch);
//m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
#endif
for (unsigned k = 0; k < N(); k++)
{ {
m_last_RHS[k] = 0.0; m_last_RHS[k] = 0.0;
} }

View File

@ -13,37 +13,41 @@
namespace netlist namespace netlist
{ {
namespace devices namespace devices
{
template <typename FT>
class matrix_solver_direct1_t: public matrix_solver_direct_t<FT, 1>
{ {
class matrix_solver_direct1_t: public matrix_solver_direct_t<1,1> public:
{
public:
matrix_solver_direct1_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params) typedef FT float_type;
: matrix_solver_direct_t<1, 1>(anetlist, name, params, 1) typedef matrix_solver_direct_t<FT, 1> base_type;
{}
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
}; matrix_solver_direct1_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
: matrix_solver_direct_t<FT, 1>(anetlist, name, params, 1)
{}
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// matrix_solver - Direct1 // matrix_solver - Direct1
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override
{
this->build_LE_A(*this);
this->build_LE_RHS(*this);
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
inline unsigned matrix_solver_direct1_t::vsolve_non_dynamic(const bool newton_raphson) FT new_V[1] = { this->RHS(0) / this->A(0,0) };
{
build_LE_A<matrix_solver_direct1_t>();
build_LE_RHS<matrix_solver_direct1_t>();
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
nl_double new_V[1] = { RHS(0) / A(0,0) }; const FT err = (newton_raphson ? this->delta(new_V) : 0.0);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
const nl_double err = (newton_raphson ? delta(new_V) : 0.0); };
store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
} //namespace devices
} //namespace devices
} // namespace netlist } // namespace netlist

View File

@ -13,43 +13,48 @@
namespace netlist namespace netlist
{ {
namespace devices namespace devices
{
template <typename FT>
class matrix_solver_direct2_t: public matrix_solver_direct_t<FT, 2>
{ {
class matrix_solver_direct2_t: public matrix_solver_direct_t<2,2> public:
{
public:
matrix_solver_direct2_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params) typedef FT float_type;
: matrix_solver_direct_t<2, 2>(anetlist, name, params, 2)
{}
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
}; matrix_solver_direct2_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
: matrix_solver_direct_t<double, 2>(anetlist, name, params, 2)
{}
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
// ---------------------------------------------------------------------------------------- };
// matrix_solver - Direct2
// ----------------------------------------------------------------------------------------
inline unsigned matrix_solver_direct2_t::vsolve_non_dynamic(const bool newton_raphson) // ----------------------------------------------------------------------------------------
{ // matrix_solver - Direct2
build_LE_A<matrix_solver_direct2_t>(); // ----------------------------------------------------------------------------------------
build_LE_RHS<matrix_solver_direct2_t>();
const nl_double a = A(0,0); template <typename FT>
const nl_double b = A(0,1); inline unsigned matrix_solver_direct2_t<FT>::vsolve_non_dynamic(const bool newton_raphson)
const nl_double c = A(1,0); {
const nl_double d = A(1,1); this->build_LE_A(*this);
this->build_LE_RHS(*this);
nl_double new_V[2]; const float_type a = this->A(0,0);
new_V[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c); const float_type b = this->A(0,1);
new_V[0] = (RHS(0) - b * new_V[1]) / a; const float_type c = this->A(1,0);
const float_type d = this->A(1,1);
const nl_double err = (newton_raphson ? delta(new_V) : 0.0); float_type new_V[2];
store(new_V); new_V[1] = (a * this->RHS(1) - c * this->RHS(0)) / (a * d - b * c);
return (err > this->m_params.m_accuracy) ? 2 : 1; new_V[0] = (this->RHS(0) - b * new_V[1]) / a;
}
} //namespace devices const float_type err = (newton_raphson ? this->delta(new_V) : 0.0);
this->store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1;
}
} //namespace devices
} // namespace netlist } // namespace netlist
#endif /* NLD_MS_DIRECT2_H_ */ #endif /* NLD_MS_DIRECT2_H_ */

View File

@ -21,17 +21,23 @@
namespace netlist namespace netlist
{ {
namespace devices namespace devices
{ {
template <std::size_t m_N, std::size_t storage_N>
template <typename FT, int SIZE>
class matrix_solver_GCR_t: public matrix_solver_t class matrix_solver_GCR_t: public matrix_solver_t
{ {
public: public:
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = 100;
matrix_solver_GCR_t(netlist_base_t &anetlist, const pstring &name, matrix_solver_GCR_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params) : matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params)
, m_dim(size) , m_dim(size)
, RHS(size)
, new_V(size)
, mat(size) , mat(size)
, m_proc() , m_proc()
{ {
@ -41,7 +47,7 @@ public:
{ {
} }
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; } constexpr std::size_t N() const { return m_dim; }
virtual void vsetup(analog_net_t::list_t &nets) override; virtual void vsetup(analog_net_t::list_t &nets) override;
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
@ -51,7 +57,7 @@ public:
private: private:
//typedef typename mat_cr_t<storage_N>::type mattype; //typedef typename mat_cr_t<storage_N>::type mattype;
typedef typename mat_cr_t<storage_N>::index_type mattype; typedef typename plib::mat_cr_t<FT, SIZE>::index_type mat_index_type;
void csc_private(plib::putf8_fmt_writer &strm); void csc_private(plib::putf8_fmt_writer &strm);
@ -60,8 +66,12 @@ private:
pstring static_compile_name(); pstring static_compile_name();
const std::size_t m_dim; const std::size_t m_dim;
std::vector<unsigned> m_term_cr[storage_N]; plib::parray<FT, SIZE> RHS;
mat_cr_t<storage_N> mat; plib::parray<FT, SIZE> new_V;
std::vector<FT *> m_term_cr[storage_N];
plib::mat_cr_t<FT, SIZE> mat;
//extsolver m_proc; //extsolver m_proc;
plib::dynproc<void, double * RESTRICT, double * RESTRICT, double * RESTRICT> m_proc; plib::dynproc<void, double * RESTRICT, double * RESTRICT, double * RESTRICT> m_proc;
@ -72,166 +82,89 @@ private:
// matrix_solver - GCR // matrix_solver - GCR
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_GCR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
setup_base(nets); setup_base(nets);
mattype nz = 0;
const std::size_t iN = this->N(); const std::size_t iN = this->N();
/* build the final matrix */ /* build the final matrix */
bool touched[storage_N][storage_N] = { { false } }; std::vector<std::vector<unsigned>> fill(iN);
std::size_t raw_elements = 0;
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
fill[k].resize(iN, decltype(mat)::FILL_INFINITY);
for (auto &j : this->m_terms[k]->m_nz) for (auto &j : this->m_terms[k]->m_nz)
touched[k][j] = true; {
fill[k][j] = 0;
raw_elements++;
}
} }
unsigned fc = 0; auto gr = mat.gaussian_extend_fill_mat(fill);
unsigned ops = 0;
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
ops++; // 1/A(k,k) unsigned fm = 0;
for (std::size_t row = k + 1; row < iN; row++) pstring ml = "";
for (std::size_t j = 0; j < iN; j++)
{ {
if (touched[row][k]) ml += fill[k][j] < decltype(mat)::FILL_INFINITY ? "X" : "_";
{ if (fill[k][j] < decltype(mat)::FILL_INFINITY)
ops++; if (fill[k][j] > fm)
fc++; fm = fill[k][j];
for (std::size_t col = k + 1; col < iN; col++)
if (touched[k][col])
{
touched[row][col] = true;
ops += 2;
}
}
} }
this->log().verbose("{1:4} {2} {3:4}", k, ml, fm);
} }
for (mattype k=0; k<iN; k++) mat.build_from_fill_mat(fill);
for (mat_index_type k=0; k<iN; k++)
{ {
mat.ia[k] = nz;
for (mattype j=0; j<iN; j++)
{
if (touched[k][j])
{
mat.ja[nz] = j;
if (j == k)
mat.diag[k] = nz;
nz++;
}
}
m_term_cr[k].clear(); m_term_cr[k].clear();
/* build pointers into the compressed row format matrix for each terminal */ /* build pointers into the compressed row format matrix for each terminal */
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++) for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
{ {
int other = this->m_terms[k]->connected_net_idx()[j]; int other = this->m_terms[k]->connected_net_idx()[j];
for (auto i = mat.ia[k]; i < nz; i++) for (auto i = mat.row_idx[k]; i < mat.row_idx[k+1]; i++)
if (other == static_cast<int>(mat.ja[i])) if (other == static_cast<int>(mat.col_idx[i]))
{ {
m_term_cr[k].push_back(i); m_term_cr[k].push_back(&mat.A[i]);
break; break;
} }
} }
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
m_term_cr[k].push_back(&mat.A[mat.diag[k]]);
} }
mat.ia[iN] = nz; this->log().verbose("maximum fill: {1}", gr.first);
mat.nz_num = nz; this->log().verbose("Post elimination occupancy ratio: {2} Ops: {1}", gr.second,
static_cast<double>(mat.nz_num) / static_cast<double>(iN * iN));
this->log().verbose("Ops: {1} Occupancy ratio: {2}\n", ops, this->log().verbose(" Pre elimination occupancy ratio: {2}",
static_cast<double>(nz) / static_cast<double>(iN * iN)); static_cast<double>(raw_elements) / static_cast<double>(iN * iN));
// FIXME: Move me // FIXME: Move me
if (state().lib().isLoaded()) if (state().lib().isLoaded())
{ {
pstring symname = static_compile_name(); pstring symname = static_compile_name();
#if 0
m_proc = this->netlist().lib().template getsym<extsolver>(symname);
if (m_proc != nullptr)
this->log().verbose("External static solver {1} found ...", symname);
else
this->log().warning("External static solver {1} not found ...", symname);
#else
m_proc.load(this->state().lib(), symname); m_proc.load(this->state().lib(), symname);
if (m_proc.resolved()) if (m_proc.resolved())
this->log().warning("External static solver {1} found ...", symname); this->log().warning("External static solver {1} found ...", symname);
else else
this->log().warning("External static solver {1} not found ...", symname); this->log().warning("External static solver {1} not found ...", symname);
#endif
} }
} }
#if 0
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
{
const std::size_t iN = N();
for (std::size_t i = 0; i < iN - 1; i++)
{
const auto &nzbd = this->m_terms[i]->m_nzbd;
if (nzbd.size() > 0) template <typename FT, int SIZE>
{ void matrix_solver_GCR_t<FT, SIZE>::csc_private(plib::putf8_fmt_writer &strm)
std::size_t pi = mat.diag[i];
//const nl_double f = 1.0 / m_A[pi++];
strm("const double f{1} = 1.0 / m_A[{2}];\n", i, pi);
pi++;
const std::size_t piie = mat.ia[i+1];
//for (auto & j : nzbd)
for (std::size_t j : nzbd)
{
// proceed to column i
std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i)
pj++;
//const nl_double f1 = - m_A[pj++] * f;
strm("\tconst double f{1}_{2} = -f{3} * m_A[{4}];\n", i, j, i, pj);
pj++;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
pj++;
//m_A[pj++] += m_A[pii++] * f1;
strm("\tm_A[{1}] += m_A[{2}] * f{3}_{4};\n", pj, pii, i, j);
pj++; pii++;
}
//RHS[j] += f1 * RHS[i];
strm("\tRHS[{1}] += f{2}_{3} * RHS[{4}];\n", j, i, j, i);
}
}
}
//new_V[iN - 1] = RHS[iN - 1] / mat.A[mat.diag[iN - 1]];
strm("\tV[{1}] = RHS[{2}] / m_A[{3}];\n", iN - 1, iN - 1, mat.diag[iN - 1]);
for (std::size_t j = iN - 1; j-- > 0;)
{
strm("\tdouble tmp{1} = 0.0;\n", j);
const std::size_t e = mat.ia[j+1];
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
{
strm("\ttmp{1} += m_A[{2}] * V[{3}];\n", j, pk, mat.ja[pk]);
}
strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A[{4}];\n", j, j, j, mat.diag[j]);
}
}
#else
template <std::size_t m_N, std::size_t storage_N>
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
{ {
const std::size_t iN = N(); const std::size_t iN = N();
@ -246,28 +179,28 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
{ {
std::size_t pi = mat.diag[i]; std::size_t pi = mat.diag[i];
//const nl_double f = 1.0 / m_A[pi++]; //const FT f = 1.0 / m_A[pi++];
strm("const double f{1} = 1.0 / m_A{2};\n", i, pi); strm("const double f{1} = 1.0 / m_A{2};\n", i, pi);
pi++; pi++;
const std::size_t piie = mat.ia[i+1]; const std::size_t piie = mat.row_idx[i+1];
//for (auto & j : nzbd) //for (auto & j : nzbd)
for (std::size_t j : nzbd) for (std::size_t j : nzbd)
{ {
// proceed to column i // proceed to column i
std::size_t pj = mat.ia[j]; std::size_t pj = mat.row_idx[j];
while (mat.ja[pj] < i) while (mat.col_idx[pj] < i)
pj++; pj++;
//const nl_double f1 = - m_A[pj++] * f; //const FT f1 = - m_A[pj++] * f;
strm("\tconst double f{1}_{2} = -f{3} * m_A{4};\n", i, j, i, pj); strm("\tconst double f{1}_{2} = -f{3} * m_A{4};\n", i, j, i, pj);
pj++; pj++;
// subtract row i from j */ // subtract row i from j */
for (std::size_t pii = pi; pii<piie; ) for (std::size_t pii = pi; pii<piie; )
{ {
while (mat.ja[pj] < mat.ja[pii]) while (mat.col_idx[pj] < mat.col_idx[pii])
pj++; pj++;
//m_A[pj++] += m_A[pii++] * f1; //m_A[pj++] += m_A[pii++] * f1;
strm("\tm_A{1} += m_A{2} * f{3}_{4};\n", pj, pii, i, j); strm("\tm_A{1} += m_A{2} * f{3}_{4};\n", pj, pii, i, j);
@ -284,18 +217,17 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
for (std::size_t j = iN - 1; j-- > 0;) for (std::size_t j = iN - 1; j-- > 0;)
{ {
strm("\tdouble tmp{1} = 0.0;\n", j); strm("\tdouble tmp{1} = 0.0;\n", j);
const std::size_t e = mat.ia[j+1]; const std::size_t e = mat.row_idx[j+1];
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++) for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
{ {
strm("\ttmp{1} += m_A{2} * V[{3}];\n", j, pk, mat.ja[pk]); strm("\ttmp{1} += m_A{2} * V[{3}];\n", j, pk, mat.col_idx[pk]);
} }
strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A{4};\n", j, j, j, mat.diag[j]); strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A{4};\n", j, j, j, mat.diag[j]);
} }
} }
#endif
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name() pstring matrix_solver_GCR_t<FT, SIZE>::static_compile_name()
{ {
plib::postringstream t; plib::postringstream t;
plib::putf8_fmt_writer w(&t); plib::putf8_fmt_writer w(&t);
@ -305,8 +237,8 @@ pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num); return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_code() std::pair<pstring, pstring> matrix_solver_GCR_t<FT, SIZE>::create_solver_code()
{ {
plib::postringstream t; plib::postringstream t;
plib::putf8_fmt_writer strm(&t); plib::putf8_fmt_writer strm(&t);
@ -320,69 +252,16 @@ std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_c
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_GCR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
const std::size_t iN = this->N(); const std::size_t iN = this->N();
nl_double RHS[storage_N];
nl_double new_V[storage_N];
mat.set_scalar(0.0); mat.set_scalar(0.0);
/* populate matrix */
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ this->m_terms[k]->fill_matrix(m_term_cr[k], RHS[k]);
terms_for_net_t *t = this->m_terms[k].get();
nl_double gtot_t = 0.0;
nl_double RHS_t = 0.0;
const std::size_t term_count = t->count();
const std::size_t railstart = t->m_railstart;
const nl_double * const RESTRICT gt = t->gt();
const nl_double * const RESTRICT go = t->go();
const nl_double * const RESTRICT Idr = t->Idr();
const nl_double * const * RESTRICT other_cur_analog = t->connected_net_V();
const unsigned * const RESTRICT tcr = m_term_cr[k].data();
#if 0
for (std::size_t i = 0; i < term_count; i++)
{
gtot_t += gt[i];
RHS_t += Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
RHS_t += go[i] * *other_cur_analog[i];
RHS[k] = RHS_t;
// add diagonal element
mat.A[mat.diag[k]] = gtot_t;
for (std::size_t i = 0; i < railstart; i++)
mat.A[tcr[i]] -= go[i];
}
#else
for (std::size_t i = 0; i < railstart; i++)
mat.A[tcr[i]] -= go[i];
for (std::size_t i = 0; i < railstart; i++)
{
gtot_t += gt[i];
RHS_t += Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
{
RHS_t += (Idr[i] + go[i] * *other_cur_analog[i]);
gtot_t += gt[i];
}
RHS[k] = RHS_t;
mat.A[mat.diag[k]] += gtot_t;
}
#endif
mat.ia[iN] = static_cast<mattype>(mat.nz_num);
/* now solve it */ /* now solve it */
@ -394,63 +273,14 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
} }
else else
{ {
for (std::size_t i = 0; i < iN - 1; i++) mat.gaussian_elimination(RHS);
{ /* backward substitution */
const auto &nzbd = this->m_terms[i]->m_nzbd; mat.gaussian_back_substitution(new_V, RHS);
if (nzbd.size() > 0)
{
std::size_t pi = mat.diag[i];
const nl_double f = 1.0 / mat.A[pi++];
const std::size_t piie = mat.ia[i+1];
for (std::size_t j : nzbd) // for (std::size_t j = i + 1; j < iN; j++)
{
// proceed to column i
//__builtin_prefetch(&m_A[mat.diag[j+1]], 1);
std::size_t pj = mat.ia[j];
while (mat.ja[pj] < i)
pj++;
const nl_double f1 = - mat.A[pj++] * f;
// subtract row i from j */
for (std::size_t pii = pi; pii<piie; )
{
while (mat.ja[pj] < mat.ja[pii])
pj++;
mat.A[pj++] += mat.A[pii++] * f1;
}
RHS[j] += f1 * RHS[i];
}
}
}
/* backward substitution
*
*/
/* row n-1 */
new_V[iN - 1] = RHS[iN - 1] / mat.A[mat.diag[iN - 1]];
for (std::size_t j = iN - 1; j-- > 0;)
{
//__builtin_prefetch(&new_V[j-1], 1);
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
double tmp = 0;
auto jdiag = mat.diag[j];
const std::size_t e = mat.ia[j+1];
for (std::size_t pk = jdiag + 1; pk < e; pk++)
{
tmp += mat.A[pk] * new_V[mat.ja[pk]];
}
new_V[j] = (RHS[j] - tmp) / mat.A[jdiag];
}
} }
this->m_stat_calculations++; this->m_stat_calculations++;
const nl_double err = (newton_raphson ? delta(new_V) : 0.0); const FT err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V); store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1; return (err > this->m_params.m_accuracy) ? 2 : 1;
} }

View File

@ -15,6 +15,7 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include "../plib/parray.h"
#include "mat_cr.h" #include "mat_cr.h"
#include "nld_ms_direct.h" #include "nld_ms_direct.h"
#include "nld_solver.h" #include "nld_solver.h"
@ -24,17 +25,34 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N> template <typename FT, int SIZE>
class matrix_solver_GMRES_t: public matrix_solver_direct_t<FT, SIZE>
{ {
public: public:
typedef FT float_type;
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = plib::sizeabs<FT, SIZE>::ABS();
// Maximum iterations before a restart ...
static constexpr const std::size_t restart_N = (storage_N > 0 ? 20 : 0);
/* Sort rows in ascending order. This should minimize fill-in and thus
* maximize the efficiency of the incomplete LUT.
*/
matrix_solver_GMRES_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size) matrix_solver_GMRES_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size) : matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_use_iLU_preconditioning(true) , m_use_iLU_preconditioning(true)
, m_use_more_precise_stop_condition(false) , m_use_more_precise_stop_condition(true)
, m_ILU_scale(0)
, m_accuracy_mult(1.0) , m_accuracy_mult(1.0)
, m_term_cr(size)
, mat(size) , mat(size)
, residual(size)
, Ax(size)
, m_LU(size)
//, m_v(size)
{ {
} }
@ -48,26 +66,32 @@ public:
private: private:
//typedef typename mat_cr_t<storage_N>::type mattype; //typedef typename mat_cr_t<storage_N>::type mattype;
typedef typename mat_cr_t<storage_N>::index_type mattype; typedef typename plib::mat_cr_t<FT, SIZE>::index_type mattype;
unsigned solve_ilu_gmres(nl_double (& RESTRICT x)[storage_N], const nl_double (& RESTRICT rhs)[storage_N], const unsigned restart_max, std::size_t mr, nl_double accuracy); template <typename VT, typename VRHS>
std::size_t solve_ilu_gmres(VT &x, const VRHS & rhs, const std::size_t restart_max, std::size_t mr, float_type accuracy);
std::vector<unsigned> m_term_cr[storage_N];
bool m_use_iLU_preconditioning; bool m_use_iLU_preconditioning;
bool m_use_more_precise_stop_condition; bool m_use_more_precise_stop_condition;
nl_double m_accuracy_mult; // FXIME: Save state std::size_t m_ILU_scale;
float_type m_accuracy_mult; // FXIME: Save state
mat_cr_t<storage_N> mat; plib::parray<std::vector<FT *>, SIZE> m_term_cr;
plib::mat_cr_t<float_type, SIZE> mat;
plib::parray<float_type, SIZE> residual;
plib::parray<float_type, SIZE> Ax;
nl_double m_LU[storage_N * storage_N]; plib::mat_cr_t<float_type, SIZE> m_LU;
nl_double m_c[storage_N + 1]; /* mr + 1 */ float_type m_c[restart_N + 1]; /* mr + 1 */
nl_double m_g[storage_N + 1]; /* mr + 1 */ float_type m_g[restart_N + 1]; /* mr + 1 */
nl_double m_ht[storage_N + 1][storage_N]; /* (mr + 1), mr */ float_type m_ht[restart_N + 1][restart_N]; /* (mr + 1), mr */
nl_double m_s[storage_N + 1]; /* mr + 1 */ float_type m_s[restart_N + 1]; /* mr + 1 */
nl_double m_v[storage_N + 1][storage_N]; /*(mr + 1), n */ float_type m_y[restart_N + 1]; /* mr + 1 */
nl_double m_y[storage_N + 1]; /* mr + 1 */
//plib::parray<float_type, SIZE> m_v[restart_N + 1]; /* mr + 1, n */
float_type m_v[restart_N + 1][storage_N]; /* mr + 1, n */
}; };
@ -75,106 +99,72 @@ private:
// matrix_solver - GMRES // matrix_solver - GMRES
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_GMRES_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets); matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
mattype nz = 0;
const std::size_t iN = this->N(); const std::size_t iN = this->N();
std::vector<std::vector<unsigned>> fill(iN);
for (std::size_t k=0; k<iN; k++) for (std::size_t k=0; k<iN; k++)
{ {
fill[k].resize(iN, decltype(mat)::FILL_INFINITY);
terms_for_net_t * RESTRICT row = this->m_terms[k].get(); terms_for_net_t * RESTRICT row = this->m_terms[k].get();
mat.ia[k] = nz;
for (std::size_t j=0; j<row->m_nz.size(); j++) for (std::size_t j=0; j<row->m_nz.size(); j++)
{ {
mat.ja[nz] = static_cast<mattype>(row->m_nz[j]); fill[k][static_cast<mattype>(row->m_nz[j])] = 0;
if (row->m_nz[j] == k)
mat.diag[k] = nz;
nz++;
} }
}
/* build pointers into the compressed row format matrix for each terminal */ mat.build_from_fill_mat(fill, 0);
m_LU.gaussian_extend_fill_mat(fill);
m_LU.build_from_fill_mat(fill, m_ILU_scale); // ILU(2)
//m_LU.build_from_fill_mat(fill, 9999, 20); // Band matrix width 20
for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++) /* build pointers into the compressed row format matrix for each terminal */
for (std::size_t k=0; k<iN; k++)
{
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
{ {
for (unsigned i = mat.ia[k]; i<nz; i++) for (std::size_t i = mat.row_idx[k]; i<mat.row_idx[k+1]; i++)
if (this->m_terms[k]->connected_net_idx()[j] == static_cast<int>(mat.ja[i])) if (this->m_terms[k]->connected_net_idx()[j] == static_cast<int>(mat.col_idx[i]))
{ {
m_term_cr[k].push_back(i); m_term_cr[k].push_back(&mat.A[i]);
break; break;
} }
} }
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart); nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
m_term_cr[k].push_back(&mat.A[mat.diag[k]]);
} }
mat.ia[iN] = nz;
mat.nz_num = nz;
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_GMRES_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
const std::size_t iN = this->N(); const std::size_t iN = this->N();
/* ideally, we could get an estimate for the spectral radius of plib::parray<FT, SIZE> RHS(iN);
* Inv(D - L) * U //float_type new_V[storage_N];
*
* and estimate using
*
* omega = 2.0 / (1.0 + std::sqrt(1-rho))
*/
//nz_num = 0;
nl_double RHS[storage_N];
nl_double new_V[storage_N];
mat.set_scalar(0.0); mat.set_scalar(0.0);
/* populate matrix and V for first estimate */
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double gtot_t = 0.0; this->m_terms[k]->fill_matrix(m_term_cr[k], RHS[k]);
nl_double RHS_t = 0.0; this->m_new_V[k] = this->m_nets[k]->Q_Analog();
const std::size_t term_count = this->m_terms[k]->count();
const std::size_t railstart = this->m_terms[k]->m_railstart;
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
const nl_double * const RESTRICT go = this->m_terms[k]->go();
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->connected_net_V();
for (std::size_t i = 0; i < term_count; i++)
{
gtot_t = gtot_t + gt[i];
RHS_t = RHS_t + Idr[i];
}
for (std::size_t i = railstart; i < term_count; i++)
RHS_t = RHS_t + go[i] * *other_cur_analog[i];
RHS[k] = RHS_t;
// add diagonal element
mat.A[mat.diag[k]] = gtot_t;
for (std::size_t i = 0; i < railstart; i++)
{
const std::size_t pi = m_term_cr[k][i];
mat.A[pi] -= go[i];
}
new_V[k] = this->m_nets[k]->Q_Analog();
} }
mat.ia[iN] = static_cast<mattype>(mat.nz_num);
const nl_double accuracy = this->m_params.m_accuracy; //mat.row_idx[iN] = static_cast<mattype>(mat.nz_num);
const float_type accuracy = this->m_params.m_accuracy;
const std::size_t mr = (iN > 3 ) ? static_cast<std::size_t>(std::sqrt(iN) * 2.0) : iN; const std::size_t mr = (iN > 3 ) ? static_cast<std::size_t>(std::sqrt(iN) * 2.0) : iN;
unsigned iter = std::max(1u, this->m_params.m_gs_loops); std::size_t iter = std::max(1u, this->m_params.m_gs_loops);
unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy); std::size_t gsl = solve_ilu_gmres(this->m_new_V, RHS, iter, mr, accuracy);
const std::size_t failed = mr * iter; const std::size_t failed = mr * iter;
this->m_iterative_total += gsl; this->m_iterative_total += gsl;
@ -183,26 +173,29 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
if (gsl >= failed) if (gsl >= failed)
{ {
this->m_iterative_fail++; this->m_iterative_fail++;
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson); return matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(newton_raphson);
} }
const nl_double err = (newton_raphson ? this->delta(new_V) : 0.0); //if (newton_raphson)
this->store(new_V); // printf("%e %e\n", this->delta(this->m_new_V), this->m_params.m_accuracy);
const float_type err = (newton_raphson ? this->delta(this->m_new_V) : 0.0);
this->store(this->m_new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1; return (err > this->m_params.m_accuracy) ? 2 : 1;
} }
template <typename T> template <typename T>
inline void givens_mult( const T c, const T s, T & g0, T & g1 ) inline void givens_mult( const T c, const T s, T & g0, T & g1 )
{ {
const T tg0 = c * g0 - s * g1; const T g0_last(g0);
const T tg1 = s * g0 + c * g1;
g0 = tg0; g0 = c * g0 - s * g1;
g1 = tg1; g1 = s * g0_last + c * g1;
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RESTRICT x)[storage_N], const nl_double (& RESTRICT rhs)[storage_N], const unsigned restart_max, std::size_t mr, nl_double accuracy) template <typename VT, typename VRHS>
std::size_t matrix_solver_GMRES_t<FT, SIZE>::solve_ilu_gmres (VT &x, const VRHS &rhs, const std::size_t restart_max, std::size_t mr, float_type accuracy)
{ {
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* The code below was inspired by code published by John Burkardt under * The code below was inspired by code published by John Burkardt under
@ -226,15 +219,22 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
* *
*------------------------------------------------------------------------*/ *------------------------------------------------------------------------*/
unsigned itr_used = 0; std::size_t itr_used = 0;
double rho_delta = 0.0; double rho_delta = 0.0;
const std::size_t n = this->N(); const std::size_t n = this->N();
if (mr > restart_N) mr = restart_N;
if (mr > n) mr = n; if (mr > n) mr = n;
if (m_use_iLU_preconditioning) if (m_use_iLU_preconditioning)
mat.incomplete_LU_factorization(m_LU); {
if (m_ILU_scale < 1)
m_LU.raw_copy_from(mat);
else
m_LU.reduction_copy_from(mat);
m_LU.incomplete_LU_factorization();
}
if (m_use_more_precise_stop_condition) if (m_use_more_precise_stop_condition)
{ {
@ -249,27 +249,22 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
* differently: The invest doesn't pay off. * differently: The invest doesn't pay off.
* Therefore we use the approach in the else part. * Therefore we use the approach in the else part.
*/ */
nl_double t[storage_N]; vec_set_scalar(n, residual, accuracy);
nl_double Ax[storage_N]; mat.mult_vec(residual, Ax);
vec_set(n, accuracy, t);
mat.mult_vec(t, Ax);
mat.solveLUx(m_LU, Ax); m_LU.solveLUx(Ax);
const nl_double rho_to_accuracy = std::sqrt(vec_mult2(n, Ax)) / accuracy; const float_type rho_to_accuracy = std::sqrt(vec_mult2<FT>(n, Ax)) / accuracy;
rho_delta = accuracy * rho_to_accuracy; rho_delta = accuracy * rho_to_accuracy;
} }
else else
rho_delta = accuracy * std::sqrt(n) * m_accuracy_mult; rho_delta = accuracy * std::sqrt(static_cast<FT>(n)) * m_accuracy_mult;
for (unsigned itr = 0; itr < restart_max; itr++) for (std::size_t itr = 0; itr < restart_max; itr++)
{ {
std::size_t last_k = mr; std::size_t last_k = mr;
nl_double rho; float_type rho;
nl_double Ax[storage_N];
nl_double residual[storage_N];
mat.mult_vec(x, Ax); mat.mult_vec(x, Ax);
@ -277,19 +272,19 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
if (m_use_iLU_preconditioning) if (m_use_iLU_preconditioning)
{ {
mat.solveLUx(m_LU, residual); m_LU.solveLUx(residual);
} }
rho = std::sqrt(vec_mult2(n, residual)); rho = std::sqrt(vec_mult2<FT>(n, residual));
if (rho < rho_delta) if (rho < rho_delta)
return itr_used + 1; return itr_used + 1;
vec_set(mr+1, NL_FCONST(0.0), m_g); vec_set_scalar(mr+1, m_g, NL_FCONST(0.0));
m_g[0] = rho; m_g[0] = rho;
for (std::size_t i = 0; i < mr + 1; i++) for (std::size_t i = 0; i < mr + 1; i++)
vec_set(mr, NL_FCONST(0.0), m_ht[i]); vec_set_scalar(mr, m_ht[i], NL_FCONST(0.0));
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]); vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
@ -300,14 +295,14 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
mat.mult_vec(m_v[k], m_v[k1]); mat.mult_vec(m_v[k], m_v[k1]);
if (m_use_iLU_preconditioning) if (m_use_iLU_preconditioning)
mat.solveLUx(m_LU, m_v[k1]); m_LU.solveLUx(m_v[k1]);
for (std::size_t j = 0; j <= k; j++) for (std::size_t j = 0; j <= k; j++)
{ {
m_ht[j][k] = vec_mult(n, m_v[k1], m_v[j]); m_ht[j][k] = vec_mult<float_type>(n, m_v[k1], m_v[j]);
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]); vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
} }
m_ht[k1][k] = std::sqrt(vec_mult2(n, m_v[k1])); m_ht[k1][k] = std::sqrt(vec_mult2<FT>(n, m_v[k1]));
if (m_ht[k1][k] != 0.0) if (m_ht[k1][k] != 0.0)
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]); vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
@ -315,7 +310,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
for (std::size_t j = 0; j < k; j++) for (std::size_t j = 0; j < k; j++)
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]); givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
const nl_double mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]); const float_type mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
m_c[k] = m_ht[k][k] * mu; m_c[k] = m_ht[k][k] * mu;
m_s[k] = -m_ht[k1][k] * mu; m_s[k] = -m_ht[k1][k] * mu;
@ -345,36 +340,17 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
{ {
double tmp = m_g[i]; double tmp = m_g[i];
for (std::size_t j = i + 1; j <= last_k; j++) for (std::size_t j = i + 1; j <= last_k; j++)
{
tmp -= m_ht[i][j] * m_y[j]; tmp -= m_ht[i][j] * m_y[j];
}
m_y[i] = tmp / m_ht[i][i]; m_y[i] = tmp / m_ht[i][i];
} }
for (std::size_t i = 0; i <= last_k; i++) for (std::size_t i = 0; i <= last_k; i++)
vec_add_mult_scalar(n, m_v[i], m_y[i], x); vec_add_mult_scalar(n, m_v[i], m_y[i], x);
#if 1
if (rho <= rho_delta) if (rho <= rho_delta)
{
break; break;
}
#else
/* we try to approximate the x difference between to steps using m_v[last_k] */
double xdelta = m_y[last_k] * vec_maxabs(n, m_v[last_k]);
if (xdelta < accuracy)
{
if (m_accuracy_mult < 16384.0)
m_accuracy_mult = m_accuracy_mult * 2.0;
break;
}
else
m_accuracy_mult = m_accuracy_mult / 2.0;
#endif
} }
return itr_used; return itr_used;
} }

View File

@ -41,19 +41,21 @@
namespace netlist namespace netlist
{ {
namespace devices namespace devices
{ {
//#define nl_ext_double _float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
class matrix_solver_sm_t: public matrix_solver_t class matrix_solver_sm_t: public matrix_solver_t
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
typedef FT float_ext_type;
typedef FT float_type;
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = 100;
matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name, matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size); const solver_parameters_t *params, const std::size_t size);
@ -66,7 +68,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson); unsigned solve_non_dynamic(const bool newton_raphson);
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; } constexpr std::size_t N() const { return m_dim; }
void LE_invert(); void LE_invert();
@ -75,33 +77,33 @@ protected:
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; } float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &W(const T1 &r, const T2 &c) { return m_W[r][c]; } float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &Ainv(const T1 &r, const T2 &c) { return m_Ainv[r][c]; } float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[r][c]; }
template <typename T1> template <typename T1>
nl_ext_double &RHS(const T1 &r) { return m_RHS[r]; } float_ext_type &RHS(const T1 &r) { return m_RHS[r]; }
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; } float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &lAinv(const T1 &r, const T2 &c) { return m_lAinv[r][c]; } float_ext_type &lAinv(const T1 &r, const T2 &c) { return m_lAinv[r][c]; }
nl_double m_last_RHS[storage_N]; // right hand side - contains currents float_type m_last_RHS[storage_N]; // right hand side - contains currents
private: private:
static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8; static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8;
nl_ext_double m_A[storage_N][m_pitch]; float_ext_type m_A[storage_N][m_pitch];
nl_ext_double m_Ainv[storage_N][m_pitch]; float_ext_type m_Ainv[storage_N][m_pitch];
nl_ext_double m_W[storage_N][m_pitch]; float_ext_type m_W[storage_N][m_pitch];
nl_ext_double m_RHS[storage_N]; // right hand side - contains currents float_ext_type m_RHS[storage_N]; // right hand side - contains currents
nl_ext_double m_lA[storage_N][m_pitch]; float_ext_type m_lA[storage_N][m_pitch];
nl_ext_double m_lAinv[storage_N][m_pitch]; float_ext_type m_lAinv[storage_N][m_pitch];
//nl_ext_double m_RHSx[storage_N]; //float_ext_type m_RHSx[storage_N];
const std::size_t m_dim; const std::size_t m_dim;
std::size_t m_cnt; std::size_t m_cnt;
@ -112,13 +114,13 @@ private:
// matrix_solver_direct // matrix_solver_direct
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t() matrix_solver_sm_t<FT, SIZE>::~matrix_solver_sm_t()
{ {
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_sm_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_t::setup_base(nets); matrix_solver_t::setup_base(nets);
@ -130,8 +132,8 @@ void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_sm_t<m_N, storage_N>::LE_invert() void matrix_solver_sm_t<FT, SIZE>::LE_invert()
{ {
const std::size_t kN = N(); const std::size_t kN = N();
@ -148,7 +150,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
for (std::size_t i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const float_type f = 1.0 / W(i,i);
const auto * RESTRICT const p = m_terms[i]->m_nzrd.data(); const auto * RESTRICT const p = m_terms[i]->m_nzrd.data();
const std::size_t e = m_terms[i]->m_nzrd.size(); const std::size_t e = m_terms[i]->m_nzrd.size();
@ -159,7 +161,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
for (std::size_t jb = 0; jb < eb; jb++) for (std::size_t jb = 0; jb < eb; jb++)
{ {
const unsigned j = pb[jb]; const unsigned j = pb[jb];
const nl_double f1 = - W(j,i) * f; const float_type f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (std::size_t k = 0; k < e; k++) for (std::size_t k = 0; k < e; k++)
@ -173,10 +175,10 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
for (std::size_t i = kN; i-- > 0; ) for (std::size_t i = kN; i-- > 0; )
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const float_type f = 1.0 / W(i,i);
for (std::size_t j = i; j-- > 0; ) for (std::size_t j = i; j-- > 0; )
{ {
const nl_double f1 = - W(j,i) * f; const float_type f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (std::size_t k = i; k < kN; k++) for (std::size_t k = i; k < kN; k++)
@ -193,9 +195,9 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
} }
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
template <typename T> template <typename T>
void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x( void matrix_solver_sm_t<FT, SIZE>::LE_compute_x(
T * RESTRICT x) T * RESTRICT x)
{ {
const std::size_t kN = N(); const std::size_t kN = N();
@ -205,7 +207,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
for (std::size_t k=0; k<kN; k++) for (std::size_t k=0; k<kN; k++)
{ {
const nl_double f = RHS(k); const float_type f = RHS(k);
for (std::size_t i=0; i<kN; i++) for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f; x[i] += Ainv(i,k) * f;
@ -213,13 +215,13 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_sm_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
{ {
static constexpr const bool incremental = true; static constexpr const bool incremental = true;
const std::size_t iN = N(); const std::size_t iN = N();
nl_double new_V[storage_N]; // = { 0.0 }; float_type new_V[storage_N]; // = { 0.0 };
if ((m_cnt % 50) == 0) if ((m_cnt % 50) == 0)
{ {
@ -236,7 +238,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
} }
for (std::size_t row = 0; row < iN; row ++) for (std::size_t row = 0; row < iN; row ++)
{ {
nl_double v[m_pitch] = {0}; float_type v[m_pitch] = {0};
std::size_t cols[m_pitch]; std::size_t cols[m_pitch];
std::size_t colcount = 0; std::size_t colcount = 0;
@ -252,10 +254,10 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
if (colcount > 0) if (colcount > 0)
{ {
nl_double lamba = 0.0; float_type lamba = 0.0;
nl_double w[m_pitch] = {0}; float_type w[m_pitch] = {0};
nl_double z[m_pitch]; float_type z[m_pitch];
/* compute w and lamba */ /* compute w and lamba */
for (std::size_t i = 0; i < iN; i++) for (std::size_t i = 0; i < iN; i++)
z[i] = Ainv(i, row); /* u is row'th column */ z[i] = Ainv(i, row); /* u is row'th column */
@ -266,7 +268,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
for (std::size_t j=0; j<colcount; j++) for (std::size_t j=0; j<colcount; j++)
{ {
std::size_t col = cols[j]; std::size_t col = cols[j];
nl_double f = v[col]; float_type f = v[col];
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
w[k] += Ainv(col,k) * f; /* Transpose(Ainv) * v */ w[k] += Ainv(col,k) * f; /* Transpose(Ainv) * v */
} }
@ -274,7 +276,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
lamba = -1.0 / (1.0 + lamba); lamba = -1.0 / (1.0 + lamba);
for (std::size_t i=0; i<iN; i++) for (std::size_t i=0; i<iN; i++)
{ {
const nl_double f = lamba * z[i]; const float_type f = lamba * z[i];
if (f != 0.0) if (f != 0.0)
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
Ainv(i,k) += f * w[k]; Ainv(i,k) += f * w[k];
@ -288,16 +290,16 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
this->LE_compute_x(new_V); this->LE_compute_x(new_V);
const nl_double err = (newton_raphson ? delta(new_V) : 0.0); const float_type err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V); store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1; return (err > this->m_params.m_accuracy) ? 2 : 1;
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_sm_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
build_LE_A<matrix_solver_sm_t>(); this->build_LE_A(*this);
build_LE_RHS<matrix_solver_sm_t>(); this->build_LE_RHS(*this);
for (std::size_t i=0, iN=N(); i < iN; i++) for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i); m_last_RHS[i] = RHS(i);
@ -306,8 +308,8 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newto
return this->solve_non_dynamic(newton_raphson); return this->solve_non_dynamic(newton_raphson);
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_sm_t<m_N, storage_N>::matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name, matrix_solver_sm_t<FT, SIZE>::matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params) : matrix_solver_t(anetlist, name, NOSORT, params)
, m_dim(size) , m_dim(size)

View File

@ -20,15 +20,22 @@
namespace netlist namespace netlist
{ {
namespace devices namespace devices
{ {
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_t: public matrix_solver_direct_t<m_N, storage_N> template <typename FT, int SIZE>
class matrix_solver_SOR_t: public matrix_solver_direct_t<FT, SIZE>
{ {
public: public:
typedef FT float_type;
matrix_solver_SOR_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size) matrix_solver_SOR_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size) : matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
, m_lp_fact(*this, "m_lp_fact", 0) , m_lp_fact(*this, "m_lp_fact", 0)
, w(size, 0.0)
, one_m_w(size, 0.0)
, RHS(size, 0.0)
, new_V(size, 0.0)
{ {
} }
@ -38,7 +45,11 @@ public:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
private: private:
state_var<nl_double> m_lp_fact; state_var<float_type> m_lp_fact;
std::vector<float_type> w;
std::vector<float_type> one_m_w;
std::vector<float_type> RHS;
std::vector<float_type> new_V;
}; };
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
@ -46,14 +57,14 @@ private:
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_SOR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_SOR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets); matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
const std::size_t iN = this->N(); const std::size_t iN = this->N();
bool resched = false; bool resched = false;
@ -67,24 +78,19 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
* omega = 2.0 / (1.0 + std::sqrt(1-rho)) * omega = 2.0 / (1.0 + std::sqrt(1-rho))
*/ */
const nl_double ws = this->m_params.m_gs_sor; const float_type ws = this->m_params.m_gs_sor;
nl_double w[storage_N];
nl_double one_m_w[storage_N];
nl_double RHS[storage_N];
nl_double new_V[storage_N];
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double gtot_t = 0.0; float_type gtot_t = 0.0;
nl_double gabs_t = 0.0; float_type gabs_t = 0.0;
nl_double RHS_t = 0.0; float_type RHS_t = 0.0;
const std::size_t term_count = this->m_terms[k]->count(); const std::size_t term_count = this->m_terms[k]->count();
const nl_double * const RESTRICT gt = this->m_terms[k]->gt(); const float_type * const RESTRICT gt = this->m_terms[k]->gt();
const nl_double * const RESTRICT go = this->m_terms[k]->go(); const float_type * const RESTRICT go = this->m_terms[k]->go();
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr(); const float_type * const RESTRICT Idr = this->m_terms[k]->Idr();
const nl_double * const *other_cur_analog = this->m_terms[k]->connected_net_V(); const float_type * const *other_cur_analog = this->m_terms[k]->connected_net_V();
new_V[k] = this->m_nets[k]->Q_Analog(); new_V[k] = this->m_nets[k]->Q_Analog();
@ -123,22 +129,22 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
} }
} }
const nl_double accuracy = this->m_params.m_accuracy; const float_type accuracy = this->m_params.m_accuracy;
do { do {
resched = false; resched = false;
nl_double err = 0; float_type err = 0;
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
const int * RESTRICT net_other = this->m_terms[k]->connected_net_idx(); const int * RESTRICT net_other = this->m_terms[k]->connected_net_idx();
const std::size_t railstart = this->m_terms[k]->m_railstart; const std::size_t railstart = this->m_terms[k]->m_railstart;
const nl_double * RESTRICT go = this->m_terms[k]->go(); const float_type * RESTRICT go = this->m_terms[k]->go();
nl_double Idrive = 0.0; float_type Idrive = 0.0;
for (std::size_t i = 0; i < railstart; i++) for (std::size_t i = 0; i < railstart; i++)
Idrive = Idrive + go[i] * new_V[net_other[i]]; Idrive = Idrive + go[i] * new_V[static_cast<std::size_t>(net_other[i])];
const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k]; const float_type new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
err = std::max(std::abs(new_val - new_V[k]), err); err = std::max(std::abs(new_val - new_V[k]), err);
new_V[k] = new_val; new_V[k] = new_val;
@ -158,7 +164,7 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
{ {
// Fallback to direct solver ... // Fallback to direct solver ...
this->m_iterative_fail++; this->m_iterative_fail++;
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson); return matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(newton_raphson);
} }
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)

View File

@ -22,20 +22,24 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
template <std::size_t m_N, std::size_t storage_N>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, storage_N> template <typename FT, int SIZE>
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<FT, SIZE>
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
typedef FT float_type;
matrix_solver_SOR_mat_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, std::size_t size) matrix_solver_SOR_mat_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, std::size_t size)
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::DESCENDING, params, size) : matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
, m_Vdelta(*this, "m_Vdelta", 0.0) , m_Vdelta(*this, "m_Vdelta", std::vector<float_type>(size))
, m_omega(*this, "m_omega", params->m_gs_sor) , m_omega(*this, "m_omega", params->m_gs_sor)
, m_lp_fact(*this, "m_lp_fact", 0) , m_lp_fact(*this, "m_lp_fact", 0)
, m_gs_fail(*this, "m_gs_fail", 0) , m_gs_fail(*this, "m_gs_fail", 0)
, m_gs_total(*this, "m_gs_total", 0) , m_gs_total(*this, "m_gs_total", 0)
, new_V(size, 0.0)
{ {
} }
@ -46,28 +50,31 @@ public:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
private: private:
state_var<nl_double[storage_N]> m_Vdelta; //state_var<float_type[storage_N]> m_Vdelta;
state_var<std::vector<float_type>> m_Vdelta;
state_var<nl_double> m_omega; state_var<float_type> m_omega;
state_var<nl_double> m_lp_fact; state_var<float_type> m_lp_fact;
state_var<int> m_gs_fail; state_var<int> m_gs_fail;
state_var<int> m_gs_total; state_var<int> m_gs_total;
std::vector<float_type> new_V;
}; };
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// matrix_solver - Gauss - Seidel // matrix_solver - Gauss - Seidel
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_SOR_mat_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_SOR_mat_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets); matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
} }
#if 0 #if 0
//FIXME: move to solve_base //FIXME: move to solve_base
template <unsigned m_N, unsigned storage_N> template <unsigned m_N, unsigned storage_N>
nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve() float_type matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
{ {
/* /*
* enable linear prediction on first newton pass * enable linear prediction on first newton pass
@ -89,13 +96,13 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
if (USE_LINEAR_PREDICTION) if (USE_LINEAR_PREDICTION)
{ {
nl_double sq = 0; float_type sq = 0;
nl_double sqo = 0; float_type sqo = 0;
const nl_double rez_cts = 1.0 / this->current_timestep(); const float_type rez_cts = 1.0 / this->current_timestep();
for (unsigned k = 0; k < this->N(); k++) for (unsigned k = 0; k < this->N(); k++)
{ {
const analog_net_t *n = this->m_nets[k]; const analog_net_t *n = this->m_nets[k];
const nl_double nv = (n->Q_Analog() - this->m_last_V[k]) * rez_cts ; const float_type nv = (n->Q_Analog() - this->m_last_V[k]) * rez_cts ;
sq += nv * nv; sq += nv * nv;
sqo += this->m_Vdelta[k] * this->m_Vdelta[k]; sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
this->m_Vdelta[k] = nv; this->m_Vdelta[k] = nv;
@ -103,7 +110,7 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
// FIXME: used to be 1e90, but this would not be compatible with float // FIXME: used to be 1e90, but this would not be compatible with float
if (sqo > NL_FCONST(1e-20)) if (sqo > NL_FCONST(1e-20))
m_lp_fact = std::min(std::sqrt(sq/sqo), (nl_double) 2.0); m_lp_fact = std::min(std::sqrt(sq/sqo), (float_type) 2.0);
else else
m_lp_fact = NL_FCONST(0.0); m_lp_fact = NL_FCONST(0.0);
} }
@ -113,8 +120,8 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
} }
#endif #endif
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_SOR_mat_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
/* The matrix based code looks a lot nicer but actually is 30% slower than /* The matrix based code looks a lot nicer but actually is 30% slower than
* the optimized code which works directly on the data structures. * the optimized code which works directly on the data structures.
@ -122,11 +129,10 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
*/ */
nl_double new_v[storage_N] = { 0.0 };
const std::size_t iN = this->N(); const std::size_t iN = this->N();
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>(); this->build_LE_A(*this);
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>(); this->build_LE_RHS(*this);
bool resched = false; bool resched = false;
@ -139,19 +145,19 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
if (1 && ws_cnt % 200 == 0) if (1 && ws_cnt % 200 == 0)
{ {
// update omega // update omega
nl_double lambdaN = 0; float_type lambdaN = 0;
nl_double lambda1 = 1e9; float_type lambda1 = 1e9;
for (int k = 0; k < iN; k++) for (int k = 0; k < iN; k++)
{ {
#if 0 #if 0
nl_double akk = std::abs(this->m_A[k][k]); float_type akk = std::abs(this->m_A[k][k]);
if ( akk > lambdaN) if ( akk > lambdaN)
lambdaN = akk; lambdaN = akk;
if (akk < lambda1) if (akk < lambda1)
lambda1 = akk; lambda1 = akk;
#else #else
nl_double akk = std::abs(this->m_A[k][k]); float_type akk = std::abs(this->m_A[k][k]);
nl_double s = 0.0; float_type s = 0.0;
for (int i=0; i<iN; i++) for (int i=0; i<iN; i++)
s = s + std::abs(this->m_A[k][i]); s = s + std::abs(this->m_A[k][i]);
akk = s / akk - 1.0; akk = s / akk - 1.0;
@ -170,25 +176,25 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
#endif #endif
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
new_v[k] = this->m_nets[k]->Q_Analog(); new_V[k] = this->m_nets[k]->Q_Analog();
do { do {
resched = false; resched = false;
nl_double cerr = 0.0; float_type cerr = 0.0;
for (std::size_t k = 0; k < iN; k++) for (std::size_t k = 0; k < iN; k++)
{ {
nl_double Idrive = 0; float_type Idrive = 0;
const auto *p = this->m_terms[k]->m_nz.data(); const auto *p = this->m_terms[k]->m_nz.data();
const std::size_t e = this->m_terms[k]->m_nz.size(); const std::size_t e = this->m_terms[k]->m_nz.size();
for (std::size_t i = 0; i < e; i++) for (std::size_t i = 0; i < e; i++)
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]]; Idrive = Idrive + this->A(k,p[i]) * new_V[p[i]];
const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k); const float_type delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
cerr = std::max(cerr, std::abs(delta)); cerr = std::max(cerr, std::abs(delta));
new_v[k] += delta; new_V[k] += delta;
} }
if (cerr > this->m_params.m_accuracy) if (cerr > this->m_params.m_accuracy)
@ -208,10 +214,10 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS"); //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
this->m_gs_fail++; this->m_gs_fail++;
return matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(newton_raphson); return matrix_solver_direct_t<FT, SIZE>::solve_non_dynamic(newton_raphson);
} }
else { else {
this->store(new_v); this->store(new_V.data());
return resched_cnt; return resched_cnt;
} }

View File

@ -50,15 +50,18 @@ namespace netlist
{ {
namespace devices namespace devices
{ {
//#define nl_ext_double _float128 // slow, very slow
//#define nl_ext_double long double // slightly slower
#define nl_ext_double nl_double
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
class matrix_solver_w_t: public matrix_solver_t class matrix_solver_w_t: public matrix_solver_t
{ {
friend class matrix_solver_t; friend class matrix_solver_t;
public: public:
typedef FT float_ext_type;
typedef FT float_type;
// FIXME: dirty hack to make this compile
static constexpr const std::size_t storage_N = 100;
matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size); matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
@ -71,7 +74,7 @@ protected:
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override; virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
unsigned solve_non_dynamic(const bool newton_raphson); unsigned solve_non_dynamic(const bool newton_raphson);
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; } constexpr std::size_t N() const { return m_dim; }
void LE_invert(); void LE_invert();
@ -80,40 +83,40 @@ protected:
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; } float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &W(const T1 &r, const T2 &c) { return m_W[r][c]; } float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
/* access to Ainv for fixed columns over row, there store transposed */ /* access to Ainv for fixed columns over row, there store transposed */
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; } float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; }
template <typename T1> template <typename T1>
nl_ext_double &RHS(const T1 &r) { return m_RHS[r]; } float_ext_type &RHS(const T1 &r) { return m_RHS[r]; }
template <typename T1, typename T2> template <typename T1, typename T2>
nl_ext_double &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; } float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
nl_double m_last_RHS[storage_N]; // right hand side - contains currents float_type m_last_RHS[storage_N]; // right hand side - contains currents
private: private:
static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8; static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8;
nl_ext_double m_A[storage_N][m_pitch]; float_ext_type m_A[storage_N][m_pitch];
nl_ext_double m_Ainv[storage_N][m_pitch]; float_ext_type m_Ainv[storage_N][m_pitch];
nl_ext_double m_W[storage_N][m_pitch]; float_ext_type m_W[storage_N][m_pitch];
nl_ext_double m_RHS[storage_N]; // right hand side - contains currents float_ext_type m_RHS[storage_N]; // right hand side - contains currents
nl_ext_double m_lA[storage_N][m_pitch]; float_ext_type m_lA[storage_N][m_pitch];
/* temporary */ /* temporary */
nl_double H[storage_N][m_pitch] ; float_type H[storage_N][m_pitch] ;
unsigned rows[storage_N]; unsigned rows[storage_N];
unsigned cols[storage_N][m_pitch]; unsigned cols[storage_N][m_pitch];
unsigned colcount[storage_N]; unsigned colcount[storage_N];
unsigned m_cnt; unsigned m_cnt;
//nl_ext_double m_RHSx[storage_N]; //float_ext_type m_RHSx[storage_N];
const std::size_t m_dim; const std::size_t m_dim;
@ -123,13 +126,13 @@ private:
// matrix_solver_direct // matrix_solver_direct
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_w_t<m_N, storage_N>::~matrix_solver_w_t() matrix_solver_w_t<FT, SIZE>::~matrix_solver_w_t()
{ {
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets) void matrix_solver_w_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
{ {
matrix_solver_t::setup_base(nets); matrix_solver_t::setup_base(nets);
@ -141,8 +144,8 @@ void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
void matrix_solver_w_t<m_N, storage_N>::LE_invert() void matrix_solver_w_t<FT, SIZE>::LE_invert()
{ {
const std::size_t kN = N(); const std::size_t kN = N();
@ -159,7 +162,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
for (std::size_t i = 0; i < kN; i++) for (std::size_t i = 0; i < kN; i++)
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const float_type f = 1.0 / W(i,i);
const auto * RESTRICT const p = m_terms[i]->m_nzrd.data(); const auto * RESTRICT const p = m_terms[i]->m_nzrd.data();
const size_t e = m_terms[i]->m_nzrd.size(); const size_t e = m_terms[i]->m_nzrd.size();
@ -170,7 +173,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
for (std::size_t jb = 0; jb < eb; jb++) for (std::size_t jb = 0; jb < eb; jb++)
{ {
const auto j = pb[jb]; const auto j = pb[jb];
const nl_double f1 = - W(j,i) * f; const float_type f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (std::size_t k = 0; k < e; k++) for (std::size_t k = 0; k < e; k++)
@ -184,10 +187,10 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
for (std::size_t i = kN; i-- > 0; ) for (std::size_t i = kN; i-- > 0; )
{ {
/* FIXME: Singular matrix? */ /* FIXME: Singular matrix? */
const nl_double f = 1.0 / W(i,i); const float_type f = 1.0 / W(i,i);
for (std::size_t j = i; j-- > 0; ) for (std::size_t j = i; j-- > 0; )
{ {
const nl_double f1 = - W(j,i) * f; const float_type f1 = - W(j,i) * f;
if (f1 != 0.0) if (f1 != 0.0)
{ {
for (std::size_t k = i; k < kN; k++) for (std::size_t k = i; k < kN; k++)
@ -203,9 +206,9 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
} }
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
template <typename T> template <typename T>
void matrix_solver_w_t<m_N, storage_N>::LE_compute_x( void matrix_solver_w_t<FT, SIZE>::LE_compute_x(
T * RESTRICT x) T * RESTRICT x)
{ {
const std::size_t kN = N(); const std::size_t kN = N();
@ -215,7 +218,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
for (std::size_t k=0; k<kN; k++) for (std::size_t k=0; k<kN; k++)
{ {
const nl_double f = RHS(k); const float_type f = RHS(k);
for (std::size_t i=0; i<kN; i++) for (std::size_t i=0; i<kN; i++)
x[i] += Ainv(i,k) * f; x[i] += Ainv(i,k) * f;
@ -223,12 +226,12 @@ void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_w_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
{ {
const auto iN = N(); const auto iN = N();
nl_double new_V[storage_N]; // = { 0.0 }; float_type new_V[storage_N]; // = { 0.0 };
if ((m_cnt % 100) == 0) if ((m_cnt % 100) == 0)
{ {
@ -266,7 +269,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
/* construct w = transform(V) * y /* construct w = transform(V) * y
* dim: rowcount x iN * dim: rowcount x iN
* */ * */
nl_double w[storage_N]; float_type w[storage_N];
for (unsigned i = 0; i < rowcount; i++) for (unsigned i = 0; i < rowcount; i++)
{ {
const unsigned r = rows[i]; const unsigned r = rows[i];
@ -287,7 +290,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
for (unsigned k=0; k< colcount[i]; k++) for (unsigned k=0; k< colcount[i]; k++)
{ {
const unsigned col = cols[i][k]; const unsigned col = cols[i][k];
nl_double f = VT(rows[i],col); float_type f = VT(rows[i],col);
if (f!=0.0) if (f!=0.0)
for (unsigned j= 0; j < rowcount; j++) for (unsigned j= 0; j < rowcount; j++)
H[i][j] += f * Ainv(col,rows[j]); H[i][j] += f * Ainv(col,rows[j]);
@ -298,15 +301,15 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
{ {
if (H[i][i] == 0.0) if (H[i][i] == 0.0)
printf("%s H singular\n", this->name().c_str()); printf("%s H singular\n", this->name().c_str());
const nl_double f = 1.0 / H[i][i]; const float_type f = 1.0 / H[i][i];
for (unsigned j = i+1; j < rowcount; j++) for (unsigned j = i+1; j < rowcount; j++)
{ {
const nl_double f1 = - f * H[j][i]; const float_type f1 = - f * H[j][i];
if (f1!=0.0) if (f1!=0.0)
{ {
nl_double *pj = &H[j][i+1]; float_type *pj = &H[j][i+1];
const nl_double *pi = &H[i][i+1]; const float_type *pi = &H[i][i+1];
for (unsigned k = 0; k < rowcount-i-1; k++) for (unsigned k = 0; k < rowcount-i-1; k++)
pj[k] += f1 * pi[k]; pj[k] += f1 * pi[k];
//H[j][k] += f1 * H[i][k]; //H[j][k] += f1 * H[i][k];
@ -316,12 +319,12 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
} }
/* Back substitution */ /* Back substitution */
//inv(H) w = t w = H t //inv(H) w = t w = H t
nl_double t[storage_N]; // FIXME: convert to member float_type t[storage_N]; // FIXME: convert to member
for (unsigned j = rowcount; j-- > 0; ) for (unsigned j = rowcount; j-- > 0; )
{ {
nl_double tmp = 0; float_type tmp = 0;
const nl_double *pj = &H[j][j+1]; const float_type *pj = &H[j][j+1];
const nl_double *tj = &t[j+1]; const float_type *tj = &t[j+1];
for (unsigned k = 0; k < rowcount-j-1; k++) for (unsigned k = 0; k < rowcount-j-1; k++)
tmp += pj[k] * tj[k]; tmp += pj[k] * tj[k];
//tmp += H[j][k] * t[k]; //tmp += H[j][k] * t[k];
@ -331,7 +334,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
/* x = y - Zt */ /* x = y - Zt */
for (unsigned i=0; i<iN; i++) for (unsigned i=0; i<iN; i++)
{ {
nl_double tmp = 0.0; float_type tmp = 0.0;
for (unsigned j=0; j<rowcount;j++) for (unsigned j=0; j<rowcount;j++)
{ {
const unsigned row = rows[j]; const unsigned row = rows[j];
@ -346,7 +349,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
if (0) if (0)
for (unsigned i=0; i<iN; i++) for (unsigned i=0; i<iN; i++)
{ {
nl_double tmp = 0.0; float_type tmp = 0.0;
for (unsigned j=0; j<iN; j++) for (unsigned j=0; j<iN; j++)
{ {
tmp += A(i,j) * new_V[j]; tmp += A(i,j) * new_V[j];
@ -355,16 +358,16 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i)); printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i));
} }
const nl_double err = (newton_raphson ? delta(new_V) : 0.0); const float_type err = (newton_raphson ? delta(new_V) : 0.0);
store(new_V); store(new_V);
return (err > this->m_params.m_accuracy) ? 2 : 1; return (err > this->m_params.m_accuracy) ? 2 : 1;
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson) unsigned matrix_solver_w_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
{ {
build_LE_A<matrix_solver_w_t>(); this->build_LE_A(*this);
build_LE_RHS<matrix_solver_w_t>(); this->build_LE_RHS(*this);
for (std::size_t i=0, iN=N(); i < iN; i++) for (std::size_t i=0, iN=N(); i < iN; i++)
m_last_RHS[i] = RHS(i); m_last_RHS[i] = RHS(i);
@ -373,8 +376,8 @@ unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton
return this->solve_non_dynamic(newton_raphson); return this->solve_non_dynamic(newton_raphson);
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_w_t<m_N, storage_N>::matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name, matrix_solver_w_t<FT, SIZE>::matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name,
const solver_parameters_t *params, const std::size_t size) const solver_parameters_t *params, const std::size_t size)
: matrix_solver_t(anetlist, name, NOSORT, params) : matrix_solver_t(anetlist, name, NOSORT, params)
,m_cnt(0) ,m_cnt(0)

View File

@ -133,12 +133,12 @@ matrix_solver_t * create_it(netlist_base_t &nl, pstring name, solver_parameters_
return plib::palloc<C>(nl, name, &params, size); return plib::palloc<C>(nl, name, &params, size);
} }
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername) matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername)
{ {
if (m_method() == "SOR_MAT") if (m_method() == "SOR_MAT")
{ {
return create_it<matrix_solver_SOR_mat_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
//typedef matrix_solver_SOR_mat_t<m_N,storage_N> solver_sor_mat; //typedef matrix_solver_SOR_mat_t<m_N,storage_N> solver_sor_mat;
//return plib::make_unique<solver_sor_mat>(state(), solvername, &m_params, size); //return plib::make_unique<solver_sor_mat>(state(), solvername, &m_params, size);
} }
@ -146,34 +146,34 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pst
{ {
if (size > 0) // GCR always outperforms MAT solver if (size > 0) // GCR always outperforms MAT solver
{ {
return create_it<matrix_solver_GCR_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
else else
{ {
return create_it<matrix_solver_direct_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
} }
else if (m_method() == "MAT") else if (m_method() == "MAT")
{ {
return create_it<matrix_solver_direct_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
else if (m_method() == "SM") else if (m_method() == "SM")
{ {
/* Sherman-Morrison Formula */ /* Sherman-Morrison Formula */
return create_it<matrix_solver_sm_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
else if (m_method() == "W") else if (m_method() == "W")
{ {
/* Woodbury Formula */ /* Woodbury Formula */
return create_it<matrix_solver_w_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
else if (m_method() == "SOR") else if (m_method() == "SOR")
{ {
return create_it<matrix_solver_SOR_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
else if (m_method() == "GMRES") else if (m_method() == "GMRES")
{ {
return create_it<matrix_solver_GMRES_t<m_N, storage_N>>(state(), solvername, m_params, size); return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
} }
else else
{ {
@ -288,64 +288,64 @@ void NETLIB_NAME(solver)::post_start()
switch (net_count) switch (net_count)
{ {
#if 0 #if 1
case 1: case 1:
if (use_specific) if (use_specific)
ms = plib::palloc<matrix_solver_direct1_t>(state(), sname, &m_params); ms = plib::palloc<matrix_solver_direct1_t<double>>(state(), sname, &m_params);
else else
ms = create_solver<1,1>(1, sname); ms = create_solver<double, 1>(1, sname);
break; break;
case 2: case 2:
if (use_specific) if (use_specific)
ms = plib::palloc<matrix_solver_direct2_t>(state(), sname, &m_params); ms = plib::palloc<matrix_solver_direct2_t<double>>(state(), sname, &m_params);
else else
ms = create_solver<2,2>(2, sname); ms = create_solver<double, 2>(2, sname);
break; break;
#if 1 #if 1
case 3: case 3:
ms = create_solver<3,3>(3, sname); ms = create_solver<double, 3>(3, sname);
break; break;
case 4: case 4:
ms = create_solver<4,4>(4, sname); ms = create_solver<double, 4>(4, sname);
break; break;
case 5: case 5:
ms = create_solver<5,5>(5, sname); ms = create_solver<double, 5>(5, sname);
break; break;
case 6: case 6:
ms = create_solver<6,6>(6, sname); ms = create_solver<double, 6>(6, sname);
break; break;
case 7: case 7:
ms = create_solver<7,7>(7, sname); ms = create_solver<double, 7>(7, sname);
break; break;
case 8: case 8:
ms = create_solver<8,8>(8, sname); ms = create_solver<double, 8>(8, sname);
break; break;
case 9: case 9:
ms = create_solver<9,9>(9, sname); ms = create_solver<double, 9>(9, sname);
break; break;
case 10: case 10:
ms = create_solver<10,10>(10, sname); ms = create_solver<double, 10>(10, sname);
break; break;
case 11: case 11:
ms = create_solver<11,11>(11, sname); ms = create_solver<double, 11>(11, sname);
break; break;
case 12: case 12:
ms = create_solver<12,12>(12, sname); ms = create_solver<double, 12>(12, sname);
break; break;
case 15: case 15:
ms = create_solver<15,15>(15, sname); ms = create_solver<double, 15>(15, sname);
break; break;
case 31: case 31:
ms = create_solver<31,31>(31, sname); ms = create_solver<double, 31>(31, sname);
break; break;
case 35: case 35:
ms = create_solver<35,35>(35, sname); ms = create_solver<double, 35>(35, sname);
break; break;
case 43: case 43:
ms = create_solver<43,43>(43, sname); ms = create_solver<double, 43>(43, sname);
break; break;
case 49: case 49:
ms = create_solver<49,49>(49, sname); ms = create_solver<double, 49>(49, sname);
break; break;
#endif #endif
#if 0 #if 0
@ -358,32 +358,31 @@ void NETLIB_NAME(solver)::post_start()
log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count); log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count);
if (net_count <= 8) if (net_count <= 8)
{ {
ms = create_solver<0, 8>(net_count, sname); ms = create_solver<double, -8>(net_count, sname);
} }
else if (net_count <= 16) else if (net_count <= 16)
{ {
ms = create_solver<0,16>(net_count, sname); ms = create_solver<double, -16>(net_count, sname);
} }
else if (net_count <= 32) else if (net_count <= 32)
{ {
ms = create_solver<0,32>(net_count, sname); ms = create_solver<double, -32>(net_count, sname);
} }
else else
if (net_count <= 64) if (net_count <= 64)
{ {
ms = create_solver<0,64>(net_count, sname); ms = create_solver<double, -64>(net_count, sname);
} }
else else
if (net_count <= 128) if (net_count <= 128)
{ {
ms = create_solver<0,128>(net_count, sname); ms = create_solver<double, -128>(net_count, sname);
} }
else else
{ {
log().fatal(MF_1_NETGROUP_SIZE_EXCEEDED_1, 128); log().fatal(MF_1_NETGROUP_SIZE_EXCEEDED_1, 128);
ms = nullptr; /* tease compilers */ ms = nullptr; /* tease compilers */
} }
break; break;
} }

View File

@ -102,7 +102,7 @@ private:
solver_parameters_t m_params; solver_parameters_t m_params;
template <std::size_t m_N, std::size_t storage_N> template <typename FT, int SIZE>
matrix_solver_t * create_solver(std::size_t size, const pstring &solvername); matrix_solver_t * create_solver(std::size_t size, const pstring &solvername);
}; };

View File

@ -12,6 +12,7 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <type_traits>
#include "../plib/pconfig.h" #include "../plib/pconfig.h"
#if 0 #if 0
@ -36,63 +37,50 @@ private:
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif #endif
template<typename T, std::size_t N> template<typename VT, typename T>
void vec_set (const std::size_t n, const T scalar, T (& RESTRICT v)[N]) void vec_set_scalar (const std::size_t n, VT &v, const T & scalar)
{ {
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) v[i] = scalar;
v[i] = scalar;
else
for ( std::size_t i = 0; i < N; i++ )
v[i] = scalar;
} }
template<typename T, std::size_t N> template<typename VT, typename VS>
T vec_mult (const std::size_t n, const T (& RESTRICT v1)[N], const T (& RESTRICT v2)[N] ) void vec_set (const std::size_t n, VT &v, const VS & source)
{
for ( std::size_t i = 0; i < n; i++ )
v[i] = source [i];
}
template<typename T, typename V1, typename V2>
T vec_mult (const std::size_t n, const V1 & v1, const V2 & v2 )
{ {
T value = 0.0; T value = 0.0;
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) value += v1[i] * v2[i];
value += v1[i] * v2[i];
else
for ( std::size_t i = 0; i < N; i++ )
value += v1[i] * v2[i];
return value; return value;
} }
template<typename T, std::size_t N> template<typename T, typename VT>
T vec_mult2 (const std::size_t n, const T (& RESTRICT v)[N]) T vec_mult2 (const std::size_t n, const VT &v)
{ {
T value = 0.0; T value = 0.0;
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) value += v[i] * v[i];
value += v[i] * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
value += v[i] * v[i];
return value; return value;
} }
template<typename T, std::size_t N> template<typename VV, typename T, typename VR>
void vec_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T & scalar, T (& RESTRICT result)[N]) void vec_mult_scalar (const std::size_t n, const VV & v, const T & scalar, VR & result)
{ {
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) result[i] = scalar * v[i];
result[i] = scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = scalar * v[i];
} }
template<typename T, std::size_t N> template<typename VV, typename T, typename VR>
void vec_add_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T scalar, T (& RESTRICT result)[N]) void vec_add_mult_scalar (const std::size_t n, const VV & v, const T scalar, VR & result)
{ {
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) result[i] = result[i] + scalar * v[i];
result[i] = result[i] + scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = result[i] + scalar * v[i];
} }
template<typename T> template<typename T>
@ -102,37 +90,29 @@ void vec_add_mult_scalar_p(const std::size_t & n, const T * RESTRICT v, const T
result[i] += scalar * v[i]; result[i] += scalar * v[i];
} }
template<typename T> template<typename V, typename R>
void vec_add_ip(const std::size_t n, const T * RESTRICT v, T * RESTRICT result) void vec_add_ip(const std::size_t n, const V & v, R & result)
{ {
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )
result[i] += v[i]; result[i] += v[i];
} }
template<typename T, std::size_t N> template<typename V1, typename V2, typename VR>
void vec_sub(const std::size_t n, const T (& RESTRICT v1)[N], const T (& RESTRICT v2)[N], T (& RESTRICT result)[N]) void vec_sub(const std::size_t n, const V1 &v1, const V2 & v2, VR & result)
{ {
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) result[i] = v1[i] - v2[i];
result[i] = v1[i] - v2[i];
else
for ( std::size_t i = 0; i < N; i++ )
result[i] = v1[i] - v2[i];
} }
template<typename T, std::size_t N> template<typename V, typename T>
void vec_scale(const std::size_t n, T (& RESTRICT v)[N], const T scalar) void vec_scale(const std::size_t n, V & v, const T scalar)
{ {
if (n != N) for ( std::size_t i = 0; i < n; i++ )
for ( std::size_t i = 0; i < n; i++ ) v[i] = scalar * v[i];
v[i] = scalar * v[i];
else
for ( std::size_t i = 0; i < N; i++ )
v[i] = scalar * v[i];
} }
template<typename T> template<typename T, typename V>
T vec_maxabs(const std::size_t n, const T * RESTRICT v) T vec_maxabs(const std::size_t n, const V & v)
{ {
T ret = 0.0; T ret = 0.0;
for ( std::size_t i = 0; i < n; i++ ) for ( std::size_t i = 0; i < n; i++ )

View File

@ -317,15 +317,16 @@ NETLIST_START(kidniki)
PARAM(Solver.METHOD, "MAT_CR") PARAM(Solver.METHOD, "MAT_CR")
//PARAM(Solver.METHOD, "MAT") //PARAM(Solver.METHOD, "MAT")
//PARAM(Solver.METHOD, "GMRES") //PARAM(Solver.METHOD, "GMRES")
PARAM(Solver.SOR_FACTOR, 1.00) PARAM(Solver.SOR_FACTOR, 1.0)
PARAM(Solver.DYNAMIC_TS, 0) PARAM(Solver.DYNAMIC_TS, 0)
PARAM(Solver.DYNAMIC_LTE, 5e-4) PARAM(Solver.DYNAMIC_LTE, 5e-4)
PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6) PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6)
#else #else
SOLVER(Solver, 18000) SOLVER(Solver, 18000)
PARAM(Solver.ACCURACY, 1e-6) PARAM(Solver.ACCURACY, 1e-7)
PARAM(Solver.NR_LOOPS, 100) PARAM(Solver.NR_LOOPS, 100)
PARAM(Solver.GS_LOOPS, 20) PARAM(Solver.GS_LOOPS, 20)
//PARAM(Solver.METHOD, "MAT_CR")
PARAM(Solver.METHOD, "GMRES") PARAM(Solver.METHOD, "GMRES")
#endif #endif