mirror of
https://github.com/holub/mame
synced 2025-07-04 01:18:59 +03:00
netlist: refactor code for better scalability and flexibility. (nw)
These changes aim to remove some of the duplication of code in the various solvers. Tested with gcc-7 clang-8 and nvcc-9.2
This commit is contained in:
parent
6d5e014a6f
commit
32b0442c73
@ -207,7 +207,7 @@ native:
|
||||
$(MAKE) CEXTRAFLAGS="-march=native -Wall -Wpedantic -Wsign-compare -Wextra -Wno-unused-parameter"
|
||||
|
||||
clang:
|
||||
$(MAKE) CC=clang++ LD=clang++ CEXTRAFLAGS="-march=native -Wno-unused-parameter -Weverything -Werror -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors"
|
||||
$(MAKE) CC=clang++ LD=clang++ CEXTRAFLAGS="-march=native -Wno-c++11-narrowing -Wno-unused-parameter -Weverything -Werror -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wweak-template-vtables -Wno-exit-time-destructors"
|
||||
|
||||
clang-5:
|
||||
$(MAKE) CC=clang++-5.0 LD=clang++-5.0 CEXTRAFLAGS="-march=native -Weverything -Werror -Wno-inconsistent-missing-destructor-override -Wno-unreachable-code -Wno-padded -Wno-weak-vtables -Wno-missing-variable-declarations -Wconversion -Wno-c++98-compat -Wno-float-equal -Wno-global-constructors -Wno-c++98-compat-pedantic -Wno-format-nonliteral -Wno-weak-template-vtables -Wno-exit-time-destructors"
|
||||
@ -217,6 +217,7 @@ nvcc:
|
||||
CEXTRAFLAGS="-x cu -DNVCCBUILD=1 --expt-extended-lambda --expt-relaxed-constexpr --default-stream per-thread --restrict"
|
||||
|
||||
#
|
||||
# -Wno-c++11-narrowing : seems a bit broken
|
||||
# Mostly done: -Wno-weak-vtables -Wno-cast-align
|
||||
# FIXME: -Wno-weak-vtables -Wno-missing-variable-declarations -Wno-conversion -Wno-exit-time-destructors
|
||||
# FIXME: -Winconsistent-missing-destructor-override : c++ community has diverging opinions on this https://github.com/isocpp/CppCoreGuidelines/issues/721
|
||||
|
@ -322,11 +322,11 @@ namespace netlist
|
||||
const T &value //!< Initial value after construction
|
||||
);
|
||||
//! Copy Constructor.
|
||||
constexpr state_var(const state_var &rhs) noexcept = default;
|
||||
constexpr state_var(const state_var &rhs) = default;
|
||||
//! Move Constructor.
|
||||
constexpr state_var(state_var &&rhs) noexcept = default;
|
||||
//! Assignment operator to assign value of a state var.
|
||||
C14CONSTEXPR state_var &operator=(const state_var &rhs) noexcept = default;
|
||||
C14CONSTEXPR state_var &operator=(const state_var &rhs) = default;
|
||||
//! Assignment move operator to assign value of a state var.
|
||||
C14CONSTEXPR state_var &operator=(state_var &&rhs) noexcept = default;
|
||||
//! Assignment operator to assign value of type T.
|
||||
|
117
src/lib/netlist/plib/parray.h
Normal file
117
src/lib/netlist/plib/parray.h
Normal file
@ -0,0 +1,117 @@
|
||||
// license:GPL-2.0+
|
||||
// copyright-holders:Couriersud
|
||||
/*
|
||||
* parray.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PARRAY_H_
|
||||
#define PARRAY_H_
|
||||
|
||||
#include "pconfig.h"
|
||||
#include "pexception.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
namespace plib {
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
struct sizeabs
|
||||
{
|
||||
static constexpr std::size_t ABS() { return (SIZE < 0) ? static_cast<std::size_t>(0 - SIZE) : static_cast<std::size_t>(SIZE); }
|
||||
typedef typename std::array<FT, ABS()> container;
|
||||
};
|
||||
|
||||
template <typename FT>
|
||||
struct sizeabs<FT, 0>
|
||||
{
|
||||
static constexpr const std::size_t ABS = 0;
|
||||
typedef typename std::vector<FT> container;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Array with preallocated or dynamic allocation
|
||||
*
|
||||
* Passing SIZE > 0 has the same functionality as a std::array.
|
||||
* SIZE = 0 is pure dynamic allocation, the actual array size is passed to the
|
||||
* constructor.
|
||||
* SIZE < 0 reserves std::abs(SIZE) elements statically in place allocated. The
|
||||
* actual size is passed in by the constructor.
|
||||
* This array is purely intended for HPC application where depending on the
|
||||
* architecture a preference dynamic/static has to be made.
|
||||
*
|
||||
* This struct is not intended to be a full replacement to std::array.
|
||||
* It is a subset to enable switching between dynamic and static allocation.
|
||||
* I consider > 10% performance difference to be a use case.
|
||||
*/
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
struct parray
|
||||
{
|
||||
public:
|
||||
static constexpr std::size_t SIZEABS() { return sizeabs<FT, SIZE>::ABS(); }
|
||||
|
||||
typedef typename sizeabs<FT, SIZE>::container base_type;
|
||||
typedef typename base_type::size_type size_type;
|
||||
typedef typename base_type::reference reference;
|
||||
typedef typename base_type::const_reference const_reference;
|
||||
|
||||
template <int X = SIZE >
|
||||
parray(size_type size, typename std::enable_if<X==0, int>::type = 0)
|
||||
: m_a(size), m_size(size)
|
||||
{
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* allow construction in fixed size arrays */
|
||||
template <int X = SIZE >
|
||||
parray(typename std::enable_if<X==0, int>::type = 0)
|
||||
: m_size(0)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
template <int X = SIZE >
|
||||
parray(size_type size, typename std::enable_if<X!=0, int>::type = 0)
|
||||
: m_size(size)
|
||||
{
|
||||
if (SIZE < 0 && size > SIZEABS())
|
||||
throw plib::pexception("parray: size error " + plib::to_string(size) + ">" + plib::to_string(SIZEABS()));
|
||||
else if (SIZE > 0 && size != SIZEABS())
|
||||
throw plib::pexception("parray: size error");
|
||||
}
|
||||
|
||||
inline size_type size() const noexcept { return SIZE <= 0 ? m_size : SIZEABS(); }
|
||||
|
||||
constexpr size_type max_size() const noexcept { return base_type::max_size(); }
|
||||
|
||||
bool empty() const noexcept { return size() == 0; }
|
||||
|
||||
#if 0
|
||||
reference operator[](size_type i) /*noexcept*/
|
||||
{
|
||||
if (i >= m_size) throw plib::pexception("limits error " + to_string(i) + ">=" + to_string(m_size));
|
||||
return m_a[i];
|
||||
}
|
||||
const_reference operator[](size_type i) const /*noexcept*/
|
||||
{
|
||||
if (i >= m_size) throw plib::pexception("limits error " + to_string(i) + ">=" + to_string(m_size));
|
||||
return m_a[i];
|
||||
}
|
||||
#else
|
||||
reference operator[](size_type i) noexcept { return m_a[i]; }
|
||||
constexpr const_reference operator[](size_type i) const noexcept { return m_a[i]; }
|
||||
#endif
|
||||
FT * data() noexcept { return m_a.data(); }
|
||||
const FT * data() const noexcept { return m_a.data(); }
|
||||
|
||||
private:
|
||||
base_type m_a;
|
||||
size_type m_size;
|
||||
};
|
||||
}
|
||||
|
||||
#endif /* PARRAY_H_ */
|
@ -103,7 +103,7 @@ public:
|
||||
template<typename C>
|
||||
void save_item(const void *owner, std::vector<C> &v, const pstring &stname)
|
||||
{
|
||||
save_state(v.data(), owner, stname, v.size());
|
||||
save_state_ptr(owner, stname, datatype_f<C>::f(), v.size(), v.data());
|
||||
}
|
||||
|
||||
void pre_save();
|
||||
|
@ -11,60 +11,232 @@
|
||||
#define MAT_CR_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
|
||||
#include "../plib/pconfig.h"
|
||||
#include "../plib/palloc.h"
|
||||
#include "../plib/pstate.h"
|
||||
#include "../plib/parray.h"
|
||||
|
||||
template<std::size_t N, typename C = uint16_t, typename T = double>
|
||||
namespace plib
|
||||
{
|
||||
|
||||
template<typename T, int N, typename C = uint16_t>
|
||||
struct mat_cr_t
|
||||
{
|
||||
typedef C index_type;
|
||||
typedef T value_type;
|
||||
|
||||
C diag[N]; // diagonal index pointer n
|
||||
C ia[N+1]; // row index pointer n + 1
|
||||
C ja[N*N]; // column index array nz_num, initially (n * n)
|
||||
T A[N*N]; // Matrix elements nz_num, initially (n * n)
|
||||
parray<C, N> diag; // diagonal index pointer n
|
||||
parray<C, (N == 0) ? 0 : (N < 0 ? N - 1 : N + 1)> row_idx; // row index pointer n + 1
|
||||
parray<C, N < 0 ? -N * N : N *N> col_idx; // column index array nz_num, initially (n * n)
|
||||
parray<T, N < 0 ? -N * N : N *N> A; // Matrix elements nz_num, initially (n * n)
|
||||
//parray<C, N < 0 ? -N * N / 2 : N * N / 2> nzbd; // Support for gaussian elimination
|
||||
parray<C, N < 0 ? -N * (N-1) / 2 : N * (N+1) / 2 > nzbd; // Support for gaussian elimination
|
||||
// contains elimination rows below the diagonal
|
||||
|
||||
std::size_t size;
|
||||
std::size_t m_size;
|
||||
std::size_t nz_num;
|
||||
|
||||
explicit mat_cr_t(const std::size_t n)
|
||||
: size(n)
|
||||
: diag(n)
|
||||
, row_idx(n+1)
|
||||
, col_idx(n*n)
|
||||
, A(n*n)
|
||||
, nzbd(n * (n+1) / 2)
|
||||
, m_size(n)
|
||||
, nz_num(0)
|
||||
{
|
||||
#if 0
|
||||
#if 0
|
||||
ia = plib::palloc_array<C>(n + 1);
|
||||
ja = plib::palloc_array<C>(n * n);
|
||||
diag = plib::palloc_array<C>(n);
|
||||
#else
|
||||
diag = plib::palloc_array<C>(n + (n + 1) + n * n);
|
||||
ia = diag + n;
|
||||
ja = ia + (n+1);
|
||||
A = plib::palloc_array<T>(n * n);
|
||||
#endif
|
||||
#endif
|
||||
for (std::size_t i=0; i<n+1; i++)
|
||||
A[i] = 0;
|
||||
}
|
||||
|
||||
~mat_cr_t()
|
||||
{
|
||||
#if 0
|
||||
plib::pfree_array(diag);
|
||||
#if 0
|
||||
plib::pfree_array(ia);
|
||||
plib::pfree_array(ja);
|
||||
#endif
|
||||
plib::pfree_array(A);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::size_t size() const { return m_size; }
|
||||
|
||||
void set_scalar(const T scalar)
|
||||
{
|
||||
for (std::size_t i=0, e=nz_num; i<e; i++)
|
||||
A[i] = scalar;
|
||||
}
|
||||
|
||||
void mult_vec(const T * RESTRICT x, T * RESTRICT res)
|
||||
void set(C r, C c, T val)
|
||||
{
|
||||
C ri = row_idx[r];
|
||||
while (ri < row_idx[r+1] && col_idx[ri] < c)
|
||||
ri++;
|
||||
// we have the position now;
|
||||
if (nz_num > 0 && col_idx[ri] == c)
|
||||
A[ri] = val;
|
||||
else
|
||||
{
|
||||
for (C i = nz_num; i>ri; i--)
|
||||
{
|
||||
A[i] = A[i-1];
|
||||
col_idx[i] = col_idx[i-1];
|
||||
}
|
||||
A[ri] = val;
|
||||
col_idx[ri] = c;
|
||||
for (C i = row_idx[r]; i < size()+1;i++)
|
||||
row_idx[i]++;
|
||||
nz_num++;
|
||||
if (c==r)
|
||||
diag[r] = ri;
|
||||
}
|
||||
}
|
||||
|
||||
enum constants_e
|
||||
{
|
||||
FILL_INFINITY = 9999999
|
||||
};
|
||||
|
||||
template <typename M>
|
||||
std::pair<std::size_t, std::size_t> gaussian_extend_fill_mat(M &fill)
|
||||
{
|
||||
std::size_t ops = 0;
|
||||
std::size_t fill_max = 0;
|
||||
|
||||
for (std::size_t k = 0; k < fill.size(); k++)
|
||||
{
|
||||
ops++; // 1/A(k,k)
|
||||
for (std::size_t row = k + 1; row < fill.size(); row++)
|
||||
{
|
||||
if (fill[row][k] < FILL_INFINITY)
|
||||
{
|
||||
ops++;
|
||||
for (std::size_t col = k + 1; col < fill[row].size(); col++)
|
||||
//if (fill[k][col] < FILL_INFINITY)
|
||||
{
|
||||
auto f = std::min(fill[row][col], 1 + fill[row][k] + fill[k][col]);
|
||||
if (f < FILL_INFINITY)
|
||||
{
|
||||
if (f > fill_max)
|
||||
fill_max = f;
|
||||
ops += 2;
|
||||
}
|
||||
fill[row][col] = f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return { fill_max, ops };
|
||||
}
|
||||
|
||||
template <typename M>
|
||||
void build_from_fill_mat(const M &f, std::size_t max_fill = FILL_INFINITY - 1,
|
||||
unsigned band_width = FILL_INFINITY)
|
||||
{
|
||||
C nz = 0;
|
||||
if (nz_num != 0)
|
||||
throw pexception("build_from_mat only allowed on empty CR matrix");
|
||||
for (std::size_t k=0; k < size(); k++)
|
||||
{
|
||||
row_idx[k] = nz;
|
||||
|
||||
for (std::size_t j=0; j < size(); j++)
|
||||
if (f[k][j] <= max_fill && std::abs(static_cast<int>(k)-static_cast<int>(j)) <= static_cast<int>(band_width))
|
||||
{
|
||||
col_idx[nz] = static_cast<C>(j);
|
||||
if (j == k)
|
||||
diag[k] = nz;
|
||||
nz++;
|
||||
}
|
||||
}
|
||||
|
||||
row_idx[size()] = nz;
|
||||
nz_num = nz;
|
||||
/* build nzbd */
|
||||
|
||||
std::size_t p=0;
|
||||
for (std::size_t k=0; k < size(); k++)
|
||||
{
|
||||
for (std::size_t j=k + 1; j < size(); j++)
|
||||
if (f[j][k] < FILL_INFINITY)
|
||||
nzbd[p++] = static_cast<C>(j);
|
||||
nzbd[p++] = 0; // end of sequence
|
||||
}
|
||||
}
|
||||
|
||||
template <typename V>
|
||||
void gaussian_elimination(V & RHS)
|
||||
{
|
||||
std::size_t nzbdp = 0;
|
||||
const std::size_t iN = size();
|
||||
|
||||
for (std::size_t i = 0; i < iN - 1; i++)
|
||||
{
|
||||
std::size_t pi = diag[i];
|
||||
const value_type f = 1.0 / A[pi++];
|
||||
const std::size_t piie = row_idx[i+1];
|
||||
|
||||
while (auto j = nzbd[nzbdp++])
|
||||
{
|
||||
// proceed to column i
|
||||
std::size_t pj = row_idx[j];
|
||||
|
||||
while (col_idx[pj] < i)
|
||||
pj++;
|
||||
|
||||
const value_type f1 = - A[pj++] * f;
|
||||
|
||||
// subtract row i from j */
|
||||
for (std::size_t pii = pi; pii<piie; pii++)
|
||||
{
|
||||
while (col_idx[pj] < col_idx[pii])
|
||||
pj++;
|
||||
if (col_idx[pj] == col_idx[pii])
|
||||
A[pj++] += A[pii] * f1;
|
||||
}
|
||||
RHS[j] += f1 * RHS[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename V1, typename V2>
|
||||
void gaussian_back_substitution(V1 &V, const V2 &RHS)
|
||||
{
|
||||
const std::size_t iN = size();
|
||||
/* row n-1 */
|
||||
V[iN - 1] = RHS[iN - 1] / A[diag[iN - 1]];
|
||||
|
||||
for (std::size_t j = iN - 1; j-- > 0;)
|
||||
{
|
||||
value_type tmp = 0;
|
||||
const auto jdiag = diag[j];
|
||||
const std::size_t e = row_idx[j+1];
|
||||
for (std::size_t pk = jdiag + 1; pk < e; pk++)
|
||||
tmp += A[pk] * V[col_idx[pk]];
|
||||
V[j] = (RHS[j] - tmp) / A[jdiag];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename V1>
|
||||
void gaussian_back_substitution(V1 &V)
|
||||
{
|
||||
const std::size_t iN = size();
|
||||
/* row n-1 */
|
||||
V[iN - 1] = V[iN - 1] / A[diag[iN - 1]];
|
||||
|
||||
for (std::size_t j = iN - 1; j-- > 0;)
|
||||
{
|
||||
value_type tmp = 0;
|
||||
const auto jdiag = diag[j];
|
||||
const std::size_t e = row_idx[j+1];
|
||||
for (std::size_t pk = jdiag + 1; pk < e; pk++)
|
||||
tmp += A[pk] * V[col_idx[pk]];
|
||||
V[j] = (V[j] - tmp) / A[jdiag];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename VTV, typename VTR>
|
||||
void mult_vec(const VTV & RESTRICT x, VTR & RESTRICT res)
|
||||
{
|
||||
/*
|
||||
* res = A * x
|
||||
@ -77,14 +249,68 @@ struct mat_cr_t
|
||||
while (k < oe)
|
||||
{
|
||||
T tmp = 0.0;
|
||||
const std::size_t e = ia[i+1];
|
||||
const std::size_t e = row_idx[i+1];
|
||||
for (; k < e; k++)
|
||||
tmp += A[k] * x[ja[k]];
|
||||
tmp += A[k] * x[col_idx[k]];
|
||||
res[i++] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
void incomplete_LU_factorization(T * RESTRICT LU)
|
||||
/* throws error if P(source)>P(destination) */
|
||||
template <typename LUMAT>
|
||||
void slim_copy_from(LUMAT & src)
|
||||
{
|
||||
for (std::size_t r=0; r<src.size(); r++)
|
||||
{
|
||||
C dp = row_idx[r];
|
||||
for (C sp = src.row_idx[r]; sp < src.row_idx[r+1]; sp++)
|
||||
{
|
||||
/* advance dp to source column and fill 0s if necessary */
|
||||
while (col_idx[dp] < src.col_idx[sp])
|
||||
A[dp++] = 0;
|
||||
if (row_idx[r+1] <= dp || col_idx[dp] != src.col_idx[sp])
|
||||
throw plib::pexception("slim_copy_from error");
|
||||
A[dp++] = src.A[sp];
|
||||
}
|
||||
/* fill remaining elements in row */
|
||||
while (dp < row_idx[r+1])
|
||||
A[dp++] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* only copies common elements */
|
||||
template <typename LUMAT>
|
||||
void reduction_copy_from(LUMAT & src)
|
||||
{
|
||||
C sp = 0;
|
||||
for (std::size_t r=0; r<src.size(); r++)
|
||||
{
|
||||
C dp = row_idx[r];
|
||||
while(sp < src.row_idx[r+1])
|
||||
{
|
||||
/* advance dp to source column and fill 0s if necessary */
|
||||
if (col_idx[dp] < src.col_idx[sp])
|
||||
A[dp++] = 0;
|
||||
else if (col_idx[dp] == src.col_idx[sp])
|
||||
A[dp++] = src.A[sp++];
|
||||
else
|
||||
sp++;
|
||||
}
|
||||
/* fill remaining elements in row */
|
||||
while (dp < row_idx[r+1])
|
||||
A[dp++] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* checks at all - may crash */
|
||||
template <typename LUMAT>
|
||||
void raw_copy_from(LUMAT & src)
|
||||
{
|
||||
for (std::size_t k = 0; k < nz_num; k++)
|
||||
A[k] = src.A[k];
|
||||
}
|
||||
|
||||
void incomplete_LU_factorization()
|
||||
{
|
||||
/*
|
||||
* incomplete LU Factorization according to http://de.wikipedia.org/wiki/ILU-Zerlegung
|
||||
@ -93,38 +319,67 @@ struct mat_cr_t
|
||||
*
|
||||
*/
|
||||
|
||||
#if 0
|
||||
const std::size_t lnz = nz_num;
|
||||
|
||||
for (std::size_t k = 0; k < lnz; k++)
|
||||
LU[k] = A[k];
|
||||
|
||||
for (std::size_t i = 1; ia[i] < lnz; i++) // row i
|
||||
for (std::size_t i = 1; row_idx[i] < lnz; i++) // row i
|
||||
{
|
||||
const std::size_t iai1 = ia[i + 1];
|
||||
const std::size_t pke = diag[i];
|
||||
for (std::size_t pk = ia[i]; pk < pke; pk++) // all columns left of diag in row i
|
||||
const std::size_t p_i_end = row_idx[i + 1];
|
||||
// loop over all columns left of diag in row i
|
||||
for (std::size_t p_i_k = row_idx[i]; p_i_k < diag[i]; p_i_k++)
|
||||
{
|
||||
// pk == (i, k)
|
||||
const std::size_t k = ja[pk];
|
||||
const std::size_t iak1 = ia[k + 1];
|
||||
const T LUpk = LU[pk] = LU[pk] / LU[diag[k]];
|
||||
const std::size_t k = col_idx[p_i_k];
|
||||
// get start of row k
|
||||
const std::size_t p_k_end = row_idx[k + 1];
|
||||
|
||||
std::size_t pt = ia[k];
|
||||
const T LUp_i_k = A[p_i_k] = A[p_i_k] / A[diag[k]];
|
||||
|
||||
for (std::size_t pj = pk + 1; pj < iai1; pj++) // pj = (i, j)
|
||||
std::size_t p_k_j = row_idx[k];
|
||||
|
||||
for (std::size_t p_i_j = p_i_k + 1; p_i_j < p_i_end; p_i_j++) // pj = (i, j)
|
||||
{
|
||||
// we can assume that within a row ja increases continuously */
|
||||
const std::size_t ej = ja[pj];
|
||||
while (ja[pt] < ej && pt < iak1)
|
||||
pt++;
|
||||
if (pt < iak1 && ja[pt] == ej)
|
||||
LU[pj] = LU[pj] - LUpk * LU[pt];
|
||||
}
|
||||
const std::size_t j = col_idx[p_i_j]; // row i, column j
|
||||
while (col_idx[p_k_j] < j && p_k_j < p_k_end)
|
||||
p_k_j++;
|
||||
if (p_k_j < p_k_end && col_idx[p_k_j] == j)
|
||||
A[p_i_j] = A[p_i_j] - LUp_i_k * A[p_k_j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (std::size_t i = 1; i < m_size; i++) // row i
|
||||
{
|
||||
const std::size_t p_i_end = row_idx[i + 1];
|
||||
// loop over all columns k left of diag in row i
|
||||
for (std::size_t i_k = row_idx[i]; i_k < diag[i]; i_k++)
|
||||
{
|
||||
const std::size_t k = col_idx[i_k];
|
||||
const std::size_t p_k_end = row_idx[k + 1];
|
||||
const T LUp_i_k = A[i_k] = A[i_k] / A[diag[k]];
|
||||
|
||||
void solveLUx (const T * RESTRICT LU, T * RESTRICT r)
|
||||
// get start of row k
|
||||
//std::size_t k_j = row_idx[k];
|
||||
std::size_t k_j = diag[k];
|
||||
|
||||
for (std::size_t i_j = i_k + 1; i_j < p_i_end; i_j++) // pj = (i, j)
|
||||
{
|
||||
// we can assume that within a row ja increases continuously */
|
||||
const std::size_t j = col_idx[i_j]; // row i, column j
|
||||
while (col_idx[k_j] < j && k_j < p_k_end)
|
||||
k_j++;
|
||||
if (k_j >= p_k_end)
|
||||
break;
|
||||
if (col_idx[k_j] == j)
|
||||
A[i_j] = A[i_j] - LUp_i_k * A[k_j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
template <typename R>
|
||||
void solveLUx (R &r)
|
||||
{
|
||||
/*
|
||||
* Solve a linear equation Ax = r
|
||||
@ -147,29 +402,30 @@ struct mat_cr_t
|
||||
* This can be solved for x using backwards elimination in U.
|
||||
*
|
||||
*/
|
||||
|
||||
for (std::size_t i = 1; ia[i] < nz_num; ++i )
|
||||
for (std::size_t i = 1; i < m_size; ++i )
|
||||
{
|
||||
T tmp = 0.0;
|
||||
const std::size_t j1 = ia[i];
|
||||
const std::size_t j1 = row_idx[i];
|
||||
const std::size_t j2 = diag[i];
|
||||
|
||||
for (std::size_t j = j1; j < j2; ++j )
|
||||
tmp += LU[j] * r[ja[j]];
|
||||
tmp += A[j] * r[col_idx[j]];
|
||||
|
||||
r[i] -= tmp;
|
||||
}
|
||||
// i now is equal to n;
|
||||
for (std::size_t i = size; i-- > 0; )
|
||||
for (std::size_t i = m_size; i-- > 0; )
|
||||
{
|
||||
T tmp = 0.0;
|
||||
const std::size_t di = diag[i];
|
||||
const std::size_t j2 = ia[i+1];
|
||||
const std::size_t j2 = row_idx[i+1];
|
||||
for (std::size_t j = di + 1; j < j2; j++ )
|
||||
tmp += LU[j] * r[ja[j]];
|
||||
r[i] = (r[i] - tmp) / LU[di];
|
||||
tmp += A[j] * r[col_idx[j]];
|
||||
r[i] = (r[i] - tmp) / A[di];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif /* MAT_CR_H_ */
|
||||
|
@ -54,12 +54,43 @@ public:
|
||||
|
||||
void set_pointers();
|
||||
|
||||
template <typename AP, typename FT>
|
||||
void fill_matrix(AP &tcr, FT &RHS)
|
||||
{
|
||||
FT gtot_t = 0.0;
|
||||
FT RHS_t = 0.0;
|
||||
|
||||
const std::size_t term_count = this->count();
|
||||
const std::size_t railstart = this->m_railstart;
|
||||
const FT * const * RESTRICT other_cur_analog = this->connected_net_V();
|
||||
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
{
|
||||
*tcr[i] -= m_go[i];
|
||||
gtot_t += m_gt[i];
|
||||
RHS_t += m_Idr[i];
|
||||
}
|
||||
|
||||
for (std::size_t i = railstart; i < term_count; i++)
|
||||
{
|
||||
RHS_t += (m_Idr[i] + m_go[i] * *other_cur_analog[i]);
|
||||
gtot_t += m_gt[i];
|
||||
}
|
||||
|
||||
RHS = RHS_t;
|
||||
// update diagonal element ...
|
||||
*tcr[railstart] += gtot_t; //mat.A[mat.diag[k]] += gtot_t;
|
||||
}
|
||||
|
||||
std::size_t m_railstart;
|
||||
|
||||
std::vector<unsigned> m_nz; /* all non zero for multiplication */
|
||||
std::vector<unsigned> m_nzrd; /* non zero right of the diagonal for elimination, may include RHS element */
|
||||
std::vector<unsigned> m_nzbd; /* non zero below of the diagonal for elimination */
|
||||
|
||||
|
||||
|
||||
|
||||
/* state */
|
||||
nl_double m_last_V;
|
||||
nl_double m_DD_n_m_1;
|
||||
@ -157,14 +188,15 @@ protected:
|
||||
/* virtual */ void add_term(std::size_t net_idx, terminal_t *term);
|
||||
|
||||
template <typename T>
|
||||
void store(const T * RESTRICT V);
|
||||
template <typename T>
|
||||
T delta(const T * RESTRICT V);
|
||||
void store(const T & RESTRICT V);
|
||||
|
||||
template <typename T>
|
||||
void build_LE_A();
|
||||
auto delta(const T & RESTRICT V) -> typename std::decay<decltype(V[0])>::type;
|
||||
|
||||
template <typename T>
|
||||
void build_LE_RHS();
|
||||
void build_LE_A(T &child);
|
||||
template <typename T>
|
||||
void build_LE_RHS(T &child);
|
||||
|
||||
std::vector<std::unique_ptr<terms_for_net_t>> m_terms;
|
||||
std::vector<analog_net_t *> m_nets;
|
||||
@ -199,7 +231,7 @@ private:
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T matrix_solver_t::delta(const T * RESTRICT V)
|
||||
auto matrix_solver_t::delta(const T & RESTRICT V) -> typename std::decay<decltype(V[0])>::type
|
||||
{
|
||||
/* NOTE: Ideally we should also include currents (RHS) here. This would
|
||||
* need a reevaluation of the right hand side after voltages have been updated
|
||||
@ -207,14 +239,14 @@ T matrix_solver_t::delta(const T * RESTRICT V)
|
||||
*/
|
||||
|
||||
const std::size_t iN = this->m_terms.size();
|
||||
T cerr = 0;
|
||||
typename std::decay<decltype(V[0])>::type cerr = 0;
|
||||
for (std::size_t i = 0; i < iN; i++)
|
||||
cerr = std::max(cerr, std::abs(V[i] - static_cast<T>(this->m_nets[i]->Q_Analog())));
|
||||
cerr = std::max(cerr, std::abs(V[i] - this->m_nets[i]->Q_Analog()));
|
||||
return cerr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void matrix_solver_t::store(const T * RESTRICT V)
|
||||
void matrix_solver_t::store(const T & RESTRICT V)
|
||||
{
|
||||
const std::size_t iN = this->m_terms.size();
|
||||
for (std::size_t i = 0; i < iN; i++)
|
||||
@ -222,34 +254,33 @@ void matrix_solver_t::store(const T * RESTRICT V)
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void matrix_solver_t::build_LE_A()
|
||||
void matrix_solver_t::build_LE_A(T &child)
|
||||
{
|
||||
typedef typename T::float_type float_type;
|
||||
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
|
||||
|
||||
T &child = static_cast<T &>(*this);
|
||||
|
||||
const std::size_t iN = child.N();
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
terms_for_net_t *terms = m_terms[k].get();
|
||||
nl_double * Ak = &child.A(k, 0ul);
|
||||
float_type * Ak = &child.A(k, 0ul);
|
||||
|
||||
for (std::size_t i=0; i < iN; i++)
|
||||
Ak[i] = 0.0;
|
||||
|
||||
const std::size_t terms_count = terms->count();
|
||||
const std::size_t railstart = terms->m_railstart;
|
||||
const nl_double * const RESTRICT gt = terms->gt();
|
||||
const float_type * const RESTRICT gt = terms->gt();
|
||||
|
||||
{
|
||||
nl_double akk = 0.0;
|
||||
float_type akk = 0.0;
|
||||
for (std::size_t i = 0; i < terms_count; i++)
|
||||
akk += gt[i];
|
||||
|
||||
Ak[k] = akk;
|
||||
}
|
||||
|
||||
const nl_double * const RESTRICT go = terms->go();
|
||||
const float_type * const RESTRICT go = terms->go();
|
||||
int * RESTRICT net_other = terms->connected_net_idx();
|
||||
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
@ -258,21 +289,21 @@ void matrix_solver_t::build_LE_A()
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void matrix_solver_t::build_LE_RHS()
|
||||
void matrix_solver_t::build_LE_RHS(T &child)
|
||||
{
|
||||
static_assert(std::is_base_of<matrix_solver_t, T>::value, "T must derive from matrix_solver_t");
|
||||
T &child = static_cast<T &>(*this);
|
||||
typedef typename T::float_type float_type;
|
||||
|
||||
const std::size_t iN = child.N();
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double rhsk_a = 0.0;
|
||||
nl_double rhsk_b = 0.0;
|
||||
float_type rhsk_a = 0.0;
|
||||
float_type rhsk_b = 0.0;
|
||||
|
||||
const std::size_t terms_count = m_terms[k]->count();
|
||||
const nl_double * const RESTRICT go = m_terms[k]->go();
|
||||
const nl_double * const RESTRICT Idr = m_terms[k]->Idr();
|
||||
const nl_double * const * RESTRICT other_cur_analog = m_terms[k]->connected_net_V();
|
||||
const float_type * const RESTRICT go = m_terms[k]->go();
|
||||
const float_type * const RESTRICT Idr = m_terms[k]->Idr();
|
||||
const float_type * const * RESTRICT other_cur_analog = m_terms[k]->connected_net_V();
|
||||
|
||||
for (std::size_t i = 0; i < terms_count; i++)
|
||||
rhsk_a = rhsk_a + Idr[i];
|
||||
|
@ -14,28 +14,21 @@
|
||||
#include "nld_solver.h"
|
||||
#include "nld_matrix_solver.h"
|
||||
#include "vector_base.h"
|
||||
|
||||
/* Disabling dynamic allocation gives a ~10% boost in performance
|
||||
* This flag has been added to support continuous storage for arrays
|
||||
* going forward in case we implement cuda solvers in the future.
|
||||
*/
|
||||
#define NL_USE_DYNAMIC_ALLOCATION (1)
|
||||
#include "mat_cr.h"
|
||||
|
||||
namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
//#define nl_ext_double _float128 // slow, very slow
|
||||
//#define nl_ext_double long double // slightly slower
|
||||
#define nl_ext_double nl_double
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_direct_t: public matrix_solver_t
|
||||
{
|
||||
friend class matrix_solver_t;
|
||||
public:
|
||||
|
||||
typedef FT float_type;
|
||||
|
||||
matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
|
||||
matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name, const eSortType sort, const solver_parameters_t *params, const std::size_t size);
|
||||
|
||||
@ -48,36 +41,25 @@ protected:
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
unsigned solve_non_dynamic(const bool newton_raphson);
|
||||
|
||||
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
|
||||
constexpr std::size_t N() const { return (SIZE > 0) ? static_cast<std::size_t>(SIZE) : m_dim; }
|
||||
|
||||
void LE_solve();
|
||||
|
||||
template <typename T>
|
||||
void LE_back_subst(T * RESTRICT x);
|
||||
void LE_back_subst(T & RESTRICT x);
|
||||
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
nl_ext_double &A(const std::size_t r, const std::size_t c) { return m_A[r * m_pitch + c]; }
|
||||
nl_ext_double &RHS(const std::size_t r) { return m_A[r * m_pitch + N()]; }
|
||||
#else
|
||||
nl_ext_double &A(const std::size_t r, const std::size_t c) { return m_A[r][c]; }
|
||||
nl_ext_double &RHS(const std::size_t r) { return m_A[r][N()]; }
|
||||
#endif
|
||||
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
|
||||
FT &A(std::size_t r, std::size_t c) { return m_A[r * m_pitch + c]; }
|
||||
FT &RHS(std::size_t r) { return m_A[r * m_pitch + N()]; }
|
||||
plib::parray<FT, SIZE> m_last_RHS;
|
||||
plib::parray<FT, SIZE> m_new_V;
|
||||
|
||||
private:
|
||||
//static const std::size_t m_pitch = (((storage_N + 1) + 0) / 1) * 1;
|
||||
static constexpr std::size_t m_pitch = (((storage_N + 1) + 7) / 8) * 8;
|
||||
//static const std::size_t m_pitch = (((storage_N + 1) + 15) / 16) * 16;
|
||||
//static const std::size_t m_pitch = (((storage_N + 1) + 31) / 32) * 32;
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
//nl_ext_double * RESTRICT m_A;
|
||||
std::vector<nl_ext_double> m_A;
|
||||
#else
|
||||
nl_ext_double m_A[storage_N][m_pitch];
|
||||
#endif
|
||||
//nl_ext_double m_RHSx[storage_N];
|
||||
static constexpr const std::size_t SIZEABS = plib::parray<FT, SIZE>::SIZEABS();
|
||||
static constexpr const std::size_t m_pitch_ABS = (((SIZEABS + 1) + 7) / 8) * 8;
|
||||
|
||||
const std::size_t m_dim;
|
||||
const std::size_t m_pitch;
|
||||
plib::parray<FT, SIZE * m_pitch_ABS> m_A;
|
||||
|
||||
};
|
||||
|
||||
@ -85,16 +67,13 @@ private:
|
||||
// matrix_solver_direct
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_direct_t<m_N, storage_N>::~matrix_solver_direct_t()
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_direct_t<FT, SIZE>::~matrix_solver_direct_t()
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
//plib::pfree_array(m_A);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_direct_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_t::setup_base(nets);
|
||||
|
||||
@ -107,34 +86,31 @@ void matrix_solver_direct_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
t->m_nzrd.push_back(static_cast<unsigned>(N()));
|
||||
}
|
||||
|
||||
state().save(*this, m_last_RHS, "m_last_RHS");
|
||||
state().save(*this, m_last_RHS.data(), "m_last_RHS", m_last_RHS.size());
|
||||
|
||||
for (std::size_t k = 0; k < N(); k++)
|
||||
state().save(*this, RHS(k), plib::pfmt("RHS.{1}")(k));
|
||||
}
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_direct_t<FT, SIZE>::LE_solve()
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
if (!m_params.m_pivot)
|
||||
{
|
||||
for (std::size_t i = 0; i < kN; i++)
|
||||
{
|
||||
|
||||
/* FIXME: Singular matrix? */
|
||||
nl_double *Ai = &A(i, 0);
|
||||
const nl_double f = 1.0 / A(i,i);
|
||||
const FT f = 1.0 / A(i,i);
|
||||
const auto &nzrd = m_terms[i]->m_nzrd;
|
||||
const auto &nzbd = m_terms[i]->m_nzbd;
|
||||
|
||||
for (std::size_t j : nzbd)
|
||||
{
|
||||
nl_double *Aj = &A(j, 0);
|
||||
const nl_double f1 = -f * Aj[i];
|
||||
const FT f1 = -f * A(j, i);
|
||||
for (std::size_t k : nzrd)
|
||||
Aj[k] += Ai[k] * f1;
|
||||
A(j, k) += A(i, k) * f1;
|
||||
//RHS(j) += RHS(i) * f1;
|
||||
}
|
||||
}
|
||||
@ -161,17 +137,17 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
|
||||
//std::swap(RHS(i), RHS(maxrow));
|
||||
}
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / A(i,i);
|
||||
const FT f = 1.0 / A(i,i);
|
||||
|
||||
/* Eliminate column i from row j */
|
||||
|
||||
for (std::size_t j = i + 1; j < kN; j++)
|
||||
{
|
||||
const nl_double f1 = - A(j,i) * f;
|
||||
const FT f1 = - A(j,i) * f;
|
||||
if (f1 != NL_FCONST(0.0))
|
||||
{
|
||||
const nl_double * RESTRICT pi = &A(i,i+1);
|
||||
nl_double * RESTRICT pj = &A(j,i+1);
|
||||
const FT * RESTRICT pi = &A(i,i+1);
|
||||
FT * RESTRICT pj = &A(j,i+1);
|
||||
#if 1
|
||||
vec_add_mult_scalar_p(kN-i,pi,f1,pj);
|
||||
#else
|
||||
@ -188,10 +164,10 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_solve()
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
template <typename T>
|
||||
void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
|
||||
T * RESTRICT x)
|
||||
void matrix_solver_direct_t<FT, SIZE>::LE_back_subst(
|
||||
T & RESTRICT x)
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
|
||||
@ -200,7 +176,7 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
|
||||
{
|
||||
for (std::size_t j = kN; j-- > 0; )
|
||||
{
|
||||
T tmp = 0;
|
||||
FT tmp = 0;
|
||||
for (std::size_t k = j+1; k < kN; k++)
|
||||
tmp += A(j,k) * x[k];
|
||||
x[j] = (RHS(j) - tmp) / A(j,j);
|
||||
@ -210,40 +186,33 @@ void matrix_solver_direct_t<m_N, storage_N>::LE_back_subst(
|
||||
{
|
||||
for (std::size_t j = kN; j-- > 0; )
|
||||
{
|
||||
T tmp = 0;
|
||||
|
||||
const auto *p = m_terms[j]->m_nzrd.data();
|
||||
const auto e = m_terms[j]->m_nzrd.size() - 1; /* exclude RHS element */
|
||||
T * Aj = &A(j,0);
|
||||
FT tmp = 0;
|
||||
const auto &nzrd = m_terms[j]->m_nzrd;
|
||||
const auto e = nzrd.size() - 1; /* exclude RHS element */
|
||||
for ( std::size_t k = 0; k < e; k++)
|
||||
{
|
||||
const auto pk = p[k];
|
||||
tmp += Aj[pk] * x[pk];
|
||||
}
|
||||
tmp += A(j, nzrd[k]) * x[nzrd[k]];
|
||||
x[j] = (RHS(j) - tmp) / A(j,j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_direct_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
nl_double new_V[storage_N]; // = { 0.0 };
|
||||
|
||||
this->LE_solve();
|
||||
this->LE_back_subst(new_V);
|
||||
this->LE_back_subst(m_new_V);
|
||||
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
const FT err = (newton_raphson ? delta(m_new_V) : 0.0);
|
||||
store(m_new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
build_LE_A<matrix_solver_direct_t>();
|
||||
build_LE_RHS<matrix_solver_direct_t>();
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
|
||||
for (std::size_t i=0, iN=N(); i < iN; i++)
|
||||
m_last_RHS[i] = RHS(i);
|
||||
@ -252,33 +221,33 @@ unsigned matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(const bool n
|
||||
return this->solve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_direct_t<FT, SIZE>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
|
||||
const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_t(anetlist, name, ASCENDING, params)
|
||||
, m_last_RHS(size)
|
||||
, m_new_V(size)
|
||||
, m_dim(size)
|
||||
, m_pitch(m_pitch_ABS ? m_pitch_ABS : (((m_dim + 1) + 7) / 8) * 8)
|
||||
, m_A(size * m_pitch)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A.resize(N() * m_pitch);
|
||||
//m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_direct_t<m_N, storage_N>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_direct_t<FT, SIZE>::matrix_solver_direct_t(netlist_base_t &anetlist, const pstring &name,
|
||||
const eSortType sort, const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_t(anetlist, name, sort, params)
|
||||
, m_last_RHS(size)
|
||||
, m_new_V(size)
|
||||
, m_dim(size)
|
||||
, m_pitch(m_pitch_ABS ? m_pitch_ABS : (((m_dim + 1) + 7) / 8) * 8)
|
||||
, m_A(size * m_pitch)
|
||||
{
|
||||
#if (NL_USE_DYNAMIC_ALLOCATION)
|
||||
m_A.resize(N() * m_pitch);
|
||||
//m_A = plib::palloc_array<nl_ext_double>(N() * m_pitch);
|
||||
#endif
|
||||
for (unsigned k = 0; k < N(); k++)
|
||||
for (std::size_t k = 0; k < N(); k++)
|
||||
{
|
||||
m_last_RHS[k] = 0.0;
|
||||
}
|
||||
|
@ -15,34 +15,38 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
class matrix_solver_direct1_t: public matrix_solver_direct_t<1,1>
|
||||
template <typename FT>
|
||||
class matrix_solver_direct1_t: public matrix_solver_direct_t<FT, 1>
|
||||
{
|
||||
public:
|
||||
|
||||
matrix_solver_direct1_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
|
||||
: matrix_solver_direct_t<1, 1>(anetlist, name, params, 1)
|
||||
{}
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
typedef FT float_type;
|
||||
typedef matrix_solver_direct_t<FT, 1> base_type;
|
||||
|
||||
};
|
||||
matrix_solver_direct1_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
|
||||
: matrix_solver_direct_t<FT, 1>(anetlist, name, params, 1)
|
||||
{}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// matrix_solver - Direct1
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline unsigned matrix_solver_direct1_t::vsolve_non_dynamic(const bool newton_raphson)
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override
|
||||
{
|
||||
build_LE_A<matrix_solver_direct1_t>();
|
||||
build_LE_RHS<matrix_solver_direct1_t>();
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
//NL_VERBOSE_OUT(("{1} {2}\n", new_val, m_RHS[0] / m_A[0][0]);
|
||||
|
||||
nl_double new_V[1] = { RHS(0) / A(0,0) };
|
||||
FT new_V[1] = { this->RHS(0) / this->A(0,0) };
|
||||
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
const FT err = (newton_raphson ? this->delta(new_V) : 0.0);
|
||||
this->store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
} //namespace devices
|
||||
} // namespace netlist
|
||||
|
||||
|
@ -15,12 +15,16 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
class matrix_solver_direct2_t: public matrix_solver_direct_t<2,2>
|
||||
|
||||
template <typename FT>
|
||||
class matrix_solver_direct2_t: public matrix_solver_direct_t<FT, 2>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef FT float_type;
|
||||
|
||||
matrix_solver_direct2_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params)
|
||||
: matrix_solver_direct_t<2, 2>(anetlist, name, params, 2)
|
||||
: matrix_solver_direct_t<double, 2>(anetlist, name, params, 2)
|
||||
{}
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
|
||||
@ -30,22 +34,23 @@ public:
|
||||
// matrix_solver - Direct2
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline unsigned matrix_solver_direct2_t::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT>
|
||||
inline unsigned matrix_solver_direct2_t<FT>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
build_LE_A<matrix_solver_direct2_t>();
|
||||
build_LE_RHS<matrix_solver_direct2_t>();
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
|
||||
const nl_double a = A(0,0);
|
||||
const nl_double b = A(0,1);
|
||||
const nl_double c = A(1,0);
|
||||
const nl_double d = A(1,1);
|
||||
const float_type a = this->A(0,0);
|
||||
const float_type b = this->A(0,1);
|
||||
const float_type c = this->A(1,0);
|
||||
const float_type d = this->A(1,1);
|
||||
|
||||
nl_double new_V[2];
|
||||
new_V[1] = (a * RHS(1) - c * RHS(0)) / (a * d - b * c);
|
||||
new_V[0] = (RHS(0) - b * new_V[1]) / a;
|
||||
float_type new_V[2];
|
||||
new_V[1] = (a * this->RHS(1) - c * this->RHS(0)) / (a * d - b * c);
|
||||
new_V[0] = (this->RHS(0) - b * new_V[1]) / a;
|
||||
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
const float_type err = (newton_raphson ? this->delta(new_V) : 0.0);
|
||||
this->store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
|
@ -23,15 +23,21 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_GCR_t: public matrix_solver_t
|
||||
{
|
||||
public:
|
||||
|
||||
// FIXME: dirty hack to make this compile
|
||||
static constexpr const std::size_t storage_N = 100;
|
||||
|
||||
matrix_solver_GCR_t(netlist_base_t &anetlist, const pstring &name,
|
||||
const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_t(anetlist, name, matrix_solver_t::ASCENDING, params)
|
||||
, m_dim(size)
|
||||
, RHS(size)
|
||||
, new_V(size)
|
||||
, mat(size)
|
||||
, m_proc()
|
||||
{
|
||||
@ -41,7 +47,7 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
|
||||
constexpr std::size_t N() const { return m_dim; }
|
||||
|
||||
virtual void vsetup(analog_net_t::list_t &nets) override;
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
@ -51,7 +57,7 @@ public:
|
||||
private:
|
||||
|
||||
//typedef typename mat_cr_t<storage_N>::type mattype;
|
||||
typedef typename mat_cr_t<storage_N>::index_type mattype;
|
||||
typedef typename plib::mat_cr_t<FT, SIZE>::index_type mat_index_type;
|
||||
|
||||
void csc_private(plib::putf8_fmt_writer &strm);
|
||||
|
||||
@ -60,8 +66,12 @@ private:
|
||||
pstring static_compile_name();
|
||||
|
||||
const std::size_t m_dim;
|
||||
std::vector<unsigned> m_term_cr[storage_N];
|
||||
mat_cr_t<storage_N> mat;
|
||||
plib::parray<FT, SIZE> RHS;
|
||||
plib::parray<FT, SIZE> new_V;
|
||||
|
||||
std::vector<FT *> m_term_cr[storage_N];
|
||||
|
||||
plib::mat_cr_t<FT, SIZE> mat;
|
||||
|
||||
//extsolver m_proc;
|
||||
plib::dynproc<void, double * RESTRICT, double * RESTRICT, double * RESTRICT> m_proc;
|
||||
@ -72,166 +82,89 @@ private:
|
||||
// matrix_solver - GCR
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_GCR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_GCR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
setup_base(nets);
|
||||
|
||||
mattype nz = 0;
|
||||
const std::size_t iN = this->N();
|
||||
|
||||
/* build the final matrix */
|
||||
|
||||
bool touched[storage_N][storage_N] = { { false } };
|
||||
std::vector<std::vector<unsigned>> fill(iN);
|
||||
|
||||
std::size_t raw_elements = 0;
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
fill[k].resize(iN, decltype(mat)::FILL_INFINITY);
|
||||
for (auto &j : this->m_terms[k]->m_nz)
|
||||
touched[k][j] = true;
|
||||
{
|
||||
fill[k][j] = 0;
|
||||
raw_elements++;
|
||||
}
|
||||
|
||||
unsigned fc = 0;
|
||||
}
|
||||
|
||||
unsigned ops = 0;
|
||||
auto gr = mat.gaussian_extend_fill_mat(fill);
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
ops++; // 1/A(k,k)
|
||||
for (std::size_t row = k + 1; row < iN; row++)
|
||||
unsigned fm = 0;
|
||||
pstring ml = "";
|
||||
for (std::size_t j = 0; j < iN; j++)
|
||||
{
|
||||
if (touched[row][k])
|
||||
{
|
||||
ops++;
|
||||
fc++;
|
||||
for (std::size_t col = k + 1; col < iN; col++)
|
||||
if (touched[k][col])
|
||||
{
|
||||
touched[row][col] = true;
|
||||
ops += 2;
|
||||
}
|
||||
}
|
||||
ml += fill[k][j] < decltype(mat)::FILL_INFINITY ? "X" : "_";
|
||||
if (fill[k][j] < decltype(mat)::FILL_INFINITY)
|
||||
if (fill[k][j] > fm)
|
||||
fm = fill[k][j];
|
||||
}
|
||||
this->log().verbose("{1:4} {2} {3:4}", k, ml, fm);
|
||||
}
|
||||
|
||||
|
||||
for (mattype k=0; k<iN; k++)
|
||||
{
|
||||
mat.ia[k] = nz;
|
||||
mat.build_from_fill_mat(fill);
|
||||
|
||||
for (mattype j=0; j<iN; j++)
|
||||
for (mat_index_type k=0; k<iN; k++)
|
||||
{
|
||||
if (touched[k][j])
|
||||
{
|
||||
mat.ja[nz] = j;
|
||||
if (j == k)
|
||||
mat.diag[k] = nz;
|
||||
nz++;
|
||||
}
|
||||
}
|
||||
|
||||
m_term_cr[k].clear();
|
||||
/* build pointers into the compressed row format matrix for each terminal */
|
||||
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
|
||||
{
|
||||
int other = this->m_terms[k]->connected_net_idx()[j];
|
||||
for (auto i = mat.ia[k]; i < nz; i++)
|
||||
if (other == static_cast<int>(mat.ja[i]))
|
||||
for (auto i = mat.row_idx[k]; i < mat.row_idx[k+1]; i++)
|
||||
if (other == static_cast<int>(mat.col_idx[i]))
|
||||
{
|
||||
m_term_cr[k].push_back(i);
|
||||
m_term_cr[k].push_back(&mat.A[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
|
||||
m_term_cr[k].push_back(&mat.A[mat.diag[k]]);
|
||||
}
|
||||
|
||||
mat.ia[iN] = nz;
|
||||
mat.nz_num = nz;
|
||||
|
||||
this->log().verbose("Ops: {1} Occupancy ratio: {2}\n", ops,
|
||||
static_cast<double>(nz) / static_cast<double>(iN * iN));
|
||||
this->log().verbose("maximum fill: {1}", gr.first);
|
||||
this->log().verbose("Post elimination occupancy ratio: {2} Ops: {1}", gr.second,
|
||||
static_cast<double>(mat.nz_num) / static_cast<double>(iN * iN));
|
||||
this->log().verbose(" Pre elimination occupancy ratio: {2}",
|
||||
static_cast<double>(raw_elements) / static_cast<double>(iN * iN));
|
||||
|
||||
// FIXME: Move me
|
||||
|
||||
if (state().lib().isLoaded())
|
||||
{
|
||||
pstring symname = static_compile_name();
|
||||
#if 0
|
||||
m_proc = this->netlist().lib().template getsym<extsolver>(symname);
|
||||
if (m_proc != nullptr)
|
||||
this->log().verbose("External static solver {1} found ...", symname);
|
||||
else
|
||||
this->log().warning("External static solver {1} not found ...", symname);
|
||||
#else
|
||||
m_proc.load(this->state().lib(), symname);
|
||||
if (m_proc.resolved())
|
||||
this->log().warning("External static solver {1} found ...", symname);
|
||||
else
|
||||
this->log().warning("External static solver {1} not found ...", symname);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
#if 0
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
|
||||
{
|
||||
const std::size_t iN = N();
|
||||
for (std::size_t i = 0; i < iN - 1; i++)
|
||||
{
|
||||
const auto &nzbd = this->m_terms[i]->m_nzbd;
|
||||
|
||||
if (nzbd.size() > 0)
|
||||
{
|
||||
std::size_t pi = mat.diag[i];
|
||||
|
||||
//const nl_double f = 1.0 / m_A[pi++];
|
||||
strm("const double f{1} = 1.0 / m_A[{2}];\n", i, pi);
|
||||
pi++;
|
||||
const std::size_t piie = mat.ia[i+1];
|
||||
|
||||
//for (auto & j : nzbd)
|
||||
for (std::size_t j : nzbd)
|
||||
{
|
||||
// proceed to column i
|
||||
std::size_t pj = mat.ia[j];
|
||||
|
||||
while (mat.ja[pj] < i)
|
||||
pj++;
|
||||
|
||||
//const nl_double f1 = - m_A[pj++] * f;
|
||||
strm("\tconst double f{1}_{2} = -f{3} * m_A[{4}];\n", i, j, i, pj);
|
||||
pj++;
|
||||
|
||||
// subtract row i from j */
|
||||
for (std::size_t pii = pi; pii<piie; )
|
||||
{
|
||||
while (mat.ja[pj] < mat.ja[pii])
|
||||
pj++;
|
||||
//m_A[pj++] += m_A[pii++] * f1;
|
||||
strm("\tm_A[{1}] += m_A[{2}] * f{3}_{4};\n", pj, pii, i, j);
|
||||
pj++; pii++;
|
||||
}
|
||||
//RHS[j] += f1 * RHS[i];
|
||||
strm("\tRHS[{1}] += f{2}_{3} * RHS[{4}];\n", j, i, j, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//new_V[iN - 1] = RHS[iN - 1] / mat.A[mat.diag[iN - 1]];
|
||||
strm("\tV[{1}] = RHS[{2}] / m_A[{3}];\n", iN - 1, iN - 1, mat.diag[iN - 1]);
|
||||
for (std::size_t j = iN - 1; j-- > 0;)
|
||||
{
|
||||
strm("\tdouble tmp{1} = 0.0;\n", j);
|
||||
const std::size_t e = mat.ia[j+1];
|
||||
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
|
||||
{
|
||||
strm("\ttmp{1} += m_A[{2}] * V[{3}];\n", j, pk, mat.ja[pk]);
|
||||
}
|
||||
strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A[{4}];\n", j, j, j, mat.diag[j]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &strm)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_GCR_t<FT, SIZE>::csc_private(plib::putf8_fmt_writer &strm)
|
||||
{
|
||||
const std::size_t iN = N();
|
||||
|
||||
@ -246,28 +179,28 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
|
||||
{
|
||||
std::size_t pi = mat.diag[i];
|
||||
|
||||
//const nl_double f = 1.0 / m_A[pi++];
|
||||
//const FT f = 1.0 / m_A[pi++];
|
||||
strm("const double f{1} = 1.0 / m_A{2};\n", i, pi);
|
||||
pi++;
|
||||
const std::size_t piie = mat.ia[i+1];
|
||||
const std::size_t piie = mat.row_idx[i+1];
|
||||
|
||||
//for (auto & j : nzbd)
|
||||
for (std::size_t j : nzbd)
|
||||
{
|
||||
// proceed to column i
|
||||
std::size_t pj = mat.ia[j];
|
||||
std::size_t pj = mat.row_idx[j];
|
||||
|
||||
while (mat.ja[pj] < i)
|
||||
while (mat.col_idx[pj] < i)
|
||||
pj++;
|
||||
|
||||
//const nl_double f1 = - m_A[pj++] * f;
|
||||
//const FT f1 = - m_A[pj++] * f;
|
||||
strm("\tconst double f{1}_{2} = -f{3} * m_A{4};\n", i, j, i, pj);
|
||||
pj++;
|
||||
|
||||
// subtract row i from j */
|
||||
for (std::size_t pii = pi; pii<piie; )
|
||||
{
|
||||
while (mat.ja[pj] < mat.ja[pii])
|
||||
while (mat.col_idx[pj] < mat.col_idx[pii])
|
||||
pj++;
|
||||
//m_A[pj++] += m_A[pii++] * f1;
|
||||
strm("\tm_A{1} += m_A{2} * f{3}_{4};\n", pj, pii, i, j);
|
||||
@ -284,18 +217,17 @@ void matrix_solver_GCR_t<m_N, storage_N>::csc_private(plib::putf8_fmt_writer &st
|
||||
for (std::size_t j = iN - 1; j-- > 0;)
|
||||
{
|
||||
strm("\tdouble tmp{1} = 0.0;\n", j);
|
||||
const std::size_t e = mat.ia[j+1];
|
||||
const std::size_t e = mat.row_idx[j+1];
|
||||
for (std::size_t pk = mat.diag[j] + 1; pk < e; pk++)
|
||||
{
|
||||
strm("\ttmp{1} += m_A{2} * V[{3}];\n", j, pk, mat.ja[pk]);
|
||||
strm("\ttmp{1} += m_A{2} * V[{3}];\n", j, pk, mat.col_idx[pk]);
|
||||
}
|
||||
strm("\tV[{1}] = (RHS[{1}] - tmp{1}) / m_A{4};\n", j, j, j, mat.diag[j]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
|
||||
template <typename FT, int SIZE>
|
||||
pstring matrix_solver_GCR_t<FT, SIZE>::static_compile_name()
|
||||
{
|
||||
plib::postringstream t;
|
||||
plib::putf8_fmt_writer w(&t);
|
||||
@ -305,8 +237,8 @@ pstring matrix_solver_GCR_t<m_N, storage_N>::static_compile_name()
|
||||
return plib::pfmt("nl_gcr_{1:x}_{2}")(h( t.str() ))(mat.nz_num);
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_code()
|
||||
template <typename FT, int SIZE>
|
||||
std::pair<pstring, pstring> matrix_solver_GCR_t<FT, SIZE>::create_solver_code()
|
||||
{
|
||||
plib::postringstream t;
|
||||
plib::putf8_fmt_writer strm(&t);
|
||||
@ -320,69 +252,16 @@ std::pair<pstring, pstring> matrix_solver_GCR_t<m_N, storage_N>::create_solver_c
|
||||
}
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_GCR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
const std::size_t iN = this->N();
|
||||
|
||||
nl_double RHS[storage_N];
|
||||
nl_double new_V[storage_N];
|
||||
|
||||
mat.set_scalar(0.0);
|
||||
|
||||
/* populate matrix */
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
terms_for_net_t *t = this->m_terms[k].get();
|
||||
nl_double gtot_t = 0.0;
|
||||
nl_double RHS_t = 0.0;
|
||||
|
||||
const std::size_t term_count = t->count();
|
||||
const std::size_t railstart = t->m_railstart;
|
||||
const nl_double * const RESTRICT gt = t->gt();
|
||||
const nl_double * const RESTRICT go = t->go();
|
||||
const nl_double * const RESTRICT Idr = t->Idr();
|
||||
const nl_double * const * RESTRICT other_cur_analog = t->connected_net_V();
|
||||
const unsigned * const RESTRICT tcr = m_term_cr[k].data();
|
||||
|
||||
#if 0
|
||||
for (std::size_t i = 0; i < term_count; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
RHS_t += Idr[i];
|
||||
}
|
||||
|
||||
for (std::size_t i = railstart; i < term_count; i++)
|
||||
RHS_t += go[i] * *other_cur_analog[i];
|
||||
|
||||
RHS[k] = RHS_t;
|
||||
|
||||
// add diagonal element
|
||||
mat.A[mat.diag[k]] = gtot_t;
|
||||
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
mat.A[tcr[i]] -= go[i];
|
||||
}
|
||||
#else
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
mat.A[tcr[i]] -= go[i];
|
||||
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
RHS_t += Idr[i];
|
||||
}
|
||||
|
||||
for (std::size_t i = railstart; i < term_count; i++)
|
||||
{
|
||||
RHS_t += (Idr[i] + go[i] * *other_cur_analog[i]);
|
||||
gtot_t += gt[i];
|
||||
}
|
||||
|
||||
RHS[k] = RHS_t;
|
||||
mat.A[mat.diag[k]] += gtot_t;
|
||||
}
|
||||
#endif
|
||||
mat.ia[iN] = static_cast<mattype>(mat.nz_num);
|
||||
this->m_terms[k]->fill_matrix(m_term_cr[k], RHS[k]);
|
||||
|
||||
/* now solve it */
|
||||
|
||||
@ -394,63 +273,14 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
}
|
||||
else
|
||||
{
|
||||
for (std::size_t i = 0; i < iN - 1; i++)
|
||||
{
|
||||
const auto &nzbd = this->m_terms[i]->m_nzbd;
|
||||
|
||||
if (nzbd.size() > 0)
|
||||
{
|
||||
std::size_t pi = mat.diag[i];
|
||||
const nl_double f = 1.0 / mat.A[pi++];
|
||||
const std::size_t piie = mat.ia[i+1];
|
||||
|
||||
for (std::size_t j : nzbd) // for (std::size_t j = i + 1; j < iN; j++)
|
||||
{
|
||||
// proceed to column i
|
||||
//__builtin_prefetch(&m_A[mat.diag[j+1]], 1);
|
||||
std::size_t pj = mat.ia[j];
|
||||
|
||||
while (mat.ja[pj] < i)
|
||||
pj++;
|
||||
|
||||
const nl_double f1 = - mat.A[pj++] * f;
|
||||
|
||||
// subtract row i from j */
|
||||
for (std::size_t pii = pi; pii<piie; )
|
||||
{
|
||||
while (mat.ja[pj] < mat.ja[pii])
|
||||
pj++;
|
||||
mat.A[pj++] += mat.A[pii++] * f1;
|
||||
}
|
||||
RHS[j] += f1 * RHS[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
/* backward substitution
|
||||
*
|
||||
*/
|
||||
|
||||
/* row n-1 */
|
||||
new_V[iN - 1] = RHS[iN - 1] / mat.A[mat.diag[iN - 1]];
|
||||
|
||||
for (std::size_t j = iN - 1; j-- > 0;)
|
||||
{
|
||||
//__builtin_prefetch(&new_V[j-1], 1);
|
||||
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
|
||||
double tmp = 0;
|
||||
auto jdiag = mat.diag[j];
|
||||
const std::size_t e = mat.ia[j+1];
|
||||
for (std::size_t pk = jdiag + 1; pk < e; pk++)
|
||||
{
|
||||
tmp += mat.A[pk] * new_V[mat.ja[pk]];
|
||||
}
|
||||
new_V[j] = (RHS[j] - tmp) / mat.A[jdiag];
|
||||
}
|
||||
mat.gaussian_elimination(RHS);
|
||||
/* backward substitution */
|
||||
mat.gaussian_back_substitution(new_V, RHS);
|
||||
}
|
||||
|
||||
this->m_stat_calculations++;
|
||||
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
const FT err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "../plib/parray.h"
|
||||
#include "mat_cr.h"
|
||||
#include "nld_ms_direct.h"
|
||||
#include "nld_solver.h"
|
||||
@ -24,17 +25,34 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
class matrix_solver_GMRES_t: public matrix_solver_direct_t<m_N, storage_N>
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_GMRES_t: public matrix_solver_direct_t<FT, SIZE>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef FT float_type;
|
||||
// FIXME: dirty hack to make this compile
|
||||
static constexpr const std::size_t storage_N = plib::sizeabs<FT, SIZE>::ABS();
|
||||
|
||||
// Maximum iterations before a restart ...
|
||||
static constexpr const std::size_t restart_N = (storage_N > 0 ? 20 : 0);
|
||||
|
||||
/* Sort rows in ascending order. This should minimize fill-in and thus
|
||||
* maximize the efficiency of the incomplete LUT.
|
||||
*/
|
||||
matrix_solver_GMRES_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
|
||||
: matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
|
||||
, m_use_iLU_preconditioning(true)
|
||||
, m_use_more_precise_stop_condition(false)
|
||||
, m_use_more_precise_stop_condition(true)
|
||||
, m_ILU_scale(0)
|
||||
, m_accuracy_mult(1.0)
|
||||
, m_term_cr(size)
|
||||
, mat(size)
|
||||
, residual(size)
|
||||
, Ax(size)
|
||||
, m_LU(size)
|
||||
//, m_v(size)
|
||||
{
|
||||
}
|
||||
|
||||
@ -48,26 +66,32 @@ public:
|
||||
private:
|
||||
|
||||
//typedef typename mat_cr_t<storage_N>::type mattype;
|
||||
typedef typename mat_cr_t<storage_N>::index_type mattype;
|
||||
typedef typename plib::mat_cr_t<FT, SIZE>::index_type mattype;
|
||||
|
||||
unsigned solve_ilu_gmres(nl_double (& RESTRICT x)[storage_N], const nl_double (& RESTRICT rhs)[storage_N], const unsigned restart_max, std::size_t mr, nl_double accuracy);
|
||||
template <typename VT, typename VRHS>
|
||||
std::size_t solve_ilu_gmres(VT &x, const VRHS & rhs, const std::size_t restart_max, std::size_t mr, float_type accuracy);
|
||||
|
||||
std::vector<unsigned> m_term_cr[storage_N];
|
||||
|
||||
bool m_use_iLU_preconditioning;
|
||||
bool m_use_more_precise_stop_condition;
|
||||
nl_double m_accuracy_mult; // FXIME: Save state
|
||||
std::size_t m_ILU_scale;
|
||||
float_type m_accuracy_mult; // FXIME: Save state
|
||||
|
||||
mat_cr_t<storage_N> mat;
|
||||
plib::parray<std::vector<FT *>, SIZE> m_term_cr;
|
||||
plib::mat_cr_t<float_type, SIZE> mat;
|
||||
plib::parray<float_type, SIZE> residual;
|
||||
plib::parray<float_type, SIZE> Ax;
|
||||
|
||||
nl_double m_LU[storage_N * storage_N];
|
||||
plib::mat_cr_t<float_type, SIZE> m_LU;
|
||||
|
||||
nl_double m_c[storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_g[storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_ht[storage_N + 1][storage_N]; /* (mr + 1), mr */
|
||||
nl_double m_s[storage_N + 1]; /* mr + 1 */
|
||||
nl_double m_v[storage_N + 1][storage_N]; /*(mr + 1), n */
|
||||
nl_double m_y[storage_N + 1]; /* mr + 1 */
|
||||
float_type m_c[restart_N + 1]; /* mr + 1 */
|
||||
float_type m_g[restart_N + 1]; /* mr + 1 */
|
||||
float_type m_ht[restart_N + 1][restart_N]; /* (mr + 1), mr */
|
||||
float_type m_s[restart_N + 1]; /* mr + 1 */
|
||||
float_type m_y[restart_N + 1]; /* mr + 1 */
|
||||
|
||||
//plib::parray<float_type, SIZE> m_v[restart_N + 1]; /* mr + 1, n */
|
||||
float_type m_v[restart_N + 1][storage_N]; /* mr + 1, n */
|
||||
|
||||
};
|
||||
|
||||
@ -75,106 +99,72 @@ private:
|
||||
// matrix_solver - GMRES
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_GMRES_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_GMRES_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
|
||||
matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
|
||||
|
||||
mattype nz = 0;
|
||||
const std::size_t iN = this->N();
|
||||
|
||||
std::vector<std::vector<unsigned>> fill(iN);
|
||||
|
||||
for (std::size_t k=0; k<iN; k++)
|
||||
{
|
||||
fill[k].resize(iN, decltype(mat)::FILL_INFINITY);
|
||||
terms_for_net_t * RESTRICT row = this->m_terms[k].get();
|
||||
mat.ia[k] = nz;
|
||||
|
||||
for (std::size_t j=0; j<row->m_nz.size(); j++)
|
||||
{
|
||||
mat.ja[nz] = static_cast<mattype>(row->m_nz[j]);
|
||||
if (row->m_nz[j] == k)
|
||||
mat.diag[k] = nz;
|
||||
nz++;
|
||||
fill[k][static_cast<mattype>(row->m_nz[j])] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
mat.build_from_fill_mat(fill, 0);
|
||||
m_LU.gaussian_extend_fill_mat(fill);
|
||||
m_LU.build_from_fill_mat(fill, m_ILU_scale); // ILU(2)
|
||||
//m_LU.build_from_fill_mat(fill, 9999, 20); // Band matrix width 20
|
||||
|
||||
/* build pointers into the compressed row format matrix for each terminal */
|
||||
|
||||
for (unsigned j=0; j< this->m_terms[k]->m_railstart;j++)
|
||||
for (std::size_t k=0; k<iN; k++)
|
||||
{
|
||||
for (unsigned i = mat.ia[k]; i<nz; i++)
|
||||
if (this->m_terms[k]->connected_net_idx()[j] == static_cast<int>(mat.ja[i]))
|
||||
for (std::size_t j=0; j< this->m_terms[k]->m_railstart;j++)
|
||||
{
|
||||
m_term_cr[k].push_back(i);
|
||||
for (std::size_t i = mat.row_idx[k]; i<mat.row_idx[k+1]; i++)
|
||||
if (this->m_terms[k]->connected_net_idx()[j] == static_cast<int>(mat.col_idx[i]))
|
||||
{
|
||||
m_term_cr[k].push_back(&mat.A[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
nl_assert(m_term_cr[k].size() == this->m_terms[k]->m_railstart);
|
||||
m_term_cr[k].push_back(&mat.A[mat.diag[k]]);
|
||||
}
|
||||
}
|
||||
|
||||
mat.ia[iN] = nz;
|
||||
mat.nz_num = nz;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_GMRES_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
const std::size_t iN = this->N();
|
||||
|
||||
/* ideally, we could get an estimate for the spectral radius of
|
||||
* Inv(D - L) * U
|
||||
*
|
||||
* and estimate using
|
||||
*
|
||||
* omega = 2.0 / (1.0 + std::sqrt(1-rho))
|
||||
*/
|
||||
|
||||
//nz_num = 0;
|
||||
nl_double RHS[storage_N];
|
||||
nl_double new_V[storage_N];
|
||||
plib::parray<FT, SIZE> RHS(iN);
|
||||
//float_type new_V[storage_N];
|
||||
|
||||
mat.set_scalar(0.0);
|
||||
|
||||
/* populate matrix and V for first estimate */
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double gtot_t = 0.0;
|
||||
nl_double RHS_t = 0.0;
|
||||
|
||||
const std::size_t term_count = this->m_terms[k]->count();
|
||||
const std::size_t railstart = this->m_terms[k]->m_railstart;
|
||||
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
|
||||
const nl_double * const RESTRICT go = this->m_terms[k]->go();
|
||||
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const nl_double * const * RESTRICT other_cur_analog = this->m_terms[k]->connected_net_V();
|
||||
|
||||
for (std::size_t i = 0; i < term_count; i++)
|
||||
{
|
||||
gtot_t = gtot_t + gt[i];
|
||||
RHS_t = RHS_t + Idr[i];
|
||||
this->m_terms[k]->fill_matrix(m_term_cr[k], RHS[k]);
|
||||
this->m_new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
}
|
||||
|
||||
for (std::size_t i = railstart; i < term_count; i++)
|
||||
RHS_t = RHS_t + go[i] * *other_cur_analog[i];
|
||||
|
||||
RHS[k] = RHS_t;
|
||||
|
||||
// add diagonal element
|
||||
mat.A[mat.diag[k]] = gtot_t;
|
||||
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
{
|
||||
const std::size_t pi = m_term_cr[k][i];
|
||||
mat.A[pi] -= go[i];
|
||||
}
|
||||
|
||||
new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
|
||||
}
|
||||
|
||||
mat.ia[iN] = static_cast<mattype>(mat.nz_num);
|
||||
const nl_double accuracy = this->m_params.m_accuracy;
|
||||
//mat.row_idx[iN] = static_cast<mattype>(mat.nz_num);
|
||||
const float_type accuracy = this->m_params.m_accuracy;
|
||||
|
||||
const std::size_t mr = (iN > 3 ) ? static_cast<std::size_t>(std::sqrt(iN) * 2.0) : iN;
|
||||
unsigned iter = std::max(1u, this->m_params.m_gs_loops);
|
||||
unsigned gsl = solve_ilu_gmres(new_V, RHS, iter, mr, accuracy);
|
||||
std::size_t iter = std::max(1u, this->m_params.m_gs_loops);
|
||||
std::size_t gsl = solve_ilu_gmres(this->m_new_V, RHS, iter, mr, accuracy);
|
||||
const std::size_t failed = mr * iter;
|
||||
|
||||
this->m_iterative_total += gsl;
|
||||
@ -183,26 +173,29 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::vsolve_non_dynamic(const bool ne
|
||||
if (gsl >= failed)
|
||||
{
|
||||
this->m_iterative_fail++;
|
||||
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
|
||||
return matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
const nl_double err = (newton_raphson ? this->delta(new_V) : 0.0);
|
||||
this->store(new_V);
|
||||
//if (newton_raphson)
|
||||
// printf("%e %e\n", this->delta(this->m_new_V), this->m_params.m_accuracy);
|
||||
|
||||
const float_type err = (newton_raphson ? this->delta(this->m_new_V) : 0.0);
|
||||
this->store(this->m_new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void givens_mult( const T c, const T s, T & g0, T & g1 )
|
||||
{
|
||||
const T tg0 = c * g0 - s * g1;
|
||||
const T tg1 = s * g0 + c * g1;
|
||||
const T g0_last(g0);
|
||||
|
||||
g0 = tg0;
|
||||
g1 = tg1;
|
||||
g0 = c * g0 - s * g1;
|
||||
g1 = s * g0_last + c * g1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RESTRICT x)[storage_N], const nl_double (& RESTRICT rhs)[storage_N], const unsigned restart_max, std::size_t mr, nl_double accuracy)
|
||||
template <typename FT, int SIZE>
|
||||
template <typename VT, typename VRHS>
|
||||
std::size_t matrix_solver_GMRES_t<FT, SIZE>::solve_ilu_gmres (VT &x, const VRHS &rhs, const std::size_t restart_max, std::size_t mr, float_type accuracy)
|
||||
{
|
||||
/*-------------------------------------------------------------------------
|
||||
* The code below was inspired by code published by John Burkardt under
|
||||
@ -226,15 +219,22 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
|
||||
*
|
||||
*------------------------------------------------------------------------*/
|
||||
|
||||
unsigned itr_used = 0;
|
||||
std::size_t itr_used = 0;
|
||||
double rho_delta = 0.0;
|
||||
|
||||
const std::size_t n = this->N();
|
||||
|
||||
if (mr > restart_N) mr = restart_N;
|
||||
if (mr > n) mr = n;
|
||||
|
||||
if (m_use_iLU_preconditioning)
|
||||
mat.incomplete_LU_factorization(m_LU);
|
||||
{
|
||||
if (m_ILU_scale < 1)
|
||||
m_LU.raw_copy_from(mat);
|
||||
else
|
||||
m_LU.reduction_copy_from(mat);
|
||||
m_LU.incomplete_LU_factorization();
|
||||
}
|
||||
|
||||
if (m_use_more_precise_stop_condition)
|
||||
{
|
||||
@ -249,27 +249,22 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
|
||||
* differently: The invest doesn't pay off.
|
||||
* Therefore we use the approach in the else part.
|
||||
*/
|
||||
nl_double t[storage_N];
|
||||
nl_double Ax[storage_N];
|
||||
vec_set(n, accuracy, t);
|
||||
mat.mult_vec(t, Ax);
|
||||
vec_set_scalar(n, residual, accuracy);
|
||||
mat.mult_vec(residual, Ax);
|
||||
|
||||
mat.solveLUx(m_LU, Ax);
|
||||
m_LU.solveLUx(Ax);
|
||||
|
||||
const nl_double rho_to_accuracy = std::sqrt(vec_mult2(n, Ax)) / accuracy;
|
||||
const float_type rho_to_accuracy = std::sqrt(vec_mult2<FT>(n, Ax)) / accuracy;
|
||||
|
||||
rho_delta = accuracy * rho_to_accuracy;
|
||||
}
|
||||
else
|
||||
rho_delta = accuracy * std::sqrt(n) * m_accuracy_mult;
|
||||
rho_delta = accuracy * std::sqrt(static_cast<FT>(n)) * m_accuracy_mult;
|
||||
|
||||
for (unsigned itr = 0; itr < restart_max; itr++)
|
||||
for (std::size_t itr = 0; itr < restart_max; itr++)
|
||||
{
|
||||
std::size_t last_k = mr;
|
||||
nl_double rho;
|
||||
|
||||
nl_double Ax[storage_N];
|
||||
nl_double residual[storage_N];
|
||||
float_type rho;
|
||||
|
||||
mat.mult_vec(x, Ax);
|
||||
|
||||
@ -277,19 +272,19 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
|
||||
|
||||
if (m_use_iLU_preconditioning)
|
||||
{
|
||||
mat.solveLUx(m_LU, residual);
|
||||
m_LU.solveLUx(residual);
|
||||
}
|
||||
|
||||
rho = std::sqrt(vec_mult2(n, residual));
|
||||
rho = std::sqrt(vec_mult2<FT>(n, residual));
|
||||
|
||||
if (rho < rho_delta)
|
||||
return itr_used + 1;
|
||||
|
||||
vec_set(mr+1, NL_FCONST(0.0), m_g);
|
||||
vec_set_scalar(mr+1, m_g, NL_FCONST(0.0));
|
||||
m_g[0] = rho;
|
||||
|
||||
for (std::size_t i = 0; i < mr + 1; i++)
|
||||
vec_set(mr, NL_FCONST(0.0), m_ht[i]);
|
||||
vec_set_scalar(mr, m_ht[i], NL_FCONST(0.0));
|
||||
|
||||
vec_mult_scalar(n, residual, NL_FCONST(1.0) / rho, m_v[0]);
|
||||
|
||||
@ -300,14 +295,14 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
|
||||
mat.mult_vec(m_v[k], m_v[k1]);
|
||||
|
||||
if (m_use_iLU_preconditioning)
|
||||
mat.solveLUx(m_LU, m_v[k1]);
|
||||
m_LU.solveLUx(m_v[k1]);
|
||||
|
||||
for (std::size_t j = 0; j <= k; j++)
|
||||
{
|
||||
m_ht[j][k] = vec_mult(n, m_v[k1], m_v[j]);
|
||||
m_ht[j][k] = vec_mult<float_type>(n, m_v[k1], m_v[j]);
|
||||
vec_add_mult_scalar(n, m_v[j], -m_ht[j][k], m_v[k1]);
|
||||
}
|
||||
m_ht[k1][k] = std::sqrt(vec_mult2(n, m_v[k1]));
|
||||
m_ht[k1][k] = std::sqrt(vec_mult2<FT>(n, m_v[k1]));
|
||||
|
||||
if (m_ht[k1][k] != 0.0)
|
||||
vec_scale(n, m_v[k1], NL_FCONST(1.0) / m_ht[k1][k]);
|
||||
@ -315,7 +310,7 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
|
||||
for (std::size_t j = 0; j < k; j++)
|
||||
givens_mult(m_c[j], m_s[j], m_ht[j][k], m_ht[j+1][k]);
|
||||
|
||||
const nl_double mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
|
||||
const float_type mu = 1.0 / std::hypot(m_ht[k][k], m_ht[k1][k]);
|
||||
|
||||
m_c[k] = m_ht[k][k] * mu;
|
||||
m_s[k] = -m_ht[k1][k] * mu;
|
||||
@ -345,36 +340,17 @@ unsigned matrix_solver_GMRES_t<m_N, storage_N>::solve_ilu_gmres (nl_double (& RE
|
||||
{
|
||||
double tmp = m_g[i];
|
||||
for (std::size_t j = i + 1; j <= last_k; j++)
|
||||
{
|
||||
tmp -= m_ht[i][j] * m_y[j];
|
||||
}
|
||||
m_y[i] = tmp / m_ht[i][i];
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i <= last_k; i++)
|
||||
vec_add_mult_scalar(n, m_v[i], m_y[i], x);
|
||||
|
||||
#if 1
|
||||
if (rho <= rho_delta)
|
||||
{
|
||||
break;
|
||||
}
|
||||
#else
|
||||
/* we try to approximate the x difference between to steps using m_v[last_k] */
|
||||
|
||||
double xdelta = m_y[last_k] * vec_maxabs(n, m_v[last_k]);
|
||||
if (xdelta < accuracy)
|
||||
{
|
||||
if (m_accuracy_mult < 16384.0)
|
||||
m_accuracy_mult = m_accuracy_mult * 2.0;
|
||||
break;
|
||||
}
|
||||
else
|
||||
m_accuracy_mult = m_accuracy_mult / 2.0;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
return itr_used;
|
||||
}
|
||||
|
||||
|
@ -43,17 +43,19 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
//#define nl_ext_double _float128 // slow, very slow
|
||||
//#define nl_ext_double long double // slightly slower
|
||||
#define nl_ext_double nl_double
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_sm_t: public matrix_solver_t
|
||||
{
|
||||
friend class matrix_solver_t;
|
||||
|
||||
public:
|
||||
|
||||
typedef FT float_ext_type;
|
||||
typedef FT float_type;
|
||||
// FIXME: dirty hack to make this compile
|
||||
static constexpr const std::size_t storage_N = 100;
|
||||
|
||||
matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
|
||||
const solver_parameters_t *params, const std::size_t size);
|
||||
|
||||
@ -66,7 +68,7 @@ protected:
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
unsigned solve_non_dynamic(const bool newton_raphson);
|
||||
|
||||
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
|
||||
constexpr std::size_t N() const { return m_dim; }
|
||||
|
||||
void LE_invert();
|
||||
|
||||
@ -75,33 +77,33 @@ protected:
|
||||
|
||||
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
|
||||
float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
|
||||
float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &Ainv(const T1 &r, const T2 &c) { return m_Ainv[r][c]; }
|
||||
float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[r][c]; }
|
||||
template <typename T1>
|
||||
nl_ext_double &RHS(const T1 &r) { return m_RHS[r]; }
|
||||
float_ext_type &RHS(const T1 &r) { return m_RHS[r]; }
|
||||
|
||||
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
|
||||
float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &lAinv(const T1 &r, const T2 &c) { return m_lAinv[r][c]; }
|
||||
float_ext_type &lAinv(const T1 &r, const T2 &c) { return m_lAinv[r][c]; }
|
||||
|
||||
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
|
||||
float_type m_last_RHS[storage_N]; // right hand side - contains currents
|
||||
|
||||
private:
|
||||
static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8;
|
||||
nl_ext_double m_A[storage_N][m_pitch];
|
||||
nl_ext_double m_Ainv[storage_N][m_pitch];
|
||||
nl_ext_double m_W[storage_N][m_pitch];
|
||||
nl_ext_double m_RHS[storage_N]; // right hand side - contains currents
|
||||
float_ext_type m_A[storage_N][m_pitch];
|
||||
float_ext_type m_Ainv[storage_N][m_pitch];
|
||||
float_ext_type m_W[storage_N][m_pitch];
|
||||
float_ext_type m_RHS[storage_N]; // right hand side - contains currents
|
||||
|
||||
nl_ext_double m_lA[storage_N][m_pitch];
|
||||
nl_ext_double m_lAinv[storage_N][m_pitch];
|
||||
float_ext_type m_lA[storage_N][m_pitch];
|
||||
float_ext_type m_lAinv[storage_N][m_pitch];
|
||||
|
||||
//nl_ext_double m_RHSx[storage_N];
|
||||
//float_ext_type m_RHSx[storage_N];
|
||||
|
||||
const std::size_t m_dim;
|
||||
std::size_t m_cnt;
|
||||
@ -112,13 +114,13 @@ private:
|
||||
// matrix_solver_direct
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_sm_t<m_N, storage_N>::~matrix_solver_sm_t()
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_sm_t<FT, SIZE>::~matrix_solver_sm_t()
|
||||
{
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_sm_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_t::setup_base(nets);
|
||||
|
||||
@ -130,8 +132,8 @@ void matrix_solver_sm_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_sm_t<FT, SIZE>::LE_invert()
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
|
||||
@ -148,7 +150,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
|
||||
for (std::size_t i = 0; i < kN; i++)
|
||||
{
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / W(i,i);
|
||||
const float_type f = 1.0 / W(i,i);
|
||||
const auto * RESTRICT const p = m_terms[i]->m_nzrd.data();
|
||||
const std::size_t e = m_terms[i]->m_nzrd.size();
|
||||
|
||||
@ -159,7 +161,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
|
||||
for (std::size_t jb = 0; jb < eb; jb++)
|
||||
{
|
||||
const unsigned j = pb[jb];
|
||||
const nl_double f1 = - W(j,i) * f;
|
||||
const float_type f1 = - W(j,i) * f;
|
||||
if (f1 != 0.0)
|
||||
{
|
||||
for (std::size_t k = 0; k < e; k++)
|
||||
@ -173,10 +175,10 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
|
||||
for (std::size_t i = kN; i-- > 0; )
|
||||
{
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / W(i,i);
|
||||
const float_type f = 1.0 / W(i,i);
|
||||
for (std::size_t j = i; j-- > 0; )
|
||||
{
|
||||
const nl_double f1 = - W(j,i) * f;
|
||||
const float_type f1 = - W(j,i) * f;
|
||||
if (f1 != 0.0)
|
||||
{
|
||||
for (std::size_t k = i; k < kN; k++)
|
||||
@ -193,9 +195,9 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_invert()
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
template <typename T>
|
||||
void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
|
||||
void matrix_solver_sm_t<FT, SIZE>::LE_compute_x(
|
||||
T * RESTRICT x)
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
@ -205,7 +207,7 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
|
||||
|
||||
for (std::size_t k=0; k<kN; k++)
|
||||
{
|
||||
const nl_double f = RHS(k);
|
||||
const float_type f = RHS(k);
|
||||
|
||||
for (std::size_t i=0; i<kN; i++)
|
||||
x[i] += Ainv(i,k) * f;
|
||||
@ -213,13 +215,13 @@ void matrix_solver_sm_t<m_N, storage_N>::LE_compute_x(
|
||||
}
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_sm_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
static constexpr const bool incremental = true;
|
||||
const std::size_t iN = N();
|
||||
|
||||
nl_double new_V[storage_N]; // = { 0.0 };
|
||||
float_type new_V[storage_N]; // = { 0.0 };
|
||||
|
||||
if ((m_cnt % 50) == 0)
|
||||
{
|
||||
@ -236,7 +238,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
|
||||
}
|
||||
for (std::size_t row = 0; row < iN; row ++)
|
||||
{
|
||||
nl_double v[m_pitch] = {0};
|
||||
float_type v[m_pitch] = {0};
|
||||
std::size_t cols[m_pitch];
|
||||
std::size_t colcount = 0;
|
||||
|
||||
@ -252,10 +254,10 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
|
||||
|
||||
if (colcount > 0)
|
||||
{
|
||||
nl_double lamba = 0.0;
|
||||
nl_double w[m_pitch] = {0};
|
||||
float_type lamba = 0.0;
|
||||
float_type w[m_pitch] = {0};
|
||||
|
||||
nl_double z[m_pitch];
|
||||
float_type z[m_pitch];
|
||||
/* compute w and lamba */
|
||||
for (std::size_t i = 0; i < iN; i++)
|
||||
z[i] = Ainv(i, row); /* u is row'th column */
|
||||
@ -266,7 +268,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
|
||||
for (std::size_t j=0; j<colcount; j++)
|
||||
{
|
||||
std::size_t col = cols[j];
|
||||
nl_double f = v[col];
|
||||
float_type f = v[col];
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
w[k] += Ainv(col,k) * f; /* Transpose(Ainv) * v */
|
||||
}
|
||||
@ -274,7 +276,7 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
|
||||
lamba = -1.0 / (1.0 + lamba);
|
||||
for (std::size_t i=0; i<iN; i++)
|
||||
{
|
||||
const nl_double f = lamba * z[i];
|
||||
const float_type f = lamba * z[i];
|
||||
if (f != 0.0)
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
Ainv(i,k) += f * w[k];
|
||||
@ -288,16 +290,16 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::solve_non_dynamic(const bool newton
|
||||
|
||||
this->LE_compute_x(new_V);
|
||||
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
const float_type err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_sm_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
build_LE_A<matrix_solver_sm_t>();
|
||||
build_LE_RHS<matrix_solver_sm_t>();
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
|
||||
for (std::size_t i=0, iN=N(); i < iN; i++)
|
||||
m_last_RHS[i] = RHS(i);
|
||||
@ -306,8 +308,8 @@ unsigned matrix_solver_sm_t<m_N, storage_N>::vsolve_non_dynamic(const bool newto
|
||||
return this->solve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_sm_t<m_N, storage_N>::matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_sm_t<FT, SIZE>::matrix_solver_sm_t(netlist_base_t &anetlist, const pstring &name,
|
||||
const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_t(anetlist, name, NOSORT, params)
|
||||
, m_dim(size)
|
||||
|
@ -21,14 +21,21 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
class matrix_solver_SOR_t: public matrix_solver_direct_t<m_N, storage_N>
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_SOR_t: public matrix_solver_direct_t<FT, SIZE>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef FT float_type;
|
||||
|
||||
matrix_solver_SOR_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
|
||||
: matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::ASCENDING, params, size)
|
||||
, m_lp_fact(*this, "m_lp_fact", 0)
|
||||
, w(size, 0.0)
|
||||
, one_m_w(size, 0.0)
|
||||
, RHS(size, 0.0)
|
||||
, new_V(size, 0.0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -38,7 +45,11 @@ public:
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
|
||||
private:
|
||||
state_var<nl_double> m_lp_fact;
|
||||
state_var<float_type> m_lp_fact;
|
||||
std::vector<float_type> w;
|
||||
std::vector<float_type> one_m_w;
|
||||
std::vector<float_type> RHS;
|
||||
std::vector<float_type> new_V;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -46,14 +57,14 @@ private:
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_SOR_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_SOR_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
|
||||
matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_SOR_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
const std::size_t iN = this->N();
|
||||
bool resched = false;
|
||||
@ -67,24 +78,19 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
* omega = 2.0 / (1.0 + std::sqrt(1-rho))
|
||||
*/
|
||||
|
||||
const nl_double ws = this->m_params.m_gs_sor;
|
||||
|
||||
nl_double w[storage_N];
|
||||
nl_double one_m_w[storage_N];
|
||||
nl_double RHS[storage_N];
|
||||
nl_double new_V[storage_N];
|
||||
const float_type ws = this->m_params.m_gs_sor;
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double gtot_t = 0.0;
|
||||
nl_double gabs_t = 0.0;
|
||||
nl_double RHS_t = 0.0;
|
||||
float_type gtot_t = 0.0;
|
||||
float_type gabs_t = 0.0;
|
||||
float_type RHS_t = 0.0;
|
||||
|
||||
const std::size_t term_count = this->m_terms[k]->count();
|
||||
const nl_double * const RESTRICT gt = this->m_terms[k]->gt();
|
||||
const nl_double * const RESTRICT go = this->m_terms[k]->go();
|
||||
const nl_double * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const nl_double * const *other_cur_analog = this->m_terms[k]->connected_net_V();
|
||||
const float_type * const RESTRICT gt = this->m_terms[k]->gt();
|
||||
const float_type * const RESTRICT go = this->m_terms[k]->go();
|
||||
const float_type * const RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
const float_type * const *other_cur_analog = this->m_terms[k]->connected_net_V();
|
||||
|
||||
new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
|
||||
@ -123,22 +129,22 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
}
|
||||
}
|
||||
|
||||
const nl_double accuracy = this->m_params.m_accuracy;
|
||||
const float_type accuracy = this->m_params.m_accuracy;
|
||||
|
||||
do {
|
||||
resched = false;
|
||||
nl_double err = 0;
|
||||
float_type err = 0;
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
const int * RESTRICT net_other = this->m_terms[k]->connected_net_idx();
|
||||
const std::size_t railstart = this->m_terms[k]->m_railstart;
|
||||
const nl_double * RESTRICT go = this->m_terms[k]->go();
|
||||
const float_type * RESTRICT go = this->m_terms[k]->go();
|
||||
|
||||
nl_double Idrive = 0.0;
|
||||
float_type Idrive = 0.0;
|
||||
for (std::size_t i = 0; i < railstart; i++)
|
||||
Idrive = Idrive + go[i] * new_V[net_other[i]];
|
||||
Idrive = Idrive + go[i] * new_V[static_cast<std::size_t>(net_other[i])];
|
||||
|
||||
const nl_double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
|
||||
const float_type new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
|
||||
|
||||
err = std::max(std::abs(new_val - new_V[k]), err);
|
||||
new_V[k] = new_val;
|
||||
@ -158,7 +164,7 @@ unsigned matrix_solver_SOR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
{
|
||||
// Fallback to direct solver ...
|
||||
this->m_iterative_fail++;
|
||||
return matrix_solver_direct_t<m_N, storage_N>::vsolve_non_dynamic(newton_raphson);
|
||||
return matrix_solver_direct_t<FT, SIZE>::vsolve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
|
@ -22,20 +22,24 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<m_N, storage_N>
|
||||
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_SOR_mat_t: public matrix_solver_direct_t<FT, SIZE>
|
||||
{
|
||||
friend class matrix_solver_t;
|
||||
|
||||
public:
|
||||
|
||||
typedef FT float_type;
|
||||
|
||||
matrix_solver_SOR_mat_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, std::size_t size)
|
||||
: matrix_solver_direct_t<m_N, storage_N>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
|
||||
, m_Vdelta(*this, "m_Vdelta", 0.0)
|
||||
: matrix_solver_direct_t<FT, SIZE>(anetlist, name, matrix_solver_t::DESCENDING, params, size)
|
||||
, m_Vdelta(*this, "m_Vdelta", std::vector<float_type>(size))
|
||||
, m_omega(*this, "m_omega", params->m_gs_sor)
|
||||
, m_lp_fact(*this, "m_lp_fact", 0)
|
||||
, m_gs_fail(*this, "m_gs_fail", 0)
|
||||
, m_gs_total(*this, "m_gs_total", 0)
|
||||
, new_V(size, 0.0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -46,28 +50,31 @@ public:
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
|
||||
private:
|
||||
state_var<nl_double[storage_N]> m_Vdelta;
|
||||
//state_var<float_type[storage_N]> m_Vdelta;
|
||||
state_var<std::vector<float_type>> m_Vdelta;
|
||||
|
||||
state_var<nl_double> m_omega;
|
||||
state_var<nl_double> m_lp_fact;
|
||||
state_var<float_type> m_omega;
|
||||
state_var<float_type> m_lp_fact;
|
||||
state_var<int> m_gs_fail;
|
||||
state_var<int> m_gs_total;
|
||||
|
||||
std::vector<float_type> new_V;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// matrix_solver - Gauss - Seidel
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_SOR_mat_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_SOR_mat_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_direct_t<m_N, storage_N>::vsetup(nets);
|
||||
matrix_solver_direct_t<FT, SIZE>::vsetup(nets);
|
||||
}
|
||||
|
||||
#if 0
|
||||
//FIXME: move to solve_base
|
||||
template <unsigned m_N, unsigned storage_N>
|
||||
nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
|
||||
float_type matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
|
||||
{
|
||||
/*
|
||||
* enable linear prediction on first newton pass
|
||||
@ -89,13 +96,13 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
|
||||
|
||||
if (USE_LINEAR_PREDICTION)
|
||||
{
|
||||
nl_double sq = 0;
|
||||
nl_double sqo = 0;
|
||||
const nl_double rez_cts = 1.0 / this->current_timestep();
|
||||
float_type sq = 0;
|
||||
float_type sqo = 0;
|
||||
const float_type rez_cts = 1.0 / this->current_timestep();
|
||||
for (unsigned k = 0; k < this->N(); k++)
|
||||
{
|
||||
const analog_net_t *n = this->m_nets[k];
|
||||
const nl_double nv = (n->Q_Analog() - this->m_last_V[k]) * rez_cts ;
|
||||
const float_type nv = (n->Q_Analog() - this->m_last_V[k]) * rez_cts ;
|
||||
sq += nv * nv;
|
||||
sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
|
||||
this->m_Vdelta[k] = nv;
|
||||
@ -103,7 +110,7 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
|
||||
|
||||
// FIXME: used to be 1e90, but this would not be compatible with float
|
||||
if (sqo > NL_FCONST(1e-20))
|
||||
m_lp_fact = std::min(std::sqrt(sq/sqo), (nl_double) 2.0);
|
||||
m_lp_fact = std::min(std::sqrt(sq/sqo), (float_type) 2.0);
|
||||
else
|
||||
m_lp_fact = NL_FCONST(0.0);
|
||||
}
|
||||
@ -113,8 +120,8 @@ nl_double matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve()
|
||||
}
|
||||
#endif
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_SOR_mat_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
/* The matrix based code looks a lot nicer but actually is 30% slower than
|
||||
* the optimized code which works directly on the data structures.
|
||||
@ -122,11 +129,10 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
|
||||
*/
|
||||
|
||||
|
||||
nl_double new_v[storage_N] = { 0.0 };
|
||||
const std::size_t iN = this->N();
|
||||
|
||||
matrix_solver_t::build_LE_A<matrix_solver_SOR_mat_t>();
|
||||
matrix_solver_t::build_LE_RHS<matrix_solver_SOR_mat_t>();
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
|
||||
bool resched = false;
|
||||
|
||||
@ -139,19 +145,19 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
|
||||
if (1 && ws_cnt % 200 == 0)
|
||||
{
|
||||
// update omega
|
||||
nl_double lambdaN = 0;
|
||||
nl_double lambda1 = 1e9;
|
||||
float_type lambdaN = 0;
|
||||
float_type lambda1 = 1e9;
|
||||
for (int k = 0; k < iN; k++)
|
||||
{
|
||||
#if 0
|
||||
nl_double akk = std::abs(this->m_A[k][k]);
|
||||
float_type akk = std::abs(this->m_A[k][k]);
|
||||
if ( akk > lambdaN)
|
||||
lambdaN = akk;
|
||||
if (akk < lambda1)
|
||||
lambda1 = akk;
|
||||
#else
|
||||
nl_double akk = std::abs(this->m_A[k][k]);
|
||||
nl_double s = 0.0;
|
||||
float_type akk = std::abs(this->m_A[k][k]);
|
||||
float_type s = 0.0;
|
||||
for (int i=0; i<iN; i++)
|
||||
s = s + std::abs(this->m_A[k][i]);
|
||||
akk = s / akk - 1.0;
|
||||
@ -170,25 +176,25 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
|
||||
#endif
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
new_v[k] = this->m_nets[k]->Q_Analog();
|
||||
new_V[k] = this->m_nets[k]->Q_Analog();
|
||||
|
||||
do {
|
||||
resched = false;
|
||||
nl_double cerr = 0.0;
|
||||
float_type cerr = 0.0;
|
||||
|
||||
for (std::size_t k = 0; k < iN; k++)
|
||||
{
|
||||
nl_double Idrive = 0;
|
||||
float_type Idrive = 0;
|
||||
|
||||
const auto *p = this->m_terms[k]->m_nz.data();
|
||||
const std::size_t e = this->m_terms[k]->m_nz.size();
|
||||
|
||||
for (std::size_t i = 0; i < e; i++)
|
||||
Idrive = Idrive + this->A(k,p[i]) * new_v[p[i]];
|
||||
Idrive = Idrive + this->A(k,p[i]) * new_V[p[i]];
|
||||
|
||||
const nl_double delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
|
||||
const float_type delta = m_omega * (this->RHS(k) - Idrive) / this->A(k,k);
|
||||
cerr = std::max(cerr, std::abs(delta));
|
||||
new_v[k] += delta;
|
||||
new_V[k] += delta;
|
||||
}
|
||||
|
||||
if (cerr > this->m_params.m_accuracy)
|
||||
@ -208,10 +214,10 @@ unsigned matrix_solver_SOR_mat_t<m_N, storage_N>::vsolve_non_dynamic(const bool
|
||||
//this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
|
||||
this->m_gs_fail++;
|
||||
|
||||
return matrix_solver_direct_t<m_N, storage_N>::solve_non_dynamic(newton_raphson);
|
||||
return matrix_solver_direct_t<FT, SIZE>::solve_non_dynamic(newton_raphson);
|
||||
}
|
||||
else {
|
||||
this->store(new_v);
|
||||
this->store(new_V.data());
|
||||
return resched_cnt;
|
||||
}
|
||||
|
||||
|
@ -50,15 +50,18 @@ namespace netlist
|
||||
{
|
||||
namespace devices
|
||||
{
|
||||
//#define nl_ext_double _float128 // slow, very slow
|
||||
//#define nl_ext_double long double // slightly slower
|
||||
#define nl_ext_double nl_double
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
class matrix_solver_w_t: public matrix_solver_t
|
||||
{
|
||||
friend class matrix_solver_t;
|
||||
|
||||
public:
|
||||
typedef FT float_ext_type;
|
||||
typedef FT float_type;
|
||||
|
||||
// FIXME: dirty hack to make this compile
|
||||
static constexpr const std::size_t storage_N = 100;
|
||||
|
||||
matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name, const solver_parameters_t *params, const std::size_t size);
|
||||
|
||||
@ -71,7 +74,7 @@ protected:
|
||||
virtual unsigned vsolve_non_dynamic(const bool newton_raphson) override;
|
||||
unsigned solve_non_dynamic(const bool newton_raphson);
|
||||
|
||||
constexpr std::size_t N() const { return (m_N == 0) ? m_dim : m_N; }
|
||||
constexpr std::size_t N() const { return m_dim; }
|
||||
|
||||
void LE_invert();
|
||||
|
||||
@ -80,40 +83,40 @@ protected:
|
||||
|
||||
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
|
||||
float_ext_type &A(const T1 &r, const T2 &c) { return m_A[r][c]; }
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
|
||||
float_ext_type &W(const T1 &r, const T2 &c) { return m_W[r][c]; }
|
||||
|
||||
/* access to Ainv for fixed columns over row, there store transposed */
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; }
|
||||
float_ext_type &Ainv(const T1 &r, const T2 &c) { return m_Ainv[c][r]; }
|
||||
template <typename T1>
|
||||
nl_ext_double &RHS(const T1 &r) { return m_RHS[r]; }
|
||||
float_ext_type &RHS(const T1 &r) { return m_RHS[r]; }
|
||||
|
||||
|
||||
template <typename T1, typename T2>
|
||||
nl_ext_double &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
|
||||
float_ext_type &lA(const T1 &r, const T2 &c) { return m_lA[r][c]; }
|
||||
|
||||
nl_double m_last_RHS[storage_N]; // right hand side - contains currents
|
||||
float_type m_last_RHS[storage_N]; // right hand side - contains currents
|
||||
|
||||
private:
|
||||
static constexpr std::size_t m_pitch = ((( storage_N) + 7) / 8) * 8;
|
||||
nl_ext_double m_A[storage_N][m_pitch];
|
||||
nl_ext_double m_Ainv[storage_N][m_pitch];
|
||||
nl_ext_double m_W[storage_N][m_pitch];
|
||||
nl_ext_double m_RHS[storage_N]; // right hand side - contains currents
|
||||
float_ext_type m_A[storage_N][m_pitch];
|
||||
float_ext_type m_Ainv[storage_N][m_pitch];
|
||||
float_ext_type m_W[storage_N][m_pitch];
|
||||
float_ext_type m_RHS[storage_N]; // right hand side - contains currents
|
||||
|
||||
nl_ext_double m_lA[storage_N][m_pitch];
|
||||
float_ext_type m_lA[storage_N][m_pitch];
|
||||
|
||||
/* temporary */
|
||||
nl_double H[storage_N][m_pitch] ;
|
||||
float_type H[storage_N][m_pitch] ;
|
||||
unsigned rows[storage_N];
|
||||
unsigned cols[storage_N][m_pitch];
|
||||
unsigned colcount[storage_N];
|
||||
|
||||
unsigned m_cnt;
|
||||
|
||||
//nl_ext_double m_RHSx[storage_N];
|
||||
//float_ext_type m_RHSx[storage_N];
|
||||
|
||||
const std::size_t m_dim;
|
||||
|
||||
@ -123,13 +126,13 @@ private:
|
||||
// matrix_solver_direct
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_w_t<m_N, storage_N>::~matrix_solver_w_t()
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_w_t<FT, SIZE>::~matrix_solver_w_t()
|
||||
{
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_w_t<FT, SIZE>::vsetup(analog_net_t::list_t &nets)
|
||||
{
|
||||
matrix_solver_t::setup_base(nets);
|
||||
|
||||
@ -141,8 +144,8 @@ void matrix_solver_w_t<m_N, storage_N>::vsetup(analog_net_t::list_t &nets)
|
||||
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
void matrix_solver_w_t<m_N, storage_N>::LE_invert()
|
||||
template <typename FT, int SIZE>
|
||||
void matrix_solver_w_t<FT, SIZE>::LE_invert()
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
|
||||
@ -159,7 +162,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
|
||||
for (std::size_t i = 0; i < kN; i++)
|
||||
{
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / W(i,i);
|
||||
const float_type f = 1.0 / W(i,i);
|
||||
const auto * RESTRICT const p = m_terms[i]->m_nzrd.data();
|
||||
const size_t e = m_terms[i]->m_nzrd.size();
|
||||
|
||||
@ -170,7 +173,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
|
||||
for (std::size_t jb = 0; jb < eb; jb++)
|
||||
{
|
||||
const auto j = pb[jb];
|
||||
const nl_double f1 = - W(j,i) * f;
|
||||
const float_type f1 = - W(j,i) * f;
|
||||
if (f1 != 0.0)
|
||||
{
|
||||
for (std::size_t k = 0; k < e; k++)
|
||||
@ -184,10 +187,10 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
|
||||
for (std::size_t i = kN; i-- > 0; )
|
||||
{
|
||||
/* FIXME: Singular matrix? */
|
||||
const nl_double f = 1.0 / W(i,i);
|
||||
const float_type f = 1.0 / W(i,i);
|
||||
for (std::size_t j = i; j-- > 0; )
|
||||
{
|
||||
const nl_double f1 = - W(j,i) * f;
|
||||
const float_type f1 = - W(j,i) * f;
|
||||
if (f1 != 0.0)
|
||||
{
|
||||
for (std::size_t k = i; k < kN; k++)
|
||||
@ -203,9 +206,9 @@ void matrix_solver_w_t<m_N, storage_N>::LE_invert()
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
template <typename T>
|
||||
void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
|
||||
void matrix_solver_w_t<FT, SIZE>::LE_compute_x(
|
||||
T * RESTRICT x)
|
||||
{
|
||||
const std::size_t kN = N();
|
||||
@ -215,7 +218,7 @@ void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
|
||||
|
||||
for (std::size_t k=0; k<kN; k++)
|
||||
{
|
||||
const nl_double f = RHS(k);
|
||||
const float_type f = RHS(k);
|
||||
|
||||
for (std::size_t i=0; i<kN; i++)
|
||||
x[i] += Ainv(i,k) * f;
|
||||
@ -223,12 +226,12 @@ void matrix_solver_w_t<m_N, storage_N>::LE_compute_x(
|
||||
}
|
||||
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_w_t<FT, SIZE>::solve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
const auto iN = N();
|
||||
|
||||
nl_double new_V[storage_N]; // = { 0.0 };
|
||||
float_type new_V[storage_N]; // = { 0.0 };
|
||||
|
||||
if ((m_cnt % 100) == 0)
|
||||
{
|
||||
@ -266,7 +269,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
/* construct w = transform(V) * y
|
||||
* dim: rowcount x iN
|
||||
* */
|
||||
nl_double w[storage_N];
|
||||
float_type w[storage_N];
|
||||
for (unsigned i = 0; i < rowcount; i++)
|
||||
{
|
||||
const unsigned r = rows[i];
|
||||
@ -287,7 +290,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
for (unsigned k=0; k< colcount[i]; k++)
|
||||
{
|
||||
const unsigned col = cols[i][k];
|
||||
nl_double f = VT(rows[i],col);
|
||||
float_type f = VT(rows[i],col);
|
||||
if (f!=0.0)
|
||||
for (unsigned j= 0; j < rowcount; j++)
|
||||
H[i][j] += f * Ainv(col,rows[j]);
|
||||
@ -298,15 +301,15 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
{
|
||||
if (H[i][i] == 0.0)
|
||||
printf("%s H singular\n", this->name().c_str());
|
||||
const nl_double f = 1.0 / H[i][i];
|
||||
const float_type f = 1.0 / H[i][i];
|
||||
for (unsigned j = i+1; j < rowcount; j++)
|
||||
{
|
||||
const nl_double f1 = - f * H[j][i];
|
||||
const float_type f1 = - f * H[j][i];
|
||||
|
||||
if (f1!=0.0)
|
||||
{
|
||||
nl_double *pj = &H[j][i+1];
|
||||
const nl_double *pi = &H[i][i+1];
|
||||
float_type *pj = &H[j][i+1];
|
||||
const float_type *pi = &H[i][i+1];
|
||||
for (unsigned k = 0; k < rowcount-i-1; k++)
|
||||
pj[k] += f1 * pi[k];
|
||||
//H[j][k] += f1 * H[i][k];
|
||||
@ -316,12 +319,12 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
}
|
||||
/* Back substitution */
|
||||
//inv(H) w = t w = H t
|
||||
nl_double t[storage_N]; // FIXME: convert to member
|
||||
float_type t[storage_N]; // FIXME: convert to member
|
||||
for (unsigned j = rowcount; j-- > 0; )
|
||||
{
|
||||
nl_double tmp = 0;
|
||||
const nl_double *pj = &H[j][j+1];
|
||||
const nl_double *tj = &t[j+1];
|
||||
float_type tmp = 0;
|
||||
const float_type *pj = &H[j][j+1];
|
||||
const float_type *tj = &t[j+1];
|
||||
for (unsigned k = 0; k < rowcount-j-1; k++)
|
||||
tmp += pj[k] * tj[k];
|
||||
//tmp += H[j][k] * t[k];
|
||||
@ -331,7 +334,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
/* x = y - Zt */
|
||||
for (unsigned i=0; i<iN; i++)
|
||||
{
|
||||
nl_double tmp = 0.0;
|
||||
float_type tmp = 0.0;
|
||||
for (unsigned j=0; j<rowcount;j++)
|
||||
{
|
||||
const unsigned row = rows[j];
|
||||
@ -346,7 +349,7 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
if (0)
|
||||
for (unsigned i=0; i<iN; i++)
|
||||
{
|
||||
nl_double tmp = 0.0;
|
||||
float_type tmp = 0.0;
|
||||
for (unsigned j=0; j<iN; j++)
|
||||
{
|
||||
tmp += A(i,j) * new_V[j];
|
||||
@ -355,16 +358,16 @@ unsigned matrix_solver_w_t<m_N, storage_N>::solve_non_dynamic(const bool newton_
|
||||
printf("%s failed on row %d: %f RHS: %f\n", this->name().c_str(), i, std::abs(tmp-RHS(i)), RHS(i));
|
||||
}
|
||||
|
||||
const nl_double err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
const float_type err = (newton_raphson ? delta(new_V) : 0.0);
|
||||
store(new_V);
|
||||
return (err > this->m_params.m_accuracy) ? 2 : 1;
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
template <typename FT, int SIZE>
|
||||
unsigned matrix_solver_w_t<FT, SIZE>::vsolve_non_dynamic(const bool newton_raphson)
|
||||
{
|
||||
build_LE_A<matrix_solver_w_t>();
|
||||
build_LE_RHS<matrix_solver_w_t>();
|
||||
this->build_LE_A(*this);
|
||||
this->build_LE_RHS(*this);
|
||||
|
||||
for (std::size_t i=0, iN=N(); i < iN; i++)
|
||||
m_last_RHS[i] = RHS(i);
|
||||
@ -373,8 +376,8 @@ unsigned matrix_solver_w_t<m_N, storage_N>::vsolve_non_dynamic(const bool newton
|
||||
return this->solve_non_dynamic(newton_raphson);
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
matrix_solver_w_t<m_N, storage_N>::matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name,
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_w_t<FT, SIZE>::matrix_solver_w_t(netlist_base_t &anetlist, const pstring &name,
|
||||
const solver_parameters_t *params, const std::size_t size)
|
||||
: matrix_solver_t(anetlist, name, NOSORT, params)
|
||||
,m_cnt(0)
|
||||
|
@ -133,12 +133,12 @@ matrix_solver_t * create_it(netlist_base_t &nl, pstring name, solver_parameters_
|
||||
return plib::palloc<C>(nl, name, ¶ms, size);
|
||||
}
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pstring &solvername)
|
||||
{
|
||||
if (m_method() == "SOR_MAT")
|
||||
{
|
||||
return create_it<matrix_solver_SOR_mat_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_SOR_mat_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
//typedef matrix_solver_SOR_mat_t<m_N,storage_N> solver_sor_mat;
|
||||
//return plib::make_unique<solver_sor_mat>(state(), solvername, &m_params, size);
|
||||
}
|
||||
@ -146,34 +146,34 @@ matrix_solver_t * NETLIB_NAME(solver)::create_solver(std::size_t size, const pst
|
||||
{
|
||||
if (size > 0) // GCR always outperforms MAT solver
|
||||
{
|
||||
return create_it<matrix_solver_GCR_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_GCR_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
return create_it<matrix_solver_direct_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
}
|
||||
else if (m_method() == "MAT")
|
||||
{
|
||||
return create_it<matrix_solver_direct_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_direct_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_method() == "SM")
|
||||
{
|
||||
/* Sherman-Morrison Formula */
|
||||
return create_it<matrix_solver_sm_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_sm_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_method() == "W")
|
||||
{
|
||||
/* Woodbury Formula */
|
||||
return create_it<matrix_solver_w_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_w_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_method() == "SOR")
|
||||
{
|
||||
return create_it<matrix_solver_SOR_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_SOR_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else if (m_method() == "GMRES")
|
||||
{
|
||||
return create_it<matrix_solver_GMRES_t<m_N, storage_N>>(state(), solvername, m_params, size);
|
||||
return create_it<matrix_solver_GMRES_t<FT, SIZE>>(state(), solvername, m_params, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -288,64 +288,64 @@ void NETLIB_NAME(solver)::post_start()
|
||||
|
||||
switch (net_count)
|
||||
{
|
||||
#if 0
|
||||
#if 1
|
||||
case 1:
|
||||
if (use_specific)
|
||||
ms = plib::palloc<matrix_solver_direct1_t>(state(), sname, &m_params);
|
||||
ms = plib::palloc<matrix_solver_direct1_t<double>>(state(), sname, &m_params);
|
||||
else
|
||||
ms = create_solver<1,1>(1, sname);
|
||||
ms = create_solver<double, 1>(1, sname);
|
||||
break;
|
||||
case 2:
|
||||
if (use_specific)
|
||||
ms = plib::palloc<matrix_solver_direct2_t>(state(), sname, &m_params);
|
||||
ms = plib::palloc<matrix_solver_direct2_t<double>>(state(), sname, &m_params);
|
||||
else
|
||||
ms = create_solver<2,2>(2, sname);
|
||||
ms = create_solver<double, 2>(2, sname);
|
||||
break;
|
||||
#if 1
|
||||
case 3:
|
||||
ms = create_solver<3,3>(3, sname);
|
||||
ms = create_solver<double, 3>(3, sname);
|
||||
break;
|
||||
case 4:
|
||||
ms = create_solver<4,4>(4, sname);
|
||||
ms = create_solver<double, 4>(4, sname);
|
||||
break;
|
||||
case 5:
|
||||
ms = create_solver<5,5>(5, sname);
|
||||
ms = create_solver<double, 5>(5, sname);
|
||||
break;
|
||||
case 6:
|
||||
ms = create_solver<6,6>(6, sname);
|
||||
ms = create_solver<double, 6>(6, sname);
|
||||
break;
|
||||
case 7:
|
||||
ms = create_solver<7,7>(7, sname);
|
||||
ms = create_solver<double, 7>(7, sname);
|
||||
break;
|
||||
case 8:
|
||||
ms = create_solver<8,8>(8, sname);
|
||||
ms = create_solver<double, 8>(8, sname);
|
||||
break;
|
||||
case 9:
|
||||
ms = create_solver<9,9>(9, sname);
|
||||
ms = create_solver<double, 9>(9, sname);
|
||||
break;
|
||||
case 10:
|
||||
ms = create_solver<10,10>(10, sname);
|
||||
ms = create_solver<double, 10>(10, sname);
|
||||
break;
|
||||
case 11:
|
||||
ms = create_solver<11,11>(11, sname);
|
||||
ms = create_solver<double, 11>(11, sname);
|
||||
break;
|
||||
case 12:
|
||||
ms = create_solver<12,12>(12, sname);
|
||||
ms = create_solver<double, 12>(12, sname);
|
||||
break;
|
||||
case 15:
|
||||
ms = create_solver<15,15>(15, sname);
|
||||
ms = create_solver<double, 15>(15, sname);
|
||||
break;
|
||||
case 31:
|
||||
ms = create_solver<31,31>(31, sname);
|
||||
ms = create_solver<double, 31>(31, sname);
|
||||
break;
|
||||
case 35:
|
||||
ms = create_solver<35,35>(35, sname);
|
||||
ms = create_solver<double, 35>(35, sname);
|
||||
break;
|
||||
case 43:
|
||||
ms = create_solver<43,43>(43, sname);
|
||||
ms = create_solver<double, 43>(43, sname);
|
||||
break;
|
||||
case 49:
|
||||
ms = create_solver<49,49>(49, sname);
|
||||
ms = create_solver<double, 49>(49, sname);
|
||||
break;
|
||||
#endif
|
||||
#if 0
|
||||
@ -358,32 +358,31 @@ void NETLIB_NAME(solver)::post_start()
|
||||
log().warning(MW_1_NO_SPECIFIC_SOLVER, net_count);
|
||||
if (net_count <= 8)
|
||||
{
|
||||
ms = create_solver<0, 8>(net_count, sname);
|
||||
ms = create_solver<double, -8>(net_count, sname);
|
||||
}
|
||||
else if (net_count <= 16)
|
||||
{
|
||||
ms = create_solver<0,16>(net_count, sname);
|
||||
ms = create_solver<double, -16>(net_count, sname);
|
||||
}
|
||||
else if (net_count <= 32)
|
||||
{
|
||||
ms = create_solver<0,32>(net_count, sname);
|
||||
ms = create_solver<double, -32>(net_count, sname);
|
||||
}
|
||||
else
|
||||
if (net_count <= 64)
|
||||
{
|
||||
ms = create_solver<0,64>(net_count, sname);
|
||||
ms = create_solver<double, -64>(net_count, sname);
|
||||
}
|
||||
else
|
||||
if (net_count <= 128)
|
||||
{
|
||||
ms = create_solver<0,128>(net_count, sname);
|
||||
ms = create_solver<double, -128>(net_count, sname);
|
||||
}
|
||||
else
|
||||
{
|
||||
log().fatal(MF_1_NETGROUP_SIZE_EXCEEDED_1, 128);
|
||||
ms = nullptr; /* tease compilers */
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -102,7 +102,7 @@ private:
|
||||
|
||||
solver_parameters_t m_params;
|
||||
|
||||
template <std::size_t m_N, std::size_t storage_N>
|
||||
template <typename FT, int SIZE>
|
||||
matrix_solver_t * create_solver(std::size_t size, const pstring &solvername);
|
||||
};
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <type_traits>
|
||||
#include "../plib/pconfig.h"
|
||||
|
||||
#if 0
|
||||
@ -36,63 +37,50 @@ private:
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#endif
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
void vec_set (const std::size_t n, const T scalar, T (& RESTRICT v)[N])
|
||||
template<typename VT, typename T>
|
||||
void vec_set_scalar (const std::size_t n, VT &v, const T & scalar)
|
||||
{
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
v[i] = scalar;
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
v[i] = scalar;
|
||||
}
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
T vec_mult (const std::size_t n, const T (& RESTRICT v1)[N], const T (& RESTRICT v2)[N] )
|
||||
template<typename VT, typename VS>
|
||||
void vec_set (const std::size_t n, VT &v, const VS & source)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
v[i] = source [i];
|
||||
}
|
||||
|
||||
template<typename T, typename V1, typename V2>
|
||||
T vec_mult (const std::size_t n, const V1 & v1, const V2 & v2 )
|
||||
{
|
||||
T value = 0.0;
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
value += v1[i] * v2[i];
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
value += v1[i] * v2[i];
|
||||
return value;
|
||||
}
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
T vec_mult2 (const std::size_t n, const T (& RESTRICT v)[N])
|
||||
template<typename T, typename VT>
|
||||
T vec_mult2 (const std::size_t n, const VT &v)
|
||||
{
|
||||
T value = 0.0;
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
value += v[i] * v[i];
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
value += v[i] * v[i];
|
||||
return value;
|
||||
}
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
void vec_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T & scalar, T (& RESTRICT result)[N])
|
||||
template<typename VV, typename T, typename VR>
|
||||
void vec_mult_scalar (const std::size_t n, const VV & v, const T & scalar, VR & result)
|
||||
{
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = scalar * v[i];
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
result[i] = scalar * v[i];
|
||||
}
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
void vec_add_mult_scalar (const std::size_t n, const T (& RESTRICT v)[N], const T scalar, T (& RESTRICT result)[N])
|
||||
template<typename VV, typename T, typename VR>
|
||||
void vec_add_mult_scalar (const std::size_t n, const VV & v, const T scalar, VR & result)
|
||||
{
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = result[i] + scalar * v[i];
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
result[i] = result[i] + scalar * v[i];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
@ -102,37 +90,29 @@ void vec_add_mult_scalar_p(const std::size_t & n, const T * RESTRICT v, const T
|
||||
result[i] += scalar * v[i];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void vec_add_ip(const std::size_t n, const T * RESTRICT v, T * RESTRICT result)
|
||||
template<typename V, typename R>
|
||||
void vec_add_ip(const std::size_t n, const V & v, R & result)
|
||||
{
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] += v[i];
|
||||
}
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
void vec_sub(const std::size_t n, const T (& RESTRICT v1)[N], const T (& RESTRICT v2)[N], T (& RESTRICT result)[N])
|
||||
template<typename V1, typename V2, typename VR>
|
||||
void vec_sub(const std::size_t n, const V1 &v1, const V2 & v2, VR & result)
|
||||
{
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
result[i] = v1[i] - v2[i];
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
result[i] = v1[i] - v2[i];
|
||||
}
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
void vec_scale(const std::size_t n, T (& RESTRICT v)[N], const T scalar)
|
||||
template<typename V, typename T>
|
||||
void vec_scale(const std::size_t n, V & v, const T scalar)
|
||||
{
|
||||
if (n != N)
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
v[i] = scalar * v[i];
|
||||
else
|
||||
for ( std::size_t i = 0; i < N; i++ )
|
||||
v[i] = scalar * v[i];
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T vec_maxabs(const std::size_t n, const T * RESTRICT v)
|
||||
template<typename T, typename V>
|
||||
T vec_maxabs(const std::size_t n, const V & v)
|
||||
{
|
||||
T ret = 0.0;
|
||||
for ( std::size_t i = 0; i < n; i++ )
|
||||
|
@ -317,15 +317,16 @@ NETLIST_START(kidniki)
|
||||
PARAM(Solver.METHOD, "MAT_CR")
|
||||
//PARAM(Solver.METHOD, "MAT")
|
||||
//PARAM(Solver.METHOD, "GMRES")
|
||||
PARAM(Solver.SOR_FACTOR, 1.00)
|
||||
PARAM(Solver.SOR_FACTOR, 1.0)
|
||||
PARAM(Solver.DYNAMIC_TS, 0)
|
||||
PARAM(Solver.DYNAMIC_LTE, 5e-4)
|
||||
PARAM(Solver.DYNAMIC_MIN_TIMESTEP, 20e-6)
|
||||
#else
|
||||
SOLVER(Solver, 18000)
|
||||
PARAM(Solver.ACCURACY, 1e-6)
|
||||
PARAM(Solver.ACCURACY, 1e-7)
|
||||
PARAM(Solver.NR_LOOPS, 100)
|
||||
PARAM(Solver.GS_LOOPS, 20)
|
||||
//PARAM(Solver.METHOD, "MAT_CR")
|
||||
PARAM(Solver.METHOD, "GMRES")
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user