mirror of
https://github.com/holub/mame
synced 2025-10-06 09:00:04 +03:00
Quite a number of changes:
- templates to provide operations for fixed matrix sizes - sorting of nets to increase convergence - successive over relaxation - for popeye a parameter of 1.05 works best. - The code commented out may provide benefits on different architectures like true vector architectures. Popeye run peaks at 1800% (up 20%) and pong has a slight performance increase as well.
This commit is contained in:
parent
fbc460d564
commit
b2c552eadb
@ -15,43 +15,63 @@
|
||||
|
||||
#define USE_PIVOT_SEARCH (0)
|
||||
#define VECTALT 1
|
||||
#define USE_GABS 0
|
||||
#define USE_MATRIX_GS 0
|
||||
#define SORP 1.059
|
||||
|
||||
#define SOLVER_VERBOSE_OUT(x) do {} while (0)
|
||||
//#define SOLVER_VERBOSE_OUT(x) printf x
|
||||
|
||||
/* Commented out for now. Relatively low number of terminals / net makes
|
||||
/* Commented out for now. Relatively low number of terminals / nes makes
|
||||
* the vectorizations this enables pretty expensive
|
||||
*/
|
||||
|
||||
#if 0
|
||||
#pragma GCC optimize "-ffast-math"
|
||||
#pragma GCC optimize "-fvariable-expansion-in-unroller"
|
||||
//#pragma GCC optimize "-funroll-loops"
|
||||
#pragma GCC optimize "-funswitch-loops"
|
||||
#pragma GCC optimize "-fvariable-expansion-in-unroller"
|
||||
#pragma GCC optimize "-funsafe-loop-optimizations"
|
||||
#pragma GCC optimize "-ftree-loop-if-convert-stores"
|
||||
#pragma GCC optimize "-ftree-loop-distribution"
|
||||
#pragma GCC optimize "-ftree-loop-im"
|
||||
#pragma GCC optimize "-ftree-loop-ivcanon"
|
||||
#pragma GCC optimize "-fivopts"
|
||||
#pragma GCC optimize "-ftree-parallelize-loops=4"
|
||||
#pragma GCC optimize "-fvect-cost-model"
|
||||
#pragma GCC optimize "-fvariable-expansion-in-unroller"
|
||||
#endif
|
||||
|
||||
|
||||
static vector_t *create_vector(const int size)
|
||||
static vector_ops_t *create_ops(const int size)
|
||||
{
|
||||
switch (size)
|
||||
{
|
||||
case 1:
|
||||
return new vector_imp_t<1>();
|
||||
return new vector_ops_impl_t<1>();
|
||||
case 2:
|
||||
return new vector_imp_t<2>();
|
||||
return new vector_ops_impl_t<2>();
|
||||
case 3:
|
||||
return new vector_imp_t<3>();
|
||||
return new vector_ops_impl_t<3>();
|
||||
case 4:
|
||||
return new vector_imp_t<4>();
|
||||
return new vector_ops_impl_t<4>();
|
||||
case 5:
|
||||
return new vector_imp_t<5>();
|
||||
return new vector_ops_impl_t<5>();
|
||||
case 6:
|
||||
return new vector_imp_t<6>();
|
||||
return new vector_ops_impl_t<6>();
|
||||
case 7:
|
||||
return new vector_imp_t<7>();
|
||||
return new vector_ops_impl_t<7>();
|
||||
case 8:
|
||||
return new vector_imp_t<8>();
|
||||
return new vector_ops_impl_t<8>();
|
||||
case 9:
|
||||
return new vector_ops_impl_t<9>();
|
||||
case 10:
|
||||
return new vector_ops_impl_t<10>();
|
||||
case 11:
|
||||
return new vector_ops_impl_t<11>();
|
||||
case 12:
|
||||
return new vector_ops_impl_t<12>();
|
||||
default:
|
||||
return new vector_imp_t<0>(size);
|
||||
return new vector_ops_impl_t<0>(size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -62,6 +82,7 @@ ATTR_COLD void terms_t::add(netlist_terminal_t *term, int net_other)
|
||||
m_gt.add(0.0);
|
||||
m_go.add(0.0);
|
||||
m_Idr.add(0.0);
|
||||
m_other_curanalog.add(NULL);
|
||||
}
|
||||
|
||||
ATTR_COLD void terms_t::set_pointers()
|
||||
@ -71,9 +92,10 @@ ATTR_COLD void terms_t::set_pointers()
|
||||
m_term[i]->m_gt1 = &m_gt[i];
|
||||
m_term[i]->m_go1 = &m_go[i];
|
||||
m_term[i]->m_Idr1 = &m_Idr[i];
|
||||
m_other_curanalog[i] = &m_term[i]->m_otherterm->net().as_analog().m_cur_Analog;
|
||||
}
|
||||
|
||||
m_ops = create_vector(m_gt.count());
|
||||
m_ops = create_ops(m_gt.count());
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -172,6 +194,41 @@ ATTR_COLD void netlist_matrix_solver_t::setup(netlist_analog_net_t::list_t &nets
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// netlist_matrix_solver_direct
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
netlist_matrix_solver_direct_t<m_N, _storage_N>::netlist_matrix_solver_direct_t(int size)
|
||||
: netlist_matrix_solver_t()
|
||||
, m_dim(size)
|
||||
{
|
||||
m_terms = new terms_t *[N()];
|
||||
m_rails_temp = new terms_t[N()];
|
||||
|
||||
for (int k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k] = new terms_t;
|
||||
m_row_ops[k] = create_ops(k);
|
||||
}
|
||||
m_row_ops[N()] = create_ops(N());
|
||||
}
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
netlist_matrix_solver_direct_t<m_N, _storage_N>::~netlist_matrix_solver_direct_t()
|
||||
{
|
||||
for (int k=0; k<_storage_N; k++)
|
||||
{
|
||||
//delete[] m_A[k];
|
||||
}
|
||||
//delete[] m_last_RHS;
|
||||
//delete[] m_RHS;
|
||||
delete[] m_terms;
|
||||
delete[] m_rails_temp;
|
||||
//delete[] m_row_ops;
|
||||
|
||||
}
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep(const double hn)
|
||||
{
|
||||
@ -221,19 +278,12 @@ ATTR_HOT void netlist_matrix_solver_t::update_inputs()
|
||||
for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p))
|
||||
if ((*p)->m_proxied_net->m_last_Analog != (*p)->m_proxied_net->m_cur_Analog)
|
||||
(*p)->set_Q((*p)->m_proxied_net->m_cur_Analog);
|
||||
#if 1
|
||||
|
||||
for (int k = 0; k < m_nets.count(); k++)
|
||||
{
|
||||
netlist_analog_net_t *p= m_nets[k];
|
||||
p->m_last_Analog = p->m_cur_Analog;
|
||||
}
|
||||
#else
|
||||
for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p))
|
||||
{
|
||||
if ((*p)->m_proxied_net->m_last_Analog != (*p)->m_proxied_net->m_cur_Analog)
|
||||
(*p)->m_proxied_net->m_last_Analog = (*p)->m_proxied_net->m_cur_Analog;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -331,29 +381,6 @@ ATTR_HOT double netlist_matrix_solver_t::solve()
|
||||
return next_time_step;
|
||||
}
|
||||
|
||||
__attribute__ ((noinline)) static double tx(double * ATTR_ALIGN t , const int &N)
|
||||
{
|
||||
double tmp=0.0;
|
||||
for (int k = 0; k<N; k++)
|
||||
tmp += t[k] ;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void ttt()
|
||||
{
|
||||
//typedef int tt ;
|
||||
static double *t = (double *) malloc(128*8);
|
||||
for (int k = 0; k<128; k++)
|
||||
t[k] = k;
|
||||
double tmp;
|
||||
tmp = tx(t, 16);
|
||||
printf("t[0] %p %f\n", &t[0], t[0]);
|
||||
printf("t[1] %p %f\n", &t[1], t[1]);
|
||||
printf("%f\n", tmp);
|
||||
free(t);
|
||||
|
||||
}
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
void netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::log_stats()
|
||||
{
|
||||
@ -363,14 +390,12 @@ void netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::log_stats()
|
||||
printf(" ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
|
||||
printf(" has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
|
||||
printf(" has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
|
||||
printf(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %4.1f average\n",
|
||||
printf(" %10d invocations (%6d Hz) %10d gs fails (%6.2f%%) %6.3f average\n",
|
||||
this->m_calculations,
|
||||
this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
|
||||
this->m_gs_fail,
|
||||
100.0 * (double) this->m_gs_fail / (double) this->m_calculations,
|
||||
(double) this->m_gs_total / (double) this->m_calculations);
|
||||
ttt();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -391,21 +416,20 @@ ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::add_term(int k,
|
||||
{
|
||||
if (term->m_otherterm->net().isRailNet())
|
||||
{
|
||||
//m_nets[k].m_rails.add(pterm);
|
||||
m_rails[k].add(term, -1);
|
||||
m_rails_temp[k].add(term, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
int ot = get_net_idx(&term->m_otherterm->net());
|
||||
if (ot>=0)
|
||||
{
|
||||
m_terms[k].add(term, ot);
|
||||
m_terms[k]->add(term, ot);
|
||||
SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
|
||||
}
|
||||
/* Should this be allowed ? */
|
||||
else // if (ot<0)
|
||||
{
|
||||
m_rails[k].add(term, ot);
|
||||
m_rails_temp[k].add(term, ot);
|
||||
netlist().error("found term with missing othernet %s\n", term->name().cstr());
|
||||
}
|
||||
}
|
||||
@ -415,78 +439,110 @@ ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::add_term(int k,
|
||||
template <int m_N, int _storage_N>
|
||||
ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_analog_net_t::list_t &nets)
|
||||
{
|
||||
m_dim = nets.count();
|
||||
|
||||
if (m_dim < nets.count())
|
||||
netlist().error("Dimension %d less than %d", m_dim, nets.count());
|
||||
|
||||
for (int k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k].clear();
|
||||
m_rails[k].clear();
|
||||
m_terms[k]->clear();
|
||||
m_rails_temp[k].clear();
|
||||
}
|
||||
|
||||
netlist_matrix_solver_t::setup(nets);
|
||||
|
||||
for (int k = 0; k < N(); k++)
|
||||
{
|
||||
m_terms[k].set_pointers();
|
||||
m_rails[k].set_pointers();
|
||||
m_terms[k]->m_railstart = m_terms[k]->count();
|
||||
for (int i = 0; i < m_rails_temp[k].count(); i++)
|
||||
this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]);
|
||||
|
||||
m_rails_temp[k].clear(); // no longer needed
|
||||
m_terms[k]->set_pointers();
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
||||
/* Sort in descending order by number of connected matrix voltages.
|
||||
* The idea is, that for Gauss-Seidel algo the first voltage computed
|
||||
* depends on the greatest number of previous voltages thus taking into
|
||||
* account the maximum amout of information.
|
||||
*
|
||||
* This actually improves performance on popeye slightly. Average
|
||||
* GS computations reduce from 2.509 to 2.370
|
||||
*
|
||||
* Smallest to largest : 2.613
|
||||
* Unsorted : 2.509
|
||||
* Largest to smallest : 2.370
|
||||
*
|
||||
* Sorting as a general matrix pre-conditioning is mentioned in
|
||||
* literature but I have found no articles about Gauss Seidel.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
for (int k = 0; k < N() / 2; k++)
|
||||
for (int i = 0; i < N() - 1; i++)
|
||||
{
|
||||
if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart)
|
||||
{
|
||||
std::swap(m_terms[i],m_terms[i+1]);
|
||||
m_nets.swap(i, i+1);
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = 0; k < N(); k++)
|
||||
{
|
||||
int *other = m_terms[k]->net_other();
|
||||
for (int i = 0; i < m_terms[k]->count(); i++)
|
||||
if (other[i] != -1)
|
||||
other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE()
|
||||
{
|
||||
for (int k=0; k < _storage_N; k++)
|
||||
for (int i=0; i < _storage_N; i++)
|
||||
#if 0
|
||||
for (int k=0; k < N(); k++)
|
||||
for (int i=0; i < N(); i++)
|
||||
m_A[k][i] = 0.0;
|
||||
#endif
|
||||
|
||||
for (int k = 0; k < N(); k++)
|
||||
{
|
||||
for (int i=0; i < N(); i++)
|
||||
m_A[k][i] = 0.0;
|
||||
|
||||
double rhsk = 0.0;
|
||||
double akk = 0.0;
|
||||
{
|
||||
const int terms_count = m_terms[k].count();
|
||||
const double *gt = m_terms[k].gt();
|
||||
const double *Idr = m_terms[k].Idr();
|
||||
const int terms_count = m_terms[k]->count();
|
||||
const double * RESTRICT gt = m_terms[k]->gt();
|
||||
const double * RESTRICT go = m_terms[k]->go();
|
||||
const double * RESTRICT Idr = m_terms[k]->Idr();
|
||||
#if VECTALT
|
||||
|
||||
for (int i = 0; i < terms_count; i++)
|
||||
{
|
||||
//printf("A %d %d %s %f %f\n",t.net_this, t.net_other, t.term->name().cstr(), t.term->m_gt, t.term->m_go);
|
||||
|
||||
rhsk = rhsk + Idr[i];
|
||||
akk = akk + gt[i];
|
||||
//m_A[k][net_other[i]] += -go[i];
|
||||
}
|
||||
#else
|
||||
m_terms[k].ops()->sum2(Idr, gt, rhsk, akk);
|
||||
// rhsk += sum(m_terms[k].Idr(), terms_count);
|
||||
// akk += sum(m_terms[k].gt(), terms_count);
|
||||
#endif
|
||||
}
|
||||
{
|
||||
const int rails_count = m_rails[k].count();
|
||||
const netlist_terminal_t * const *rails = m_rails[k].terms();
|
||||
const double *gt = m_rails[k].gt();
|
||||
const double *Idr = m_rails[k].Idr();
|
||||
#if VECTALT
|
||||
|
||||
for (int i = 0; i < rails_count; i++)
|
||||
{
|
||||
rhsk = rhsk + Idr[i];
|
||||
akk = akk + gt[i];
|
||||
}
|
||||
#else
|
||||
m_rails[k].ops()->sum2(Idr, gt, rhsk, akk);
|
||||
//rhsk += sum(m_rails[k].Idr(), rails_count);
|
||||
//akk += sum(m_rails[k].gt(), rails_count);
|
||||
const netlist_terminal_t * const * terms = this->m_terms[k]->terms();
|
||||
m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk);
|
||||
#endif
|
||||
const double *go = m_rails[k].go();
|
||||
for (int i = 0; i < rails_count; i++)
|
||||
double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
|
||||
for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
|
||||
{
|
||||
rhsk = rhsk + go[i] * rails[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
//rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
rhsk = rhsk + go[i] * *other_cur_analog[i];
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
/*
|
||||
* Matrix preconditioning with 1.0 / Akk
|
||||
*
|
||||
@ -497,16 +553,29 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE()
|
||||
m_RHS[k] = rhsk * akk;
|
||||
m_A[k][k] += 1.0;
|
||||
{
|
||||
const int terms_count = m_terms[k].count();
|
||||
//const netlist_terminal_t * const *terms = m_terms[k].terms();
|
||||
const int *net_other = m_terms[k].net_other();
|
||||
const double *go = m_terms[k].go();
|
||||
const int *net_other = m_terms[k]->net_other();
|
||||
const double *go = m_terms[k]->go();
|
||||
const int railstart = m_terms[k]->m_railstart;
|
||||
|
||||
for (int i = 0; i < terms_count; i++)
|
||||
for (int i = 0; i < railstart; i++)
|
||||
{
|
||||
m_A[k][net_other[i]] += -go[i] * akk;
|
||||
}
|
||||
}
|
||||
#else
|
||||
m_RHS[k] = rhsk;
|
||||
m_A[k][k] += akk;
|
||||
{
|
||||
const int * RESTRICT net_other = m_terms[k]->net_other();
|
||||
const double * RESTRICT go = m_terms[k]->go();
|
||||
const int railstart = m_terms[k]->m_railstart;
|
||||
|
||||
for (int i = 0; i < railstart; i++)
|
||||
{
|
||||
m_A[k][net_other[i]] += -go[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -558,8 +627,13 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::gauss_LE(
|
||||
const double f1 = - m_A[j][i] * f;
|
||||
if (f1 != 0.0)
|
||||
{
|
||||
#if 0 && VECTALT
|
||||
for (int k = i + 1; k < kN; k++)
|
||||
m_A[j][k] += m_A[i][k] * f1;
|
||||
#else
|
||||
// addmult gives some performance increase here...
|
||||
m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ;
|
||||
#endif
|
||||
m_RHS[j] += m_RHS[i] * f1;
|
||||
}
|
||||
}
|
||||
@ -567,7 +641,6 @@ ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::gauss_LE(
|
||||
/* back substitution */
|
||||
for (int j = kN - 1; j >= 0; j--)
|
||||
{
|
||||
//__builtin_prefetch(&m_A[j-1][j], 0);
|
||||
double tmp = 0;
|
||||
|
||||
for (int k = j + 1; k < kN; k++)
|
||||
@ -726,7 +799,7 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
* Need something like that for gaussian elimination as well.
|
||||
*/
|
||||
|
||||
#if 0
|
||||
#if USE_MATRIX_GS
|
||||
ATTR_ALIGN double new_v[_storage_N] = { 0.0 };
|
||||
const int iN = this->N();
|
||||
|
||||
@ -740,19 +813,38 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
{
|
||||
new_v[k] = this->m_nets[k]->m_cur_Analog;
|
||||
}
|
||||
|
||||
// Frobenius norm for (D-L)^(-1)U
|
||||
double frobU;
|
||||
double frobL;
|
||||
double frob;
|
||||
double norm;
|
||||
do {
|
||||
resched = false;
|
||||
double cerr = 0.0;
|
||||
frobU = 0;
|
||||
frobL = 0;
|
||||
frob = 0;
|
||||
norm = 0;
|
||||
|
||||
for (int k = 0; k < iN; k++)
|
||||
{
|
||||
double Idrive = 0;
|
||||
|
||||
double norm_t = 0;
|
||||
// Reduction loops need -ffast-math
|
||||
for (int i = 0; i < iN; i++)
|
||||
Idrive -= this->m_A[k][i] * new_v[i];
|
||||
Idrive += this->m_A[k][i] * new_v[i];
|
||||
|
||||
const double new_val = (this->m_RHS[k] + Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
|
||||
for (int i = 0; i < iN; i++)
|
||||
{
|
||||
if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
|
||||
if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
|
||||
frob += this->m_A[k][i] * this->m_A[k][i];
|
||||
norm_t += fabs(this->m_A[k][i]);
|
||||
}
|
||||
|
||||
if (norm_t > norm) norm = norm_t;
|
||||
const double new_val = (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
|
||||
|
||||
const double e = fabs(new_val - new_v[k]);
|
||||
cerr = (e > cerr ? e : cerr);
|
||||
@ -765,6 +857,13 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
}
|
||||
resched_cnt++;
|
||||
} while (resched && (resched_cnt < this->m_params.m_gs_loops));
|
||||
printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), sqrt(frobU * frobL + (double) iN), sqrt(frob), norm);
|
||||
frobU = sqrt(frobU);
|
||||
frobL = sqrt(frobL);
|
||||
printf("Estimate Frobenius %f\n", 1.0 - (1.0 - frobU -frobL) / (1.0 - frobL) ); // printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
|
||||
|
||||
|
||||
this->store(new_v, false);
|
||||
|
||||
this->m_gs_total += resched_cnt;
|
||||
if (resched)
|
||||
@ -778,8 +877,6 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
else {
|
||||
this->m_calculations++;
|
||||
|
||||
this->store(new_v, false);
|
||||
|
||||
return resched_cnt;
|
||||
}
|
||||
|
||||
@ -787,10 +884,17 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
const int iN = this->N();
|
||||
bool resched = false;
|
||||
int resched_cnt = 0;
|
||||
/* some minor SOR helps on typical netlist matrices */
|
||||
|
||||
/* over-relaxation not really works on these matrices */
|
||||
//const double w = 1.0; //2.0 / (1.0 + sin(3.14159 / (m_nets.count()+1)));
|
||||
//const double w1 = 1.0 - w;
|
||||
/* ideally, we could get an estimate for the spectral radius of
|
||||
* Inv(D - L) * U
|
||||
*
|
||||
* and estimate using
|
||||
*
|
||||
* omega = 2.0 / (1.0 + sqrt(1-rho))
|
||||
*/
|
||||
|
||||
const double ws = SORP; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
|
||||
|
||||
ATTR_ALIGN double w[_storage_N];
|
||||
ATTR_ALIGN double one_m_w[_storage_N];
|
||||
@ -809,57 +913,46 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
double RHS_t = 0.0;
|
||||
|
||||
{
|
||||
const netlist_terminal_t * const * rails = this->m_rails[k].terms();
|
||||
//const int * othernet = this->m_rails[k].m_othernet;
|
||||
const int rail_count = this->m_rails[k].count();
|
||||
double *gt = this->m_rails[k].gt();
|
||||
const double *go = this->m_rails[k].go();
|
||||
const double *Idr = this->m_rails[k].Idr();
|
||||
#if VECTALT
|
||||
for (int i = 0; i < rail_count; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
gabs_t += fabs(go[i]);
|
||||
RHS_t += Idr[i];
|
||||
RHS_t += go[i] * rails[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
}
|
||||
#else
|
||||
this->m_rails[k].ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
|
||||
|
||||
for (int i = 0; i < rail_count; i++)
|
||||
RHS_t += go[i] * rails[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
#endif
|
||||
}
|
||||
{
|
||||
const int term_count = this->m_terms[k].count();
|
||||
const double *gt = this->m_terms[k].gt();
|
||||
const double *go = this->m_terms[k].go();
|
||||
const double *Idr = this->m_terms[k].Idr();
|
||||
const int term_count = this->m_terms[k]->count();
|
||||
const double * RESTRICT gt = this->m_terms[k]->gt();
|
||||
const double * RESTRICT go = this->m_terms[k]->go();
|
||||
const double * RESTRICT Idr = this->m_terms[k]->Idr();
|
||||
#if VECTALT
|
||||
for (int i = 0; i < term_count; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
gabs_t += fabs(go[i]);
|
||||
if (USE_GABS) gabs_t += fabs(go[i]);
|
||||
RHS_t += Idr[i];
|
||||
}
|
||||
#else
|
||||
this->m_terms[k].ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
|
||||
if (USE_GABS)
|
||||
this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
|
||||
else
|
||||
this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t);
|
||||
#endif
|
||||
double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
|
||||
for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
|
||||
//RHS_t += go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
|
||||
RHS_t += go[i] * *other_cur_analog[i];
|
||||
}
|
||||
|
||||
RHS[k] = RHS_t;
|
||||
|
||||
gabs_t *= 0.9; // avoid rounding issues
|
||||
if (gabs_t <= gtot_t)
|
||||
{
|
||||
const double ws = 1.0;
|
||||
//if (fabs(gabs_t - fabs(gtot_t)) > 1e-20)
|
||||
// printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t);
|
||||
|
||||
gabs_t *= 0.5; // avoid rounding issues
|
||||
if (!USE_GABS || gabs_t <= gtot_t)
|
||||
{
|
||||
w[k] = ws / gtot_t;
|
||||
one_m_w[k] = 1.0 - ws;
|
||||
}
|
||||
else
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
//printf("abs: %f tot: %f\n", gabs_t, gtot_t);
|
||||
w[k] = 1.0 / (gtot_t + gabs_t);
|
||||
one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -869,14 +962,15 @@ ATTR_HOT int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_d
|
||||
|
||||
for (int k = 0; k < iN; k++)
|
||||
{
|
||||
const int * net_other = this->m_terms[k].net_other();
|
||||
const int term_count = this->m_terms[k].count();
|
||||
const double *go = this->m_terms[k].go();
|
||||
const int * RESTRICT net_other = this->m_terms[k]->net_other();
|
||||
const int railstart = this->m_terms[k]->m_railstart;
|
||||
const double * RESTRICT go = this->m_terms[k]->go();
|
||||
// -msse2 -msse3 -msse4.1 -msse4.2 -mfpmath=sse -ftree-vectorizer-verbose=3 -fprefetch-loop-arrays -ffast-math
|
||||
|
||||
double Idrive = 0;
|
||||
double Idrive;
|
||||
|
||||
for (int i = 0; i < term_count; i++)
|
||||
Idrive = 0.0;
|
||||
for (int i = 0; i < railstart; i++)
|
||||
Idrive = Idrive + go[i] * new_V[net_other[i]];
|
||||
|
||||
//double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]);
|
||||
@ -931,6 +1025,7 @@ NETLIB_START(solver)
|
||||
|
||||
register_param("ACCURACY", m_accuracy, 1e-7);
|
||||
register_param("GS_LOOPS", m_gs_loops, 9); // Gauss-Seidel loops
|
||||
//register_param("GS_THRESHOLD", m_gs_threshold, 99); // below this value, gaussian elimination is used
|
||||
register_param("GS_THRESHOLD", m_gs_threshold, 5); // below this value, gaussian elimination is used
|
||||
register_param("NR_LOOPS", m_nr_loops, 25); // Newton-Raphson loops
|
||||
register_param("PARALLEL", m_parallel, 0);
|
||||
@ -1019,7 +1114,7 @@ NETLIB_UPDATE(solver)
|
||||
}
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(const int gs_threshold, const bool use_specific)
|
||||
netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int gs_threshold, const bool use_specific)
|
||||
{
|
||||
if (use_specific && m_N == 1)
|
||||
return new netlist_matrix_solver_direct1_t();
|
||||
@ -1027,10 +1122,10 @@ netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(const int gs_thresh
|
||||
return new netlist_matrix_solver_direct2_t();
|
||||
else
|
||||
{
|
||||
if (_storage_N >= gs_threshold)
|
||||
return new netlist_matrix_solver_gauss_seidel_t<m_N,_storage_N>();
|
||||
if (size >= gs_threshold)
|
||||
return new netlist_matrix_solver_gauss_seidel_t<m_N,_storage_N>(size);
|
||||
else
|
||||
return new netlist_matrix_solver_direct_t<m_N, _storage_N>();
|
||||
return new netlist_matrix_solver_direct_t<m_N, _storage_N>(size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1087,45 +1182,45 @@ ATTR_COLD void NETLIB_NAME(solver)::post_start()
|
||||
{
|
||||
#if 1
|
||||
case 1:
|
||||
ms = create_solver<1,1>(gs_threshold, use_specific);
|
||||
ms = create_solver<1,1>(1, gs_threshold, use_specific);
|
||||
break;
|
||||
case 2:
|
||||
ms = create_solver<2,2>(gs_threshold, use_specific);
|
||||
ms = create_solver<2,2>(2, gs_threshold, use_specific);
|
||||
break;
|
||||
case 3:
|
||||
ms = create_solver<3,3>(gs_threshold, use_specific);
|
||||
ms = create_solver<3,3>(3, gs_threshold, use_specific);
|
||||
break;
|
||||
case 4:
|
||||
ms = create_solver<4,4>(gs_threshold, use_specific);
|
||||
ms = create_solver<4,4>(4, gs_threshold, use_specific);
|
||||
break;
|
||||
case 5:
|
||||
ms = create_solver<5,5>(gs_threshold, use_specific);
|
||||
ms = create_solver<5,5>(5, gs_threshold, use_specific);
|
||||
break;
|
||||
case 6:
|
||||
ms = create_solver<6,6>(gs_threshold, use_specific);
|
||||
ms = create_solver<6,6>(6, gs_threshold, use_specific);
|
||||
break;
|
||||
case 7:
|
||||
ms = create_solver<7,7>(gs_threshold, use_specific);
|
||||
ms = create_solver<7,7>(7, gs_threshold, use_specific);
|
||||
break;
|
||||
case 8:
|
||||
ms = create_solver<8,8>(gs_threshold, use_specific);
|
||||
ms = create_solver<8,8>(8, gs_threshold, use_specific);
|
||||
break;
|
||||
case 12:
|
||||
ms = create_solver<12,12>(gs_threshold, use_specific);
|
||||
ms = create_solver<12,12>(12, gs_threshold, use_specific);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
default:
|
||||
if (net_count <= 16)
|
||||
{
|
||||
ms = create_solver<0,16>(gs_threshold, use_specific);
|
||||
ms = create_solver<0,16>(net_count, gs_threshold, use_specific);
|
||||
}
|
||||
else if (net_count <= 32)
|
||||
{
|
||||
ms = create_solver<0,32>(gs_threshold, use_specific);
|
||||
ms = create_solver<0,32>(net_count, gs_threshold, use_specific);
|
||||
}
|
||||
else if (net_count <= 64)
|
||||
{
|
||||
ms = create_solver<0,64>(gs_threshold, use_specific);
|
||||
ms = create_solver<0,64>(net_count, gs_threshold, use_specific);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -11,6 +11,9 @@
|
||||
|
||||
//#define ATTR_ALIGNED(N) __attribute__((aligned(N)))
|
||||
#define ATTR_ALIGNED(N) ATTR_ALIGN
|
||||
//#undef RESTRICT
|
||||
//#define RESTRICT
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// Macros
|
||||
@ -40,22 +43,23 @@ struct netlist_solver_parameters_t
|
||||
netlist_time m_nt_sync_delay;
|
||||
};
|
||||
|
||||
class vector_t
|
||||
class vector_ops_t
|
||||
{
|
||||
public:
|
||||
|
||||
vector_t(int size)
|
||||
vector_ops_t(int size)
|
||||
: m_dim(size)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~vector_t() {}
|
||||
virtual ~vector_ops_t() {}
|
||||
|
||||
ATTR_ALIGNED(64) double * RESTRICT m_V;
|
||||
|
||||
virtual const double sum(const double * v) = 0;
|
||||
virtual void sum2(const double * v1, const double * v2, double &s1, double &s2) = 0;
|
||||
virtual void sum2a(const double * v1, const double * v2, const double * v3abs, double &s1, double &s2, double &s3abs) = 0;
|
||||
virtual void sum2(const double * RESTRICT v1, const double * RESTRICT v2, double & RESTRICT s1, double & RESTRICT s2) = 0;
|
||||
virtual void addmult(double * RESTRICT v1, const double * RESTRICT v2, const double &mult) = 0;
|
||||
virtual void sum2a(const double * RESTRICT v1, const double * RESTRICT v2, const double * RESTRICT v3abs, double & RESTRICT s1, double & RESTRICT s2, double & RESTRICT s3abs) = 0;
|
||||
|
||||
virtual const double sumabs(const double * v) = 0;
|
||||
|
||||
@ -67,35 +71,35 @@ private:
|
||||
};
|
||||
|
||||
template <int m_N>
|
||||
class vector_imp_t : public vector_t
|
||||
class vector_ops_impl_t : public vector_ops_t
|
||||
{
|
||||
public:
|
||||
|
||||
vector_imp_t()
|
||||
: vector_t(m_N)
|
||||
vector_ops_impl_t()
|
||||
: vector_ops_t(m_N)
|
||||
{
|
||||
}
|
||||
|
||||
vector_imp_t(int size)
|
||||
: vector_t(size)
|
||||
vector_ops_impl_t(int size)
|
||||
: vector_ops_t(size)
|
||||
{
|
||||
assert(m_N == 0);
|
||||
}
|
||||
|
||||
virtual ~vector_imp_t() {}
|
||||
virtual ~vector_ops_impl_t() {}
|
||||
|
||||
ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; }
|
||||
|
||||
const double sum(const double * v)
|
||||
{
|
||||
const double * RESTRICT vl = v;
|
||||
const double * RESTRICT vl = v;
|
||||
double tmp = 0.0;
|
||||
for (int i=0; i < N(); i++)
|
||||
tmp += vl[i];
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void sum2(const double * v1, const double * v2, double &s1, double &s2)
|
||||
void sum2(const double * RESTRICT v1, const double * RESTRICT v2, double & RESTRICT s1, double & RESTRICT s2)
|
||||
{
|
||||
const double * RESTRICT v1l = v1;
|
||||
const double * RESTRICT v2l = v2;
|
||||
@ -106,7 +110,17 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void sum2a(const double * v1, const double * v2, const double * v3abs, double &s1, double &s2, double &s3abs)
|
||||
void addmult(double * RESTRICT v1, const double * RESTRICT v2, const double &mult)
|
||||
{
|
||||
double * RESTRICT v1l = v1;
|
||||
const double * RESTRICT v2l = v2;
|
||||
for (int i=0; i < N(); i++)
|
||||
{
|
||||
v1l[i] += v2l[i] * mult;
|
||||
}
|
||||
}
|
||||
|
||||
void sum2a(const double * RESTRICT v1, const double * RESTRICT v2, const double * RESTRICT v3abs, double & RESTRICT s1, double & RESTRICT s2, double & RESTRICT s3abs)
|
||||
{
|
||||
const double * RESTRICT v1l = v1;
|
||||
const double * RESTRICT v2l = v2;
|
||||
@ -133,6 +147,8 @@ private:
|
||||
|
||||
class ATTR_ALIGNED(64) terms_t
|
||||
{
|
||||
NETLIST_PREVENT_COPYING(terms_t)
|
||||
|
||||
public:
|
||||
ATTR_COLD terms_t() {}
|
||||
|
||||
@ -152,17 +168,21 @@ class ATTR_ALIGNED(64) terms_t
|
||||
ATTR_HOT inline double *gt() { return m_gt; }
|
||||
ATTR_HOT inline double *go() { return m_go; }
|
||||
ATTR_HOT inline double *Idr() { return m_Idr; }
|
||||
ATTR_HOT vector_t *ops() { return m_ops; }
|
||||
ATTR_HOT inline double **other_curanalog() { return m_other_curanalog; }
|
||||
ATTR_HOT vector_ops_t *ops() { return m_ops; }
|
||||
|
||||
ATTR_COLD void set_pointers();
|
||||
|
||||
int m_railstart;
|
||||
|
||||
private:
|
||||
plinearlist_t<netlist_terminal_t *> m_term;
|
||||
plinearlist_t<int> m_net_other;
|
||||
plinearlist_t<double> m_gt;
|
||||
plinearlist_t<double> m_go;
|
||||
plinearlist_t<double> m_Idr;
|
||||
vector_t * m_ops;
|
||||
plinearlist_t<double *> m_other_curanalog;
|
||||
vector_ops_t * m_ops;
|
||||
};
|
||||
|
||||
class netlist_matrix_solver_t : public netlist_device_t
|
||||
@ -230,19 +250,13 @@ private:
|
||||
};
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
class ATTR_ALIGNED(64) netlist_matrix_solver_direct_t: public netlist_matrix_solver_t
|
||||
class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t
|
||||
{
|
||||
public:
|
||||
|
||||
netlist_matrix_solver_direct_t()
|
||||
: netlist_matrix_solver_t()
|
||||
, m_dim(0)
|
||||
{
|
||||
for (int k=0; k<_storage_N; k++)
|
||||
m_A[k] = & m_A_phys[k][0];
|
||||
}
|
||||
netlist_matrix_solver_direct_t(int size);
|
||||
|
||||
virtual ~netlist_matrix_solver_direct_t() {}
|
||||
virtual ~netlist_matrix_solver_direct_t();
|
||||
|
||||
ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets);
|
||||
ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); }
|
||||
@ -261,15 +275,16 @@ protected:
|
||||
|
||||
ATTR_HOT virtual double compute_next_timestep(const double);
|
||||
|
||||
ATTR_ALIGNED(64) double * RESTRICT m_A[_storage_N];
|
||||
ATTR_ALIGNED(64) double m_RHS[_storage_N];
|
||||
ATTR_ALIGNED(64) double m_last_RHS[_storage_N]; // right hand side - contains currents
|
||||
double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
double m_RHS[_storage_N];
|
||||
double m_last_RHS[_storage_N]; // right hand side - contains currents
|
||||
|
||||
terms_t m_terms[_storage_N];
|
||||
terms_t m_rails[_storage_N];
|
||||
terms_t **m_terms;
|
||||
|
||||
terms_t *m_rails_temp;
|
||||
|
||||
private:
|
||||
ATTR_ALIGNED(64) double m_A_phys[_storage_N][((_storage_N + 7) / 8) * 8];
|
||||
vector_ops_t *m_row_ops[_storage_N + 1];
|
||||
|
||||
int m_dim;
|
||||
};
|
||||
@ -279,8 +294,8 @@ class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matr
|
||||
{
|
||||
public:
|
||||
|
||||
netlist_matrix_solver_gauss_seidel_t()
|
||||
: netlist_matrix_solver_direct_t<m_N, _storage_N>()
|
||||
netlist_matrix_solver_gauss_seidel_t(int size)
|
||||
: netlist_matrix_solver_direct_t<m_N, _storage_N>(size)
|
||||
, m_gs_fail(0)
|
||||
, m_gs_total(0)
|
||||
{}
|
||||
@ -300,6 +315,11 @@ private:
|
||||
|
||||
class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1>
|
||||
{
|
||||
public:
|
||||
|
||||
netlist_matrix_solver_direct1_t()
|
||||
: netlist_matrix_solver_direct_t<1, 1>(1)
|
||||
{}
|
||||
protected:
|
||||
ATTR_HOT int vsolve_non_dynamic();
|
||||
private:
|
||||
@ -307,6 +327,11 @@ private:
|
||||
|
||||
class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2>
|
||||
{
|
||||
public:
|
||||
|
||||
netlist_matrix_solver_direct2_t()
|
||||
: netlist_matrix_solver_direct_t<2, 2>(2)
|
||||
{}
|
||||
protected:
|
||||
ATTR_HOT int vsolve_non_dynamic();
|
||||
private:
|
||||
@ -352,7 +377,7 @@ private:
|
||||
netlist_solver_parameters_t m_params;
|
||||
|
||||
template <int m_N, int _storage_N>
|
||||
netlist_matrix_solver_t *create_solver(int gs_threshold, bool use_specific);
|
||||
netlist_matrix_solver_t *create_solver(int size, int gs_threshold, bool use_specific);
|
||||
};
|
||||
|
||||
|
||||
|
@ -116,6 +116,15 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
ATTR_HOT inline void swap(const int pos1, const int pos2)
|
||||
{
|
||||
assert((pos1>=0) && (pos1<m_count));
|
||||
assert((pos2>=0) && (pos2<m_count));
|
||||
_ListClass tmp = m_list[pos1];
|
||||
m_list[pos1] = m_list[pos2];
|
||||
m_list[pos2] =tmp;
|
||||
}
|
||||
|
||||
ATTR_HOT inline bool contains(const _ListClass &elem) const
|
||||
{
|
||||
for (_ListClass *i = m_list; i < m_list + m_count; i++)
|
||||
|
Loading…
Reference in New Issue
Block a user