mirror of
https://github.com/holub/mame
synced 2025-07-01 16:19:38 +03:00
Fix seldom-used conditional build options.
- Fixed OPENMP compile. For congo bongo, using Solver.PARALLEL=1 significantly increases performance from 270% to 380%. However, this has to be taken with a grain of salt: enabling this on predominantly logic netlists can severely kill performance. - Increased readability of timed queue code.
This commit is contained in:
parent
b1c3586789
commit
5cd17c361f
@ -51,6 +51,7 @@ NETLIST_START(dummy)
|
|||||||
#if USE_OPTMIZATIONS
|
#if USE_OPTMIZATIONS
|
||||||
SOLVER(Solver, 24000)
|
SOLVER(Solver, 24000)
|
||||||
PARAM(Solver.DYNAMIC_TS, 0 )
|
PARAM(Solver.DYNAMIC_TS, 0 )
|
||||||
|
PARAM(Solver.PARALLEL, 1)
|
||||||
#else
|
#else
|
||||||
SOLVER(Solver, 24000)
|
SOLVER(Solver, 24000)
|
||||||
PARAM(Solver.DYNAMIC_TS, 1)
|
PARAM(Solver.DYNAMIC_TS, 1)
|
||||||
|
@ -113,6 +113,8 @@
|
|||||||
|
|
||||||
#if defined(OPENMP)
|
#if defined(OPENMP)
|
||||||
#define HAS_OPENMP ( OPENMP >= 200805 )
|
#define HAS_OPENMP ( OPENMP >= 200805 )
|
||||||
|
#elif defined(_OPENMP)
|
||||||
|
#define HAS_OPENMP ( _OPENMP >= 200805 )
|
||||||
#else
|
#else
|
||||||
#define HAS_OPENMP (0)
|
#define HAS_OPENMP (0)
|
||||||
#endif
|
#endif
|
||||||
|
@ -16,13 +16,40 @@
|
|||||||
#include "plib/plists.h"
|
#include "plib/plists.h"
|
||||||
#include "plib/pchrono.h"
|
#include "plib/pchrono.h"
|
||||||
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------------------
|
||||||
// timed queue
|
// timed queue
|
||||||
// ----------------------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
namespace netlist
|
namespace netlist
|
||||||
{
|
{
|
||||||
|
|
||||||
|
//FIXME: move to an appropriate place
|
||||||
|
// Busy-waiting lock built on a single std::atomic_flag.
// The enabled_ parameter is not referenced inside this primary template;
// the <false> case is covered by a separate explicit specialization.
template<bool enabled_ = true>
|
||||||
|
class pspin_lock
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Nothing to do here: m_lock is initialized by its in-class initializer.
pspin_lock() { }
|
||||||
|
// Spins until test_and_set() returns false, i.e. until the flag was
// previously clear; uses acquire memory ordering.
void acquire() noexcept{ while (m_lock.test_and_set(std::memory_order_acquire)) { } }
|
||||||
|
// Clears the flag with release memory ordering so that a spinning
// acquire() can succeed.
void release() noexcept { m_lock.clear(std::memory_order_release); }
|
||||||
|
private:
|
||||||
|
// Lock state: set while held, clear otherwise. Starts clear.
std::atomic_flag m_lock = ATOMIC_FLAG_INIT;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Specialization for enabled_ == false: same acquire()/release() interface
// as the primary template, but both calls are empty and the class holds no
// state.
template<>
|
||||||
|
class pspin_lock<false>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// No-op.
void acquire() const noexcept { }
|
||||||
|
// No-op.
void release() const noexcept { }
|
||||||
|
};
|
||||||
|
|
||||||
|
#if HAS_OPENMP && USE_OPENMP
|
||||||
|
using tqlock = pspin_lock<true>;
|
||||||
|
#else
|
||||||
|
using tqlock = pspin_lock<false>;
|
||||||
|
#endif
|
||||||
|
|
||||||
template <class Element, class Time>
|
template <class Element, class Time>
|
||||||
class timed_queue
|
class timed_queue
|
||||||
{
|
{
|
||||||
@ -38,21 +65,18 @@ namespace netlist
|
|||||||
timed_queue(unsigned list_size)
|
timed_queue(unsigned list_size)
|
||||||
: m_list(list_size)
|
: m_list(list_size)
|
||||||
{
|
{
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
m_lock.acquire();
|
||||||
m_lock = 0;
|
|
||||||
#endif
|
|
||||||
clear();
|
clear();
|
||||||
|
m_lock.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t capacity() const { return m_list.size(); }
|
std::size_t capacity() const { return m_list.size(); }
|
||||||
bool empty() const { return (m_end == &m_list[1]); }
|
bool empty() const { return (m_end == &m_list[1]); }
|
||||||
|
|
||||||
void push(Element o, const Time t) noexcept
|
void push(Element o, const Time t) noexcept
|
||||||
{
|
{
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
|
||||||
/* Lock */
|
/* Lock */
|
||||||
while (m_lock.exchange(1)) { }
|
m_lock.acquire();
|
||||||
#endif
|
|
||||||
entry_t * i = m_end;
|
entry_t * i = m_end;
|
||||||
for (; t > (i - 1)->m_exec_time; --i)
|
for (; t > (i - 1)->m_exec_time; --i)
|
||||||
{
|
{
|
||||||
@ -62,9 +86,7 @@ namespace netlist
|
|||||||
*i = { t, o };
|
*i = { t, o };
|
||||||
++m_end;
|
++m_end;
|
||||||
m_prof_call.inc();
|
m_prof_call.inc();
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
m_lock.release();
|
||||||
m_lock = 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
entry_t pop() noexcept { return *(--m_end); }
|
entry_t pop() noexcept { return *(--m_end); }
|
||||||
@ -73,9 +95,7 @@ namespace netlist
|
|||||||
void remove(const Element &elem) noexcept
|
void remove(const Element &elem) noexcept
|
||||||
{
|
{
|
||||||
/* Lock */
|
/* Lock */
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
m_lock.acquire();
|
||||||
while (m_lock.exchange(1)) { }
|
|
||||||
#endif
|
|
||||||
for (entry_t * i = m_end - 1; i > &m_list[0]; i--)
|
for (entry_t * i = m_end - 1; i > &m_list[0]; i--)
|
||||||
{
|
{
|
||||||
if (i->m_object == elem)
|
if (i->m_object == elem)
|
||||||
@ -86,15 +106,11 @@ namespace netlist
|
|||||||
*i = *(i+1);
|
*i = *(i+1);
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
m_lock.release();
|
||||||
m_lock = 0;
|
|
||||||
#endif
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
m_lock.release();
|
||||||
m_lock = 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void retime(const Element &elem, const Time t) noexcept
|
void retime(const Element &elem, const Time t) noexcept
|
||||||
@ -122,9 +138,7 @@ namespace netlist
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
#if HAS_OPENMP && USE_OPENMP
|
tqlock m_lock;
|
||||||
volatile std::atomic<int> m_lock;
|
|
||||||
#endif
|
|
||||||
entry_t * m_end;
|
entry_t * m_end;
|
||||||
std::vector<entry_t> m_list;
|
std::vector<entry_t> m_list;
|
||||||
|
|
||||||
|
@ -22,6 +22,9 @@
|
|||||||
#include "plib/pstream.h"
|
#include "plib/pstream.h"
|
||||||
|
|
||||||
#define NL_USE_SSE 0
|
#define NL_USE_SSE 0
|
||||||
|
#if NL_USE_SSE
|
||||||
|
#include <mmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace netlist
|
namespace netlist
|
||||||
{
|
{
|
||||||
@ -263,16 +266,16 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
|||||||
const nl_double * const * RESTRICT other_cur_analog = t->connected_net_V();
|
const nl_double * const * RESTRICT other_cur_analog = t->connected_net_V();
|
||||||
|
|
||||||
#if (0 ||NL_USE_SSE)
|
#if (0 ||NL_USE_SSE)
|
||||||
__m128d mg = mm_set_pd(0.0, 0.0);
|
__m128d mg = _mm_set_pd(0.0, 0.0);
|
||||||
__m128d mr = mm_set_pd(0.0, 0.0);
|
__m128d mr = _mm_set_pd(0.0, 0.0);
|
||||||
unsigned i = 0;
|
unsigned i = 0;
|
||||||
for (; i < term_count - 1; i+=2)
|
for (; i < term_count - 1; i+=2)
|
||||||
{
|
{
|
||||||
mg = mm_add_pd(mg, mm_loadu_pd(>[i]));
|
mg = _mm_add_pd(mg, _mm_loadu_pd(>[i]));
|
||||||
mr = mm_add_pd(mr, mm_loadu_pd(&Idr[i]));
|
mr = _mm_add_pd(mr, _mm_loadu_pd(&Idr[i]));
|
||||||
}
|
}
|
||||||
gtot_t = mm_cvtsd_f64(mg) + mm_cvtsd_f64(mm_unpackhi_pd(mg,mg));
|
gtot_t = _mm_cvtsd_f64(mg) + _mm_cvtsd_f64(_mm_unpackhi_pd(mg,mg));
|
||||||
RHS_t = mm_cvtsd_f64(mr) + mm_cvtsd_f64(mm_unpackhi_pd(mr,mr));
|
RHS_t = _mm_cvtsd_f64(mr) + _mm_cvtsd_f64(_mm_unpackhi_pd(mr,mr));
|
||||||
for (; i < term_count; i++)
|
for (; i < term_count; i++)
|
||||||
{
|
{
|
||||||
gtot_t += gt[i];
|
gtot_t += gt[i];
|
||||||
@ -356,16 +359,16 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
|||||||
//__builtin_prefetch(&new_V[j-1], 1);
|
//__builtin_prefetch(&new_V[j-1], 1);
|
||||||
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
|
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
|
||||||
#if (NL_USE_SSE)
|
#if (NL_USE_SSE)
|
||||||
__m128d tmp = mm_set_pd1(0.0);
|
__m128d tmp = _mm_set_pd1(0.0);
|
||||||
const unsigned e = mat.ia[j+1];
|
const unsigned e = mat.ia[j+1];
|
||||||
unsigned pk = mat.diag[j] + 1;
|
unsigned pk = mat.diag[j] + 1;
|
||||||
for (; pk < e - 1; pk+=2)
|
for (; pk < e - 1; pk+=2)
|
||||||
{
|
{
|
||||||
//tmp += m_A[pk] * new_V[mat.ja[pk]];
|
//tmp += m_A[pk] * new_V[mat.ja[pk]];
|
||||||
tmp = mm_add_pd(tmp, mm_mul_pd(mm_set_pd(m_A[pk], m_A[pk+1]),
|
tmp = _mm_add_pd(tmp, _mm_mul_pd(_mm_set_pd(m_A[pk], m_A[pk+1]),
|
||||||
_mm_set_pd(new_V[mat.ja[pk]], new_V[mat.ja[pk+1]])));
|
_mm_set_pd(new_V[mat.ja[pk]], new_V[mat.ja[pk+1]])));
|
||||||
}
|
}
|
||||||
double tmpx = mm_cvtsd_f64(tmp) + mm_cvtsd_f64(mm_unpackhi_pd(tmp,tmp));
|
double tmpx = _mm_cvtsd_f64(tmp) + _mm_cvtsd_f64(_mm_unpackhi_pd(tmp,tmp));
|
||||||
for (; pk < e; pk++)
|
for (; pk < e; pk++)
|
||||||
{
|
{
|
||||||
tmpx += m_A[pk] * new_V[mat.ja[pk]];
|
tmpx += m_A[pk] * new_V[mat.ja[pk]];
|
||||||
|
@ -95,13 +95,13 @@ NETLIB_UPDATE(solver)
|
|||||||
const std::size_t t_cnt = m_mat_solvers.size();
|
const std::size_t t_cnt = m_mat_solvers.size();
|
||||||
if (m_parallel())
|
if (m_parallel())
|
||||||
{
|
{
|
||||||
omp_set_num_threads(3);
|
//omp_set_num_threads(3);
|
||||||
//omp_set_dynamic(0);
|
//omp_set_dynamic(0);
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for (int i = 0; i < t_cnt; i++)
|
for (int i = 0; i < t_cnt; i++)
|
||||||
if (m_mat_solvers[i]->has_timestep_devices())
|
if (m_mat_solvers[i]->has_timestep_devices() || force_solve)
|
||||||
{
|
{
|
||||||
// Ignore return value
|
// Ignore return value
|
||||||
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
|
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
|
||||||
@ -110,7 +110,7 @@ NETLIB_UPDATE(solver)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
for (int i = 0; i < t_cnt; i++)
|
for (int i = 0; i < t_cnt; i++)
|
||||||
if (m_mat_solvers[i]->has_timestep_devices())
|
if (m_mat_solvers[i]->has_timestep_devices() || force_solve)
|
||||||
{
|
{
|
||||||
// Ignore return value
|
// Ignore return value
|
||||||
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
|
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
|
||||||
|
Loading…
Reference in New Issue
Block a user