mirror of
https://github.com/holub/mame
synced 2025-06-30 16:00:01 +03:00
Fix seldom used conditional build options.
- Fixed OPENMP compile. For Congo Bongo, using Solver.PARALLEL=1 significantly increases performance from 270% to 380%. However, this has to be taken with a grain of salt: enabling this on predominantly logic netlists can severely kill performance. - Increased readability of timed queue code.
This commit is contained in:
parent
b1c3586789
commit
5cd17c361f
@ -51,6 +51,7 @@ NETLIST_START(dummy)
|
||||
#if USE_OPTMIZATIONS
|
||||
SOLVER(Solver, 24000)
|
||||
PARAM(Solver.DYNAMIC_TS, 0 )
|
||||
PARAM(Solver.PARALLEL, 1)
|
||||
#else
|
||||
SOLVER(Solver, 24000)
|
||||
PARAM(Solver.DYNAMIC_TS, 1)
|
||||
|
@ -113,6 +113,8 @@
|
||||
|
||||
/*
 * HAS_OPENMP is non-zero when the compiler advertises OpenMP support with a
 * version date of 200805 or later, and zero otherwise.
 *
 * Two spellings of the version macro are accepted: a project/build supplied
 * OPENMP takes precedence, otherwise the compiler-provided _OPENMP is used.
 * If neither macro is defined, OpenMP is treated as unavailable.
 */
#if defined(OPENMP)
#define HAS_OPENMP ( OPENMP >= 200805 )
#elif defined(_OPENMP)
#define HAS_OPENMP ( _OPENMP >= 200805 )
#else
#define HAS_OPENMP (0)
#endif
|
||||
|
@ -16,13 +16,40 @@
|
||||
#include "plib/plists.h"
|
||||
#include "plib/pchrono.h"
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// timed queue
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
namespace netlist
|
||||
{
|
||||
|
||||
//FIXME: move to an appropriate place
|
||||
/**
 * Minimal busy-waiting lock built on std::atomic_flag.
 *
 * This primary template is the active flavour: acquire() spins until the
 * flag could be set, release() clears it again. The template parameter only
 * selects between this implementation and the pspin_lock<false>
 * specialization; it is not otherwise used here.
 *
 * The lock is neither copyable nor assignable (std::atomic_flag is not),
 * which is spelled out explicitly below.
 */
template<bool enabled_ = true>
class pspin_lock
{
public:
	pspin_lock() noexcept = default;
	pspin_lock(const pspin_lock &) = delete;
	pspin_lock &operator=(const pspin_lock &) = delete;

	/** Spin until the flag was previously clear, i.e. until this caller owns the lock. */
	void acquire() noexcept
	{
		while (m_lock.test_and_set(std::memory_order_acquire))
		{
			// busy-wait: another holder has the flag set
		}
	}

	/** Clear the flag so that a pending or later acquire() can succeed. */
	void release() noexcept
	{
		m_lock.clear(std::memory_order_release);
	}

private:
	std::atomic_flag m_lock = ATOMIC_FLAG_INIT;
};
|
||||
|
||||
template<>
|
||||
class pspin_lock<false>
|
||||
{
|
||||
public:
|
||||
void acquire() const noexcept { }
|
||||
void release() const noexcept { }
|
||||
};
|
||||
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
using tqlock = pspin_lock<true>;
|
||||
#else
|
||||
using tqlock = pspin_lock<false>;
|
||||
#endif
|
||||
|
||||
template <class Element, class Time>
|
||||
class timed_queue
|
||||
{
|
||||
@ -38,21 +65,18 @@ namespace netlist
|
||||
timed_queue(unsigned list_size)
|
||||
: m_list(list_size)
|
||||
{
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
m_lock = 0;
|
||||
#endif
|
||||
m_lock.acquire();
|
||||
clear();
|
||||
m_lock.release();
|
||||
}
|
||||
|
||||
std::size_t capacity() const { return m_list.size(); }
|
||||
bool empty() const { return (m_end == &m_list[1]); }
|
||||
std::size_t capacity() const { return m_list.size(); }
|
||||
bool empty() const { return (m_end == &m_list[1]); }
|
||||
|
||||
void push(Element o, const Time t) noexcept
|
||||
{
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
/* Lock */
|
||||
while (m_lock.exchange(1)) { }
|
||||
#endif
|
||||
m_lock.acquire();
|
||||
entry_t * i = m_end;
|
||||
for (; t > (i - 1)->m_exec_time; --i)
|
||||
{
|
||||
@ -62,9 +86,7 @@ namespace netlist
|
||||
*i = { t, o };
|
||||
++m_end;
|
||||
m_prof_call.inc();
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
m_lock = 0;
|
||||
#endif
|
||||
m_lock.release();
|
||||
}
|
||||
|
||||
entry_t pop() noexcept { return *(--m_end); }
|
||||
@ -73,9 +95,7 @@ namespace netlist
|
||||
void remove(const Element &elem) noexcept
|
||||
{
|
||||
/* Lock */
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
while (m_lock.exchange(1)) { }
|
||||
#endif
|
||||
m_lock.acquire();
|
||||
for (entry_t * i = m_end - 1; i > &m_list[0]; i--)
|
||||
{
|
||||
if (i->m_object == elem)
|
||||
@ -86,15 +106,11 @@ namespace netlist
|
||||
*i = *(i+1);
|
||||
++i;
|
||||
}
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
m_lock = 0;
|
||||
#endif
|
||||
m_lock.release();
|
||||
return;
|
||||
}
|
||||
}
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
m_lock = 0;
|
||||
#endif
|
||||
m_lock.release();
|
||||
}
|
||||
|
||||
void retime(const Element &elem, const Time t) noexcept
|
||||
@ -122,9 +138,7 @@ namespace netlist
|
||||
|
||||
private:
|
||||
|
||||
#if HAS_OPENMP && USE_OPENMP
|
||||
volatile std::atomic<int> m_lock;
|
||||
#endif
|
||||
tqlock m_lock;
|
||||
entry_t * m_end;
|
||||
std::vector<entry_t> m_list;
|
||||
|
||||
|
@ -22,6 +22,9 @@
|
||||
#include "plib/pstream.h"
|
||||
|
||||
/* Set to 1 to compile the hand-written SSE code paths guarded by NL_USE_SSE. */
#define NL_USE_SSE 0
#if NL_USE_SSE
/*
 * The guarded code uses __m128d and the _mm_*_pd intrinsics. Those are SSE2
 * and are declared in <emmintrin.h>; <mmintrin.h> only provides MMX.
 */
#include <emmintrin.h>
#endif
|
||||
|
||||
namespace netlist
|
||||
{
|
||||
@ -263,16 +266,16 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
const nl_double * const * RESTRICT other_cur_analog = t->connected_net_V();
|
||||
|
||||
#if (0 ||NL_USE_SSE)
|
||||
__m128d mg = mm_set_pd(0.0, 0.0);
|
||||
__m128d mr = mm_set_pd(0.0, 0.0);
|
||||
__m128d mg = _mm_set_pd(0.0, 0.0);
|
||||
__m128d mr = _mm_set_pd(0.0, 0.0);
|
||||
unsigned i = 0;
|
||||
for (; i < term_count - 1; i+=2)
|
||||
{
|
||||
mg = mm_add_pd(mg, mm_loadu_pd(>[i]));
|
||||
mr = mm_add_pd(mr, mm_loadu_pd(&Idr[i]));
|
||||
mg = _mm_add_pd(mg, _mm_loadu_pd(>[i]));
|
||||
mr = _mm_add_pd(mr, _mm_loadu_pd(&Idr[i]));
|
||||
}
|
||||
gtot_t = mm_cvtsd_f64(mg) + mm_cvtsd_f64(mm_unpackhi_pd(mg,mg));
|
||||
RHS_t = mm_cvtsd_f64(mr) + mm_cvtsd_f64(mm_unpackhi_pd(mr,mr));
|
||||
gtot_t = _mm_cvtsd_f64(mg) + _mm_cvtsd_f64(_mm_unpackhi_pd(mg,mg));
|
||||
RHS_t = _mm_cvtsd_f64(mr) + _mm_cvtsd_f64(_mm_unpackhi_pd(mr,mr));
|
||||
for (; i < term_count; i++)
|
||||
{
|
||||
gtot_t += gt[i];
|
||||
@ -356,16 +359,16 @@ unsigned matrix_solver_GCR_t<m_N, storage_N>::vsolve_non_dynamic(const bool newt
|
||||
//__builtin_prefetch(&new_V[j-1], 1);
|
||||
//if (j>0)__builtin_prefetch(&m_A[mat.diag[j-1]], 0);
|
||||
#if (NL_USE_SSE)
|
||||
__m128d tmp = mm_set_pd1(0.0);
|
||||
__m128d tmp = _mm_set_pd1(0.0);
|
||||
const unsigned e = mat.ia[j+1];
|
||||
unsigned pk = mat.diag[j] + 1;
|
||||
for (; pk < e - 1; pk+=2)
|
||||
{
|
||||
//tmp += m_A[pk] * new_V[mat.ja[pk]];
|
||||
tmp = mm_add_pd(tmp, mm_mul_pd(mm_set_pd(m_A[pk], m_A[pk+1]),
|
||||
tmp = _mm_add_pd(tmp, _mm_mul_pd(_mm_set_pd(m_A[pk], m_A[pk+1]),
|
||||
_mm_set_pd(new_V[mat.ja[pk]], new_V[mat.ja[pk+1]])));
|
||||
}
|
||||
double tmpx = mm_cvtsd_f64(tmp) + mm_cvtsd_f64(mm_unpackhi_pd(tmp,tmp));
|
||||
double tmpx = _mm_cvtsd_f64(tmp) + _mm_cvtsd_f64(_mm_unpackhi_pd(tmp,tmp));
|
||||
for (; pk < e; pk++)
|
||||
{
|
||||
tmpx += m_A[pk] * new_V[mat.ja[pk]];
|
||||
|
@ -95,13 +95,13 @@ NETLIB_UPDATE(solver)
|
||||
const std::size_t t_cnt = m_mat_solvers.size();
|
||||
if (m_parallel())
|
||||
{
|
||||
omp_set_num_threads(3);
|
||||
//omp_set_num_threads(3);
|
||||
//omp_set_dynamic(0);
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for
|
||||
for (int i = 0; i < t_cnt; i++)
|
||||
if (m_mat_solvers[i]->has_timestep_devices())
|
||||
if (m_mat_solvers[i]->has_timestep_devices() || force_solve)
|
||||
{
|
||||
// Ignore return value
|
||||
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
|
||||
@ -110,7 +110,7 @@ NETLIB_UPDATE(solver)
|
||||
}
|
||||
else
|
||||
for (int i = 0; i < t_cnt; i++)
|
||||
if (m_mat_solvers[i]->has_timestep_devices())
|
||||
if (m_mat_solvers[i]->has_timestep_devices() || force_solve)
|
||||
{
|
||||
// Ignore return value
|
||||
ATTR_UNUSED const netlist_time ts = m_mat_solvers[i]->solve();
|
||||
|
Loading…
Reference in New Issue
Block a user