mirror of
https://github.com/holub/mame
synced 2025-04-24 01:11:11 +03:00
Added 32- and 64-bit population count utilities. Only used in ARM7 core for now. Requires -msse4.2 or -mpopcnt to use CPU feature on x86, requires -mpopcntb to use CPU feature on POWER.
This commit is contained in:
parent
9b7d323a47
commit
6ab19ed5bc
@ -263,15 +263,7 @@ int arm7_cpu_device::storeInc(uint32_t pat, uint32_t rbv, int mode)
|
||||
int arm7_cpu_device::storeDec(uint32_t pat, uint32_t rbv, int mode)
|
||||
{
|
||||
// pre-count the # of registers being stored
|
||||
// TODO[RH]: This is just a popcnt. Consider eminline intrinsic.
|
||||
int result = 0;
|
||||
for (int i = 15; i >= 0; i--)
|
||||
{
|
||||
if ((pat >> i) & 1)
|
||||
{
|
||||
result++;
|
||||
}
|
||||
}
|
||||
int const result = population_count_32(pat & 0x0000ffff);
|
||||
|
||||
// adjust starting address
|
||||
rbv -= (result << 2);
|
||||
|
@ -295,6 +295,69 @@ inline uint8_t count_leading_ones(uint32_t val)
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
population_count_32 - return the number of
|
||||
one bits in a 32-bit value
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifndef population_count_32
// Returns the number of one bits (the Hamming weight) of a 32-bit value.
// The result is always in the range 0..32.
inline unsigned population_count_32(uint32_t val)
{
#if defined(__GNUC__)
	// uses CPU feature if available, otherwise falls back to implementation similar to what follows
	// the builtin takes an unsigned int, so make sure no bits are dropped by the cast
	static_assert(sizeof(val) == sizeof(unsigned));
	return unsigned(__builtin_popcount(static_cast<unsigned>(val)));
#else
	// optimal Hamming weight assuming fast 32*32->32
	constexpr uint32_t m1(0x55555555);  // every other bit
	constexpr uint32_t m2(0x33333333);  // low two bits of every nibble
	constexpr uint32_t m4(0x0f0f0f0f);  // low nibble of every byte
	constexpr uint32_t h01(0x01010101); // a one in the low bit of every byte

	val -= (val >> 1) & m1;               // each 2-bit field now holds the count of its two bits
	val = (val & m2) + ((val >> 2) & m2); // each nibble now holds the count of its four bits
	val = (val + (val >> 4)) & m4;        // each byte now holds the count of its eight bits
	return unsigned((val * h01) >> 24);   // multiply sums the four byte counts into the top byte
#endif
}
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
population_count_64 - return the number of
|
||||
one bits in a 64-bit value
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifndef population_count_64
// Returns the number of one bits (the Hamming weight) of a 64-bit value.
// The result is always in the range 0..64.
inline unsigned population_count_64(uint64_t val)
{
#if defined(__GNUC__)
	// uses CPU feature if available, otherwise falls back to implementation similar to what follows
	// the builtin takes an unsigned long long, so make sure no bits are dropped by the cast
	static_assert(sizeof(val) == sizeof(unsigned long long));
	return unsigned(__builtin_popcountll(static_cast<unsigned long long>(val)));
#else
	// guess that architectures with 64-bit pointers have 64-bit multiplier
	// (the condition is a compile-time constant, hence if constexpr)
	if constexpr (sizeof(void *) >= sizeof(uint64_t))
	{
		// optimal Hamming weight assuming fast 64*64->64
		constexpr uint64_t m1(0x5555555555555555);  // every other bit
		constexpr uint64_t m2(0x3333333333333333);  // low two bits of every nibble
		constexpr uint64_t m4(0x0f0f0f0f0f0f0f0f);  // low nibble of every byte
		constexpr uint64_t h01(0x0101010101010101); // a one in the low bit of every byte

		val -= (val >> 1) & m1;               // each 2-bit field now holds the count of its two bits
		val = (val & m2) + ((val >> 2) & m2); // each nibble now holds the count of its four bits
		val = (val + (val >> 4)) & m4;        // each byte now holds the count of its eight bits
		return unsigned((val * h01) >> 56);   // multiply sums the eight byte counts into the top byte
	}
	else
	{
		// fall back to two 32-bit operations to avoid slow multiply
		return population_count_32(uint32_t(val)) + population_count_32(uint32_t(val >> 32));
	}
#endif
}
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
INLINE TIMING FUNCTIONS
|
||||
***************************************************************************/
|
||||
|
Loading…
Reference in New Issue
Block a user