Mirror of https://github.com/holub/mame, synced 2025-04-19 15:11:37 +03:00
-osd: Clean up inline maths utilities.
* Removed inline assembly for operations compilers handle well.
* Added ARM and AArch64 implementations for a few operations.
* Added unsigned integer add with carry out operations.

-cpu/drccache.cpp: Detect whether RWX pages are supported.

-dynax.cpp: Improved a few hanafuda DIP switch descriptions.
parent bb7b375aa6
commit 6e1bbe8be8
50
makefile
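As context for the diffs that follow: the eminline helpers that used to return a secondary result through a pointer (mulu_64x64, divu_64x32_rem and friends) now take a reference, and the new addu_32x32_co / addu_64x64_co helpers report the carry out of an unsigned addition. The sketch below only illustrates the updated calling convention; the fallback bodies are assumptions, not copied from eminline.h, which uses compiler intrinsics or inline assembly where available.

#include <cstdint>

// Illustrative portable fallbacks showing the reference-based out-parameter style.
inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder)
{
    uint32_t const quotient = uint32_t(a / b);
    remainder = uint32_t(a - (uint64_t(b) * quotient));
    return quotient;
}

inline bool addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum)
{
    sum = a + b;
    return sum < a; // carry out of the 32-bit addition
}

// Callers now pass the secondary result by reference rather than by pointer:
//   uint32_t rem;
//   uint32_t const quot = divu_64x32_rem(ticks, frequency, rem);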
@ -362,33 +362,6 @@ WINDRES := $(word 1,$(TOOLCHAIN) i686-w64-mingw32-)windres
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring arm,$(UNAME)),arm)
|
||||
ARCHITECTURE :=
|
||||
ifndef NOASM
|
||||
NOASM := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring aarch64,$(UNAME)),aarch64)
|
||||
ARCHITECTURE :=
|
||||
ifndef NOASM
|
||||
NOASM := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring s390x,$(UNAME)),s390x)
|
||||
ifndef NOASM
|
||||
NOASM := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring riscv64,$(UNAME)),riscv64)
|
||||
ARCHITECTURE :=
|
||||
ifndef NOASM
|
||||
NOASM := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
# Emscripten
|
||||
ifeq ($(findstring emcc,$(CC)),emcc)
|
||||
TARGETOS := asmjs
|
||||
@ -398,27 +371,42 @@ ifndef NOASM
|
||||
endif
|
||||
endif
|
||||
|
||||
# ppc has inline assembly support but no DRC
|
||||
ifeq ($(findstring ppc,$(UNAME)),ppc)
|
||||
ifndef FORCE_DRC_C_BACKEND
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
# powerpc has inline assembly support but no DRC
|
||||
ifeq ($(findstring powerpc,$(UNAME)),powerpc)
|
||||
ifndef FORCE_DRC_C_BACKEND
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
# ARM / ARM64
|
||||
ifeq ($(findstring arm,$(UNAME)),arm)
|
||||
ifndef FORCE_DRC_C_BACKEND
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring aarch64,$(UNAME)),aarch64)
|
||||
ifndef FORCE_DRC_C_BACKEND
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring s390x,$(UNAME)),s390x)
|
||||
ifndef FORCE_DRC_C_BACKEND
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(findstring riscv64,$(UNAME)),riscv64)
|
||||
ifndef FORCE_DRC_C_BACKEND
|
||||
FORCE_DRC_C_BACKEND := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
# Autodetect BIGENDIAN
|
||||
# MacOSX
|
||||
ifndef BIGENDIAN
|
||||
|
@ -398,14 +398,14 @@ void alpha_device::cpu_execute(u32 const op)
// register variants
case 0x00: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(m_r[Rb(op)]))); break; // mull
case 0x20: m_r[Rc(op)] = m_r[Ra(op)] * m_r[Rb(op)]; break; // mulq
case 0x30: mulu_64x64(m_r[Ra(op)], m_r[Rb(op)], &m_r[Rc(op)]); break; // umulh
case 0x30: mulu_64x64(m_r[Ra(op)], m_r[Rb(op)], m_r[Rc(op)]); break; // umulh
case 0x40: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(m_r[Rb(op)]))); break; // mull/v
case 0x60: m_r[Rc(op)] = m_r[Ra(op)] * m_r[Rb(op)]; break; // mulq/v

// immediate variants
case 0x80: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(Im(op)))); break; // mull
case 0xa0: m_r[Rc(op)] = m_r[Ra(op)] * Im(op); break; // mulq
case 0xb0: mulu_64x64(m_r[Ra(op)], Im(op), &m_r[Rc(op)]); break; // umulh
case 0xb0: mulu_64x64(m_r[Ra(op)], Im(op), m_r[Rc(op)]); break; // umulh
case 0xc0: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(Im(op)))); break; // mull/v
case 0xe0: m_r[Rc(op)] = m_r[Ra(op)] * Im(op); break; // mulq/v
}
@ -14,10 +14,6 @@
#include <algorithm>


// this improves performance of some emulated systems but doesn't work on W^X hosts
//#define MAME_DRC_CACHE_RWX


namespace {

template <typename T, typename U> constexpr T *ALIGN_PTR_UP(T *p, U align)
@ -52,7 +48,8 @@ drc_cache::drc_cache(size_t bytes) :
m_end(m_limit),
m_codegen(nullptr),
m_size(m_cache.size()),
m_executable(false)
m_executable(false),
m_rwx(false)
{
// alignment and page size must be powers of two, cache must be page-aligned
assert(!(CACHE_ALIGNMENT & (CACHE_ALIGNMENT - 1)));
@ -63,11 +60,24 @@ drc_cache::drc_cache(size_t bytes) :
std::fill(std::begin(m_free), std::end(m_free), nullptr);
std::fill(std::begin(m_nearfree), std::end(m_nearfree), nullptr);

#if defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE | osd::virtual_memory_allocation::EXECUTE);
#else // defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE);
#endif // defined(MAME_DRC_CACHE_RWX)
if (!m_cache)
{
throw emu_fatalerror("drc_cache: Error allocating virtual memory");
}
else if (!m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE))
{
throw emu_fatalerror("drc_cache: Error marking cache read/write");
}
else if (m_cache.set_access(m_base - m_near, m_end - m_base, osd::virtual_memory_allocation::READ_WRITE | osd::virtual_memory_allocation::EXECUTE))
{
osd_printf_verbose("drc_cache: RWX pages supported\n");
m_rwx = true;
}
else
{
osd_printf_verbose("drc_cache: Using W^X mode\n");
m_rwx = false;
}
}
@ -209,9 +219,8 @@ void drc_cache::codegen_init()
{
if (m_executable)
{
#if !defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE);
#endif // !defined(MAME_DRC_CACHE_RWX)
if (!m_rwx)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE);
m_executable = false;
}
}
@ -221,9 +230,8 @@ void drc_cache::codegen_complete()
{
if (!m_executable)
{
#if !defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(m_base - m_near, ALIGN_PTR_UP(m_top, m_cache.page_size()) - m_base, osd::virtual_memory_allocation::READ_EXECUTE);
#endif // !defined(MAME_DRC_CACHE_RWX)
if (!m_rwx)
m_cache.set_access(m_base - m_near, ALIGN_PTR_UP(m_top, m_cache.page_size()) - m_base, osd::virtual_memory_allocation::READ_EXECUTE);
m_executable = true;
}
}
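Aside: the constructor change above probes whether the host allows pages that are writable and executable at the same time; when it does not, codegen_init()/codegen_complete() flip the cache between read/write and read/execute. A minimal standalone sketch of that W^X flip, using plain POSIX mprotect rather than MAME's osd::virtual_memory_allocation wrapper, purely for illustration:

#include <sys/mman.h>
#include <cstddef>

// Illustrative only: base and size are assumed page-aligned, as with the DRC cache.
inline bool make_writable(void *base, std::size_t size)
{
    // writable (and readable) while code is being emitted
    return mprotect(base, size, PROT_READ | PROT_WRITE) == 0;
}

inline bool make_executable(void *base, std::size_t size)
{
    // executable (and no longer writable) before the generated code runs
    return mprotect(base, size, PROT_READ | PROT_EXEC) == 0;
}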
@ -94,6 +94,7 @@ private:
drccodeptr m_codegen; // start of current generated code block
size_t const m_size; // size of the cache in bytes
bool m_executable; // whether cached code is currently executable
bool m_rwx; // whether pages can be simultaneously writable and executable

// oob management
struct oob_handler
@ -3561,11 +3561,11 @@ void mips3_device::handle_special(uint32_t op)
m_core->icount -= 35;
break;
case 0x1c: /* DMULT */
LOVAL64 = mul_64x64(RSVAL64, RTVAL64, reinterpret_cast<s64 *>(&HIVAL64));
LOVAL64 = mul_64x64(RSVAL64, RTVAL64, *reinterpret_cast<s64 *>(&HIVAL64));
m_core->icount -= 7;
break;
case 0x1d: /* DMULTU */
LOVAL64 = mulu_64x64(RSVAL64, RTVAL64, &HIVAL64);
LOVAL64 = mulu_64x64(RSVAL64, RTVAL64, HIVAL64);
m_core->icount -= 7;
break;
case 0x1e: /* DDIV */
@ -473,10 +473,10 @@ void r4000_base_device::cpu_execute(u32 const op)
}
break;
case 0x1c: // DMULT
m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], reinterpret_cast<s64 *>(&m_hi));
m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], *reinterpret_cast<s64 *>(&m_hi));
break;
case 0x1d: // DMULTU
m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], &m_hi);
m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], m_hi);
break;
case 0x1e: // DDIV
if (m_r[RTREG])
@ -40,17 +40,17 @@ attotime &attotime::operator*=(u32 factor)
|
||||
|
||||
// split attoseconds into upper and lower halves which fit into 32 bits
|
||||
u32 attolo;
|
||||
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &attolo);
|
||||
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, attolo);
|
||||
|
||||
// scale the lower half, then split into high/low parts
|
||||
u64 temp = mulu_32x32(attolo, factor);
|
||||
u32 reslo;
|
||||
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, &reslo);
|
||||
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, reslo);
|
||||
|
||||
// scale the upper half, then split into high/low parts
|
||||
temp += mulu_32x32(attohi, factor);
|
||||
u32 reshi;
|
||||
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, &reshi);
|
||||
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, reshi);
|
||||
|
||||
// scale the seconds
|
||||
temp += mulu_32x32(m_seconds, factor);
|
||||
@ -80,19 +80,19 @@ attotime &attotime::operator/=(u32 factor)
|
||||
|
||||
// split attoseconds into upper and lower halves which fit into 32 bits
|
||||
u32 attolo;
|
||||
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &attolo);
|
||||
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, attolo);
|
||||
|
||||
// divide the seconds and get the remainder
|
||||
u32 remainder;
|
||||
m_seconds = divu_64x32_rem(m_seconds, factor, &remainder);
|
||||
m_seconds = divu_64x32_rem(m_seconds, factor, remainder);
|
||||
|
||||
// combine the upper half of attoseconds with the remainder and divide that
|
||||
u64 temp = s64(attohi) + mulu_32x32(remainder, ATTOSECONDS_PER_SECOND_SQRT);
|
||||
u32 reshi = divu_64x32_rem(temp, factor, &remainder);
|
||||
u32 reshi = divu_64x32_rem(temp, factor, remainder);
|
||||
|
||||
// combine the lower half of attoseconds with the remainder and divide that
|
||||
temp = attolo + mulu_32x32(remainder, ATTOSECONDS_PER_SECOND_SQRT);
|
||||
u32 reslo = divu_64x32_rem(temp, factor, &remainder);
|
||||
u32 reslo = divu_64x32_rem(temp, factor, remainder);
|
||||
|
||||
// round based on the remainder
|
||||
m_attoseconds = (attoseconds_t)reslo + mulu_32x32(reshi, ATTOSECONDS_PER_SECOND_SQRT);
|
||||
@ -142,7 +142,7 @@ const char *attotime::as_string(int precision) const
|
||||
else
|
||||
{
|
||||
u32 lower;
|
||||
u32 upper = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &lower);
|
||||
u32 upper = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, lower);
|
||||
int temp = precision;
|
||||
while (temp < 18)
|
||||
{
|
||||
|
@ -357,7 +357,7 @@ inline attotime attotime::from_ticks(u64 ticks, u32 frequency)
return attotime(0, ticks * attos_per_tick);

u32 remainder;
s32 secs = divu_64x32_rem(ticks, frequency, &remainder);
s32 secs = divu_64x32_rem(ticks, frequency, remainder);
return attotime(secs, u64(remainder) * attos_per_tick);
}
else
@ -437,7 +437,7 @@ attotime device_t::clocks_to_attotime(u64 numclocks) const noexcept
else
{
u32 remainder;
u32 quotient = divu_64x32_rem(numclocks, m_clock, &remainder);
u32 quotient = divu_64x32_rem(numclocks, m_clock, remainder);
return attotime(quotient, u64(remainder) * u64(m_attoseconds_per_clock));
}
}
@ -518,7 +518,7 @@ void device_scheduler::timeslice()
else
{
u32 remainder;
s32 secs = divu_64x32_rem(ran, exec->m_cycles_per_second, &remainder);
s32 secs = divu_64x32_rem(ran, exec->m_cycles_per_second, remainder);
deltatime = attotime(secs, u64(remainder) * exec->m_attoseconds_per_cycle);
}
assert(deltatime >= attotime::zero);
@ -475,13 +475,13 @@ void validity_checker::validate_inlines()
if (resultu32 != expectedu32)
osd_printf_error("Error testing divu_64x32 (%16X / %08X) = %08X (expected %08X)\n", u64(testu64a), u32(testu32a), resultu32, expectedu32);

resulti32 = div_64x32_rem(testi64a, testi32a, &remainder);
resulti32 = div_64x32_rem(testi64a, testi32a, remainder);
expectedi32 = testi64a / s64(testi32a);
expremainder = testi64a % s64(testi32a);
if (resulti32 != expectedi32 || remainder != expremainder)
osd_printf_error("Error testing div_64x32_rem (%16X / %08X) = %08X,%08X (expected %08X,%08X)\n", s64(testi64a), s32(testi32a), resulti32, remainder, expectedi32, expremainder);

resultu32 = divu_64x32_rem(testu64a, testu32a, &uremainder);
resultu32 = divu_64x32_rem(testu64a, testu32a, uremainder);
expectedu32 = testu64a / u64(testu32a);
expuremainder = testu64a % u64(testu32a);
if (resultu32 != expectedu32 || uremainder != expuremainder)
@ -1712,8 +1712,7 @@ INPUT_PORTS_START( HANAFUDA_KEYS_BET )
|
||||
PORT_BIT( 0x20, IP_ACTIVE_LOW, IPT_MAHJONG_SMALL ) PORT_PLAYER(2) // "s"
|
||||
INPUT_PORTS_END
|
||||
|
||||
#ifdef UNREFERENCED_CODE
|
||||
static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT )
|
||||
[[maybe_unused]] static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT )
|
||||
PORT_START("KEY0")
|
||||
PORT_BIT( 0x01, IP_ACTIVE_LOW, IPT_HANAFUDA_A ) PORT_PLAYER(1)
|
||||
PORT_BIT( 0x02, IP_ACTIVE_LOW, IPT_HANAFUDA_E ) PORT_PLAYER(1)
|
||||
@ -1798,7 +1797,6 @@ static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT )
|
||||
PORT_BIT( 0x40, IP_ACTIVE_LOW, IPT_UNKNOWN )
|
||||
PORT_BIT( 0x80, IP_ACTIVE_LOW, IPT_UNKNOWN )
|
||||
INPUT_PORTS_END
|
||||
#endif
|
||||
|
||||
static INPUT_PORTS_START( cdracula )
|
||||
PORT_START("P1")
|
||||
@ -1961,9 +1959,9 @@ static INPUT_PORTS_START( hnkochou )
|
||||
PORT_DIPNAME( 0x10, 0x10, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:5" )
|
||||
PORT_DIPSETTING( 0x10, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x20, 0x20, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:6" )
|
||||
PORT_DIPSETTING( 0x20, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x20, 0x20, "Gokou Odds" ) PORT_DIPLOCATION( "DIPSW 1:6" )
|
||||
PORT_DIPSETTING( 0x20, "100" )
|
||||
PORT_DIPSETTING( 0x00, "200" )
|
||||
PORT_DIPNAME( 0x40, 0x40, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:7" )
|
||||
PORT_DIPSETTING( 0x40, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
@ -1972,16 +1970,15 @@ static INPUT_PORTS_START( hnkochou )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
|
||||
PORT_START("DSW1")
|
||||
PORT_DIPNAME( 0x01, 0x01, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:1" )
|
||||
PORT_DIPSETTING( 0x01, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x02, 0x02, "Stage Select" ) PORT_DIPLOCATION( "DIPSW 2:2" )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( No ) )
|
||||
PORT_DIPSETTING( 0x02, DEF_STR( Yes ) )
|
||||
PORT_DIPNAME( 0x04, 0x04, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:3" )
|
||||
PORT_DIPNAME( 0x03, 0x03, "Game Mode" ) PORT_DIPLOCATION( "DIPSW 2:1,2" )
|
||||
PORT_DIPSETTING( 0x03, "A (Stage Select)" ) // stage select, gal re-dresses if player loses
|
||||
PORT_DIPSETTING( 0x02, "B" ) // no stage select, gal doesn't re-dress if player loses
|
||||
PORT_DIPSETTING( 0x01, "C" ) // no stage select, gal re-dresses if player loses
|
||||
PORT_DIPSETTING( 0x00, "D (Gals Off)" ) // no "show time" on win, gals still shown in attract mode
|
||||
PORT_DIPNAME( 0x04, 0x04, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:3" ) // possibly difficulty/pay rate?
|
||||
PORT_DIPSETTING( 0x04, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x08, 0x08, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:4" )
|
||||
PORT_DIPNAME( 0x08, 0x08, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:4" ) // possibly difficulty/pay rate?
|
||||
PORT_DIPSETTING( 0x08, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x10, 0x10, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:5" )
|
||||
@ -1990,9 +1987,9 @@ static INPUT_PORTS_START( hnkochou )
|
||||
PORT_DIPNAME( 0x20, 0x20, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:6" )
|
||||
PORT_DIPSETTING( 0x20, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x40, 0x40, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:7" )
|
||||
PORT_DIPSETTING( 0x40, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x40, 0x40, "Suggest Move" ) PORT_DIPLOCATION( "DIPSW 2:7" )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( No ) )
|
||||
PORT_DIPSETTING( 0x40, DEF_STR( Yes ) )
|
||||
PORT_DIPNAME( 0x80, 0x80, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:8" )
|
||||
PORT_DIPSETTING( 0x80, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
@ -2170,10 +2167,10 @@ static INPUT_PORTS_START( hjingi )
|
||||
PORT_DIPNAME( 0x10, 0x10, "Double-Up Game Rate" ) PORT_DIPLOCATION( "DIP2:5" )
|
||||
PORT_DIPSETTING( 0x10, DEF_STR( High ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( Low ) )
|
||||
PORT_DIPNAME( 0x20, 0x20, "GOKOU Odds" ) PORT_DIPLOCATION( "DIP2:6" )
|
||||
PORT_DIPNAME( 0x20, 0x20, "Gokou Odds" ) PORT_DIPLOCATION( "DIP2:6" )
|
||||
PORT_DIPSETTING( 0x20, "100" )
|
||||
PORT_DIPSETTING( 0x00, "200" )
|
||||
PORT_DIPNAME( 0x40, 0x40, "GOKOU Cut" ) PORT_DIPLOCATION( "DIP2:7" )
|
||||
PORT_DIPNAME( 0x40, 0x40, "Gokou Cut" ) PORT_DIPLOCATION( "DIP2:7" )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( No ) )
|
||||
PORT_DIPSETTING( 0x40, DEF_STR( Yes ) )
|
||||
PORT_DIPNAME( 0x80, 0x80, "3-Renchan Bonus" ) PORT_DIPLOCATION( "DIP2:8" )
|
||||
@ -2623,7 +2620,7 @@ static INPUT_PORTS_START( hanayara )
|
||||
PORT_DIPNAME( 0x20, 0x20, "Choose Bonus (Cheat)") PORT_DIPLOCATION( "DIP2:6" )
|
||||
PORT_DIPSETTING( 0x20, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x40, 0x40, "Unknown 2-6" ) PORT_DIPLOCATION( "DIP2:7" )
|
||||
PORT_DIPNAME( 0x40, 0x40, "Show All Bonus Cards") PORT_DIPLOCATION( "DIP2:7" )
|
||||
PORT_DIPSETTING( 0x40, DEF_STR( Off ) )
|
||||
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
|
||||
PORT_DIPNAME( 0x80, 0x80, DEF_STR( Service_Mode ) ) PORT_DIPLOCATION( "DIP2:8" )
|
||||
|
@ -281,7 +281,7 @@ INPUT_PORTS_START(kaypro_keyboard_typewriter)
PORT_BIT(0x04, IP_ACTIVE_LOW, IPT_KEYBOARD) PORT_CODE(KEYCODE_LSHIFT) PORT_CODE(KEYCODE_RSHIFT) PORT_CHAR(UCHAR_SHIFT_1) PORT_NAME("SHIFT")
INPUT_PORTS_END

INPUT_PORTS_START(kaypro_keyboard_bitshift)
[[maybe_unused]] INPUT_PORTS_START(kaypro_keyboard_bitshift)
PORT_INCLUDE(kaypro_keyboard_typewriter)

PORT_MODIFY("ROW.2")
@ -366,7 +366,6 @@ void kaypro_10_keyboard_device::device_add_mconfig(machine_config &config)

ioport_constructor kaypro_10_keyboard_device::device_input_ports() const
{
(void)&INPUT_PORTS_NAME(kaypro_keyboard_bitshift);
return INPUT_PORTS_NAME(kaypro_keyboard_typewriter);
}
285
src/osd/eigccarm.h
Normal file
@ -0,0 +1,285 @@
|
||||
// license:BSD-3-Clause
|
||||
// copyright-holders:Vas Crabb
|
||||
/***************************************************************************
|
||||
|
||||
eigccarm.h
|
||||
|
||||
ARM/AArch64 inline implementations for GCC compilers. This code is
|
||||
automatically included if appropriate by eminline.h.
|
||||
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef MAME_OSD_EIGCCARM_H
|
||||
#define MAME_OSD_EIGCCARM_H
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
INLINE MATH FUNCTIONS
|
||||
***************************************************************************/
|
||||
|
||||
/*-------------------------------------------------
|
||||
mul_32x32 - perform a signed 32 bit x 32 bit
|
||||
multiply and return the full 64 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
// GCC can do a good job of this.
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mulu_32x32 - perform an unsigned 32 bit x
|
||||
32 bit multiply and return the full 64 bit
|
||||
result
|
||||
-------------------------------------------------*/
|
||||
|
||||
// GCC can do a good job of this
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mul_32x32_hi - perform a signed 32 bit x 32 bit
|
||||
multiply and return the upper 32 bits of the
|
||||
result
|
||||
-------------------------------------------------*/
|
||||
|
||||
// GCC can do a good job of this
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mulu_32x32_hi - perform an unsigned 32 bit x
|
||||
32 bit multiply and return the upper 32 bits
|
||||
of the result
|
||||
-------------------------------------------------*/
|
||||
|
||||
// GCC can do a good job of this
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mul_32x32_shift - perform a signed 32 bit x
|
||||
32 bit multiply and shift the result by the
|
||||
given number of bits before truncating the
|
||||
result to 32 bits
|
||||
-------------------------------------------------*/
|
||||
|
||||
#if !defined(__aarch64__)
|
||||
#define mul_32x32_shift _mul_32x32_shift
|
||||
inline int32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift)
|
||||
{
|
||||
uint32_t l, h;
|
||||
|
||||
__asm__ (
|
||||
" smull %[l], %[h], %[val1], %[val2] \n"
|
||||
: [l] "=r" (l)
|
||||
, [h] "=r" (h)
|
||||
: [val1] "%r" (val1)
|
||||
, [val2] "r" (val2)
|
||||
);
|
||||
|
||||
// Valid for (0 <= shift <= 31)
|
||||
return int32_t((l >> shift) | (h << (32 - shift)));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mulu_32x32_shift - perform an unsigned 32 bit x
|
||||
32 bit multiply and shift the result by the
|
||||
given number of bits before truncating the
|
||||
result to 32 bits
|
||||
-------------------------------------------------*/
|
||||
|
||||
#if !defined(__aarch64__)
|
||||
#define mulu_32x32_shift _mulu_32x32_shift
|
||||
inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
{
|
||||
uint32_t l, h;
|
||||
|
||||
__asm__ (
|
||||
" umull %[l], %[h], %[val1], %[val2] \n"
|
||||
: [l] "=r" (l)
|
||||
, [h] "=r" (h)
|
||||
: [val1] "%r" (val1)
|
||||
, [val2] "r" (val2)
|
||||
);
|
||||
|
||||
// Valid for (0 <= shift <= 31)
|
||||
return (l >> shift) | (h << (32 - shift));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
div_64x32 - perform a signed 64 bit x 32 bit
|
||||
divide and return the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
divu_64x32 - perform an unsigned 64 bit x 32 bit
|
||||
divide and return the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
div_64x32_rem - perform a signed 64 bit x 32
|
||||
bit divide and return the 32 bit quotient and
|
||||
32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
divu_64x32_rem - perform an unsigned 64 bit x
|
||||
32 bit divide and return the 32 bit quotient
|
||||
and 32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
div_32x32_shift - perform a signed divide of
|
||||
two 32 bit values, shifting the first before
|
||||
division, and returning the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
divu_32x32_shift - perform an unsigned divide of
|
||||
two 32 bit values, shifting the first before
|
||||
division, and returning the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mod_64x32 - perform a signed 64 bit x 32 bit
|
||||
divide and return the 32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
modu_64x32 - perform an unsigned 64 bit x 32 bit
|
||||
divide and return the 32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
recip_approx - compute an approximate floating
|
||||
point reciprocal
|
||||
-------------------------------------------------*/
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#define recip_approx _recip_approx
|
||||
inline float ATTR_CONST ATTR_FORCE_INLINE
|
||||
_recip_approx(float value)
|
||||
{
|
||||
float result;
|
||||
|
||||
__asm__ (
|
||||
" frecpe %s[result], %s[value] \n"
|
||||
: [result] "=w" (result)
|
||||
: [value] "w" (value)
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mul_64x64 - perform a signed 64 bit x 64 bit
|
||||
multiply and return the full 128 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifdef __aarch64__
|
||||
#define mul_64x64 _mul_64x64
|
||||
inline int64_t ATTR_FORCE_INLINE
|
||||
_mul_64x64(int64_t a, int64_t b, int64_t &hi)
|
||||
{
|
||||
__int128 const r(__int128(a) * b);
|
||||
hi = int64_t(uint64_t((unsigned __int128)r >> 64));
|
||||
return int64_t(uint64_t((unsigned __int128)r));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mulu_64x64 - perform an unsigned 64 bit x 64
|
||||
bit multiply and return the full 128 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifdef __aarch64__
|
||||
#define mulu_64x64 _mulu_64x64
|
||||
inline uint64_t ATTR_FORCE_INLINE
|
||||
_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
|
||||
{
|
||||
unsigned __int128 const r((unsigned __int128)a * b);
|
||||
hi = uint64_t(r >> 64);
|
||||
return uint64_t(r);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
INLINE BIT MANIPULATION FUNCTIONS
|
||||
***************************************************************************/
|
||||
|
||||
/*-------------------------------------------------
|
||||
count_leading_zeros - return the number of
|
||||
leading zero bits in a 32-bit value
|
||||
-------------------------------------------------*/
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#define count_leading_zeros _count_leading_zeros
|
||||
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_count_leading_zeros(uint32_t value)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
__asm__ (
|
||||
" clz %w[result], %w[value] \n"
|
||||
: [result] "=r" (result)
|
||||
: [value] "r" (value)
|
||||
);
|
||||
|
||||
return result;
|
||||
}
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
count_leading_ones - return the number of
|
||||
leading one bits in a 32-bit value
|
||||
-------------------------------------------------*/
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#define count_leading_ones _count_leading_ones
|
||||
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_count_leading_ones(uint32_t value)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
__asm__ (
|
||||
" clz %w[result], %w[value] \n"
|
||||
: [result] "=r" (result)
|
||||
: [value] "r" (~value)
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MAME_OSD_EIGCCARM_H
|
@ -22,7 +22,7 @@
|
||||
multiply and return the full 64 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* GCC can do a good job of this. */
|
||||
// GCC can do a good job of this.
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -31,7 +31,7 @@
|
||||
result
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* GCC can do a good job of this */
|
||||
// GCC can do a good job of this
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -40,21 +40,7 @@
|
||||
result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define mul_32x32_hi _mul_32x32_hi
|
||||
static inline int32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mul_32x32_hi(int32_t val1, int32_t val2)
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
__asm__ (
|
||||
" mulhw %[result], %[val1], %[val2] \n"
|
||||
: [result] "=r" (result)
|
||||
: [val1] "%r" (val1)
|
||||
, [val2] "r" (val2)
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
||||
// GCC can do a good job of this
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -63,21 +49,7 @@ _mul_32x32_hi(int32_t val1, int32_t val2)
|
||||
of the result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define mulu_32x32_hi _mulu_32x32_hi
|
||||
static inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mulu_32x32_hi(uint32_t val1, uint32_t val2)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
__asm__ (
|
||||
" mulhwu %[result], %[val1], %[val2] \n"
|
||||
: [result] "=r" (result)
|
||||
: [val1] "%r" (val1)
|
||||
, [val2] "r" (val2)
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
||||
// GCC can do a good job of this
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -89,27 +61,22 @@ _mulu_32x32_hi(uint32_t val1, uint32_t val2)
|
||||
|
||||
#if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64)
|
||||
#define mul_32x32_shift _mul_32x32_shift
|
||||
static inline int32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
inline int32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift)
|
||||
{
|
||||
int32_t result;
|
||||
uint32_t l, h;
|
||||
|
||||
/* Valid for (0 <= shift <= 32) */
|
||||
__asm__ (
|
||||
" mullw %[result], %[val1], %[val2] \n"
|
||||
" mulhw %[val1], %[val1], %[val2] \n"
|
||||
" srw %[result], %[result], %[shift] \n"
|
||||
" subfic %[shift], %[shift], 0x20 \n"
|
||||
" slw %[val1], %[val1], %[shift] \n"
|
||||
" or %[result], %[result], %[val1] \n"
|
||||
: [result] "=&r" (result)
|
||||
, [shift] "+r" (shift)
|
||||
, [val1] "+r" (val1)
|
||||
: [val2] "r" (val2)
|
||||
: "xer"
|
||||
" mullw %[l], %[val1], %[val2] \n"
|
||||
" mulhw %[h], %[val1], %[val2] \n"
|
||||
: [l] "=&r" (l)
|
||||
, [h] "=r" (h)
|
||||
: [val1] "%r" (val1)
|
||||
, [val2] "r" (val2)
|
||||
);
|
||||
|
||||
return result;
|
||||
// Valid for (0 <= shift <= 31)
|
||||
return int32_t((l >> shift) | (h << (32 - shift)));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -123,27 +90,22 @@ _mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift)
|
||||
|
||||
#if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64)
|
||||
#define mulu_32x32_shift _mulu_32x32_shift
|
||||
static inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
{
|
||||
uint32_t result;
|
||||
uint32_t l, h;
|
||||
|
||||
/* Valid for (0 <= shift <= 32) */
|
||||
__asm__ (
|
||||
" mullw %[result], %[val1], %[val2] \n"
|
||||
" mulhwu %[val1], %[val1], %[val2] \n"
|
||||
" srw %[result], %[result], %[shift] \n"
|
||||
" subfic %[shift], %[shift], 0x20 \n"
|
||||
" slw %[val1], %[val1], %[shift] \n"
|
||||
" or %[result], %[result], %[val1] \n"
|
||||
: [result] "=&r" (result)
|
||||
, [shift] "+r" (shift)
|
||||
, [val1] "+r" (val1)
|
||||
: [val2] "r" (val2)
|
||||
: "xer"
|
||||
" mullw %[l], %[val1], %[val2] \n"
|
||||
" mulhwu %[h], %[val1], %[val2] \n"
|
||||
: [l] "=&r" (l)
|
||||
, [h] "=r" (h)
|
||||
: [val1] "%r" (val1)
|
||||
, [val2] "r" (val2)
|
||||
);
|
||||
|
||||
return result;
|
||||
// Valid for (0 <= shift <= 31)
|
||||
return (l >> shift) | (h << (32 - shift));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -153,7 +115,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
divide and return the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -161,7 +123,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
divide and return the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -170,7 +132,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -179,7 +141,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
and 32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -188,7 +150,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
division, and returning the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -197,7 +159,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
division, and returning the 32 bit quotient
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -205,7 +167,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
divide and return the 32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -213,7 +175,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
divide and return the 32 bit remainder
|
||||
-------------------------------------------------*/
|
||||
|
||||
/* TBD */
|
||||
// TBD
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -222,7 +184,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define recip_approx _recip_approx
|
||||
static inline float ATTR_CONST ATTR_FORCE_INLINE
|
||||
inline float ATTR_CONST ATTR_FORCE_INLINE
|
||||
_recip_approx(float value)
|
||||
{
|
||||
float result;
|
||||
@ -237,6 +199,40 @@ _recip_approx(float value)
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mul_64x64 - perform a signed 64 bit x 64 bit
|
||||
multiply and return the full 128 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifdef __ppc64__
|
||||
#define mul_64x64 _mul_64x64
|
||||
inline int64_t ATTR_FORCE_INLINE
|
||||
_mul_64x64(int64_t a, int64_t b, int64_t &hi)
|
||||
{
|
||||
__int128 const r(__int128(a) * b);
|
||||
hi = int64_t(uint64_t((unsigned __int128)r >> 64));
|
||||
return int64_t(uint64_t((unsigned __int128)r));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
mulu_64x64 - perform an unsigned 64 bit x 64
|
||||
bit multiply and return the full 128 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifdef __ppc64__
|
||||
#define mulu_64x64 _mulu_64x64
|
||||
inline uint64_t ATTR_FORCE_INLINE
|
||||
_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
|
||||
{
|
||||
unsigned __int128 const r((unsigned __int128)a * b);
|
||||
hi = uint64_t(r >> 64);
|
||||
return uint64_t(r);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
INLINE BIT MANIPULATION FUNCTIONS
|
||||
@ -248,15 +244,15 @@ _recip_approx(float value)
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define count_leading_zeros _count_leading_zeros
|
||||
static inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_count_leading_zeros(uint32_t value)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
__asm__ (
|
||||
" cntlzw %[result], %[value] \n"
|
||||
: [result] "=r" (result) /* result can be in any register */
|
||||
: [value] "r" (value) /* 'value' can be in any register */
|
||||
: [result] "=r" (result)
|
||||
: [value] "r" (value)
|
||||
);
|
||||
|
||||
return result;
|
||||
@ -269,15 +265,15 @@ _count_leading_zeros(uint32_t value)
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define count_leading_ones _count_leading_ones
|
||||
static inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_count_leading_ones(uint32_t value)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
__asm__ (
|
||||
" cntlzw %[result], %[result] \n"
|
||||
: [result] "=r" (result) /* result can be in any register */
|
||||
: [value] "r" (~value) /* 'value' can be in any register */
|
||||
" cntlzw %[result], %[value] \n"
|
||||
: [result] "=r" (result)
|
||||
: [value] "r" (~value)
|
||||
);
|
||||
|
||||
return result;
|
||||
|
@ -31,22 +31,7 @@
|
||||
multiply and return the full 64 bit result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifndef __x86_64__
|
||||
#define mul_32x32 _mul_32x32
|
||||
inline int64_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mul_32x32(int32_t a, int32_t b)
|
||||
{
|
||||
int64_t result;
|
||||
__asm__ (
|
||||
" imull %[b] ;"
|
||||
: [result] "=A" (result) // result in edx:eax
|
||||
: [a] "%a" (a) // 'a' should also be in eax on entry
|
||||
, [b] "rm" (b) // 'b' can be memory or register
|
||||
: "cc" // Clobbers condition codes
|
||||
);
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
// GCC can do a good job of this.
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -55,22 +40,7 @@ _mul_32x32(int32_t a, int32_t b)
|
||||
result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifndef __x86_64__
|
||||
#define mulu_32x32 _mulu_32x32
|
||||
inline uint64_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mulu_32x32(uint32_t a, uint32_t b)
|
||||
{
|
||||
uint64_t result;
|
||||
__asm__ (
|
||||
" mull %[b] ;"
|
||||
: [result] "=A" (result) // result in edx:eax
|
||||
: [a] "%a" (a) // 'a' should also be in eax on entry
|
||||
, [b] "rm" (b) // 'b' can be memory or register
|
||||
: "cc" // Clobbers condition codes
|
||||
);
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
// GCC can do a good job of this.
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -79,21 +49,7 @@ _mulu_32x32(uint32_t a, uint32_t b)
|
||||
result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define mul_32x32_hi _mul_32x32_hi
|
||||
inline int32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mul_32x32_hi(int32_t a, int32_t b)
|
||||
{
|
||||
int32_t result, temp;
|
||||
__asm__ (
|
||||
" imull %[b] ;"
|
||||
: [result] "=d" (result) // result in edx
|
||||
, [temp] "=a" (temp) // This is effectively a clobber
|
||||
: [a] "a" (a) // 'a' should be in eax on entry
|
||||
, [b] "rm" (b) // 'b' can be memory or register
|
||||
: "cc" // Clobbers condition codes
|
||||
);
|
||||
return result;
|
||||
}
|
||||
// GCC can do a good job of this.
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -102,21 +58,7 @@ _mul_32x32_hi(int32_t a, int32_t b)
|
||||
of the result
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define mulu_32x32_hi _mulu_32x32_hi
|
||||
inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
|
||||
_mulu_32x32_hi(uint32_t a, uint32_t b)
|
||||
{
|
||||
uint32_t result, temp;
|
||||
__asm__ (
|
||||
" mull %[b] ;"
|
||||
: [result] "=d" (result) // result in edx
|
||||
, [temp] "=a" (temp) // This is effectively a clobber
|
||||
: [a] "a" (a) // 'a' should be in eax on entry
|
||||
, [b] "rm" (b) // 'b' can be memory or register
|
||||
: "cc" // Clobbers condition codes
|
||||
);
|
||||
return result;
|
||||
}
|
||||
// GCC can do a good job of this.
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
@ -241,21 +183,19 @@ _divu_64x32(uint64_t a, uint32_t b)
|
||||
|
||||
#define div_64x32_rem _div_64x32_rem
|
||||
inline int32_t ATTR_FORCE_INLINE
|
||||
_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder)
|
||||
_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t &remainder)
|
||||
{
|
||||
int32_t quotient;
|
||||
#ifndef __x86_64__
|
||||
|
||||
// Throws arithmetic exception if result doesn't fit in 32 bits
|
||||
__asm__ (
|
||||
" idivl %[divisor] ;"
|
||||
: [result] "=a" (quotient) // quotient ends up in eax
|
||||
, [remainder] "=d" (*remainder) // remainder ends up in edx
|
||||
, [remainder] "=d" (remainder) // remainder ends up in edx
|
||||
: [dividend] "A" (dividend) // 'dividend' in edx:eax
|
||||
, [divisor] "rm" (divisor) // 'divisor' in register or memory
|
||||
: "cc" // clobbers condition codes
|
||||
);
|
||||
|
||||
#else
|
||||
int32_t const divh{ int32_t(uint32_t(uint64_t(dividend) >> 32)) };
|
||||
int32_t const divl{ int32_t(uint32_t(uint64_t(dividend))) };
|
||||
@ -264,13 +204,12 @@ _div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder)
|
||||
__asm__ (
|
||||
" idivl %[divisor] ;"
|
||||
: [result] "=a" (quotient) // quotient ends up in eax
|
||||
, [remainder] "=d" (*remainder) // remainder ends up in edx
|
||||
, [remainder] "=d" (remainder) // remainder ends up in edx
|
||||
: [divl] "a" (divl) // 'dividend' in edx:eax
|
||||
, [divh] "d" (divh)
|
||||
, [divisor] "rm" (divisor) // 'divisor' in register or memory
|
||||
: "cc" // clobbers condition codes
|
||||
);
|
||||
|
||||
#endif
|
||||
return quotient;
|
||||
}
|
||||
@ -284,21 +223,19 @@ _div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder)
|
||||
|
||||
#define divu_64x32_rem _divu_64x32_rem
|
||||
inline uint32_t ATTR_FORCE_INLINE
|
||||
_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
|
||||
_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t &remainder)
|
||||
{
|
||||
uint32_t quotient;
|
||||
#ifndef __x86_64__
|
||||
|
||||
// Throws arithmetic exception if result doesn't fit in 32 bits
|
||||
__asm__ (
|
||||
" divl %[divisor] ;"
|
||||
: [result] "=a" (quotient) // quotient ends up in eax
|
||||
, [remainder] "=d" (*remainder) // remainder ends up in edx
|
||||
, [remainder] "=d" (remainder) // remainder ends up in edx
|
||||
: [dividend] "A" (dividend) // 'dividend' in edx:eax
|
||||
, [divisor] "rm" (divisor) // 'divisor' in register or memory
|
||||
: "cc" // clobbers condition codes
|
||||
);
|
||||
|
||||
#else
|
||||
uint32_t const divh{ uint32_t(dividend >> 32) };
|
||||
uint32_t const divl{ uint32_t(dividend) };
|
||||
@ -307,7 +244,7 @@ _divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
|
||||
__asm__ (
|
||||
" divl %[divisor] ;"
|
||||
: [result] "=a" (quotient) // quotient ends up in eax
|
||||
, [remainder] "=d" (*remainder) // remainder ends up in edx
|
||||
, [remainder] "=d" (remainder) // remainder ends up in edx
|
||||
: [divl] "a" (divl) // 'dividend' in edx:eax
|
||||
, [divh] "d" (divh)
|
||||
, [divisor] "rm" (divisor) // 'divisor' in register or memory
|
||||
@ -444,11 +381,11 @@ _modu_64x32(uint64_t a, uint32_t b)
|
||||
|
||||
#ifdef __SSE2__
|
||||
#define recip_approx _recip_approx
|
||||
inline float ATTR_CONST
|
||||
inline float ATTR_CONST ATTR_FORCE_INLINE
|
||||
_recip_approx(float value)
|
||||
{
|
||||
__m128 const value_xmm = _mm_set_ss(value);
|
||||
__m128 const result_xmm = _mm_rcp_ss(value_xmm);
|
||||
__m128 const value_xmm(_mm_set_ss(value));
|
||||
__m128 const result_xmm(_mm_rcp_ss(value_xmm));
|
||||
float result;
|
||||
_mm_store_ss(&result, result_xmm);
|
||||
return result;
|
||||
@ -464,10 +401,10 @@ _recip_approx(float value)
|
||||
#ifdef __x86_64__
|
||||
#define mul_64x64 _mul_64x64
|
||||
inline int64_t ATTR_FORCE_INLINE
|
||||
_mul_64x64(int64_t a, int64_t b, int64_t *hi)
|
||||
_mul_64x64(int64_t a, int64_t b, int64_t &hi)
|
||||
{
|
||||
__int128 const r(__int128(a) * b);
|
||||
*hi = int64_t(uint64_t((unsigned __int128)r >> 64));
|
||||
hi = int64_t(uint64_t((unsigned __int128)r >> 64));
|
||||
return int64_t(uint64_t((unsigned __int128)r));
|
||||
}
|
||||
#endif
|
||||
@ -481,10 +418,10 @@ _mul_64x64(int64_t a, int64_t b, int64_t *hi)
|
||||
#ifdef __x86_64__
|
||||
#define mulu_64x64 _mulu_64x64
|
||||
inline uint64_t ATTR_FORCE_INLINE
|
||||
_mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi)
|
||||
_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
|
||||
{
|
||||
unsigned __int128 const r((unsigned __int128)a * b);
|
||||
*hi = uint64_t(r >> 64);
|
||||
hi = uint64_t(r >> 64);
|
||||
return uint64_t(r);
|
||||
}
|
||||
#endif
|
||||
|
@ -28,7 +28,7 @@

#ifndef count_leading_zeros
#define count_leading_zeros _count_leading_zeros
inline uint8_t _count_leading_zeros(uint32_t value)
__forceinline uint8_t _count_leading_zeros(uint32_t value)
{
unsigned long index;
return _BitScanReverse(&index, value) ? (31U - index) : 32U;
@ -43,7 +43,7 @@ inline uint8_t _count_leading_zeros(uint32_t value)

#ifndef count_leading_ones
#define count_leading_ones _count_leading_ones
inline uint8_t _count_leading_ones(uint32_t value)
__forceinline uint8_t _count_leading_ones(uint32_t value)
{
unsigned long index;
return _BitScanReverse(&index, ~value) ? (31U - index) : 32U;
@ -15,9 +15,10 @@
|
||||
|
||||
#ifdef PTR64
|
||||
#include <emmintrin.h>
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
INLINE MATH FUNCTIONS
|
||||
@ -30,7 +31,7 @@
|
||||
|
||||
#ifndef PTR64
|
||||
#define mul_32x32 _mul_32x32
|
||||
static inline int64_t _mul_32x32(int32_t a, int32_t b)
|
||||
inline int64_t _mul_32x32(int32_t a, int32_t b)
|
||||
{
|
||||
// in theory this should work, but it is untested
|
||||
__asm
|
||||
@ -51,7 +52,7 @@ static inline int64_t _mul_32x32(int32_t a, int32_t b)
|
||||
|
||||
#ifndef PTR64
|
||||
#define mulu_32x32 _mulu_32x32
|
||||
static inline uint64_t _mulu_32x32(uint32_t a, uint32_t b)
|
||||
inline uint64_t _mulu_32x32(uint32_t a, uint32_t b)
|
||||
{
|
||||
// in theory this should work, but it is untested
|
||||
__asm
|
||||
@ -72,7 +73,7 @@ static inline uint64_t _mulu_32x32(uint32_t a, uint32_t b)
|
||||
|
||||
#ifndef PTR64
|
||||
#define mul_32x32_hi _mul_32x32_hi
|
||||
static inline int32_t _mul_32x32_hi(int32_t a, int32_t b)
|
||||
inline int32_t _mul_32x32_hi(int32_t a, int32_t b)
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
@ -96,7 +97,7 @@ static inline int32_t _mul_32x32_hi(int32_t a, int32_t b)
|
||||
|
||||
#ifndef PTR64
|
||||
#define mulu_32x32_hi _mulu_32x32_hi
|
||||
static inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b)
|
||||
inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b)
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
@ -148,7 +149,7 @@ static inline int32_t _mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
|
||||
|
||||
#ifndef PTR64
|
||||
#define mulu_32x32_shift _mulu_32x32_shift
|
||||
static inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
|
||||
inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
@ -173,7 +174,7 @@ static inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
|
||||
|
||||
#ifndef PTR64
|
||||
#define div_64x32 _div_64x32
|
||||
static inline int32_t _div_64x32(int64_t a, int32_t b)
|
||||
inline int32_t _div_64x32(int64_t a, int32_t b)
|
||||
{
|
||||
int32_t result;
|
||||
int32_t alow = a;
|
||||
@ -199,7 +200,7 @@ static inline int32_t _div_64x32(int64_t a, int32_t b)
|
||||
|
||||
#ifndef PTR64
|
||||
#define divu_64x32 _divu_64x32
|
||||
static inline uint32_t _divu_64x32(uint64_t a, uint32_t b)
|
||||
inline uint32_t _divu_64x32(uint64_t a, uint32_t b)
|
||||
{
|
||||
uint32_t result;
|
||||
uint32_t alow = a;
|
||||
@ -226,7 +227,7 @@ static inline uint32_t _divu_64x32(uint64_t a, uint32_t b)
|
||||
|
||||
#ifndef PTR64
|
||||
#define div_64x32_rem _div_64x32_rem
|
||||
static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
|
||||
inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t &remainder)
|
||||
{
|
||||
int32_t result;
|
||||
int32_t alow = a;
|
||||
@ -242,7 +243,7 @@ static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
|
||||
mov rem,edx
|
||||
}
|
||||
|
||||
*remainder = rem;
|
||||
remainder = rem;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
@ -256,7 +257,7 @@ static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
|
||||
|
||||
#ifndef PTR64
|
||||
#define divu_64x32_rem _divu_64x32_rem
|
||||
static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder)
|
||||
inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder)
|
||||
{
|
||||
uint32_t result;
|
||||
uint32_t alow = a;
|
||||
@ -272,7 +273,7 @@ static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remaind
|
||||
mov rem,edx
|
||||
}
|
||||
|
||||
*remainder = rem;
|
||||
remainder = rem;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
@ -286,7 +287,7 @@ static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remaind
|
||||
|
||||
#ifndef PTR64
|
||||
#define div_32x32_shift _div_32x32_shift
|
||||
static inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
|
||||
inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
@ -314,7 +315,7 @@ static inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
|
||||
|
||||
#ifndef PTR64
|
||||
#define divu_32x32_shift _divu_32x32_shift
|
||||
static inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
|
||||
inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
@ -367,7 +368,7 @@ static inline int32_t _mod_64x32(int64_t a, int32_t b)
|
||||
|
||||
#ifndef PTR64
|
||||
#define modu_64x32 _modu_64x32
|
||||
static inline uint32_t _modu_64x32(uint64_t a, uint32_t b)
|
||||
inline uint32_t _modu_64x32(uint64_t a, uint32_t b)
|
||||
{
|
||||
uint32_t result;
|
||||
uint32_t alow = a;
|
||||
@ -393,7 +394,7 @@ static inline uint32_t _modu_64x32(uint64_t a, uint32_t b)
|
||||
|
||||
#ifdef PTR64
|
||||
#define recip_approx _recip_approx
|
||||
static inline float _recip_approx(float z)
|
||||
inline float _recip_approx(float z)
|
||||
{
|
||||
__m128 const mz = _mm_set_ss(z);
|
||||
__m128 const mooz = _mm_rcp_ss(mz);
|
||||
@ -410,7 +411,11 @@ static inline float _recip_approx(float z)
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifdef PTR64
|
||||
#define mul_64x64 _mul128
|
||||
#define mul_64x64 _mul_64x64
|
||||
__forceinline int64_t _mul_64x64(int64_t a, int64_t b, int64_t &hi)
|
||||
{
|
||||
return _mul128(a, b, &hi);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -420,7 +425,44 @@ static inline float _recip_approx(float z)
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifdef PTR64
|
||||
#define mulu_64x64 _umul128
|
||||
#define mulu_64x64 _mulu_64x64
|
||||
__forceinline int64_t _mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
|
||||
{
|
||||
return _umul128(a, b, &hi);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
addu_32x32_co - perform an unsigned 32 bit + 32
|
||||
bit addition and return the result with carry
|
||||
out
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define addu_32x32_co _addu_32x32_co
|
||||
__forceinline bool _addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum)
|
||||
{
|
||||
return _addcarry_u32(0, a, b, &sum);
|
||||
}
|
||||
|
||||
|
||||
/*-------------------------------------------------
|
||||
addu_64x64_co - perform an unsigned 64 bit + 64
|
||||
bit addition and return the result with carry
|
||||
out
|
||||
-------------------------------------------------*/
|
||||
|
||||
#define addu_64x64_co _addu_64x64_co
|
||||
__forceinline bool _addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum)
|
||||
{
|
||||
#ifdef PTR64
|
||||
return _addcarry_u64(0, a, b, &sum);
|
||||
#else
|
||||
uint32_t l, h;
|
||||
bool const result = _addcarry_u32(_addcarry_u32(0, uint32_t(a), uint32_t(b), &l), uint32_t(a >> 32), uint32_t(b >> 32), &h);
|
||||
sum = (uint64_t(h) << 32) | l;
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // MAME_OSD_EIVCX86_H
|
||||
|
@ -25,8 +25,8 @@
|
||||
#include "eigccx86.h"
|
||||
#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__)
|
||||
#include "eigccppc.h"
|
||||
#else
|
||||
#error "no matching assembler implementations found - please compile with NOASM=1"
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
#include "eigccarm.h"
|
||||
#endif
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
@ -37,10 +37,6 @@
|
||||
|
||||
#include "eivc.h"
|
||||
|
||||
#else
|
||||
|
||||
#error "no matching assembler implementations found - please compile with NOASM=1"
|
||||
|
||||
#endif
|
||||
|
||||
#endif // !defined(MAME_NOASM)
|
||||
@ -56,7 +52,7 @@
|
||||
-------------------------------------------------*/
|
||||
|
||||
#ifndef mul_32x32
|
||||
inline int64_t mul_32x32(int32_t a, int32_t b)
|
||||
constexpr int64_t mul_32x32(int32_t a, int32_t b)
|
||||
{
|
||||
return int64_t(a) * int64_t(b);
|
||||
}
@ -70,7 +66,7 @@ inline int64_t mul_32x32(int32_t a, int32_t b)
-------------------------------------------------*/

#ifndef mulu_32x32
inline uint64_t mulu_32x32(uint32_t a, uint32_t b)
constexpr uint64_t mulu_32x32(uint32_t a, uint32_t b)
{
    return uint64_t(a) * uint64_t(b);
}
@ -84,7 +80,7 @@ inline uint64_t mulu_32x32(uint32_t a, uint32_t b)
-------------------------------------------------*/

#ifndef mul_32x32_hi
inline int32_t mul_32x32_hi(int32_t a, int32_t b)
constexpr int32_t mul_32x32_hi(int32_t a, int32_t b)
{
    return uint32_t((int64_t(a) * int64_t(b)) >> 32);
}
@ -98,7 +94,7 @@ inline int32_t mul_32x32_hi(int32_t a, int32_t b)
-------------------------------------------------*/

#ifndef mulu_32x32_hi
inline uint32_t mulu_32x32_hi(uint32_t a, uint32_t b)
constexpr uint32_t mulu_32x32_hi(uint32_t a, uint32_t b)
{
    return uint32_t((uint64_t(a) * uint64_t(b)) >> 32);
}
@ -113,7 +109,7 @@ inline uint32_t mulu_32x32_hi(uint32_t a, uint32_t b)
-------------------------------------------------*/

#ifndef mul_32x32_shift
inline int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
constexpr int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
{
    return int32_t((int64_t(a) * int64_t(b)) >> shift);
}
@ -128,7 +124,7 @@ inline int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
-------------------------------------------------*/

#ifndef mulu_32x32_shift
inline uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
constexpr uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
{
    return uint32_t((uint64_t(a) * uint64_t(b)) >> shift);
}
@ -141,7 +137,7 @@ inline uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
-------------------------------------------------*/

#ifndef div_64x32
inline int32_t div_64x32(int64_t a, int32_t b)
constexpr int32_t div_64x32(int64_t a, int32_t b)
{
    return a / int64_t(b);
}
@ -154,7 +150,7 @@ inline int32_t div_64x32(int64_t a, int32_t b)
-------------------------------------------------*/

#ifndef divu_64x32
inline uint32_t divu_64x32(uint64_t a, uint32_t b)
constexpr uint32_t divu_64x32(uint64_t a, uint32_t b)
{
    return a / uint64_t(b);
}
@ -168,10 +164,10 @@ inline uint32_t divu_64x32(uint64_t a, uint32_t b)
-------------------------------------------------*/

#ifndef div_64x32_rem
inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t &remainder)
{
    int32_t const res = div_64x32(a, b);
    *remainder = a - (int64_t(b) * res);
    int32_t const res(div_64x32(a, b));
    remainder = a - (int64_t(b) * res);
    return res;
}
#endif
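With the remainder now taken by reference rather than by pointer, call sites drop the address-of operator. A usage sketch (names are illustrative):

// illustrative sketch, not from the MAME source
#include <cstdint>

void divide_example(int64_t dividend, int32_t divisor)
{
    int32_t remainder;
    int32_t const quotient = div_64x32_rem(dividend, divisor, remainder);   // previously: &remainder
    // quotient * divisor + remainder reconstructs the dividend when the quotient fits in 32 bits
    (void)quotient;
}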
@ -184,10 +180,10 @@ inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
-------------------------------------------------*/

#ifndef divu_64x32_rem
inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder)
inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder)
{
    uint32_t const res = divu_64x32(a, b);
    *remainder = a - (uint64_t(b) * res);
    uint32_t const res(divu_64x32(a, b));
    remainder = a - (uint64_t(b) * res);
    return res;
}
#endif
@ -200,7 +196,7 @@ inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder)
-------------------------------------------------*/

#ifndef div_32x32_shift
inline int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
constexpr int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
{
    return (int64_t(a) << shift) / int64_t(b);
}
@ -214,7 +210,7 @@ inline int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
-------------------------------------------------*/

#ifndef divu_32x32_shift
inline uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
constexpr uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
{
    return (uint64_t(a) << shift) / uint64_t(b);
}
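The *_shift variants map naturally onto fixed-point arithmetic. A sketch assuming a 16.16 format and the portable constexpr implementations above; the format and helper names are chosen purely for illustration:

// illustrative sketch, not from the MAME source
#include <cstdint>

constexpr int FRAC_BITS = 16;   // assumed 16.16 fixed-point format

constexpr int32_t fx_mul(int32_t a, int32_t b) { return mul_32x32_shift(a, b, FRAC_BITS); }
constexpr int32_t fx_div(int32_t a, int32_t b) { return div_32x32_shift(a, b, FRAC_BITS); }

static_assert(fx_mul(3 << 16, 2 << 16) == (6 << 16), "3.0 * 2.0 == 6.0");
static_assert(fx_div(3 << 16, 2 << 16) == 0x18000, "3.0 / 2.0 == 1.5");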
@ -227,7 +223,7 @@ inline uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
-------------------------------------------------*/

#ifndef mod_64x32
inline int32_t mod_64x32(int64_t a, int32_t b)
constexpr int32_t mod_64x32(int64_t a, int32_t b)
{
    return a - (b * div_64x32(a, b));
}
@ -240,7 +236,7 @@ inline int32_t mod_64x32(int64_t a, int32_t b)
-------------------------------------------------*/

#ifndef modu_64x32
inline uint32_t modu_64x32(uint64_t a, uint32_t b)
constexpr uint32_t modu_64x32(uint64_t a, uint32_t b)
{
    return a - (b * divu_64x32(a, b));
}
@ -253,7 +249,7 @@ inline uint32_t modu_64x32(uint64_t a, uint32_t b)
-------------------------------------------------*/

#ifndef recip_approx
inline float recip_approx(float value)
constexpr float recip_approx(float value)
{
    return 1.0f / value;
}
@ -266,7 +262,7 @@ inline float recip_approx(float value)
-------------------------------------------------*/

#ifndef mul_64x64
inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi)
inline int64_t mul_64x64(int64_t a, int64_t b, int64_t &hi)
{
    uint64_t const a_hi = uint64_t(a) >> 32;
    uint64_t const b_hi = uint64_t(b) >> 32;
@ -279,13 +275,13 @@ inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi)
    uint64_t const ab_hi = a_hi * b_hi;
    uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32;

    *hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;
    hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;

    // adjust for sign
    if (a < 0)
        *hi -= b;
        hi -= b;
    if (b < 0)
        *hi -= a;
        hi -= a;

    return ab_lo + (ab_m1 << 32) + (ab_m2 << 32);
}
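The sign adjustment can be read off from the unsigned reinterpretation of the operands: for a < 0, uint64_t(a) = a + 2^64, so (using Iverson brackets)

    uint64_t(a) * uint64_t(b) = a*b + 2^64 * (b*[a < 0] + a*[b < 0]) + 2^128 * [a < 0]*[b < 0]

Modulo 2^128 the last term vanishes, so the unsigned high half exceeds the signed one by b when a < 0 and by a when b < 0, which is exactly what the two subtractions remove; the low 64 bits need no correction.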
@ -298,7 +294,7 @@ inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi)
-------------------------------------------------*/

#ifndef mulu_64x64
inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi)
inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
{
    uint64_t const a_hi = uint32_t(a >> 32);
    uint64_t const b_hi = uint32_t(b >> 32);
@ -311,13 +307,51 @@ inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi)
    uint64_t const ab_hi = a_hi * b_hi;
    uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32;

    *hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;
    hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;

    return ab_lo + (ab_m1 << 32) + (ab_m2 << 32);
}
#endif
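Where a 128-bit integer type is available (e.g. unsigned __int128 on 64-bit GCC/Clang), the portable fallback can be spot-checked against it. A test sketch, not part of the header:

// illustrative sketch, not from the MAME source
#include <cassert>
#include <cstdint>

inline void check_mulu_64x64(uint64_t a, uint64_t b)
{
    uint64_t hi;
    uint64_t const lo = mulu_64x64(a, b, hi);
    unsigned __int128 const ref = (unsigned __int128)a * b;
    assert(lo == uint64_t(ref));        // low 64 bits returned
    assert(hi == uint64_t(ref >> 64));  // high 64 bits written to hi
}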


/*-------------------------------------------------
    addu_32x32_co - perform an unsigned 32 bit + 32
    bit addition and return the result with carry
    out
-------------------------------------------------*/

#ifndef addu_32x32_co
inline bool addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum)
{
#if defined(__GNUC__)
    return __builtin_add_overflow(a, b, &sum);
#else
    sum = a + b;
    return (a > sum) || (b > sum);
#endif
}
#endif


/*-------------------------------------------------
    addu_64x64_co - perform an unsigned 64 bit + 64
    bit addition and return the result with carry
    out
-------------------------------------------------*/

#ifndef addu_64x64_co
inline bool addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum)
{
#if defined(__GNUC__)
    return __builtin_add_overflow(a, b, &sum);
#else
    sum = a + b;
    return (a > sum) || (b > sum);
#endif
}
#endif
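One natural use of the new carry-out helpers is chaining them for wider additions. A minimal sketch adding two 128-bit values held as 64-bit halves (the struct and names are illustrative):

// illustrative sketch, not from the MAME source
#include <cstdint>

struct u128 { uint64_t lo, hi; };

inline u128 add128(u128 const &a, u128 const &b)
{
    u128 r;
    bool const carry = addu_64x64_co(a.lo, b.lo, r.lo);   // carry out of the low halves
    r.hi = a.hi + b.hi + (carry ? 1 : 0);
    return r;
}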


/***************************************************************************
    INLINE BIT MANIPULATION FUNCTIONS
@ -360,12 +394,11 @@ inline uint8_t count_leading_ones(uint32_t val)
-------------------------------------------------*/

#ifndef population_count_32
#if defined(__NetBSD__)
#define population_count_32 popcount32
#else
inline unsigned population_count_32(uint32_t val)
{
#if defined(__GNUC__)
#if defined(__NetBSD__)
    return popcount32(val);
#elif defined(__GNUC__)
    // uses CPU feature if available, otherwise falls back to implementation similar to what follows
    static_assert(sizeof(val) == sizeof(unsigned), "expected 32-bit unsigned int");
    return unsigned(__builtin_popcount(static_cast<unsigned>(val)));
@ -382,7 +415,6 @@ inline unsigned population_count_32(uint32_t val)
#endif
}
#endif
#endif
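Note that the NetBSD popcount32 call now sits inside the function body instead of replacing it via a #define, so population_count_32 is always a real function. A trivial usage sketch (names and values chosen for illustration):

// illustrative sketch, not from the MAME source
#include <cstdint>

inline unsigned active_irq_lines(uint32_t pending)
{
    return population_count_32(pending);   // e.g. 0xF0F0F0F0 -> 16
}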


/*-------------------------------------------------
@ -391,12 +423,11 @@ inline unsigned population_count_32(uint32_t val)
-------------------------------------------------*/

#ifndef population_count_64
#if defined(__NetBSD__)
#define population_count_64 popcount64
#else
inline unsigned population_count_64(uint64_t val)
{
#if defined(__GNUC__)
#if defined(__NetBSD__)
    return popcount64(val);
#elif defined(__GNUC__)
    // uses CPU feature if available, otherwise falls back to implementation similar to what follows
    static_assert(sizeof(val) == sizeof(unsigned long long), "expected 64-bit unsigned long long int");
    return unsigned(__builtin_popcountll(static_cast<unsigned long long>(val)));
@ -422,7 +453,6 @@ inline unsigned population_count_64(uint64_t val)
#endif
}
#endif
#endif


/***************************************************************************