-osd: Clean up inline maths utilities.

* Removed inline assembly for operations compilers handle well.
* Added ARM and AArch64 implementations for a few operations.
* Added unsigned integer add with carry out operations.

-cpu/drccache.cpp: Detect whether RWX pages are supported.

-dynax.cpp: Improved a few hanafuda DIP switch descriptions.
This commit is contained in:
Vas Crabb 2021-01-18 01:42:28 +11:00
parent bb7b375aa6
commit 6e1bbe8be8
19 changed files with 590 additions and 307 deletions

View File

@ -362,33 +362,6 @@ WINDRES := $(word 1,$(TOOLCHAIN) i686-w64-mingw32-)windres
endif
endif
ifeq ($(findstring arm,$(UNAME)),arm)
ARCHITECTURE :=
ifndef NOASM
NOASM := 1
endif
endif
ifeq ($(findstring aarch64,$(UNAME)),aarch64)
ARCHITECTURE :=
ifndef NOASM
NOASM := 1
endif
endif
ifeq ($(findstring s390x,$(UNAME)),s390x)
ifndef NOASM
NOASM := 1
endif
endif
ifeq ($(findstring riscv64,$(UNAME)),riscv64)
ARCHITECTURE :=
ifndef NOASM
NOASM := 1
endif
endif
# Emscripten
ifeq ($(findstring emcc,$(CC)),emcc)
TARGETOS := asmjs
@ -398,27 +371,42 @@ ifndef NOASM
endif
endif
# ppc has inline assembly support but no DRC
ifeq ($(findstring ppc,$(UNAME)),ppc)
ifndef FORCE_DRC_C_BACKEND
FORCE_DRC_C_BACKEND := 1
endif
endif
# powerpc has inline assembly support but no DRC
ifeq ($(findstring powerpc,$(UNAME)),powerpc)
ifndef FORCE_DRC_C_BACKEND
FORCE_DRC_C_BACKEND := 1
FORCE_DRC_C_BACKEND := 1
endif
endif
# ARM / ARM64
ifeq ($(findstring arm,$(UNAME)),arm)
ifndef FORCE_DRC_C_BACKEND
FORCE_DRC_C_BACKEND := 1
endif
endif
ifeq ($(findstring aarch64,$(UNAME)),aarch64)
ifndef FORCE_DRC_C_BACKEND
FORCE_DRC_C_BACKEND := 1
endif
endif
ifeq ($(findstring s390x,$(UNAME)),s390x)
ifndef FORCE_DRC_C_BACKEND
FORCE_DRC_C_BACKEND := 1
endif
endif
ifeq ($(findstring riscv64,$(UNAME)),riscv64)
ifndef FORCE_DRC_C_BACKEND
FORCE_DRC_C_BACKEND := 1
endif
endif
# Autodetect BIGENDIAN
# MacOSX
ifndef BIGENDIAN

View File

@ -398,14 +398,14 @@ void alpha_device::cpu_execute(u32 const op)
// register variants
case 0x00: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(m_r[Rb(op)]))); break; // mull
case 0x20: m_r[Rc(op)] = m_r[Ra(op)] * m_r[Rb(op)]; break; // mulq
case 0x30: mulu_64x64(m_r[Ra(op)], m_r[Rb(op)], &m_r[Rc(op)]); break; // umulh
case 0x30: mulu_64x64(m_r[Ra(op)], m_r[Rb(op)], m_r[Rc(op)]); break; // umulh
case 0x40: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(m_r[Rb(op)]))); break; // mull/v
case 0x60: m_r[Rc(op)] = m_r[Ra(op)] * m_r[Rb(op)]; break; // mulq/v
// immediate variants
case 0x80: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(Im(op)))); break; // mull
case 0xa0: m_r[Rc(op)] = m_r[Ra(op)] * Im(op); break; // mulq
case 0xb0: mulu_64x64(m_r[Ra(op)], Im(op), &m_r[Rc(op)]); break; // umulh
case 0xb0: mulu_64x64(m_r[Ra(op)], Im(op), m_r[Rc(op)]); break; // umulh
case 0xc0: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(Im(op)))); break; // mull/v
case 0xe0: m_r[Rc(op)] = m_r[Ra(op)] * Im(op); break; // mulq/v
}

View File

@ -14,10 +14,6 @@
#include <algorithm>
// this improves performance of some emulated systems but doesn't work on W^X hosts
//#define MAME_DRC_CACHE_RWX
namespace {
template <typename T, typename U> constexpr T *ALIGN_PTR_UP(T *p, U align)
@ -52,7 +48,8 @@ drc_cache::drc_cache(size_t bytes) :
m_end(m_limit),
m_codegen(nullptr),
m_size(m_cache.size()),
m_executable(false)
m_executable(false),
m_rwx(false)
{
// alignment and page size must be powers of two, cache must be page-aligned
assert(!(CACHE_ALIGNMENT & (CACHE_ALIGNMENT - 1)));
@ -63,11 +60,24 @@ drc_cache::drc_cache(size_t bytes) :
std::fill(std::begin(m_free), std::end(m_free), nullptr);
std::fill(std::begin(m_nearfree), std::end(m_nearfree), nullptr);
#if defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE | osd::virtual_memory_allocation::EXECUTE);
#else // defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE);
#endif // defined(MAME_DRC_CACHE_RWX)
if (!m_cache)
{
throw emu_fatalerror("drc_cache: Error allocating virtual memory");
}
else if (!m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE))
{
throw emu_fatalerror("drc_cache: Error marking cache read/write");
}
else if (m_cache.set_access(m_base - m_near, m_end - m_base, osd::virtual_memory_allocation::READ_WRITE | osd::virtual_memory_allocation::EXECUTE))
{
osd_printf_verbose("drc_cache: RWX pages supported\n");
m_rwx = true;
}
else
{
osd_printf_verbose("drc_cache: Using W^X mode\n");
m_rwx = false;
}
}
@ -209,9 +219,8 @@ void drc_cache::codegen_init()
{
if (m_executable)
{
#if !defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE);
#endif // !defined(MAME_DRC_CACHE_RWX)
if (!m_rwx)
m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE);
m_executable = false;
}
}
@ -221,9 +230,8 @@ void drc_cache::codegen_complete()
{
if (!m_executable)
{
#if !defined(MAME_DRC_CACHE_RWX)
m_cache.set_access(m_base - m_near, ALIGN_PTR_UP(m_top, m_cache.page_size()) - m_base, osd::virtual_memory_allocation::READ_EXECUTE);
#endif // !defined(MAME_DRC_CACHE_RWX)
if (!m_rwx)
m_cache.set_access(m_base - m_near, ALIGN_PTR_UP(m_top, m_cache.page_size()) - m_base, osd::virtual_memory_allocation::READ_EXECUTE);
m_executable = true;
}
}

View File

@ -94,6 +94,7 @@ private:
drccodeptr m_codegen; // start of current generated code block
size_t const m_size; // size of the cache in bytes
bool m_executable; // whether cached code is currently executable
bool m_rwx; // whether pages can be simultaneously writable and executable
// oob management
struct oob_handler

View File

@ -3561,11 +3561,11 @@ void mips3_device::handle_special(uint32_t op)
m_core->icount -= 35;
break;
case 0x1c: /* DMULT */
LOVAL64 = mul_64x64(RSVAL64, RTVAL64, reinterpret_cast<s64 *>(&HIVAL64));
LOVAL64 = mul_64x64(RSVAL64, RTVAL64, *reinterpret_cast<s64 *>(&HIVAL64));
m_core->icount -= 7;
break;
case 0x1d: /* DMULTU */
LOVAL64 = mulu_64x64(RSVAL64, RTVAL64, &HIVAL64);
LOVAL64 = mulu_64x64(RSVAL64, RTVAL64, HIVAL64);
m_core->icount -= 7;
break;
case 0x1e: /* DDIV */

View File

@ -473,10 +473,10 @@ void r4000_base_device::cpu_execute(u32 const op)
}
break;
case 0x1c: // DMULT
m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], reinterpret_cast<s64 *>(&m_hi));
m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], *reinterpret_cast<s64 *>(&m_hi));
break;
case 0x1d: // DMULTU
m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], &m_hi);
m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], m_hi);
break;
case 0x1e: // DDIV
if (m_r[RTREG])

View File

@ -40,17 +40,17 @@ attotime &attotime::operator*=(u32 factor)
// split attoseconds into upper and lower halves which fit into 32 bits
u32 attolo;
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &attolo);
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, attolo);
// scale the lower half, then split into high/low parts
u64 temp = mulu_32x32(attolo, factor);
u32 reslo;
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, &reslo);
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, reslo);
// scale the upper half, then split into high/low parts
temp += mulu_32x32(attohi, factor);
u32 reshi;
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, &reshi);
temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, reshi);
// scale the seconds
temp += mulu_32x32(m_seconds, factor);
@ -80,19 +80,19 @@ attotime &attotime::operator/=(u32 factor)
// split attoseconds into upper and lower halves which fit into 32 bits
u32 attolo;
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &attolo);
u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, attolo);
// divide the seconds and get the remainder
u32 remainder;
m_seconds = divu_64x32_rem(m_seconds, factor, &remainder);
m_seconds = divu_64x32_rem(m_seconds, factor, remainder);
// combine the upper half of attoseconds with the remainder and divide that
u64 temp = s64(attohi) + mulu_32x32(remainder, ATTOSECONDS_PER_SECOND_SQRT);
u32 reshi = divu_64x32_rem(temp, factor, &remainder);
u32 reshi = divu_64x32_rem(temp, factor, remainder);
// combine the lower half of attoseconds with the remainder and divide that
temp = attolo + mulu_32x32(remainder, ATTOSECONDS_PER_SECOND_SQRT);
u32 reslo = divu_64x32_rem(temp, factor, &remainder);
u32 reslo = divu_64x32_rem(temp, factor, remainder);
// round based on the remainder
m_attoseconds = (attoseconds_t)reslo + mulu_32x32(reshi, ATTOSECONDS_PER_SECOND_SQRT);
@ -142,7 +142,7 @@ const char *attotime::as_string(int precision) const
else
{
u32 lower;
u32 upper = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &lower);
u32 upper = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, lower);
int temp = precision;
while (temp < 18)
{

View File

@ -357,7 +357,7 @@ inline attotime attotime::from_ticks(u64 ticks, u32 frequency)
return attotime(0, ticks * attos_per_tick);
u32 remainder;
s32 secs = divu_64x32_rem(ticks, frequency, &remainder);
s32 secs = divu_64x32_rem(ticks, frequency, remainder);
return attotime(secs, u64(remainder) * attos_per_tick);
}
else

View File

@ -437,7 +437,7 @@ attotime device_t::clocks_to_attotime(u64 numclocks) const noexcept
else
{
u32 remainder;
u32 quotient = divu_64x32_rem(numclocks, m_clock, &remainder);
u32 quotient = divu_64x32_rem(numclocks, m_clock, remainder);
return attotime(quotient, u64(remainder) * u64(m_attoseconds_per_clock));
}
}

View File

@ -518,7 +518,7 @@ void device_scheduler::timeslice()
else
{
u32 remainder;
s32 secs = divu_64x32_rem(ran, exec->m_cycles_per_second, &remainder);
s32 secs = divu_64x32_rem(ran, exec->m_cycles_per_second, remainder);
deltatime = attotime(secs, u64(remainder) * exec->m_attoseconds_per_cycle);
}
assert(deltatime >= attotime::zero);

View File

@ -475,13 +475,13 @@ void validity_checker::validate_inlines()
if (resultu32 != expectedu32)
osd_printf_error("Error testing divu_64x32 (%16X / %08X) = %08X (expected %08X)\n", u64(testu64a), u32(testu32a), resultu32, expectedu32);
resulti32 = div_64x32_rem(testi64a, testi32a, &remainder);
resulti32 = div_64x32_rem(testi64a, testi32a, remainder);
expectedi32 = testi64a / s64(testi32a);
expremainder = testi64a % s64(testi32a);
if (resulti32 != expectedi32 || remainder != expremainder)
osd_printf_error("Error testing div_64x32_rem (%16X / %08X) = %08X,%08X (expected %08X,%08X)\n", s64(testi64a), s32(testi32a), resulti32, remainder, expectedi32, expremainder);
resultu32 = divu_64x32_rem(testu64a, testu32a, &uremainder);
resultu32 = divu_64x32_rem(testu64a, testu32a, uremainder);
expectedu32 = testu64a / u64(testu32a);
expuremainder = testu64a % u64(testu32a);
if (resultu32 != expectedu32 || uremainder != expuremainder)

View File

@ -1712,8 +1712,7 @@ INPUT_PORTS_START( HANAFUDA_KEYS_BET )
PORT_BIT( 0x20, IP_ACTIVE_LOW, IPT_MAHJONG_SMALL ) PORT_PLAYER(2) // "s"
INPUT_PORTS_END
#ifdef UNREFERENCED_CODE
static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT )
[[maybe_unused]] static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT )
PORT_START("KEY0")
PORT_BIT( 0x01, IP_ACTIVE_LOW, IPT_HANAFUDA_A ) PORT_PLAYER(1)
PORT_BIT( 0x02, IP_ACTIVE_LOW, IPT_HANAFUDA_E ) PORT_PLAYER(1)
@ -1798,7 +1797,6 @@ static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT )
PORT_BIT( 0x40, IP_ACTIVE_LOW, IPT_UNKNOWN )
PORT_BIT( 0x80, IP_ACTIVE_LOW, IPT_UNKNOWN )
INPUT_PORTS_END
#endif
static INPUT_PORTS_START( cdracula )
PORT_START("P1")
@ -1961,9 +1959,9 @@ static INPUT_PORTS_START( hnkochou )
PORT_DIPNAME( 0x10, 0x10, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:5" )
PORT_DIPSETTING( 0x10, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x20, 0x20, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:6" )
PORT_DIPSETTING( 0x20, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x20, 0x20, "Gokou Odds" ) PORT_DIPLOCATION( "DIPSW 1:6" )
PORT_DIPSETTING( 0x20, "100" )
PORT_DIPSETTING( 0x00, "200" )
PORT_DIPNAME( 0x40, 0x40, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:7" )
PORT_DIPSETTING( 0x40, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
@ -1972,16 +1970,15 @@ static INPUT_PORTS_START( hnkochou )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_START("DSW1")
PORT_DIPNAME( 0x01, 0x01, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:1" )
PORT_DIPSETTING( 0x01, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x02, 0x02, "Stage Select" ) PORT_DIPLOCATION( "DIPSW 2:2" )
PORT_DIPSETTING( 0x00, DEF_STR( No ) )
PORT_DIPSETTING( 0x02, DEF_STR( Yes ) )
PORT_DIPNAME( 0x04, 0x04, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:3" )
PORT_DIPNAME( 0x03, 0x03, "Game Mode" ) PORT_DIPLOCATION( "DIPSW 2:1,2" )
PORT_DIPSETTING( 0x03, "A (Stage Select)" ) // stage select, gal re-dresses if player loses
PORT_DIPSETTING( 0x02, "B" ) // no stage select, gal doesn't re-dress if player loses
PORT_DIPSETTING( 0x01, "C" ) // no stage select, gal re-dresses if player loses
PORT_DIPSETTING( 0x00, "D (Gals Off)" ) // no "show time" on win, gals still shown in attract mode
PORT_DIPNAME( 0x04, 0x04, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:3" ) // possibly difficulty/pay rate?
PORT_DIPSETTING( 0x04, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x08, 0x08, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:4" )
PORT_DIPNAME( 0x08, 0x08, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:4" ) // possibly difficulty/pay rate?
PORT_DIPSETTING( 0x08, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x10, 0x10, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:5" )
@ -1990,9 +1987,9 @@ static INPUT_PORTS_START( hnkochou )
PORT_DIPNAME( 0x20, 0x20, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:6" )
PORT_DIPSETTING( 0x20, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x40, 0x40, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:7" )
PORT_DIPSETTING( 0x40, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x40, 0x40, "Suggest Move" ) PORT_DIPLOCATION( "DIPSW 2:7" )
PORT_DIPSETTING( 0x00, DEF_STR( No ) )
PORT_DIPSETTING( 0x40, DEF_STR( Yes ) )
PORT_DIPNAME( 0x80, 0x80, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:8" )
PORT_DIPSETTING( 0x80, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
@ -2170,10 +2167,10 @@ static INPUT_PORTS_START( hjingi )
PORT_DIPNAME( 0x10, 0x10, "Double-Up Game Rate" ) PORT_DIPLOCATION( "DIP2:5" )
PORT_DIPSETTING( 0x10, DEF_STR( High ) )
PORT_DIPSETTING( 0x00, DEF_STR( Low ) )
PORT_DIPNAME( 0x20, 0x20, "GOKOU Odds" ) PORT_DIPLOCATION( "DIP2:6" )
PORT_DIPNAME( 0x20, 0x20, "Gokou Odds" ) PORT_DIPLOCATION( "DIP2:6" )
PORT_DIPSETTING( 0x20, "100" )
PORT_DIPSETTING( 0x00, "200" )
PORT_DIPNAME( 0x40, 0x40, "GOKOU Cut" ) PORT_DIPLOCATION( "DIP2:7" )
PORT_DIPNAME( 0x40, 0x40, "Gokou Cut" ) PORT_DIPLOCATION( "DIP2:7" )
PORT_DIPSETTING( 0x00, DEF_STR( No ) )
PORT_DIPSETTING( 0x40, DEF_STR( Yes ) )
PORT_DIPNAME( 0x80, 0x80, "3-Renchan Bonus" ) PORT_DIPLOCATION( "DIP2:8" )
@ -2623,7 +2620,7 @@ static INPUT_PORTS_START( hanayara )
PORT_DIPNAME( 0x20, 0x20, "Choose Bonus (Cheat)") PORT_DIPLOCATION( "DIP2:6" )
PORT_DIPSETTING( 0x20, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x40, 0x40, "Unknown 2-6" ) PORT_DIPLOCATION( "DIP2:7" )
PORT_DIPNAME( 0x40, 0x40, "Show All Bonus Cards") PORT_DIPLOCATION( "DIP2:7" )
PORT_DIPSETTING( 0x40, DEF_STR( Off ) )
PORT_DIPSETTING( 0x00, DEF_STR( On ) )
PORT_DIPNAME( 0x80, 0x80, DEF_STR( Service_Mode ) ) PORT_DIPLOCATION( "DIP2:8" )

View File

@ -281,7 +281,7 @@ INPUT_PORTS_START(kaypro_keyboard_typewriter)
PORT_BIT(0x04, IP_ACTIVE_LOW, IPT_KEYBOARD) PORT_CODE(KEYCODE_LSHIFT) PORT_CODE(KEYCODE_RSHIFT) PORT_CHAR(UCHAR_SHIFT_1) PORT_NAME("SHIFT")
INPUT_PORTS_END
INPUT_PORTS_START(kaypro_keyboard_bitshift)
[[maybe_unused]] INPUT_PORTS_START(kaypro_keyboard_bitshift)
PORT_INCLUDE(kaypro_keyboard_typewriter)
PORT_MODIFY("ROW.2")
@ -366,7 +366,6 @@ void kaypro_10_keyboard_device::device_add_mconfig(machine_config &config)
ioport_constructor kaypro_10_keyboard_device::device_input_ports() const
{
(void)&INPUT_PORTS_NAME(kaypro_keyboard_bitshift);
return INPUT_PORTS_NAME(kaypro_keyboard_typewriter);
}

285
src/osd/eigccarm.h Normal file
View File

@ -0,0 +1,285 @@
// license:BSD-3-Clause
// copyright-holders:Vas Crabb
/***************************************************************************
eigccarm.h
ARM/AArch64 inline implementations for GCC compilers. This code is
automatically included if appropriate by eminline.h.
***************************************************************************/
#ifndef MAME_OSD_EIGCCARM_H
#define MAME_OSD_EIGCCARM_H
/***************************************************************************
INLINE MATH FUNCTIONS
***************************************************************************/
/*-------------------------------------------------
mul_32x32 - perform a signed 32 bit x 32 bit
multiply and return the full 64 bit result
-------------------------------------------------*/
// GCC can do a good job of this.
/*-------------------------------------------------
mulu_32x32 - perform an unsigned 32 bit x
32 bit multiply and return the full 64 bit
result
-------------------------------------------------*/
// GCC can do a good job of this
/*-------------------------------------------------
mul_32x32_hi - perform a signed 32 bit x 32 bit
multiply and return the upper 32 bits of the
result
-------------------------------------------------*/
// GCC can do a good job of this
/*-------------------------------------------------
mulu_32x32_hi - perform an unsigned 32 bit x
32 bit multiply and return the upper 32 bits
of the result
-------------------------------------------------*/
// GCC can do a good job of this
/*-------------------------------------------------
mul_32x32_shift - perform a signed 32 bit x
32 bit multiply and shift the result by the
given number of bits before truncating the
result to 32 bits
-------------------------------------------------*/
#if !defined(__aarch64__)
#define mul_32x32_shift _mul_32x32_shift

// Signed 32x32 multiply using SMULL to get the full 64-bit product in a
// low/high register pair, then extract 32 bits starting at the given shift.
inline int32_t ATTR_CONST ATTR_FORCE_INLINE
_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift)
{
	uint32_t l, h;
	__asm__ (
			" smull  %[l], %[h], %[val1], %[val2] \n"
			: [l]    "=r" (l)                       // low 32 bits of the product
			, [h]    "=r" (h)                       // high 32 bits of the product
			: [val1] "%r" (val1)                    // commutative - may swap with val2
			, [val2] "r"  (val2)
			);

	// Valid for (0 <= shift <= 31).  The (32 - shift) funnel shift is
	// undefined behaviour in C++ when shift == 0 (it would shift a 32-bit
	// value by 32), so that case is handled separately.
	if (!shift)
		return int32_t(l);
	return int32_t((l >> shift) | (h << (32 - shift)));
}
#endif
/*-------------------------------------------------
mulu_32x32_shift - perform an unsigned 32 bit x
32 bit multiply and shift the result by the
given number of bits before truncating the
result to 32 bits
-------------------------------------------------*/
#if !defined(__aarch64__)
#define mulu_32x32_shift _mulu_32x32_shift

// Unsigned 32x32 multiply using UMULL to get the full 64-bit product in a
// low/high register pair, then extract 32 bits starting at the given shift.
inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
_mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
{
	uint32_t l, h;
	__asm__ (
			" umull  %[l], %[h], %[val1], %[val2] \n"
			: [l]    "=r" (l)                       // low 32 bits of the product
			, [h]    "=r" (h)                       // high 32 bits of the product
			: [val1] "%r" (val1)                    // commutative - may swap with val2
			, [val2] "r"  (val2)
			);

	// Valid for (0 <= shift <= 31).  The (32 - shift) funnel shift is
	// undefined behaviour in C++ when shift == 0 (it would shift a 32-bit
	// value by 32), so that case is handled separately.
	if (!shift)
		return l;
	return (l >> shift) | (h << (32 - shift));
}
#endif
/*-------------------------------------------------
div_64x32 - perform a signed 64 bit x 32 bit
divide and return the 32 bit quotient
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
divu_64x32 - perform an unsigned 64 bit x 32 bit
divide and return the 32 bit quotient
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
div_64x32_rem - perform a signed 64 bit x 32
bit divide and return the 32 bit quotient and
32 bit remainder
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
divu_64x32_rem - perform an unsigned 64 bit x
32 bit divide and return the 32 bit quotient
and 32 bit remainder
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
div_32x32_shift - perform a signed divide of
two 32 bit values, shifting the first before
division, and returning the 32 bit quotient
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
divu_32x32_shift - perform an unsigned divide of
two 32 bit values, shifting the first before
division, and returning the 32 bit quotient
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
mod_64x32 - perform a signed 64 bit x 32 bit
divide and return the 32 bit remainder
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
modu_64x32 - perform an unsigned 64 bit x 32 bit
divide and return the 32 bit remainder
-------------------------------------------------*/
// TBD
/*-------------------------------------------------
recip_approx - compute an approximate floating
point reciprocal
-------------------------------------------------*/
#if defined(__aarch64__)
#define recip_approx _recip_approx

// Fast approximate reciprocal (1/x) via the AArch64 FRECPE instruction.
// The value produced is only an initial estimate; callers needing full
// precision must refine it (e.g. with Newton-Raphson steps).
inline float ATTR_CONST ATTR_FORCE_INLINE
_recip_approx(float value)
{
	float estimate;
	__asm__ (
			" frecpe  %s[estimate], %s[input] \n"
			: [estimate] "=w" (estimate)        // FP/SIMD register output
			: [input]    "w"  (value)           // FP/SIMD register input
			);
	return estimate;
}
#endif
/*-------------------------------------------------
mul_64x64 - perform a signed 64 bit x 64 bit
multiply and return the full 128 bit result
-------------------------------------------------*/
#ifdef __aarch64__
#define mul_64x64 _mul_64x64

// Signed 64x64 -> 128 bit multiply.  Stores the upper 64 bits of the
// product in hi and returns the lower 64 bits.
inline int64_t ATTR_FORCE_INLINE
_mul_64x64(int64_t a, int64_t b, int64_t &hi)
{
	// Widen through the compiler's native 128-bit type; the conversions to
	// unsigned make the subsequent shifts and truncations well-defined.
	unsigned __int128 const product((unsigned __int128)(__int128(a) * b));
	hi = int64_t(uint64_t(product >> 64));
	return int64_t(uint64_t(product));
}
#endif
/*-------------------------------------------------
mulu_64x64 - perform an unsigned 64 bit x 64
bit multiply and return the full 128 bit result
-------------------------------------------------*/
#ifdef __aarch64__
#define mulu_64x64 _mulu_64x64

// Unsigned 64x64 -> 128 bit multiply.  Stores the upper 64 bits of the
// product in hi and returns the lower 64 bits.
inline uint64_t ATTR_FORCE_INLINE
_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
{
	// Widen through the compiler's native 128-bit type.
	unsigned __int128 const product((unsigned __int128)a * b);
	hi = uint64_t(product >> 64);
	return uint64_t(product);
}
#endif
/***************************************************************************
INLINE BIT MANIPULATION FUNCTIONS
***************************************************************************/
/*-------------------------------------------------
count_leading_zeros - return the number of
leading zero bits in a 32-bit value
-------------------------------------------------*/
#if defined(__aarch64__)
#define count_leading_zeros _count_leading_zeros

// Count leading zero bits of a 32-bit value using the CLZ instruction.
// Returns 32 for an input of zero.
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
_count_leading_zeros(uint32_t value)
{
	uint32_t result;
	__asm__ (
			" clz  %w[result], %w[value] \n"
			: [result] "=r" (result)
			: [value]  "r"  (value)
			);
	return result;
}
#endif
/*-------------------------------------------------
count_leading_ones - return the number of
leading one bits in a 32-bit value
-------------------------------------------------*/
#if defined(__aarch64__)
#define count_leading_ones _count_leading_ones

// Count leading one bits of a 32-bit value: complement the input and
// count leading zeros with the CLZ instruction.
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
_count_leading_ones(uint32_t value)
{
	uint32_t const inverted = ~value;
	uint32_t result;
	__asm__ (
			" clz  %w[result], %w[inverted] \n"
			: [result]   "=r" (result)
			: [inverted] "r"  (inverted)
			);
	return result;
}
#endif
#endif // MAME_OSD_EIGCCARM_H

View File

@ -22,7 +22,7 @@
multiply and return the full 64 bit result
-------------------------------------------------*/
/* GCC can do a good job of this. */
// GCC can do a good job of this.
/*-------------------------------------------------
@ -31,7 +31,7 @@
result
-------------------------------------------------*/
/* GCC can do a good job of this */
// GCC can do a good job of this
/*-------------------------------------------------
@ -40,21 +40,7 @@
result
-------------------------------------------------*/
#define mul_32x32_hi _mul_32x32_hi
static inline int32_t ATTR_CONST ATTR_FORCE_INLINE
_mul_32x32_hi(int32_t val1, int32_t val2)
{
int32_t result;
__asm__ (
" mulhw %[result], %[val1], %[val2] \n"
: [result] "=r" (result)
: [val1] "%r" (val1)
, [val2] "r" (val2)
);
return result;
}
// GCC can do a good job of this
/*-------------------------------------------------
@ -63,21 +49,7 @@ _mul_32x32_hi(int32_t val1, int32_t val2)
of the result
-------------------------------------------------*/
#define mulu_32x32_hi _mulu_32x32_hi
static inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
_mulu_32x32_hi(uint32_t val1, uint32_t val2)
{
uint32_t result;
__asm__ (
" mulhwu %[result], %[val1], %[val2] \n"
: [result] "=r" (result)
: [val1] "%r" (val1)
, [val2] "r" (val2)
);
return result;
}
// GCC can do a good job of this
/*-------------------------------------------------
@ -89,27 +61,22 @@ _mulu_32x32_hi(uint32_t val1, uint32_t val2)
#if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64)
#define mul_32x32_shift _mul_32x32_shift
static inline int32_t ATTR_CONST ATTR_FORCE_INLINE
inline int32_t ATTR_CONST ATTR_FORCE_INLINE
_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift)
{
int32_t result;
uint32_t l, h;
/* Valid for (0 <= shift <= 32) */
__asm__ (
" mullw %[result], %[val1], %[val2] \n"
" mulhw %[val1], %[val1], %[val2] \n"
" srw %[result], %[result], %[shift] \n"
" subfic %[shift], %[shift], 0x20 \n"
" slw %[val1], %[val1], %[shift] \n"
" or %[result], %[result], %[val1] \n"
: [result] "=&r" (result)
, [shift] "+r" (shift)
, [val1] "+r" (val1)
: [val2] "r" (val2)
: "xer"
" mullw %[l], %[val1], %[val2] \n"
" mulhw %[h], %[val1], %[val2] \n"
: [l] "=&r" (l)
, [h] "=r" (h)
: [val1] "%r" (val1)
, [val2] "r" (val2)
);
return result;
// Valid for (0 <= shift <= 31)
return int32_t((l >> shift) | (h << (32 - shift)));
}
#endif
@ -123,27 +90,22 @@ _mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift)
#if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64)
#define mulu_32x32_shift _mulu_32x32_shift
static inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
_mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
{
uint32_t result;
uint32_t l, h;
/* Valid for (0 <= shift <= 32) */
__asm__ (
" mullw %[result], %[val1], %[val2] \n"
" mulhwu %[val1], %[val1], %[val2] \n"
" srw %[result], %[result], %[shift] \n"
" subfic %[shift], %[shift], 0x20 \n"
" slw %[val1], %[val1], %[shift] \n"
" or %[result], %[result], %[val1] \n"
: [result] "=&r" (result)
, [shift] "+r" (shift)
, [val1] "+r" (val1)
: [val2] "r" (val2)
: "xer"
" mullw %[l], %[val1], %[val2] \n"
" mulhwu %[h], %[val1], %[val2] \n"
: [l] "=&r" (l)
, [h] "=r" (h)
: [val1] "%r" (val1)
, [val2] "r" (val2)
);
return result;
// Valid for (0 <= shift <= 31)
return (l >> shift) | (h << (32 - shift));
}
#endif
@ -153,7 +115,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
divide and return the 32 bit quotient
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -161,7 +123,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
divide and return the 32 bit quotient
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -170,7 +132,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
32 bit remainder
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -179,7 +141,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
and 32 bit remainder
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -188,7 +150,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
division, and returning the 32 bit quotient
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -197,7 +159,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
division, and returning the 32 bit quotient
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -205,7 +167,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
divide and return the 32 bit remainder
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -213,7 +175,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
divide and return the 32 bit remainder
-------------------------------------------------*/
/* TBD */
// TBD
/*-------------------------------------------------
@ -222,7 +184,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift)
-------------------------------------------------*/
#define recip_approx _recip_approx
static inline float ATTR_CONST ATTR_FORCE_INLINE
inline float ATTR_CONST ATTR_FORCE_INLINE
_recip_approx(float value)
{
float result;
@ -237,6 +199,40 @@ _recip_approx(float value)
}
/*-------------------------------------------------
mul_64x64 - perform a signed 64 bit x 64 bit
multiply and return the full 128 bit result
-------------------------------------------------*/
#ifdef __ppc64__
#define mul_64x64 _mul_64x64
inline int64_t ATTR_FORCE_INLINE
_mul_64x64(int64_t a, int64_t b, int64_t &hi)
{
__int128 const r(__int128(a) * b);
hi = int64_t(uint64_t((unsigned __int128)r >> 64));
return int64_t(uint64_t((unsigned __int128)r));
}
#endif
/*-------------------------------------------------
mulu_64x64 - perform an unsigned 64 bit x 64
bit multiply and return the full 128 bit result
-------------------------------------------------*/
#ifdef __ppc64__
#define mulu_64x64 _mulu_64x64
inline uint64_t ATTR_FORCE_INLINE
_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
{
unsigned __int128 const r((unsigned __int128)a * b);
hi = uint64_t(r >> 64);
return uint64_t(r);
}
#endif
/***************************************************************************
INLINE BIT MANIPULATION FUNCTIONS
@ -248,15 +244,15 @@ _recip_approx(float value)
-------------------------------------------------*/
#define count_leading_zeros _count_leading_zeros
static inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
_count_leading_zeros(uint32_t value)
{
uint32_t result;
__asm__ (
" cntlzw %[result], %[value] \n"
: [result] "=r" (result) /* result can be in any register */
: [value] "r" (value) /* 'value' can be in any register */
: [result] "=r" (result)
: [value] "r" (value)
);
return result;
@ -269,15 +265,15 @@ _count_leading_zeros(uint32_t value)
-------------------------------------------------*/
#define count_leading_ones _count_leading_ones
static inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
inline uint8_t ATTR_CONST ATTR_FORCE_INLINE
_count_leading_ones(uint32_t value)
{
uint32_t result;
__asm__ (
" cntlzw %[result], %[result] \n"
: [result] "=r" (result) /* result can be in any register */
: [value] "r" (~value) /* 'value' can be in any register */
" cntlzw %[result], %[value] \n"
: [result] "=r" (result)
: [value] "r" (~value)
);
return result;

View File

@ -31,22 +31,7 @@
multiply and return the full 64 bit result
-------------------------------------------------*/
#ifndef __x86_64__
#define mul_32x32 _mul_32x32
inline int64_t ATTR_CONST ATTR_FORCE_INLINE
_mul_32x32(int32_t a, int32_t b)
{
int64_t result;
__asm__ (
" imull %[b] ;"
: [result] "=A" (result) // result in edx:eax
: [a] "%a" (a) // 'a' should also be in eax on entry
, [b] "rm" (b) // 'b' can be memory or register
: "cc" // Clobbers condition codes
);
return result;
}
#endif
// GCC can do a good job of this.
/*-------------------------------------------------
@ -55,22 +40,7 @@ _mul_32x32(int32_t a, int32_t b)
result
-------------------------------------------------*/
#ifndef __x86_64__
#define mulu_32x32 _mulu_32x32
inline uint64_t ATTR_CONST ATTR_FORCE_INLINE
_mulu_32x32(uint32_t a, uint32_t b)
{
uint64_t result;
__asm__ (
" mull %[b] ;"
: [result] "=A" (result) // result in edx:eax
: [a] "%a" (a) // 'a' should also be in eax on entry
, [b] "rm" (b) // 'b' can be memory or register
: "cc" // Clobbers condition codes
);
return result;
}
#endif
// GCC can do a good job of this.
/*-------------------------------------------------
@ -79,21 +49,7 @@ _mulu_32x32(uint32_t a, uint32_t b)
result
-------------------------------------------------*/
#define mul_32x32_hi _mul_32x32_hi
inline int32_t ATTR_CONST ATTR_FORCE_INLINE
_mul_32x32_hi(int32_t a, int32_t b)
{
int32_t result, temp;
__asm__ (
" imull %[b] ;"
: [result] "=d" (result) // result in edx
, [temp] "=a" (temp) // This is effectively a clobber
: [a] "a" (a) // 'a' should be in eax on entry
, [b] "rm" (b) // 'b' can be memory or register
: "cc" // Clobbers condition codes
);
return result;
}
// GCC can do a good job of this.
/*-------------------------------------------------
@ -102,21 +58,7 @@ _mul_32x32_hi(int32_t a, int32_t b)
of the result
-------------------------------------------------*/
#define mulu_32x32_hi _mulu_32x32_hi
inline uint32_t ATTR_CONST ATTR_FORCE_INLINE
_mulu_32x32_hi(uint32_t a, uint32_t b)
{
uint32_t result, temp;
__asm__ (
" mull %[b] ;"
: [result] "=d" (result) // result in edx
, [temp] "=a" (temp) // This is effectively a clobber
: [a] "a" (a) // 'a' should be in eax on entry
, [b] "rm" (b) // 'b' can be memory or register
: "cc" // Clobbers condition codes
);
return result;
}
// GCC can do a good job of this.
/*-------------------------------------------------
@ -241,21 +183,19 @@ _divu_64x32(uint64_t a, uint32_t b)
#define div_64x32_rem _div_64x32_rem
inline int32_t ATTR_FORCE_INLINE
_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder)
_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t &remainder)
{
int32_t quotient;
#ifndef __x86_64__
// Throws arithmetic exception if result doesn't fit in 32 bits
__asm__ (
" idivl %[divisor] ;"
: [result] "=a" (quotient) // quotient ends up in eax
, [remainder] "=d" (*remainder) // remainder ends up in edx
, [remainder] "=d" (remainder) // remainder ends up in edx
: [dividend] "A" (dividend) // 'dividend' in edx:eax
, [divisor] "rm" (divisor) // 'divisor' in register or memory
: "cc" // clobbers condition codes
);
#else
int32_t const divh{ int32_t(uint32_t(uint64_t(dividend) >> 32)) };
int32_t const divl{ int32_t(uint32_t(uint64_t(dividend))) };
@ -264,13 +204,12 @@ _div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder)
__asm__ (
" idivl %[divisor] ;"
: [result] "=a" (quotient) // quotient ends up in eax
, [remainder] "=d" (*remainder) // remainder ends up in edx
, [remainder] "=d" (remainder) // remainder ends up in edx
: [divl] "a" (divl) // 'dividend' in edx:eax
, [divh] "d" (divh)
, [divisor] "rm" (divisor) // 'divisor' in register or memory
: "cc" // clobbers condition codes
);
#endif
return quotient;
}
@ -284,21 +223,19 @@ _div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder)
#define divu_64x32_rem _divu_64x32_rem
inline uint32_t ATTR_FORCE_INLINE
_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t &remainder)
{
uint32_t quotient;
#ifndef __x86_64__
// Throws arithmetic exception if result doesn't fit in 32 bits
__asm__ (
" divl %[divisor] ;"
: [result] "=a" (quotient) // quotient ends up in eax
, [remainder] "=d" (*remainder) // remainder ends up in edx
, [remainder] "=d" (remainder) // remainder ends up in edx
: [dividend] "A" (dividend) // 'dividend' in edx:eax
, [divisor] "rm" (divisor) // 'divisor' in register or memory
: "cc" // clobbers condition codes
);
#else
uint32_t const divh{ uint32_t(dividend >> 32) };
uint32_t const divl{ uint32_t(dividend) };
@ -307,7 +244,7 @@ _divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
__asm__ (
" divl %[divisor] ;"
: [result] "=a" (quotient) // quotient ends up in eax
, [remainder] "=d" (*remainder) // remainder ends up in edx
, [remainder] "=d" (remainder) // remainder ends up in edx
: [divl] "a" (divl) // 'dividend' in edx:eax
, [divh] "d" (divh)
, [divisor] "rm" (divisor) // 'divisor' in register or memory
@ -444,11 +381,11 @@ _modu_64x32(uint64_t a, uint32_t b)
#ifdef __SSE2__
#define recip_approx _recip_approx
inline float ATTR_CONST
inline float ATTR_CONST ATTR_FORCE_INLINE
_recip_approx(float value)
{
__m128 const value_xmm = _mm_set_ss(value);
__m128 const result_xmm = _mm_rcp_ss(value_xmm);
__m128 const value_xmm(_mm_set_ss(value));
__m128 const result_xmm(_mm_rcp_ss(value_xmm));
float result;
_mm_store_ss(&result, result_xmm);
return result;
@ -464,10 +401,10 @@ _recip_approx(float value)
#ifdef __x86_64__
#define mul_64x64 _mul_64x64
inline int64_t ATTR_FORCE_INLINE
_mul_64x64(int64_t a, int64_t b, int64_t *hi)
_mul_64x64(int64_t a, int64_t b, int64_t &hi)
{
__int128 const r(__int128(a) * b);
*hi = int64_t(uint64_t((unsigned __int128)r >> 64));
hi = int64_t(uint64_t((unsigned __int128)r >> 64));
return int64_t(uint64_t((unsigned __int128)r));
}
#endif
@ -481,10 +418,10 @@ _mul_64x64(int64_t a, int64_t b, int64_t *hi)
#ifdef __x86_64__
#define mulu_64x64 _mulu_64x64
inline uint64_t ATTR_FORCE_INLINE
_mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi)
_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
{
unsigned __int128 const r((unsigned __int128)a * b);
*hi = uint64_t(r >> 64);
hi = uint64_t(r >> 64);
return uint64_t(r);
}
#endif

View File

@ -28,7 +28,7 @@
#ifndef count_leading_zeros
#define count_leading_zeros _count_leading_zeros
inline uint8_t _count_leading_zeros(uint32_t value)
__forceinline uint8_t _count_leading_zeros(uint32_t value)
{
unsigned long index;
return _BitScanReverse(&index, value) ? (31U - index) : 32U;
@ -43,7 +43,7 @@ inline uint8_t _count_leading_zeros(uint32_t value)
#ifndef count_leading_ones
#define count_leading_ones _count_leading_ones
inline uint8_t _count_leading_ones(uint32_t value)
__forceinline uint8_t _count_leading_ones(uint32_t value)
{
unsigned long index;
return _BitScanReverse(&index, ~value) ? (31U - index) : 32U;

View File

@ -15,9 +15,10 @@
#ifdef PTR64
#include <emmintrin.h>
#include <intrin.h>
#endif
#include <intrin.h>
/***************************************************************************
INLINE MATH FUNCTIONS
@ -30,7 +31,7 @@
#ifndef PTR64
#define mul_32x32 _mul_32x32
static inline int64_t _mul_32x32(int32_t a, int32_t b)
inline int64_t _mul_32x32(int32_t a, int32_t b)
{
// in theory this should work, but it is untested
__asm
@ -51,7 +52,7 @@ static inline int64_t _mul_32x32(int32_t a, int32_t b)
#ifndef PTR64
#define mulu_32x32 _mulu_32x32
static inline uint64_t _mulu_32x32(uint32_t a, uint32_t b)
inline uint64_t _mulu_32x32(uint32_t a, uint32_t b)
{
// in theory this should work, but it is untested
__asm
@ -72,7 +73,7 @@ static inline uint64_t _mulu_32x32(uint32_t a, uint32_t b)
#ifndef PTR64
#define mul_32x32_hi _mul_32x32_hi
static inline int32_t _mul_32x32_hi(int32_t a, int32_t b)
inline int32_t _mul_32x32_hi(int32_t a, int32_t b)
{
int32_t result;
@ -96,7 +97,7 @@ static inline int32_t _mul_32x32_hi(int32_t a, int32_t b)
#ifndef PTR64
#define mulu_32x32_hi _mulu_32x32_hi
static inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b)
inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b)
{
int32_t result;
@ -148,7 +149,7 @@ static inline int32_t _mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
#ifndef PTR64
#define mulu_32x32_shift _mulu_32x32_shift
static inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
{
int32_t result;
@ -173,7 +174,7 @@ static inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
#ifndef PTR64
#define div_64x32 _div_64x32
static inline int32_t _div_64x32(int64_t a, int32_t b)
inline int32_t _div_64x32(int64_t a, int32_t b)
{
int32_t result;
int32_t alow = a;
@ -199,7 +200,7 @@ static inline int32_t _div_64x32(int64_t a, int32_t b)
#ifndef PTR64
#define divu_64x32 _divu_64x32
static inline uint32_t _divu_64x32(uint64_t a, uint32_t b)
inline uint32_t _divu_64x32(uint64_t a, uint32_t b)
{
uint32_t result;
uint32_t alow = a;
@ -226,7 +227,7 @@ static inline uint32_t _divu_64x32(uint64_t a, uint32_t b)
#ifndef PTR64
#define div_64x32_rem _div_64x32_rem
static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t &remainder)
{
int32_t result;
int32_t alow = a;
@ -242,7 +243,7 @@ static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
mov rem,edx
}
*remainder = rem;
remainder = rem;
return result;
}
#endif
@ -256,7 +257,7 @@ static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
#ifndef PTR64
#define divu_64x32_rem _divu_64x32_rem
static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder)
inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder)
{
uint32_t result;
uint32_t alow = a;
@ -272,7 +273,7 @@ static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remaind
mov rem,edx
}
*remainder = rem;
remainder = rem;
return result;
}
#endif
@ -286,7 +287,7 @@ static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remaind
#ifndef PTR64
#define div_32x32_shift _div_32x32_shift
static inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
{
int32_t result;
@ -314,7 +315,7 @@ static inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
#ifndef PTR64
#define divu_32x32_shift _divu_32x32_shift
static inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
{
uint32_t result;
@ -367,7 +368,7 @@ static inline int32_t _mod_64x32(int64_t a, int32_t b)
#ifndef PTR64
#define modu_64x32 _modu_64x32
static inline uint32_t _modu_64x32(uint64_t a, uint32_t b)
inline uint32_t _modu_64x32(uint64_t a, uint32_t b)
{
uint32_t result;
uint32_t alow = a;
@ -393,7 +394,7 @@ static inline uint32_t _modu_64x32(uint64_t a, uint32_t b)
#ifdef PTR64
#define recip_approx _recip_approx
static inline float _recip_approx(float z)
inline float _recip_approx(float z)
{
__m128 const mz = _mm_set_ss(z);
__m128 const mooz = _mm_rcp_ss(mz);
@ -410,7 +411,11 @@ static inline float _recip_approx(float z)
-------------------------------------------------*/
#ifdef PTR64
#define mul_64x64 _mul128
#define mul_64x64 _mul_64x64
// Signed 64x64 -> 128 bit multiply: returns the low 64 bits of the
// product and stores the high 64 bits in 'hi' (via the MSVC intrinsic).
__forceinline int64_t _mul_64x64(int64_t a, int64_t b, int64_t &hi)
{
	int64_t high;
	int64_t const low = _mul128(a, b, &high);
	hi = high;
	return low;
}
#endif
@ -420,7 +425,44 @@ static inline float _recip_approx(float z)
-------------------------------------------------*/
#ifdef PTR64
#define mulu_64x64 _umul128
#define mulu_64x64 _mulu_64x64
// Unsigned 64x64 -> 128 bit multiply: returns the low 64 bits of the
// product and stores the high 64 bits in 'hi' (via the MSVC intrinsic).
// Fix: the return type was declared int64_t, but _umul128 returns an
// unsigned low half and the generic mulu_64x64 contract is uint64_t;
// a signed return type misdeclares the low half of the unsigned product.
__forceinline uint64_t _mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
{
	return _umul128(a, b, &hi);
}
#endif
/*-------------------------------------------------
addu_32x32_co - perform an unsigned 32 bit + 32
bit addition and return the result with carry
out
-------------------------------------------------*/
#define addu_32x32_co _addu_32x32_co
// Unsigned 32 + 32 bit addition: stores the 32-bit sum in 'sum' and
// returns true if the addition carried out of bit 31.
__forceinline bool _addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum)
{
	unsigned char const carry = _addcarry_u32(0, a, b, &sum);
	return carry != 0;
}
/*-------------------------------------------------
addu_64x64_co - perform an unsigned 64 bit + 64
bit addition and return the result with carry
out
-------------------------------------------------*/
#define addu_64x64_co _addu_64x64_co
// Unsigned 64 + 64 bit addition: stores the 64-bit sum in 'sum' and
// returns true if the addition carried out of bit 63.  On 32-bit
// targets the add is done as two chained 32-bit adds with carry.
__forceinline bool _addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum)
{
#ifdef PTR64
	return _addcarry_u64(0, a, b, &sum);
#else
	uint32_t lo, hi;
	// add the low halves first, then feed the carry into the high halves
	unsigned char carry = _addcarry_u32(0, uint32_t(a), uint32_t(b), &lo);
	carry = _addcarry_u32(carry, uint32_t(a >> 32), uint32_t(b >> 32), &hi);
	sum = (uint64_t(hi) << 32) | lo;
	return carry != 0;
#endif
}
#endif // MAME_OSD_EIVCX86_H

View File

@ -25,8 +25,8 @@
#include "eigccx86.h"
#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__)
#include "eigccppc.h"
#else
#error "no matching assembler implementations found - please compile with NOASM=1"
#elif defined(__arm__) || defined(__aarch64__)
#include "eigccarm.h"
#endif
#elif defined(_MSC_VER)
@ -37,10 +37,6 @@
#include "eivc.h"
#else
#error "no matching assembler implementations found - please compile with NOASM=1"
#endif
#endif // !defined(MAME_NOASM)
@ -56,7 +52,7 @@
-------------------------------------------------*/
#ifndef mul_32x32
inline int64_t mul_32x32(int32_t a, int32_t b)
// Signed 32x32 multiply returning the full 64-bit product.
constexpr int64_t mul_32x32(int32_t a, int32_t b)
{
	return static_cast<int64_t>(a) * static_cast<int64_t>(b);
}
@ -70,7 +66,7 @@ inline int64_t mul_32x32(int32_t a, int32_t b)
-------------------------------------------------*/
#ifndef mulu_32x32
inline uint64_t mulu_32x32(uint32_t a, uint32_t b)
// Unsigned 32x32 multiply returning the full 64-bit product.
constexpr uint64_t mulu_32x32(uint32_t a, uint32_t b)
{
	return static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
}
@ -84,7 +80,7 @@ inline uint64_t mulu_32x32(uint32_t a, uint32_t b)
-------------------------------------------------*/
#ifndef mul_32x32_hi
inline int32_t mul_32x32_hi(int32_t a, int32_t b)
// Signed 32x32 multiply returning the upper 32 bits of the 64-bit product.
constexpr int32_t mul_32x32_hi(int32_t a, int32_t b)
{
	int64_t const product = int64_t(a) * int64_t(b);
	return uint32_t(product >> 32);
}
@ -98,7 +94,7 @@ inline int32_t mul_32x32_hi(int32_t a, int32_t b)
-------------------------------------------------*/
#ifndef mulu_32x32_hi
inline uint32_t mulu_32x32_hi(uint32_t a, uint32_t b)
// Unsigned 32x32 multiply returning the upper 32 bits of the 64-bit product.
constexpr uint32_t mulu_32x32_hi(uint32_t a, uint32_t b)
{
	uint64_t const product = uint64_t(a) * uint64_t(b);
	return uint32_t(product >> 32);
}
@ -113,7 +109,7 @@ inline uint32_t mulu_32x32_hi(uint32_t a, uint32_t b)
-------------------------------------------------*/
#ifndef mul_32x32_shift
inline int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
// Signed 32x32 multiply: compute the 64-bit product, then return the
// 32 bits starting at bit 'shift'.
constexpr int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
{
	int64_t const product = int64_t(a) * int64_t(b);
	return int32_t(product >> shift);
}
@ -128,7 +124,7 @@ inline int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift)
-------------------------------------------------*/
#ifndef mulu_32x32_shift
inline uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
// Unsigned 32x32 multiply: compute the 64-bit product, then return the
// 32 bits starting at bit 'shift'.
constexpr uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
{
	uint64_t const product = uint64_t(a) * uint64_t(b);
	return uint32_t(product >> shift);
}
@ -141,7 +137,7 @@ inline uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
-------------------------------------------------*/
#ifndef div_64x32
inline int32_t div_64x32(int64_t a, int32_t b)
// Signed 64/32 divide returning the (truncated) 32-bit quotient.
constexpr int32_t div_64x32(int64_t a, int32_t b)
{
	return a / static_cast<int64_t>(b);
}
@ -154,7 +150,7 @@ inline int32_t div_64x32(int64_t a, int32_t b)
-------------------------------------------------*/
#ifndef divu_64x32
inline uint32_t divu_64x32(uint64_t a, uint32_t b)
// Unsigned 64/32 divide returning the 32-bit quotient.
constexpr uint32_t divu_64x32(uint64_t a, uint32_t b)
{
	return a / static_cast<uint64_t>(b);
}
@ -168,10 +164,10 @@ inline uint32_t divu_64x32(uint64_t a, uint32_t b)
-------------------------------------------------*/
#ifndef div_64x32_rem
inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t &remainder)
{
int32_t const res = div_64x32(a, b);
*remainder = a - (int64_t(b) * res);
int32_t const res(div_64x32(a, b));
remainder = a - (int64_t(b) * res);
return res;
}
#endif
@ -184,10 +180,10 @@ inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t *remainder)
-------------------------------------------------*/
#ifndef divu_64x32_rem
inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder)
inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder)
{
uint32_t const res = divu_64x32(a, b);
*remainder = a - (uint64_t(b) * res);
uint32_t const res(divu_64x32(a, b));
remainder = a - (uint64_t(b) * res);
return res;
}
#endif
@ -200,7 +196,7 @@ inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder)
-------------------------------------------------*/
#ifndef div_32x32_shift
inline int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
// Signed divide of (a << shift) / b returning the 32-bit quotient.
constexpr int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
{
	int64_t const numerator = int64_t(a) << shift;
	return numerator / int64_t(b);
}
@ -214,7 +210,7 @@ inline int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift)
-------------------------------------------------*/
#ifndef divu_32x32_shift
inline uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
// Unsigned divide of (a << shift) / b returning the 32-bit quotient.
constexpr uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
{
	uint64_t const numerator = uint64_t(a) << shift;
	return numerator / uint64_t(b);
}
@ -227,7 +223,7 @@ inline uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift)
-------------------------------------------------*/
#ifndef mod_64x32
inline int32_t mod_64x32(int64_t a, int32_t b)
// Signed 64/32 divide returning the 32-bit remainder, derived from the
// platform div_64x32 so the rounding convention always matches it.
constexpr int32_t mod_64x32(int64_t a, int32_t b)
{
	int32_t const quotient = div_64x32(a, b);
	return a - (b * quotient);
}
@ -240,7 +236,7 @@ inline int32_t mod_64x32(int64_t a, int32_t b)
-------------------------------------------------*/
#ifndef modu_64x32
inline uint32_t modu_64x32(uint64_t a, uint32_t b)
// Unsigned 64/32 divide returning the 32-bit remainder, derived from the
// platform divu_64x32 so the rounding convention always matches it.
constexpr uint32_t modu_64x32(uint64_t a, uint32_t b)
{
	uint32_t const quotient = divu_64x32(a, b);
	return a - (b * quotient);
}
@ -253,7 +249,7 @@ inline uint32_t modu_64x32(uint64_t a, uint32_t b)
-------------------------------------------------*/
#ifndef recip_approx
inline float recip_approx(float value)
// Approximate reciprocal: the portable fallback is simply an exact divide.
constexpr float recip_approx(float value)
{
	float const reciprocal = 1.0f / value;
	return reciprocal;
}
@ -266,7 +262,7 @@ inline float recip_approx(float value)
-------------------------------------------------*/
#ifndef mul_64x64
inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi)
inline int64_t mul_64x64(int64_t a, int64_t b, int64_t &hi)
{
uint64_t const a_hi = uint64_t(a) >> 32;
uint64_t const b_hi = uint64_t(b) >> 32;
@ -279,13 +275,13 @@ inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi)
uint64_t const ab_hi = a_hi * b_hi;
uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32;
*hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;
hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;
// adjust for sign
if (a < 0)
*hi -= b;
hi -= b;
if (b < 0)
*hi -= a;
hi -= a;
return ab_lo + (ab_m1 << 32) + (ab_m2 << 32);
}
@ -298,7 +294,7 @@ inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi)
-------------------------------------------------*/
#ifndef mulu_64x64
inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi)
inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi)
{
uint64_t const a_hi = uint32_t(a >> 32);
uint64_t const b_hi = uint32_t(b >> 32);
@ -311,13 +307,51 @@ inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi)
uint64_t const ab_hi = a_hi * b_hi;
uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32;
*hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;
hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry;
return ab_lo + (ab_m1 << 32) + (ab_m2 << 32);
}
#endif
/*-------------------------------------------------
addu_32x32_co - perform an unsigned 32 bit + 32
bit addition and return the result with carry
out
-------------------------------------------------*/
#ifndef addu_32x32_co
// Unsigned 32 + 32 bit addition: stores the wrapped 32-bit sum in 'sum'
// and returns true if the addition carried out of bit 31.
inline bool addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum)
{
#if defined(__GNUC__)
	return __builtin_add_overflow(a, b, &sum);
#else
	sum = a + b;
	// unsigned add carried iff the wrapped sum is smaller than an operand
	return sum < a;
#endif
}
#endif
/*-------------------------------------------------
addu_64x64_co - perform an unsigned 64 bit + 64
bit addition and return the result with carry
out
-------------------------------------------------*/
#ifndef addu_64x64_co
// Unsigned 64 + 64 bit addition: stores the wrapped 64-bit sum in 'sum'
// and returns true if the addition carried out of bit 63.
inline bool addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum)
{
#if defined(__GNUC__)
	return __builtin_add_overflow(a, b, &sum);
#else
	sum = a + b;
	// unsigned add carried iff the wrapped sum is smaller than an operand
	return sum < a;
#endif
}
#endif
/***************************************************************************
INLINE BIT MANIPULATION FUNCTIONS
@ -360,12 +394,11 @@ inline uint8_t count_leading_ones(uint32_t val)
-------------------------------------------------*/
#ifndef population_count_32
#if defined(__NetBSD__)
#define population_count_32 popcount32
#else
inline unsigned population_count_32(uint32_t val)
{
#if defined(__GNUC__)
#if defined(__NetBSD__)
return popcount32(val);
#elif defined(__GNUC__)
// uses CPU feature if available, otherwise falls back to implementation similar to what follows
static_assert(sizeof(val) == sizeof(unsigned), "expected 32-bit unsigned int");
return unsigned(__builtin_popcount(static_cast<unsigned>(val)));
@ -382,7 +415,6 @@ inline unsigned population_count_32(uint32_t val)
#endif
}
#endif
#endif
/*-------------------------------------------------
@ -391,12 +423,11 @@ inline unsigned population_count_32(uint32_t val)
-------------------------------------------------*/
#ifndef population_count_64
#if defined(__NetBSD__)
#define population_count_64 popcount64
#else
inline unsigned population_count_64(uint64_t val)
{
#if defined(__GNUC__)
#if defined(__NetBSD__)
return popcount64(val);
#elif defined(__GNUC__)
// uses CPU feature if available, otherwise falls back to implementation similar to what follows
static_assert(sizeof(val) == sizeof(unsigned long long), "expected 64-bit unsigned long long int");
return unsigned(__builtin_popcountll(static_cast<unsigned long long>(val)));
@ -422,7 +453,6 @@ inline unsigned population_count_64(uint64_t val)
#endif
}
#endif
#endif
/***************************************************************************