diff --git a/makefile b/makefile index b30b921b352..96de8f7e69f 100644 --- a/makefile +++ b/makefile @@ -362,33 +362,6 @@ WINDRES := $(word 1,$(TOOLCHAIN) i686-w64-mingw32-)windres endif endif -ifeq ($(findstring arm,$(UNAME)),arm) -ARCHITECTURE := -ifndef NOASM - NOASM := 1 -endif -endif - -ifeq ($(findstring aarch64,$(UNAME)),aarch64) -ARCHITECTURE := -ifndef NOASM - NOASM := 1 -endif -endif - -ifeq ($(findstring s390x,$(UNAME)),s390x) -ifndef NOASM - NOASM := 1 -endif -endif - -ifeq ($(findstring riscv64,$(UNAME)),riscv64) -ARCHITECTURE := -ifndef NOASM - NOASM := 1 -endif -endif - # Emscripten ifeq ($(findstring emcc,$(CC)),emcc) TARGETOS := asmjs @@ -398,27 +371,42 @@ ifndef NOASM endif endif -# ppc has inline assembly support but no DRC ifeq ($(findstring ppc,$(UNAME)),ppc) ifndef FORCE_DRC_C_BACKEND FORCE_DRC_C_BACKEND := 1 endif endif -# powerpc has inline assembly support but no DRC ifeq ($(findstring powerpc,$(UNAME)),powerpc) ifndef FORCE_DRC_C_BACKEND - FORCE_DRC_C_BACKEND := 1 + FORCE_DRC_C_BACKEND := 1 endif endif -# ARM / ARM64 ifeq ($(findstring arm,$(UNAME)),arm) ifndef FORCE_DRC_C_BACKEND FORCE_DRC_C_BACKEND := 1 endif endif +ifeq ($(findstring aarch64,$(UNAME)),aarch64) +ifndef FORCE_DRC_C_BACKEND + FORCE_DRC_C_BACKEND := 1 +endif +endif + +ifeq ($(findstring s390x,$(UNAME)),s390x) +ifndef FORCE_DRC_C_BACKEND + FORCE_DRC_C_BACKEND := 1 +endif +endif + +ifeq ($(findstring riscv64,$(UNAME)),riscv64) +ifndef FORCE_DRC_C_BACKEND + FORCE_DRC_C_BACKEND := 1 +endif +endif + # Autodetect BIGENDIAN # MacOSX ifndef BIGENDIAN diff --git a/src/devices/cpu/alpha/alpha.cpp b/src/devices/cpu/alpha/alpha.cpp index 9ebfc6dd09e..0449a249844 100644 --- a/src/devices/cpu/alpha/alpha.cpp +++ b/src/devices/cpu/alpha/alpha.cpp @@ -398,14 +398,14 @@ void alpha_device::cpu_execute(u32 const op) // register variants case 0x00: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(m_r[Rb(op)]))); break; // mull case 0x20: m_r[Rc(op)] = m_r[Ra(op)] * m_r[Rb(op)]; break; // mulq - 
case 0x30: mulu_64x64(m_r[Ra(op)], m_r[Rb(op)], &m_r[Rc(op)]); break; // umulh + case 0x30: mulu_64x64(m_r[Ra(op)], m_r[Rb(op)], m_r[Rc(op)]); break; // umulh case 0x40: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(m_r[Rb(op)]))); break; // mull/v case 0x60: m_r[Rc(op)] = m_r[Ra(op)] * m_r[Rb(op)]; break; // mulq/v // immediate variants case 0x80: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(Im(op)))); break; // mull case 0xa0: m_r[Rc(op)] = m_r[Ra(op)] * Im(op); break; // mulq - case 0xb0: mulu_64x64(m_r[Ra(op)], Im(op), &m_r[Rc(op)]); break; // umulh + case 0xb0: mulu_64x64(m_r[Ra(op)], Im(op), m_r[Rc(op)]); break; // umulh case 0xc0: m_r[Rc(op)] = s64(s32(u32(m_r[Ra(op)]) * u32(Im(op)))); break; // mull/v case 0xe0: m_r[Rc(op)] = m_r[Ra(op)] * Im(op); break; // mulq/v } diff --git a/src/devices/cpu/drccache.cpp b/src/devices/cpu/drccache.cpp index e75305cc6ba..96d0b7800a3 100644 --- a/src/devices/cpu/drccache.cpp +++ b/src/devices/cpu/drccache.cpp @@ -14,10 +14,6 @@ #include <algorithm> -// this improves performance of some emulated systems but doesn't work on W^X hosts -//#define MAME_DRC_CACHE_RWX - - namespace { template <typename T, typename U> constexpr T *ALIGN_PTR_UP(T *p, U align) @@ -52,7 +48,8 @@ drc_cache::drc_cache(size_t bytes) : m_end(m_limit), m_codegen(nullptr), m_size(m_cache.size()), - m_executable(false) + m_executable(false), + m_rwx(false) { // alignment and page size must be powers of two, cache must be page-aligned assert(!(CACHE_ALIGNMENT & (CACHE_ALIGNMENT - 1))); @@ -63,11 +60,24 @@ drc_cache::drc_cache(size_t bytes) : std::fill(std::begin(m_free), std::end(m_free), nullptr); std::fill(std::begin(m_nearfree), std::end(m_nearfree), nullptr); -#if defined(MAME_DRC_CACHE_RWX) - m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE | osd::virtual_memory_allocation::EXECUTE); -#else // defined(MAME_DRC_CACHE_RWX) - m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE); -#endif // defined(MAME_DRC_CACHE_RWX) + if (!m_cache) + { + throw 
emu_fatalerror("drc_cache: Error allocating virtual memory"); + } + else if (!m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE)) + { + throw emu_fatalerror("drc_cache: Error marking cache read/write"); + } + else if (m_cache.set_access(m_base - m_near, m_end - m_base, osd::virtual_memory_allocation::READ_WRITE | osd::virtual_memory_allocation::EXECUTE)) + { + osd_printf_verbose("drc_cache: RWX pages supported\n"); + m_rwx = true; + } + else + { + osd_printf_verbose("drc_cache: Using W^X mode\n"); + m_rwx = false; + } } @@ -209,9 +219,8 @@ void drc_cache::codegen_init() { if (m_executable) { -#if !defined(MAME_DRC_CACHE_RWX) - m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE); -#endif // !defined(MAME_DRC_CACHE_RWX) + if (!m_rwx) + m_cache.set_access(0, m_size, osd::virtual_memory_allocation::READ_WRITE); m_executable = false; } } @@ -221,9 +230,8 @@ void drc_cache::codegen_complete() { if (!m_executable) { -#if !defined(MAME_DRC_CACHE_RWX) - m_cache.set_access(m_base - m_near, ALIGN_PTR_UP(m_top, m_cache.page_size()) - m_base, osd::virtual_memory_allocation::READ_EXECUTE); -#endif // !defined(MAME_DRC_CACHE_RWX) + if (!m_rwx) + m_cache.set_access(m_base - m_near, ALIGN_PTR_UP(m_top, m_cache.page_size()) - m_base, osd::virtual_memory_allocation::READ_EXECUTE); m_executable = true; } } diff --git a/src/devices/cpu/drccache.h b/src/devices/cpu/drccache.h index b79e7d57d72..01adaafd981 100644 --- a/src/devices/cpu/drccache.h +++ b/src/devices/cpu/drccache.h @@ -94,6 +94,7 @@ private: drccodeptr m_codegen; // start of current generated code block size_t const m_size; // size of the cache in bytes bool m_executable; // whether cached code is currently executable + bool m_rwx; // whether pages can be simultaneously writable and executable // oob management struct oob_handler diff --git a/src/devices/cpu/mips/mips3.cpp b/src/devices/cpu/mips/mips3.cpp index 1d0efb3f756..f82ae82da50 100644 --- a/src/devices/cpu/mips/mips3.cpp 
+++ b/src/devices/cpu/mips/mips3.cpp @@ -3561,11 +3561,11 @@ void mips3_device::handle_special(uint32_t op) m_core->icount -= 35; break; case 0x1c: /* DMULT */ - LOVAL64 = mul_64x64(RSVAL64, RTVAL64, reinterpret_cast<s64 *>(&HIVAL64)); + LOVAL64 = mul_64x64(RSVAL64, RTVAL64, *reinterpret_cast<s64 *>(&HIVAL64)); m_core->icount -= 7; break; case 0x1d: /* DMULTU */ - LOVAL64 = mulu_64x64(RSVAL64, RTVAL64, &HIVAL64); + LOVAL64 = mulu_64x64(RSVAL64, RTVAL64, HIVAL64); m_core->icount -= 7; break; case 0x1e: /* DDIV */ diff --git a/src/devices/cpu/mips/r4000.cpp b/src/devices/cpu/mips/r4000.cpp index 802ab7d04f4..e0d5d8559a7 100644 --- a/src/devices/cpu/mips/r4000.cpp +++ b/src/devices/cpu/mips/r4000.cpp @@ -473,10 +473,10 @@ void r4000_base_device::cpu_execute(u32 const op) } break; case 0x1c: // DMULT - m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], reinterpret_cast<s64 *>(&m_hi)); + m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], *reinterpret_cast<s64 *>(&m_hi)); break; case 0x1d: // DMULTU - m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], &m_hi); + m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], m_hi); break; case 0x1e: // DDIV if (m_r[RTREG]) diff --git a/src/emu/attotime.cpp b/src/emu/attotime.cpp index c80219476ab..a1ba4d9ed61 100644 --- a/src/emu/attotime.cpp +++ b/src/emu/attotime.cpp @@ -40,17 +40,17 @@ attotime &attotime::operator*=(u32 factor) // split attoseconds into upper and lower halves which fit into 32 bits u32 attolo; - u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &attolo); + u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, attolo); // scale the lower half, then split into high/low parts u64 temp = mulu_32x32(attolo, factor); u32 reslo; - temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, &reslo); + temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, reslo); // scale the upper half, then split into high/low parts temp += mulu_32x32(attohi, factor); u32 reshi; - temp = divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, &reshi); + temp = 
divu_64x32_rem(temp, ATTOSECONDS_PER_SECOND_SQRT, reshi); // scale the seconds temp += mulu_32x32(m_seconds, factor); @@ -80,19 +80,19 @@ attotime &attotime::operator/=(u32 factor) // split attoseconds into upper and lower halves which fit into 32 bits u32 attolo; - u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &attolo); + u32 attohi = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, attolo); // divide the seconds and get the remainder u32 remainder; - m_seconds = divu_64x32_rem(m_seconds, factor, &remainder); + m_seconds = divu_64x32_rem(m_seconds, factor, remainder); // combine the upper half of attoseconds with the remainder and divide that u64 temp = s64(attohi) + mulu_32x32(remainder, ATTOSECONDS_PER_SECOND_SQRT); - u32 reshi = divu_64x32_rem(temp, factor, &remainder); + u32 reshi = divu_64x32_rem(temp, factor, remainder); // combine the lower half of attoseconds with the remainder and divide that temp = attolo + mulu_32x32(remainder, ATTOSECONDS_PER_SECOND_SQRT); - u32 reslo = divu_64x32_rem(temp, factor, &remainder); + u32 reslo = divu_64x32_rem(temp, factor, remainder); // round based on the remainder m_attoseconds = (attoseconds_t)reslo + mulu_32x32(reshi, ATTOSECONDS_PER_SECOND_SQRT); @@ -142,7 +142,7 @@ const char *attotime::as_string(int precision) const else { u32 lower; - u32 upper = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, &lower); + u32 upper = divu_64x32_rem(m_attoseconds, ATTOSECONDS_PER_SECOND_SQRT, lower); int temp = precision; while (temp < 18) { diff --git a/src/emu/attotime.h b/src/emu/attotime.h index be75c48615c..c6fa441733c 100644 --- a/src/emu/attotime.h +++ b/src/emu/attotime.h @@ -357,7 +357,7 @@ inline attotime attotime::from_ticks(u64 ticks, u32 frequency) return attotime(0, ticks * attos_per_tick); u32 remainder; - s32 secs = divu_64x32_rem(ticks, frequency, &remainder); + s32 secs = divu_64x32_rem(ticks, frequency, remainder); return attotime(secs, u64(remainder) * 
attos_per_tick); } else diff --git a/src/emu/device.cpp b/src/emu/device.cpp index 60f406342c4..99f3fcc7ce5 100644 --- a/src/emu/device.cpp +++ b/src/emu/device.cpp @@ -437,7 +437,7 @@ attotime device_t::clocks_to_attotime(u64 numclocks) const noexcept else { u32 remainder; - u32 quotient = divu_64x32_rem(numclocks, m_clock, &remainder); + u32 quotient = divu_64x32_rem(numclocks, m_clock, remainder); return attotime(quotient, u64(remainder) * u64(m_attoseconds_per_clock)); } } diff --git a/src/emu/schedule.cpp b/src/emu/schedule.cpp index 28b98cdb91a..e32afb68f00 100644 --- a/src/emu/schedule.cpp +++ b/src/emu/schedule.cpp @@ -518,7 +518,7 @@ void device_scheduler::timeslice() else { u32 remainder; - s32 secs = divu_64x32_rem(ran, exec->m_cycles_per_second, &remainder); + s32 secs = divu_64x32_rem(ran, exec->m_cycles_per_second, remainder); deltatime = attotime(secs, u64(remainder) * exec->m_attoseconds_per_cycle); } assert(deltatime >= attotime::zero); diff --git a/src/emu/validity.cpp b/src/emu/validity.cpp index 84c29dab6c4..09758807e8e 100644 --- a/src/emu/validity.cpp +++ b/src/emu/validity.cpp @@ -475,13 +475,13 @@ void validity_checker::validate_inlines() if (resultu32 != expectedu32) osd_printf_error("Error testing divu_64x32 (%16X / %08X) = %08X (expected %08X)\n", u64(testu64a), u32(testu32a), resultu32, expectedu32); - resulti32 = div_64x32_rem(testi64a, testi32a, &remainder); + resulti32 = div_64x32_rem(testi64a, testi32a, remainder); expectedi32 = testi64a / s64(testi32a); expremainder = testi64a % s64(testi32a); if (resulti32 != expectedi32 || remainder != expremainder) osd_printf_error("Error testing div_64x32_rem (%16X / %08X) = %08X,%08X (expected %08X,%08X)\n", s64(testi64a), s32(testi32a), resulti32, remainder, expectedi32, expremainder); - resultu32 = divu_64x32_rem(testu64a, testu32a, &uremainder); + resultu32 = divu_64x32_rem(testu64a, testu32a, uremainder); expectedu32 = testu64a / u64(testu32a); expuremainder = testu64a % u64(testu32a); if 
(resultu32 != expectedu32 || uremainder != expuremainder) diff --git a/src/mame/drivers/dynax.cpp b/src/mame/drivers/dynax.cpp index a376f844544..212797eb82e 100644 --- a/src/mame/drivers/dynax.cpp +++ b/src/mame/drivers/dynax.cpp @@ -1712,8 +1712,7 @@ INPUT_PORTS_START( HANAFUDA_KEYS_BET ) PORT_BIT( 0x20, IP_ACTIVE_LOW, IPT_MAHJONG_SMALL ) PORT_PLAYER(2) // "s" INPUT_PORTS_END -#ifdef UNREFERENCED_CODE -static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT ) +[[maybe_unused]] static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT ) PORT_START("KEY0") PORT_BIT( 0x01, IP_ACTIVE_LOW, IPT_HANAFUDA_A ) PORT_PLAYER(1) PORT_BIT( 0x02, IP_ACTIVE_LOW, IPT_HANAFUDA_E ) PORT_PLAYER(1) @@ -1798,7 +1797,6 @@ static INPUT_PORTS_START( HANAFUDA_KEYS_BET_ALT ) PORT_BIT( 0x40, IP_ACTIVE_LOW, IPT_UNKNOWN ) PORT_BIT( 0x80, IP_ACTIVE_LOW, IPT_UNKNOWN ) INPUT_PORTS_END -#endif static INPUT_PORTS_START( cdracula ) PORT_START("P1") @@ -1961,9 +1959,9 @@ static INPUT_PORTS_START( hnkochou ) PORT_DIPNAME( 0x10, 0x10, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:5" ) PORT_DIPSETTING( 0x10, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) - PORT_DIPNAME( 0x20, 0x20, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:6" ) - PORT_DIPSETTING( 0x20, DEF_STR( Off ) ) - PORT_DIPSETTING( 0x00, DEF_STR( On ) ) + PORT_DIPNAME( 0x20, 0x20, "Gokou Odds" ) PORT_DIPLOCATION( "DIPSW 1:6" ) + PORT_DIPSETTING( 0x20, "100" ) + PORT_DIPSETTING( 0x00, "200" ) PORT_DIPNAME( 0x40, 0x40, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 1:7" ) PORT_DIPSETTING( 0x40, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) @@ -1972,16 +1970,15 @@ static INPUT_PORTS_START( hnkochou ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) PORT_START("DSW1") - PORT_DIPNAME( 0x01, 0x01, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:1" ) - PORT_DIPSETTING( 0x01, DEF_STR( Off ) ) - PORT_DIPSETTING( 0x00, DEF_STR( On ) ) - PORT_DIPNAME( 0x02, 0x02, "Stage Select" ) PORT_DIPLOCATION( "DIPSW 2:2" ) - PORT_DIPSETTING( 0x00, DEF_STR( No ) ) - 
PORT_DIPSETTING( 0x02, DEF_STR( Yes ) ) - PORT_DIPNAME( 0x04, 0x04, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:3" ) + PORT_DIPNAME( 0x03, 0x03, "Game Mode" ) PORT_DIPLOCATION( "DIPSW 2:1,2" ) + PORT_DIPSETTING( 0x03, "A (Stage Select)" ) // stage select, gal re-dresses if player loses + PORT_DIPSETTING( 0x02, "B" ) // no stage select, gal doesn't re-dress if player loses + PORT_DIPSETTING( 0x01, "C" ) // no stage select, gal re-dresses if player loses + PORT_DIPSETTING( 0x00, "D (Gals Off)" ) // no "show time" on win, gals still shown in attract mode + PORT_DIPNAME( 0x04, 0x04, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:3" ) // possibly difficulty/pay rate? PORT_DIPSETTING( 0x04, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) - PORT_DIPNAME( 0x08, 0x08, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:4" ) + PORT_DIPNAME( 0x08, 0x08, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:4" ) // possibly difficulty/pay rate? PORT_DIPSETTING( 0x08, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) PORT_DIPNAME( 0x10, 0x10, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:5" ) @@ -1990,9 +1987,9 @@ static INPUT_PORTS_START( hnkochou ) PORT_DIPNAME( 0x20, 0x20, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:6" ) PORT_DIPSETTING( 0x20, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) - PORT_DIPNAME( 0x40, 0x40, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:7" ) - PORT_DIPSETTING( 0x40, DEF_STR( Off ) ) - PORT_DIPSETTING( 0x00, DEF_STR( On ) ) + PORT_DIPNAME( 0x40, 0x40, "Suggest Move" ) PORT_DIPLOCATION( "DIPSW 2:7" ) + PORT_DIPSETTING( 0x00, DEF_STR( No ) ) + PORT_DIPSETTING( 0x40, DEF_STR( Yes ) ) PORT_DIPNAME( 0x80, 0x80, DEF_STR( Unknown ) ) PORT_DIPLOCATION( "DIPSW 2:8" ) PORT_DIPSETTING( 0x80, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) @@ -2170,10 +2167,10 @@ static INPUT_PORTS_START( hjingi ) PORT_DIPNAME( 0x10, 0x10, "Double-Up Game Rate" ) PORT_DIPLOCATION( "DIP2:5" ) PORT_DIPSETTING( 0x10, DEF_STR( High ) ) 
PORT_DIPSETTING( 0x00, DEF_STR( Low ) ) - PORT_DIPNAME( 0x20, 0x20, "GOKOU Odds" ) PORT_DIPLOCATION( "DIP2:6" ) + PORT_DIPNAME( 0x20, 0x20, "Gokou Odds" ) PORT_DIPLOCATION( "DIP2:6" ) PORT_DIPSETTING( 0x20, "100" ) PORT_DIPSETTING( 0x00, "200" ) - PORT_DIPNAME( 0x40, 0x40, "GOKOU Cut" ) PORT_DIPLOCATION( "DIP2:7" ) + PORT_DIPNAME( 0x40, 0x40, "Gokou Cut" ) PORT_DIPLOCATION( "DIP2:7" ) PORT_DIPSETTING( 0x00, DEF_STR( No ) ) PORT_DIPSETTING( 0x40, DEF_STR( Yes ) ) PORT_DIPNAME( 0x80, 0x80, "3-Renchan Bonus" ) PORT_DIPLOCATION( "DIP2:8" ) @@ -2623,7 +2620,7 @@ static INPUT_PORTS_START( hanayara ) PORT_DIPNAME( 0x20, 0x20, "Choose Bonus (Cheat)") PORT_DIPLOCATION( "DIP2:6" ) PORT_DIPSETTING( 0x20, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) - PORT_DIPNAME( 0x40, 0x40, "Unknown 2-6" ) PORT_DIPLOCATION( "DIP2:7" ) + PORT_DIPNAME( 0x40, 0x40, "Show All Bonus Cards") PORT_DIPLOCATION( "DIP2:7" ) PORT_DIPSETTING( 0x40, DEF_STR( Off ) ) PORT_DIPSETTING( 0x00, DEF_STR( On ) ) PORT_DIPNAME( 0x80, 0x80, DEF_STR( Service_Mode ) ) PORT_DIPLOCATION( "DIP2:8" ) diff --git a/src/mame/machine/kay_kbd.cpp b/src/mame/machine/kay_kbd.cpp index 0958c84170e..cde3e11ccb4 100644 --- a/src/mame/machine/kay_kbd.cpp +++ b/src/mame/machine/kay_kbd.cpp @@ -281,7 +281,7 @@ INPUT_PORTS_START(kaypro_keyboard_typewriter) PORT_BIT(0x04, IP_ACTIVE_LOW, IPT_KEYBOARD) PORT_CODE(KEYCODE_LSHIFT) PORT_CODE(KEYCODE_RSHIFT) PORT_CHAR(UCHAR_SHIFT_1) PORT_NAME("SHIFT") INPUT_PORTS_END -INPUT_PORTS_START(kaypro_keyboard_bitshift) +[[maybe_unused]] INPUT_PORTS_START(kaypro_keyboard_bitshift) PORT_INCLUDE(kaypro_keyboard_typewriter) PORT_MODIFY("ROW.2") @@ -366,7 +366,6 @@ void kaypro_10_keyboard_device::device_add_mconfig(machine_config &config) ioport_constructor kaypro_10_keyboard_device::device_input_ports() const { - (void)&INPUT_PORTS_NAME(kaypro_keyboard_bitshift); return INPUT_PORTS_NAME(kaypro_keyboard_typewriter); } diff --git a/src/osd/eigccarm.h b/src/osd/eigccarm.h new file mode 100644 
index 00000000000..c6e25d7a95e --- /dev/null +++ b/src/osd/eigccarm.h @@ -0,0 +1,285 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb +/*************************************************************************** + + eigccarm.h + + ARM/AArch64 inline implementations for GCC compilers. This code is + automatically included if appropriate by eminline.h. + +***************************************************************************/ + +#ifndef MAME_OSD_EIGCCARM_H +#define MAME_OSD_EIGCCARM_H + + +/*************************************************************************** + INLINE MATH FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + mul_32x32 - perform a signed 32 bit x 32 bit + multiply and return the full 64 bit result +-------------------------------------------------*/ + +// GCC can do a good job of this. + + +/*------------------------------------------------- + mulu_32x32 - perform an unsigned 32 bit x + 32 bit multiply and return the full 64 bit + result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mul_32x32_hi - perform a signed 32 bit x 32 bit + multiply and return the upper 32 bits of the + result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mulu_32x32_hi - perform an unsigned 32 bit x + 32 bit multiply and return the upper 32 bits + of the result +-------------------------------------------------*/ + +// GCC can do a good job of this + + +/*------------------------------------------------- + mul_32x32_shift - perform a signed 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#if !defined(__aarch64__) +#define mul_32x32_shift 
_mul_32x32_shift +inline int32_t ATTR_CONST ATTR_FORCE_INLINE +_mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift) +{ + uint32_t l, h; + + __asm__ ( + " smull %[l], %[h], %[val1], %[val2] \n" + : [l] "=r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) + ); + + // Valid for (0 <= shift <= 31); zero is special-cased because shifting a 32-bit value by 32 is undefined + return int32_t(shift ? ((l >> shift) | (h << (32 - shift))) : l); +} +#endif + + +/*------------------------------------------------- + mulu_32x32_shift - perform an unsigned 32 bit x + 32 bit multiply and shift the result by the + given number of bits before truncating the + result to 32 bits +-------------------------------------------------*/ + +#if !defined(__aarch64__) +#define mulu_32x32_shift _mulu_32x32_shift +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +_mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) +{ + uint32_t l, h; + + __asm__ ( + " umull %[l], %[h], %[val1], %[val2] \n" + : [l] "=r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) + ); + + // Valid for (0 <= shift <= 31); zero is special-cased because shifting a 32-bit value by 32 is undefined + return shift ? ((l >> shift) | (h << (32 - shift))) : l; +} +#endif + + +/*------------------------------------------------- + div_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + div_64x32_rem - perform a signed 64 bit x 32 + bit divide and return the 32 bit quotient and + 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_64x32_rem - perform an unsigned 64 bit x + 32 bit divide and return the 32 bit quotient + and 32 bit remainder +-------------------------------------------------*/ + +// TBD + + 
+/*------------------------------------------------- + div_32x32_shift - perform a signed divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + divu_32x32_shift - perform an unsigned divide of + two 32 bit values, shifting the first before + division, and returning the 32 bit quotient +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + mod_64x32 - perform a signed 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + modu_64x32 - perform an unsigned 64 bit x 32 bit + divide and return the 32 bit remainder +-------------------------------------------------*/ + +// TBD + + +/*------------------------------------------------- + recip_approx - compute an approximate floating + point reciprocal +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define recip_approx _recip_approx +inline float ATTR_CONST ATTR_FORCE_INLINE +_recip_approx(float value) +{ + float result; + + __asm__ ( + " frecpe %s[result], %s[value] \n" + : [result] "=w" (result) + : [value] "w" (value) + ); + + return result; +} +#endif + + +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef __aarch64__ +#define mul_64x64 _mul_64x64 +inline int64_t ATTR_FORCE_INLINE +_mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + __int128 const r(__int128(a) * b); + hi = int64_t(uint64_t((unsigned __int128)r >> 64)); + return int64_t(uint64_t((unsigned __int128)r)); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit 
x 64 + bit multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef __aarch64__ +#define mulu_64x64 _mulu_64x64 +inline uint64_t ATTR_FORCE_INLINE +_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + unsigned __int128 const r((unsigned __int128)a * b); + hi = uint64_t(r >> 64); + return uint64_t(r); +} +#endif + + + +/*************************************************************************** + INLINE BIT MANIPULATION FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + count_leading_zeros - return the number of + leading zero bits in a 32-bit value +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define count_leading_zeros _count_leading_zeros +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_zeros(uint32_t value) +{ + uint32_t result; + + __asm__ ( + " clz %w[result], %w[value] \n" + : [result] "=r" (result) + : [value] "r" (value) + ); + + return result; +} +#endif + +/*------------------------------------------------- + count_leading_ones - return the number of + leading one bits in a 32-bit value +-------------------------------------------------*/ + +#if defined(__aarch64__) +#define count_leading_ones _count_leading_ones +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +_count_leading_ones(uint32_t value) +{ + uint32_t result; + + __asm__ ( + " clz %w[result], %w[value] \n" + : [result] "=r" (result) + : [value] "r" (~value) + ); + + return result; +} +#endif + +#endif // MAME_OSD_EIGCCARM_H diff --git a/src/osd/eigccppc.h b/src/osd/eigccppc.h index 56a4c65d5b1..824657097b0 100644 --- a/src/osd/eigccppc.h +++ b/src/osd/eigccppc.h @@ -22,7 +22,7 @@ multiply and return the full 64 bit result -------------------------------------------------*/ -/* GCC can do a good job of this. */ +// GCC can do a good job of this. 
/*------------------------------------------------- @@ -31,7 +31,7 @@ result -------------------------------------------------*/ -/* GCC can do a good job of this */ +// GCC can do a good job of this /*------------------------------------------------- @@ -40,21 +40,7 @@ result -------------------------------------------------*/ -#define mul_32x32_hi _mul_32x32_hi -static inline int32_t ATTR_CONST ATTR_FORCE_INLINE -_mul_32x32_hi(int32_t val1, int32_t val2) -{ - int32_t result; - - __asm__ ( - " mulhw %[result], %[val1], %[val2] \n" - : [result] "=r" (result) - : [val1] "%r" (val1) - , [val2] "r" (val2) - ); - - return result; -} +// GCC can do a good job of this /*------------------------------------------------- @@ -63,21 +49,7 @@ _mul_32x32_hi(int32_t val1, int32_t val2) of the result -------------------------------------------------*/ -#define mulu_32x32_hi _mulu_32x32_hi -static inline uint32_t ATTR_CONST ATTR_FORCE_INLINE -_mulu_32x32_hi(uint32_t val1, uint32_t val2) -{ - uint32_t result; - - __asm__ ( - " mulhwu %[result], %[val1], %[val2] \n" - : [result] "=r" (result) - : [val1] "%r" (val1) - , [val2] "r" (val2) - ); - - return result; -} +// GCC can do a good job of this /*------------------------------------------------- @@ -89,27 +61,22 @@ _mulu_32x32_hi(uint32_t val1, uint32_t val2) #if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64) #define mul_32x32_shift _mul_32x32_shift -static inline int32_t ATTR_CONST ATTR_FORCE_INLINE +inline int32_t ATTR_CONST ATTR_FORCE_INLINE _mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift) { - int32_t result; + uint32_t l, h; - /* Valid for (0 <= shift <= 32) */ __asm__ ( - " mullw %[result], %[val1], %[val2] \n" - " mulhw %[val1], %[val1], %[val2] \n" - " srw %[result], %[result], %[shift] \n" - " subfic %[shift], %[shift], 0x20 \n" - " slw %[val1], %[val1], %[shift] \n" - " or %[result], %[result], %[val1] \n" - : [result] "=&r" (result) - , [shift] "+r" (shift) - , [val1] "+r" (val1) - : 
[val2] "r" (val2) - : "xer" + " mullw %[l], %[val1], %[val2] \n" + " mulhw %[h], %[val1], %[val2] \n" + : [l] "=&r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) ); - return result; + // Valid for (0 <= shift <= 31); zero is special-cased because shifting a 32-bit value by 32 is undefined + return int32_t(shift ? ((l >> shift) | (h << (32 - shift))) : l); } #endif @@ -123,27 +90,22 @@ _mul_32x32_shift(int32_t val1, int32_t val2, uint8_t shift) #if !defined(__ppc64__) && !defined(__PPC64__) && !defined(_ARCH_PPC64) #define mulu_32x32_shift _mulu_32x32_shift -static inline uint32_t ATTR_CONST ATTR_FORCE_INLINE +inline uint32_t ATTR_CONST ATTR_FORCE_INLINE _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) { - uint32_t result; + uint32_t l, h; - /* Valid for (0 <= shift <= 32) */ __asm__ ( - " mullw %[result], %[val1], %[val2] \n" - " mulhwu %[val1], %[val1], %[val2] \n" - " srw %[result], %[result], %[shift] \n" - " subfic %[shift], %[shift], 0x20 \n" - " slw %[val1], %[val1], %[shift] \n" - " or %[result], %[result], %[val1] \n" - : [result] "=&r" (result) - , [shift] "+r" (shift) - , [val1] "+r" (val1) - : [val2] "r" (val2) - : "xer" + " mullw %[l], %[val1], %[val2] \n" + " mulhwu %[h], %[val1], %[val2] \n" + : [l] "=&r" (l) + , [h] "=r" (h) + : [val1] "%r" (val1) + , [val2] "r" (val2) ); - return result; + // Valid for (0 <= shift <= 31); zero is special-cased because shifting a 32-bit value by 32 is undefined + return shift ? ((l >> shift) | (h << (32 - shift))) : l; } #endif @@ -153,7 +115,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) divide and return the 32 bit quotient -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -161,7 +123,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) divide and return the 32 bit quotient -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -170,7 +132,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) 32 bit remainder -------------------------------------------------*/ 
-/* TBD */ +// TBD /*------------------------------------------------- @@ -179,7 +141,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) and 32 bit remainder -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -188,7 +150,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) division, and returning the 32 bit quotient -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -197,7 +159,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) division, and returning the 32 bit quotient -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -205,7 +167,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) divide and return the 32 bit remainder -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -213,7 +175,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) divide and return the 32 bit remainder -------------------------------------------------*/ -/* TBD */ +// TBD /*------------------------------------------------- @@ -222,7 +184,7 @@ _mulu_32x32_shift(uint32_t val1, uint32_t val2, uint8_t shift) -------------------------------------------------*/ #define recip_approx _recip_approx -static inline float ATTR_CONST ATTR_FORCE_INLINE +inline float ATTR_CONST ATTR_FORCE_INLINE _recip_approx(float value) { float result; @@ -237,6 +199,40 @@ _recip_approx(float value) } +/*------------------------------------------------- + mul_64x64 - perform a signed 64 bit x 64 bit + multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef __ppc64__ +#define mul_64x64 _mul_64x64 +inline int64_t ATTR_FORCE_INLINE +_mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + __int128 const 
r(__int128(a) * b); + hi = int64_t(uint64_t((unsigned __int128)r >> 64)); + return int64_t(uint64_t((unsigned __int128)r)); +} +#endif + + +/*------------------------------------------------- + mulu_64x64 - perform an unsigned 64 bit x 64 + bit multiply and return the full 128 bit result +-------------------------------------------------*/ + +#ifdef __ppc64__ +#define mulu_64x64 _mulu_64x64 +inline uint64_t ATTR_FORCE_INLINE +_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + unsigned __int128 const r((unsigned __int128)a * b); + hi = uint64_t(r >> 64); + return uint64_t(r); +} +#endif + + /*************************************************************************** INLINE BIT MANIPULATION FUNCTIONS @@ -248,15 +244,15 @@ _recip_approx(float value) -------------------------------------------------*/ #define count_leading_zeros _count_leading_zeros -static inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE _count_leading_zeros(uint32_t value) { uint32_t result; __asm__ ( " cntlzw %[result], %[value] \n" - : [result] "=r" (result) /* result can be in any register */ - : [value] "r" (value) /* 'value' can be in any register */ + : [result] "=r" (result) + : [value] "r" (value) ); return result; @@ -269,15 +265,15 @@ _count_leading_zeros(uint32_t value) -------------------------------------------------*/ #define count_leading_ones _count_leading_ones -static inline uint8_t ATTR_CONST ATTR_FORCE_INLINE +inline uint8_t ATTR_CONST ATTR_FORCE_INLINE _count_leading_ones(uint32_t value) { uint32_t result; __asm__ ( - " cntlzw %[result], %[result] \n" - : [result] "=r" (result) /* result can be in any register */ - : [value] "r" (~value) /* 'value' can be in any register */ + " cntlzw %[result], %[value] \n" + : [result] "=r" (result) + : [value] "r" (~value) ); return result; diff --git a/src/osd/eigccx86.h b/src/osd/eigccx86.h index c5ec0848e01..b3bbf7bd0ec 100644 --- a/src/osd/eigccx86.h +++ b/src/osd/eigccx86.h @@ -31,22 +31,7 @@ 
multiply and return the full 64 bit result -------------------------------------------------*/ -#ifndef __x86_64__ -#define mul_32x32 _mul_32x32 -inline int64_t ATTR_CONST ATTR_FORCE_INLINE -_mul_32x32(int32_t a, int32_t b) -{ - int64_t result; - __asm__ ( - " imull %[b] ;" - : [result] "=A" (result) // result in edx:eax - : [a] "%a" (a) // 'a' should also be in eax on entry - , [b] "rm" (b) // 'b' can be memory or register - : "cc" // Clobbers condition codes - ); - return result; -} -#endif +// GCC can do a good job of this. /*------------------------------------------------- @@ -55,22 +40,7 @@ _mul_32x32(int32_t a, int32_t b) result -------------------------------------------------*/ -#ifndef __x86_64__ -#define mulu_32x32 _mulu_32x32 -inline uint64_t ATTR_CONST ATTR_FORCE_INLINE -_mulu_32x32(uint32_t a, uint32_t b) -{ - uint64_t result; - __asm__ ( - " mull %[b] ;" - : [result] "=A" (result) // result in edx:eax - : [a] "%a" (a) // 'a' should also be in eax on entry - , [b] "rm" (b) // 'b' can be memory or register - : "cc" // Clobbers condition codes - ); - return result; -} -#endif +// GCC can do a good job of this. /*------------------------------------------------- @@ -79,21 +49,7 @@ _mulu_32x32(uint32_t a, uint32_t b) result -------------------------------------------------*/ -#define mul_32x32_hi _mul_32x32_hi -inline int32_t ATTR_CONST ATTR_FORCE_INLINE -_mul_32x32_hi(int32_t a, int32_t b) -{ - int32_t result, temp; - __asm__ ( - " imull %[b] ;" - : [result] "=d" (result) // result in edx - , [temp] "=a" (temp) // This is effectively a clobber - : [a] "a" (a) // 'a' should be in eax on entry - , [b] "rm" (b) // 'b' can be memory or register - : "cc" // Clobbers condition codes - ); - return result; -} +// GCC can do a good job of this. 
/*------------------------------------------------- @@ -102,21 +58,7 @@ _mul_32x32_hi(int32_t a, int32_t b) of the result -------------------------------------------------*/ -#define mulu_32x32_hi _mulu_32x32_hi -inline uint32_t ATTR_CONST ATTR_FORCE_INLINE -_mulu_32x32_hi(uint32_t a, uint32_t b) -{ - uint32_t result, temp; - __asm__ ( - " mull %[b] ;" - : [result] "=d" (result) // result in edx - , [temp] "=a" (temp) // This is effectively a clobber - : [a] "a" (a) // 'a' should be in eax on entry - , [b] "rm" (b) // 'b' can be memory or register - : "cc" // Clobbers condition codes - ); - return result; -} +// GCC can do a good job of this. /*------------------------------------------------- @@ -241,21 +183,19 @@ _divu_64x32(uint64_t a, uint32_t b) #define div_64x32_rem _div_64x32_rem inline int32_t ATTR_FORCE_INLINE -_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder) +_div_64x32_rem(int64_t dividend, int32_t divisor, int32_t &remainder) { int32_t quotient; #ifndef __x86_64__ - // Throws arithmetic exception if result doesn't fit in 32 bits __asm__ ( " idivl %[divisor] ;" : [result] "=a" (quotient) // quotient ends up in eax - , [remainder] "=d" (*remainder) // remainder ends up in edx + , [remainder] "=d" (remainder) // remainder ends up in edx : [dividend] "A" (dividend) // 'dividend' in edx:eax , [divisor] "rm" (divisor) // 'divisor' in register or memory : "cc" // clobbers condition codes ); - #else int32_t const divh{ int32_t(uint32_t(uint64_t(dividend) >> 32)) }; int32_t const divl{ int32_t(uint32_t(uint64_t(dividend))) }; @@ -264,13 +204,12 @@ _div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder) __asm__ ( " idivl %[divisor] ;" : [result] "=a" (quotient) // quotient ends up in eax - , [remainder] "=d" (*remainder) // remainder ends up in edx + , [remainder] "=d" (remainder) // remainder ends up in edx : [divl] "a" (divl) // 'dividend' in edx:eax , [divh] "d" (divh) , [divisor] "rm" (divisor) // 'divisor' in register or 
memory : "cc" // clobbers condition codes ); - #endif return quotient; } @@ -284,21 +223,19 @@ _div_64x32_rem(int64_t dividend, int32_t divisor, int32_t *remainder) #define divu_64x32_rem _divu_64x32_rem inline uint32_t ATTR_FORCE_INLINE -_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder) +_divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t &remainder) { uint32_t quotient; #ifndef __x86_64__ - // Throws arithmetic exception if result doesn't fit in 32 bits __asm__ ( " divl %[divisor] ;" : [result] "=a" (quotient) // quotient ends up in eax - , [remainder] "=d" (*remainder) // remainder ends up in edx + , [remainder] "=d" (remainder) // remainder ends up in edx : [dividend] "A" (dividend) // 'dividend' in edx:eax , [divisor] "rm" (divisor) // 'divisor' in register or memory : "cc" // clobbers condition codes ); - #else uint32_t const divh{ uint32_t(dividend >> 32) }; uint32_t const divl{ uint32_t(dividend) }; @@ -307,7 +244,7 @@ _divu_64x32_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder) __asm__ ( " divl %[divisor] ;" : [result] "=a" (quotient) // quotient ends up in eax - , [remainder] "=d" (*remainder) // remainder ends up in edx + , [remainder] "=d" (remainder) // remainder ends up in edx : [divl] "a" (divl) // 'dividend' in edx:eax , [divh] "d" (divh) , [divisor] "rm" (divisor) // 'divisor' in register or memory @@ -444,11 +381,11 @@ _modu_64x32(uint64_t a, uint32_t b) #ifdef __SSE2__ #define recip_approx _recip_approx -inline float ATTR_CONST +inline float ATTR_CONST ATTR_FORCE_INLINE _recip_approx(float value) { - __m128 const value_xmm = _mm_set_ss(value); - __m128 const result_xmm = _mm_rcp_ss(value_xmm); + __m128 const value_xmm(_mm_set_ss(value)); + __m128 const result_xmm(_mm_rcp_ss(value_xmm)); float result; _mm_store_ss(&result, result_xmm); return result; @@ -464,10 +401,10 @@ _recip_approx(float value) #ifdef __x86_64__ #define mul_64x64 _mul_64x64 inline int64_t ATTR_FORCE_INLINE -_mul_64x64(int64_t a, 
int64_t b, int64_t *hi) +_mul_64x64(int64_t a, int64_t b, int64_t &hi) { __int128 const r(__int128(a) * b); - *hi = int64_t(uint64_t((unsigned __int128)r >> 64)); + hi = int64_t(uint64_t((unsigned __int128)r >> 64)); return int64_t(uint64_t((unsigned __int128)r)); } #endif @@ -481,10 +418,10 @@ _mul_64x64(int64_t a, int64_t b, int64_t *hi) #ifdef __x86_64__ #define mulu_64x64 _mulu_64x64 inline uint64_t ATTR_FORCE_INLINE -_mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi) +_mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) { unsigned __int128 const r((unsigned __int128)a * b); - *hi = uint64_t(r >> 64); + hi = uint64_t(r >> 64); return uint64_t(r); } #endif diff --git a/src/osd/eivc.h b/src/osd/eivc.h index 3fa34f51b3c..56d1dcc3ab8 100644 --- a/src/osd/eivc.h +++ b/src/osd/eivc.h @@ -28,7 +28,7 @@ #ifndef count_leading_zeros #define count_leading_zeros _count_leading_zeros -inline uint8_t _count_leading_zeros(uint32_t value) +__forceinline uint8_t _count_leading_zeros(uint32_t value) { unsigned long index; return _BitScanReverse(&index, value) ? (31U - index) : 32U; @@ -43,7 +43,7 @@ inline uint8_t _count_leading_zeros(uint32_t value) #ifndef count_leading_ones #define count_leading_ones _count_leading_ones -inline uint8_t _count_leading_ones(uint32_t value) +__forceinline uint8_t _count_leading_ones(uint32_t value) { unsigned long index; return _BitScanReverse(&index, ~value) ? 
(31U - index) : 32U; diff --git a/src/osd/eivcx86.h b/src/osd/eivcx86.h index 3fd3e70948c..eb8811ad8bc 100644 --- a/src/osd/eivcx86.h +++ b/src/osd/eivcx86.h @@ -15,9 +15,10 @@ #ifdef PTR64 #include -#include #endif +#include + /*************************************************************************** INLINE MATH FUNCTIONS @@ -30,7 +31,7 @@ #ifndef PTR64 #define mul_32x32 _mul_32x32 -static inline int64_t _mul_32x32(int32_t a, int32_t b) +inline int64_t _mul_32x32(int32_t a, int32_t b) { // in theory this should work, but it is untested __asm @@ -51,7 +52,7 @@ static inline int64_t _mul_32x32(int32_t a, int32_t b) #ifndef PTR64 #define mulu_32x32 _mulu_32x32 -static inline uint64_t _mulu_32x32(uint32_t a, uint32_t b) +inline uint64_t _mulu_32x32(uint32_t a, uint32_t b) { // in theory this should work, but it is untested __asm @@ -72,7 +73,7 @@ static inline uint64_t _mulu_32x32(uint32_t a, uint32_t b) #ifndef PTR64 #define mul_32x32_hi _mul_32x32_hi -static inline int32_t _mul_32x32_hi(int32_t a, int32_t b) +inline int32_t _mul_32x32_hi(int32_t a, int32_t b) { int32_t result; @@ -96,7 +97,7 @@ static inline int32_t _mul_32x32_hi(int32_t a, int32_t b) #ifndef PTR64 #define mulu_32x32_hi _mulu_32x32_hi -static inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b) +inline uint32_t _mulu_32x32_hi(uint32_t a, uint32_t b) { int32_t result; @@ -148,7 +149,7 @@ static inline int32_t _mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) #ifndef PTR64 #define mulu_32x32_shift _mulu_32x32_shift -static inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) { int32_t result; @@ -173,7 +174,7 @@ static inline uint32_t _mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) #ifndef PTR64 #define div_64x32 _div_64x32 -static inline int32_t _div_64x32(int64_t a, int32_t b) +inline int32_t _div_64x32(int64_t a, int32_t b) { int32_t result; int32_t alow = a; @@ -199,7 +200,7 @@ static 
inline int32_t _div_64x32(int64_t a, int32_t b) #ifndef PTR64 #define divu_64x32 _divu_64x32 -static inline uint32_t _divu_64x32(uint64_t a, uint32_t b) +inline uint32_t _divu_64x32(uint64_t a, uint32_t b) { uint32_t result; uint32_t alow = a; @@ -226,7 +227,7 @@ static inline uint32_t _divu_64x32(uint64_t a, uint32_t b) #ifndef PTR64 #define div_64x32_rem _div_64x32_rem -static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder) +inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t &remainder) { int32_t result; int32_t alow = a; @@ -242,7 +243,7 @@ static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder) mov rem,edx } - *remainder = rem; + remainder = rem; return result; } #endif @@ -256,7 +257,7 @@ static inline int32_t _div_64x32_rem(int64_t a, int32_t b, int32_t *remainder) #ifndef PTR64 #define divu_64x32_rem _divu_64x32_rem -static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder) +inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder) { uint32_t result; uint32_t alow = a; @@ -272,7 +273,7 @@ static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remaind mov rem,edx } - *remainder = rem; + remainder = rem; return result; } #endif @@ -286,7 +287,7 @@ static inline uint32_t _divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remaind #ifndef PTR64 #define div_32x32_shift _div_32x32_shift -static inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift) +inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift) { int32_t result; @@ -314,7 +315,7 @@ static inline int32_t _div_32x32_shift(int32_t a, int32_t b, uint8_t shift) #ifndef PTR64 #define divu_32x32_shift _divu_32x32_shift -static inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +inline uint32_t _divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) { uint32_t result; @@ -367,7 +368,7 @@ static inline int32_t _mod_64x32(int64_t a, int32_t b) 
#ifndef PTR64 #define modu_64x32 _modu_64x32 -static inline uint32_t _modu_64x32(uint64_t a, uint32_t b) +inline uint32_t _modu_64x32(uint64_t a, uint32_t b) { uint32_t result; uint32_t alow = a; @@ -393,7 +394,7 @@ static inline uint32_t _modu_64x32(uint64_t a, uint32_t b) #ifdef PTR64 #define recip_approx _recip_approx -static inline float _recip_approx(float z) +inline float _recip_approx(float z) { __m128 const mz = _mm_set_ss(z); __m128 const mooz = _mm_rcp_ss(mz); @@ -410,7 +411,11 @@ static inline float _recip_approx(float z) -------------------------------------------------*/ #ifdef PTR64 -#define mul_64x64 _mul128 +#define mul_64x64 _mul_64x64 +__forceinline int64_t _mul_64x64(int64_t a, int64_t b, int64_t &hi) +{ + return _mul128(a, b, &hi); +} #endif @@ -420,7 +425,44 @@ static inline float _recip_approx(float z) -------------------------------------------------*/ #ifdef PTR64 -#define mulu_64x64 _umul128 +#define mulu_64x64 _mulu_64x64 +__forceinline int64_t _mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) +{ + return _umul128(a, b, &hi); +} #endif + +/*------------------------------------------------- + addu_32x32_co - perform an unsigned 32 bit + 32 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#define addu_32x32_co _addu_32x32_co +__forceinline bool _addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum) +{ + return _addcarry_u32(0, a, b, &sum); +} + + +/*------------------------------------------------- + addu_64x64_co - perform an unsigned 64 bit + 64 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#define addu_64x64_co _addu_64x64_co +__forceinline bool _addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum) +{ +#ifdef PTR64 + return _addcarry_u64(0, a, b, &sum); +#else + uint32_t l, h; + bool const result = _addcarry_u32(_addcarry_u32(0, uint32_t(a), uint32_t(b), &l), uint32_t(a >> 32), uint32_t(b >> 32), &h); + sum = 
(uint64_t(h) << 32) | l; + return result; +#endif +} + #endif // MAME_OSD_EIVCX86_H diff --git a/src/osd/eminline.h b/src/osd/eminline.h index c217124bd3e..0eef3d7cb0c 100644 --- a/src/osd/eminline.h +++ b/src/osd/eminline.h @@ -25,8 +25,8 @@ #include "eigccx86.h" #elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__) #include "eigccppc.h" -#else -#error "no matching assembler implementations found - please compile with NOASM=1" +#elif defined(__arm__) || defined(__aarch64__) +#include "eigccarm.h" #endif #elif defined(_MSC_VER) @@ -37,10 +37,6 @@ #include "eivc.h" -#else - -#error "no matching assembler implementations found - please compile with NOASM=1" - #endif #endif // !defined(MAME_NOASM) @@ -56,7 +52,7 @@ -------------------------------------------------*/ #ifndef mul_32x32 -inline int64_t mul_32x32(int32_t a, int32_t b) +constexpr int64_t mul_32x32(int32_t a, int32_t b) { return int64_t(a) * int64_t(b); } @@ -70,7 +66,7 @@ inline int64_t mul_32x32(int32_t a, int32_t b) -------------------------------------------------*/ #ifndef mulu_32x32 -inline uint64_t mulu_32x32(uint32_t a, uint32_t b) +constexpr uint64_t mulu_32x32(uint32_t a, uint32_t b) { return uint64_t(a) * uint64_t(b); } @@ -84,7 +80,7 @@ inline uint64_t mulu_32x32(uint32_t a, uint32_t b) -------------------------------------------------*/ #ifndef mul_32x32_hi -inline int32_t mul_32x32_hi(int32_t a, int32_t b) +constexpr int32_t mul_32x32_hi(int32_t a, int32_t b) { return uint32_t((int64_t(a) * int64_t(b)) >> 32); } @@ -98,7 +94,7 @@ inline int32_t mul_32x32_hi(int32_t a, int32_t b) -------------------------------------------------*/ #ifndef mulu_32x32_hi -inline uint32_t mulu_32x32_hi(uint32_t a, uint32_t b) +constexpr uint32_t mulu_32x32_hi(uint32_t a, uint32_t b) { return uint32_t((uint64_t(a) * uint64_t(b)) >> 32); } @@ -113,7 +109,7 @@ inline uint32_t mulu_32x32_hi(uint32_t a, uint32_t b) -------------------------------------------------*/ #ifndef 
mul_32x32_shift -inline int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) +constexpr int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) { return int32_t((int64_t(a) * int64_t(b)) >> shift); } @@ -128,7 +124,7 @@ inline int32_t mul_32x32_shift(int32_t a, int32_t b, uint8_t shift) -------------------------------------------------*/ #ifndef mulu_32x32_shift -inline uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +constexpr uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) { return uint32_t((uint64_t(a) * uint64_t(b)) >> shift); } @@ -141,7 +137,7 @@ inline uint32_t mulu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) -------------------------------------------------*/ #ifndef div_64x32 -inline int32_t div_64x32(int64_t a, int32_t b) +constexpr int32_t div_64x32(int64_t a, int32_t b) { return a / int64_t(b); } @@ -154,7 +150,7 @@ inline int32_t div_64x32(int64_t a, int32_t b) -------------------------------------------------*/ #ifndef divu_64x32 -inline uint32_t divu_64x32(uint64_t a, uint32_t b) +constexpr uint32_t divu_64x32(uint64_t a, uint32_t b) { return a / uint64_t(b); } @@ -168,10 +164,10 @@ inline uint32_t divu_64x32(uint64_t a, uint32_t b) -------------------------------------------------*/ #ifndef div_64x32_rem -inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t *remainder) +inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t &remainder) { - int32_t const res = div_64x32(a, b); - *remainder = a - (int64_t(b) * res); + int32_t const res(div_64x32(a, b)); + remainder = a - (int64_t(b) * res); return res; } #endif @@ -184,10 +180,10 @@ inline int32_t div_64x32_rem(int64_t a, int32_t b, int32_t *remainder) -------------------------------------------------*/ #ifndef divu_64x32_rem -inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder) +inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t &remainder) { - uint32_t const res = divu_64x32(a, b); - *remainder = a - 
(uint64_t(b) * res); + uint32_t const res(divu_64x32(a, b)); + remainder = a - (uint64_t(b) * res); return res; } #endif @@ -200,7 +196,7 @@ inline uint32_t divu_64x32_rem(uint64_t a, uint32_t b, uint32_t *remainder) -------------------------------------------------*/ #ifndef div_32x32_shift -inline int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift) +constexpr int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift) { return (int64_t(a) << shift) / int64_t(b); } @@ -214,7 +210,7 @@ inline int32_t div_32x32_shift(int32_t a, int32_t b, uint8_t shift) -------------------------------------------------*/ #ifndef divu_32x32_shift -inline uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) +constexpr uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) { return (uint64_t(a) << shift) / uint64_t(b); } @@ -227,7 +223,7 @@ inline uint32_t divu_32x32_shift(uint32_t a, uint32_t b, uint8_t shift) -------------------------------------------------*/ #ifndef mod_64x32 -inline int32_t mod_64x32(int64_t a, int32_t b) +constexpr int32_t mod_64x32(int64_t a, int32_t b) { return a - (b * div_64x32(a, b)); } @@ -240,7 +236,7 @@ inline int32_t mod_64x32(int64_t a, int32_t b) -------------------------------------------------*/ #ifndef modu_64x32 -inline uint32_t modu_64x32(uint64_t a, uint32_t b) +constexpr uint32_t modu_64x32(uint64_t a, uint32_t b) { return a - (b * divu_64x32(a, b)); } @@ -253,7 +249,7 @@ inline uint32_t modu_64x32(uint64_t a, uint32_t b) -------------------------------------------------*/ #ifndef recip_approx -inline float recip_approx(float value) +constexpr float recip_approx(float value) { return 1.0f / value; } @@ -266,7 +262,7 @@ inline float recip_approx(float value) -------------------------------------------------*/ #ifndef mul_64x64 -inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi) +inline int64_t mul_64x64(int64_t a, int64_t b, int64_t &hi) { uint64_t const a_hi = uint64_t(a) >> 32; uint64_t const b_hi = 
uint64_t(b) >> 32; @@ -279,13 +275,13 @@ inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi) uint64_t const ab_hi = a_hi * b_hi; uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32; - *hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry; + hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry; // adjust for sign if (a < 0) - *hi -= b; + hi -= b; if (b < 0) - *hi -= a; + hi -= a; return ab_lo + (ab_m1 << 32) + (ab_m2 << 32); } @@ -298,7 +294,7 @@ inline int64_t mul_64x64(int64_t a, int64_t b, int64_t *hi) -------------------------------------------------*/ #ifndef mulu_64x64 -inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi) +inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t &hi) { uint64_t const a_hi = uint32_t(a >> 32); uint64_t const b_hi = uint32_t(b >> 32); @@ -311,13 +307,51 @@ inline uint64_t mulu_64x64(uint64_t a, uint64_t b, uint64_t *hi) uint64_t const ab_hi = a_hi * b_hi; uint64_t const carry = ((ab_lo >> 32) + uint32_t(ab_m1) + uint32_t(ab_m2)) >> 32; - *hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry; + hi = ab_hi + (ab_m1 >> 32) + (ab_m2 >> 32) + carry; return ab_lo + (ab_m1 << 32) + (ab_m2 << 32); } #endif +/*------------------------------------------------- + addu_32x32_co - perform an unsigned 32 bit + 32 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#ifndef addu_32x32_co +inline bool addu_32x32_co(uint32_t a, uint32_t b, uint32_t &sum) +{ +#if defined(__GNUC__) + return __builtin_add_overflow(a, b, &sum); +#else + sum = a + b; + return (a > sum) || (b > sum); +#endif +} +#endif + + +/*------------------------------------------------- + addu_64x64_co - perform an unsigned 64 bit + 64 + bit addition and return the result with carry + out +-------------------------------------------------*/ + +#ifndef addu_64x64_co +inline bool addu_64x64_co(uint64_t a, uint64_t b, uint64_t &sum) +{ +#if defined(__GNUC__) + return 
__builtin_add_overflow(a, b, &sum); +#else + sum = a + b; + return (a > sum) || (b > sum); +#endif +} +#endif + + /*************************************************************************** INLINE BIT MANIPULATION FUNCTIONS @@ -360,12 +394,11 @@ inline uint8_t count_leading_ones(uint32_t val) -------------------------------------------------*/ #ifndef population_count_32 -#if defined(__NetBSD__) -#define population_count_32 popcount32 -#else inline unsigned population_count_32(uint32_t val) { -#if defined(__GNUC__) +#if defined(__NetBSD__) + return popcount32(val); +#elif defined(__GNUC__) // uses CPU feature if available, otherwise falls back to implementation similar to what follows static_assert(sizeof(val) == sizeof(unsigned), "expected 32-bit unsigned int"); return unsigned(__builtin_popcount(static_cast(val))); @@ -382,7 +415,6 @@ inline unsigned population_count_32(uint32_t val) #endif } #endif -#endif /*------------------------------------------------- @@ -391,12 +423,11 @@ inline unsigned population_count_32(uint32_t val) -------------------------------------------------*/ #ifndef population_count_64 -#if defined(__NetBSD__) -#define population_count_64 popcount64 -#else inline unsigned population_count_64(uint64_t val) { -#if defined(__GNUC__) +#if defined(__NetBSD__) + return popcount64(val); +#elif defined(__GNUC__) // uses CPU feature if available, otherwise falls back to implementation similar to what follows static_assert(sizeof(val) == sizeof(unsigned long long), "expected 64-bit unsigned long long int"); return unsigned(__builtin_popcountll(static_cast(val))); @@ -422,7 +453,6 @@ inline unsigned population_count_64(uint64_t val) #endif } #endif -#endif /***************************************************************************