-cpu/drcbearm64.cpp: More fixes and optimisations:

* Correctly identify valid immediate constants for add/sub/cmp (it was
  too conservative).
* Don't unnecessarily copy UML register values kept in host registers
  for CMP.
* Fixed detection of TST against immediate zero and optimised generated
  code.
* Optimised TST against immediate with all bits set.

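-cpu/alto2: Reset the display state by assigning a value-initialised
 object and giving its members default initialisers, following the same
 pattern as the other parts of this device that have been altered to
 avoid problematic memsets.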

-cpu/powerpc: Realigned some comments that had drifted.
This commit is contained in:
Vas Crabb 2025-03-14 14:18:43 +11:00
parent 14d11c479e
commit e75c7079cf
4 changed files with 81 additions and 67 deletions

View File

@ -499,7 +499,7 @@ void alto2_cpu_device::f2_late_evenfield()
*/
void alto2_cpu_device::init_disp()
{
m_dsp.clear();
m_dsp = decltype(m_dsp)();
save_item(NAME(m_dsp.state));
save_item(NAME(m_dsp.hlc));
save_item(NAME(m_dsp.setmode));

View File

@ -193,35 +193,24 @@
#ifndef MAME_CPU_ALTO2_A2DISP_H
#define MAME_CPU_ALTO2_A2DISP_H
struct {
void clear()
{
state = hlc = setmode = inverse = scanline = 0;
halfclock = vblank = false;
std::fill(std::begin(fifo), std::end(fifo), 0);
wa = ra = a63 = a66 = 0;
dht_blocks = dwt_blocks = curt_blocks = curt_wakeup = false;
xpreg = csr = 0;
}
uint32_t state; //!< current state of the display_state_machine()
uint32_t hlc; //!< horizontal line counter
uint32_t setmode; //!< value written by last SETMODE<-
uint32_t inverse; //!< set to 0xffff if line is inverse, 0x0000 otherwise
uint32_t scanline; //!< current scanline
bool halfclock; //!< false for normal pixel clock, true for half pixel clock
bool vblank; //!< true during vblank, false otherwise
uint16_t fifo[A2_DISP_FIFO]; //!< display word fifo
uint32_t wa; //!< fifo input pointer (write address; 4-bit)
uint32_t ra; //!< fifo output pointer (read address; 4-bit)
uint32_t a63; //!< most recent value read from the PROM a63
uint32_t a66; //!< most recent value read from the PROM a66
bool dht_blocks; //!< set true, if the DHT executed BLOCK
bool dwt_blocks; //!< set true, if the DWT executed BLOCK
bool curt_blocks; //!< set true, if the CURT executed BLOCK
bool curt_wakeup; //!< set true, if CURT wakeups are generated
uint32_t xpreg; //!< cursor x position register (10-bit)
uint32_t csr; //!< cursor shift register (16-bit)
uint32_t state = 0; //!< current state of the display_state_machine()
uint32_t hlc = 0; //!< horizontal line counter
uint32_t setmode = 0; //!< value written by last SETMODE<-
uint32_t inverse = 0; //!< set to 0xffff if line is inverse, 0x0000 otherwise
uint32_t scanline = 0; //!< current scanline
bool halfclock = false; //!< false for normal pixel clock, true for half pixel clock
bool vblank = false; //!< true during vblank, false otherwise
uint16_t fifo[A2_DISP_FIFO] = { }; //!< display word fifo
uint32_t wa = 0; //!< fifo input pointer (write address; 4-bit)
uint32_t ra = 0; //!< fifo output pointer (read address; 4-bit)
uint32_t a63 = 0; //!< most recent value read from the PROM a63
uint32_t a66 = 0; //!< most recent value read from the PROM a66
bool dht_blocks = false; //!< set true, if the DHT executed BLOCK
bool dwt_blocks = false; //!< set true, if the DWT executed BLOCK
bool curt_blocks = false; //!< set true, if the CURT executed BLOCK
bool curt_wakeup = false; //!< set true, if CURT wakeups are generated
uint32_t xpreg = 0; //!< cursor x position register (10-bit)
uint32_t csr = 0; //!< cursor shift register (16-bit)
std::unique_ptr<uint16_t[]> framebuf; //!< array of words of the raw bitmap that is displayed
std::unique_ptr<uint8_t[]> patterns; //!< array of 65536 patterns (16 bytes) with 1 byte per pixel
std::unique_ptr<bitmap_ind16> bitmap; //!< MAME bitmap with 16 bit indices

View File

@ -251,6 +251,12 @@ inline bool is_valid_immediate(uint64_t val, size_t bits)
return val < (uint64_t(1) << bits);
}
constexpr bool is_valid_immediate_addsub(uint64_t val)
{
	// add/sub/cmp immediates are an unsigned 12-bit field that may
	// optionally be shifted left by 12 bits, so the value is encodable
	// when every set bit lies inside one of those two windows
	const uint64_t unshifted_field = util::make_bitmask<uint64_t>(12);
	const uint64_t shifted_field = util::make_bitmask<uint64_t>(12) << 12;

	const bool fits_unshifted = (val & ~unshifted_field) == 0;
	const bool fits_shifted = (val & ~shifted_field) == 0;
	return fits_unshifted || fits_shifted;
}
inline constexpr bool is_valid_immediate_signed(int64_t val, size_t bits)
{
return util::sext(val, bits) == val;
@ -752,18 +758,20 @@ a64::Vec drcbe_arm64::be_parameter::select_register(a64::Vec const &reg, uint32_
{
if (m_type == PTYPE_FLOAT_REGISTER)
return get_register_float(regsize);
if (regsize == 4)
else if (regsize == 4)
return reg.s();
return reg.d();
else
return reg.d();
}
a64::Gp drcbe_arm64::be_parameter::select_register(a64::Gp const &reg, uint32_t regsize) const
{
if (m_type == PTYPE_INT_REGISTER)
return get_register_int(regsize);
if (regsize == 4)
else if (regsize == 4)
return reg.w();
return reg.x();
else
return reg.x();
}
void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const uint64_t val) const
@ -3234,7 +3242,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, cons
if (Opcode == a64::Inst::kIdAdcs)
load_carry(a);
if (src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 11))
if (src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate()))
{
const a64::Gp src = src2p.select_register(TEMP_REG2, inst.size());
@ -3245,7 +3253,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, cons
a.emit(Opcode, output, src, src1p.immediate());
mov_param_reg(a, inst.size(), dstp, output);
}
else if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
else if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
{
const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size());
@ -3285,7 +3293,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_sub(a64::Assembler &a, cons
const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size());
if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
{
const a64::Gp src = select_register(TEMP_REG1, inst.size());
@ -3319,22 +3327,23 @@ void drcbe_arm64::op_cmp(a64::Assembler &a, const uml::instruction &inst)
be_parameter src1p(*this, inst.param(0), PTYPE_MRI);
be_parameter src2p(*this, inst.param(1), PTYPE_MRI);
const a64::Gp temp = select_register(TEMP_REG1, inst.size());
const a64::Gp temp2 = select_register(TEMP_REG2, inst.size());
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
mov_reg_param(a, inst.size(), temp, src1p);
mov_reg_param(a, inst.size(), src1, src1p);
if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
{
if (src2p.is_immediate_value(0))
a.cmp(temp, select_register(a64::xzr, inst.size()));
a.cmp(src1, select_register(a64::xzr, inst.size()));
else
a.cmp(temp, src2p.immediate());
a.cmp(src1, src2p.immediate());
}
else
{
mov_reg_param(a, inst.size(), temp2, src2p);
a.cmp(temp, temp2);
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
mov_reg_param(a, inst.size(), src2, src2p);
a.cmp(src1, src2);
}
store_carry(a, true);
@ -3728,17 +3737,35 @@ void drcbe_arm64::op_test(a64::Assembler &a, const uml::instruction &inst)
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
mov_reg_param(a, inst.size(), src1, src1p);
if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size()))
if (src1p.is_immediate_value(0) || src2p.is_immediate_value(0))
{
if (src2p.is_immediate_value(0))
a.tst(src1, select_register(a64::xzr, inst.size()));
else
a.tst(src1, src2p.immediate());
const a64::Gp zero = select_register(a64::xzr, inst.size());
a.tst(zero, zero);
}
else if (src2p.is_immediate_value(util::make_bitmask<uint64_t>(inst.size() * 8)))
{
mov_reg_param(a, inst.size(), src1, src1p);
a.tst(src1, src1);
}
else if (src1p.is_immediate_value(util::make_bitmask<uint64_t>(inst.size() * 8)))
{
mov_reg_param(a, inst.size(), src2, src2p);
a.tst(src2, src2);
}
else if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size()))
{
mov_reg_param(a, inst.size(), src1, src1p);
a.tst(src1, src2p.immediate());
}
else if (src1p.is_immediate() && is_valid_immediate_mask(src1p.immediate(), inst.size()))
{
mov_reg_param(a, inst.size(), src2, src2p);
a.tst(src2, src1p.immediate());
}
else
{
mov_reg_param(a, inst.size(), src1, src1p);
mov_reg_param(a, inst.size(), src2, src2p);
a.tst(src1, src2);
}

View File

@ -1021,32 +1021,30 @@ void ppc_device::static_generate_memory_accessor(int mode, int size, int iswrite
/* check for unaligned accesses and break into two */
if (!ismasked && size != 1)
{
/* in little-endian mode, anything misaligned generates an exception */
if ((mode & MODE_LITTLE_ENDIAN) || masked == nullptr || !(m_cap & PPCCAP_MISALIGNED))
{
/* in little-endian mode, anything misaligned generates an exception */
UML_TEST(block, I0, size - 1); // test i0,size-1
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
}
/* in big-endian mode, it's more complicated */
else
{
/* 8-byte accesses must be word-aligned */
/* in big-endian mode, it's more complicated */
if (size == 8)
{
/* 8-byte accesses must be word-aligned */
UML_TEST(block, I0, 3); // test i0,3
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
/* word aligned accesses need to be broken up */
UML_TEST(block, I0, 4); // test i0,4
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned, nz
}
/* unaligned 2 and 4 byte accesses need to be broken up */
else
{
/* unaligned 2 and 4 byte accesses need to be broken up */
UML_TEST(block, I0, size - 1); // test i0,size-1
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned,nz
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned,nz
}
}
}
@ -1054,12 +1052,12 @@ void ppc_device::static_generate_memory_accessor(int mode, int size, int iswrite
/* general case: assume paging and perform a translation */
if (((m_cap & PPCCAP_OEA) && (mode & MODE_DATA_TRANSLATION)) || (iswrite && (m_cap & PPCCAP_4XX) && (mode & MODE_PROTECTION)))
{
UML_SHR(block, I3, I0, 12); // shr i3,i0,12
UML_LOAD(block, I3, (void *)vtlb_table(), I3, SIZE_DWORD, SCALE_x4);// load i3,[vtlb],i3,dword
UML_TEST(block, I3, (uint64_t)1 << translate_type); // test i3,1 << translate_type
UML_JMPc(block, COND_Z, tlbmiss = label++); // jmp tlbmiss,z
UML_LABEL(block, tlbreturn = label++); // tlbreturn:
UML_ROLINS(block, I0, I3, 0, 0xfffff000); // rolins i0,i3,0,0xfffff000
UML_SHR(block, I3, I0, 12); // shr i3,i0,12
UML_LOAD(block, I3, (void *)vtlb_table(), I3, SIZE_DWORD, SCALE_x4); // load i3,[vtlb],i3,dword
UML_TEST(block, I3, (uint64_t)1 << translate_type); // test i3,1 << translate_type
UML_JMPc(block, COND_Z, tlbmiss = label++); // jmp tlbmiss,z
UML_LABEL(block, tlbreturn = label++); // tlbreturn:
UML_ROLINS(block, I0, I3, 0, 0xfffff000); // rolins i0,i3,0,0xfffff000
}
else if (m_cap & PPCCAP_4XX)
UML_AND(block, I0, I0, 0x7fffffff); // and i0,i0,0x7fffffff