mirror of
https://github.com/holub/mame
synced 2025-04-23 00:39:36 +03:00
-cpu/drcbearm64.cpp: More fixes and optimisations:
* Correctly identify valid immediate constants for add/sub/cmp (it was too conservative).
* Don't unnecessarily copy UML register values kept in host registers for CMP.
* Fixed detection of TST against immediate zero and optimised generated code.
* Optimised TST against immediate with all bits set.

-cpu/alto2: Follow the same pattern as the other things that have been altered to avoid problematic memsets in this device.

-cpu/powerpc: Realigned some comments that had drifted.
This commit is contained in:
parent
14d11c479e
commit
e75c7079cf
@ -499,7 +499,7 @@ void alto2_cpu_device::f2_late_evenfield()
|
||||
*/
|
||||
void alto2_cpu_device::init_disp()
|
||||
{
|
||||
m_dsp.clear();
|
||||
m_dsp = decltype(m_dsp)();
|
||||
save_item(NAME(m_dsp.state));
|
||||
save_item(NAME(m_dsp.hlc));
|
||||
save_item(NAME(m_dsp.setmode));
|
||||
|
@ -193,35 +193,24 @@
|
||||
#ifndef MAME_CPU_ALTO2_A2DISP_H
|
||||
#define MAME_CPU_ALTO2_A2DISP_H
|
||||
struct {
|
||||
|
||||
void clear()
|
||||
{
|
||||
state = hlc = setmode = inverse = scanline = 0;
|
||||
halfclock = vblank = false;
|
||||
std::fill(std::begin(fifo), std::end(fifo), 0);
|
||||
wa = ra = a63 = a66 = 0;
|
||||
dht_blocks = dwt_blocks = curt_blocks = curt_wakeup = false;
|
||||
xpreg = csr = 0;
|
||||
}
|
||||
|
||||
uint32_t state; //!< current state of the display_state_machine()
|
||||
uint32_t hlc; //!< horizontal line counter
|
||||
uint32_t setmode; //!< value written by last SETMODE<-
|
||||
uint32_t inverse; //!< set to 0xffff if line is inverse, 0x0000 otherwise
|
||||
uint32_t scanline; //!< current scanline
|
||||
bool halfclock; //!< false for normal pixel clock, true for half pixel clock
|
||||
bool vblank; //!< true during vblank, false otherwise
|
||||
uint16_t fifo[A2_DISP_FIFO]; //!< display word fifo
|
||||
uint32_t wa; //!< fifo input pointer (write address; 4-bit)
|
||||
uint32_t ra; //!< fifo output pointer (read address; 4-bit)
|
||||
uint32_t a63; //!< most recent value read from the PROM a63
|
||||
uint32_t a66; //!< most recent value read from the PROM a66
|
||||
bool dht_blocks; //!< set true, if the DHT executed BLOCK
|
||||
bool dwt_blocks; //!< set true, if the DWT executed BLOCK
|
||||
bool curt_blocks; //!< set true, if the CURT executed BLOCK
|
||||
bool curt_wakeup; //!< set true, if CURT wakeups are generated
|
||||
uint32_t xpreg; //!< cursor x position register (10-bit)
|
||||
uint32_t csr; //!< cursor shift register (16-bit)
|
||||
uint32_t state = 0; //!< current state of the display_state_machine()
|
||||
uint32_t hlc = 0; //!< horizontal line counter
|
||||
uint32_t setmode = 0; //!< value written by last SETMODE<-
|
||||
uint32_t inverse = 0; //!< set to 0xffff if line is inverse, 0x0000 otherwise
|
||||
uint32_t scanline = 0; //!< current scanline
|
||||
bool halfclock = false; //!< false for normal pixel clock, true for half pixel clock
|
||||
bool vblank = false; //!< true during vblank, false otherwise
|
||||
uint16_t fifo[A2_DISP_FIFO] = { }; //!< display word fifo
|
||||
uint32_t wa = 0; //!< fifo input pointer (write address; 4-bit)
|
||||
uint32_t ra = 0; //!< fifo output pointer (read address; 4-bit)
|
||||
uint32_t a63 = 0; //!< most recent value read from the PROM a63
|
||||
uint32_t a66 = 0; //!< most recent value read from the PROM a66
|
||||
bool dht_blocks = false; //!< set true, if the DHT executed BLOCK
|
||||
bool dwt_blocks = false; //!< set true, if the DWT executed BLOCK
|
||||
bool curt_blocks = false; //!< set true, if the CURT executed BLOCK
|
||||
bool curt_wakeup = false; //!< set true, if CURT wakeups are generated
|
||||
uint32_t xpreg = 0; //!< cursor x position register (10-bit)
|
||||
uint32_t csr = 0; //!< cursor shift register (16-bit)
|
||||
std::unique_ptr<uint16_t[]> framebuf; //!< array of words of the raw bitmap that is displayed
|
||||
std::unique_ptr<uint8_t[]> patterns; //!< array of 65536 patterns (16 bytes) with 1 byte per pixel
|
||||
std::unique_ptr<bitmap_ind16> bitmap; //!< MAME bitmap with 16 bit indices
|
||||
|
@ -251,6 +251,12 @@ inline bool is_valid_immediate(uint64_t val, size_t bits)
|
||||
return val < (uint64_t(1) << bits);
|
||||
}
|
||||
|
||||
constexpr bool is_valid_immediate_addsub(uint64_t val)
|
||||
{
|
||||
// 12-bit unsigned immediate value, optionally left-shifted by 12 bits
|
||||
return !(val & ~util::make_bitmask<uint64_t>(12)) || !(val & ~(util::make_bitmask<uint64_t>(12) << 12));
|
||||
}
|
||||
|
||||
inline constexpr bool is_valid_immediate_signed(int64_t val, size_t bits)
|
||||
{
|
||||
return util::sext(val, bits) == val;
|
||||
@ -752,18 +758,20 @@ a64::Vec drcbe_arm64::be_parameter::select_register(a64::Vec const &reg, uint32_
|
||||
{
|
||||
if (m_type == PTYPE_FLOAT_REGISTER)
|
||||
return get_register_float(regsize);
|
||||
if (regsize == 4)
|
||||
else if (regsize == 4)
|
||||
return reg.s();
|
||||
return reg.d();
|
||||
else
|
||||
return reg.d();
|
||||
}
|
||||
|
||||
// Chooses the general-purpose register to use for this parameter.
//
// reg     - fallback register used when the parameter is not of type
//           PTYPE_INT_REGISTER
// regsize - operand size; 4 selects the .w() view of the fallback, any other
//           value selects the .x() view
//
// Returns get_register_int(regsize) when the parameter's type is
// PTYPE_INT_REGISTER, otherwise the fallback register in the view matching
// regsize.
a64::Gp drcbe_arm64::be_parameter::select_register(a64::Gp const &reg, uint32_t regsize) const
{
	if (m_type == PTYPE_INT_REGISTER)
		return get_register_int(regsize);
	else if (regsize == 4)
		return reg.w();
	else
		return reg.x();
}
|
||||
|
||||
void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const uint64_t val) const
|
||||
@ -3234,7 +3242,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, cons
|
||||
if (Opcode == a64::Inst::kIdAdcs)
|
||||
load_carry(a);
|
||||
|
||||
if (src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 11))
|
||||
if (src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate()))
|
||||
{
|
||||
const a64::Gp src = src2p.select_register(TEMP_REG2, inst.size());
|
||||
|
||||
@ -3245,7 +3253,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, cons
|
||||
a.emit(Opcode, output, src, src1p.immediate());
|
||||
mov_param_reg(a, inst.size(), dstp, output);
|
||||
}
|
||||
else if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
|
||||
else if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
|
||||
{
|
||||
const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size());
|
||||
|
||||
@ -3285,7 +3293,7 @@ template <a64::Inst::Id Opcode> void drcbe_arm64::op_sub(a64::Assembler &a, cons
|
||||
|
||||
const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size());
|
||||
|
||||
if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
|
||||
if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
|
||||
{
|
||||
const a64::Gp src = select_register(TEMP_REG1, inst.size());
|
||||
|
||||
@ -3319,22 +3327,23 @@ void drcbe_arm64::op_cmp(a64::Assembler &a, const uml::instruction &inst)
|
||||
be_parameter src1p(*this, inst.param(0), PTYPE_MRI);
|
||||
be_parameter src2p(*this, inst.param(1), PTYPE_MRI);
|
||||
|
||||
const a64::Gp temp = select_register(TEMP_REG1, inst.size());
|
||||
const a64::Gp temp2 = select_register(TEMP_REG2, inst.size());
|
||||
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
|
||||
|
||||
mov_reg_param(a, inst.size(), temp, src1p);
|
||||
mov_reg_param(a, inst.size(), src1, src1p);
|
||||
|
||||
if (src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 11))
|
||||
if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate()))
|
||||
{
|
||||
if (src2p.is_immediate_value(0))
|
||||
a.cmp(temp, select_register(a64::xzr, inst.size()));
|
||||
a.cmp(src1, select_register(a64::xzr, inst.size()));
|
||||
else
|
||||
a.cmp(temp, src2p.immediate());
|
||||
a.cmp(src1, src2p.immediate());
|
||||
}
|
||||
else
|
||||
{
|
||||
mov_reg_param(a, inst.size(), temp2, src2p);
|
||||
a.cmp(temp, temp2);
|
||||
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
|
||||
|
||||
mov_reg_param(a, inst.size(), src2, src2p);
|
||||
a.cmp(src1, src2);
|
||||
}
|
||||
|
||||
store_carry(a, true);
|
||||
@ -3728,17 +3737,35 @@ void drcbe_arm64::op_test(a64::Assembler &a, const uml::instruction &inst)
|
||||
const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size());
|
||||
const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size());
|
||||
|
||||
mov_reg_param(a, inst.size(), src1, src1p);
|
||||
|
||||
if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size()))
|
||||
if (src1p.is_immediate_value(0) || src2p.is_immediate_value(0))
|
||||
{
|
||||
if (src2p.is_immediate_value(0))
|
||||
a.tst(src1, select_register(a64::xzr, inst.size()));
|
||||
else
|
||||
a.tst(src1, src2p.immediate());
|
||||
const a64::Gp zero = select_register(a64::xzr, inst.size());
|
||||
|
||||
a.tst(zero, zero);
|
||||
}
|
||||
else if (src2p.is_immediate_value(util::make_bitmask<uint64_t>(inst.size() * 8)))
|
||||
{
|
||||
mov_reg_param(a, inst.size(), src1, src1p);
|
||||
a.tst(src1, src1);
|
||||
}
|
||||
else if (src1p.is_immediate_value(util::make_bitmask<uint64_t>(inst.size() * 8)))
|
||||
{
|
||||
mov_reg_param(a, inst.size(), src2, src2p);
|
||||
a.tst(src2, src2);
|
||||
}
|
||||
else if (src2p.is_immediate() && is_valid_immediate_mask(src2p.immediate(), inst.size()))
|
||||
{
|
||||
mov_reg_param(a, inst.size(), src1, src1p);
|
||||
a.tst(src1, src2p.immediate());
|
||||
}
|
||||
else if (src1p.is_immediate() && is_valid_immediate_mask(src1p.immediate(), inst.size()))
|
||||
{
|
||||
mov_reg_param(a, inst.size(), src2, src2p);
|
||||
a.tst(src2, src1p.immediate());
|
||||
}
|
||||
else
|
||||
{
|
||||
mov_reg_param(a, inst.size(), src1, src1p);
|
||||
mov_reg_param(a, inst.size(), src2, src2p);
|
||||
a.tst(src1, src2);
|
||||
}
|
||||
|
@ -1021,32 +1021,30 @@ void ppc_device::static_generate_memory_accessor(int mode, int size, int iswrite
|
||||
/* check for unaligned accesses and break into two */
|
||||
if (!ismasked && size != 1)
|
||||
{
|
||||
/* in little-endian mode, anything misaligned generates an exception */
|
||||
if ((mode & MODE_LITTLE_ENDIAN) || masked == nullptr || !(m_cap & PPCCAP_MISALIGNED))
|
||||
{
|
||||
/* in little-endian mode, anything misaligned generates an exception */
|
||||
UML_TEST(block, I0, size - 1); // test i0,size-1
|
||||
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
|
||||
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
|
||||
}
|
||||
|
||||
/* in big-endian mode, it's more complicated */
|
||||
else
|
||||
{
|
||||
/* 8-byte accesses must be word-aligned */
|
||||
/* in big-endian mode, it's more complicated */
|
||||
if (size == 8)
|
||||
{
|
||||
/* 8-byte accesses must be word-aligned */
|
||||
UML_TEST(block, I0, 3); // test i0,3
|
||||
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
|
||||
UML_JMPc(block, COND_NZ, alignex = label++); // jmp alignex,nz
|
||||
|
||||
/* word aligned accesses need to be broken up */
|
||||
UML_TEST(block, I0, 4); // test i0,4
|
||||
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned, nz
|
||||
}
|
||||
|
||||
/* unaligned 2 and 4 byte accesses need to be broken up */
|
||||
else
|
||||
{
|
||||
/* unaligned 2 and 4 byte accesses need to be broken up */
|
||||
UML_TEST(block, I0, size - 1); // test i0,size-1
|
||||
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned,nz
|
||||
UML_JMPc(block, COND_NZ, unaligned = label++); // jmp unaligned,nz
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1054,12 +1052,12 @@ void ppc_device::static_generate_memory_accessor(int mode, int size, int iswrite
|
||||
/* general case: assume paging and perform a translation */
|
||||
if (((m_cap & PPCCAP_OEA) && (mode & MODE_DATA_TRANSLATION)) || (iswrite && (m_cap & PPCCAP_4XX) && (mode & MODE_PROTECTION)))
|
||||
{
|
||||
UML_SHR(block, I3, I0, 12); // shr i3,i0,12
|
||||
UML_LOAD(block, I3, (void *)vtlb_table(), I3, SIZE_DWORD, SCALE_x4);// load i3,[vtlb],i3,dword
|
||||
UML_TEST(block, I3, (uint64_t)1 << translate_type); // test i3,1 << translate_type
|
||||
UML_JMPc(block, COND_Z, tlbmiss = label++); // jmp tlbmiss,z
|
||||
UML_LABEL(block, tlbreturn = label++); // tlbreturn:
|
||||
UML_ROLINS(block, I0, I3, 0, 0xfffff000); // rolins i0,i3,0,0xfffff000
|
||||
UML_SHR(block, I3, I0, 12); // shr i3,i0,12
|
||||
UML_LOAD(block, I3, (void *)vtlb_table(), I3, SIZE_DWORD, SCALE_x4); // load i3,[vtlb],i3,dword
|
||||
UML_TEST(block, I3, (uint64_t)1 << translate_type); // test i3,1 << translate_type
|
||||
UML_JMPc(block, COND_Z, tlbmiss = label++); // jmp tlbmiss,z
|
||||
UML_LABEL(block, tlbreturn = label++); // tlbreturn:
|
||||
UML_ROLINS(block, I0, I3, 0, 0xfffff000); // rolins i0,i3,0,0xfffff000
|
||||
}
|
||||
else if (m_cap & PPCCAP_4XX)
|
||||
UML_AND(block, I0, I0, 0x7fffffff); // and i0,i0,0x7fffffff
|
||||
|
Loading…
Reference in New Issue
Block a user