r4000: timing improvements

* external clock is doubled internally
* consume additional cycles for integer multiply/divide
* streamline memory access
This commit is contained in:
Patrick Mackinlay 2021-04-01 11:06:14 +07:00
parent ee62f40e1b
commit efa3521d7a
4 changed files with 85 additions and 25 deletions

View File

@ -90,10 +90,11 @@ DEFINE_DEVICE_TYPE(R5000, r5000_device, "r5000", "MIPS R5000")
u32 const r5000_device::s_fcc_masks[8] = { (1U << 23), (1U << 25), (1U << 26), (1U << 27), (1U << 28), (1U << 29), (1U << 30), (1U << 31) };
u32 const r5000_device::s_fcc_shifts[8] = { 23, 25, 26, 27, 28, 29, 30, 31 };
r4000_base_device::r4000_base_device(machine_config const &mconfig, device_type type, char const *tag, device_t *owner, u32 clock, u32 prid, u32 fcr, cache_size icache_size, cache_size dcache_size)
r4000_base_device::r4000_base_device(machine_config const &mconfig, device_type type, char const *tag, device_t *owner, u32 clock, u32 prid, u32 fcr, cache_size icache_size, cache_size dcache_size, unsigned m32, unsigned m64, unsigned d32, unsigned d64)
: cpu_device(mconfig, type, tag, owner, clock)
, m_program_config_le("program", ENDIANNESS_LITTLE, 64, 32)
, m_program_config_be("program", ENDIANNESS_BIG, 64, 32)
, m_hilo_cycles{ m32, m64, d32, d64 }
, m_r{}
, m_cp0{}
, m_f{}
@ -193,6 +194,8 @@ void r4000_base_device::device_start()
m_icache_mask_hi = (0x1000U << config_ic) - 1;
m_icache_tag = std::make_unique<u32[]>(0x100U << config_ic);
m_icache_data = std::make_unique<u32 []>((0x1000U << config_ic) >> 2);
R4000_ENDIAN_LE_BE(accessors(m_le), accessors(m_be));
}
void r4000_base_device::device_reset()
@ -205,6 +208,7 @@ void r4000_base_device::device_reset()
else
m_cp0[CP0_Status] = SR_BEV | SR_ERL;
m_hilo_delay = 0;
m_branch_state = NONE;
m_pc = s64(s32(0xbfc00000));
m_r[0] = 0;
@ -294,6 +298,9 @@ void r4000_base_device::execute_run()
m_r[0] = 0;
});
if (m_hilo_delay)
m_hilo_delay--;
// update pc and branch state
switch (m_branch_state)
{
@ -428,12 +435,22 @@ void r4000_base_device::cpu_execute(u32 const op)
break;
case 0x10: // MFHI
m_r[RDREG] = m_hi;
if (m_hilo_delay)
{
m_icount -= m_hilo_delay;
m_hilo_delay = 0;
}
break;
case 0x11: // MTHI
m_hi = m_r[RSREG];
break;
case 0x12: // MFLO
m_r[RDREG] = m_lo;
if (m_hilo_delay)
{
m_icount -= m_hilo_delay;
m_hilo_delay = 0;
}
break;
case 0x13: // MTLO
m_lo = m_r[RSREG];
@ -454,6 +471,7 @@ void r4000_base_device::cpu_execute(u32 const op)
m_lo = s64(s32(product));
m_hi = s64(s32(product >> 32));
m_hilo_delay = m_hilo_cycles[0];
}
break;
case 0x19: // MULTU
@ -462,6 +480,7 @@ void r4000_base_device::cpu_execute(u32 const op)
m_lo = s64(s32(product));
m_hi = s64(s32(product >> 32));
m_hilo_delay = m_hilo_cycles[0];
}
break;
case 0x1a: // DIV
@ -469,6 +488,7 @@ void r4000_base_device::cpu_execute(u32 const op)
{
m_lo = s64(s32(m_r[RSREG]) / s32(m_r[RTREG]));
m_hi = s64(s32(m_r[RSREG]) % s32(m_r[RTREG]));
m_hilo_delay = m_hilo_cycles[2];
}
break;
case 0x1b: // DIVU
@ -476,19 +496,23 @@ void r4000_base_device::cpu_execute(u32 const op)
{
m_lo = s64(s32(u32(m_r[RSREG]) / u32(m_r[RTREG])));
m_hi = s64(s32(u32(m_r[RSREG]) % u32(m_r[RTREG])));
m_hilo_delay = m_hilo_cycles[2];
}
break;
case 0x1c: // DMULT
m_lo = mul_64x64(m_r[RSREG], m_r[RTREG], *reinterpret_cast<s64 *>(&m_hi));
m_hilo_delay = m_hilo_cycles[1];
break;
case 0x1d: // DMULTU
m_lo = mulu_64x64(m_r[RSREG], m_r[RTREG], m_hi);
m_hilo_delay = m_hilo_cycles[1];
break;
case 0x1e: // DDIV
if (m_r[RTREG])
{
m_lo = s64(m_r[RSREG]) / s64(m_r[RTREG]);
m_hi = s64(m_r[RSREG]) % s64(m_r[RTREG]);
m_hilo_delay = m_hilo_cycles[3];
}
break;
case 0x1f: // DDIVU
@ -496,6 +520,7 @@ void r4000_base_device::cpu_execute(u32 const op)
{
m_lo = m_r[RSREG] / m_r[RTREG];
m_hi = m_r[RSREG] % m_r[RTREG];
m_hilo_delay = m_hilo_cycles[3];
}
break;
case 0x20: // ADD
@ -3704,6 +3729,21 @@ void r4000_base_device::address_error(int intention, u64 const address)
}
}
/*
 * Passes the supplied memory access object to space(AS_PROGRAM).cache() and
 * then routes every read_xxx/write_xxx helper member through that object.
 *
 * Each helper captures m by reference, so m has to remain valid for as long
 * as the helpers may be invoked.
 */
template <typename T> void r4000_base_device::accessors(T &m)
{
	space(AS_PROGRAM).cache(m);

	// loads: forward the address and hand back whatever m returns
	read_byte = [&m](offs_t address) { return m.read_byte(address); };
	read_word = [&m](offs_t address) { return m.read_word(address); };
	read_dword = [&m](offs_t address) { return m.read_dword(address); };
	read_qword = [&m](offs_t address) { return m.read_qword(address); };

	// stores: byte writes take no mask, wider writes forward the caller's mask
	write_byte = [&m](offs_t address, u8 value) { m.write_byte(address, value); };
	write_word = [&m](offs_t address, u16 value, u16 mask) { m.write_word(address, value, mask); };
	write_dword = [&m](offs_t address, u32 value, u32 mask) { m.write_dword(address, value, mask); };
	write_qword = [&m](offs_t address, u64 value, u64 mask) { m.write_qword(address, value, mask); };
}
template <typename T, bool Aligned, typename U> std::enable_if_t<std::is_convertible<U, std::function<void(T)>>::value, bool> r4000_base_device::load(u64 address, U &&apply)
{
// alignment error
@ -3748,10 +3788,10 @@ template <typename T, bool Aligned, typename U> std::enable_if_t<std::is_convert
T value = 0;
switch (sizeof(T))
{
case 1: value = T(space(0).read_byte(address)); break;
case 2: value = T(space(0).read_word(address)); break;
case 4: value = T(space(0).read_dword(address)); break;
case 8: value = T(space(0).read_qword(address)); break;
case 1: value = T(read_byte(address)); break;
case 2: value = T(read_word(address)); break;
case 4: value = T(read_dword(address)); break;
case 8: value = T(read_qword(address)); break;
}
if (m_bus_error)
@ -3805,8 +3845,8 @@ template <typename T, typename U> std::enable_if_t<std::is_convertible<U, std::f
switch (sizeof(T))
{
case 4: apply(address, T(space(0).read_dword(address))); break;
case 8: apply(address, T(space(0).read_qword(address))); break;
case 4: apply(address, T(read_dword(address))); break;
case 8: apply(address, T(read_qword(address))); break;
}
return true;
@ -3854,10 +3894,10 @@ template <typename T, bool Aligned, typename U> std::enable_if_t<std::is_convert
switch (sizeof(T))
{
case 1: space(0).write_byte(address, T(data)); break;
case 2: space(0).write_word(address, T(data), mem_mask); break;
case 4: space(0).write_dword(address, T(data), mem_mask); break;
case 8: space(0).write_qword(address, T(data), mem_mask); break;
case 1: write_byte(address, T(data)); break;
case 2: write_word(address, T(data), mem_mask); break;
case 4: write_dword(address, T(data), mem_mask); break;
case 8: write_qword(address, T(data), mem_mask); break;
}
return true;
@ -3893,7 +3933,7 @@ bool r4000_base_device::fetch(u64 address, std::function<void(u32)> &&apply)
{
if (t == UNCACHED)
{
const u32 insn = space(0).read_dword(address);
const u32 insn = read_dword(address);
if (m_bus_error)
{
@ -3922,7 +3962,7 @@ bool r4000_base_device::fetch(u64 address, std::function<void(u32)> &&apply)
tag = ICACHE_V | (address >> 12);
for (unsigned i = 0; i < m_icache_line_size; i += 8)
{
u64 const data = space(0).read_qword((address & m_icache_mask_lo) | i);
u64 const data = read_qword((address & m_icache_mask_lo) | i);
m_icache_data[(((cache_address & m_icache_mask_lo) | i) >> 2) + 0] = u32(data);
m_icache_data[(((cache_address & m_icache_mask_lo) | i) >> 2) + 1] = data >> 32;
@ -3936,7 +3976,7 @@ bool r4000_base_device::fetch(u64 address, std::function<void(u32)> &&apply)
}
else
{
const u32 insn = space(0).read_dword(address);
const u32 insn = read_dword(address);
if (m_bus_error)
{

View File

@ -57,7 +57,7 @@ protected:
CACHE_256K = 6,
CACHE_512K = 7,
};
r4000_base_device(machine_config const &mconfig, device_type type, char const *tag, device_t *owner, u32 clock, u32 prid, u32 fcr, cache_size icache_size, cache_size dcache_size);
r4000_base_device(machine_config const &mconfig, device_type type, char const *tag, device_t *owner, u32 clock, u32 prid, u32 fcr, cache_size icache_size, cache_size dcache_size, unsigned m32, unsigned m64, unsigned d32, unsigned d64);
enum cp0_reg : int
{
@ -320,8 +320,10 @@ protected:
virtual std::unique_ptr<util::disasm_interface> create_disassembler() override;
// device_execute_interface overrides
// the external clock is doubled internally: every input clock yields two cycles
virtual u64 execute_clocks_to_cycles(u64 clocks) const noexcept override { return (clocks * 2); }
// two cycles per input clock; adding one before halving rounds an odd cycle count up
virtual u64 execute_cycles_to_clocks(u64 cycles) const noexcept override { return (cycles + 1) / 2; }
virtual u32 execute_min_cycles() const noexcept override { return 1; }
virtual u32 execute_max_cycles() const noexcept override { return 40; }
virtual u32 execute_max_cycles() const noexcept override { return *std::max_element(std::begin(m_hilo_cycles), std::end(m_hilo_cycles)); }
virtual u32 execute_input_lines() const noexcept override { return 6; }
virtual void execute_run() override;
virtual void execute_set_input(int inputnum, int state) override;
@ -370,6 +372,7 @@ protected:
translate_result translate(int intention, u64 &address);
void address_error(int intention, u64 const address);
template <typename T> void accessors(T &m);
template <typename T, bool Aligned = true, typename U> std::enable_if_t<std::is_convertible<U, std::function<void(T)>>::value, bool> load(u64 program_address, U &&apply);
template <typename T, typename U> std::enable_if_t<std::is_convertible<U, std::function<void(u64, T)>>::value, bool> load_linked(u64 program_address, U &&apply);
template <typename T, bool Aligned = true, typename U> std::enable_if_t<std::is_convertible<U, T>::value, bool> store(u64 program_address, U data, T mem_mask = ~T(0));
@ -384,9 +387,26 @@ protected:
address_space_config m_program_config_le;
address_space_config m_program_config_be;
// memory access helpers
memory_access<64, 3, 0, ENDIANNESS_LITTLE>::cache m_le;
memory_access<64, 3, 0, ENDIANNESS_BIG>::cache m_be;
std::function<u8(offs_t offset)> read_byte;
std::function<u16(offs_t offset)> read_word;
std::function<u32(offs_t offset)> read_dword;
std::function<u64(offs_t offset)> read_qword;
std::function<void(offs_t offset, u8 data)> write_byte;
std::function<void(offs_t offset, u16 data, u16 mem_mask)> write_word;
std::function<void(offs_t offset, u32 data, u32 mem_mask)> write_dword;
std::function<void(offs_t offset, u64 data, u64 mem_mask)> write_qword;
// runtime state
int m_icount;
// integer multiply/divide state
unsigned const m_hilo_cycles[4];
unsigned m_hilo_delay;
// cpu state
u64 m_pc;
u64 m_r[32];
@ -447,7 +467,7 @@ class r4000_device : public r4000_base_device
public:
// NOTE: R4000 chips prior to 3.0 have an xtlb bug
r4000_device(const machine_config &mconfig, const char *tag, device_t *owner, u32 clock)
: r4000_base_device(mconfig, R4000, tag, owner, clock, 0x0430, 0x0500, CACHE_8K, CACHE_8K)
: r4000_base_device(mconfig, R4000, tag, owner, clock, 0x0430, 0x0500, CACHE_8K, CACHE_8K, 10, 20, 69, 133)
{
// no secondary cache
m_cp0[CP0_Config] |= CONFIG_SC;
@ -458,7 +478,7 @@ class r4400_device : public r4000_base_device
{
public:
r4400_device(const machine_config &mconfig, const char *tag, device_t *owner, u32 clock)
: r4000_base_device(mconfig, R4400, tag, owner, clock, 0x0440, 0x0500, CACHE_16K, CACHE_16K)
: r4000_base_device(mconfig, R4400, tag, owner, clock, 0x0440, 0x0500, CACHE_16K, CACHE_16K, 10, 20, 69, 133)
{
// no secondary cache
m_cp0[CP0_Config] |= CONFIG_SC;
@ -469,7 +489,7 @@ class r4600_device : public r4000_base_device
{
public:
r4600_device(const machine_config &mconfig, const char *tag, device_t *owner, u32 clock)
: r4000_base_device(mconfig, R4600, tag, owner, clock, 0x2020, 0x2020, CACHE_16K, CACHE_16K)
: r4000_base_device(mconfig, R4600, tag, owner, clock, 0x2020, 0x2020, CACHE_16K, CACHE_16K, 10, 12, 42, 74)
{
// no secondary cache
m_cp0[CP0_Config] |= CONFIG_SC;
@ -480,7 +500,7 @@ class r5000_device : public r4000_base_device
{
public:
r5000_device(const machine_config &mconfig, const char *tag, device_t *owner, u32 clock)
: r4000_base_device(mconfig, R5000, tag, owner, clock, 0x2320, 0x2320, CACHE_32K, CACHE_32K)
: r4000_base_device(mconfig, R5000, tag, owner, clock, 0x2320, 0x2320, CACHE_32K, CACHE_32K, 5, 9, 36, 68)
{
// no secondary cache
m_cp0[CP0_Config] |= CONFIG_SC;

View File

@ -214,7 +214,7 @@ void indigo4k_state::indigo4k(machine_config &config)
{
indigo_base(config);
R4000(config, m_maincpu, 50000000*2);
R4000(config, m_maincpu, 50000000);
//m_maincpu->set_icache_size(32768);
//m_maincpu->set_dcache_size(32768);
m_maincpu->set_addrmap(AS_PROGRAM, &indigo4k_state::mem_map);

View File

@ -407,7 +407,7 @@ void ip24_state::indy_5015(machine_config &config)
{
ip24(config);
R5000(config, m_maincpu, 50000000*3);
R5000(config, m_maincpu, 75'000'000);
m_maincpu->set_addrmap(AS_PROGRAM, &ip24_state::ip24_map);
}
@ -415,7 +415,7 @@ void ip24_state::indy_4613(machine_config &config)
{
ip24(config);
R4600(config, m_maincpu, 33333333*4);
R4600(config, m_maincpu, 66'666'666);
m_maincpu->set_addrmap(AS_PROGRAM, &ip24_state::ip24_map);
}
@ -423,7 +423,7 @@ void ip24_state::indy_4610(machine_config &config)
{
ip24(config);
R4600(config, m_maincpu, 33333333*3);
R4600(config, m_maincpu, 50'000'000);
m_maincpu->set_addrmap(AS_PROGRAM, &ip24_state::ip24_map);
}
@ -436,7 +436,7 @@ void ip22_state::wd33c93_2(device_t *device)
void ip22_state::indigo2_4415(machine_config &config)
{
R4400(config, m_maincpu, 50000000*3);
R4400(config, m_maincpu, 75'000'000);
m_maincpu->set_addrmap(AS_PROGRAM, &ip22_state::ip22_map);
ip24_base(config);