From cfb1d575c3f58ea8bd1cf87c2c9a52fd82b99b28 Mon Sep 17 00:00:00 2001 From: 987123879113 <63495610+987123879113@users.noreply.github.com> Date: Sun, 29 Dec 2024 23:32:40 +0900 Subject: [PATCH] Fixed more recompiler backend issues: (#13132) * cpu/drcbex86.cpp: Clear top half of iregs on loads to I0-I3, fix LOADS sign extension, and fixed FLOAD/FSTORE data size. * cpu/drcbex64.cpp: Use appropriate register size for LOAD. * cpu/drcbec.cpp: Fix parameter sizes for READM/WRITEM, and clear upper half of registers after 32-bit operations. * cpu/uml.cpp: Fixed definition for FSREAD/FSWRITE and FDREAD/FDWRITE, and fixed READM/WRITEM simplification. --- src/devices/cpu/drcbec.cpp | 75 ++++++++++++++++++++++++++++++++---- src/devices/cpu/drcbex64.cpp | 23 +++++------ src/devices/cpu/drcbex86.cpp | 32 ++++++++------- src/devices/cpu/drcumlsh.h | 8 ++-- src/devices/cpu/uml.cpp | 4 +- src/devices/cpu/uml.h | 2 + 6 files changed, 104 insertions(+), 40 deletions(-) diff --git a/src/devices/cpu/drcbec.cpp b/src/devices/cpu/drcbec.cpp index 143fbfeb248..5385d8ca2fd 100644 --- a/src/devices/cpu/drcbec.cpp +++ b/src/devices/cpu/drcbec.cpp @@ -324,13 +324,30 @@ void drcbe_c::reset() void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_t numinst) { + // Calculate the max possible number of register clears required + uint32_t regclears = 0; + + for (int inum = 0; inum < numinst; inum++) + { + const instruction &inst = instlist[inum]; + + if (inst.size() != 4) + continue; + + for (int pnum = 0; pnum < inst.numparams(); pnum++) + { + if (uml::instruction::is_param_out(inst.opcode(), pnum) && inst.param(pnum).is_int_register()) + regclears++; + } + } + // tell all of our utility objects that a block is beginning - m_hash.block_begin(block, instlist, numinst); + m_hash.block_begin(block, instlist, numinst + regclears); m_labels.block_begin(block); m_map.block_begin(block); // begin codegen; fail if we can't - drccodeptr *cachetop = m_cache.begin_codegen(numinst * 
sizeof(drcbec_instruction) * 4); + drccodeptr *cachetop = m_cache.begin_codegen((numinst + regclears) * sizeof(drcbec_instruction) * 4); if (cachetop == nullptr) block.abort(); @@ -338,6 +355,9 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_ drcbec_instruction *base = (drcbec_instruction *)(((uintptr_t)*cachetop + sizeof(drcbec_instruction) - 1) & ~(sizeof(drcbec_instruction) - 1)); drcbec_instruction *dst = base; + bool ireg_needs_clearing[REG_I_COUNT]; + std::fill(std::begin(ireg_needs_clearing), std::end(ireg_needs_clearing), true); + // generate code by copying the instructions and extracting immediates for (int inum = 0; inum < numinst; inum++) { @@ -391,10 +411,14 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_ psize[2] = 4; if (opcode == OP_STORE || opcode == OP_FSTORE) psize[1] = 4; - if (opcode == OP_READ || opcode == OP_READM || opcode == OP_FREAD) + if (opcode == OP_READ || opcode == OP_FREAD) psize[1] = psize[2] = 4; - if (opcode == OP_WRITE || opcode == OP_WRITEM || opcode == OP_FWRITE) + if (opcode == OP_WRITE || opcode == OP_FWRITE) psize[0] = psize[2] = 4; + if (opcode == OP_READM) + psize[1] = psize[3] = 4; + if (opcode == OP_WRITEM) + psize[0] = psize[3] = 4; if (opcode == OP_SEXT && inst.param(2).size() != SIZE_QWORD) psize[1] = 4; if (opcode == OP_FTOINT) @@ -449,6 +473,41 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_ // point past the end of the immediates dst += immedwords; + + // Keep track of which registers had an 8 byte write and clear it the next time it's written + if (inst.size() == 4) + { + for (int pnum = 0; pnum < inst.numparams(); pnum++) + { + if (uml::instruction::is_param_out(inst.opcode(), pnum) && inst.param(pnum).is_int_register() && ireg_needs_clearing[inst.param(pnum).ireg() - REG_I0]) + { + immedwords = (8 + sizeof(drcbec_instruction) - 1) / sizeof(drcbec_instruction); + + (dst++)->i = MAKE_OPCODE_FULL(OP_AND, 8, 0, 
0, 3 + immedwords); + + immed = dst + 3; + + output_parameter(&dst, &immed, 8, inst.param(pnum)); + output_parameter(&dst, &immed, 8, inst.param(pnum)); + output_parameter(&dst, &immed, 8, 0xffffffff); + + dst += immedwords; + + ireg_needs_clearing[inst.param(pnum).ireg() - REG_I0] = false; + } + } + } + else if (inst.size() == 8) + { + for (int pnum = 0; pnum < inst.numparams(); pnum++) + { + if (uml::instruction::is_param_out(inst.opcode(), pnum) && inst.param(pnum).is_int_register()) + { + ireg_needs_clearing[inst.param(pnum).ireg() - REG_I0] = true; + } + } + } + break; } } @@ -1455,20 +1514,20 @@ int drcbe_c::execute(code_handle &entry) DPARAM0 = m_space[PARAM2]->read_dword(PARAM1); break; - case MAKE_OPCODE_SHORT(OP_READ8, 8, 0): // DREAD dst,src1,space_QOWRD + case MAKE_OPCODE_SHORT(OP_READ8, 8, 0): // DREAD dst,src1,space_QWORD DPARAM0 = m_space[PARAM2]->read_qword(PARAM1); break; case MAKE_OPCODE_SHORT(OP_READM2, 8, 0): // DREADM dst,src1,mask,space_WORD - DPARAM0 = m_space[PARAM3]->read_word(PARAM1, PARAM2); + DPARAM0 = m_space[PARAM3]->read_word(PARAM1, DPARAM2); break; case MAKE_OPCODE_SHORT(OP_READM4, 8, 0): // DREADM dst,src1,mask,space_DWORD - DPARAM0 = m_space[PARAM3]->read_dword(PARAM1, PARAM2); + DPARAM0 = m_space[PARAM3]->read_dword(PARAM1, DPARAM2); break; case MAKE_OPCODE_SHORT(OP_READM8, 8, 0): // DREADM dst,src1,mask,space_QWORD - DPARAM0 = m_space[PARAM3]->read_qword(PARAM1, PARAM2); + DPARAM0 = m_space[PARAM3]->read_qword(PARAM1, DPARAM2); break; case MAKE_OPCODE_SHORT(OP_WRITE1, 8, 0): // DWRITE dst,src1,space_BYTE diff --git a/src/devices/cpu/drcbex64.cpp b/src/devices/cpu/drcbex64.cpp index 7d908b3697c..17b25bba958 100644 --- a/src/devices/cpu/drcbex64.cpp +++ b/src/devices/cpu/drcbex64.cpp @@ -2306,7 +2306,7 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs); // pick a target register for the general case - Gp dstreg = dstp.select_register(eax); + 
Gp dstreg = (inst.size() == 4) ? dstp.select_register(eax) : dstp.select_register(rax); // immediate index if (indp.is_immediate()) @@ -2314,13 +2314,13 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) s32 const offset = baseoffs + (s32(indp.immediate()) << scalesizep.scale()); if (size == SIZE_BYTE) - a.movzx(dstreg, byte_ptr(basereg, offset)); // movzx dstreg,[basep + scale*indp] + a.movzx(dstreg, byte_ptr(basereg, offset)); else if (size == SIZE_WORD) - a.movzx(dstreg, word_ptr(basereg, offset)); // movzx dstreg,[basep + scale*indp] + a.movzx(dstreg, word_ptr(basereg, offset)); else if (size == SIZE_DWORD) - a.mov(dstreg, ptr(basereg, offset)); // mov dstreg,[basep + scale*indp] + a.mov(dstreg, dword_ptr(basereg, offset)); else if (size == SIZE_QWORD) - a.mov(dstreg.r64(), ptr(basereg, offset)); // mov dstreg,[basep + scale*indp] + a.mov(dstreg, ptr(basereg, offset)); } // other index @@ -2329,20 +2329,17 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst) Gp indreg = indp.select_register(rcx); movsx_r64_p32(a, indreg, indp); if (size == SIZE_BYTE) - a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movzx dstreg,[basep + scale*indp] + a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_WORD) - a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movzx dstreg,[basep + scale*indp] + a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_DWORD) - a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp] + a.mov(dstreg, dword_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); else if (size == SIZE_QWORD) - a.mov(dstreg.r64(), ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp] + a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); } // store result - if (inst.size() == 4) - mov_param_reg(a, dstp, dstreg); // 
mov dstp,dstreg - else - mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg + mov_param_reg(a, dstp, dstreg); } diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp index 8ce4c0d3f69..6e187e6846a 100644 --- a/src/devices/cpu/drcbex86.cpp +++ b/src/devices/cpu/drcbex86.cpp @@ -1020,6 +1020,7 @@ void drcbe_x86::emit_mov_p32_r32(Assembler &a, be_parameter const &param, Gp con { if (reg.id() != param.ireg()) a.mov(Gpd(param.ireg()), reg); // mov param,reg + a.mov(MABS(m_reghi[param.ireg()], 4), 0); } } @@ -3170,7 +3171,12 @@ void drcbe_x86::op_loads(Assembler &a, const instruction &inst) // 64-bit form stores upper 32 bits if (inst.size() == 8) { - a.cdq(); // cdq + if (size != SIZE_QWORD) + { + if (dstreg.id() != eax.id()) + a.mov(eax, dstreg); + a.cdq(); // cdq + } if (dstp.is_memory()) a.mov(MABS(dstp.memory(4)), edx); // mov [dstp+4],edx else if (dstp.is_int_register()) @@ -5839,9 +5845,9 @@ void drcbe_x86::op_fload(Assembler &a, const instruction &inst) // immediate index if (indp.is_immediate()) { - a.mov(eax, MABS(basep.memory(4*indp.immediate()))); // mov eax,[basep + 4*indp] + a.mov(eax, MABS(basep.memory(inst.size()*indp.immediate()))); if (inst.size() == 8) - a.mov(edx, MABS(basep.memory(4 + 4*indp.immediate()))); // mov edx,[basep + 4*indp + 4] + a.mov(edx, MABS(basep.memory(4 + inst.size()*indp.immediate()))); } // other index @@ -5849,15 +5855,15 @@ void drcbe_x86::op_fload(Assembler &a, const instruction &inst) { Gp const indreg = indp.select_register(ecx); emit_mov_r32_p32(a, indreg, indp); - a.mov(eax, ptr(u64(basep.memory(0)), indreg, 2)); // mov eax,[basep + 4*indp] + a.mov(eax, ptr(u64(basep.memory(0)), indreg, (inst.size() == 8) ? 3 : 2)); if (inst.size() == 8) - a.mov(edx, ptr(u64(basep.memory(4)), indreg, 2)); // mov edx,[basep + 4*indp + 4] + a.mov(edx, ptr(u64(basep.memory(4)), indreg, (inst.size() == 8) ? 
3 : 2)); } // general case - a.mov(MABS(dstp.memory(0)), eax); // mov [dstp],eax + a.mov(MABS(dstp.memory(0)), eax); if (inst.size() == 8) - a.mov(MABS(dstp.memory(4)), edx); // mov [dstp + 4],edx + a.mov(MABS(dstp.memory(4)), edx); } @@ -5878,16 +5884,16 @@ void drcbe_x86::op_fstore(Assembler &a, const instruction &inst) be_parameter srcp(*this, inst.param(2), PTYPE_MF); // general case - a.mov(eax, MABS(srcp.memory(0))); // mov eax,[srcp] + a.mov(eax, MABS(srcp.memory(0))); if (inst.size() == 8) - a.mov(edx, MABS(srcp.memory(4))); // mov edx,[srcp + 4] + a.mov(edx, MABS(srcp.memory(4))); // immediate index if (indp.is_immediate()) { - a.mov(MABS(basep.memory(4*indp.immediate())), eax); // mov [basep + 4*indp],eax + a.mov(MABS(basep.memory(inst.size()*indp.immediate())), eax); if (inst.size() == 8) - a.mov(MABS(basep.memory(4 + 4*indp.immediate())), edx); // mov [basep + 4*indp + 4],edx + a.mov(MABS(basep.memory(4 + inst.size()*indp.immediate())), edx); } // other index @@ -5895,9 +5901,9 @@ void drcbe_x86::op_fstore(Assembler &a, const instruction &inst) { Gp const indreg = indp.select_register(ecx); emit_mov_r32_p32(a, indreg, indp); - a.mov(ptr(u64(basep.memory(0)), indreg, 2), eax); // mov [basep + 4*indp],eax + a.mov(ptr(u64(basep.memory(0)), indreg, (inst.size() == 8) ? 3 : 2), eax); if (inst.size() == 8) - a.mov(ptr(u64(basep.memory(4)), indreg, 2), edx); // mov [basep + 4*indp + 4],edx + a.mov(ptr(u64(basep.memory(4)), indreg, (inst.size() == 8) ? 
3 : 2), edx); } } diff --git a/src/devices/cpu/drcumlsh.h b/src/devices/cpu/drcumlsh.h index 666b01c8bdf..3660afb1979 100644 --- a/src/devices/cpu/drcumlsh.h +++ b/src/devices/cpu/drcumlsh.h @@ -146,8 +146,8 @@ /* ----- 32-bit Floating Point Arithmetic Operations ----- */ #define UML_FSLOAD(block, dst, base, index) do { using namespace uml; block.append().fsload(dst, base, index); } while (0) #define UML_FSSTORE(block, base, index, src1) do { using namespace uml; block.append().fsstore(base, index, src1); } while (0) -#define UML_FSREAD(block, dst, src1, space) do { using namespace uml; block.append().fsread(dst, src1, AS_##space); } while (0) -#define UML_FSWRITE(block, dst, src1, space) do { using namespace uml; block.append().fswrite(dst, src1, AS_##space); } while (0) +#define UML_FSREAD(block, dst, src1, space) do { using namespace uml; block.append().fsread(dst, src1, space); } while (0) +#define UML_FSWRITE(block, dst, src1, space) do { using namespace uml; block.append().fswrite(dst, src1, space); } while (0) #define UML_FSMOV(block, dst, src1) do { using namespace uml; block.append().fsmov(dst, src1); } while (0) #define UML_FSMOVc(block, cond, dst, src1) do { using namespace uml; block.append().fsmov(cond, dst, src1); } while (0) #define UML_FSTOINT(block, dst, src1, size, round) do { using namespace uml; block.append().fstoint(dst, src1, size, round); } while (0) @@ -170,8 +170,8 @@ /* ----- 64-bit Floating Point Arithmetic Operations ----- */ #define UML_FDLOAD(block, dst, base, index) do { using namespace uml; block.append().fdload(dst, base, index); } while (0) #define UML_FDSTORE(block, base, index, src1) do { using namespace uml; block.append().fdstore(base, index, src1); } while (0) -#define UML_FDREAD(block, dst, src1, space) do { using namespace uml; block.append().fdread(dst, src1, AS_##space); } while (0) -#define UML_FDWRITE(block, dst, src1, space) do { using namespace uml; block.append().fdwrite(dst, src1, AS_##space); } while (0) +#define 
UML_FDREAD(block, dst, src1, space) do { using namespace uml; block.append().fdread(dst, src1, space); } while (0) +#define UML_FDWRITE(block, dst, src1, space) do { using namespace uml; block.append().fdwrite(dst, src1, space); } while (0) #define UML_FDMOV(block, dst, src1) do { using namespace uml; block.append().fdmov(dst, src1); } while (0) #define UML_FDMOVc(block, cond, dst, src1) do { using namespace uml; block.append().fdmov(cond, dst, src1); } while (0) #define UML_FDTOINT(block, dst, src1, size, round) do { using namespace uml; block.append().fdtoint(dst, src1, size, round); } while (0) diff --git a/src/devices/cpu/uml.cpp b/src/devices/cpu/uml.cpp index 53b2174859e..7133a5ca0ad 100644 --- a/src/devices/cpu/uml.cpp +++ b/src/devices/cpu/uml.cpp @@ -393,7 +393,7 @@ void uml::instruction::simplify() if (m_param[2].is_immediate_value(paramsizemask[m_param[3].size()])) { m_opcode = OP_READ; - m_numparams = 2; + m_numparams = 3; m_param[2] = m_param[3]; } break; @@ -403,7 +403,7 @@ void uml::instruction::simplify() if (m_param[2].is_immediate_value(paramsizemask[m_param[3].size()])) { m_opcode = OP_WRITE; - m_numparams = 2; + m_numparams = 3; m_param[2] = m_param[3]; } break; diff --git a/src/devices/cpu/uml.h b/src/devices/cpu/uml.h index a60d7595bf8..7c8ae800acf 100644 --- a/src/devices/cpu/uml.h +++ b/src/devices/cpu/uml.h @@ -398,6 +398,8 @@ namespace uml // construction/destruction constexpr instruction() : m_param{ } { } + static bool is_param_out(opcode_t opcode, int paramnum) { assert(opcode < OP_MAX); assert(paramnum < m_numparams); return (s_opcode_info_table[opcode].param[paramnum].output & 0x02) != 0; } + // getters constexpr opcode_t opcode() const { return m_opcode; } constexpr condition_t condition() const { return m_condition; }