Fixed more recompiler backend issues: (#13132)

* cpu/drcbex86.cpp: Clear top half of iregs on loads to I0-I3, fix LOADS sign extension, and fix FLOAD/FSTORE data size.
* cpu/drcbex64.cpp: Use appropriate register size for LOAD.
* cpu/drcbec.cpp: Fix parameter sizes for READM/WRITEM, and clear upper half of registers after 32-bit operations.
* cpu/uml.cpp: Fixed definitions for FSREAD/FSWRITE and FDREAD/FDWRITE, and fixed READM/WRITEM simplification.
This commit is contained in:
987123879113 2024-12-29 23:32:40 +09:00 committed by GitHub
parent f08ec01a02
commit cfb1d575c3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 104 additions and 40 deletions

View File

@ -324,13 +324,30 @@ void drcbe_c::reset()
void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_t numinst)
{
// Calculate the max possible number of register clears required
uint32_t regclears = 0;
for (int inum = 0; inum < numinst; inum++)
{
const instruction &inst = instlist[inum];
if (inst.size() != 4)
continue;
for (int pnum = 0; pnum < inst.numparams(); pnum++)
{
if (uml::instruction::is_param_out(inst.opcode(), pnum) && inst.param(pnum).is_int_register())
regclears++;
}
}
// tell all of our utility objects that a block is beginning
m_hash.block_begin(block, instlist, numinst);
m_hash.block_begin(block, instlist, numinst + regclears);
m_labels.block_begin(block);
m_map.block_begin(block);
// begin codegen; fail if we can't
drccodeptr *cachetop = m_cache.begin_codegen(numinst * sizeof(drcbec_instruction) * 4);
drccodeptr *cachetop = m_cache.begin_codegen((numinst + regclears) * sizeof(drcbec_instruction) * 4);
if (cachetop == nullptr)
block.abort();
@ -338,6 +355,9 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_
drcbec_instruction *base = (drcbec_instruction *)(((uintptr_t)*cachetop + sizeof(drcbec_instruction) - 1) & ~(sizeof(drcbec_instruction) - 1));
drcbec_instruction *dst = base;
bool ireg_needs_clearing[REG_I_COUNT];
std::fill(std::begin(ireg_needs_clearing), std::end(ireg_needs_clearing), true);
// generate code by copying the instructions and extracting immediates
for (int inum = 0; inum < numinst; inum++)
{
@ -391,10 +411,14 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_
psize[2] = 4;
if (opcode == OP_STORE || opcode == OP_FSTORE)
psize[1] = 4;
if (opcode == OP_READ || opcode == OP_READM || opcode == OP_FREAD)
if (opcode == OP_READ || opcode == OP_FREAD)
psize[1] = psize[2] = 4;
if (opcode == OP_WRITE || opcode == OP_WRITEM || opcode == OP_FWRITE)
if (opcode == OP_WRITE || opcode == OP_FWRITE)
psize[0] = psize[2] = 4;
if (opcode == OP_READM)
psize[1] = psize[3] = 4;
if (opcode == OP_WRITEM)
psize[0] = psize[3] = 4;
if (opcode == OP_SEXT && inst.param(2).size() != SIZE_QWORD)
psize[1] = 4;
if (opcode == OP_FTOINT)
@ -449,6 +473,41 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_
// point past the end of the immediates
dst += immedwords;
// Keep track of which registers had an 8 byte write and clear it the next time it's written
if (inst.size() == 4)
{
for (int pnum = 0; pnum < inst.numparams(); pnum++)
{
if (uml::instruction::is_param_out(inst.opcode(), pnum) && inst.param(pnum).is_int_register() && ireg_needs_clearing[inst.param(pnum).ireg() - REG_I0])
{
immedwords = (8 + sizeof(drcbec_instruction) - 1) / sizeof(drcbec_instruction);
(dst++)->i = MAKE_OPCODE_FULL(OP_AND, 8, 0, 0, 3 + immedwords);
immed = dst + 3;
output_parameter(&dst, &immed, 8, inst.param(pnum));
output_parameter(&dst, &immed, 8, inst.param(pnum));
output_parameter(&dst, &immed, 8, 0xffffffff);
dst += immedwords;
ireg_needs_clearing[inst.param(pnum).ireg() - REG_I0] = false;
}
}
}
else if (inst.size() == 8)
{
for (int pnum = 0; pnum < inst.numparams(); pnum++)
{
if (uml::instruction::is_param_out(inst.opcode(), pnum) && inst.param(pnum).is_int_register())
{
ireg_needs_clearing[inst.param(pnum).ireg() - REG_I0] = true;
}
}
}
break;
}
}
@ -1455,20 +1514,20 @@ int drcbe_c::execute(code_handle &entry)
DPARAM0 = m_space[PARAM2]->read_dword(PARAM1);
break;
case MAKE_OPCODE_SHORT(OP_READ8, 8, 0): // DREAD dst,src1,space_QOWRD
case MAKE_OPCODE_SHORT(OP_READ8, 8, 0): // DREAD dst,src1,space_QWORD
DPARAM0 = m_space[PARAM2]->read_qword(PARAM1);
break;
case MAKE_OPCODE_SHORT(OP_READM2, 8, 0): // DREADM dst,src1,mask,space_WORD
DPARAM0 = m_space[PARAM3]->read_word(PARAM1, PARAM2);
DPARAM0 = m_space[PARAM3]->read_word(PARAM1, DPARAM2);
break;
case MAKE_OPCODE_SHORT(OP_READM4, 8, 0): // DREADM dst,src1,mask,space_DWORD
DPARAM0 = m_space[PARAM3]->read_dword(PARAM1, PARAM2);
DPARAM0 = m_space[PARAM3]->read_dword(PARAM1, DPARAM2);
break;
case MAKE_OPCODE_SHORT(OP_READM8, 8, 0): // DREADM dst,src1,mask,space_QWORD
DPARAM0 = m_space[PARAM3]->read_qword(PARAM1, PARAM2);
DPARAM0 = m_space[PARAM3]->read_qword(PARAM1, DPARAM2);
break;
case MAKE_OPCODE_SHORT(OP_WRITE1, 8, 0): // DWRITE dst,src1,space_BYTE

View File

@ -2306,7 +2306,7 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst)
Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs);
// pick a target register for the general case
Gp dstreg = dstp.select_register(eax);
Gp dstreg = (inst.size() == 4) ? dstp.select_register(eax) : dstp.select_register(rax);
// immediate index
if (indp.is_immediate())
@ -2314,13 +2314,13 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst)
s32 const offset = baseoffs + (s32(indp.immediate()) << scalesizep.scale());
if (size == SIZE_BYTE)
a.movzx(dstreg, byte_ptr(basereg, offset)); // movzx dstreg,[basep + scale*indp]
a.movzx(dstreg, byte_ptr(basereg, offset));
else if (size == SIZE_WORD)
a.movzx(dstreg, word_ptr(basereg, offset)); // movzx dstreg,[basep + scale*indp]
a.movzx(dstreg, word_ptr(basereg, offset));
else if (size == SIZE_DWORD)
a.mov(dstreg, ptr(basereg, offset)); // mov dstreg,[basep + scale*indp]
a.mov(dstreg, dword_ptr(basereg, offset));
else if (size == SIZE_QWORD)
a.mov(dstreg.r64(), ptr(basereg, offset)); // mov dstreg,[basep + scale*indp]
a.mov(dstreg, ptr(basereg, offset));
}
// other index
@ -2329,20 +2329,17 @@ void drcbe_x64::op_load(Assembler &a, const instruction &inst)
Gp indreg = indp.select_register(rcx);
movsx_r64_p32(a, indreg, indp);
if (size == SIZE_BYTE)
a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movzx dstreg,[basep + scale*indp]
a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs));
else if (size == SIZE_WORD)
a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movzx dstreg,[basep + scale*indp]
a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs));
else if (size == SIZE_DWORD)
a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp]
a.mov(dstreg, dword_ptr(basereg, indreg, scalesizep.scale(), baseoffs));
else if (size == SIZE_QWORD)
a.mov(dstreg.r64(), ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp]
a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs));
}
// store result
if (inst.size() == 4)
mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
else
mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg
mov_param_reg(a, dstp, dstreg);
}

View File

@ -1020,6 +1020,7 @@ void drcbe_x86::emit_mov_p32_r32(Assembler &a, be_parameter const &param, Gp con
{
if (reg.id() != param.ireg())
a.mov(Gpd(param.ireg()), reg); // mov param,reg
a.mov(MABS(m_reghi[param.ireg()], 4), 0);
}
}
@ -3170,7 +3171,12 @@ void drcbe_x86::op_loads(Assembler &a, const instruction &inst)
// 64-bit form stores upper 32 bits
if (inst.size() == 8)
{
a.cdq(); // cdq
if (size != SIZE_QWORD)
{
if (dstreg.id() != eax.id())
a.mov(eax, dstreg);
a.cdq(); // cdq
}
if (dstp.is_memory())
a.mov(MABS(dstp.memory(4)), edx); // mov [dstp+4],edx
else if (dstp.is_int_register())
@ -5839,9 +5845,9 @@ void drcbe_x86::op_fload(Assembler &a, const instruction &inst)
// immediate index
if (indp.is_immediate())
{
a.mov(eax, MABS(basep.memory(4*indp.immediate()))); // mov eax,[basep + 4*indp]
a.mov(eax, MABS(basep.memory(inst.size()*indp.immediate())));
if (inst.size() == 8)
a.mov(edx, MABS(basep.memory(4 + 4*indp.immediate()))); // mov edx,[basep + 4*indp + 4]
a.mov(edx, MABS(basep.memory(4 + inst.size()*indp.immediate())));
}
// other index
@ -5849,15 +5855,15 @@ void drcbe_x86::op_fload(Assembler &a, const instruction &inst)
{
Gp const indreg = indp.select_register(ecx);
emit_mov_r32_p32(a, indreg, indp);
a.mov(eax, ptr(u64(basep.memory(0)), indreg, 2)); // mov eax,[basep + 4*indp]
a.mov(eax, ptr(u64(basep.memory(0)), indreg, (inst.size() == 8) ? 3 : 2));
if (inst.size() == 8)
a.mov(edx, ptr(u64(basep.memory(4)), indreg, 2)); // mov edx,[basep + 4*indp + 4]
a.mov(edx, ptr(u64(basep.memory(4)), indreg, (inst.size() == 8) ? 3 : 2));
}
// general case
a.mov(MABS(dstp.memory(0)), eax); // mov [dstp],eax
a.mov(MABS(dstp.memory(0)), eax);
if (inst.size() == 8)
a.mov(MABS(dstp.memory(4)), edx); // mov [dstp + 4],edx
a.mov(MABS(dstp.memory(4)), edx);
}
@ -5878,16 +5884,16 @@ void drcbe_x86::op_fstore(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(2), PTYPE_MF);
// general case
a.mov(eax, MABS(srcp.memory(0))); // mov eax,[srcp]
a.mov(eax, MABS(srcp.memory(0)));
if (inst.size() == 8)
a.mov(edx, MABS(srcp.memory(4))); // mov edx,[srcp + 4]
a.mov(edx, MABS(srcp.memory(4)));
// immediate index
if (indp.is_immediate())
{
a.mov(MABS(basep.memory(4*indp.immediate())), eax); // mov [basep + 4*indp],eax
a.mov(MABS(basep.memory(inst.size()*indp.immediate())), eax);
if (inst.size() == 8)
a.mov(MABS(basep.memory(4 + 4*indp.immediate())), edx); // mov [basep + 4*indp + 4],edx
a.mov(MABS(basep.memory(4 + inst.size()*indp.immediate())), edx);
}
// other index
@ -5895,9 +5901,9 @@ void drcbe_x86::op_fstore(Assembler &a, const instruction &inst)
{
Gp const indreg = indp.select_register(ecx);
emit_mov_r32_p32(a, indreg, indp);
a.mov(ptr(u64(basep.memory(0)), indreg, 2), eax); // mov [basep + 4*indp],eax
a.mov(ptr(u64(basep.memory(0)), indreg, (inst.size() == 8) ? 3 : 2), eax);
if (inst.size() == 8)
a.mov(ptr(u64(basep.memory(4)), indreg, 2), edx); // mov [basep + 4*indp + 4],edx
a.mov(ptr(u64(basep.memory(4)), indreg, (inst.size() == 8) ? 3 : 2), edx);
}
}

View File

@ -146,8 +146,8 @@
/* ----- 32-bit Floating Point Arithmetic Operations ----- */
#define UML_FSLOAD(block, dst, base, index) do { using namespace uml; block.append().fsload(dst, base, index); } while (0)
#define UML_FSSTORE(block, base, index, src1) do { using namespace uml; block.append().fsstore(base, index, src1); } while (0)
#define UML_FSREAD(block, dst, src1, space) do { using namespace uml; block.append().fsread(dst, src1, AS_##space); } while (0)
#define UML_FSWRITE(block, dst, src1, space) do { using namespace uml; block.append().fswrite(dst, src1, AS_##space); } while (0)
#define UML_FSREAD(block, dst, src1, space) do { using namespace uml; block.append().fsread(dst, src1, space); } while (0)
#define UML_FSWRITE(block, dst, src1, space) do { using namespace uml; block.append().fswrite(dst, src1, space); } while (0)
#define UML_FSMOV(block, dst, src1) do { using namespace uml; block.append().fsmov(dst, src1); } while (0)
#define UML_FSMOVc(block, cond, dst, src1) do { using namespace uml; block.append().fsmov(cond, dst, src1); } while (0)
#define UML_FSTOINT(block, dst, src1, size, round) do { using namespace uml; block.append().fstoint(dst, src1, size, round); } while (0)
@ -170,8 +170,8 @@
/* ----- 64-bit Floating Point Arithmetic Operations ----- */
#define UML_FDLOAD(block, dst, base, index) do { using namespace uml; block.append().fdload(dst, base, index); } while (0)
#define UML_FDSTORE(block, base, index, src1) do { using namespace uml; block.append().fdstore(base, index, src1); } while (0)
#define UML_FDREAD(block, dst, src1, space) do { using namespace uml; block.append().fdread(dst, src1, AS_##space); } while (0)
#define UML_FDWRITE(block, dst, src1, space) do { using namespace uml; block.append().fdwrite(dst, src1, AS_##space); } while (0)
#define UML_FDREAD(block, dst, src1, space) do { using namespace uml; block.append().fdread(dst, src1, space); } while (0)
#define UML_FDWRITE(block, dst, src1, space) do { using namespace uml; block.append().fdwrite(dst, src1, space); } while (0)
#define UML_FDMOV(block, dst, src1) do { using namespace uml; block.append().fdmov(dst, src1); } while (0)
#define UML_FDMOVc(block, cond, dst, src1) do { using namespace uml; block.append().fdmov(cond, dst, src1); } while (0)
#define UML_FDTOINT(block, dst, src1, size, round) do { using namespace uml; block.append().fdtoint(dst, src1, size, round); } while (0)

View File

@ -393,7 +393,7 @@ void uml::instruction::simplify()
if (m_param[2].is_immediate_value(paramsizemask[m_param[3].size()]))
{
m_opcode = OP_READ;
m_numparams = 2;
m_numparams = 3;
m_param[2] = m_param[3];
}
break;
@ -403,7 +403,7 @@ void uml::instruction::simplify()
if (m_param[2].is_immediate_value(paramsizemask[m_param[3].size()]))
{
m_opcode = OP_WRITE;
m_numparams = 2;
m_numparams = 3;
m_param[2] = m_param[3];
}
break;

View File

@ -398,6 +398,8 @@ namespace uml
// construction/destruction
constexpr instruction() : m_param{ } { }
// Reports whether parameter number paramnum of the given opcode has bit 0x02 set
// in the output field of its s_opcode_info_table entry.
//
// This is a static member, so it has no instance to read from: the bounds check
// is made against the opcode table's own parameter array rather than the
// per-instance m_numparams, which a static function cannot reference (doing so
// only compiled when assert() was disabled via NDEBUG).
// NOTE(review): the bound assumes `param` is a fixed-size array member of the
// table entry - confirm against the opcode_info declaration.
static bool is_param_out(opcode_t opcode, int paramnum)
{
	assert(opcode < OP_MAX);
	assert(paramnum >= 0);
	assert(paramnum < static_cast<int>(sizeof(s_opcode_info_table[opcode].param) / sizeof(s_opcode_info_table[opcode].param[0])));
	return (s_opcode_info_table[opcode].param[paramnum].output & 0x02) != 0;
}
// getters
constexpr opcode_t opcode() const { return m_opcode; }
constexpr condition_t condition() const { return m_condition; }