cpu/drcbex64.cpp: Always keep UML floating point registers in memory for SysV ABI.

This was changed in 822c3b4232 but it
never worked for multiple reasons:
* The comment mentions saving around CALLC, but there is no code to save
  and restore around CALLC, DEBUG and RECOVER, all of which can clobber
  callee-saved registers.
* Numerous opcode handlers assume XMM0 and XMM1 can be used as scratch
  registers, but this change mapped them to UML F0 and F1.  This
  resulted in F0 and F1 getting clobbered by many UML instructions.
This commit is contained in:
Vas Crabb 2025-04-17 05:39:07 +10:00
parent c6313f8049
commit 1f6f0197df

View File

@ -269,6 +269,9 @@ const Gp::Id REG_PARAM4 = Gp::kIdCx;
#endif
const Xmm REG_FSCRATCH1 = xmm0;
const Xmm REG_FSCRATCH2 = xmm1;
// register mapping tables
const Gp::Id int_register_map[REG_I_COUNT] =
{
@ -283,10 +286,6 @@ uint32_t float_register_map[REG_F_COUNT] =
{
#ifdef _WIN32
6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#else
// on SysV x64 ABI, XMM0-7 are FP function args. since this code has no args, and we
// save/restore them around CALLC, they should be safe for our use.
0, 1, 2, 3, 4, 5, 6, 7
#endif
};
@ -2610,7 +2609,9 @@ void drcbe_x64::op_save(Assembler &a, const instruction &inst)
for (int regnum = 0; regnum < std::size(m_state.r); regnum++)
{
if (int_register_map[regnum] != 0)
{
a.mov(ptr(rcx, regoffs + 8 * regnum), Gpq(int_register_map[regnum]));
}
else
{
a.mov(rax, MABS(&m_state.r[regnum].d));
@ -2623,7 +2624,9 @@ void drcbe_x64::op_save(Assembler &a, const instruction &inst)
for (int regnum = 0; regnum < std::size(m_state.f); regnum++)
{
if (float_register_map[regnum] != 0)
{
a.movsd(ptr(rcx, regoffs + 8 * regnum), Xmm(float_register_map[regnum]));
}
else
{
a.mov(rax, MABS(&m_state.f[regnum].d));
@ -5036,7 +5039,7 @@ void drcbe_x64::op_fload(Assembler &a, const instruction &inst)
int const scale = (inst.size() == 4) ? 2 : 3;
// pick a target register for the general case
Xmm const dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// determine the pointer base
int32_t baseoffs;
@ -5080,7 +5083,7 @@ void drcbe_x64::op_fstore(Assembler &a, const instruction &inst)
int const scale = (inst.size() == 4) ? 2 : 3;
// pick a target register for the general case
Xmm const srcreg = srcp.select_register(xmm0);
Xmm const srcreg = srcp.select_register(REG_FSCRATCH1);
// determine the pointer base
int32_t baseoffs;
@ -5209,7 +5212,7 @@ void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// always start with a jmp
Label skip;
@ -5219,9 +5222,9 @@ void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
a.short_().j(X86_NOT_CONDITION(inst.condition()), skip); // jcc skip
}
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
if (srcp.is_float_register())
{
movss_p32_r128(a, dstp, Xmm(srcp.freg())); // movss dstp,srcp
@ -5232,10 +5235,9 @@ void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
if (srcp.is_float_register())
{
movsd_p64_r128(a, dstp, Xmm(srcp.freg())); // movsd dstp,srcp
@ -5282,9 +5284,9 @@ void drcbe_x64::op_ftoint(Assembler &a, const instruction &inst)
a.ldmxcsr(MABS(&m_near.ssecontrol[roundp.rounding()])); // ldmxcsr fpcontrol[mode]
}
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
if (srcp.is_memory())
{
if (roundp.rounding() != ROUND_TRUNC)
@ -5300,10 +5302,9 @@ void drcbe_x64::op_ftoint(Assembler &a, const instruction &inst)
a.cvttss2si(dstreg, Xmm(srcp.freg())); // cvttss2si dstreg,srcp
}
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
if (srcp.is_memory())
{
if (roundp.rounding() != ROUND_TRUNC)
@ -5346,14 +5347,14 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
assert(sizep.is_size());
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit integer source
// 32-bit form
if (sizep.size() == SIZE_DWORD)
{
// 32-bit integer source
if (srcp.is_memory())
a.cvtsi2ss(dstreg, MABS(srcp.memory(), 4)); // cvtsi2ss dstreg,[srcp]
else
@ -5363,10 +5364,9 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
a.cvtsi2ss(dstreg, srcreg); // cvtsi2ss dstreg,srcreg
}
}
// 64-bit integer source
else
{
// 64-bit integer source
if (srcp.is_memory())
a.cvtsi2ss(dstreg, MABS(srcp.memory(), 8)); // cvtsi2ss dstreg,[srcp]
else
@ -5378,13 +5378,12 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
}
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 32-bit integer source
// 64-bit form
if (sizep.size() == SIZE_DWORD)
{
// 32-bit integer source
if (srcp.is_memory())
a.cvtsi2sd(dstreg, MABS(srcp.memory(), 4)); // cvtsi2sd dstreg,[srcp]
else
@ -5394,10 +5393,9 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
a.cvtsi2sd(dstreg, srcreg); // cvtsi2sd dstreg,srcreg
}
}
// 64-bit integer source
else
{
// 64-bit integer source
if (srcp.is_memory())
a.cvtsi2sd(dstreg, MABS(srcp.memory(), 8)); // cvtsi2sd dstreg,[srcp]
else
@ -5430,21 +5428,20 @@ void drcbe_x64::op_ffrflt(Assembler &a, const instruction &inst)
assert(sizep.is_size());
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// single-to-double
if (inst.size() == 8 && sizep.size() == SIZE_DWORD)
{
// single-to-double
if (srcp.is_memory())
a.cvtss2sd(dstreg, MABS(srcp.memory())); // cvtss2sd dstreg,[srcp]
else if (srcp.is_float_register())
a.cvtss2sd(dstreg, Xmm(srcp.freg())); // cvtss2sd dstreg,srcp
movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
}
// double-to-single
else if (inst.size() == 4 && sizep.size() == SIZE_QWORD)
{
// double-to-single
if (srcp.is_memory())
a.cvtsd2ss(dstreg, MABS(srcp.memory())); // cvtsd2ss dstreg,[srcp]
else if (srcp.is_float_register())
@ -5470,7 +5467,7 @@ void drcbe_x64::op_frnds(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 64-bit form
if (srcp.is_memory())
@ -5499,11 +5496,11 @@ void drcbe_x64::op_fadd(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory())
a.addss(dstreg, MABS(src2p.memory())); // addss dstreg,[src2p]
@ -5511,10 +5508,9 @@ void drcbe_x64::op_fadd(Assembler &a, const instruction &inst)
a.addss(dstreg, Xmm(src2p.freg())); // addss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory())
a.addsd(dstreg, MABS(src2p.memory())); // addsd dstreg,[src2p]
@ -5542,11 +5538,11 @@ void drcbe_x64::op_fsub(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory())
a.subss(dstreg, MABS(src2p.memory())); // subss dstreg,[src2p]
@ -5554,10 +5550,9 @@ void drcbe_x64::op_fsub(Assembler &a, const instruction &inst)
a.subss(dstreg, Xmm(src2p.freg())); // subss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory())
a.subsd(dstreg, MABS(src2p.memory())); // subsd dstreg,[src2p]
@ -5584,21 +5579,20 @@ void drcbe_x64::op_fcmp(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm src1reg = src1p.select_register(xmm0);
Xmm const src1reg = src1p.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
movss_r128_p32(a, src1reg, src1p); // movss src1reg,src1p
if (src2p.is_memory())
a.comiss(src1reg, MABS(src2p.memory())); // comiss src1reg,[src2p]
else if (src2p.is_float_register())
a.comiss(src1reg, Xmm(src2p.freg())); // comiss src1reg,src2p
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
movsd_r128_p64(a, src1reg, src1p); // movsd src1reg,src1p
if (src2p.is_memory())
a.comisd(src1reg, MABS(src2p.memory())); // comisd src1reg,[src2p]
@ -5625,11 +5619,11 @@ void drcbe_x64::op_fmul(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory())
a.mulss(dstreg, MABS(src2p.memory())); // mulss dstreg,[src2p]
@ -5637,10 +5631,9 @@ void drcbe_x64::op_fmul(Assembler &a, const instruction &inst)
a.mulss(dstreg, Xmm(src2p.freg())); // mulss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory())
a.mulsd(dstreg, MABS(src2p.memory())); // mulsd dstreg,[src2p]
@ -5668,11 +5661,11 @@ void drcbe_x64::op_fdiv(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory())
a.divss(dstreg, MABS(src2p.memory())); // divss dstreg,[src2p]
@ -5680,10 +5673,9 @@ void drcbe_x64::op_fdiv(Assembler &a, const instruction &inst)
a.divss(dstreg, Xmm(src2p.freg())); // divss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory())
a.divsd(dstreg, MABS(src2p.memory())); // divsd dstreg,[src2p]
@ -5709,8 +5701,8 @@ void drcbe_x64::op_fneg(Assembler &a, const instruction &inst)
be_parameter dstp(*this, inst.param(0), PTYPE_MF);
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
Xmm dstreg = dstp.select_register(xmm0, srcp);
Xmm tempreg = xmm1;
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, srcp);
Xmm const tempreg = REG_FSCRATCH2;
// note: using memory addrs with xorpd is dangerous because MAME does not guarantee
// the memory address will be 16 byte aligned so there's a good chance it'll crash
@ -5730,8 +5722,8 @@ void drcbe_x64::op_fneg(Assembler &a, const instruction &inst)
if (inst.size() == 4)
{
a.mov(rax, 0x80000000);
a.movd(dstreg, rax);
a.mov(eax, 0x80000000);
a.movd(dstreg, eax);
if (srcp.is_memory())
a.xorpd(dstreg, tempreg);
else if (srcp.is_float_register())
@ -5767,19 +5759,18 @@ void drcbe_x64::op_fabs(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, srcp);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, srcp);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
movss_r128_p32(a, dstreg, srcp); // movss dstreg,srcp
a.andps(dstreg, MABS(m_absmask32)); // andps dstreg,[absmask32]
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
movsd_r128_p64(a, dstreg, srcp); // movsd dstreg,srcp
a.andpd(dstreg, MABS(m_absmask64)); // andpd dstreg,[absmask64]
movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
@ -5803,21 +5794,20 @@ void drcbe_x64::op_fsqrt(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
if (srcp.is_memory())
a.sqrtss(dstreg, MABS(srcp.memory())); // sqrtss dstreg,[srcp]
else if (srcp.is_float_register())
a.sqrtss(dstreg, Xmm(srcp.freg())); // sqrtss dstreg,srcp
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
if (srcp.is_memory())
a.sqrtsd(dstreg, MABS(srcp.memory())); // sqrtsd dstreg,[srcp]
else if (srcp.is_float_register())
@ -5843,11 +5833,11 @@ void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
if (USE_RCPSS_FOR_SINGLES)
{
if (srcp.is_memory())
@ -5858,18 +5848,17 @@ void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
}
else
{
a.movss(xmm1, MABS(&m_near.single1)); // movss xmm1,1.0
a.movss(REG_FSCRATCH2, MABS(&m_near.single1)); // movss xmm1,1.0
if (srcp.is_memory())
a.divss(xmm1, MABS(srcp.memory())); // divss xmm1,[srcp]
a.divss(REG_FSCRATCH2, MABS(srcp.memory())); // divss xmm1,[srcp]
else if (srcp.is_float_register())
a.divss(xmm1, Xmm(srcp.freg())); // divss xmm1,srcp
movss_p32_r128(a, dstp, xmm1); // movss dstp,xmm1
a.divss(REG_FSCRATCH2, Xmm(srcp.freg())); // divss xmm1,srcp
movss_p32_r128(a, dstp, REG_FSCRATCH2); // movss dstp,xmm1
}
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
if (USE_RCPSS_FOR_DOUBLES)
{
if (srcp.is_memory())
@ -5882,12 +5871,12 @@ void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
}
else
{
a.movsd(xmm1, MABS(&m_near.double1)); // movsd xmm1,1.0
a.movsd(REG_FSCRATCH2, MABS(&m_near.double1)); // movsd xmm1,1.0
if (srcp.is_memory())
a.divsd(xmm1, MABS(srcp.memory())); // divsd xmm1,[srcp]
a.divsd(REG_FSCRATCH2, MABS(srcp.memory())); // divsd xmm1,[srcp]
else if (srcp.is_float_register())
a.divsd(xmm1, Xmm(srcp.freg())); // divsd xmm1,srcp
movsd_p64_r128(a, dstp, xmm1); // movsd dstp,xmm1
a.divsd(REG_FSCRATCH2, Xmm(srcp.freg())); // divsd xmm1,srcp
movsd_p64_r128(a, dstp, REG_FSCRATCH2); // movsd dstp,xmm1
}
}
}
@ -5909,11 +5898,11 @@ void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
if (USE_RSQRTSS_FOR_SINGLES)
{
if (srcp.is_memory())
@ -5924,18 +5913,17 @@ void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
else
{
if (srcp.is_memory())
a.sqrtss(xmm1, MABS(srcp.memory())); // sqrtss xmm1,[srcp]
a.sqrtss(REG_FSCRATCH2, MABS(srcp.memory())); // sqrtss xmm1,[srcp]
else if (srcp.is_float_register())
a.sqrtss(xmm1, Xmm(srcp.freg())); // sqrtss xmm1,srcp
a.sqrtss(REG_FSCRATCH2, Xmm(srcp.freg())); // sqrtss xmm1,srcp
a.movss(dstreg, MABS(&m_near.single1)); // movss dstreg,1.0
a.divss(dstreg, xmm1); // divss dstreg,xmm1
a.divss(dstreg, REG_FSCRATCH2); // divss dstreg,xmm1
}
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
if (USE_RSQRTSS_FOR_DOUBLES)
{
if (srcp.is_memory())
@ -5948,11 +5936,11 @@ void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
else
{
if (srcp.is_memory())
a.sqrtsd(xmm1, MABS(srcp.memory())); // sqrtsd xmm1,[srcp]
a.sqrtsd(REG_FSCRATCH2, MABS(srcp.memory())); // sqrtsd xmm1,[srcp]
else if (srcp.is_float_register())
a.sqrtsd(xmm1, Xmm(srcp.freg())); // sqrtsd xmm1,srcp
a.sqrtsd(REG_FSCRATCH2, Xmm(srcp.freg())); // sqrtsd xmm1,srcp
a.movsd(dstreg, MABS(&m_near.double1)); // movsd dstreg,1.0
a.divsd(dstreg, xmm1); // divsd dstreg,xmm1
a.divsd(dstreg, REG_FSCRATCH2); // divsd dstreg,xmm1
}
movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
}
@ -5975,11 +5963,11 @@ void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MR);
// pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0);
Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4)
{
// 32-bit form
if (srcp.is_memory())
{
a.movd(dstreg, MABS(srcp.memory()));
@ -5996,10 +5984,9 @@ void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst)
}
}
// 64-bit form
else if (inst.size() == 8)
{
// 64-bit form
if (srcp.is_memory())
{
a.movq(dstreg, MABS(srcp.memory()));
@ -6038,7 +6025,7 @@ void drcbe_x64::op_icopyf(Assembler &a, const instruction &inst)
// 32-bit form
if (srcp.is_memory())
{
Gp dstreg = dstp.select_register(eax);
Gp const dstreg = dstp.select_register(eax);
a.mov(dstreg, MABS(srcp.memory()));
mov_param_reg(a, dstp, dstreg);
}
@ -6056,7 +6043,7 @@ void drcbe_x64::op_icopyf(Assembler &a, const instruction &inst)
// 64-bit form
if (srcp.is_memory())
{
Gp dstreg = dstp.select_register(rax);
Gp const dstreg = dstp.select_register(rax);
a.mov(dstreg, MABS(srcp.memory()));
mov_param_reg(a, dstp, dstreg);
}