cpu/drcbex64.cpp: Always keep UML floating point registers in memory for SysV ABI.

This was changed in 822c3b4232 but it
never worked for multiple reasons:
* The comment mentions saving around CALLC, but there is no code to save
  and restore around CALLC, DEBUG and RECOVER, all of which can clobber
  callee-saved registers.
* Numerous opcode handlers assume XMM0 and XMM1 can be used as scratch
  registers, but this change mapped them to UML F0 and F1.  This
  resulted in F0 and F1 getting clobbered by many UML instructions.
This commit is contained in:
Vas Crabb 2025-04-17 05:39:07 +10:00
parent c6313f8049
commit 1f6f0197df

View File

@ -269,6 +269,9 @@ const Gp::Id REG_PARAM4 = Gp::kIdCx;
#endif #endif
const Xmm REG_FSCRATCH1 = xmm0;
const Xmm REG_FSCRATCH2 = xmm1;
// register mapping tables // register mapping tables
const Gp::Id int_register_map[REG_I_COUNT] = const Gp::Id int_register_map[REG_I_COUNT] =
{ {
@ -283,10 +286,6 @@ uint32_t float_register_map[REG_F_COUNT] =
{ {
#ifdef _WIN32 #ifdef _WIN32
6, 7, 8, 9, 10, 11, 12, 13, 14, 15 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
#else
// on SysV x64 ABI, XMM0-7 are FP function args. since this code has no args, and we
// save/restore them around CALLC, they should be safe for our use.
0, 1, 2, 3, 4, 5, 6, 7
#endif #endif
}; };
@ -2610,7 +2609,9 @@ void drcbe_x64::op_save(Assembler &a, const instruction &inst)
for (int regnum = 0; regnum < std::size(m_state.r); regnum++) for (int regnum = 0; regnum < std::size(m_state.r); regnum++)
{ {
if (int_register_map[regnum] != 0) if (int_register_map[regnum] != 0)
{
a.mov(ptr(rcx, regoffs + 8 * regnum), Gpq(int_register_map[regnum])); a.mov(ptr(rcx, regoffs + 8 * regnum), Gpq(int_register_map[regnum]));
}
else else
{ {
a.mov(rax, MABS(&m_state.r[regnum].d)); a.mov(rax, MABS(&m_state.r[regnum].d));
@ -2623,7 +2624,9 @@ void drcbe_x64::op_save(Assembler &a, const instruction &inst)
for (int regnum = 0; regnum < std::size(m_state.f); regnum++) for (int regnum = 0; regnum < std::size(m_state.f); regnum++)
{ {
if (float_register_map[regnum] != 0) if (float_register_map[regnum] != 0)
{
a.movsd(ptr(rcx, regoffs + 8 * regnum), Xmm(float_register_map[regnum])); a.movsd(ptr(rcx, regoffs + 8 * regnum), Xmm(float_register_map[regnum]));
}
else else
{ {
a.mov(rax, MABS(&m_state.f[regnum].d)); a.mov(rax, MABS(&m_state.f[regnum].d));
@ -5036,7 +5039,7 @@ void drcbe_x64::op_fload(Assembler &a, const instruction &inst)
int const scale = (inst.size() == 4) ? 2 : 3; int const scale = (inst.size() == 4) ? 2 : 3;
// pick a target register for the general case // pick a target register for the general case
Xmm const dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// determine the pointer base // determine the pointer base
int32_t baseoffs; int32_t baseoffs;
@ -5080,7 +5083,7 @@ void drcbe_x64::op_fstore(Assembler &a, const instruction &inst)
int const scale = (inst.size() == 4) ? 2 : 3; int const scale = (inst.size() == 4) ? 2 : 3;
// pick a target register for the general case // pick a target register for the general case
Xmm const srcreg = srcp.select_register(xmm0); Xmm const srcreg = srcp.select_register(REG_FSCRATCH1);
// determine the pointer base // determine the pointer base
int32_t baseoffs; int32_t baseoffs;
@ -5209,7 +5212,7 @@ void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// always start with a jmp // always start with a jmp
Label skip; Label skip;
@ -5219,9 +5222,9 @@ void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
a.short_().j(X86_NOT_CONDITION(inst.condition()), skip); // jcc skip a.short_().j(X86_NOT_CONDITION(inst.condition()), skip); // jcc skip
} }
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
if (srcp.is_float_register()) if (srcp.is_float_register())
{ {
movss_p32_r128(a, dstp, Xmm(srcp.freg())); // movss dstp,srcp movss_p32_r128(a, dstp, Xmm(srcp.freg())); // movss dstp,srcp
@ -5232,10 +5235,9 @@ void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
if (srcp.is_float_register()) if (srcp.is_float_register())
{ {
movsd_p64_r128(a, dstp, Xmm(srcp.freg())); // movsd dstp,srcp movsd_p64_r128(a, dstp, Xmm(srcp.freg())); // movsd dstp,srcp
@ -5282,9 +5284,9 @@ void drcbe_x64::op_ftoint(Assembler &a, const instruction &inst)
a.ldmxcsr(MABS(&m_near.ssecontrol[roundp.rounding()])); // ldmxcsr fpcontrol[mode] a.ldmxcsr(MABS(&m_near.ssecontrol[roundp.rounding()])); // ldmxcsr fpcontrol[mode]
} }
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
if (srcp.is_memory()) if (srcp.is_memory())
{ {
if (roundp.rounding() != ROUND_TRUNC) if (roundp.rounding() != ROUND_TRUNC)
@ -5300,10 +5302,9 @@ void drcbe_x64::op_ftoint(Assembler &a, const instruction &inst)
a.cvttss2si(dstreg, Xmm(srcp.freg())); // cvttss2si dstreg,srcp a.cvttss2si(dstreg, Xmm(srcp.freg())); // cvttss2si dstreg,srcp
} }
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
if (srcp.is_memory()) if (srcp.is_memory())
{ {
if (roundp.rounding() != ROUND_TRUNC) if (roundp.rounding() != ROUND_TRUNC)
@ -5346,14 +5347,14 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
assert(sizep.is_size()); assert(sizep.is_size());
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit integer source // 32-bit form
if (sizep.size() == SIZE_DWORD) if (sizep.size() == SIZE_DWORD)
{ {
// 32-bit integer source
if (srcp.is_memory()) if (srcp.is_memory())
a.cvtsi2ss(dstreg, MABS(srcp.memory(), 4)); // cvtsi2ss dstreg,[srcp] a.cvtsi2ss(dstreg, MABS(srcp.memory(), 4)); // cvtsi2ss dstreg,[srcp]
else else
@ -5363,10 +5364,9 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
a.cvtsi2ss(dstreg, srcreg); // cvtsi2ss dstreg,srcreg a.cvtsi2ss(dstreg, srcreg); // cvtsi2ss dstreg,srcreg
} }
} }
// 64-bit integer source
else else
{ {
// 64-bit integer source
if (srcp.is_memory()) if (srcp.is_memory())
a.cvtsi2ss(dstreg, MABS(srcp.memory(), 8)); // cvtsi2ss dstreg,[srcp] a.cvtsi2ss(dstreg, MABS(srcp.memory(), 8)); // cvtsi2ss dstreg,[srcp]
else else
@ -5378,13 +5378,12 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
} }
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 32-bit integer source // 64-bit form
if (sizep.size() == SIZE_DWORD) if (sizep.size() == SIZE_DWORD)
{ {
// 32-bit integer source
if (srcp.is_memory()) if (srcp.is_memory())
a.cvtsi2sd(dstreg, MABS(srcp.memory(), 4)); // cvtsi2sd dstreg,[srcp] a.cvtsi2sd(dstreg, MABS(srcp.memory(), 4)); // cvtsi2sd dstreg,[srcp]
else else
@ -5394,10 +5393,9 @@ void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
a.cvtsi2sd(dstreg, srcreg); // cvtsi2sd dstreg,srcreg a.cvtsi2sd(dstreg, srcreg); // cvtsi2sd dstreg,srcreg
} }
} }
// 64-bit integer source
else else
{ {
// 64-bit integer source
if (srcp.is_memory()) if (srcp.is_memory())
a.cvtsi2sd(dstreg, MABS(srcp.memory(), 8)); // cvtsi2sd dstreg,[srcp] a.cvtsi2sd(dstreg, MABS(srcp.memory(), 8)); // cvtsi2sd dstreg,[srcp]
else else
@ -5430,21 +5428,20 @@ void drcbe_x64::op_ffrflt(Assembler &a, const instruction &inst)
assert(sizep.is_size()); assert(sizep.is_size());
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// single-to-double
if (inst.size() == 8 && sizep.size() == SIZE_DWORD) if (inst.size() == 8 && sizep.size() == SIZE_DWORD)
{ {
// single-to-double
if (srcp.is_memory()) if (srcp.is_memory())
a.cvtss2sd(dstreg, MABS(srcp.memory())); // cvtss2sd dstreg,[srcp] a.cvtss2sd(dstreg, MABS(srcp.memory())); // cvtss2sd dstreg,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
a.cvtss2sd(dstreg, Xmm(srcp.freg())); // cvtss2sd dstreg,srcp a.cvtss2sd(dstreg, Xmm(srcp.freg())); // cvtss2sd dstreg,srcp
movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
} }
// double-to-single
else if (inst.size() == 4 && sizep.size() == SIZE_QWORD) else if (inst.size() == 4 && sizep.size() == SIZE_QWORD)
{ {
// double-to-single
if (srcp.is_memory()) if (srcp.is_memory())
a.cvtsd2ss(dstreg, MABS(srcp.memory())); // cvtsd2ss dstreg,[srcp] a.cvtsd2ss(dstreg, MABS(srcp.memory())); // cvtsd2ss dstreg,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
@ -5470,7 +5467,7 @@ void drcbe_x64::op_frnds(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 64-bit form // 64-bit form
if (srcp.is_memory()) if (srcp.is_memory())
@ -5499,11 +5496,11 @@ void drcbe_x64::op_fadd(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF); be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.addss(dstreg, MABS(src2p.memory())); // addss dstreg,[src2p] a.addss(dstreg, MABS(src2p.memory())); // addss dstreg,[src2p]
@ -5511,10 +5508,9 @@ void drcbe_x64::op_fadd(Assembler &a, const instruction &inst)
a.addss(dstreg, Xmm(src2p.freg())); // addss dstreg,src2p a.addss(dstreg, Xmm(src2p.freg())); // addss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.addsd(dstreg, MABS(src2p.memory())); // addsd dstreg,[src2p] a.addsd(dstreg, MABS(src2p.memory())); // addsd dstreg,[src2p]
@ -5542,11 +5538,11 @@ void drcbe_x64::op_fsub(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF); be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.subss(dstreg, MABS(src2p.memory())); // subss dstreg,[src2p] a.subss(dstreg, MABS(src2p.memory())); // subss dstreg,[src2p]
@ -5554,10 +5550,9 @@ void drcbe_x64::op_fsub(Assembler &a, const instruction &inst)
a.subss(dstreg, Xmm(src2p.freg())); // subss dstreg,src2p a.subss(dstreg, Xmm(src2p.freg())); // subss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.subsd(dstreg, MABS(src2p.memory())); // subsd dstreg,[src2p] a.subsd(dstreg, MABS(src2p.memory())); // subsd dstreg,[src2p]
@ -5584,21 +5579,20 @@ void drcbe_x64::op_fcmp(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(1), PTYPE_MF); be_parameter src2p(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm src1reg = src1p.select_register(xmm0); Xmm const src1reg = src1p.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
movss_r128_p32(a, src1reg, src1p); // movss src1reg,src1p movss_r128_p32(a, src1reg, src1p); // movss src1reg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.comiss(src1reg, MABS(src2p.memory())); // comiss src1reg,[src2p] a.comiss(src1reg, MABS(src2p.memory())); // comiss src1reg,[src2p]
else if (src2p.is_float_register()) else if (src2p.is_float_register())
a.comiss(src1reg, Xmm(src2p.freg())); // comiss src1reg,src2p a.comiss(src1reg, Xmm(src2p.freg())); // comiss src1reg,src2p
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
movsd_r128_p64(a, src1reg, src1p); // movsd src1reg,src1p movsd_r128_p64(a, src1reg, src1p); // movsd src1reg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.comisd(src1reg, MABS(src2p.memory())); // comisd src1reg,[src2p] a.comisd(src1reg, MABS(src2p.memory())); // comisd src1reg,[src2p]
@ -5625,11 +5619,11 @@ void drcbe_x64::op_fmul(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF); be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.mulss(dstreg, MABS(src2p.memory())); // mulss dstreg,[src2p] a.mulss(dstreg, MABS(src2p.memory())); // mulss dstreg,[src2p]
@ -5637,10 +5631,9 @@ void drcbe_x64::op_fmul(Assembler &a, const instruction &inst)
a.mulss(dstreg, Xmm(src2p.freg())); // mulss dstreg,src2p a.mulss(dstreg, Xmm(src2p.freg())); // mulss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.mulsd(dstreg, MABS(src2p.memory())); // mulsd dstreg,[src2p] a.mulsd(dstreg, MABS(src2p.memory())); // mulsd dstreg,[src2p]
@ -5668,11 +5661,11 @@ void drcbe_x64::op_fdiv(Assembler &a, const instruction &inst)
be_parameter src2p(*this, inst.param(2), PTYPE_MF); be_parameter src2p(*this, inst.param(2), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, src2p); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, src2p);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.divss(dstreg, MABS(src2p.memory())); // divss dstreg,[src2p] a.divss(dstreg, MABS(src2p.memory())); // divss dstreg,[src2p]
@ -5680,10 +5673,9 @@ void drcbe_x64::op_fdiv(Assembler &a, const instruction &inst)
a.divss(dstreg, Xmm(src2p.freg())); // divss dstreg,src2p a.divss(dstreg, Xmm(src2p.freg())); // divss dstreg,src2p
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
if (src2p.is_memory()) if (src2p.is_memory())
a.divsd(dstreg, MABS(src2p.memory())); // divsd dstreg,[src2p] a.divsd(dstreg, MABS(src2p.memory())); // divsd dstreg,[src2p]
@ -5709,8 +5701,8 @@ void drcbe_x64::op_fneg(Assembler &a, const instruction &inst)
be_parameter dstp(*this, inst.param(0), PTYPE_MF); be_parameter dstp(*this, inst.param(0), PTYPE_MF);
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
Xmm dstreg = dstp.select_register(xmm0, srcp); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, srcp);
Xmm tempreg = xmm1; Xmm const tempreg = REG_FSCRATCH2;
// note: using memory addrs with xorpd is dangerous because MAME does not guarantee // note: using memory addrs with xorpd is dangerous because MAME does not guarantee
// the memory address will be 16 byte aligned so there's a good chance it'll crash // the memory address will be 16 byte aligned so there's a good chance it'll crash
@ -5730,8 +5722,8 @@ void drcbe_x64::op_fneg(Assembler &a, const instruction &inst)
if (inst.size() == 4) if (inst.size() == 4)
{ {
a.mov(rax, 0x80000000); a.mov(eax, 0x80000000);
a.movd(dstreg, rax); a.movd(dstreg, eax);
if (srcp.is_memory()) if (srcp.is_memory())
a.xorpd(dstreg, tempreg); a.xorpd(dstreg, tempreg);
else if (srcp.is_float_register()) else if (srcp.is_float_register())
@ -5767,19 +5759,18 @@ void drcbe_x64::op_fabs(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0, srcp); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1, srcp);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
movss_r128_p32(a, dstreg, srcp); // movss dstreg,srcp movss_r128_p32(a, dstreg, srcp); // movss dstreg,srcp
a.andps(dstreg, MABS(m_absmask32)); // andps dstreg,[absmask32] a.andps(dstreg, MABS(m_absmask32)); // andps dstreg,[absmask32]
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
movsd_r128_p64(a, dstreg, srcp); // movsd dstreg,srcp movsd_r128_p64(a, dstreg, srcp); // movsd dstreg,srcp
a.andpd(dstreg, MABS(m_absmask64)); // andpd dstreg,[absmask64] a.andpd(dstreg, MABS(m_absmask64)); // andpd dstreg,[absmask64]
movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
@ -5803,21 +5794,20 @@ void drcbe_x64::op_fsqrt(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
if (srcp.is_memory()) if (srcp.is_memory())
a.sqrtss(dstreg, MABS(srcp.memory())); // sqrtss dstreg,[srcp] a.sqrtss(dstreg, MABS(srcp.memory())); // sqrtss dstreg,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
a.sqrtss(dstreg, Xmm(srcp.freg())); // sqrtss dstreg,srcp a.sqrtss(dstreg, Xmm(srcp.freg())); // sqrtss dstreg,srcp
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
if (srcp.is_memory()) if (srcp.is_memory())
a.sqrtsd(dstreg, MABS(srcp.memory())); // sqrtsd dstreg,[srcp] a.sqrtsd(dstreg, MABS(srcp.memory())); // sqrtsd dstreg,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
@ -5843,11 +5833,11 @@ void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
if (USE_RCPSS_FOR_SINGLES) if (USE_RCPSS_FOR_SINGLES)
{ {
if (srcp.is_memory()) if (srcp.is_memory())
@ -5858,18 +5848,17 @@ void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
} }
else else
{ {
a.movss(xmm1, MABS(&m_near.single1)); // movss xmm1,1.0 a.movss(REG_FSCRATCH2, MABS(&m_near.single1)); // movss xmm1,1.0
if (srcp.is_memory()) if (srcp.is_memory())
a.divss(xmm1, MABS(srcp.memory())); // divss xmm1,[srcp] a.divss(REG_FSCRATCH2, MABS(srcp.memory())); // divss xmm1,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
a.divss(xmm1, Xmm(srcp.freg())); // divss xmm1,srcp a.divss(REG_FSCRATCH2, Xmm(srcp.freg())); // divss xmm1,srcp
movss_p32_r128(a, dstp, xmm1); // movss dstp,xmm1 movss_p32_r128(a, dstp, REG_FSCRATCH2); // movss dstp,xmm1
} }
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
if (USE_RCPSS_FOR_DOUBLES) if (USE_RCPSS_FOR_DOUBLES)
{ {
if (srcp.is_memory()) if (srcp.is_memory())
@ -5882,12 +5871,12 @@ void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
} }
else else
{ {
a.movsd(xmm1, MABS(&m_near.double1)); // movsd xmm1,1.0 a.movsd(REG_FSCRATCH2, MABS(&m_near.double1)); // movsd xmm1,1.0
if (srcp.is_memory()) if (srcp.is_memory())
a.divsd(xmm1, MABS(srcp.memory())); // divsd xmm1,[srcp] a.divsd(REG_FSCRATCH2, MABS(srcp.memory())); // divsd xmm1,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
a.divsd(xmm1, Xmm(srcp.freg())); // divsd xmm1,srcp a.divsd(REG_FSCRATCH2, Xmm(srcp.freg())); // divsd xmm1,srcp
movsd_p64_r128(a, dstp, xmm1); // movsd dstp,xmm1 movsd_p64_r128(a, dstp, REG_FSCRATCH2); // movsd dstp,xmm1
} }
} }
} }
@ -5909,11 +5898,11 @@ void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
if (USE_RSQRTSS_FOR_SINGLES) if (USE_RSQRTSS_FOR_SINGLES)
{ {
if (srcp.is_memory()) if (srcp.is_memory())
@ -5924,18 +5913,17 @@ void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
else else
{ {
if (srcp.is_memory()) if (srcp.is_memory())
a.sqrtss(xmm1, MABS(srcp.memory())); // sqrtss xmm1,[srcp] a.sqrtss(REG_FSCRATCH2, MABS(srcp.memory())); // sqrtss xmm1,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
a.sqrtss(xmm1, Xmm(srcp.freg())); // sqrtss xmm1,srcp a.sqrtss(REG_FSCRATCH2, Xmm(srcp.freg())); // sqrtss xmm1,srcp
a.movss(dstreg, MABS(&m_near.single1)); // movss dstreg,1.0 a.movss(dstreg, MABS(&m_near.single1)); // movss dstreg,1.0
a.divss(dstreg, xmm1); // divss dstreg,xmm1 a.divss(dstreg, REG_FSCRATCH2); // divss dstreg,xmm1
} }
movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
if (USE_RSQRTSS_FOR_DOUBLES) if (USE_RSQRTSS_FOR_DOUBLES)
{ {
if (srcp.is_memory()) if (srcp.is_memory())
@ -5948,11 +5936,11 @@ void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
else else
{ {
if (srcp.is_memory()) if (srcp.is_memory())
a.sqrtsd(xmm1, MABS(srcp.memory())); // sqrtsd xmm1,[srcp] a.sqrtsd(REG_FSCRATCH2, MABS(srcp.memory())); // sqrtsd xmm1,[srcp]
else if (srcp.is_float_register()) else if (srcp.is_float_register())
a.sqrtsd(xmm1, Xmm(srcp.freg())); // sqrtsd xmm1,srcp a.sqrtsd(REG_FSCRATCH2, Xmm(srcp.freg())); // sqrtsd xmm1,srcp
a.movsd(dstreg, MABS(&m_near.double1)); // movsd dstreg,1.0 a.movsd(dstreg, MABS(&m_near.double1)); // movsd dstreg,1.0
a.divsd(dstreg, xmm1); // divsd dstreg,xmm1 a.divsd(dstreg, REG_FSCRATCH2); // divsd dstreg,xmm1
} }
movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
} }
@ -5975,11 +5963,11 @@ void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst)
be_parameter srcp(*this, inst.param(1), PTYPE_MR); be_parameter srcp(*this, inst.param(1), PTYPE_MR);
// pick a target register for the general case // pick a target register for the general case
Xmm dstreg = dstp.select_register(xmm0); Xmm const dstreg = dstp.select_register(REG_FSCRATCH1);
// 32-bit form
if (inst.size() == 4) if (inst.size() == 4)
{ {
// 32-bit form
if (srcp.is_memory()) if (srcp.is_memory())
{ {
a.movd(dstreg, MABS(srcp.memory())); a.movd(dstreg, MABS(srcp.memory()));
@ -5996,10 +5984,9 @@ void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst)
} }
} }
// 64-bit form
else if (inst.size() == 8) else if (inst.size() == 8)
{ {
// 64-bit form
if (srcp.is_memory()) if (srcp.is_memory())
{ {
a.movq(dstreg, MABS(srcp.memory())); a.movq(dstreg, MABS(srcp.memory()));
@ -6038,7 +6025,7 @@ void drcbe_x64::op_icopyf(Assembler &a, const instruction &inst)
// 32-bit form // 32-bit form
if (srcp.is_memory()) if (srcp.is_memory())
{ {
Gp dstreg = dstp.select_register(eax); Gp const dstreg = dstp.select_register(eax);
a.mov(dstreg, MABS(srcp.memory())); a.mov(dstreg, MABS(srcp.memory()));
mov_param_reg(a, dstp, dstreg); mov_param_reg(a, dstp, dstreg);
} }
@ -6056,7 +6043,7 @@ void drcbe_x64::op_icopyf(Assembler &a, const instruction &inst)
// 64-bit form // 64-bit form
if (srcp.is_memory()) if (srcp.is_memory())
{ {
Gp dstreg = dstp.select_register(rax); Gp const dstreg = dstp.select_register(rax);
a.mov(dstreg, MABS(srcp.memory())); a.mov(dstreg, MABS(srcp.memory()));
mov_param_reg(a, dstp, dstreg); mov_param_reg(a, dstp, dstreg);
} }