From 4a3b37d368dff843a99e762eab8db3cd354ae98e Mon Sep 17 00:00:00 2001 From: Vas Crabb Date: Sat, 15 Mar 2025 13:44:49 +1100 Subject: [PATCH] cpu/drcbearm64.cpp: Fixed issues with and optimise ADD[C]/SUB[B] code generation. (#13476) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fixed potential crash on generating code for UML ADDC or SUBB with immediate operands (GitHub #13475). * Optimised generated code for various degenerate cases and cases where register loads can be avoided. --- src/devices/cpu/drcbearm64.cpp | 187 ++++++++++++++++++++++++++------- 1 file changed, 149 insertions(+), 38 deletions(-) diff --git a/src/devices/cpu/drcbearm64.cpp b/src/devices/cpu/drcbearm64.cpp index 7f4eb9d6650..bc09029b5ef 100644 --- a/src/devices/cpu/drcbearm64.cpp +++ b/src/devices/cpu/drcbearm64.cpp @@ -490,8 +490,8 @@ private: void op_sext(asmjit::a64::Assembler &a, const uml::instruction &inst); void op_roland(asmjit::a64::Assembler &a, const uml::instruction &inst); void op_rolins(asmjit::a64::Assembler &a, const uml::instruction &inst); - template <a64::Inst::Id Opcode> void op_add(asmjit::a64::Assembler &a, const uml::instruction &inst); - template <a64::Inst::Id Opcode> void op_sub(asmjit::a64::Assembler &a, const uml::instruction &inst); + template <bool CarryIn> void op_add(asmjit::a64::Assembler &a, const uml::instruction &inst); + template <bool CarryIn> void op_sub(asmjit::a64::Assembler &a, const uml::instruction &inst); void op_cmp(asmjit::a64::Assembler &a, const uml::instruction &inst); void op_mulu(asmjit::a64::Assembler &a, const uml::instruction &inst); void op_mululw(asmjit::a64::Assembler &a, const uml::instruction &inst); @@ -646,10 +646,10 @@ const drcbe_arm64::opcode_table_entry drcbe_arm64::s_opcode_table_source[] = { uml::OP_SEXT, &drcbe_arm64::op_sext }, // SEXT dst,src { uml::OP_ROLAND, &drcbe_arm64::op_roland }, // ROLAND dst,src1,src2,src3 { uml::OP_ROLINS, &drcbe_arm64::op_rolins }, // ROLINS dst,src1,src2,src3 - { uml::OP_ADD, &drcbe_arm64::op_add<a64::Inst::kIdAdds> }, // ADD dst,src1,src2[,f] 
- { uml::OP_ADDC, &drcbe_arm64::op_add<a64::Inst::kIdAdcs> }, // ADDC dst,src1,src2[,f] - { uml::OP_SUB, &drcbe_arm64::op_sub<a64::Inst::kIdSubs> }, // SUB dst,src1,src2[,f] - { uml::OP_SUBB, &drcbe_arm64::op_sub<a64::Inst::kIdSbcs> }, // SUBB dst,src1,src2[,f] + { uml::OP_ADD, &drcbe_arm64::op_add<false> }, // ADD dst,src1,src2[,f] + { uml::OP_ADDC, &drcbe_arm64::op_add<true> }, // ADDC dst,src1,src2[,f] + { uml::OP_SUB, &drcbe_arm64::op_sub<false> }, // SUB dst,src1,src2[,f] + { uml::OP_SUBB, &drcbe_arm64::op_sub<true> }, // SUBB dst,src1,src2[,f] { uml::OP_CMP, &drcbe_arm64::op_cmp }, // CMP src1,src2[,f] { uml::OP_MULU, &drcbe_arm64::op_mulu }, // MULU dst,edst,src1,src2[,f] { uml::OP_MULULW, &drcbe_arm64::op_mululw }, // MULULW dst,src1,src2[,f] @@ -3227,91 +3227,202 @@ void drcbe_arm64::op_rolins(a64::Assembler &a, const uml::instruction &inst) a.tst(dst, dst); } -template <a64::Inst::Id Opcode> void drcbe_arm64::op_add(a64::Assembler &a, const uml::instruction &inst) +template <bool CarryIn> void drcbe_arm64::op_add(a64::Assembler &a, const uml::instruction &inst) { assert(inst.size() == 4 || inst.size() == 8); assert_no_condition(inst); assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S); + const a64::Inst::Id opcode = CarryIn ? 
a64::Inst::kIdAdcs : a64::Inst::kIdAdds; + be_parameter dstp(*this, inst.param(0), PTYPE_MR); be_parameter src1p(*this, inst.param(1), PTYPE_MRI); be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + const a64::Gp zero = select_register(a64::xzr, inst.size()); + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); + const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size()); - if (Opcode == a64::Inst::kIdAdcs) + if (CarryIn) load_carry(a); - if (src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate())) + if (src1p.is_immediate_value(0)) { - const a64::Gp src = src2p.select_register(TEMP_REG2, inst.size()); - - mov_reg_param(a, inst.size(), src, src2p); - if (src1p.immediate() == 0) - a.emit(Opcode, output, src, select_register(a64::xzr, inst.size())); + if (src2p.is_immediate_value(0)) + { + if (CarryIn) + { + a.emit(opcode, output, zero, zero); + mov_param_reg(a, inst.size(), dstp, output); + } + else + { + mov_param_reg(a, inst.size(), dstp, zero); + a.emit(opcode, zero, zero, zero); + } + } + else if (!CarryIn && src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate())) + { + a.emit(opcode, output, zero, src2p.immediate()); + mov_param_reg(a, inst.size(), dstp, output); + } + else if (!CarryIn && src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 24)) + { + a.emit(opcode, output, zero, src2p.immediate() & (util::make_bitmask(12) << 12)); + a.emit(opcode, output, output, src2p.immediate() & util::make_bitmask(12)); + mov_param_reg(a, inst.size(), dstp, output); + } else - a.emit(Opcode, output, src, src1p.immediate()); + { + mov_reg_param(a, inst.size(), src2, src2p); + a.emit(opcode, output, src2, zero); + mov_param_reg(a, inst.size(), dstp, output); + } + } + else if (src2p.is_immediate_value(0)) + { + if (!CarryIn && src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate())) + { + a.emit(opcode, output, zero, 
src1p.immediate()); + mov_param_reg(a, inst.size(), dstp, output); + } + else if (!CarryIn && src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 24)) + { + a.emit(opcode, output, zero, src1p.immediate() & (util::make_bitmask(12) << 12)); + a.emit(opcode, output, output, src1p.immediate() & util::make_bitmask(12)); + mov_param_reg(a, inst.size(), dstp, output); + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + a.emit(opcode, output, src1, zero); + mov_param_reg(a, inst.size(), dstp, output); + } + } + else if (!CarryIn && src1p.is_immediate() && is_valid_immediate_addsub(src1p.immediate())) + { + mov_reg_param(a, inst.size(), src2, src2p); + a.emit(opcode, output, src2, src1p.immediate()); mov_param_reg(a, inst.size(), dstp, output); } - else if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate())) + else if (!CarryIn && src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate())) { - const a64::Gp src = src1p.select_register(TEMP_REG1, inst.size()); - - mov_reg_param(a, inst.size(), src, src1p); - if (src2p.is_immediate_value(0)) - a.emit(Opcode, output, src, select_register(a64::xzr, inst.size())); - else - a.emit(Opcode, output, src, src2p.immediate()); + mov_reg_param(a, inst.size(), src1, src1p); + a.emit(opcode, output, src1, src2p.immediate()); + mov_param_reg(a, inst.size(), dstp, output); + } + else if (!CarryIn && !inst.flags() && src1p.is_immediate() && is_valid_immediate(src1p.immediate(), 24)) + { + // will still alter flags, but carry and overflow values will be incorrect for this path + mov_reg_param(a, inst.size(), src2, src2p); + a.emit(opcode, output, src2, src1p.immediate() & (util::make_bitmask(12) << 12)); + a.emit(opcode, output, output, src1p.immediate() & util::make_bitmask(12)); + mov_param_reg(a, inst.size(), dstp, output); + } + else if (!CarryIn && !inst.flags() && src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 24)) + { + // will still alter flags, but carry and overflow 
values will be incorrect for this path + mov_reg_param(a, inst.size(), src1, src1p); + a.emit(opcode, output, src1, src2p.immediate() & (util::make_bitmask(12) << 12)); + a.emit(opcode, output, output, src2p.immediate() & util::make_bitmask(12)); mov_param_reg(a, inst.size(), dstp, output); } else { - const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); - const a64::Gp src2 = src2p.select_register(TEMP_REG2, inst.size()); - mov_reg_param(a, inst.size(), src1, src1p); mov_reg_param(a, inst.size(), src2, src2p); - a.emit(Opcode, output, src1, src2); + a.emit(opcode, output, src1, src2); mov_param_reg(a, inst.size(), dstp, output); } store_carry(a); } -template <a64::Inst::Id Opcode> void drcbe_arm64::op_sub(a64::Assembler &a, const uml::instruction &inst) +template <bool CarryIn> void drcbe_arm64::op_sub(a64::Assembler &a, const uml::instruction &inst) { assert(inst.size() == 4 || inst.size() == 8); assert_no_condition(inst); assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S); + const a64::Inst::Id opcode = CarryIn ? 
a64::Inst::kIdSbcs : a64::Inst::kIdSubs; + be_parameter dstp(*this, inst.param(0), PTYPE_MR); be_parameter src1p(*this, inst.param(1), PTYPE_MRI); be_parameter src2p(*this, inst.param(2), PTYPE_MRI); - if (Opcode == a64::Inst::kIdSbcs) + if (CarryIn) load_carry(a, true); + const a64::Gp zero = select_register(a64::xzr, inst.size()); + const a64::Gp src1 = src1p.select_register(TEMP_REG1, inst.size()); const a64::Gp output = dstp.select_register(TEMP_REG3, inst.size()); - if (src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate())) + if (src2p.is_immediate_value(0)) { - const a64::Gp src = select_register(TEMP_REG1, inst.size()); - - mov_reg_param(a, inst.size(), src, src1p); - if (src2p.is_immediate_value(0)) - a.emit(Opcode, output, src, select_register(a64::xzr, inst.size())); + if (src1p.is_immediate_value(0)) + { + if (CarryIn) + { + a.emit(opcode, output, zero, zero); + mov_param_reg(a, inst.size(), dstp, output); + } + else + { + mov_param_reg(a, inst.size(), dstp, zero); + a.emit(opcode, zero, zero, zero); + } + } else - a.emit(Opcode, output, src, src2p.immediate()); + { + mov_reg_param(a, inst.size(), src1, src1p); + if (CarryIn) + { + a.emit(opcode, output, src1, zero); + mov_param_reg(a, inst.size(), dstp, output); + } + else + { + mov_param_reg(a, inst.size(), dstp, src1); + a.emit(opcode, zero, src1, zero); + } + } + } + else if (!CarryIn && src2p.is_immediate() && is_valid_immediate_addsub(src2p.immediate())) + { + if (src1p.is_immediate_value(0)) + { + a.emit(opcode, output, zero, src2p.immediate()); + } + else + { + mov_reg_param(a, inst.size(), src1, src1p); + a.emit(opcode, output, src1, src2p.immediate()); + } + mov_param_reg(a, inst.size(), dstp, output); + } + else if (!CarryIn && (!inst.flags() || src1p.is_immediate_value(0)) && src2p.is_immediate() && is_valid_immediate(src2p.immediate(), 24)) + { + if (src1p.is_immediate_value(0)) + { + a.emit(opcode, output, zero, src2p.immediate() & (util::make_bitmask(12) << 12)); + } + else 
+ { + // will still alter flags, but carry and overflow values will be incorrect for this path + mov_reg_param(a, inst.size(), src1, src1p); + a.emit(opcode, output, src1, src2p.immediate() & (util::make_bitmask(12) << 12)); + } + a.emit(opcode, output, output, src2p.immediate() & util::make_bitmask(12)); mov_param_reg(a, inst.size(), dstp, output); } else { - const a64::Gp src1 = select_register(TEMP_REG1, inst.size()); const a64::Gp src2 = select_register(TEMP_REG2, inst.size()); mov_reg_param(a, inst.size(), src1, src1p); mov_reg_param(a, inst.size(), src2, src2p); - a.emit(Opcode, output, src1, src2); + a.emit(opcode, output, src1, src2); mov_param_reg(a, inst.size(), dstp, output); }