From ec1f9b948c496535c89eaa70e65f38ae6ab34f10 Mon Sep 17 00:00:00 2001
From: Ville Linde
Date: Fri, 20 May 2016 01:23:22 +0300
Subject: [PATCH] UML: Added TZCNT instruction (Trailing Zero Count) [Ville Linde]

---
 src/devices/cpu/drcbec.cpp   | 40 +++++++++++++++++++++++++++++++
 src/devices/cpu/drcbec.h     |  2 ++
 src/devices/cpu/drcbex64.cpp | 40 +++++++++++++++++++++++++++++++
 src/devices/cpu/drcbex64.h   |  1 +
 src/devices/cpu/drcbex86.cpp | 46 ++++++++++++++++++++++++++++++++++++
 src/devices/cpu/drcbex86.h   |  1 +
 src/devices/cpu/drcumlsh.h   |  2 ++
 src/devices/cpu/uml.cpp      |  2 ++
 src/devices/cpu/uml.h        |  3 +++
 9 files changed, 137 insertions(+)

diff --git a/src/devices/cpu/drcbec.cpp b/src/devices/cpu/drcbec.cpp
index ac6da189688..c48bb990f82 100644
--- a/src/devices/cpu/drcbec.cpp
+++ b/src/devices/cpu/drcbec.cpp
@@ -1070,6 +1070,16 @@ int drcbe_c::execute(code_handle &entry)
 				PARAM0 = temp32;
 				break;
 
+			case MAKE_OPCODE_SHORT(OP_TZCNT, 4, 0):     // TZCNT   dst,src
+				PARAM0 = tzcount32(PARAM1);
+				break;
+
+			case MAKE_OPCODE_SHORT(OP_TZCNT, 4, 1):     // TZCNT   dst,src (also computes flags)
+				temp32 = tzcount32(PARAM1);
+				flags = (temp32 == 32) ? FLAG_Z : 0;        // Z only when the source had no set bit
+				PARAM0 = temp32;
+				break;
+
 			case MAKE_OPCODE_SHORT(OP_BSWAP, 4, 0):     // BSWAP   dst,src
 				temp32 = PARAM1;
 				PARAM0 = FLIPENDIAN_INT32(temp32);
@@ -1679,6 +1689,16 @@ int drcbe_c::execute(code_handle &entry)
 				DPARAM0 = temp64;
 				break;
 
+			case MAKE_OPCODE_SHORT(OP_TZCNT, 8, 0):     // DTZCNT  dst,src
+				DPARAM0 = tzcount64(DPARAM1);
+				break;
+
+			case MAKE_OPCODE_SHORT(OP_TZCNT, 8, 1):     // DTZCNT  dst,src (also computes flags)
+				temp64 = tzcount64(DPARAM1);
+				flags = (temp64 == 64) ? FLAG_Z : 0;        // Z only when the source had no set bit
+				DPARAM0 = temp64;
+				break;
+
 			case MAKE_OPCODE_SHORT(OP_BSWAP, 8, 0):     // DBSWAP  dst,src
 				temp64 = DPARAM1;
 				DPARAM0 = FLIPENDIAN_INT64(temp64);
@@ -2289,3 +2309,23 @@ int drcbe_c::dmuls(UINT64 &dstlo, UINT64 &dsthi, INT64 src1, INT64 src2, int fla
 	dstlo = lo;
 	return ((hi >> 60) & FLAG_S) | ((dsthi != ((INT64)lo >> 63)) << 1);
 }
+
+UINT32 drcbe_c::tzcount32(UINT32 value)    // index of the lowest set bit of value, or 32 if none
+{
+	for (int i = 0; i < 32; i++)
+	{
+		if (value & ((UINT32)(1) << i))    // unsigned mask: a signed 1 << 31 would shift into the sign bit
+			return i;
+	}
+	return 32;    // no bit set: report the full operand width
+}
+
+UINT64 drcbe_c::tzcount64(UINT64 value)    // index of the lowest set bit of value, or 64 if none
+{
+	for (int i = 0; i < 64; i++)
+	{
+		if (value & ((UINT64)(1) << i))
+			return i;
+	}
+	return 64;    // no bit set: report the full operand width
+}
diff --git a/src/devices/cpu/drcbec.h b/src/devices/cpu/drcbec.h
index a205dcf6302..7326144681e 100644
--- a/src/devices/cpu/drcbec.h
+++ b/src/devices/cpu/drcbec.h
@@ -44,6 +44,8 @@ private:
 	void fixup_label(void *parameter, drccodeptr labelcodeptr);
 	int dmulu(UINT64 &dstlo, UINT64 &dsthi, UINT64 src1, UINT64 src2, int flags);
 	int dmuls(UINT64 &dstlo, UINT64 &dsthi, INT64 src1, INT64 src2, int flags);
+	UINT32 tzcount32(UINT32 value);
+	UINT64 tzcount64(UINT64 value);
 
 	// internal state
 	drc_hash_table          m_hash;                 // hash table state
diff --git a/src/devices/cpu/drcbex64.cpp b/src/devices/cpu/drcbex64.cpp
index 9c8ae6e0bbb..39863b42f15 100644
--- a/src/devices/cpu/drcbex64.cpp
+++ b/src/devices/cpu/drcbex64.cpp
@@ -361,6 +361,7 @@ const drcbe_x64::opcode_table_entry drcbe_x64::s_opcode_table_source[] =
 	{ uml::OP_OR,      &drcbe_x64::op_or },         // OR      dst,src1,src2[,f]
 	{ uml::OP_XOR,     &drcbe_x64::op_xor },        // XOR     dst,src1,src2[,f]
 	{ uml::OP_LZCNT,   &drcbe_x64::op_lzcnt },      // LZCNT   dst,src[,f]
+	{ uml::OP_TZCNT,   &drcbe_x64::op_tzcnt },      // TZCNT   dst,src[,f]
 	{ uml::OP_BSWAP,   &drcbe_x64::op_bswap },      // BSWAP   dst,src
 	{ uml::OP_SHL,     &drcbe_x64::op_shl },        // SHL     dst,src,count[,f]
 	{ uml::OP_SHR,     &drcbe_x64::op_shr },        // SHR     dst,src,count[,f]
@@ -5360,6 +5361,45 @@ void drcbe_x64::op_lzcnt(x86code *&dst, const instruction &inst)
 }
 
 
+//-------------------------------------------------
+//  op_tzcnt - process a TZCNT opcode
+//-------------------------------------------------
+
+void drcbe_x64::op_tzcnt(x86code *&dst, const instruction &inst)
+{
+	// validate instruction
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_Z | FLAG_S);
+
+	// normalize parameters
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+
+	// 32-bit form: BSF sets ZF on a zero source, so CMOVZ substitutes the operand width
+	if (inst.size() == 4)
+	{
+		int dstreg = dstp.select_register(REG_EAX);
+		emit_mov_r32_p32(dst, dstreg, srcp);                                // mov   dstreg,srcp
+		emit_mov_r32_imm(dst, REG_ECX, 32);                                 // mov   ecx,32
+		emit_bsf_r32_r32(dst, dstreg, dstreg);                              // bsf   dstreg,dstreg
+		emit_cmovcc_r32_r32(dst, x64emit::COND_Z, dstreg, REG_ECX);         // cmovz dstreg,ecx
+		emit_mov_p32_r32(dst, dstp, dstreg);                                // mov   dstp,dstreg
+	}
+
+	// 64-bit form: same sequence on 64-bit registers, substituting 64 for a zero source
+	else if (inst.size() == 8)
+	{
+		int dstreg = dstp.select_register(REG_RAX);
+		emit_mov_r64_p64(dst, dstreg, srcp);                                // mov   dstreg,srcp
+		emit_mov_r64_imm(dst, REG_RCX, 64);                                 // mov   rcx,64
+		emit_bsf_r64_r64(dst, dstreg, dstreg);                              // bsf   dstreg,dstreg
+		emit_cmovcc_r64_r64(dst, x64emit::COND_Z, dstreg, REG_RCX);         // cmovz dstreg,rcx
+		emit_mov_p64_r64(dst, dstp, dstreg);                                // mov   dstp,dstreg
+	}
+}
+
+
 //-------------------------------------------------
 //  op_bswap - process a BSWAP opcode
 //-------------------------------------------------
diff --git a/src/devices/cpu/drcbex64.h b/src/devices/cpu/drcbex64.h
index a538f9bf1e7..cc78b52921d 100644
--- a/src/devices/cpu/drcbex64.h
+++ b/src/devices/cpu/drcbex64.h
@@ -179,6 +179,7 @@ private:
 	void op_or(x86code *&dst, const uml::instruction &inst);
 	void op_xor(x86code *&dst, const uml::instruction &inst);
 	void op_lzcnt(x86code *&dst, const uml::instruction &inst);
+	void op_tzcnt(x86code *&dst, const uml::instruction &inst);
 	void op_bswap(x86code *&dst, const uml::instruction &inst);
 	void op_shl(x86code *&dst, const uml::instruction &inst);
 	void op_shr(x86code *&dst, const uml::instruction &inst);
diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp
index 478e9f08bc0..2613028d03b 100644
--- a/src/devices/cpu/drcbex86.cpp
+++ b/src/devices/cpu/drcbex86.cpp
@@ -243,6 +243,7 @@ const drcbe_x86::opcode_table_entry drcbe_x86::s_opcode_table_source[] =
 	{ uml::OP_OR,      &drcbe_x86::op_or },         // OR      dst,src1,src2[,f]
 	{ uml::OP_XOR,     &drcbe_x86::op_xor },        // XOR     dst,src1,src2[,f]
 	{ uml::OP_LZCNT,   &drcbe_x86::op_lzcnt },      // LZCNT   dst,src[,f]
+	{ uml::OP_TZCNT,   &drcbe_x86::op_tzcnt },      // TZCNT   dst,src[,f]
 	{ uml::OP_BSWAP,   &drcbe_x86::op_bswap },      // BSWAP   dst,src
 	{ uml::OP_SHL,     &drcbe_x86::op_shl },        // SHL     dst,src,count[,f]
 	{ uml::OP_SHR,     &drcbe_x86::op_shr },        // SHR     dst,src,count[,f]
@@ -5475,6 +5476,51 @@ void drcbe_x86::op_lzcnt(x86code *&dst, const instruction &inst)
 }
 
 
+//-------------------------------------------------
+//  op_tzcnt - process a TZCNT opcode
+//-------------------------------------------------
+
+void drcbe_x86::op_tzcnt(x86code *&dst, const instruction &inst)
+{
+	// validate instruction
+	assert(inst.size() == 4 || inst.size() == 8);
+	assert_no_condition(inst);
+	assert_flags(inst, FLAG_Z | FLAG_S);
+
+	// normalize parameters
+	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
+	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
+
+	int dstreg = dstp.select_register(REG_EAX);
+
+	// 32-bit form: BSF sets ZF on a zero source, so CMOVZ substitutes the operand width
+	if (inst.size() == 4)
+	{
+		emit_mov_r32_p32(dst, dstreg, srcp);                                // mov   dstreg,srcp
+		emit_mov_r32_imm(dst, REG_ECX, 32);                                 // mov   ecx,32
+		emit_bsf_r32_r32(dst, dstreg, dstreg);                              // bsf   dstreg,dstreg
+		emit_cmovcc_r32_r32(dst, x86emit::COND_Z, dstreg, REG_ECX);         // cmovz dstreg,ecx
+		emit_mov_p32_r32(dst, dstp, dstreg);                                // mov   dstp,dstreg
+	}
+
+	// 64-bit form: scan the low dword first; only if it is zero scan the high dword and add 32
+	else if (inst.size() == 8)
+	{
+		emit_link skip;
+		emit_mov_r64_p64(dst, dstreg, REG_EDX, srcp);                       // mov   edx:dstreg,srcp (low dword in dstreg)
+		emit_bsf_r32_r32(dst, dstreg, dstreg);                              // bsf   dstreg,dstreg
+		emit_jcc_short_link(dst, x86emit::COND_NZ, skip);                   // jnz   skip
+		emit_mov_r32_imm(dst, REG_ECX, 32);                                 // mov   ecx,32
+		emit_bsf_r32_r32(dst, dstreg, REG_EDX);                             // bsf   dstreg,edx
+		emit_cmovcc_r32_r32(dst, x86emit::COND_Z, dstreg, REG_ECX);         // cmovz dstreg,ecx
+		emit_add_r32_imm(dst, dstreg, 32);                                  // add   dstreg,32
+		track_resolve_link(dst, skip);                                      // skip:
+		emit_xor_r32_r32(dst, REG_EDX, REG_EDX);                            // xor   edx,edx (NOTE(review): add/xor overwrite ZF - confirm flag users)
+		emit_mov_p64_r64(dst, dstp, dstreg, REG_EDX);                       // mov   dstp,edx:dstreg
+	}
+}
+
+
 //-------------------------------------------------
 //  op_bswap - process a BSWAP opcode
 //-------------------------------------------------
diff --git a/src/devices/cpu/drcbex86.h b/src/devices/cpu/drcbex86.h
index 397cbb53215..67b49c167ec 100644
--- a/src/devices/cpu/drcbex86.h
+++ b/src/devices/cpu/drcbex86.h
@@ -180,6 +180,7 @@ private:
 	void op_or(x86code *&dst, const uml::instruction &inst);
 	void op_xor(x86code *&dst, const uml::instruction &inst);
 	void op_lzcnt(x86code *&dst, const uml::instruction &inst);
+	void op_tzcnt(x86code *&dst, const uml::instruction &inst);
 	void op_bswap(x86code *&dst, const uml::instruction &inst);
 	void op_shl(x86code *&dst, const uml::instruction &inst);
 	void op_shr(x86code *&dst, const uml::instruction &inst);
diff --git a/src/devices/cpu/drcumlsh.h b/src/devices/cpu/drcumlsh.h
index 12f5abc20eb..ba73e8f61ac 100644
--- a/src/devices/cpu/drcumlsh.h
+++ b/src/devices/cpu/drcumlsh.h
@@ -87,6 +87,7 @@
 #define UML_OR(block, dst, src1, src2)              do { block->append()._or(dst, src1, src2); } while (0)
 #define UML_XOR(block, dst, src1, src2)             do { block->append()._xor(dst, src1, src2); } while (0)
 #define UML_LZCNT(block, dst, src)                  do { block->append().lzcnt(dst, src); } while (0)
+#define UML_TZCNT(block, dst, src)                  do { block->append().tzcnt(dst, src); } while (0)
 #define UML_BSWAP(block, dst, src)                  do { block->append().bswap(dst, src); } while (0)
 #define UML_SHL(block, dst, src, count)             do { block->append().shl(dst, src, count); } while (0)
 #define UML_SHR(block, dst, src, count)             do { block->append().shr(dst, src, count); } while (0)
@@ -126,6 +127,7 @@
 #define UML_DOR(block, dst, src1, src2)             do { block->append().dor(dst, src1, src2); } while (0)
 #define UML_DXOR(block, dst, src1, src2)            do { block->append().dxor(dst, src1, src2); } while (0)
 #define UML_DLZCNT(block, dst, src)                 do { block->append().dlzcnt(dst, src); } while (0)
+#define UML_DTZCNT(block, dst, src)                 do { block->append().dtzcnt(dst, src); } while (0)
 #define UML_DBSWAP(block, dst, src)                 do { block->append().dbswap(dst, src); } while (0)
 #define UML_DSHL(block, dst, src, count)            do { block->append().dshl(dst, src, count); } while (0)
 #define UML_DSHR(block, dst, src, count)            do { block->append().dshr(dst, src, count); } while (0)
diff --git a/src/devices/cpu/uml.cpp b/src/devices/cpu/uml.cpp
index 466c47aeda4..5810391dcb9 100644
--- a/src/devices/cpu/uml.cpp
+++ b/src/devices/cpu/uml.cpp
@@ -55,6 +55,7 @@ using namespace uml;
 // opcode validation condition/flag valid bitmasks
 #define OPFLAGS_NONE    0x00
 #define OPFLAGS_C       FLAG_C
+#define OPFLAGS_Z       FLAG_Z
 #define OPFLAGS_SZ      (FLAG_S | FLAG_Z)
 #define OPFLAGS_SZC     (FLAG_S | FLAG_Z | FLAG_C)
 #define OPFLAGS_SZV     (FLAG_S | FLAG_Z | FLAG_V)
@@ -182,6 +183,7 @@ const opcode_info instruction::s_opcode_info_table[OP_MAX] =
 	OPINFO3(OR,       "!or",       4|8, false, NONE, SZ,   ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY))
 	OPINFO3(XOR,      "!xor",      4|8, false, NONE, SZ,   ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY))
 	OPINFO2(LZCNT,    "!lzcnt",    4|8, false, NONE, SZ,   ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY))
+	OPINFO2(TZCNT,    "!tzcnt",    4|8, false, NONE, SZ,   ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY))
 	OPINFO2(BSWAP,    "!bswap",    4|8, false, NONE, SZ,   ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY))
 	OPINFO3(SHL,      "!shl",      4|8, false, NONE, SZC,  ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY))
 	OPINFO3(SHR,      "!shr",      4|8, false, NONE, SZC,  ALL,  PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY))
diff --git a/src/devices/cpu/uml.h b/src/devices/cpu/uml.h
index 5b390700ced..9c2b9a4c80e 100644
--- a/src/devices/cpu/uml.h
+++ b/src/devices/cpu/uml.h
@@ -189,6 +189,7 @@ namespace uml
 		OP_OR,                  // OR      dst,src1,src2[,f]
 		OP_XOR,                 // XOR     dst,src1,src2[,f]
 		OP_LZCNT,               // LZCNT   dst,src
+		OP_TZCNT,               // TZCNT   dst,src
 		OP_BSWAP,               // BSWAP   dst,src
 		OP_SHL,                 // SHL     dst,src,count[,f]
 		OP_SHR,                 // SHR     dst,src,count[,f]
@@ -479,6 +480,7 @@ namespace uml
 		void _or(parameter dst, parameter src1, parameter src2) { configure(OP_OR, 4, dst, src1, src2); }
 		void _xor(parameter dst, parameter src1, parameter src2) { configure(OP_XOR, 4, dst, src1, src2); }
 		void lzcnt(parameter dst, parameter src) { configure(OP_LZCNT, 4, dst, src); }
+		void tzcnt(parameter dst, parameter src) { configure(OP_TZCNT, 4, dst, src); }
 		void bswap(parameter dst, parameter src) { configure(OP_BSWAP, 4, dst, src); }
 		void shl(parameter dst, parameter src, parameter count) { configure(OP_SHL, 4, dst, src, count); }
 		void shr(parameter dst, parameter src, parameter count) { configure(OP_SHR, 4, dst, src, count); }
@@ -517,6 +519,7 @@ namespace uml
 		void dor(parameter dst, parameter src1, parameter src2) { configure(OP_OR, 8, dst, src1, src2); }
 		void dxor(parameter dst, parameter src1, parameter src2) { configure(OP_XOR, 8, dst, src1, src2); }
 		void dlzcnt(parameter dst, parameter src) { configure(OP_LZCNT, 8, dst, src); }
+		void dtzcnt(parameter dst, parameter src) { configure(OP_TZCNT, 8, dst, src); }
 		void dbswap(parameter dst, parameter src) { configure(OP_BSWAP, 8, dst, src); }
 		void dshl(parameter dst, parameter src, parameter count) { configure(OP_SHL, 8, dst, src, count); }
 		void dshr(parameter dst, parameter src, parameter count) { configure(OP_SHR, 8, dst, src, count); }