From 441616587454f10aedc55cd88928285b43bdea5f Mon Sep 17 00:00:00 2001 From: cracyc Date: Sun, 23 Jun 2013 22:08:50 +0000 Subject: [PATCH] chihiro: few updates to the i386 processor and chihiro driver. [Samuele Zannoli] - adds lots of mmx and sse opcodes to the i386 processor - adds the fcomip x87 opcode - adds a "UINT8 *memory(UINT32 &size)" method to the naomi_gdrom_board device that returns the size and a pointer to the decrypted gdrom data (used by chihiro) Then for the chihiro driver: - adds basic stuff for the Nvidia audio APU - adds the "chihiro curthread" debugger command, shows information about the current active thread - adds the "chihiro irq," debugger command, to generate an interrupt with irq number 0-15 by hand - adds more patches to let the software run even if usb is not implemented - adds the Chihiro Type 1 baseboard/mediaboard features to let the system load the gdrom games - adds incomplete save state support - adds support to the Nvidia 3d accelerator to draw primitives where the vertex data is not stored in a vertex buffer but contained in the command stream i386: don't take an smi until current instruction is complete (nw) --- src/emu/cpu/i386/i386.c | 29 +- src/emu/cpu/i386/i386ops.h | 144 +- src/emu/cpu/i386/i386priv.h | 31 +- src/emu/cpu/i386/pentops.c | 3405 +++++++++++++++++++++++++++++++++-- src/emu/cpu/i386/x87ops.c | 46 + src/mame/drivers/chihiro.c | 1035 +++++++++-- src/mame/machine/naomigd.h | 2 + 7 files changed, 4377 insertions(+), 315 deletions(-) diff --git a/src/emu/cpu/i386/i386.c b/src/emu/cpu/i386/i386.c index 38c367cf2cc..92573b74193 100644 --- a/src/emu/cpu/i386/i386.c +++ b/src/emu/cpu/i386/i386.c @@ -31,6 +31,7 @@ MODRM_TABLE i386_MODRM_table[256]; static void i386_trap_with_error(i386_state* cpustate, int irq, int irq_gate, int trap_level, UINT32 err); static void i286_task_switch(i386_state* cpustate, UINT16 selector, UINT8 nested); static void i386_task_switch(i386_state* cpustate, UINT16 selector, UINT8 nested); +static void pentium_smi(i386_state* cpustate); #define FAULT(fault,error) {cpustate->ext = 1; i386_trap_with_error(cpustate,fault,0,0,error); return;} #define FAULT_EXP(fault,error) {cpustate->ext = 1; i386_trap_with_error(cpustate,fault,0,trap_level+1,error); return;} @@ -1224,6 +1225,12 @@ static void i386_task_switch(i386_state *cpustate, UINT16 selector, UINT8 nested static void i386_check_irq_line(i386_state *cpustate) { + if(!cpustate->smm && cpustate->smi) + { + pentium_smi(cpustate); + return; + } + /* Check if the interrupts are enabled */ if ( (cpustate->irq_state) && cpustate->IF ) { @@ -2987,6 +2994,8 @@ static void i386_common_init(legacy_cpu_device *device, device_irq_acknowledge_c static const int regs32[8] = {EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI}; i386_state *cpustate = get_safe_token(device); + assert((sizeof(XMM_REG)/sizeof(double)) == 2); + build_cycle_table(device->machine()); for( i=0; i < 256; i++ ) { @@ -3014,6 +3023,7 @@ static void i386_common_init(legacy_cpu_device *device, device_irq_acknowledge_c cpustate->direct = &cpustate->program->direct(); cpustate->io = &device->space(AS_IO); cpustate->vtlb = vtlb_alloc(device, AS_PROGRAM, 0, tlbsize); + cpustate->smi = false; device->save_item(NAME( cpustate->reg.d)); device->save_item(NAME(cpustate->sreg[ES].selector)); @@ -3070,6 +3080,8 @@ static void i386_common_init(legacy_cpu_device *device, device_irq_acknowledge_c device->save_item(NAME(cpustate->performed_intersegment_jump)); device->save_item(NAME(cpustate->mxcsr)); device->save_item(NAME(cpustate->smm)); + 
device->save_item(NAME(cpustate->smi_latched)); + device->save_item(NAME(cpustate->smi)); device->save_item(NAME(cpustate->nmi_masked)); device->save_item(NAME(cpustate->nmi_latched)); device->save_item(NAME(cpustate->smbase)); @@ -3171,6 +3183,7 @@ static CPU_RESET( i386 ) cpustate->idtr.base = 0; cpustate->idtr.limit = 0x3ff; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -3204,13 +3217,14 @@ static void pentium_smi(i386_state *cpustate) UINT32 old_flags = get_flags(cpustate); if(cpustate->smm) - return; // TODO: latch + return; cpustate->cr[0] &= ~(0x8000000d); set_flags(cpustate, 2); if(!cpustate->smiact.isnull()) cpustate->smiact(true); cpustate->smm = true; + cpustate->smi_latched = false; // save state WRITE32(cpustate, cpustate->cr[4], smram_state+SMRAM_IP5_CR4); @@ -3777,6 +3791,7 @@ static CPU_RESET( i486 ) cpustate->eflags_mask = 0x00077fd7; cpustate->eip = 0xfff0; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -3892,6 +3907,7 @@ static CPU_RESET( pentium ) cpustate->eip = 0xfff0; cpustate->mxcsr = 0x1f80; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->smbase = 0x30000; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -3938,8 +3954,9 @@ static CPU_SET_INFO( pentium ) switch (state) { case CPUINFO_INT_INPUT_STATE+INPUT_LINE_SMI: - if(state) - pentium_smi(cpustate); + if(!cpustate->smi && state && cpustate->smm) + cpustate->smi_latched = true; + cpustate->smi = state; break; case CPUINFO_INT_REGISTER + X87_CTRL: cpustate->x87_cw = info->i; break; case CPUINFO_INT_REGISTER + X87_STATUS: cpustate->x87_sw = info->i; break; @@ -4026,6 +4043,7 @@ static CPU_RESET( mediagx ) cpustate->eflags_mask = 0x00277fd7; /* TODO: is this correct? 
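The SMI rework above (the commit message's "don't take an smi until current instruction is complete") boils down to three pieces visible in the hunks: the SMI input line only records a pending request, i386_check_irq_line enters SMM at the next instruction boundary, and an SMI asserted while the CPU is already in SMM is latched and re-taken when RSM returns. Below is a minimal standalone sketch of that flow with simplified names; it is an illustration of the latching logic, not the emulator's actual API.

/* Standalone model of the SMI latching added by this patch (names simplified). */
#include <stdbool.h>
#include <stdio.h>

typedef struct { bool smm, smi, smi_latched; } smi_model;

static void enter_smm(smi_model *s)              /* stands in for pentium_smi() */
{
    if (s->smm) return;                          /* already in SMM: nothing to do */
    s->smm = true;
    s->smi_latched = false;
    printf("entered SMM\n");
}

static void set_smi_line(smi_model *s, bool state)   /* INPUT_LINE_SMI write */
{
    if (!s->smi && state && s->smm)
        s->smi_latched = true;                   /* SMI raised while in SMM is remembered */
    s->smi = state;
}

static void check_irq_line(smi_model *s)         /* called at instruction boundaries */
{
    if (!s->smm && s->smi)
        enter_smm(s);                            /* SMI taken only between instructions */
}

static void rsm(smi_model *s)                    /* return from SMM */
{
    s->smm = false;
    if (s->smi_latched)
        enter_smm(s);                            /* service the latched SMI immediately */
}

int main(void)
{
    smi_model s = { false, false, false };
    set_smi_line(&s, true);    /* line goes high mid-instruction: nothing happens yet */
    check_irq_line(&s);        /* next instruction boundary: SMM is entered */
    set_smi_line(&s, false);
    set_smi_line(&s, true);    /* second SMI while still in SMM: latched */
    rsm(&s);                   /* RSM re-enters SMM because of the latch */
    return 0;
}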
*/ cpustate->eip = 0xfff0; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -4149,6 +4167,7 @@ static CPU_RESET( pentium_pro ) cpustate->eip = 0xfff0; cpustate->mxcsr = 0x1f80; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->smbase = 0x30000; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -4253,6 +4272,7 @@ static CPU_RESET( pentium_mmx ) cpustate->eip = 0xfff0; cpustate->mxcsr = 0x1f80; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->smbase = 0x30000; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -4357,6 +4377,7 @@ static CPU_RESET( pentium2 ) cpustate->eip = 0xfff0; cpustate->mxcsr = 0x1f80; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->smbase = 0x30000; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -4461,6 +4482,7 @@ static CPU_RESET( pentium3 ) cpustate->eip = 0xfff0; cpustate->mxcsr = 0x1f80; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->smbase = 0x30000; cpustate->nmi_masked = false; cpustate->nmi_latched = false; @@ -4567,6 +4589,7 @@ static CPU_RESET( pentium4 ) cpustate->eip = 0xfff0; cpustate->mxcsr = 0x1f80; cpustate->smm = false; + cpustate->smi_latched = false; cpustate->smbase = 0x30000; cpustate->nmi_masked = false; cpustate->nmi_latched = false; diff --git a/src/emu/cpu/i386/i386ops.h b/src/emu/cpu/i386/i386ops.h index 35401c9b857..8fdfe9b509d 100644 --- a/src/emu/cpu/i386/i386ops.h +++ b/src/emu/cpu/i386/i386ops.h @@ -298,6 +298,15 @@ static const X86_OPCODE x86_opcode_table[] = { 0x08, OP_2BYTE|OP_I486, I486OP(invd), I486OP(invd), }, { 0x09, OP_2BYTE|OP_I486, I486OP(wbinvd), I486OP(wbinvd), }, { 0x0B, OP_2BYTE|OP_PENTIUM, PENTIUMOP(ud2), PENTIUMOP(ud2), }, + { 0x10, OP_2BYTE|OP_SSE, SSEOP(movups_r128_rm128), SSEOP(movups_r128_rm128), }, + { 0x11, OP_2BYTE|OP_SSE, SSEOP(movups_rm128_r128), SSEOP(movups_rm128_r128), }, + { 0x12, OP_2BYTE|OP_SSE, SSEOP(movlps_r128_m64), SSEOP(movlps_r128_m64), }, + { 0x13, OP_2BYTE|OP_SSE, SSEOP(movlps_m64_r128), SSEOP(movlps_m64_r128), }, + { 0x14, OP_2BYTE|OP_SSE, SSEOP(unpcklps_r128_rm128), SSEOP(unpcklps_r128_rm128), }, + { 0x15, OP_2BYTE|OP_SSE, SSEOP(unpckhps_r128_rm128), SSEOP(unpckhps_r128_rm128), }, + { 0x16, OP_2BYTE|OP_SSE, SSEOP(movhps_r128_m64), SSEOP(movhps_r128_m64), }, + { 0x17, OP_2BYTE|OP_SSE, SSEOP(movhps_m64_r128), SSEOP(movhps_m64_r128), }, + { 0x18, OP_2BYTE|OP_PENTIUM, PENTIUMOP(prefetch_m8), PENTIUMOP(prefetch_m8), }, { 0x20, OP_2BYTE|OP_I386, I386OP(mov_r32_cr), I386OP(mov_r32_cr), }, { 0x21, OP_2BYTE|OP_I386, I386OP(mov_r32_dr), I386OP(mov_r32_dr), }, { 0x22, OP_2BYTE|OP_I386, I386OP(mov_cr_r32), I386OP(mov_cr_r32), }, @@ -307,13 +316,72 @@ static const X86_OPCODE x86_opcode_table[] = { 0x26, OP_2BYTE|OP_I386, I386OP(mov_tr_r32), I386OP(mov_tr_r32), }, { 0x28, OP_2BYTE|OP_SSE, SSEOP(movaps_r128_rm128), SSEOP(movaps_r128_rm128), }, { 0x29, OP_2BYTE|OP_SSE, SSEOP(movaps_rm128_r128), SSEOP(movaps_rm128_r128), }, + { 0x2a, OP_2BYTE|OP_SSE, SSEOP(cvtpi2ps_r128_rm64), SSEOP(cvtpi2ps_r128_rm64), }, + { 0x2b, OP_2BYTE|OP_SSE, SSEOP(movntps_m128_r128), SSEOP(movntps_m128_r128), }, + { 0x2c, OP_2BYTE|OP_SSE, SSEOP(cvttps2pi_r64_r128m64), SSEOP(cvttps2pi_r64_r128m64),}, + { 0x2d, OP_2BYTE|OP_SSE, SSEOP(cvtps2pi_r64_r128m64), SSEOP(cvtps2pi_r64_r128m64),}, + { 0x2e, OP_2BYTE|OP_SSE, SSEOP(ucomiss_r128_r128m32), SSEOP(ucomiss_r128_r128m32),}, + { 0x2f, OP_2BYTE|OP_SSE, SSEOP(comiss_r128_r128m32), SSEOP(comiss_r128_r128m32), }, { 
0x30, OP_2BYTE|OP_PENTIUM, PENTIUMOP(wrmsr), PENTIUMOP(wrmsr), }, { 0x31, OP_2BYTE|OP_PENTIUM, PENTIUMOP(rdtsc), PENTIUMOP(rdtsc), }, { 0x32, OP_2BYTE|OP_PENTIUM, PENTIUMOP(rdmsr), PENTIUMOP(rdmsr), }, + { 0x40, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovo_r16_rm16), PENTIUMOP(cmovo_r32_rm32), }, + { 0x41, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovno_r16_rm16), PENTIUMOP(cmovno_r32_rm32), }, + { 0x42, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovb_r16_rm16), PENTIUMOP(cmovb_r32_rm32), }, + { 0x43, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovae_r16_rm16), PENTIUMOP(cmovae_r32_rm32), }, + { 0x44, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmove_r16_rm16), PENTIUMOP(cmove_r32_rm32), }, + { 0x45, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovne_r16_rm16), PENTIUMOP(cmovne_r32_rm32), }, + { 0x46, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovbe_r16_rm16), PENTIUMOP(cmovbe_r32_rm32), }, + { 0x47, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmova_r16_rm16), PENTIUMOP(cmova_r32_rm32), }, + { 0x48, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovs_r16_rm16), PENTIUMOP(cmovs_r32_rm32), }, + { 0x49, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovns_r16_rm16), PENTIUMOP(cmovns_r32_rm32), }, + { 0x4a, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovp_r16_rm16), PENTIUMOP(cmovp_r32_rm32), }, + { 0x4b, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovnp_r16_rm16), PENTIUMOP(cmovnp_r32_rm32), }, + { 0x4c, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovl_r16_rm16), PENTIUMOP(cmovl_r32_rm32), }, + { 0x4d, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovge_r16_rm16), PENTIUMOP(cmovge_r32_rm32), }, + { 0x4e, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovle_r16_rm16), PENTIUMOP(cmovle_r32_rm32), }, + { 0x4f, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmovg_r16_rm16), PENTIUMOP(cmovg_r32_rm32), }, + { 0x50, OP_2BYTE|OP_SSE, SSEOP(movmskps_r16_r128), SSEOP(movmskps_r32_r128), }, + { 0x51, OP_2BYTE|OP_SSE, SSEOP(sqrtps_r128_rm128), SSEOP(sqrtps_r128_rm128), }, + { 0x52, OP_2BYTE|OP_SSE, SSEOP(rsqrtps_r128_rm128), SSEOP(rsqrtps_r128_rm128), }, + { 0x53, OP_2BYTE|OP_SSE, SSEOP(rcpps_r128_rm128), SSEOP(rcpps_r128_rm128), }, + { 0x54, OP_2BYTE|OP_SSE, SSEOP(andps_r128_rm128), SSEOP(andps_r128_rm128), }, + { 0x55, OP_2BYTE|OP_SSE, SSEOP(andnps_r128_rm128), SSEOP(andnps_r128_rm128), }, + { 0x56, OP_2BYTE|OP_SSE, SSEOP(orps_r128_rm128), SSEOP(orps_r128_rm128), }, + { 0x57, OP_2BYTE|OP_SSE, SSEOP(xorps), SSEOP(xorps), }, { 0x58, OP_2BYTE|OP_SSE, SSEOP(addps), SSEOP(addps), }, { 0x59, OP_2BYTE|OP_SSE, SSEOP(mulps), SSEOP(mulps), }, + { 0x5a, OP_2BYTE|OP_SSE, SSEOP(cvtps2pd_r128_r128m64), SSEOP(cvtps2pd_r128_r128m64),}, + { 0x5b, OP_2BYTE|OP_SSE, SSEOP(cvtdq2ps_r128_rm128), SSEOP(cvtdq2ps_r128_rm128), }, + { 0x5c, OP_2BYTE|OP_SSE, SSEOP(subps), SSEOP(subps), }, + { 0x5d, OP_2BYTE|OP_SSE, SSEOP(minps), SSEOP(minps), }, + { 0x5e, OP_2BYTE|OP_SSE, SSEOP(divps), SSEOP(divps), }, + { 0x5f, OP_2BYTE|OP_SSE, SSEOP(maxps), SSEOP(maxps), }, + { 0x60, OP_2BYTE|OP_MMX, MMXOP(punpcklbw_r64_r64m32), MMXOP(punpcklbw_r64_r64m32),}, + { 0x61, OP_2BYTE|OP_MMX, MMXOP(punpcklwd_r64_r64m32), MMXOP(punpcklwd_r64_r64m32),}, + { 0x62, OP_2BYTE|OP_MMX, MMXOP(punpckldq_r64_r64m32), MMXOP(punpckldq_r64_r64m32),}, + { 0x63, OP_2BYTE|OP_MMX, MMXOP(packsswb_r64_rm64), MMXOP(packsswb_r64_rm64), }, + { 0x64, OP_2BYTE|OP_MMX, MMXOP(pcmpgtb_r64_rm64), MMXOP(pcmpgtb_r64_rm64), }, + { 0x65, OP_2BYTE|OP_MMX, MMXOP(pcmpgtw_r64_rm64), MMXOP(pcmpgtw_r64_rm64), }, + { 0x66, OP_2BYTE|OP_MMX, MMXOP(pcmpgtd_r64_rm64), MMXOP(pcmpgtd_r64_rm64), }, + { 0x67, OP_2BYTE|OP_MMX, MMXOP(packuswb_r64_rm64), MMXOP(packuswb_r64_rm64), }, + { 0x68, OP_2BYTE|OP_MMX, MMXOP(punpckhbw_r64_rm64), MMXOP(punpckhbw_r64_rm64), }, + { 0x69, OP_2BYTE|OP_MMX, 
MMXOP(punpckhwd_r64_rm64), MMXOP(punpckhwd_r64_rm64), }, + { 0x6a, OP_2BYTE|OP_MMX, MMXOP(punpckhdq_r64_rm64), MMXOP(punpckhdq_r64_rm64), }, + { 0x6b, OP_2BYTE|OP_MMX, MMXOP(packssdw_r64_rm64), MMXOP(packssdw_r64_rm64), }, + { 0x6e, OP_2BYTE|OP_MMX, MMXOP(movd_r64_rm32), MMXOP(movd_r64_rm32), }, + { 0x6f, OP_2BYTE|OP_MMX, MMXOP(movq_r64_rm64), MMXOP(movq_r64_rm64), }, + { 0x70, OP_2BYTE|OP_MMX, MMXOP(pshufw_r64_rm64_i8), MMXOP(pshufw_r64_rm64_i8), }, + { 0x71, OP_2BYTE|OP_MMX, MMXOP(group_0f71), MMXOP(group_0f71), }, + { 0x72, OP_2BYTE|OP_MMX, MMXOP(group_0f72), MMXOP(group_0f72), }, + { 0x73, OP_2BYTE|OP_MMX, MMXOP(group_0f73), MMXOP(group_0f73), }, { 0x74, OP_2BYTE|OP_CYRIX, I386OP(cyrix_unknown), I386OP(cyrix_unknown), }, + { 0x74, OP_2BYTE|OP_MMX, MMXOP(pcmpeqb_r64_rm64), MMXOP(pcmpeqb_r64_rm64), }, + { 0x75, OP_2BYTE|OP_MMX, MMXOP(pcmpeqw_r64_rm64), MMXOP(pcmpeqw_r64_rm64), }, + { 0x76, OP_2BYTE|OP_MMX, MMXOP(pcmpeqd_r64_rm64), MMXOP(pcmpeqd_r64_rm64), }, { 0x77, OP_2BYTE|OP_MMX, MMXOP(emms), MMXOP(emms), }, + { 0x7e, OP_2BYTE|OP_MMX, MMXOP(movd_rm32_r64), MMXOP(movd_rm32_r64), }, + { 0x7f, OP_2BYTE|OP_MMX, MMXOP(movq_rm64_r64), MMXOP(movq_rm64_r64), }, { 0x80, OP_2BYTE|OP_I386, I386OP(jo_rel16), I386OP(jo_rel32), }, { 0x81, OP_2BYTE|OP_I386, I386OP(jno_rel16), I386OP(jno_rel32), }, { 0x82, OP_2BYTE|OP_I386, I386OP(jc_rel16), I386OP(jc_rel32), }, @@ -376,7 +444,11 @@ static const X86_OPCODE x86_opcode_table[] = { 0xBF, OP_2BYTE|OP_I386, I386OP(invalid), I386OP(movsx_r32_rm16), }, { 0xC0, OP_2BYTE|OP_I486, I486OP(xadd_rm8_r8), I486OP(xadd_rm8_r8), }, { 0xC1, OP_2BYTE|OP_I486, I486OP(xadd_rm16_r16), I486OP(xadd_rm32_r32), }, - { 0xc6, OP_2BYTE|OP_SSE, SSEOP(shufps), SSEOP(shufps), }, + { 0xC2, OP_2BYTE|OP_SSE, SSEOP(cmpps_r128_rm128_i8), SSEOP(cmpps_r128_rm128_i8), }, + { 0xC3, OP_2BYTE|OP_PENTIUM, PENTIUMOP(movnti_m16_r16), PENTIUMOP(movnti_m32_r32), }, + { 0xC4, OP_2BYTE|OP_SSE, SSEOP(pinsrw_r64_r16m16_i8), SSEOP(pinsrw_r64_r32m16_i8),}, + { 0xC5, OP_2BYTE|OP_SSE, SSEOP(pextrw_r16_r64_i8), SSEOP(pextrw_r32_r64_i8), }, + { 0xC6, OP_2BYTE|OP_SSE, SSEOP(shufps), SSEOP(shufps), }, { 0xC7, OP_2BYTE|OP_PENTIUM, PENTIUMOP(cmpxchg8b_m64), PENTIUMOP(cmpxchg8b_m64), }, { 0xC8, OP_2BYTE|OP_I486, I486OP(bswap_eax), I486OP(bswap_eax), }, { 0xC9, OP_2BYTE|OP_I486, I486OP(bswap_ecx), I486OP(bswap_ecx), }, @@ -386,6 +458,74 @@ static const X86_OPCODE x86_opcode_table[] = { 0xCD, OP_2BYTE|OP_I486, I486OP(bswap_ebp), I486OP(bswap_ebp), }, { 0xCE, OP_2BYTE|OP_I486, I486OP(bswap_esi), I486OP(bswap_esi), }, { 0xCF, OP_2BYTE|OP_I486, I486OP(bswap_edi), I486OP(bswap_edi), }, + { 0xD1, OP_2BYTE|OP_MMX, MMXOP(psrlw_r64_rm64), MMXOP(psrlw_r64_rm64), }, + { 0xD2, OP_2BYTE|OP_MMX, MMXOP(psrld_r64_rm64), MMXOP(psrld_r64_rm64), }, + { 0xD3, OP_2BYTE|OP_MMX, MMXOP(psrlq_r64_rm64), MMXOP(psrlq_r64_rm64), }, + { 0xD4, OP_2BYTE|OP_MMX, MMXOP(paddq_r64_rm64), MMXOP(paddq_r64_rm64), }, + { 0xD5, OP_2BYTE|OP_MMX, MMXOP(pmullw_r64_rm64), MMXOP(pmullw_r64_rm64), }, + { 0xD7, OP_2BYTE|OP_SSE, SSEOP(pmovmskb_r16_r64), SSEOP(pmovmskb_r32_r64), }, + { 0xD8, OP_2BYTE|OP_MMX, MMXOP(psubusb_r64_rm64), MMXOP(psubusb_r64_rm64), }, + { 0xD9, OP_2BYTE|OP_MMX, MMXOP(psubusw_r64_rm64), MMXOP(psubusw_r64_rm64), }, + { 0xDA, OP_2BYTE|OP_SSE, SSEOP(pminub_r64_rm64), SSEOP(pminub_r64_rm64), }, + { 0xDB, OP_2BYTE|OP_MMX, MMXOP(pand_r64_rm64), MMXOP(pand_r64_rm64), }, + { 0xDC, OP_2BYTE|OP_MMX, MMXOP(paddusb_r64_rm64), MMXOP(paddusb_r64_rm64), }, + { 0xDD, OP_2BYTE|OP_MMX, MMXOP(paddusw_r64_rm64), MMXOP(paddusw_r64_rm64), }, + { 0xDE, 
OP_2BYTE|OP_SSE, SSEOP(pmaxub_r64_rm64), SSEOP(pmaxub_r64_rm64), }, + { 0xDF, OP_2BYTE|OP_MMX, MMXOP(pandn_r64_rm64), MMXOP(pandn_r64_rm64), }, + { 0xE0, OP_2BYTE|OP_SSE, SSEOP(pavgb_r64_rm64), SSEOP(pavgb_r64_rm64), }, + { 0xE1, OP_2BYTE|OP_MMX, MMXOP(psraw_r64_rm64), MMXOP(psraw_r64_rm64), }, + { 0xE2, OP_2BYTE|OP_MMX, MMXOP(psrad_r64_rm64), MMXOP(psrad_r64_rm64), }, + { 0xE3, OP_2BYTE|OP_SSE, SSEOP(pavgw_r64_rm64), SSEOP(pavgw_r64_rm64), }, + { 0xE4, OP_2BYTE|OP_SSE, SSEOP(pmulhuw_r64_rm64), SSEOP(pmulhuw_r64_rm64), }, + { 0xE5, OP_2BYTE|OP_MMX, MMXOP(pmulhw_r64_rm64), MMXOP(pmulhw_r64_rm64), }, + { 0xE7, OP_2BYTE|OP_PENTIUM, PENTIUMOP(movntq_m64_r64), PENTIUMOP(movntq_m64_r64), }, + { 0xE8, OP_2BYTE|OP_MMX, MMXOP(psubsb_r64_rm64), MMXOP(psubsb_r64_rm64), }, + { 0xE9, OP_2BYTE|OP_MMX, MMXOP(psubsw_r64_rm64), MMXOP(psubsw_r64_rm64), }, + { 0xEA, OP_2BYTE|OP_SSE, SSEOP(pminsw_r64_rm64), SSEOP(pminsw_r64_rm64), }, + { 0xEB, OP_2BYTE|OP_MMX, MMXOP(por_r64_rm64), MMXOP(por_r64_rm64), }, + { 0xEC, OP_2BYTE|OP_MMX, MMXOP(paddsb_r64_rm64), MMXOP(paddsb_r64_rm64), }, + { 0xED, OP_2BYTE|OP_MMX, MMXOP(paddsw_r64_rm64), MMXOP(paddsw_r64_rm64), }, + { 0xEE, OP_2BYTE|OP_SSE, SSEOP(pmaxsw_r64_rm64), SSEOP(pmaxsw_r64_rm64), }, + { 0xEF, OP_2BYTE|OP_MMX, MMXOP(pxor_r64_rm64), MMXOP(pxor_r64_rm64), }, + { 0xF1, OP_2BYTE|OP_MMX, MMXOP(psllw_r64_rm64), MMXOP(psllw_r64_rm64), }, + { 0xF2, OP_2BYTE|OP_MMX, MMXOP(pslld_r64_rm64), MMXOP(pslld_r64_rm64), }, + { 0xF3, OP_2BYTE|OP_MMX, MMXOP(psllq_r64_rm64), MMXOP(psllq_r64_rm64), }, + { 0xF4, OP_2BYTE|OP_SSE, SSEOP(pmuludq_r64_rm64), SSEOP(pmuludq_r64_rm64), }, + { 0xF5, OP_2BYTE|OP_MMX, MMXOP(pmaddwd_r64_rm64), MMXOP(pmaddwd_r64_rm64), }, + { 0xF6, OP_2BYTE|OP_SSE, SSEOP(psadbw_r64_rm64), SSEOP(psadbw_r64_rm64), }, + { 0xf7, OP_2BYTE|OP_PENTIUM, PENTIUMOP(maskmovq_r64_r64), PENTIUMOP(maskmovq_r64_r64),}, + { 0xF8, OP_2BYTE|OP_MMX, MMXOP(psubb_r64_rm64), MMXOP(psubb_r64_rm64), }, + { 0xF9, OP_2BYTE|OP_MMX, MMXOP(psubw_r64_rm64), MMXOP(psubw_r64_rm64), }, + { 0xFA, OP_2BYTE|OP_MMX, MMXOP(psubd_r64_rm64), MMXOP(psubd_r64_rm64), }, + { 0xFB, OP_2BYTE|OP_SSE, SSEOP(psubq_r64_rm64), SSEOP(psubq_r64_rm64), }, + { 0xFC, OP_2BYTE|OP_MMX, MMXOP(paddb_r64_rm64), MMXOP(paddb_r64_rm64), }, + { 0xFD, OP_2BYTE|OP_MMX, MMXOP(paddw_r64_rm64), MMXOP(paddw_r64_rm64), }, + { 0xFE, OP_2BYTE|OP_MMX, MMXOP(paddd_r64_rm64), MMXOP(paddd_r64_rm64), }, /* F3 0F ?? 
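As in the pre-existing rows of x86_opcode_table, each new entry carries two handler columns; pairing handlers such as cmovo_r16_rm16 with cmovo_r32_rm32 suggests these are the 16-bit and 32-bit operand-size variants. A bare-bones model of that kind of dispatch is sketched below with made-up handler names; it is not MAME's actual decode code.

/* Illustrative two-column opcode row and operand-size dispatch (hypothetical names). */
#include <stdint.h>
#include <stdio.h>

typedef void (*op_handler)(void);

static void cmovo_16(void) { puts("cmovo r16, rm16"); }
static void cmovo_32(void) { puts("cmovo r32, rm32"); }

typedef struct {
    uint8_t    opcode;        /* second byte after the 0F escape                 */
    uint32_t   flags;         /* OP_2BYTE | OP_PENTIUM / OP_MMX / OP_SSE, etc.   */
    op_handler handler16;     /* used when the operand size is 16 bits           */
    op_handler handler32;     /* used when the operand size is 32 bits           */
} opcode_row;

static const opcode_row row_0f40 = { 0x40, 0, cmovo_16, cmovo_32 };

static void dispatch(const opcode_row *row, int operand_size32)
{
    (operand_size32 ? row->handler32 : row->handler16)();
}

int main(void) { dispatch(&row_0f40, 1); return 0; }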
*/ - { 0x2C, OP_3BYTEF3|OP_SSE, SSEOP(cvttss2si), SSEOP(cvttss2si), } + { 0x10, OP_3BYTEF3|OP_SSE, SSEOP(movss_r128_rm128), SSEOP(movss_r128_rm128), }, + { 0x11, OP_3BYTEF3|OP_SSE, SSEOP(movss_rm128_r128), SSEOP(movss_rm128_r128), }, + { 0x12, OP_3BYTEF3|OP_SSE, SSEOP(movsldup_r128_rm128), SSEOP(movsldup_r128_rm128), }, + { 0x16, OP_3BYTEF3|OP_SSE, SSEOP(movshdup_r128_rm128), SSEOP(movshdup_r128_rm128), }, + { 0x2A, OP_3BYTEF3|OP_SSE, SSEOP(cvtsi2ss_r128_rm32), SSEOP(cvtsi2ss_r128_rm32), }, + { 0x2C, OP_3BYTEF3|OP_SSE, SSEOP(cvttss2si_r32_r128m32), SSEOP(cvttss2si_r32_r128m32),}, + { 0x2D, OP_3BYTEF3|OP_SSE, SSEOP(cvtss2si_r32_r128m32), SSEOP(cvtss2si_r32_r128m32),}, + { 0x51, OP_3BYTEF3|OP_SSE, SSEOP(sqrtss_r128_r128m32), SSEOP(sqrtss_r128_r128m32), }, + { 0x52, OP_3BYTEF3|OP_SSE, SSEOP(rsqrtss_r128_r128m32), SSEOP(rsqrtss_r128_r128m32),}, + { 0x53, OP_3BYTEF3|OP_SSE, SSEOP(rcpss_r128_r128m32), SSEOP(rcpss_r128_r128m32), }, + { 0x58, OP_3BYTEF3|OP_SSE, SSEOP(addss), SSEOP(addss), }, + { 0x59, OP_3BYTEF3|OP_SSE, SSEOP(mulss), SSEOP(mulss), }, + { 0x5A, OP_3BYTEF3|OP_SSE, SSEOP(cvtss2sd_r128_r128m32), SSEOP(cvtss2sd_r128_r128m32),}, + { 0x5B, OP_3BYTEF3|OP_SSE, SSEOP(cvttps2dq_r128_rm128), SSEOP(cvttps2dq_r128_rm128),}, + { 0x5C, OP_3BYTEF3|OP_SSE, SSEOP(subss), SSEOP(subss), }, + { 0x5D, OP_3BYTEF3|OP_SSE, SSEOP(minss_r128_r128m32), SSEOP(minss_r128_r128m32), }, + { 0x5E, OP_3BYTEF3|OP_SSE, SSEOP(divss), SSEOP(divss), }, + { 0x5F, OP_3BYTEF3|OP_SSE, SSEOP(maxss_r128_r128m32), SSEOP(maxss_r128_r128m32), }, + { 0x6F, OP_3BYTEF3|OP_SSE, SSEOP(movdqu_r128_rm128), SSEOP(movdqu_r128_rm128), }, + { 0x70, OP_3BYTEF3|OP_SSE, SSEOP(pshufhw_r128_rm128_i8), SSEOP(pshufhw_r128_rm128_i8),}, + { 0x7E, OP_3BYTEF3|OP_SSE, SSEOP(movq_r128_r128m64), SSEOP(movq_r128_r128m64), }, + { 0x7F, OP_3BYTEF3|OP_SSE, SSEOP(movdqu_rm128_r128), SSEOP(movdqu_rm128_r128), }, + { 0xB8, OP_3BYTEF3|OP_PENTIUM, PENTIUMOP(popcnt_r16_rm16), PENTIUMOP(popcnt_r32_rm32), }, + { 0xC2, OP_3BYTEF3|OP_SSE, SSEOP(cmpss_r128_r128m32_i8), SSEOP(cmpss_r128_r128m32_i8),}, + { 0xD6, OP_3BYTEF3|OP_SSE, SSEOP(movq2dq_r128_r64), SSEOP(movq2dq_r128_r64), }, + { 0xE6, OP_3BYTEF3|OP_SSE, SSEOP(cvtdq2pd_r128_r128m64), SSEOP(cvtdq2pd_r128_r128m64)} }; diff --git a/src/emu/cpu/i386/i386priv.h b/src/emu/cpu/i386/i386priv.h index 5874f40070f..11906f98264 100644 --- a/src/emu/cpu/i386/i386priv.h +++ b/src/emu/cpu/i386/i386priv.h @@ -306,19 +306,29 @@ union I386_GPR { UINT8 b[32]; }; -union X87_REG { - UINT64 i; - double f; +union MMX_REG { + UINT32 d[2]; + INT32 i[2]; + UINT16 w[4]; + INT16 s[4]; + UINT8 b[8]; + INT8 c[8]; + float f[2]; + UINT64 q; + INT64 l; }; -typedef UINT64 MMX_REG; - union XMM_REG { - UINT32 d[4]; + UINT8 b[16]; UINT16 w[8]; - UINT8 b[16]; + UINT32 d[4]; UINT64 q[2]; - float f[4]; + INT8 c[16]; + INT16 s[8]; + INT32 i[4]; + INT64 l[2]; + float f[4]; + double f64[2]; }; struct i386_state @@ -434,6 +444,8 @@ struct i386_state vtlb_state *vtlb; bool smm; + bool smi; + bool smi_latched; bool nmi_masked; bool nmi_latched; UINT32 smbase; @@ -496,7 +508,7 @@ static int i386_limit_check(i386_state *cpustate, int seg, UINT32 offset); #define SetSZPF16(x) {cpustate->ZF = ((UINT16)(x)==0); cpustate->SF = ((x)&0x8000) ? 1 : 0; cpustate->PF = i386_parity_table[x & 0xFF]; } #define SetSZPF32(x) {cpustate->ZF = ((UINT32)(x)==0); cpustate->SF = ((x)&0x80000000) ? 
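The MMX and SSE handlers that follow lean on the widened MMX_REG/XMM_REG unions introduced in i386priv.h above: one packed register exposed as byte, word, dword, signed and float lanes over the same storage. A local, illustrative copy using stdint types is shown below; lane numbering assumes a little-endian host, as on x86 itself.

/* Illustrative stand-in for the MMX_REG union: several typed views of one 64-bit register. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef union {
    uint32_t d[2];  int32_t i[2];
    uint16_t w[4];  int16_t s[4];
    uint8_t  b[8];  int8_t  c[8];
    float    f[2];
    uint64_t q;     int64_t l;
} mmx_reg;

int main(void)
{
    assert(sizeof(mmx_reg) == 8);        /* the patch asserts the analogous size for XMM_REG */

    mmx_reg r;
    r.q = 0x8000FFFF00017F80ULL;                          /* one 64-bit write ...            */
    printf("w[0]=%04x w[3]=%04x\n", r.w[0], r.w[3]);      /* ... read back as word lanes     */
    printf("s[3]=%d c[0]=%d\n", r.s[3], r.c[0]);          /* or as signed lanes (psraw etc.) */
    return 0;
}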
1 : 0; cpustate->PF = i386_parity_table[x & 0xFF]; } -#define MMX(n) cpustate->fpu_reg[(n)].i +#define MMX(n) (*((MMX_REG *)(&cpustate->x87_reg[(n)].low))) #define XMM(n) cpustate->sse_reg[(n)] /***********************************************************************************/ @@ -866,6 +878,7 @@ INLINE UINT16 READ16PL0(i386_state *cpustate,UINT32 ea) } return value; } + INLINE UINT32 READ32PL0(i386_state *cpustate,UINT32 ea) { UINT32 value; diff --git a/src/emu/cpu/i386/pentops.c b/src/emu/cpu/i386/pentops.c index 426c15ee665..9cd2119eff3 100644 --- a/src/emu/cpu/i386/pentops.c +++ b/src/emu/cpu/i386/pentops.c @@ -1,22 +1,6 @@ // Pentium+ specific opcodes -/* return the single precision floating point number represented by the 32 bit value */ -INLINE float FPU_INT32_SINGLE(UINT32 value) -{ - float v; - - v=*((float *)&value); - return v; -} - -INLINE UINT32 FPU_SINGLE_INT32(X87_REG value) -{ - float fs=(float)value.f; - UINT32 v; - - v=*((UINT32 *)(&fs)); - return v; -} +extern flag float32_is_nan( float32 a ); // since its not defined in softfloat.h INLINE void MMXPROLOG(i386_state *cpustate) { @@ -24,6 +8,48 @@ INLINE void MMXPROLOG(i386_state *cpustate) cpustate->x87_tw = 0; // tag word = 0 } +INLINE void READMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r) +{ + r.q=READ64(cpustate, ea); +} + +INLINE void WRITEMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r) +{ + WRITE64(cpustate, ea, r.q); +} + +INLINE void READXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r) +{ + r.q[0]=READ64(cpustate, ea); + r.q[1]=READ64(cpustate, ea+8); +} + +INLINE void WRITEXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r) +{ + WRITE64(cpustate, ea, r.q[0]); + WRITE64(cpustate, ea+8, r.q[1]); +} + +INLINE void READXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r) +{ + r.q[0]=READ64(cpustate, ea); +} + +INLINE void WRITEXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r) +{ + WRITE64(cpustate, ea, r.q[0]); +} + +INLINE void READXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r) +{ + r.q[1]=READ64(cpustate, ea); +} + +INLINE void WRITEXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r) +{ + WRITE64(cpustate, ea, r.q[1]); +} + static void PENTIUMOP(rdmsr)(i386_state *cpustate) // Opcode 0x0f 32 { UINT64 data; @@ -64,81 +90,6 @@ static void PENTIUMOP(rdtsc)(i386_state *cpustate) // Opcode 0x0f 31 CYCLES(cpustate,CYCLES_RDTSC); } -static void I386OP(cyrix_unknown)(i386_state *cpustate) // Opcode 0x0f 74 -{ - logerror("Unemulated 0x0f 0x74 opcode called\n"); - - CYCLES(cpustate,1); -} - -static void PENTIUMOP(cmpxchg8b_m64)(i386_state *cpustate) // Opcode 0x0f c7 -{ - UINT8 modm = FETCH(cpustate); - if( modm >= 0xc0 ) { - report_invalid_modrm(cpustate, "cmpxchg8b_m64", modm); - } else { - UINT32 ea = GetEA(cpustate, modm, 0); - UINT64 value = READ64(cpustate,ea); - UINT64 edx_eax = (((UINT64) REG32(EDX)) << 32) | REG32(EAX); - UINT64 ecx_ebx = (((UINT64) REG32(ECX)) << 32) | REG32(EBX); - - if( value == edx_eax ) { - WRITE64(cpustate,ea, ecx_ebx); - cpustate->ZF = 1; - CYCLES(cpustate,CYCLES_CMPXCHG_REG_MEM_T); - } else { - REG32(EDX) = (UINT32) (value >> 32); - REG32(EAX) = (UINT32) (value >> 0); - cpustate->ZF = 0; - CYCLES(cpustate,CYCLES_CMPXCHG_REG_MEM_F); - } - } -} - -INLINE void READXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r) -{ - r.q[0]=READ64(cpustate, ea); - r.q[1]=READ64(cpustate, ea+8); -} -INLINE void WRITEXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r) -{ - WRITE64(cpustate, ea, r.q[0]); - WRITE64(cpustate, ea+8, r.q[1]); -} - -static void SSEOP(sse_group0fae)(i386_state *cpustate) // Opcode 0f ae -{ - 
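The redefined MMX(n) macro above points the MMX registers at the low 64 bits of the x87 register file, which is why MMXPROLOG() also clears the x87 tag word, and the new READMMX/WRITEMMX/READXMM helpers simply move 8 or 16 bytes between memory and those unions. The sketch below shows the aliasing trick with illustrative field names, not MAME's exact types.

/* Sketch of the MMX/x87 aliasing behind the new MMX(n) macro (field names illustrative). */
#include <stdint.h>
#include <stdio.h>

typedef union {
    uint64_t q;
    uint16_t w[4];
} mmx_reg;

typedef struct {
    uint64_t low;      /* mantissa / MMX payload of the 80-bit x87 value */
    uint16_t high;     /* sign and exponent */
} x87_reg;

#define MMX(fpu, n) (*(mmx_reg *)(&(fpu)[(n)].low))   /* same cast trick as the patch */

int main(void)
{
    x87_reg fpu[8] = {{0, 0}};
    MMX(fpu, 3).w[0] = 0x1234;                        /* an MMX word write ...               */
    printf("x87 mantissa now %016llx\n",
           (unsigned long long)fpu[3].low);           /* ... lands in the x87 register file  */
    return 0;
}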
UINT8 modm = FETCH(cpustate); - if( modm == 0xf8 ) { - logerror("Unemulated SFENCE opcode called\n"); - CYCLES(cpustate,1); // sfence instruction - } else if( modm == 0xf0 ) { - CYCLES(cpustate,1); // mfence instruction - } else if( modm == 0xe8 ) { - CYCLES(cpustate,1); // lfence instruction - } else if( modm < 0xc0 ) { - UINT32 ea; - switch ( (modm & 0x38) >> 3 ) - { - case 2: // ldmxcsr m32 - ea = GetEA(cpustate, modm, 0); - cpustate->mxcsr = READ32(cpustate, ea); - break; - case 3: // stmxcsr m32 - ea = GetEA(cpustate, modm, 0); - WRITE32(cpustate, ea, cpustate->mxcsr); - break; - case 7: // clflush m8 - GetNonTranslatedEA(cpustate, modm, NULL); - break; - default: - report_invalid_modrm(cpustate, "sse_group0fae", modm); - } - } else { - report_invalid_modrm(cpustate, "sse_group0fae", modm); - } -} - static void PENTIUMOP(ud2)(i386_state *cpustate) // Opcode 0x0f 0b { i386_trap(cpustate, 6, 0, 0); @@ -229,6 +180,11 @@ static void PENTIUMOP(rsm)(i386_state *cpustate) CHANGE_PC(cpustate,cpustate->eip); cpustate->nmi_masked = false; + if(cpustate->smi_latched) + { + pentium_smi(cpustate); + return; + } if(cpustate->nmi_latched) { cpustate->nmi_latched = false; @@ -236,18 +192,2158 @@ static void PENTIUMOP(rsm)(i386_state *cpustate) } } -static void SSEOP(cvttss2si)(i386_state *cpustate) // Opcode f3 0f 2c +static void PENTIUMOP(prefetch_m8)(i386_state *cpustate) // Opcode 0x0f 18 +{ + UINT8 modrm = FETCH(cpustate); + UINT32 ea = GetEA(cpustate,modrm,0); + CYCLES(cpustate,1+(ea & 1)); // TODO: correct cycle count +} + +static void PENTIUMOP(cmovo_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 40 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->OF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovo_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 40 { UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->OF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovno_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 41 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->OF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovno_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 41 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->OF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovb_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 42 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->CF == 1) { + if( modrm >= 0xc0 ) { + src 
= LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovb_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 42 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->CF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovae_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 43 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->CF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovae_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 43 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->CF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmove_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 44 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->ZF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmove_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 44 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->ZF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovne_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 45 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->ZF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovne_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 45 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->ZF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovbe_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 46 +{ + 
UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->CF == 1) || (cpustate->ZF == 1)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovbe_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 46 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->CF == 1) || (cpustate->ZF == 1)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmova_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 47 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->CF == 0) && (cpustate->ZF == 0)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmova_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 47 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->CF == 0) && (cpustate->ZF == 0)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovs_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 48 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovs_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 48 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovns_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 49 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovns_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 49 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = 
READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovp_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4a +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->PF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovp_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4a +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->PF == 1) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovnp_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4b +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->PF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovnp_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4b +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->PF == 0) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovl_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4c +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF != cpustate->OF) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovl_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4c +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF != cpustate->OF) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovge_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4d +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF == cpustate->OF) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovge_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4d +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if (cpustate->SF == cpustate->OF) { + if( modrm >= 0xc0 ) { + src = 
LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovle_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4e +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovle_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4e +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovg_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4f +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + STORE_REG16(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(cmovg_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4f +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + + if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF)) { + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + STORE_REG32(modrm, src); + CYCLES(cpustate,1); // TODO: correct cycle count + } + } +} + +static void PENTIUMOP(movnti_m16_r16)(i386_state *cpustate) // Opcode 0f c3 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + // since cache is not implemented + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITE16(cpustate,ea,LOAD_RM16(modrm)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void PENTIUMOP(movnti_m32_r32)(i386_state *cpustate) // Opcode 0f c3 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + // since cache is not implemented + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITE32(cpustate,ea,LOAD_RM32(modrm)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void I386OP(cyrix_unknown)(i386_state *cpustate) // Opcode 0x0f 74 +{ + logerror("Unemulated 0x0f 0x74 opcode called\n"); + + CYCLES(cpustate,1); +} + +static void PENTIUMOP(cmpxchg8b_m64)(i386_state *cpustate) // Opcode 0x0f c7 +{ + UINT8 modm = FETCH(cpustate); + if( modm >= 0xc0 ) { + report_invalid_modrm(cpustate, "cmpxchg8b_m64", modm); + } else { + UINT32 ea = GetEA(cpustate, modm, 0); + UINT64 value = READ64(cpustate,ea); + UINT64 edx_eax = (((UINT64) REG32(EDX)) << 32) | REG32(EAX); + UINT64 
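All sixteen CMOVcc handlers added here (opcodes 0F 40-4F, in 16- and 32-bit forms) share one shape: fetch the modrm byte, test a flag condition, and only when the condition holds read the source (register or memory) and store it into the destination register. Below is a compact standalone model of the condition side, assuming the standard x86 condition-code ordering, which is what the handlers above implement; it is a reference sketch, not the emulator code.

/* Reference model of the CMOVcc predicates (cc = low nibble of the second opcode byte). */
#include <stdbool.h>
#include <stdint.h>

typedef struct { int CF, ZF, SF, OF, PF; } flags_t;

static bool cmov_condition(unsigned cc, const flags_t *f)
{
    switch (cc & 0x0f) {
    case 0x0: return f->OF;                         /* CMOVO  */
    case 0x1: return !f->OF;                        /* CMOVNO */
    case 0x2: return f->CF;                         /* CMOVB  */
    case 0x3: return !f->CF;                        /* CMOVAE */
    case 0x4: return f->ZF;                         /* CMOVE  */
    case 0x5: return !f->ZF;                        /* CMOVNE */
    case 0x6: return f->CF || f->ZF;                /* CMOVBE */
    case 0x7: return !f->CF && !f->ZF;              /* CMOVA  */
    case 0x8: return f->SF;                         /* CMOVS  */
    case 0x9: return !f->SF;                        /* CMOVNS */
    case 0xa: return f->PF;                         /* CMOVP  */
    case 0xb: return !f->PF;                        /* CMOVNP */
    case 0xc: return f->SF != f->OF;                /* CMOVL  */
    case 0xd: return f->SF == f->OF;                /* CMOVGE */
    case 0xe: return f->ZF || (f->SF != f->OF);     /* CMOVLE */
    default : return !f->ZF && (f->SF == f->OF);    /* CMOVG  */
    }
}

/* A 32-bit destination is only written when the predicate holds: */
static void cmov32(unsigned cc, const flags_t *f, uint32_t *dst, uint32_t src)
{
    if (cmov_condition(cc, f))
        *dst = src;
}

int main(void)
{
    flags_t f = { 0, 1, 0, 0, 0 };          /* ZF set */
    uint32_t dst = 0xdeadbeef;
    cmov32(0x4, &f, &dst, 0x12345678);      /* CMOVE: condition true, dst is written */
    return dst == 0x12345678 ? 0 : 1;
}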
ecx_ebx = (((UINT64) REG32(ECX)) << 32) | REG32(EBX); + + if( value == edx_eax ) { + WRITE64(cpustate,ea, ecx_ebx); + cpustate->ZF = 1; + CYCLES(cpustate,CYCLES_CMPXCHG_REG_MEM_T); + } else { + REG32(EDX) = (UINT32) (value >> 32); + REG32(EAX) = (UINT32) (value >> 0); + cpustate->ZF = 0; + CYCLES(cpustate,CYCLES_CMPXCHG_REG_MEM_F); + } + } +} + +static void PENTIUMOP(movntq_m64_r64)(i386_state *cpustate) // Opcode 0f e7 +{ + //MMXPROLOG(cpustate); // TODO: check if needed + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + CYCLES(cpustate,1); // unsupported + } else { + // since cache is not implemented + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEMMX(cpustate, ea, MMX((modrm >> 3) & 0x7)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void PENTIUMOP(maskmovq_r64_r64)(i386_state *cpustate) // Opcode 0f f7 +{ + int s,m,n; + UINT8 modm = FETCH(cpustate); + UINT32 ea = GetEA(cpustate, 7, 0); // ds:di/edi/rdi register + MMXPROLOG(cpustate); + s=(modm >> 3) & 7; + m=modm & 7; + for (n=0;n <= 7;n++) + if (MMX(m).b[n] & 127) + WRITE8(cpustate, ea+n, MMX(s).b[n]); +} + +static void PENTIUMOP(popcnt_r16_rm16)(i386_state *cpustate) // Opcode f3 0f b8 +{ + UINT16 src; + UINT8 modrm = FETCH(cpustate); + int n,count; + + if( modrm >= 0xc0 ) { + src = LOAD_RM16(modrm); + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ16(cpustate,ea); + } + count=0; + for (n=0;n < 16;n++) { + count=count+(src & 1); + src=src >> 1; + } + STORE_REG16(modrm, count); + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void PENTIUMOP(popcnt_r32_rm32)(i386_state *cpustate) // Opcode f3 0f b8 +{ + UINT32 src; + UINT8 modrm = FETCH(cpustate); + int n,count; + + if( modrm >= 0xc0 ) { + src = LOAD_RM32(modrm); + } else { + UINT32 ea = GetEA(cpustate,modrm,0); + src = READ32(cpustate,ea); + } + count=0; + for (n=0;n < 32;n++) { + count=count+(src & 1); + src=src >> 1; + } + STORE_REG32(modrm, count); + CYCLES(cpustate,1); // TODO: correct cycle count +} + +INLINE INT8 SaturatedSignedWordToSignedByte(INT16 word) +{ + if (word > 127) + return 127; + if (word < -128) + return -128; + return (INT8)word; +} + +INLINE UINT8 SaturatedSignedWordToUnsignedByte(INT16 word) +{ + if (word > 255) + return 255; + if (word < 0) + return 0; + return (UINT8)word; +} + +INLINE INT16 SaturatedSignedDwordToSignedWord(INT32 dword) +{ + if (dword > 32767) + return 32767; + if (dword < -32768) + return -32768; + return (INT16)dword; +} + +static void MMXOP(group_0f71)(i386_state *cpustate) // Opcode 0f 71 +{ + UINT8 modm = FETCH(cpustate); + UINT8 imm8 = FETCH(cpustate); + MMXPROLOG(cpustate); + if( modm >= 0xc0 ) { + switch ( (modm & 0x38) >> 3 ) + { + case 2: // psrlw + MMX(modm & 7).w[0]=MMX(modm & 7).w[0] >> imm8; + MMX(modm & 7).w[1]=MMX(modm & 7).w[1] >> imm8; + MMX(modm & 7).w[2]=MMX(modm & 7).w[2] >> imm8; + MMX(modm & 7).w[3]=MMX(modm & 7).w[3] >> imm8; + break; + case 4: // psraw + MMX(modm & 7).s[0]=MMX(modm & 7).s[0] >> imm8; + MMX(modm & 7).s[1]=MMX(modm & 7).s[1] >> imm8; + MMX(modm & 7).s[2]=MMX(modm & 7).s[2] >> imm8; + MMX(modm & 7).s[3]=MMX(modm & 7).s[3] >> imm8; + break; + case 6: // psllw + MMX(modm & 7).w[0]=MMX(modm & 7).w[0] << imm8; + MMX(modm & 7).w[1]=MMX(modm & 7).w[1] << imm8; + MMX(modm & 7).w[2]=MMX(modm & 7).w[2] << imm8; + MMX(modm & 7).w[3]=MMX(modm & 7).w[3] << imm8; + break; + default: + report_invalid_modrm(cpustate, "mmx_group0f71", modm); + } + } +} + +static void MMXOP(group_0f72)(i386_state *cpustate) // Opcode 0f 72 +{ + UINT8 modm = FETCH(cpustate); + 
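The SaturatedSigned* helpers defined above are what the signed saturating packed arithmetic further down this patch (paddsb/paddsw, psubsb/psubsw, and presumably the pack instructions) clamps through. A standalone restatement with a couple of worked values follows; helper names here are my own.

/* Standalone clamp helpers mirroring the patch's saturation functions, with worked values. */
#include <stdint.h>
#include <stdio.h>

static int8_t sat_sw_to_sb(int16_t w)          /* signed word -> signed byte   */
{
    return w > 127 ? 127 : (w < -128 ? -128 : (int8_t)w);
}

static uint8_t sat_sw_to_ub(int16_t w)         /* signed word -> unsigned byte */
{
    return w > 255 ? 255 : (w < 0 ? 0 : (uint8_t)w);
}

int main(void)
{
    /* paddsb-style lane: 100 + 100 saturates to 127 instead of wrapping to -56 */
    printf("%d\n", sat_sw_to_sb((int16_t)(100 + 100)));
    /* unsigned clamp as a packuswb-style lane would use: negative results stick at 0 */
    printf("%u\n", sat_sw_to_ub((int16_t)-5));
    return 0;
}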
UINT8 imm8 = FETCH(cpustate); + MMXPROLOG(cpustate); + if( modm >= 0xc0 ) { + switch ( (modm & 0x38) >> 3 ) + { + case 2: // psrld + MMX(modm & 7).d[0]=MMX(modm & 7).d[0] >> imm8; + MMX(modm & 7).d[1]=MMX(modm & 7).d[1] >> imm8; + break; + case 4: // psrad + MMX(modm & 7).i[0]=MMX(modm & 7).i[0] >> imm8; + MMX(modm & 7).i[1]=MMX(modm & 7).i[1] >> imm8; + break; + case 6: // pslld + MMX(modm & 7).d[0]=MMX(modm & 7).d[0] << imm8; + MMX(modm & 7).d[1]=MMX(modm & 7).d[1] << imm8; + break; + default: + report_invalid_modrm(cpustate, "mmx_group0f72", modm); + } + } +} + +static void MMXOP(group_0f73)(i386_state *cpustate) // Opcode 0f 73 +{ + UINT8 modm = FETCH(cpustate); + UINT8 imm8 = FETCH(cpustate); + MMXPROLOG(cpustate); + if( modm >= 0xc0 ) { + switch ( (modm & 0x38) >> 3 ) + { + case 2: // psrlq + MMX(modm & 7).q=MMX(modm & 7).q >> imm8; + break; + case 6: // psllq + MMX(modm & 7).q=MMX(modm & 7).q << imm8; + break; + default: + report_invalid_modrm(cpustate, "mmx_group0f73", modm); + } + } +} + +static void MMXOP(psrlw_r64_rm64)(i386_state *cpustate) // Opcode 0f d1 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count; + MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count; + MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count; + MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + int count=(int)src.q; + MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count; + MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count; + MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count; + MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psrld_r64_rm64)(i386_state *cpustate) // Opcode 0f d2 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count; + MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + int count=(int)src.q; + MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count; + MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psrlq_r64_rm64)(i386_state *cpustate) // Opcode 0f d3 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + int count=(int)src.q; + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddq_r64_rm64)(i386_state *cpustate) // Opcode 0f d4 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+MMX(modrm & 7).q; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+src.q; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void 
MMXOP(pmullw_r64_rm64)(i386_state *cpustate) // Opcode 0f d5 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) & 0xffff; + MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) & 0xffff; + MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) & 0xffff; + MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) & 0xffff; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) & 0xffff; + MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) & 0xffff; + MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) & 0xffff; + MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) & 0xffff; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubusb_r64_rm64)(i386_state *cpustate) // Opcode 0f d8 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 7).b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-MMX(modrm & 7).b[n]; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-src.b[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubusw_r64_rm64)(i386_state *cpustate) // Opcode 0f d9 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < MMX(modrm & 7).w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-MMX(modrm & 7).w[n]; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-src.w[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pand_r64_rm64)(i386_state *cpustate) // Opcode 0f db +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & MMX(modrm & 7).q; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & src.q; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddusb_r64_rm64)(i386_state *cpustate) // Opcode 0f dc +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-MMX(modrm & 7).b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+MMX(modrm & 7).b[n]; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 
0xff : MMX((modrm >> 3) & 0x7).b[n]+src.b[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddusw_r64_rm64)(i386_state *cpustate) // Opcode 0f dd +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-MMX(modrm & 7).w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+MMX(modrm & 7).w[n]; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+src.w[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pandn_r64_rm64)(i386_state *cpustate) // Opcode 0f df +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & MMX(modrm & 7).q; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & src.q; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psraw_r64_rm64)(i386_state *cpustate) // Opcode 0f e1 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count; + MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count; + MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count; + MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + int count=(int)src.q; + MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count; + MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count; + MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count; + MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psrad_r64_rm64)(i386_state *cpustate) // Opcode 0f e2 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count; + MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + int count=(int)src.q; + MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count; + MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pmulhw_r64_rm64)(i386_state *cpustate) // Opcode 0f e5 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) >> 16; + MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) >> 16; + MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) >> 16; + MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) >> 16; + } else { + MMX_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, src); + MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 
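Note the lane types in the shift handlers: the logical shifts (psrlw/psrld/psrlq, further up) operate on the unsigned .w/.d/.q views, while psraw/psrad above use the signed .s/.i views, so the host compiler's arithmetic right shift replicates the sign bit. A two-lane illustration on plain integers is below, relying, like the handlers themselves, on the implementation-defined signed right shift being arithmetic.

/* Logical vs arithmetic right shift on one packed word lane. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t u = 0x8000;                       /* packed word lane, unsigned view */
    int16_t  s = (int16_t)0x8000;              /* same bits, signed view          */

    printf("psrlw-style: %04x\n", (uint16_t)(u >> 4));   /* 0x0800 - zeros shifted in     */
    printf("psraw-style: %04x\n", (uint16_t)(s >> 4));   /* 0xf800 - sign bit replicated  */
    return 0;
}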
3) & 0x7).s[0]*(INT32)src.s[0]) >> 16; + MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) >> 16; + MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) >> 16; + MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) >> 16; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubsb_r64_rm64)(i386_state *cpustate) // Opcode 0f e8 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)MMX(modrm & 7).c[n]); + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)s.c[n]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubsw_r64_rm64)(i386_state *cpustate) // Opcode 0f e9 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)MMX(modrm & 7).s[n]); + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)s.s[n]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(por_r64_rm64)(i386_state *cpustate) // Opcode 0f eb +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | MMX(modrm & 7).q; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | s.q; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddsb_r64_rm64)(i386_state *cpustate) // Opcode 0f ec +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)MMX(modrm & 7).c[n]); + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)s.c[n]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddsw_r64_rm64)(i386_state *cpustate) // Opcode 0f ed +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)MMX(modrm & 7).s[n]); + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)s.s[n]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pxor_r64_rm64)(i386_state *cpustate) // Opcode 0f ef +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ MMX(modrm & 7).q; + } else { + MMX_REG s; + UINT32 ea = 
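/* Illustrative sketch (not part of the patch): PMULHW keeps only the upper 16
   bits of each signed 16x16 -> 32-bit product, which is what the casts and the
   ">> 16" above express.  Standalone form with <stdint.h> types: */
#include <stdint.h>

static uint16_t pmulhw_element(int16_t a, int16_t b)
{
    int32_t product = (int32_t)a * (int32_t)b;    /* full signed 32-bit product */
    return (uint16_t)((uint32_t)product >> 16);   /* high word only             */
}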
GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ s.q; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psllw_r64_rm64)(i386_state *cpustate) // Opcode 0f f1 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count; + MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count; + MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count; + MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + int count=(int)s.q; + MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count; + MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count; + MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count; + MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pslld_r64_rm64)(i386_state *cpustate) // Opcode 0f f2 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count; + MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + int count=(int)s.q; + MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count; + MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psllq_r64_rm64)(i386_state *cpustate) // Opcode 0f f3 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int count=(int)MMX(modrm & 7).q; + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + int count=(int)s.q; + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pmaddwd_r64_rm64)(i386_state *cpustate) // Opcode 0f f5 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]+ + (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]; + MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]+ + (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)s.s[0]+ + (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)s.s[1]; + MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)s.s[2]+ + (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)s.s[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubb_r64_rm64)(i386_state *cpustate) // Opcode 0f f8 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - MMX(modrm & 7).b[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 
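/* Illustrative sketch (not part of the patch): PMADDWD multiplies four signed
   word pairs and adds adjacent products, giving two signed dword results, as
   in the handler above.  Standalone form: */
#include <stdint.h>

static void pmaddwd(const int16_t a[4], const int16_t b[4], int32_t out[2])
{
    out[0] = (int32_t)a[0] * b[0] + (int32_t)a[1] * b[1];   /* low dword  */
    out[1] = (int32_t)a[2] * b[2] + (int32_t)a[3] * b[3];   /* high dword */
}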
0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - s.b[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubw_r64_rm64)(i386_state *cpustate) // Opcode 0f f9 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - MMX(modrm & 7).w[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - s.w[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(psubd_r64_rm64)(i386_state *cpustate) // Opcode 0f fa +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 2;n++) + MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - MMX(modrm & 7).d[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 2;n++) + MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - s.d[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddb_r64_rm64)(i386_state *cpustate) // Opcode 0f fc +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + MMX(modrm & 7).b[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + s.b[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddw_r64_rm64)(i386_state *cpustate) // Opcode 0f fd +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + MMX(modrm & 7).w[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + s.w[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(paddd_r64_rm64)(i386_state *cpustate) // Opcode 0f fe +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 2;n++) + MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + MMX(modrm & 7).d[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 2;n++) + MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + s.d[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(emms)(i386_state *cpustate) // Opcode 0f 77 +{ + cpustate->x87_tw = 0xffff; // tag word = 0xffff + // TODO + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(movd_r64_rm32)(i386_state *cpustate) // Opcode 0f 6e +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).d[0]=LOAD_RM32(modrm); + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + MMX((modrm >> 3) & 0x7).d[0]=READ32(cpustate, ea); + } + MMX((modrm >> 3) & 0x7).d[1]=0; + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(movq_r64_rm64)(i386_state *cpustate) // Opcode 0f 6f +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).l=MMX(modrm & 0x7).l; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, 
MMX((modrm >> 3) & 0x7)); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(movd_rm32_r64)(i386_state *cpustate) // Opcode 0f 7e +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + STORE_RM32(modrm, MMX((modrm >> 3) & 0x7).d[0]); + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITE32(cpustate, ea, MMX((modrm >> 3) & 0x7).d[0]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(movq_rm64_r64)(i386_state *cpustate) // Opcode 0f 7f +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX(modrm & 0x7)=MMX((modrm >> 3) & 0x7); + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEMMX(cpustate, ea, MMX((modrm >> 3) & 0x7)); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pcmpeqb_r64_rm64)(i386_state *cpustate) // Opcode 0f 74 +{ + int c; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + for (c=0;c <= 7;c++) + MMX(d).b[c]=(MMX(d).b[c] == MMX(s).b[c]) ? 0xff : 0; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (c=0;c <= 7;c++) + MMX(d).b[c]=(MMX(d).b[c] == s.b[c]) ? 0xff : 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pcmpeqw_r64_rm64)(i386_state *cpustate) // Opcode 0f 75 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).w[0]=(MMX(d).w[0] == MMX(s).w[0]) ? 0xffff : 0; + MMX(d).w[1]=(MMX(d).w[1] == MMX(s).w[1]) ? 0xffff : 0; + MMX(d).w[2]=(MMX(d).w[2] == MMX(s).w[2]) ? 0xffff : 0; + MMX(d).w[3]=(MMX(d).w[3] == MMX(s).w[3]) ? 0xffff : 0; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).w[0]=(MMX(d).w[0] == s.w[0]) ? 0xffff : 0; + MMX(d).w[1]=(MMX(d).w[1] == s.w[1]) ? 0xffff : 0; + MMX(d).w[2]=(MMX(d).w[2] == s.w[2]) ? 0xffff : 0; + MMX(d).w[3]=(MMX(d).w[3] == s.w[3]) ? 0xffff : 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pcmpeqd_r64_rm64)(i386_state *cpustate) // Opcode 0f 76 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).d[0]=(MMX(d).d[0] == MMX(s).d[0]) ? 0xffffffff : 0; + MMX(d).d[1]=(MMX(d).d[1] == MMX(s).d[1]) ? 0xffffffff : 0; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).d[0]=(MMX(d).d[0] == s.d[0]) ? 0xffffffff : 0; + MMX(d).d[1]=(MMX(d).d[1] == s.d[1]) ? 
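/* Illustrative sketch (not part of the patch): the PCMPEQx handlers above
   produce an all-ones element where the operands match and zero otherwise;
   the resulting mask is normally combined with PAND/PANDN/POR afterwards. */
#include <stdint.h>

static void pcmpeqb(const uint8_t d[8], const uint8_t s[8], uint8_t out[8])
{
    int n;
    for (n = 0; n < 8; n++)
        out[n] = (d[n] == s[n]) ? 0xff : 0x00;   /* per-byte equality mask */
}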
0xffffffff : 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pshufw_r64_rm64_i8)(i386_state *cpustate) // Opcode 0f 70 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX_REG t; + int s,d; + UINT8 imm8 = FETCH(cpustate); + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + t.q=MMX(s).q; + MMX(d).w[0]=t.w[imm8 & 3]; + MMX(d).w[1]=t.w[(imm8 >> 2) & 3]; + MMX(d).w[2]=t.w[(imm8 >> 4) & 3]; + MMX(d).w[3]=t.w[(imm8 >> 6) & 3]; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + UINT8 imm8 = FETCH(cpustate); + READMMX(cpustate, ea, s); + MMX(d).w[0]=s.w[imm8 & 3]; + MMX(d).w[1]=s.w[(imm8 >> 2) & 3]; + MMX(d).w[2]=s.w[(imm8 >> 4) & 3]; + MMX(d).w[3]=s.w[(imm8 >> 6) & 3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(punpcklbw_r64_r64m32)(i386_state *cpustate) // Opcode 0f 60 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + UINT32 t; + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + t=MMX(d).d[0]; + MMX(d).b[0]=t & 0xff; + MMX(d).b[1]=MMX(s).b[0]; + MMX(d).b[2]=(t >> 8) & 0xff; + MMX(d).b[3]=MMX(s).b[1]; + MMX(d).b[4]=(t >> 16) & 0xff; + MMX(d).b[5]=MMX(s).b[2]; + MMX(d).b[6]=(t >> 24) & 0xff; + MMX(d).b[7]=MMX(s).b[3]; + } else { + UINT32 s,t; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + s = READ32(cpustate, ea); + t=MMX(d).d[0]; + MMX(d).b[0]=t & 0xff; + MMX(d).b[1]=s & 0xff; + MMX(d).b[2]=(t >> 8) & 0xff; + MMX(d).b[3]=(s >> 8) & 0xff; + MMX(d).b[4]=(t >> 16) & 0xff; + MMX(d).b[5]=(s >> 16) & 0xff; + MMX(d).b[6]=(t >> 24) & 0xff; + MMX(d).b[7]=(s >> 24) & 0xff; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(punpcklwd_r64_r64m32)(i386_state *cpustate) // Opcode 0f 61 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + UINT16 t; + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + t=MMX(d).w[1]; + MMX(d).w[0]=MMX(d).w[0]; + MMX(d).w[1]=MMX(s).w[0]; + MMX(d).w[2]=t; + MMX(d).w[3]=MMX(s).w[1]; + } else { + UINT32 s; + UINT16 t; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + s = READ32(cpustate, ea); + t=MMX(d).w[1]; + MMX(d).w[0]=MMX(d).w[0]; + MMX(d).w[1]=s & 0xffff; + MMX(d).w[2]=t; + MMX(d).w[3]=(s >> 16) & 0xffff; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(punpckldq_r64_r64m32)(i386_state *cpustate) // Opcode 0f 62 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).d[0]=MMX(d).d[0]; + MMX(d).d[1]=MMX(s).d[0]; + } else { + UINT32 s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + s = READ32(cpustate, ea); + MMX(d).d[0]=MMX(d).d[0]; + MMX(d).d[1]=s; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(packsswb_r64_rm64)(i386_state *cpustate) // Opcode 0f 63 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]); + MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]); + MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]); + MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]); + MMX(d).c[4]=SaturatedSignedWordToSignedByte(MMX(s).s[0]); + MMX(d).c[5]=SaturatedSignedWordToSignedByte(MMX(s).s[1]); + MMX(d).c[6]=SaturatedSignedWordToSignedByte(MMX(s).s[2]); + 
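/* Illustrative sketch (not part of the patch): PSHUFW uses the four 2-bit
   fields of the immediate to select which source word goes into each
   destination word; the handler above copies the source to a temporary first
   because the register form may have destination == source. */
#include <stdint.h>

static void pshufw(const uint16_t src[4], uint8_t imm8, uint16_t dst[4])
{
    uint16_t t[4] = { src[0], src[1], src[2], src[3] };   /* copy in case dst == src */
    dst[0] = t[ imm8       & 3];
    dst[1] = t[(imm8 >> 2) & 3];
    dst[2] = t[(imm8 >> 4) & 3];
    dst[3] = t[(imm8 >> 6) & 3];
}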
MMX(d).c[7]=SaturatedSignedWordToSignedByte(MMX(s).s[3]); + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]); + MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]); + MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]); + MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]); + MMX(d).c[4]=SaturatedSignedWordToSignedByte(s.s[0]); + MMX(d).c[5]=SaturatedSignedWordToSignedByte(s.s[1]); + MMX(d).c[6]=SaturatedSignedWordToSignedByte(s.s[2]); + MMX(d).c[7]=SaturatedSignedWordToSignedByte(s.s[3]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pcmpgtb_r64_rm64)(i386_state *cpustate) // Opcode 0f 64 +{ + int c; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + for (c=0;c <= 7;c++) + MMX(d).b[c]=(MMX(d).c[c] > MMX(s).c[c]) ? 0xff : 0; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (c=0;c <= 7;c++) + MMX(d).b[c]=(MMX(d).c[c] > s.c[c]) ? 0xff : 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pcmpgtw_r64_rm64)(i386_state *cpustate) // Opcode 0f 65 +{ + int c; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + for (c=0;c <= 3;c++) + MMX(d).w[c]=(MMX(d).s[c] > MMX(s).s[c]) ? 0xffff : 0; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (c=0;c <= 3;c++) + MMX(d).w[c]=(MMX(d).s[c] > s.s[c]) ? 0xffff : 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(pcmpgtd_r64_rm64)(i386_state *cpustate) // Opcode 0f 66 +{ + int c; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + for (c=0;c <= 1;c++) + MMX(d).d[c]=(MMX(d).i[c] > MMX(s).i[c]) ? 0xffffffff : 0; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (c=0;c <= 1;c++) + MMX(d).d[c]=(MMX(d).i[c] > s.i[c]) ? 
0xffffffff : 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(packuswb_r64_rm64)(i386_state *cpustate) // Opcode 0f 67 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(MMX(d).s[0]); + MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(MMX(d).s[1]); + MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(MMX(d).s[2]); + MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(MMX(d).s[3]); + MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(MMX(s).s[0]); + MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(MMX(s).s[1]); + MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(MMX(s).s[2]); + MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(MMX(s).s[3]); + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(MMX(d).s[0]); + MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(MMX(d).s[1]); + MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(MMX(d).s[2]); + MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(MMX(d).s[3]); + MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(s.s[0]); + MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(s.s[1]); + MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(s.s[2]); + MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(s.s[3]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(punpckhbw_r64_rm64)(i386_state *cpustate) // Opcode 0f 68 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).b[0]=MMX(d).b[4]; + MMX(d).b[1]=MMX(s).b[4]; + MMX(d).b[2]=MMX(d).b[5]; + MMX(d).b[3]=MMX(s).b[5]; + MMX(d).b[4]=MMX(d).b[6]; + MMX(d).b[5]=MMX(s).b[6]; + MMX(d).b[6]=MMX(d).b[7]; + MMX(d).b[7]=MMX(s).b[7]; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).b[0]=MMX(d).b[4]; + MMX(d).b[1]=s.b[4]; + MMX(d).b[2]=MMX(d).b[5]; + MMX(d).b[3]=s.b[5]; + MMX(d).b[4]=MMX(d).b[6]; + MMX(d).b[5]=s.b[6]; + MMX(d).b[6]=MMX(d).b[7]; + MMX(d).b[7]=s.b[7]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(punpckhwd_r64_rm64)(i386_state *cpustate) // Opcode 0f 69 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).w[0]=MMX(d).w[2]; + MMX(d).w[1]=MMX(s).w[2]; + MMX(d).w[2]=MMX(d).w[3]; + MMX(d).w[3]=MMX(s).w[3]; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).w[0]=MMX(d).w[2]; + MMX(d).w[1]=s.w[2]; + MMX(d).w[2]=MMX(d).w[3]; + MMX(d).w[3]=s.w[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(punpckhdq_r64_rm64)(i386_state *cpustate) // Opcode 0f 6a +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + MMX(d).d[0]=MMX(d).d[1]; + MMX(d).d[1]=MMX(s).d[1]; + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).d[0]=MMX(d).d[1]; + MMX(d).d[1]=s.d[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void MMXOP(packssdw_r64_rm64)(i386_state *cpustate) // Opcode 0f 6b +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + 
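/* Illustrative sketch (not part of the patch): the Saturated*To* helpers used
   by PACKSSWB/PACKUSWB/PACKSSDW above are defined elsewhere in pentops.c; the
   clamping they perform is equivalent to the following standalone functions. */
#include <stdint.h>

static int8_t saturate_sword_to_sbyte(int16_t v)    /* PACKSSWB element */
{
    if (v >  127) return  127;
    if (v < -128) return -128;
    return (int8_t)v;
}

static uint8_t saturate_sword_to_ubyte(int16_t v)   /* PACKUSWB element */
{
    if (v > 255) return 255;
    if (v <   0) return 0;
    return (uint8_t)v;
}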
MMX(d).s[0]=SaturatedSignedDwordToSignedWord(MMX(d).i[0]); + MMX(d).s[1]=SaturatedSignedDwordToSignedWord(MMX(d).i[1]); + MMX(d).s[2]=SaturatedSignedDwordToSignedWord(MMX(s).i[0]); + MMX(d).s[3]=SaturatedSignedDwordToSignedWord(MMX(s).i[1]); + } else { + MMX_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX(d).s[0]=SaturatedSignedDwordToSignedWord(MMX(d).i[0]); + MMX(d).s[1]=SaturatedSignedDwordToSignedWord(MMX(d).i[1]); + MMX(d).s[2]=SaturatedSignedDwordToSignedWord(s.i[0]); + MMX(d).s[3]=SaturatedSignedDwordToSignedWord(s.i[1]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(sse_group0fae)(i386_state *cpustate) // Opcode 0f ae +{ + UINT8 modm = FETCH(cpustate); + if( modm == 0xf8 ) { + logerror("Unemulated SFENCE opcode called\n"); + CYCLES(cpustate,1); // sfence instruction + } else if( modm == 0xf0 ) { + CYCLES(cpustate,1); // mfence instruction + } else if( modm == 0xe8 ) { + CYCLES(cpustate,1); // lfence instruction + } else if( modm < 0xc0 ) { + UINT32 ea; + switch ( (modm & 0x38) >> 3 ) + { + case 2: // ldmxcsr m32 + ea = GetEA(cpustate, modm, 0); + cpustate->mxcsr = READ32(cpustate, ea); + break; + case 3: // stmxcsr m32 + ea = GetEA(cpustate, modm, 0); + WRITE32(cpustate, ea, cpustate->mxcsr); + break; + case 7: // clflush m8 + GetNonTranslatedEA(cpustate, modm, NULL); + break; + default: + report_invalid_modrm(cpustate, "sse_group0fae", modm); + } + } else { + report_invalid_modrm(cpustate, "sse_group0fae", modm); + } +} + +static void SSEOP(cvttps2dq_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 5b +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM(modrm & 0x7).f[0]; + XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM(modrm & 0x7).f[1]; + XMM((modrm >> 3) & 0x7).i[2]=(INT32)XMM(modrm & 0x7).f[2]; + XMM((modrm >> 3) & 0x7).i[3]=(INT32)XMM(modrm & 0x7).f[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f[0]; + XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f[1]; + XMM((modrm >> 3) & 0x7).i[2]=(INT32)src.f[2]; + XMM((modrm >> 3) & 0x7).i[3]=(INT32)src.f[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtss2sd_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 5a +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f64[0] = XMM(modrm & 0x7).f[0]; + } else { + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + s.d[0] = READ32(cpustate, ea); + XMM((modrm >> 3) & 0x7).f64[0] = s.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvttss2si_r32_r128m32)(i386_state *cpustate) // Opcode f3 0f 2c +{ + INT32 src; UINT8 modrm = FETCH(cpustate); // get mordm byte if( modrm >= 0xc0 ) { // if bits 7-6 are 11 the source is a xmm register (low doubleword) - src = XMM(modrm & 0x7).d[0^NATIVE_ENDIAN_VALUE_LE_BE(0,1)]; + src = (INT32)XMM(modrm & 0x7).f[0^NATIVE_ENDIAN_VALUE_LE_BE(0,1)]; } else { // otherwise is a memory address + XMM_REG t; UINT32 ea = GetEA(cpustate, modrm, 0); - src = READ32(cpustate, ea); + t.d[0] = READ32(cpustate, ea); + src = (INT32)t.f[0]; + } + STORE_REG32(modrm, (UINT32)src); + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtss2si_r32_r128m32)(i386_state *cpustate) // Opcode f3 0f 2d +{ + INT32 src; + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + src = (INT32)XMM(modrm & 0x7).f[0]; + } else { + XMM_REG t; + 
UINT32 ea = GetEA(cpustate, modrm, 0); + t.d[0] = READ32(cpustate, ea); + src = (INT32)t.f[0]; + } + STORE_REG32(modrm, (UINT32)src); + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtsi2ss_r128_rm32)(i386_state *cpustate) // Opcode f3 0f 2a +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = (INT32)LOAD_RM32(modrm); + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + XMM((modrm >> 3) & 0x7).f[0] = (INT32)READ32(cpustate, ea); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtpi2ps_r128_rm64)(i386_state *cpustate) // Opcode 0f 2a +{ + UINT8 modrm = FETCH(cpustate); + MMXPROLOG(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = MMX(modrm & 0x7).i[0]; + XMM((modrm >> 3) & 0x7).f[1] = MMX(modrm & 0x7).i[1]; + } else { + MMX_REG r; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, r); + XMM((modrm >> 3) & 0x7).f[0] = r.i[0]; + XMM((modrm >> 3) & 0x7).f[1] = r.i[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvttps2pi_r64_r128m64)(i386_state *cpustate) // Opcode 0f 2c +{ + UINT8 modrm = FETCH(cpustate); + MMXPROLOG(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0]; + MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1]; + } else { + XMM_REG r; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, r); + XMM((modrm >> 3) & 0x7).i[0] = r.f[0]; + XMM((modrm >> 3) & 0x7).i[1] = r.f[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtps2pi_r64_r128m64)(i386_state *cpustate) // Opcode 0f 2d +{ + UINT8 modrm = FETCH(cpustate); + MMXPROLOG(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0]; + MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1]; + } else { + XMM_REG r; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, r); + XMM((modrm >> 3) & 0x7).i[0] = r.f[0]; + XMM((modrm >> 3) & 0x7).i[1] = r.f[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtps2pd_r128_r128m64)(i386_state *cpustate) // Opcode 0f 5a +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).f[0]; + XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).f[1]; + } else { + MMX_REG r; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, r); + XMM((modrm >> 3) & 0x7).f64[0] = (double)r.f[0]; + XMM((modrm >> 3) & 0x7).f64[1] = (double)r.f[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtdq2ps_r128_rm128)(i386_state *cpustate) // Opcode 0f 5b +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).i[0]; + XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).i[1]; + XMM((modrm >> 3) & 0x7).f[2] = (float)XMM(modrm & 0x7).i[2]; + XMM((modrm >> 3) & 0x7).f[3] = (float)XMM(modrm & 0x7).i[3]; + } else { + XMM_REG r; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, r); + XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0]; + XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1]; + XMM((modrm >> 3) & 0x7).f[2] = (float)r.i[2]; + XMM((modrm >> 3) & 0x7).f[3] = (float)r.i[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cvtdq2pd_r128_r128m64)(i386_state *cpustate) // Opcode f3 0f e6 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 
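/* Illustrative sketch (not part of the patch): the CVTT* conversions always
   truncate toward zero, while the plain CVT* forms round according to MXCSR
   (round-to-nearest by default).  The handlers above use a plain C cast for
   both, i.e. the truncating behaviour; a rounding variant could use lrintf(): */
#include <stdint.h>
#include <math.h>

static int32_t cvttss2si_sketch(float f) { return (int32_t)f; }          /* truncate toward zero  */
static int32_t cvtss2si_sketch(float f)  { return (int32_t)lrintf(f); }  /* current rounding mode */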
0x7).i[0]; + XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).i[1]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + XMM((modrm >> 3) & 0x7).f64[0] = (double)s.i[0]; + XMM((modrm >> 3) & 0x7).f64[1] = (double)s.i[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movss_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 10 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0]; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + XMM((modrm >> 3) & 0x7).d[0] = READ32(cpustate, ea); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movss_rm128_r128)(i386_state *cpustate) // Opcode f3 0f 11 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM(modrm & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0]; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITE32(cpustate, ea, XMM((modrm >> 3) & 0x7).d[0]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movsldup_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 12 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0]; + XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[0]; + XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[2]; + XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[2]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).d[0] = src.d[0]; + XMM((modrm >> 3) & 0x7).d[1] = src.d[0]; + XMM((modrm >> 3) & 0x7).d[2] = src.d[2]; + XMM((modrm >> 3) & 0x7).d[3] = src.d[2]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movshdup_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 16 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[1]; + XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[1]; + XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[3]; + XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).d[0] = src.d[1]; + XMM((modrm >> 3) & 0x7).d[1] = src.d[1]; + XMM((modrm >> 3) & 0x7).d[2] = src.d[3]; + XMM((modrm >> 3) & 0x7).d[3] = src.d[3]; } - STORE_REG32(modrm, (INT32)FPU_INT32_SINGLE(src)); - // TODO CYCLES(cpustate,1); // TODO: correct cycle count } @@ -260,6 +2356,7 @@ static void SSEOP(movaps_r128_rm128)(i386_state *cpustate) // Opcode 0f 28 UINT32 ea = GetEA(cpustate, modrm, 0); READXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); } + CYCLES(cpustate,1); // TODO: correct cycle count } static void SSEOP(movaps_rm128_r128)(i386_state *cpustate) // Opcode 0f 29 @@ -271,6 +2368,237 @@ static void SSEOP(movaps_rm128_r128)(i386_state *cpustate) // Opcode 0f 29 UINT32 ea = GetEA(cpustate, modrm, 0); WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movups_r128_rm128)(i386_state *cpustate) // Opcode 0f 10 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7); + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movups_rm128_r128)(i386_state *cpustate) // Opcode 0f 11 +{ + UINT8 modrm = 
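/* Illustrative sketch (not part of the patch): MOVSLDUP duplicates the even
   (low) dwords of the source and MOVSHDUP the odd (high) dwords, which is the
   element copying performed by the two handlers above. */
#include <stdint.h>

static void movsldup(const uint32_t src[4], uint32_t dst[4])
{
    dst[0] = src[0]; dst[1] = src[0];   /* low dword of each pair, twice  */
    dst[2] = src[2]; dst[3] = src[2];
}

static void movshdup(const uint32_t src[4], uint32_t dst[4])
{
    dst[0] = src[1]; dst[1] = src[1];   /* high dword of each pair, twice */
    dst[2] = src[3]; dst[3] = src[3];
}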
FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7); + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movlps_r128_m64)(i386_state *cpustate) // Opcode 0f 12 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM_LO64(cpustate, ea, XMM((modrm >> 3) & 0x7)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void SSEOP(movlps_m64_r128)(i386_state *cpustate) // Opcode 0f 13 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEXMM_LO64(cpustate, ea, XMM((modrm >> 3) & 0x7)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void SSEOP(movhps_r128_m64)(i386_state *cpustate) // Opcode 0f 16 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM_HI64(cpustate, ea, XMM((modrm >> 3) & 0x7)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void SSEOP(movhps_m64_r128)(i386_state *cpustate) // Opcode 0f 17 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEXMM_HI64(cpustate,ea, XMM((modrm >> 3) & 0x7)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void SSEOP(movntps_m128_r128)(i386_state *cpustate) // Opcode 0f 2b +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + // unsupported by cpu + CYCLES(cpustate,1); // TODO: correct cycle count + } else { + // since cache is not implemented + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); + CYCLES(cpustate,1); // TODO: correct cycle count + } +} + +static void SSEOP(movmskps_r16_r128)(i386_state *cpustate) // Opcode 0f 50 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int b; + b=(XMM(modrm & 0x7).d[0] >> 31) & 1; + b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2); + b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4); + b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8); + STORE_REG16(modrm, b); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movmskps_r32_r128)(i386_state *cpustate) // Opcode 0f 50 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int b; + b=(XMM(modrm & 0x7).d[0] >> 31) & 1; + b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2); + b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4); + b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8); + STORE_REG32(modrm, b); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movq2dq_r128_r64)(i386_state *cpustate) // Opcode f3 0f d6 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).q[0] = MMX(modrm & 7).q; + XMM((modrm >> 3) & 0x7).q[1] = 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movdqu_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 6f +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0]; + 
XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1]; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movdqu_rm128_r128)(i386_state *cpustate) // Opcode f3 0f 7f +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0]; + XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1]; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(movq_r128_r128m64)(i386_state *cpustate) // Opcode f3 0f 7e +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0]; + XMM((modrm >> 3) & 0x7).q[1] = 0; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + XMM((modrm >> 3) & 0x7).q[0] = READ64(cpustate,ea); + XMM((modrm >> 3) & 0x7).q[1] = 0; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pmovmskb_r16_r64)(i386_state *cpustate) // Opcode 0f d7 +{ + //MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int b; + b=(MMX(modrm & 0x7).b[0] >> 7) & 1; + b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2); + b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4); + b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8); + b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16); + b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32); + b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64); + b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128); + STORE_REG16(modrm, b); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pmovmskb_r32_r64)(i386_state *cpustate) // Opcode 0f d7 +{ + //MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int b; + b=(MMX(modrm & 0x7).b[0] >> 7) & 1; + b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2); + b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4); + b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8); + b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16); + b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32); + b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64); + b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128); + STORE_REG32(modrm, b); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(xorps)(i386_state *cpustate) // Opcode 0f 57 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ XMM(modrm & 0x7).d[0]; + XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ XMM(modrm & 0x7).d[1]; + XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ XMM(modrm & 0x7).d[2]; + XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ XMM(modrm & 0x7).d[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ src.d[0]; + XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ src.d[1]; + XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ src.d[2]; + XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ src.d[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count } static void SSEOP(addps)(i386_state *cpustate) // Opcode 0f 58 @@ -290,9 +2618,118 @@ static void SSEOP(addps)(i386_state *cpustate) // Opcode 0f 58 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + src.f[2]; XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + src.f[3]; } + 
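/* Illustrative sketch (not part of the patch): MOVMSKPS and PMOVMSKB gather
   the top (sign) bit of each element into the low bits of a general register;
   the shift-and-mask chains above compute the same value as this loop. */
#include <stdint.h>

static uint32_t pmovmskb(const uint8_t b[8])
{
    uint32_t mask = 0;
    int n;
    for (n = 0; n < 8; n++)
        mask |= (uint32_t)(b[n] >> 7) << n;   /* bit n = sign bit of byte n */
    return mask;
}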
CYCLES(cpustate,1); // TODO: correct cycle count } -static void SSEOP(mulps)(i386_state *cpustate) // Opcode 0f 59 +static void SSEOP(sqrtps_r128_rm128)(i386_state *cpustate) // Opcode 0f 51 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]); + XMM((modrm >> 3) & 0x7).f[1] = sqrt(XMM(modrm & 0x7).f[1]); + XMM((modrm >> 3) & 0x7).f[2] = sqrt(XMM(modrm & 0x7).f[2]); + XMM((modrm >> 3) & 0x7).f[3] = sqrt(XMM(modrm & 0x7).f[3]); + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = sqrt(src.f[0]); + XMM((modrm >> 3) & 0x7).f[1] = sqrt(src.f[1]); + XMM((modrm >> 3) & 0x7).f[2] = sqrt(src.f[2]); + XMM((modrm >> 3) & 0x7).f[3] = sqrt(src.f[3]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(rsqrtps_r128_rm128)(i386_state *cpustate) // Opcode 0f 52 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]); + XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(XMM(modrm & 0x7).f[1]); + XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(XMM(modrm & 0x7).f[2]); + XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(XMM(modrm & 0x7).f[3]); + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(src.f[0]); + XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(src.f[1]); + XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(src.f[2]); + XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(src.f[3]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(rcpps_r128_rm128)(i386_state *cpustate) // Opcode 0f 53 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / XMM(modrm & 0x7).f[0]; + XMM((modrm >> 3) & 0x7).f[1] = 1.0 / XMM(modrm & 0x7).f[1]; + XMM((modrm >> 3) & 0x7).f[2] = 1.0 / XMM(modrm & 0x7).f[2]; + XMM((modrm >> 3) & 0x7).f[3] = 1.0 / XMM(modrm & 0x7).f[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / src.f[0]; + XMM((modrm >> 3) & 0x7).f[1] = 1.0 / src.f[1]; + XMM((modrm >> 3) & 0x7).f[2] = 1.0 / src.f[2]; + XMM((modrm >> 3) & 0x7).f[3] = 1.0 / src.f[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(andps_r128_rm128)(i386_state *cpustate) // Opcode 0f 54 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0]; + XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0]; + XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(andnps_r128_rm128)(i386_state *cpustate) // Opcode 0f 55 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0]; + XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0]; + XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1]; 
+ } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(orps_r128_rm128)(i386_state *cpustate) // Opcode 0f 56 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0]; + XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0]; + XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(mulps)(i386_state *cpustate) // Opcode 0f 59 ???? { UINT8 modrm = FETCH(cpustate); if( modrm >= 0xc0 ) { @@ -309,6 +2746,313 @@ static void SSEOP(mulps)(i386_state *cpustate) // Opcode 0f 59 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * src.f[2]; XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * src.f[3]; } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(subps)(i386_state *cpustate) // Opcode 0f 5c +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0]; + XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - XMM(modrm & 0x7).f[1]; + XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2]; + XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - XMM(modrm & 0x7).f[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0]; + XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - src.f[1]; + XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - src.f[2]; + XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - src.f[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +INLINE float sse_min_single(float src1, float src2) +{ + /*if ((src1 == 0) && (src2 == 0)) + return src2; + if (src1 = SNaN) + return src2; + if (src2 = SNaN) + return src2;*/ + if (src1 < src2) + return src1; + return src2; +} + +static void SSEOP(minps)(i386_state *cpustate) // Opcode 0f 5d +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]); + XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]); + XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]); + XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]); + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]); + XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]); + XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]); + XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(divps)(i386_state *cpustate) // Opcode 0f 5e +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0]; + XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 
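/* Illustrative sketch (not part of the patch): the architectural MINPS/MAXPS
   rule that the commented-out checks in sse_min_single/sse_max_single are
   aiming for -- when either operand is a NaN, or both operands are zero, the
   second (source) operand is returned, so a bare '<' or '>' alone is only an
   approximation. */
#include <math.h>

static float sse_min_single_full(float dst, float src)
{
    if (isnan(dst) || isnan(src))
        return src;                      /* NaN in either operand -> source */
    if (dst == 0.0f && src == 0.0f)
        return src;                      /* +0/-0 compare equal -> source   */
    return dst < src ? dst : src;
}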
0x7).f[1] / XMM(modrm & 0x7).f[1]; + XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / XMM(modrm & 0x7).f[2]; + XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / XMM(modrm & 0x7).f[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0]; + XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / src.f[1]; + XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / src.f[2]; + XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / src.f[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +INLINE float sse_max_single(float src1, float src2) +{ + /*if ((src1 == 0) && (src2 == 0)) + return src2; + if (src1 = SNaN) + return src2; + if (src2 = SNaN) + return src2;*/ + if (src1 > src2) + return src1; + return src2; +} + +static void SSEOP(maxps)(i386_state *cpustate) // Opcode 0f 5f +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]); + XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]); + XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]); + XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]); + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]); + XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]); + XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]); + XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(maxss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 5f +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]); + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + src.d[0]=READ32(cpustate, ea); + XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(addss)(i386_state *cpustate) // Opcode f3 0f 58 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(subss)(i386_state *cpustate) // Opcode f3 0f 5c +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(mulss)(i386_state *cpustate) // Opcode f3 0f 5e +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0]; + } else { + XMM_REG 
src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(divss)(i386_state *cpustate) // Opcode 0f 59 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(rcpss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 53 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / XMM(modrm & 0x7).f[0]; + } else { + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + s.d[0]=READ32(cpustate, ea); + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / s.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(sqrtss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 51 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]); + } else { + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + s.d[0]=READ32(cpustate, ea); + XMM((modrm >> 3) & 0x7).f[0] = sqrt(s.f[0]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(rsqrtss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 52 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]); + } else { + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + s.d[0]=READ32(cpustate, ea); + XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(s.f[0]); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(minss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 5d +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < XMM(modrm & 0x7).f[0] ? XMM((modrm >> 3) & 0x7).f[0] : XMM(modrm & 0x7).f[0]; + } else { + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + s.d[0] = READ32(cpustate, ea); + XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < s.f[0] ? 
XMM((modrm >> 3) & 0x7).f[0] : s.f[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(comiss_r128_r128m32)(i386_state *cpustate) // Opcode 0f 2f +{ + float32 a,b; + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + a = XMM((modrm >> 3) & 0x7).d[0]; + b = XMM(modrm & 0x7).d[0]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + a = XMM((modrm >> 3) & 0x7).d[0]; + b = src.d[0]; + } + cpustate->OF=0; + cpustate->SF=0; + cpustate->AF=0; + if (float32_is_nan(a) || float32_is_nan(b)) + { + cpustate->ZF = 1; + cpustate->PF = 1; + cpustate->CF = 1; + } + else + { + cpustate->ZF = 0; + cpustate->PF = 0; + cpustate->CF = 0; + if (float32_eq(a, b)) + cpustate->ZF = 1; + if (float32_lt(a, b)) + cpustate->CF = 1; + } + // should generate exception when at least one of the operands is either QNaN or SNaN + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(ucomiss_r128_r128m32)(i386_state *cpustate) // Opcode 0f 2e +{ + float32 a,b; + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + a = XMM((modrm >> 3) & 0x7).d[0]; + b = XMM(modrm & 0x7).d[0]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + a = XMM((modrm >> 3) & 0x7).d[0]; + b = src.d[0]; + } + cpustate->OF=0; + cpustate->SF=0; + cpustate->AF=0; + if (float32_is_nan(a) || float32_is_nan(b)) + { + cpustate->ZF = 1; + cpustate->PF = 1; + cpustate->CF = 1; + } + else + { + cpustate->ZF = 0; + cpustate->PF = 0; + cpustate->CF = 0; + if (float32_eq(a, b)) + cpustate->ZF = 1; + if (float32_lt(a, b)) + cpustate->CF = 1; + } + // should generate exception when at least one of the operands is SNaN + CYCLES(cpustate,1); // TODO: correct cycle count } static void SSEOP(shufps)(i386_state *cpustate) // Opcode 0f 67 @@ -341,11 +3085,464 @@ static void SSEOP(shufps)(i386_state *cpustate) // Opcode 0f 67 XMM(d).d[2]=src.d[m3]; XMM(d).d[3]=src.d[m4]; } -} - -static void MMXOP(emms)(i386_state *cpustate) // Opcode 0f 77 -{ - cpustate->x87_tw = 0xffff; // tag word = 0xffff - // TODO + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(unpcklps_r128_rm128)(i386_state *cpustate) // Opcode 0f 14 +{ + UINT8 modrm = FETCH(cpustate); + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + if( modrm >= 0xc0 ) { + XMM(d).d[3]=XMM(s).d[1]; + XMM(d).d[2]=XMM(d).d[1]; + XMM(d).d[1]=XMM(s).d[0]; + //XMM(d).d[0]=XMM(d).d[0]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM(d).d[3]=src.d[1]; + XMM(d).d[2]=XMM(d).d[1]; + XMM(d).d[1]=src.d[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(unpckhps_r128_rm128)(i386_state *cpustate) // Opcode 0f 15 +{ + UINT8 modrm = FETCH(cpustate); + int s,d; + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + if( modrm >= 0xc0 ) { + XMM(d).d[0]=XMM(d).d[2]; + XMM(d).d[1]=XMM(s).d[2]; + XMM(d).d[2]=XMM(d).d[3]; + XMM(d).d[3]=XMM(s).d[3]; + } else { + XMM_REG src; + UINT32 ea = GetEA(cpustate, modrm, 0); + READXMM(cpustate, ea, src); + XMM(d).d[0]=XMM(d).d[2]; + XMM(d).d[1]=src.d[2]; + XMM(d).d[2]=XMM(d).d[3]; + XMM(d).d[3]=src.d[3]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +INLINE bool sse_issingleordered(float op1, float op2) +{ + // TODO: true when at least one of the two source operands being compared is a NaN + return (op1 != op1) || (op1 != op2); +} + +INLINE bool sse_issingleunordered(float op1, float op2) +{ + // TODO: true when neither source operand is a NaN + 
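/* Illustrative sketch (not part of the patch): (U)COMISS reports the compare
   result through ZF/PF/CF exactly as the handlers above do -- unordered sets
   all three, equal sets only ZF, less-than sets only CF, greater-than clears
   all three; OF, SF and AF are always cleared. */
#include <math.h>

struct comiss_flags { int zf, pf, cf; };

static struct comiss_flags comiss_sketch(float a, float b)
{
    struct comiss_flags f = { 0, 0, 0 };
    if (isnan(a) || isnan(b)) { f.zf = 1; f.pf = 1; f.cf = 1; }   /* unordered */
    else if (a == b)          { f.zf = 1; }                       /* equal     */
    else if (a < b)           { f.cf = 1; }                       /* less than */
    return f;                                                     /* greater: all clear */
}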
return !((op1 != op1) || (op1 != op2)); +} + +INLINE void sse_predicate_compare_single(UINT8 imm8, XMM_REG d, XMM_REG s) +{ + switch (imm8 & 7) + { + case 0: + s.d[0]=s.f[0] == s.f[0] ? 0xffffffff : 0; + d.d[1]=d.f[1] == s.f[1] ? 0xffffffff : 0; + d.d[2]=d.f[2] == s.f[2] ? 0xffffffff : 0; + d.d[3]=d.f[3] == s.f[3] ? 0xffffffff : 0; + break; + case 1: + d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0; + d.d[1]=d.f[1] < s.f[1] ? 0xffffffff : 0; + d.d[2]=d.f[2] < s.f[2] ? 0xffffffff : 0; + d.d[3]=d.f[3] < s.f[3] ? 0xffffffff : 0; + break; + case 2: + d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0; + d.d[1]=d.f[1] <= s.f[1] ? 0xffffffff : 0; + d.d[2]=d.f[2] <= s.f[2] ? 0xffffffff : 0; + d.d[3]=d.f[3] <= s.f[3] ? 0xffffffff : 0; + break; + case 3: + d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0; + d.d[1]=sse_issingleunordered(d.f[1], s.f[1]) ? 0xffffffff : 0; + d.d[2]=sse_issingleunordered(d.f[2], s.f[2]) ? 0xffffffff : 0; + d.d[3]=sse_issingleunordered(d.f[3], s.f[3]) ? 0xffffffff : 0; + break; + case 4: + d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0; + d.d[1]=d.f[1] != s.f[1] ? 0xffffffff : 0; + d.d[2]=d.f[2] != s.f[2] ? 0xffffffff : 0; + d.d[3]=d.f[3] != s.f[3] ? 0xffffffff : 0; + break; + case 5: + d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff; + d.d[1]=d.f[1] < s.f[1] ? 0 : 0xffffffff; + d.d[2]=d.f[2] < s.f[2] ? 0 : 0xffffffff; + d.d[3]=d.f[3] < s.f[3] ? 0 : 0xffffffff; + break; + case 6: + d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff; + d.d[1]=d.f[1] <= s.f[1] ? 0 : 0xffffffff; + d.d[2]=d.f[2] <= s.f[2] ? 0 : 0xffffffff; + d.d[3]=d.f[3] <= s.f[3] ? 0 : 0xffffffff; + break; + case 7: + d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0; + d.d[1]=sse_issingleordered(d.f[1], s.f[1]) ? 0xffffffff : 0; + d.d[2]=sse_issingleordered(d.f[2], s.f[2]) ? 0xffffffff : 0; + d.d[3]=sse_issingleordered(d.f[3], s.f[3]) ? 0xffffffff : 0; + break; + } +} + +INLINE void sse_predicate_compare_single_scalar(UINT8 imm8, XMM_REG d, XMM_REG s) +{ + switch (imm8 & 7) + { + case 0: + s.d[0]=s.f[0] == s.f[0] ? 0xffffffff : 0; + break; + case 1: + d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0; + break; + case 2: + d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0; + break; + case 3: + d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0; + break; + case 4: + d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0; + break; + case 5: + d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff; + break; + case 6: + d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff; + break; + case 7: + d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 
0xffffffff : 0; + break; + } +} + +static void SSEOP(cmpps_r128_rm128_i8)(i386_state *cpustate) // Opcode 0f c2 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + UINT8 imm8 = FETCH(cpustate); + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + sse_predicate_compare_single(imm8, XMM(d), XMM(s)); + } else { + int d; + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + UINT8 imm8 = FETCH(cpustate); + READXMM(cpustate, ea, s); + d=(modrm >> 3) & 0x7; + sse_predicate_compare_single(imm8, XMM(d), s); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(cmpss_r128_r128m32_i8)(i386_state *cpustate) // Opcode f3 0f c2 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + int s,d; + UINT8 imm8 = FETCH(cpustate); + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + sse_predicate_compare_single_scalar(imm8, XMM(d), XMM(s)); + } else { + int d; + XMM_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + UINT8 imm8 = FETCH(cpustate); + s.d[0]=READ32(cpustate, ea); + d=(modrm >> 3) & 0x7; + sse_predicate_compare_single_scalar(imm8, XMM(d), s); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pinsrw_r64_r16m16_i8)(i386_state *cpustate) // Opcode 0f c4 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + UINT8 imm8 = FETCH(cpustate); + UINT16 v = LOAD_RM16(modrm); + MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + UINT8 imm8 = FETCH(cpustate); + UINT16 v = READ16(cpustate, ea); + MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pinsrw_r64_r32m16_i8)(i386_state *cpustate) // Opcode 0f c4 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + UINT8 imm8 = FETCH(cpustate); + UINT16 v = (UINT16)LOAD_RM32(modrm); + MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v; + } else { + UINT32 ea = GetEA(cpustate, modrm, 0); + UINT8 imm8 = FETCH(cpustate); + UINT16 v = READ16(cpustate, ea); + MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pextrw_r16_r64_i8)(i386_state *cpustate) // Opcode 0f c5 +{ + //MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + UINT8 imm8 = FETCH(cpustate); + STORE_REG16(modrm, MMX(modrm & 0x7).w[imm8 & 3]); + } else { + //UINT8 imm8 = FETCH(cpustate); + report_invalid_modrm(cpustate, "pextrw_r16_r64_i8", modrm); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pextrw_r32_r64_i8)(i386_state *cpustate) // Opcode 0f c5 +{ + //MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + UINT8 imm8 = FETCH(cpustate); + STORE_REG32(modrm, MMX(modrm & 0x7).w[imm8 & 3]); + } else { + //UINT8 imm8 = FETCH(cpustate); + report_invalid_modrm(cpustate, "pextrw_r32_r64_i8", modrm); + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pminub_r64_rm64)(i386_state *cpustate) // Opcode 0f da +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < s.b[n] ? 
MMX((modrm >> 3) & 0x7).b[n] : s.b[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pmaxub_r64_rm64)(i386_state *cpustate) // Opcode 0f de +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pavgb_r64_rm64)(i386_state *cpustate) // Opcode 0f e0 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)MMX(modrm & 0x7).b[n] + 1) >> 1; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 8;n++) + MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)s.b[n] + 1) >> 1; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pavgw_r64_rm64)(i386_state *cpustate) // Opcode 0f e3 +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)MMX(modrm & 0x7).w[n] + 1) >> 1; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)s.w[n] + 1) >> 1; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pmulhuw_r64_rm64)(i386_state *cpustate) // Opcode 0f e4 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)MMX(modrm & 7).w[0]) >> 16; + MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)MMX(modrm & 7).w[1]) >> 16; + MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)MMX(modrm & 7).w[2]) >> 16; + MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)MMX(modrm & 7).w[3]) >> 16; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)s.w[0]) >> 16; + MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)s.w[1]) >> 16; + MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)s.w[2]) >> 16; + MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)s.w[3]) >> 16; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pminsw_r64_rm64)(i386_state *cpustate) // Opcode 0f ea +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < s.s[n] ? 
MMX((modrm >> 3) & 0x7).s[n] : s.s[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pmaxsw_r64_rm64)(i386_state *cpustate) // Opcode 0f ee +{ + int n; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + for (n=0;n < 4;n++) + MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pmuludq_r64_rm64)(i386_state *cpustate) // Opcode 0f f4 +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)MMX(modrm & 0x7).d[0]; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)s.d[0]; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(psadbw_r64_rm64)(i386_state *cpustate) // Opcode 0f f6 +{ + int n; + INT32 temp; + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + temp=0; + for (n=0;n < 8;n++) + temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)MMX(modrm & 0x7).b[n]); + MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + temp=0; + for (n=0;n < 8;n++) + temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]); + MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(psubq_r64_rm64)(i386_state *cpustate) // Opcode 0f fb +{ + MMXPROLOG(cpustate); + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - MMX(modrm & 7).q; + } else { + MMX_REG s; + UINT32 ea = GetEA(cpustate, modrm, 0); + READMMX(cpustate, ea, s); + MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - s.q; + } + CYCLES(cpustate,1); // TODO: correct cycle count +} + +static void SSEOP(pshufhw_r128_rm128_i8)(i386_state *cpustate) // Opcode f3 0f 70 +{ + UINT8 modrm = FETCH(cpustate); + if( modrm >= 0xc0 ) { + XMM_REG t; + int s,d; + UINT8 imm8 = FETCH(cpustate); + s=modrm & 0x7; + d=(modrm >> 3) & 0x7; + t.q[0]=XMM(s).q[1]; + XMM(d).q[0]=XMM(s).q[0]; + XMM(d).w[4]=t.w[imm8 & 3]; + XMM(d).w[5]=t.w[(imm8 >> 2) & 3]; + XMM(d).w[6]=t.w[(imm8 >> 4) & 3]; + XMM(d).w[7]=t.w[(imm8 >> 6) & 3]; + } else { + XMM_REG s; + int d=(modrm >> 3) & 0x7; + UINT32 ea = GetEA(cpustate, modrm, 0); + UINT8 imm8 = FETCH(cpustate); + READXMM(cpustate, ea, s); + XMM(d).q[0]=s.q[0]; + XMM(d).w[4]=s.w[4 + (imm8 & 3)]; + XMM(d).w[5]=s.w[4 + ((imm8 >> 2) & 3)]; + XMM(d).w[6]=s.w[4 + ((imm8 >> 4) & 3)]; + XMM(d).w[7]=s.w[4 + ((imm8 >> 6) & 3)]; + } CYCLES(cpustate,1); // TODO: correct cycle count } diff --git a/src/emu/cpu/i386/x87ops.c b/src/emu/cpu/i386/x87ops.c index edab482aea3..8ab1c7b1c56 100644 --- a/src/emu/cpu/i386/x87ops.c +++ b/src/emu/cpu/i386/x87ops.c @@ -3762,6 +3762,51 @@ void x87_fcomp_sti(i386_state *cpustate, UINT8 modrm) CYCLES(cpustate, 4); } +void x87_fcomip_sti(i386_state *cpustate, UINT8 modrm) +{ + int i = modrm & 7; + + if (X87_IS_ST_EMPTY(0) || X87_IS_ST_EMPTY(i)) + { + 
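+ // ST(0) or ST(i) is tagged empty: flag the stack fault in the x87 status
+ // word and report the "unordered" result in EFLAGS (ZF=PF=CF=1), since
+ // fcomip writes its result to EFLAGS rather than to C0/C2/C3.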
x87_set_stack_underflow(cpustate); + cpustate->ZF = 1; + cpustate->PF = 1; + cpustate->CF = 1; + } + else + { + cpustate->x87_sw &= ~X87_SW_C1; + + floatx80 a = ST(0); + floatx80 b = ST(i); + + if (floatx80_is_nan(a) || floatx80_is_nan(b)) + { + cpustate->ZF = 1; + cpustate->PF = 1; + cpustate->CF = 1; + cpustate->x87_sw |= X87_SW_IE; + } + else + { + cpustate->ZF = 0; + cpustate->PF = 0; + cpustate->CF = 0; + + if (floatx80_eq(a, b)) + cpustate->ZF = 1; + + if (floatx80_lt(a, b)) + cpustate->CF = 1; + } + } + + if (x87_check_exceptions(cpustate)) + x87_inc_stack(cpustate); + + CYCLES(cpustate, 4); // TODO: correct cycle count +} + void x87_fcompp(i386_state *cpustate, UINT8 modrm) { if (X87_IS_ST_EMPTY(0) || X87_IS_ST_EMPTY(1)) @@ -4651,6 +4696,7 @@ void build_x87_opcode_table_df(i386_state *cpustate) switch (modrm) { case 0xe0: ptr = x87_fstsw_ax; break; + case 0xf0: case 0xf1: case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf6: case 0xf7: ptr = x87_fcomip_sti; break; } } diff --git a/src/mame/drivers/chihiro.c b/src/mame/drivers/chihiro.c index 62bd9c9aca6..08ec1c018b9 100644 --- a/src/mame/drivers/chihiro.c +++ b/src/mame/drivers/chihiro.c @@ -377,6 +377,7 @@ Thanks to Alex, Mr Mudkips, and Philip Burke for this info. #define LOG_PCI //#define LOG_OHCI //#define LOG_NV2A +#define LOG_BASEBOARD class nv2a_renderer; // forw. dec. struct nvidia_object_data @@ -387,9 +388,11 @@ struct nvidia_object_data class chihiro_state : public driver_device { public: - chihiro_state(const machine_config &mconfig, device_type type, const char *tag) - : driver_device(mconfig, type, tag), - m_maincpu(*this, "maincpu") { } + chihiro_state(const machine_config &mconfig, device_type type, const char *tag) : + driver_device(mconfig, type, tag), + nvidia_nv2a(NULL), + debug_irq_active(false), + m_maincpu(*this, "maincpu") { } DECLARE_READ32_MEMBER( geforce_r ); DECLARE_WRITE32_MEMBER( geforce_w ); @@ -397,6 +400,12 @@ public: DECLARE_WRITE32_MEMBER( usbctrl_w ); DECLARE_READ32_MEMBER( smbus_r ); DECLARE_WRITE32_MEMBER( smbus_w ); + DECLARE_READ32_MEMBER( mediaboard_r ); + DECLARE_WRITE32_MEMBER( mediaboard_w ); + DECLARE_READ32_MEMBER( audio_apu_r ); + DECLARE_WRITE32_MEMBER( audio_apu_w ); + DECLARE_READ32_MEMBER( audio_ac93_r ); + DECLARE_WRITE32_MEMBER( audio_ac93_w ); DECLARE_READ32_MEMBER( dummy_r ); DECLARE_WRITE32_MEMBER( dummy_w ); @@ -404,23 +413,59 @@ public: int smbus_pic16lc(int command,int rw,int data); int smbus_cx25871(int command,int rw,int data); int smbus_eeprom(int command,int rw,int data); + void baseboard_ide_event(int type,UINT8 *read,UINT8 *write); + UINT8 *baseboard_ide_dimmboard(UINT32 lba); + void dword_write_le(UINT8 *addr,UINT32 d); + void word_write_le(UINT8 *addr,UINT16 d); + void debug_generate_irq(int irq,bool active); void vblank_callback(screen_device &screen, bool state); UINT32 screen_update_callback(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect); - struct chihiro_devices { - pic8259_device *pic8259_1; - pic8259_device *pic8259_2; - bus_master_ide_controller_device *ide; - } chihiro_devs; - - nv2a_renderer *nvidia_nv2a; virtual void machine_start(); DECLARE_WRITE_LINE_MEMBER(chihiro_pic8259_1_set_int_line); DECLARE_READ8_MEMBER(get_slave_ack); DECLARE_WRITE_LINE_MEMBER(chihiro_pit8254_out0_changed); DECLARE_WRITE_LINE_MEMBER(chihiro_pit8254_out2_changed); IRQ_CALLBACK_MEMBER(irq_callback); + TIMER_CALLBACK_MEMBER(audio_apu_timer); + + struct chihiro_devices { + pic8259_device *pic8259_1; + pic8259_device *pic8259_2; + 
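+ // device pointers are looked up once in machine_start(); dimmboard is the
+ // Type 1 media board (naomi_gdrom_board) whose memory() method hands back
+ // the decrypted gdrom image used by baseboard_ide_dimmboard()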
bus_master_ide_controller_device *ide; + naomi_gdrom_board *dimmboard; + } chihiro_devs; + struct smbus_state { + int status; + int control; + int address; + int data; + int command; + int rw; + int (*devices[128])(chihiro_state &chs,int command,int rw,int data); + UINT32 words[256/4]; + } smbusst; + struct apu_state { + UINT32 memory0_sgaddress; + UINT32 memory0_sgblocks; + UINT32 memory0_address; + UINT32 memory1_sgaddress; + UINT32 memory1_sgblocks; + emu_timer *timer; + address_space *space; + } apust; + struct ac97_state { + UINT32 mixer_regs[0x80/4]; + UINT32 controller_regs[0x38/4]; + } ac97st; + UINT8 pic16lc_buffer[0xff]; + nv2a_renderer *nvidia_nv2a; + bool debug_irq_active; + int debug_irq_number; + UINT8 *dimm_board_memory; + UINT32 dimm_board_memory_size; + int usbhack_counter; required_device m_maincpu; }; @@ -459,6 +504,8 @@ public: objectdata->data=this; combiner.used=0; combiner.lock=osd_lock_alloc(); + enabled_vertex_attributes=0; + memset(words_vertex_attributes,0,sizeof(words_vertex_attributes)); } DECLARE_READ32_MEMBER( geforce_r ); DECLARE_WRITE32_MEMBER( geforce_w ); @@ -472,7 +519,7 @@ public: int geforce_commandkind(UINT32 word); UINT32 geforce_object_offset(UINT32 handle); void geforce_read_dma_object(UINT32 handle,UINT32 &offset,UINT32 &size); - void geforce_exec_method(address_space &space,UINT32 channel,UINT32 subchannel,UINT32 method,UINT32 data); + void geforce_exec_method(address_space &space,UINT32 channel,UINT32 subchannel,UINT32 method,UINT32 address,int &countlen); void combiner_initialize_registers(UINT32 argb8[6]); void combiner_initialize_stage(int stage_number); void combiner_initialize_final(); @@ -500,6 +547,7 @@ public: void computedilated(void); void putpixtex(int xp,int yp,int up,int vp); int toggle_register_combiners_usage(); + void savestate_items(); struct { UINT32 regs[0x80/4]; @@ -625,6 +673,8 @@ public: int used; osd_lock *lock; } combiner; + int enabled_vertex_attributes; + int words_vertex_attributes[16]; bitmap_rgb32 fb; UINT32 dilated0[16][2048]; UINT32 dilated1[16][2048]; @@ -656,6 +706,11 @@ public: TEX2=11, TEX3=12 }; + enum NV2A_VTXBUF_TYPE { + FLOAT=2, + UBYTE=4, + USHORT=5 + }; enum NV2A_TEX_FORMAT { L8=0x0, I8=0x1, @@ -692,52 +747,6 @@ public: }; }; -UINT32 nv2a_renderer::dilate0(UINT32 value,int bits) // dilate first "bits" bits in "value" -{ - UINT32 x,m1,m2,m3; - int a; - - x = value; - for (a=0;a < bits;a++) - { - m2 = 1 << (a << 1); - m1 = m2 - 1; - m3 = (~m1) << 1; - x = (x & m1) + (x & m2) + ((x & m3) << 1); - } - return x; -} - -UINT32 nv2a_renderer::dilate1(UINT32 value,int bits) // dilate first "bits" bits in "value" -{ - UINT32 x,m1,m2,m3; - int a; - - x = value; - for (a=0;a < bits;a++) - { - m2 = 1 << (a << 1); - m1 = m2 - 1; - m3 = (~m1) << 1; - x = (x & m1) + ((x & m2) << 1) + ((x & m3) << 1); - } - return x; -} - -void nv2a_renderer::computedilated(void) -{ - int a,b; - - for (b=0;b < 16;b++) - for (a=0;a < 2048;a++) { - dilated0[b][a]=dilate0(a,b); - dilated1[b][a]=dilate1(a,b); - } - for (b=0;b < 16;b++) - for (a=0;a < 16;a++) - dilatechose[(b << 4) + a]=(a < b ? a : b); -} - /* jamtable instructions for Chihiro (different from console) St. Instr. 
Comment 0x01 POKEPCI PCICONF[OP2] := OP1 @@ -976,6 +985,58 @@ static void dump_list_command(running_machine &machine, int ref, int params, con } } +static void curthread_command(running_machine &machine, int ref, int params, const char **param) +{ + address_space &space=machine.firstcpu->space(); + UINT64 fsbase; + UINT32 kthrd,topstack,tlsdata; + offs_t address; + cpuinfo cpu_info; + + CPU_GET_INFO_NAME(i386)((legacy_cpu_device *)machine.firstcpu,CPUINFO_INT_REGISTER + 44,&cpu_info); + fsbase=cpu_info.i; + address=(offs_t)fsbase+0x28; + if (!debug_cpu_translate(space,TRANSLATE_READ_DEBUG,&address)) + { + debug_console_printf(machine,"Address is unmapped.\n"); + return; + } + kthrd=space.read_dword_unaligned(address); + debug_console_printf(machine,"Current thread is %08X\n",kthrd); + address=(offs_t)kthrd+0x1c; + if (!debug_cpu_translate(space,TRANSLATE_READ_DEBUG,&address)) + return; + topstack=space.read_dword_unaligned(address); + debug_console_printf(machine,"Current thread stack top is %08X\n",topstack); + address=(offs_t)kthrd+0x28; + if (!debug_cpu_translate(space,TRANSLATE_READ_DEBUG,&address)) + return; + tlsdata=space.read_dword_unaligned(address); + if (tlsdata == 0) + address=(offs_t)topstack-0x210-8; + else + address=(offs_t)tlsdata-8; + if (!debug_cpu_translate(space,TRANSLATE_READ_DEBUG,&address)) + return; + debug_console_printf(machine,"Current thread function is %08X\n",space.read_dword_unaligned(address)); +} + +static void generate_irq_command(running_machine &machine, int ref, int params, const char **param) +{ + UINT64 irq; + chihiro_state *chst=machine.driver_data(); + + if (params < 1) + return; + if (!debug_command_parameter_number(machine, param[0], &irq)) + return; + if (irq > 15) + return; + if (irq == 2) + return; + chst->debug_generate_irq((int)irq,true); +} + static void nv2a_combiners_command(running_machine &machine, int ref, int params, const char **param) { int en; @@ -995,6 +1056,8 @@ static void help_command(running_machine &machine, int ref, int params, const ch debug_console_printf(machine," chihiro dump_string,
<address> -- Dump _STRING object at <address>
\n"); debug_console_printf(machine," chihiro dump_process,
<address> -- Dump _PROCESS object at <address>
\n"); debug_console_printf(machine," chihiro dump_list,
<address>[,<length>] -- Dump _LIST_ENTRY chain starting at <address>
\n"); + debug_console_printf(machine," chihiro curthread -- Print information about current thread\n"); + debug_console_printf(machine," chihiro irq, -- Generate interrupt with irq number 0-15\n"); debug_console_printf(machine," chihiro nv2a_combiners -- Toggle use of register combiners\n"); debug_console_printf(machine," chihiro help -- this list\n"); } @@ -1011,12 +1074,66 @@ static void chihiro_debug_commands(running_machine &machine, int ref, int params dump_process_command(machine,ref,params-1,param+1); else if (strcmp("dump_list",param[0]) == 0) dump_list_command(machine,ref,params-1,param+1); + else if (strcmp("curthread",param[0]) == 0) + curthread_command(machine,ref,params-1,param+1); + else if (strcmp("irq",param[0]) == 0) + generate_irq_command(machine,ref,params-1,param+1); else if (strcmp("nv2a_combiners",param[0]) == 0) nv2a_combiners_command(machine,ref,params-1,param+1); else help_command(machine,ref,params-1,param+1); } +/* + * Graphics + */ + +UINT32 nv2a_renderer::dilate0(UINT32 value,int bits) // dilate first "bits" bits in "value" +{ + UINT32 x,m1,m2,m3; + int a; + + x = value; + for (a=0;a < bits;a++) + { + m2 = 1 << (a << 1); + m1 = m2 - 1; + m3 = (~m1) << 1; + x = (x & m1) + (x & m2) + ((x & m3) << 1); + } + return x; +} + +UINT32 nv2a_renderer::dilate1(UINT32 value,int bits) // dilate first "bits" bits in "value" +{ + UINT32 x,m1,m2,m3; + int a; + + x = value; + for (a=0;a < bits;a++) + { + m2 = 1 << (a << 1); + m1 = m2 - 1; + m3 = (~m1) << 1; + x = (x & m1) + ((x & m2) << 1) + ((x & m3) << 1); + } + return x; +} + +void nv2a_renderer::computedilated(void) +{ + int a,b; + + for (b=0;b < 16;b++) + for (a=0;a < 2048;a++) { + dilated0[b][a]=dilate0(a,b); + dilated1[b][a]=dilate1(a,b); + } + for (b=0;b < 16;b++) + for (a=0;a < 16;a++) + dilatechose[(b << 4) + a]=(a < b ? 
a : b); +} + int nv2a_renderer::geforce_commandkind(UINT32 word) { if ((word & 0x00000003) == 0x00000002) @@ -1146,8 +1263,12 @@ void nv2a_renderer::render_texture_simple(INT32 scanline, const extent_t &extent up=(extent.param[4].start+(float)x*extent.param[4].dpdx)*(float)(objectdata.data->texture[0].sizeu-1); // x coordinate of texel in texture vp=extent.param[5].start*(float)(objectdata.data->texture[0].sizev-1); // y coordinate of texel in texture to=(objectdata.data->dilated0[objectdata.data->texture[0].dilate][up]+objectdata.data->dilated1[objectdata.data->texture[0].dilate][vp]); // offset of texel in texture memory - a4r4g4b4=*(((UINT16 *)objectdata.data->texture[0].buffer)+to); // get texel color - *((UINT32 *)objectdata.data->fb.raw_pixptr(scanline,xp))=convert_a4r4g4b4_a8r8g8b8(a4r4g4b4); + if (objectdata.data->texture[0].format == 6) { + *((UINT32 *)objectdata.data->fb.raw_pixptr(scanline,xp))=*(((UINT32 *)objectdata.data->texture[0].buffer)+to); // get texel color + } else { + a4r4g4b4=*(((UINT16 *)objectdata.data->texture[0].buffer)+to); // get texel color + *((UINT32 *)objectdata.data->fb.raw_pixptr(scanline,xp))=convert_a4r4g4b4_a8r8g8b8(a4r4g4b4); + } x--; } } @@ -1182,8 +1303,12 @@ void nv2a_renderer::render_register_combiners(INT32 scanline, const extent_t &ex up=(extent.param[4+n*2].start+(float)x*extent.param[4+n*2].dpdx)*(float)(objectdata.data->texture[n].sizeu-1); vp=extent.param[5+n*2].start*(float)(objectdata.data->texture[n].sizev-1); to=(objectdata.data->dilated0[objectdata.data->texture[n].dilate][up]+objectdata.data->dilated1[objectdata.data->texture[n].dilate][vp]); - color[n+2]=*(((UINT16 *)objectdata.data->texture[n].buffer)+to); - color[n+2]=convert_a4r4g4b4_a8r8g8b8(color[n+2]); + if (texture[n].format == 6) { + color[n+2]=*(((UINT32 *)objectdata.data->texture[n].buffer)+to); + } else { + color[n+2]=*(((UINT16 *)objectdata.data->texture[n].buffer)+to); + color[n+2]=convert_a4r4g4b4_a8r8g8b8(color[n+2]); + } } } // 2: compute @@ -1353,84 +1478,14 @@ void dumpcombiners(UINT32 *m) } #endif -void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT32 subchannel,UINT32 method,UINT32 data) +void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT32 subchannel,UINT32 method,UINT32 address,int &countlen) { - UINT32 maddress=method*4; + UINT32 maddress; + UINT32 data; + + maddress=method*4; + data=space.read_dword(address); channel[chanel][subchannel].object.method[method]=data; - if ((maddress == 0x1d6c) || (maddress == 0x1d70) || (maddress == 0x1a4)) - method=method+0; - if (maddress == 0x1d70) { - // with 1d70 write the value at offest [1d6c] inside dma object [1a4] - UINT32 offset,base; - UINT32 dmahand,dmaoff,smasiz; - - offset=channel[chanel][subchannel].object.method[0x1d6c/4]; - dmahand=channel[chanel][subchannel].object.method[0x1a4/4]; - geforce_read_dma_object(dmahand,dmaoff,smasiz); - base=dmaoff; - space.write_dword(base+offset,data); - } - if (maddress == 0x1d94) { - // clear framebuffer - if (data & 0xf0) { - // clear colors - UINT32 color=channel[chanel][subchannel].object.method[0x1d90/4]; - fb.fill(color & 0xffffff); - printf("clearscreen\n\r"); - } - if (data & 0x03) { - // clear stencil+zbuffer - } - } - // Texture Units - if ((maddress >= 0x1b00) && (maddress < 0x1c00)) { - int unit;//,off; - - unit=(maddress >> 6) & 3; - //off=maddress & 0xc0; - maddress=maddress & ~0xc0; - if (maddress == 0x1b00) { - UINT32 offset;//,base; - //UINT32 dmahand,dmaoff,dmasiz; - - offset=data; - 
texture[unit].buffer=space.get_read_ptr(offset); - /*if (dma0 != 0) { - dmahand=channel[channel][subchannel].object.method[0x184/4]; - geforce_read_dma_object(dmahand,dmaoff,smasiz); - } else if (dma1 != 0) { - dmahand=channel[channel][subchannel].object.method[0x188/4]; - geforce_read_dma_object(dmahand,dmaoff,smasiz); - }*/ - } - if (maddress == 0x1b04) { - //int dma0,dma1,cubic,noborder,dims,mipmap; - int basesizeu,basesizev,basesizew,format; - - //dma0=(data >> 0) & 1; - //dma1=(data >> 1) & 1; - //cubic=(data >> 2) & 1; - //noborder=(data >> 3) & 1; - //dims=(data >> 4) & 15; - //mipmap=(data >> 19) & 1; - format=(data >> 8) & 255; - basesizeu=(data >> 20) & 15; - basesizev=(data >> 24) & 15; - basesizew=(data >> 28) & 15; - texture[unit].sizeu=1 << basesizeu; - texture[unit].sizev=1 << basesizev; - texture[unit].sizew=1 << basesizew; - texture[unit].dilate=dilatechose[(basesizeu << 4)+basesizev]; - texture[unit].format=format; - } - if (maddress == 0x1b0c) { - // enable texture - int enable; - - enable=(data >> 30) & 1; - texture[unit].enabled=enable; - } - } if (maddress == 0x1810) { // draw vertices int offset,count,type; @@ -1462,11 +1517,11 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT #ifdef LOG_NV2A printf(" %08X %08X\n\r",channel[chanel][subchannel].object.method[0x1720/4+n],channel[chanel][subchannel].object.method[0x1760/4+n]); #endif - tmp=channel[chanel][subchannel].object.method[0x1760/4+n]; + tmp=channel[chanel][subchannel].object.method[0x1760/4+n]; // VTXBUF_FMT //vtxbuf_kind[n]=tmp & 15; //vtxbuf_size[n]=(tmp >> 4) & 15; vtxbuf_stride[n]=(tmp >> 8) & 255; - tmp=channel[chanel][subchannel].object.method[0x1720/4+n]; + tmp=channel[chanel][subchannel].object.method[0x1720/4+n]; // VTXBUF_OFFSET if (tmp & 0x80000000) vtxbuf_address[n]=(tmp & 0x0fffffff)+dmaoff[1]; else @@ -1519,7 +1574,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT float z[3],w[3]; UINT32 c[3]; - printf("draw triangle\n\r"); + //printf("draw triangle\n\r"); // put first 2 vertices data in elements 0,1 of arrays for (m=0;m < 2;m++) { *((UINT32 *)(&xy[m].x))=space.read_dword(vtxbuf_address[0]+(m+offset)*vtxbuf_stride[0]+0); @@ -1571,11 +1626,222 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT w[1]=w[2]; } } else { - type=type+0; // dummy, you can put a breakpoint here while debugging + logerror("Unsupported primitive %d for method 0x1810\n",type); + } + countlen--; + } + if (maddress == 0x1818) { + int n,m,u,vwords; + int vattrpos[16]; + int type; + render_delegate renderspans; + + if (((channel[chanel][subchannel].object.method[0x1e60/4] & 7) > 0) && (combiner.used != 0)) { + renderspans=render_delegate(FUNC(nv2a_renderer::render_register_combiners),this); + } else if (texture[0].enabled) { + renderspans=render_delegate(FUNC(nv2a_renderer::render_texture_simple),this); + } else + renderspans=render_delegate(FUNC(nv2a_renderer::render_color),this); + vwords=0; + for (n=0;n < 16;n++) { + vattrpos[n]=vwords; + if ((enabled_vertex_attributes & (1 << n)) != 0) + vwords += words_vertex_attributes[n]; + } + // vertices are taken from the next words, not from a vertex buffer + // first send primitive type with 17fc + // then countlen number of dwords with 1818 + // end with 17fc primitive type 0 + // at 1760 16 words specify the vertex format:for each possible vertex attribute the number of components (0=not present) and type of each + if ((countlen % vwords) != 0) { + logerror("Method 0x1818 got %d 
words, at least %d were expected\n",countlen,(countlen/vwords+1)*vwords); + countlen=0; + return; + } + type=channel[chanel][subchannel].object.method[0x17fc/4]; + if (type == nv2a_renderer::TRIANGLE_FAN) { + vertex_t xy[3]; + float z[3],w[3]; + UINT32 c[3]; + + // put first 2 vertices data in elements 0,1 of arrays + for (m=0;m < 2;m++) { + // consider only attributes: position,color0,texture 0-3 + // position + *((UINT32 *)(&xy[m].x))=space.read_dword(address+vattrpos[0]*4+0); + *((UINT32 *)(&xy[m].y))=space.read_dword(address+vattrpos[0]*4+4); + *((UINT32 *)(&z[m]))=space.read_dword(address+vattrpos[0]*4+8); + *((UINT32 *)(&w[m]))=space.read_dword(address+vattrpos[0]*4+12); + // color + c[m]=space.read_dword(address+vattrpos[3]*4+0); // color + xy[m].p[0]=c[m] & 0xff; // b + xy[m].p[1]=(c[m] & 0xff00) >> 8; // g + xy[m].p[2]=(c[m] & 0xff0000) >> 16; // r + xy[m].p[3]=(c[m] & 0xff000000) >> 24; // a + // texture 0-3 + for (u=0;u < 4;u++) { + xy[m].p[4+u*2]=0; + xy[m].p[5+u*2]=0; + if (texture[u].enabled) { + *((UINT32 *)(&xy[m].p[4+u*2]))=space.read_dword(address+vattrpos[9+u]*4+0); + *((UINT32 *)(&xy[m].p[5+u*2]))=space.read_dword(address+vattrpos[9+u]*4+4); + } + } + address=address+vwords*4; + countlen=countlen-vwords; + } + if (countlen <= 0) { + logerror("Method 0x1818 missing %d words to draw a complete primitive\n",-countlen+vwords); + countlen=0; + return; + } + for (n=2;countlen > 0;n++) { + // put vertex n data in element 2 of arrays + // position + *((UINT32 *)(&xy[2].x))=space.read_dword(address+vattrpos[0]*4+0); + *((UINT32 *)(&xy[2].y))=space.read_dword(address+vattrpos[0]*4+4); + *((UINT32 *)(&z[2]))=space.read_dword(address+vattrpos[0]*4+8); + *((UINT32 *)(&w[2]))=space.read_dword(address+vattrpos[0]*4+12); + // color + c[2]=space.read_dword(address+vattrpos[3]*4+0); // color + xy[2].p[0]=c[2] & 0xff; // b + xy[2].p[1]=(c[2] & 0xff00) >> 8; // g + xy[2].p[2]=(c[2] & 0xff0000) >> 16; // r + xy[2].p[3]=(c[2] & 0xff000000) >> 24; // a + // texture 0-3 + for (u=0;u < 4;u++) { + xy[2].p[4+u*2]=0; + xy[2].p[5+u*2]=0; + if (texture[u].enabled) { + *((UINT32 *)(&xy[2].p[4+u*2]))=space.read_dword(address+vattrpos[9+u]*4+0); + *((UINT32 *)(&xy[2].p[5+u*2]))=space.read_dword(address+vattrpos[9+u]*4+4); + } + } + address=address+vwords*4; + countlen=countlen-vwords; + if (countlen < 0) { + logerror("Method 0x1818 missing %d words to draw a complete primitive\n",-countlen); + countlen=0; + return; + } + // draw triangle + render_triangle(fb.cliprect(),renderspans,4+4*2,xy[0],xy[1],xy[2]); // 012 + wait(); + // move element 2 to 1 + xy[1]=xy[2]; + z[1]=z[2]; + w[1]=w[2]; + } + } else { + logerror("Unsupported primitive %d for method 0x1818\n",type); } } - if (maddress == 0x1e60) + if ((maddress == 0x1d6c) || (maddress == 0x1d70) || (maddress == 0x1a4)) + countlen--; + if (maddress == 0x1d70) { + // with 1d70 write the value at offest [1d6c] inside dma object [1a4] + UINT32 offset,base; + UINT32 dmahand,dmaoff,smasiz; + + offset=channel[chanel][subchannel].object.method[0x1d6c/4]; + dmahand=channel[chanel][subchannel].object.method[0x1a4/4]; + geforce_read_dma_object(dmahand,dmaoff,smasiz); + base=dmaoff; + space.write_dword(base+offset,data); + countlen--; + } + if (maddress == 0x1d94) { + // clear framebuffer + if (data & 0xf0) { + // clear colors + UINT32 color=channel[chanel][subchannel].object.method[0x1d90/4]; + fb.fill(color & 0xffffff); + //printf("clearscreen\n\r"); + } + if (data & 0x03) { + // clear stencil+zbuffer + } + countlen--; + } + // Texture Units + if ((maddress >= 
0x1b00) && (maddress < 0x1c00)) { + int unit;//,off; + + unit=(maddress >> 6) & 3; + //off=maddress & 0xc0; + maddress=maddress & ~0xc0; + if (maddress == 0x1b00) { + UINT32 offset;//,base; + //UINT32 dmahand,dmaoff,dmasiz; + + offset=data; + texture[unit].buffer=space.get_read_ptr(offset); + /*if (dma0 != 0) { + dmahand=channel[channel][subchannel].object.method[0x184/4]; + geforce_read_dma_object(dmahand,dmaoff,smasiz); + } else if (dma1 != 0) { + dmahand=channel[channel][subchannel].object.method[0x188/4]; + geforce_read_dma_object(dmahand,dmaoff,smasiz); + }*/ + } + if (maddress == 0x1b04) { + //int dma0,dma1,cubic,noborder,dims,mipmap; + int basesizeu,basesizev,basesizew,format; + + //dma0=(data >> 0) & 1; + //dma1=(data >> 1) & 1; + //cubic=(data >> 2) & 1; + //noborder=(data >> 3) & 1; + //dims=(data >> 4) & 15; + //mipmap=(data >> 19) & 1; + format=(data >> 8) & 255; + basesizeu=(data >> 20) & 15; + basesizev=(data >> 24) & 15; + basesizew=(data >> 28) & 15; + texture[unit].sizeu=1 << basesizeu; + texture[unit].sizev=1 << basesizev; + texture[unit].sizew=1 << basesizew; + texture[unit].dilate=dilatechose[(basesizeu << 4)+basesizev]; + texture[unit].format=format; + } + if (maddress == 0x1b0c) { + // enable texture + int enable; + + enable=(data >> 30) & 1; + texture[unit].enabled=enable; + } + countlen--; + } + if ((maddress >= 0x1760) && (maddress < 0x17A0)) { + int bit=method-0x1760/4; + + data=data & 255; + if (data > 15) + enabled_vertex_attributes |= (1 << bit); + else + enabled_vertex_attributes &= ~(1 << bit); + switch (data & 15) { + case 0: + words_vertex_attributes[bit]=(((data >> 4) + 3) & 15) >> 2; + break; + case nv2a_renderer::FLOAT: + words_vertex_attributes[bit]=(data >> 4); + break; + case nv2a_renderer::UBYTE: + words_vertex_attributes[bit]=(((data >> 4) + 3) & 15) >> 2; + break; + case nv2a_renderer::USHORT: + words_vertex_attributes[bit]=(((data >> 4) + 1) & 15) >> 1; + break; + default: + words_vertex_attributes[bit]=0; + } + } + if (maddress == 0x1e60) { combiner.stages=data & 15; + countlen--; + } if (maddress == 0x0288) { combiner.final.mapin_rgbD_input=data & 15; combiner.final.mapin_rgbD_component=(data >> 4) & 1; @@ -1589,6 +1855,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT combiner.final.mapin_rgbA_input=(data >> 24) & 15; combiner.final.mapin_rgbA_component=(data >> 28) & 1; combiner.final.mapin_rgbA_mapping=(data >> 29) & 7; + countlen--; } if (maddress == 0x028c) { combiner.final.color_sum_clamp=(data >> 7) & 1; @@ -1601,12 +1868,15 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT combiner.final.mapin_rgbE_input=(data >> 24) & 15; combiner.final.mapin_rgbE_component=(data >> 28) & 1; combiner.final.mapin_rgbE_mapping=(data >> 29) & 7; + countlen--; } if (maddress == 0x1e20) { combiner_argb8_float(data,combiner.final.register_constantcolor0); + countlen--; } if (maddress == 0x1e24) { combiner_argb8_float(data,combiner.final.register_constantcolor1); + countlen--; } if ((maddress >= 0x0260) && (maddress < 0x0280)) { int n; @@ -1624,6 +1894,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT combiner.stage[n].mapin_aA_input=(data >> 24) & 15; combiner.stage[n].mapin_aA_component=(data >> 28) & 1; combiner.stage[n].mapin_aA_mapping=(data >> 29) & 7; + countlen--; } if ((maddress >= 0x0ac0) && (maddress < 0x0ae0)) { int n; @@ -1641,18 +1912,21 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT 
combiner.stage[n].mapin_rgbA_input=(data >> 24) & 15; combiner.stage[n].mapin_rgbA_component=(data >> 28) & 1; combiner.stage[n].mapin_rgbA_mapping=(data >> 29) & 7; + countlen--; } if ((maddress >= 0x0a60) && (maddress < 0x0a80)) { int n; n=(maddress-0x0a60) >> 2; combiner_argb8_float(data,combiner.stage[n].register_constantcolor0); + countlen--; } if ((maddress >= 0x0a80) && (maddress < 0x0aa0)) { int n; n=(maddress-0x0a80) >> 2; combiner_argb8_float(data,combiner.stage[n].register_constantcolor1); + countlen--; } if ((maddress >= 0x0aa0) && (maddress < 0x0ac0)) { int n; @@ -1667,6 +1941,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT combiner.stage[n].mapout_a_bias=(data >> 15) & 1; combiner.stage[n].mapout_a_scale=(data >> 16) & 3; //combiner.=(data >> 27) & 7; + countlen--; } if ((maddress >= 0x1e40) && (maddress < 0x1e60)) { int n; @@ -1681,6 +1956,7 @@ void nv2a_renderer::geforce_exec_method(address_space & space,UINT32 chanel,UINT combiner.stage[n].mapout_rgb_bias=(data >> 15) & 1; combiner.stage[n].mapout_rgb_scale=(data >> 16) & 3; //combiner.=(data >> 27) & 7; + countlen--; } } @@ -1690,6 +1966,10 @@ int nv2a_renderer::toggle_register_combiners_usage() return combiner.used; } +void nv2a_renderer::savestate_items() +{ +} + void nv2a_renderer::combiner_argb8_float(UINT32 color,float reg[4]) { reg[0]=(float)(color & 0xff)/255.0; @@ -2313,7 +2593,7 @@ void nv2a_renderer::vblank_callback(screen_device &screen, bool state) { chihiro_state *chst=machine().driver_data(); - printf("vblank_callback\n\r"); + //printf("vblank_callback\n\r"); if (state == true) pcrtc[0x100/4] |= 1; else @@ -2334,11 +2614,76 @@ UINT32 nv2a_renderer::screen_update_callback(screen_device &screen, bitmap_rgb32 UINT32 *dst=(UINT32 *)bitmap.raw_pixptr(0,0); UINT32 *src=(UINT32 *)fb.raw_pixptr(0,0); - printf("updatescreen\n\r"); + //printf("updatescreen\n\r"); memcpy(dst,src,bitmap.rowbytes()*bitmap.height()); return 0; } +void chihiro_state::debug_generate_irq(int irq,bool active) +{ + int state; + + if (active) + { + debug_irq_active=true; + debug_irq_number=irq; + state=1; + } + else + { + debug_irq_active=false; + state=0; + } + switch (irq) + { + case 0: + chihiro_devs.pic8259_1->ir0_w(state); + break; + case 1: + chihiro_devs.pic8259_1->ir1_w(state); + break; + case 3: + chihiro_devs.pic8259_1->ir3_w(state); + break; + case 4: + chihiro_devs.pic8259_1->ir4_w(state); + break; + case 5: + chihiro_devs.pic8259_1->ir5_w(state); + break; + case 6: + chihiro_devs.pic8259_1->ir6_w(state); + break; + case 7: + chihiro_devs.pic8259_1->ir7_w(state); + break; + case 8: + chihiro_devs.pic8259_2->ir0_w(state); + break; + case 9: + chihiro_devs.pic8259_2->ir1_w(state); + break; + case 10: + chihiro_devs.pic8259_2->ir2_w(state); + break; + case 11: + chihiro_devs.pic8259_2->ir3_w(state); + break; + case 12: + chihiro_devs.pic8259_2->ir4_w(state); + break; + case 13: + chihiro_devs.pic8259_2->ir5_w(state); + break; + case 14: + chihiro_devs.pic8259_2->ir6_w(state); + break; + case 15: + chihiro_devs.pic8259_2->ir7_w(state); + break; + } +} + void chihiro_state::vblank_callback(screen_device &screen, bool state) { nvidia_nv2a->vblank_callback(screen,state); @@ -2359,24 +2704,24 @@ static int x,ret; ret=x; } if ((offset >= 0x00101000/4) && (offset < 0x00102000/4)) { - logerror("NV_2A: read STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,ret); + //logerror("NV_2A: read STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,ret); } else if ((offset >= 
0x00002000/4) && (offset < 0x00004000/4)) { ret=pfifo[offset-0x00002000/4]; // PFIFO.CACHE1.STATUS or PFIFO.RUNOUT_STATUS if ((offset == 0x3214/4) || (offset == 0x2400/4)) ret=0x10; - logerror("NV_2A: read PFIFO[%06X] value %08X\n",offset*4-0x00002000,ret); + //logerror("NV_2A: read PFIFO[%06X] value %08X\n",offset*4-0x00002000,ret); } else if ((offset >= 0x00700000/4) && (offset < 0x00800000/4)) { ret=ramin[offset-0x00700000/4]; - logerror("NV_2A: read PRAMIN[%06X] value %08X\n",offset*4-0x00700000,ret); + //logerror("NV_2A: read PRAMIN[%06X] value %08X\n",offset*4-0x00700000,ret); } else if ((offset >= 0x00400000/4) && (offset < 0x00402000/4)) { - logerror("NV_2A: read PGRAPH[%06X] value %08X\n",offset*4-0x00400000,ret); + //logerror("NV_2A: read PGRAPH[%06X] value %08X\n",offset*4-0x00400000,ret); } else if ((offset >= 0x00600000/4) && (offset < 0x00601000/4)) { ret=pcrtc[offset-0x00600000/4]; - logerror("NV_2A: read PCRTC[%06X] value %08X\n",offset*4-0x00600000,ret); + //logerror("NV_2A: read PCRTC[%06X] value %08X\n",offset*4-0x00600000,ret); } else if ((offset >= 0x00000000/4) && (offset < 0x00001000/4)) { ret=pmc[offset-0x00000000/4]; - logerror("NV_2A: read PMC[%06X] value %08X\n",offset*4-0x00000000,ret); + //logerror("NV_2A: read PMC[%06X] value %08X\n",offset*4-0x00000000,ret); } else if ((offset >= 0x00800000/4) && (offset < 0x00900000/4)) { // 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each int chanel,subchannel,suboffset; @@ -2387,31 +2732,31 @@ static int x,ret; suboffset=suboffset & 0x7ff; if (suboffset < 0x80/4) ret=channel[chanel][subchannel].regs[suboffset]; - logerror("NV_2A: read channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,ret); + //logerror("NV_2A: read channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,ret); return ret; - } else - logerror("NV_2A: read at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,ret); + } else ; + //logerror("NV_2A: read at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,ret); return ret; } WRITE32_MEMBER( nv2a_renderer::geforce_w ) { if ((offset >= 0x00101000/4) && (offset < 0x00102000/4)) { - logerror("NV_2A: write STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,data); + //logerror("NV_2A: write STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,data); } else if ((offset >= 0x00002000/4) && (offset < 0x00004000/4)) { COMBINE_DATA(pfifo+offset-0x00002000/4); - logerror("NV_2A: read PFIFO[%06X]=%08X\n",offset*4-0x00002000,data & mem_mask); // 2210 pfifo ramht & 1f0 << 12 + //logerror("NV_2A: read PFIFO[%06X]=%08X\n",offset*4-0x00002000,data & mem_mask); // 2210 pfifo ramht & 1f0 << 12 } else if ((offset >= 0x00700000/4) && (offset < 0x00800000/4)) { COMBINE_DATA(ramin+offset-0x00700000/4); - logerror("NV_2A: write PRAMIN[%06X]=%08X\n",offset*4-0x00700000,data & mem_mask); + //logerror("NV_2A: write PRAMIN[%06X]=%08X\n",offset*4-0x00700000,data & mem_mask); } else if ((offset >= 0x00400000/4) && (offset < 0x00402000/4)) { - logerror("NV_2A: write PGRAPH[%06X]=%08X\n",offset*4-0x00400000,data & mem_mask); + //logerror("NV_2A: write PGRAPH[%06X]=%08X\n",offset*4-0x00400000,data & mem_mask); } else if ((offset >= 0x00600000/4) && (offset < 0x00601000/4)) { COMBINE_DATA(pcrtc+offset-0x00600000/4); - logerror("NV_2A: write PCRTC[%06X]=%08X\n",offset*4-0x00600000,data & mem_mask); + //logerror("NV_2A: write PCRTC[%06X]=%08X\n",offset*4-0x00600000,data & mem_mask); } else if ((offset >= 0x00000000/4) && (offset < 0x00001000/4)) { 
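+ // PMC (master control) register block, offsets 0x000000-0x000fff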
COMBINE_DATA(pmc+offset-0x00000000/4); - logerror("NV_2A: write PMC[%06X]=%08X\n",offset*4-0x00000000,data & mem_mask); + //logerror("NV_2A: write PMC[%06X]=%08X\n",offset*4-0x00000000,data & mem_mask); } else if ((offset >= 0x00800000/4) && (offset < 0x00900000/4)) { // 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each int chanel,subchannel,suboffset; @@ -2424,17 +2769,20 @@ WRITE32_MEMBER( nv2a_renderer::geforce_w ) chanel=(suboffset >> (16-2)) & 31; subchannel=(suboffset >> (13-2)) & 7; suboffset=suboffset & 0x7ff; - logerror("NV_2A: write channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,data & mem_mask); + //logerror("NV_2A: write channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,data & mem_mask); if (suboffset >= 0x80/4) return; COMBINE_DATA(&channel[chanel][subchannel].regs[suboffset]); if ((suboffset == 0x40/4) || (suboffset == 0x44/4)) { // DMA_PUT or DMA_GET UINT32 *dmaput,*dmaget; UINT32 cmd,cmdtype; + int countlen; dmaput=&channel[chanel][subchannel].regs[0x40/4]; dmaget=&channel[chanel][subchannel].regs[0x44/4]; //printf("dmaget %08X dmaput %08X\n\r",*dmaget,*dmaput); + if ((*dmaput == 0x048cf000) && (*dmaget == 0x07f4d000)) + *dmaget = *dmaput; while (*dmaget != *dmaput) { cmd=space.read_dword(*dmaget); *dmaget += 4; @@ -2472,7 +2820,8 @@ WRITE32_MEMBER( nv2a_renderer::geforce_w ) logerror(" subch. %d method %04x offset %04x count %d\n",subch,method,method*4,count); #endif while (count > 0) { - geforce_exec_method(space,chanel,subchannel,method,space.read_dword(*dmaget)); + countlen=1; + geforce_exec_method(space,chanel,subchannel,method,*dmaget,countlen); count--; method++; *dmaget += 4; @@ -2504,9 +2853,10 @@ WRITE32_MEMBER( nv2a_renderer::geforce_w ) logerror(" subch. %d method %04x offset %04x count %d\n",subch,method,method*4,count); #endif while (count > 0) { - geforce_exec_method(space,chanel,subchannel,method,space.read_dword(*dmaget)); - count--; - *dmaget += 4; + countlen=count; + geforce_exec_method(space,chanel,subchannel,method,*dmaget,countlen); + *dmaget += 4*(count-countlen); + count=countlen; } } break; @@ -2533,9 +2883,10 @@ WRITE32_MEMBER( nv2a_renderer::geforce_w ) logerror(" subch. 
%d method %04x offset %04x count %d\n",subch,method,method*4,count); #endif while (count > 0) { - geforce_exec_method(space,chanel,subchannel,method,space.read_dword(*dmaget)); - count--; - *dmaget += 4; + countlen=count; + geforce_exec_method(space,chanel,subchannel,method,*dmaget,countlen); + *dmaget += 4*(count-countlen); + count=countlen; } } break; @@ -2544,8 +2895,8 @@ WRITE32_MEMBER( nv2a_renderer::geforce_w ) } } } - } else - logerror("NV_2A: write at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,data); + } else ; +// logerror("NV_2A: write at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,data); } READ32_MEMBER( chihiro_state::geforce_r ) @@ -2561,7 +2912,7 @@ WRITE32_MEMBER( chihiro_state::geforce_w ) static UINT32 geforce_pci_r(device_t *busdevice, device_t *device, int function, int reg, UINT32 mem_mask) { #ifdef LOG_PCI - logerror(" bus:1 function:%d register:%d mask:%08X\n",function,reg,mem_mask); +// logerror(" bus:1 device:NV_2A function:%d register:%d mask:%08X\n",function,reg,mem_mask); #endif return 0; } @@ -2569,7 +2920,7 @@ static UINT32 geforce_pci_r(device_t *busdevice, device_t *device, int function, static void geforce_pci_w(device_t *busdevice, device_t *device, int function, int reg, UINT32 data, UINT32 mem_mask) { #ifdef LOG_PCI - logerror(" bus:1 function:%d register:%d data:%08X mask:%08X\n",function,reg,data,mem_mask); +// logerror(" bus:1 device:NV_2A function:%d register:%d data:%08X mask:%08X\n",function,reg,data,mem_mask); #endif } @@ -2605,8 +2956,32 @@ static const char *const usbregnames[]={ READ32_MEMBER( chihiro_state::usbctrl_r ) { if (offset == 0) { /* hack needed until usb (and jvs) is implemented */ - chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6a79f,0x01); - chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6a7a0,0x00); + if (usbhack_counter == 0) { + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6a79f,0x01); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6a7a0,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6b575,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6b576,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6b5af,0x75); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6b78a,0x75); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6b7ca,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x6b7b8,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x8f5b2,0x75); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x79a9e,0x74); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x79b80,0x74); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x79b97,0x74); + } + // after game loaded + if (usbhack_counter == 1) { + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x12e4cf,0x01); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x12e4d0,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x4793e,0x01); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x4793f,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x47aa3,0x01); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x47aa4,0x00); + chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x14f2b6,0x84); + 
chihiro_devs.pic8259_1->machine().firstcpu->space(0).write_byte(0x14f2d1,0x75); + } + usbhack_counter++; } #ifdef LOG_OHCI if (offset >= 0x54/4) @@ -2627,6 +3002,87 @@ WRITE32_MEMBER( chihiro_state::usbctrl_w ) #endif } +/* + * Audio + */ + +READ32_MEMBER( chihiro_state::audio_apu_r ) +{ + logerror("Audio_APU: read from %08X mask %08X\n",0xfe800000+offset*4,mem_mask); + if (offset == 0x20010/4) + return 0x20+4+8+0x48+0x80; + return 0; +} + +WRITE32_MEMBER( chihiro_state::audio_apu_w ) +{ + logerror("Audio_APU: write at %08X mask %08X value %08X\n",0xfe800000+offset*4,mem_mask,data); + if (offset == 0x2040/4) + apust.memory0_sgaddress=data; + if (offset == 0x20d4/4) { + apust.memory0_sgblocks=data; + apust.memory0_address=apust.space->read_dword(apust.memory0_sgaddress); + apust.timer->enable(); + apust.timer->adjust(attotime::from_msec(1),0,attotime::from_msec(1)); + } + if (offset == 0x2048/4) + apust.memory1_sgaddress=data; + if (offset == 0x20dc/4) + apust.memory1_sgblocks=data; +} + +READ32_MEMBER( chihiro_state::audio_ac93_r ) +{ + UINT32 ret=0; + + logerror("Audio_AC3: read from %08X mask %08X\n",0xfec00000+offset*4,mem_mask); + if (offset < 0x80/4) + { + ret=ac97st.mixer_regs[offset]; + } + if ((offset >= 0x100/4) && (offset <= 0x138/4)) + { + offset=offset-0x100/4; + if (offset == 0x18/4) + { + ac97st.controller_regs[offset] &= ~0x02000000; // REGRST: register reset + } + if (offset == 0x30/4) + { + ac97st.controller_regs[offset] |= 0x100; // PCRDY: primary codec ready + } + if (offset == 0x34/4) + { + ac97st.controller_regs[offset] &= ~1; // CAS: codec access semaphore + } + ret=ac97st.controller_regs[offset]; + } + return ret; +} + +WRITE32_MEMBER( chihiro_state::audio_ac93_w ) +{ + logerror("Audio_AC3: write at %08X mask %08X value %08X\n",0xfec00000+offset*4,mem_mask,data); + if (offset < 0x80/4) + { + COMBINE_DATA(ac97st.mixer_regs+offset); + } + if ((offset >= 0x100/4) && (offset <= 0x138/4)) + { + offset=offset-0x100/4; + COMBINE_DATA(ac97st.controller_regs+offset); + } +} + +TIMER_CALLBACK_MEMBER(chihiro_state::audio_apu_timer) +{ + int cmd=apust.space->read_dword(apust.memory0_address+0x800+0x10); + if (cmd == 3) + apust.space->write_dword(apust.memory0_address+0x800+0x10,0); + /*else + logerror("Audio_APU: unexpected value at address %d\n",apust.memory0_address+0x800+0x10);*/ +} + /* * dummy for non connected devices */ @@ -2634,7 +3090,7 @@ WRITE32_MEMBER( chihiro_state::usbctrl_w ) static UINT32 dummy_pci_r(device_t *busdevice, device_t *device, int function, int reg, UINT32 mem_mask) { #ifdef LOG_PCI - logerror(" bus:0 function:%d register:%d mask:%08X\n",function,reg,mem_mask); +// logerror(" bus:0 function:%d register:%d mask:%08X\n",function,reg,mem_mask); #endif return 0; } @@ -2642,7 +3098,7 @@ static UINT32 dummy_pci_r(device_t *busdevice, device_t *device, int function, i static void dummy_pci_w(device_t *busdevice, device_t *device, int function, int reg, UINT32 data, UINT32 mem_mask) { #ifdef LOG_PCI - logerror(" bus:0 function:%d register:%d data:%08X mask:%08X\n",function,reg,data,mem_mask); + if (reg >= 16) logerror(" bus:0 function:%d register:%d data:%08X mask:%08X\n",function,reg,data,mem_mask); #endif } @@ -2667,7 +3123,11 @@ public: virtual int write_sector(UINT32 lba, const void *buffer); protected: // device-level overrides + virtual void device_start(); virtual void device_reset(); + UINT8 read_buffer[0x20]; + UINT8 write_buffer[0x20]; + chihiro_state *chihirosystem; }; //************************************************************************** @@ 
-2686,6 +3146,19 @@ ide_baseboard_device::ide_baseboard_device(const machine_config &mconfig, const { } +//------------------------------------------------- +// device_start - device-specific startup +//------------------------------------------------- + +void ide_baseboard_device::device_start() +{ + ata_mass_storage_device::device_start(); + chihirosystem=machine().driver_data(); + // savestates + save_item(NAME(read_buffer)); + save_item(NAME(write_buffer)); +} + //------------------------------------------------- // device_reset - device-specific reset //------------------------------------------------- @@ -2709,19 +3182,141 @@ int ide_baseboard_device::read_sector(UINT32 lba, void *buffer) int off; UINT8 *data; + /* + It assumes there are 4 "partitions", the size of the first one depends on bits 3-0 of io port 40f4: + Value Size lba + 0 0x40000-0x8000 + ... + 4 0x400000-0x8000 + The size of the second one is always 0x8000 sectors, and is used as a special communication area + This is a list of the partitions in the minimum size case: + Name Start lba Size lba Size + \??\mbfs: 0x0 0x38000 112MB + \??\mbcom: 0x38000 0x8000 16MB + \??\mbrom0: 0x8000000 0x800 1MB + \??\mbrom1: 0x8000800 0x800 1MB + This is a list of the partitions in the maximum size case: + Name Start lba Size lba Size + \??\mbfs: 0x0 0x3f8000 2032MB + \??\mbcom: 0x3f8000 0x8000 16MB + \??\mbrom0: 0x8000000 0x800 1MB + \??\mbrom1: 0x8000800 0x800 1MB + */ logerror("baseboard: read sector lba %08x\n",lba); - off=(lba&0x7ff)*512; - data=memregion(":others")->base(); - memcpy(buffer,data+off,512); + if (lba >= 0x08000000) { + off=(lba&0x7ff)*512; + data=memregion(":others")->base(); + memcpy(buffer,data+off,512); + return 1; + } + if (lba >= 0xf8000) { + memset(buffer,0,512); + lba=lba-0xf8000; + if (lba == 0x4800) + memcpy(buffer,read_buffer,0x20); + else if (lba == 0x4801) + memcpy(buffer,write_buffer,0x20); + return 1; + } + // in a type 1 chihiro this gets data from the dimm board memory + data=chihirosystem->baseboard_ide_dimmboard(lba); + if (data != NULL) + memcpy(buffer,data,512); return 1; } int ide_baseboard_device::write_sector(UINT32 lba, const void *buffer) { logerror("baseboard: write sector lba %08x\n",lba); + if (lba >= 0xf8000) { + lba=lba-0xf8000; + if (lba == 0x4800) + memcpy(read_buffer,buffer,0x20); + else if (lba == 0x4801) { + memcpy(write_buffer,buffer,0x20); + // call chihiro driver + chihirosystem->baseboard_ide_event(3,read_buffer,write_buffer); + } + } return 1; } +/* + * Chihiro Type 1 baseboard + */ + +void chihiro_state::dword_write_le(UINT8 *addr,UINT32 d) +{ + addr[0]=d & 255; + addr[1]=(d >> 8) & 255; + addr[2]=(d >> 16) & 255; + addr[3]=(d >> 24) & 255; +} + +void chihiro_state::word_write_le(UINT8 *addr,UINT16 d) +{ + addr[0]=d & 255; + addr[1]=(d >> 8) & 255; +} + +void chihiro_state::baseboard_ide_event(int type,UINT8 *read_buffer,UINT8 *write_buffer) +{ + int c; + + if ((type != 3) || ((write_buffer[0] == 0) && (write_buffer[1] == 0))) + return; +#ifdef LOG_BASEBOARD + logerror("Baseboard sector command:\n"); + for (int a=0;a < 32;a++) + logerror(" %02X",write_buffer[a]); + logerror("\n"); +#endif + // response + // second word 8001 (8000+counter), first word=first word of written data (command ?), second dword ? 
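+ // Reply layout used below (inferred from this handler, not from any official
+ // description of the Type 1 baseboard protocol):
+ //   bytes 0-1  : copy of the first word of the written block
+ //   bytes 2-3  : 0x8001 (0x8000 + counter)
+ //   bytes 4-7  : first result dword, selected by the command word in bytes
+ //                2-3 of the written block
+ //   bytes 8-19 : extra result data for commands 0x0100 and 0x0103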
+ read_buffer[0]=write_buffer[0]; + read_buffer[1]=write_buffer[1]; + read_buffer[2]=0x01; // write_buffer[2]; + read_buffer[3]=0x80; // write_buffer[3] | 0x80; + c=write_buffer[2]+(write_buffer[3] << 8); // 0001 0101 0103 + switch (c) + { + case 0x0001: + // second dword + dword_write_le(read_buffer+4,0x00f00000); // ? + break; + case 0x0100: + // second dword third dword + dword_write_le(read_buffer+4,5); // game data loading phase + dword_write_le(read_buffer+8,0); // completion % + break; + case 0x0101: + // third word fourth word + word_write_le(read_buffer+4,0xca); // ? + word_write_le(read_buffer+6,0xcb); // ? + break; + case 0x0102: + // second dword + dword_write_le(read_buffer+4,0); // bit 16 develop. mode + break; + case 0x0103: + // dwords 1 3 4 + memcpy(read_buffer+4,"-abc-abc12345678",16); // ? + break; + } + // clear + write_buffer[0]=write_buffer[1]=write_buffer[2]=write_buffer[3]=0; + // irq 10 active + chihiro_devs.pic8259_2->ir2_w(1); +} + +UINT8 *chihiro_state::baseboard_ide_dimmboard(UINT32 lba) +{ + // return pointer to memory containing decrypted gdrom data (contains an image of a fatx partition) + if (chihiro_devs.dimmboard != NULL) + return dimm_board_memory+lba*512; + return NULL; +} + /* * PIC & PIT */ @@ -2747,14 +3342,16 @@ IRQ_CALLBACK_MEMBER(chihiro_state::irq_callback) { r = chihiro_devs.pic8259_1->acknowledge(); } + if (debug_irq_active) + debug_generate_irq(debug_irq_number,false); return r; } WRITE_LINE_MEMBER(chihiro_state::chihiro_pit8254_out0_changed) { - if ( machine().device("pic8259_1") ) + if ( chihiro_devs.pic8259_1 ) { - machine().device("pic8259_1")->ir0_w(state); + chihiro_devs.pic8259_1->ir0_w(state); } } @@ -2786,8 +3383,6 @@ static const struct pit8253_interface chihiro_pit8254_config = * SMbus devices */ -static UINT8 pic16lc_buffer[0xff]; - int smbus_callback_pic16lc(chihiro_state &chs,int command,int rw,int data) { return chs.smbus_pic16lc(command, rw, data); @@ -2865,17 +3460,6 @@ int chihiro_state::smbus_eeprom(int command,int rw,int data) * SMbus controller */ -struct smbus_state { - int status; - int control; - int address; - int data; - int command; - int rw; - int (*devices[128])(chihiro_state &chs,int command,int rw,int data); - UINT32 words[256/4]; -} smbusst; - void chihiro_state::smbus_register_device(int address,int (*handler)(chihiro_state &chs,int command,int rw,int data)) { if (address < 128) @@ -2937,11 +3521,42 @@ WRITE32_MEMBER( chihiro_state::smbus_w ) smbusst.command = data; } +READ32_MEMBER( chihiro_state::mediaboard_r ) +{ + UINT32 r; + + logerror("I/O port read %04x mask %08X\n",offset*4+0x4000,mem_mask); + r=0; + if ((offset == 7) && ACCESSING_BITS_16_31) + r=0x10000000; + if ((offset == 8) && ACCESSING_BITS_0_15) + r=0x000000a0; + if ((offset == 8) && ACCESSING_BITS_16_31) + r=0x42580000; + if ((offset == 9) && ACCESSING_BITS_0_15) + r=0x00004d41; + if ((offset == 0x3c) && ACCESSING_BITS_0_15) + r=0x00000000; // bits 15-0 0 if media board present + if ((offset == 0x3d) && ACCESSING_BITS_0_15) + r=0x00000002; // bits 3-0 size of dimm board memory. 
Must be 2
+    return r;
+}
+
+WRITE32_MEMBER( chihiro_state::mediaboard_w )
+{
+    logerror("I/O port write %04x mask %08X value %08X\n",offset*4+0x4000,mem_mask,data);
+    // irq 10
+    if ((offset == 0x38) && ACCESSING_BITS_8_15)
+        chihiro_devs.pic8259_2->ir2_w(0);
+}
+
 static ADDRESS_MAP_START( xbox_map, AS_PROGRAM, 32, chihiro_state )
     AM_RANGE(0x00000000, 0x07ffffff) AM_RAM // 128 megabytes
     AM_RANGE(0xf0000000, 0xf0ffffff) AM_RAM
     AM_RANGE(0xfd000000, 0xfdffffff) AM_RAM AM_READWRITE(geforce_r, geforce_w)
     AM_RANGE(0xfed00000, 0xfed003ff) AM_READWRITE(usbctrl_r, usbctrl_w)
+    AM_RANGE(0xfe800000, 0xfe85ffff) AM_READWRITE(audio_apu_r, audio_apu_w)
+    AM_RANGE(0xfec00000, 0xfec001ff) AM_READWRITE(audio_ac93_r, audio_ac93_w)
     AM_RANGE(0xff000000, 0xffffffff) AM_ROM AM_REGION("bios", 0) AM_MIRROR(0x00f80000)
 ADDRESS_MAP_END
 
@@ -2951,6 +3566,7 @@ static ADDRESS_MAP_START(xbox_map_io, AS_IO, 32, chihiro_state )
     AM_RANGE(0x00a0, 0x00a3) AM_DEVREADWRITE8("pic8259_2", pic8259_device, read, write, 0xffffffff)
     AM_RANGE(0x01f0, 0x01f7) AM_DEVREADWRITE("ide", bus_master_ide_controller_device, read_cs0, write_cs0)
     AM_RANGE(0x0cf8, 0x0cff) AM_DEVREADWRITE("pcibus", pci_bus_legacy_device, read, write)
+    AM_RANGE(0x4000, 0x40ff) AM_READWRITE(mediaboard_r, mediaboard_w)
     AM_RANGE(0x8000, 0x80ff) AM_READWRITE(dummy_r, dummy_w)
     AM_RANGE(0xc000, 0xc0ff) AM_READWRITE(smbus_r, smbus_w)
     AM_RANGE(0xff60, 0xff67) AM_DEVREADWRITE("ide", bus_master_ide_controller_device, bmdma_r, bmdma_w)
@@ -2964,7 +3580,7 @@ void chihiro_state::machine_start()
     nvidia_nv2a=auto_alloc(machine(), nv2a_renderer(machine()));
     memset(pic16lc_buffer,0,sizeof(pic16lc_buffer));
     pic16lc_buffer[0]='B';
-    pic16lc_buffer[4]=2; // A/V connector, 2=vga
+    pic16lc_buffer[4]=0; // A/V connector, 2=vga
     smbus_register_device(0x10,smbus_callback_pic16lc);
     smbus_register_device(0x45,smbus_callback_cx25871);
     smbus_register_device(0x54,smbus_callback_eeprom);
@@ -2972,8 +3588,29 @@ void chihiro_state::machine_start()
     chihiro_devs.pic8259_1 = machine().device<pic8259_device>( "pic8259_1" );
     chihiro_devs.pic8259_2 = machine().device<pic8259_device>( "pic8259_2" );
     chihiro_devs.ide = machine().device( "ide" );
+    chihiro_devs.dimmboard=machine().device<naomi_gdrom_board>("rom_board");
+    if (chihiro_devs.dimmboard != NULL) {
+        dimm_board_memory=chihiro_devs.dimmboard->memory(dimm_board_memory_size);
+    }
+    apust.space=&machine().firstcpu->space();
+    apust.timer=machine().scheduler().timer_alloc(timer_expired_delegate(FUNC(chihiro_state::audio_apu_timer),this),(void *)"APU Timer");
+    apust.timer->enable(false);
     if (machine().debug_flags & DEBUG_FLAG_ENABLED)
         debug_console_register_command(machine(),"chihiro",CMDFLAG_NONE,0,1,4,chihiro_debug_commands);
+    usbhack_counter=0;
+    // savestates
+    save_item(NAME(debug_irq_active));
+    save_item(NAME(debug_irq_number));
+    save_item(NAME(smbusst.status));
+    save_item(NAME(smbusst.control));
+    save_item(NAME(smbusst.address));
+    save_item(NAME(smbusst.data));
+    save_item(NAME(smbusst.command));
+    save_item(NAME(smbusst.rw));
+    save_item(NAME(smbusst.words));
+    save_item(NAME(pic16lc_buffer));
+    save_item(NAME(usbhack_counter));
+    nvidia_nv2a->savestate_items();
 }
 
 static SLOT_INTERFACE_START(ide_baseboard)
@@ -2994,6 +3631,10 @@ static MACHINE_CONFIG_START( chihiro_base, chihiro_state )
     MCFG_PCI_BUS_LEGACY_DEVICE(1, "HUB Interface - ISA Bridge", dummy_pci_r, dummy_pci_w)
     MCFG_PCI_BUS_LEGACY_DEVICE(2, "OHCI USB Controller 1", dummy_pci_r, dummy_pci_w)
     MCFG_PCI_BUS_LEGACY_DEVICE(3, "OHCI USB Controller 2", dummy_pci_r, dummy_pci_w)
+    MCFG_PCI_BUS_LEGACY_DEVICE(4, "MCP Networking Adapter",
dummy_pci_r, dummy_pci_w) + MCFG_PCI_BUS_LEGACY_DEVICE(5, "MCP APU", dummy_pci_r, dummy_pci_w) + MCFG_PCI_BUS_LEGACY_DEVICE(6, "AC`97 Audio Codec Interface", dummy_pci_r, dummy_pci_w) + MCFG_PCI_BUS_LEGACY_DEVICE(9, "IDE Controller", dummy_pci_r, dummy_pci_w) MCFG_PCI_BUS_LEGACY_DEVICE(30, "AGP Host to PCI Bridge", dummy_pci_r, dummy_pci_w) MCFG_PCI_BUS_LEGACY_ADD("agpbus", 1) MCFG_PCI_BUS_LEGACY_SIBLING("pcibus") @@ -3014,12 +3655,11 @@ static MACHINE_CONFIG_START( chihiro_base, chihiro_state ) MCFG_SCREEN_UPDATE_DRIVER(chihiro_state,screen_update_callback) MCFG_SCREEN_VBLANK_DRIVER(chihiro_state,vblank_callback) - MCFG_PALETTE_LENGTH(65536) MACHINE_CONFIG_END static MACHINE_CONFIG_DERIVED( chihirogd, chihiro_base ) - MCFG_NAOMI_GDROM_BOARD_ADD("rom_board", ":gdrom", "pic", NULL, NOOP) + MCFG_NAOMI_GDROM_BOARD_ADD("rom_board", ":gdrom", ":pic", NULL, NOOP) MACHINE_CONFIG_END #define ROM_LOAD16_WORD_SWAP_BIOS(bios,name,offset,length,hash) \ @@ -3038,6 +3678,7 @@ MACHINE_CONFIG_END ROM_LOAD16_WORD_SWAP_BIOS( 0, "ic11_24lc024.bin", 0x202000, 0x80, CRC(8dc8374e) SHA1(cc03a0650bfac4bf6cb66e414bbef121cba53efe) ) \ ROM_LOAD16_WORD_SWAP_BIOS( 0, "pc20_g24lc64.bin", 0x202080, 0x2000, CRC(7742ab62) SHA1(82dad6e2a75bab4a4840dc6939462f1fb9b95101) ) \ ROM_LOAD16_WORD_SWAP_BIOS( 0, "ver1305.bin", 0x204080, 0x200000, CRC(a738ea1c) SHA1(45d94d0c39be1cb3db9fab6610a88a550adda4e9) ) + ROM_START( chihiro ) CHIHIRO_BIOS @@ -3253,7 +3894,7 @@ GAME( 2002, chihiro, 0, chihiro_base, chihiro, driver_device, 0, ROT0, "S GAME( 2002, hotd3, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "The House of the Dead III (GDX-0001)", GAME_NO_SOUND|GAME_NOT_WORKING ) GAME( 2003, crtaxihr, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "Crazy Taxi High Roller (Rev B) (GDX-0002B)", GAME_NO_SOUND|GAME_NOT_WORKING ) GAME( 2003, vcop3, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "Virtua Cop 3 (Rev A) (GDX-0003A)", GAME_NO_SOUND|GAME_NOT_WORKING ) -GAME( 2003, outr2, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "Out Run 2 (Rev A) (GDX-0004A)", GAME_NO_SOUND|GAME_NOT_WORKING ) +GAME( 2003, outr2, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "Out Run 2 (Rev A) (GDX-0004A)", GAME_NO_SOUND|GAME_NOT_WORKING|GAME_SUPPORTS_SAVE ) GAME( 2004, mj2, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "Sega Network Taisen Mahjong MJ 2 (Rev C) (GDX-0006C)", GAME_NO_SOUND|GAME_NOT_WORKING ) GAME( 2004, ollie, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Sega", "Ollie King (GDX-0007)", GAME_NO_SOUND|GAME_NOT_WORKING ) GAME( 2004, wangmid, chihiro, chihirogd, chihiro, driver_device, 0, ROT0, "Namco", "Wangan Midnight Maximum Tune (Export) (Rev B) (GDX-0009B)", GAME_NO_SOUND|GAME_NOT_WORKING ) diff --git a/src/mame/machine/naomigd.h b/src/mame/machine/naomigd.h index b315b02055a..028b62a388e 100644 --- a/src/mame/machine/naomigd.h +++ b/src/mame/machine/naomigd.h @@ -14,6 +14,8 @@ public: static void static_set_tags(device_t &device, const char *_image_tag, const char *_pic_tag); + UINT8 *memory(UINT32 &size) { size = dimm_data_size; return dimm_data; } + protected: virtual void device_start(); virtual void device_reset();
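
In the audio_apu_w handler added earlier in this patch, a write to offset 0x2040 latches the physical address of a scatter-gather list, and the write to 0x20d4 reads the first dword of that list to obtain the address of the APU work area and starts a 1 ms timer; audio_apu_timer then polls the dword at work area + 0x800 + 0x10 and clears it when it reads 3. Below is a minimal standalone sketch of that fetch-then-acknowledge flow; the GuestRam type and the addresses used in main() are illustrative stand-ins, not MAME structures.

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // Toy stand-in for the guest physical address space the APU handlers read through.
    struct GuestRam {
        uint8_t bytes[0x10000];
        uint32_t read_dword(uint32_t addr) const {
            uint32_t v;
            std::memcpy(&v, bytes + addr, 4);   // little-endian host assumed
            return v;
        }
        void write_dword(uint32_t addr, uint32_t v) {
            std::memcpy(bytes + addr, &v, 4);
        }
    };

    int main() {
        GuestRam ram{};
        // Guest builds a scatter-gather list at 0x1000 whose first dword is the
        // physical address of the APU work area (0x2000 here).
        ram.write_dword(0x1000, 0x2000);
        // Guest posts command 3 at work_area + 0x800 + 0x10, where the driver's timer looks for it.
        ram.write_dword(0x2000 + 0x800 + 0x10, 3);

        // Equivalent of the 0x2040 / 0x20d4 writes: latch the list, resolve its first entry.
        uint32_t sg_address = 0x1000;                       // memory0_sgaddress
        uint32_t work_area  = ram.read_dword(sg_address);   // memory0_address

        // Equivalent of one audio_apu_timer tick: acknowledge command 3 by clearing it.
        if (ram.read_dword(work_area + 0x800 + 0x10) == 3)
            ram.write_dword(work_area + 0x800 + 0x10, 0);

        std::printf("command word after ack: %u\n", ram.read_dword(work_area + 0x800 + 0x10));
        return 0;
    }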
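The comment added to ide_baseboard_device::read_sector lists only the two end rows of the mbfs size table (value 0 gives 0x40000-0x8000 sectors, value 4 gives 0x400000-0x8000). Both rows, and the 112MB/2032MB figures in the partition lists, are consistent with the first partition spanning (0x40000 << n) - 0x8000 sectors of 512 bytes, where n is bits 3-0 of I/O port 0x40f4. The helper below is an inference from those two rows, not something the patch states for the intermediate values.

    #include <cstdint>
    #include <cstdio>

    // Inferred from the two table rows in the read_sector comment: with n = bits 3-0
    // of I/O port 0x40f4, mbfs spans (0x40000 << n) - 0x8000 sectors of 512 bytes,
    // and mbcom always occupies the following 0x8000 sectors.
    static uint32_t mbfs_size_in_sectors(unsigned n)
    {
        return (0x40000u << n) - 0x8000u;
    }

    int main()
    {
        for (unsigned n = 0; n <= 4; n++) {
            uint32_t sectors = mbfs_size_in_sectors(n);
            std::printf("n=%u  mbfs=0x%06x sectors (%u MB), mbcom starts at lba 0x%06x\n",
                        n, sectors, sectors / 2048, sectors);
        }
        return 0;
    }

For n=0 this yields 0x38000 sectors (112 MB) and for n=4 it yields 0x3f8000 sectors (2032 MB), matching the minimum and maximum partition lists in the comment.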
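baseboard_ide_event answers the 32-byte command block that arrives through the write mailbox sector (lba 0xf8000 + 0x4801): it echoes the first word of the command, stores a counter byte and the 0x80 flag in bytes 2-3, packs the payload with dword_write_le/word_write_le, and then raises IRQ 10 through the slave PIC. The standalone sketch below reproduces that packing for the 0x0100 case (game data loading phase); the sample command bytes are made up, while the response layout follows the hunk in the patch.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static void dword_write_le(uint8_t *addr, uint32_t d)
    {
        addr[0] = d & 255;
        addr[1] = (d >> 8) & 255;
        addr[2] = (d >> 16) & 255;
        addr[3] = (d >> 24) & 255;
    }

    int main()
    {
        uint8_t write_buffer[0x20] = { 0x34, 0x12, 0x00, 0x01 };  // sample command block, command word 0x0100 in bytes 2-3
        uint8_t read_buffer[0x20] = {};

        // Same packing the driver performs for command 0x0100.
        read_buffer[0] = write_buffer[0];
        read_buffer[1] = write_buffer[1];
        read_buffer[2] = 0x01;                      // counter
        read_buffer[3] = 0x80;                      // "response ready" flag
        dword_write_le(read_buffer + 4, 5);         // game data loading phase
        dword_write_le(read_buffer + 8, 0);         // completion %

        for (int i = 0; i < 16; i++)
            std::printf("%02X ", read_buffer[i]);
        std::printf("\n");
        return 0;
    }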
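mediaboard_r and mediaboard_w sit behind a 32-bit handler mapped at I/O ports 0x4000-0x40ff, so each offset covers two 16-bit ports and mem_mask (tested with ACCESSING_BITS_0_15 / ACCESSING_BITS_16_31) selects the half actually addressed; the DIMM-size register at port 0x40f4, for instance, is the low half of offset 0x3d and must read back 2. The sketch below re-derives that lane decoding outside MAME; the two accessing_bits helpers mimic the macros, and mediaboard_read covers only the two registers mentioned, as an illustration.

    #include <cstdint>
    #include <cstdio>

    // Re-derived lane tests: a 32-bit dword at I/O base + offset*4 carries two
    // 16-bit ports; mem_mask says which half the CPU actually asked for.
    static bool accessing_bits_0_15(uint32_t mem_mask)  { return (mem_mask & 0x0000ffff) != 0; }
    static bool accessing_bits_16_31(uint32_t mem_mask) { return (mem_mask & 0xffff0000) != 0; }

    // Illustrative read-side decode for two of the registers the patch emulates.
    static uint32_t mediaboard_read(uint32_t offset, uint32_t mem_mask)
    {
        if (offset == 0x3c && accessing_bits_0_15(mem_mask))
            return 0x00000000;   // port 0x40f0: 0 when the media board is present
        if (offset == 0x3d && accessing_bits_0_15(mem_mask))
            return 0x00000002;   // port 0x40f4 bits 3-0: DIMM board memory size code, must be 2
        return 0;
    }

    int main()
    {
        unsigned port = 0x40f4;
        unsigned offset = (port - 0x4000) / 4;           // 0x3d
        std::printf("port %04X -> offset %02X, value %08X\n",
                    port, offset, mediaboard_read(offset, 0x0000ffff));
        return 0;
    }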
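The memory(UINT32 &size) accessor added to naomi_gdrom_board at the end of the patch is what machine_start stores into dimm_board_memory / dimm_board_memory_size, and ide_baseboard_device::read_sector then serves low LBAs straight out of that buffer through baseboard_ide_dimmboard. A minimal sketch of that consumer pattern follows; gdrom_board_stub is only a stand-in with the same accessor shape, not the real board, which fills dimm_data with the decrypted GDROM image.

    #include <cstdint>
    #include <cstring>
    #include <cstdio>
    #include <vector>

    // Stub with the same accessor shape the patch adds to naomi_gdrom_board.
    class gdrom_board_stub {
    public:
        gdrom_board_stub() : dimm_data(4 * 512, 0xaa) {}     // pretend 4 decrypted sectors
        uint8_t *memory(uint32_t &size) { size = (uint32_t)dimm_data.size(); return dimm_data.data(); }
    private:
        std::vector<uint8_t> dimm_data;
    };

    int main()
    {
        gdrom_board_stub board;

        // machine_start side: cache the pointer and size once.
        uint32_t dimm_board_memory_size = 0;
        uint8_t *dimm_board_memory = board.memory(dimm_board_memory_size);

        // read_sector side: a low lba is served from the decrypted image, 512 bytes at a time.
        uint32_t lba = 2;
        uint8_t sector[512];
        if ((lba + 1) * 512 <= dimm_board_memory_size) {
            std::memcpy(sector, dimm_board_memory + lba * 512, 512);
            std::printf("lba %u first byte: %02X (of %u bytes total)\n",
                        lba, sector[0], dimm_board_memory_size);
        }
        return 0;
    }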