From a81cb8469e8f3548205dc0add226365fdddce59c Mon Sep 17 00:00:00 2001 From: yz70s Date: Thu, 14 Mar 2019 20:29:23 +0100 Subject: [PATCH] i386: add a simple cache to the athlon xp processor (nw) . A single 64K 2-way set associative cache Used for both instructions and data Enabled only for addresses in the first megabyte Works always in writeback mode . It is needed by the nforce motherboard bios that uses it to simulate a block of ram at address d0000 before ddr ram is configured --- scripts/src/cpu.lua | 1 + src/devices/cpu/i386/cache.h | 258 +++++++++++++++++++++++++++++ src/devices/cpu/i386/cpuidmsrs.hxx | 12 ++ src/devices/cpu/i386/i386.cpp | 225 +++++++++++++++++++++++++ src/devices/cpu/i386/i386.h | 25 +++ 5 files changed, 521 insertions(+) create mode 100644 src/devices/cpu/i386/cache.h diff --git a/scripts/src/cpu.lua b/scripts/src/cpu.lua index 631af0dbcff..090093df856 100644 --- a/scripts/src/cpu.lua +++ b/scripts/src/cpu.lua @@ -1087,6 +1087,7 @@ if (CPUS["I386"]~=null) then files { MAME_DIR .. "src/devices/cpu/i386/i386.cpp", MAME_DIR .. "src/devices/cpu/i386/i386.h", + MAME_DIR .. "src/devices/cpu/i386/cache.h", MAME_DIR .. "src/devices/cpu/i386/cycles.h", MAME_DIR .. "src/devices/cpu/i386/i386op16.hxx", MAME_DIR .. "src/devices/cpu/i386/i386op32.hxx", diff --git a/src/devices/cpu/i386/cache.h b/src/devices/cpu/i386/cache.h new file mode 100644 index 00000000000..548a1fb9f85 --- /dev/null +++ b/src/devices/cpu/i386/cache.h @@ -0,0 +1,258 @@ +// license:BSD-3-Clause +// copyright-holders:Samuele Zannoli + +#ifndef MAME_CPU_I386_CACHE_H +#define MAME_CPU_I386_CACHE_H + +#pragma once + +/* To test it outside of Mame +#include + +typedef unsigned char u8; +typedef unsigned int u32; +*/ + +enum { + CacheLineBytes16 = 4, + CacheLineBytes32 = 5, + CacheLineBytes64 = 6, + CacheLineBytes128 = 7, + CacheLineBytes256 = 8, +}; + +enum { + CacheDirectMapped = 0, + Cache2Way = 1, + Cache4Way = 2, + Cache8Way = 3, + Cache16Way = 4 +}; + +enum { + CacheRead = 0, + CacheWrite = 1 +}; + +template +class cpucache { +public: + // Constructor + cpucache(); + // Reset the cache + void reset(); + // Find the cacheline containing data at address + template u8* search(u32 address); + // Allocate a cacheline for data at address + template bool allocate(u32 address, u8 **data); + // Get the address where the cacheline data should be written back to + u32 old(); + // Get the address of the first byte of the cacheline that contains data at address + u32 base(u32 address); + // Compose the cacheline parameters into an address + u32 address(u32 tag, u32 set, u32 offset); + // Get the data of the first cacheline marked as dirty + u8* first_dirty(u32 &base, bool clean); + // Get the data of the next cacheline marked as dirty + u8* next_dirty(u32 &base, bool clean); + +private: + static const int Ways = 1 << WayBits; + static const int LineBytes = 1 << LineBits; + static const int Sets = 1 << SetBits; + static const u32 LineMask = (1 << LineBits) - 1; + static const u32 SetMask = ((1 << SetBits) - 1) << LineBits; + static const u32 WayMask = (1 << WayBits) - 1; + static const int TagShift = LineBits + SetBits; + + struct cacheline { + u8 data[LineBytes]; + bool allocated; + bool dirty; + u32 tag; + u32 debug_address; + }; + + struct cacheset { + cacheline lines[Ways]; + int nextway; + }; + + cacheset sets[Sets]; + u32 writeback_base; + int last_set; + int last_way; +}; + +template +cpucache::cpucache() +{ + reset(); +} + +template +void cpucache::reset() +{ + for (int s = 0; s < Sets; s++) + for (int w = 0; w < Ways; w++) + { + sets[s].nextway = 0; + sets[s].lines[w].allocated = false; + sets[s].lines[w].dirty = false; + sets[s].lines[w].debug_address = 0; + } + last_set = -1; + last_way = -1; +} + +template +template +u8* cpucache::search(u32 address) +{ + const int addresset = (address & SetMask) >> LineBits; + const int addrestag = address >> TagShift; + + for (int w = 0; w < Ways; w++) + if ((sets[addresset].lines[w].allocated) && (sets[addresset].lines[w].tag == addrestag)) + { + if (ReadWrite != 0) + sets[addresset].lines[w].dirty = true; + return sets[addresset].lines[w].data; + } + return nullptr; +} + +template +template +bool cpucache::allocate(u32 address, u8 **data) +{ + const int addresset = (address & SetMask) >> LineBits; + const int addrestag = address >> TagShift; + const int victimway = sets[addresset].nextway; + bool old_allocated, old_dirty; + bool ret; + + sets[addresset].nextway = (victimway + 1) & WayMask; // decide wich way will be allocated next + old_allocated = sets[addresset].lines[victimway].allocated; + old_dirty = sets[addresset].lines[victimway].dirty; + writeback_base = (sets[addresset].lines[victimway].tag << TagShift) | (address & SetMask); + sets[addresset].lines[victimway].tag = addrestag; + sets[addresset].lines[victimway].allocated = true; + if (ReadWrite == 0) + sets[addresset].lines[victimway].dirty = false; // caller must write back the cacheline if told so + else + sets[addresset].lines[victimway].dirty = true; // line is allocated to write into it + *data = sets[addresset].lines[victimway].data; + sets[addresset].lines[victimway].debug_address = address; + ret = old_allocated; // ret = old_allocated && old_dirty + if (!old_dirty) + ret = false; + return ret; // true if caller must write back the cacheline +} + +template +u32 cpucache::old() +{ + return writeback_base; +} + +template +u32 cpucache::base(u32 address) +{ + return address & ~LineMask; +} + +template +u32 cpucache::address(u32 tag, u32 set, u32 offset) +{ + return (tag << TagShift) | (set << LineBits) | offset; +} + +template +u8* cpucache::first_dirty(u32 &base, bool clean) +{ + for (int s = 0; s < Sets; s++) + for (int w = 0; w < Ways; w++) + if (sets[s].lines[w].dirty == true) + { + if (clean) + sets[s].lines[w].dirty = false; + last_set = s; + last_way = w; + base = address(sets[s].lines[w].tag, s, 0); + return sets[s].lines[w].data; + } + return nullptr; +} + +template +u8* cpucache::next_dirty(u32 &base, bool clean) +{ + if (last_set < 0) + return nullptr; + while (true) + { + last_way++; + if (last_way == Ways) + { + last_way = 0; + last_set++; + if (last_set == Sets) + { + last_set = -1; + last_way = -1; + return nullptr; + } + } + if (sets[last_set].lines[last_way].dirty == true) + { + if (clean) + sets[last_set].lines[last_way].dirty = false; + base = address(sets[last_set].lines[last_way].tag, last_set, 0); + return sets[last_set].lines[last_way].data; + } + } +} + +#endif + +/* To test it outside of Mame +const int memorysize = 256 * 1024; +u8 memory[memorysize]; + +void readline(u8 *data, u32 address) +{ + for (int n = 0; n < 64; n++) + data[n] = memory[address + n]; +} + +void writeline(u8 *data, u32 address) +{ + for (int n = 0; n < 64; n++) + memory[address + n] = data[n]; +} + +void cache_tester() +{ + cpucache<18, 8, 6, 2> cache; + bool r; + u8 *data; + int address; + u8 value; + + for (int n = 0; n < memorysize; n++) + memory[n] = 0xaa ^ n; + address = std::rand() & (memorysize - 1); + r = cache.search(address, &data); + if (r == false) + { + r = cache.allocate(address, &data); + if (r == true) + writeline(data, cache.base(address)); + readline(data, cache.base(address)); + } + value = data[address & 63]; + if (value != memory[address]) + printf("Error reading address %d\n\r", address); +} +*/ diff --git a/src/devices/cpu/i386/cpuidmsrs.hxx b/src/devices/cpu/i386/cpuidmsrs.hxx index 827050174e3..842ff85b4b2 100644 --- a/src/devices/cpu/i386/cpuidmsrs.hxx +++ b/src/devices/cpu/i386/cpuidmsrs.hxx @@ -314,12 +314,15 @@ uint64_t athlonxp_device::opcode_rdmsr(bool &valid_msr) break; case 0x250: // MTRRfix64K_00000 // 8 bits for each 64k block starting at address 0 + ret = m_msr_mtrrfix[0]; break; case 0x258: // MTRRfix16K_80000 // 8 bits for each 16k block starting at address 0x80000 + ret = m_msr_mtrrfix[1]; break; case 0x259: // MTRRfix16K_A0000 // 8 bits for each 16k block starting at address 0xa0000 + ret = m_msr_mtrrfix[2]; break; case 0x268: // MTRRfix4K_C0000 case 0x269: // MTRRfix4K_C8000 @@ -330,6 +333,7 @@ uint64_t athlonxp_device::opcode_rdmsr(bool &valid_msr) case 0x26e: // MTRRfix4K_F0000 case 0x26f: // MTRRfix4K_F8000 // 8 bits for each 4k block + ret = m_msr_mtrrfix[3 + offset - 0x268]; break; case 0x400: // MC0_CTL break; @@ -399,10 +403,16 @@ void athlonxp_device::opcode_wrmsr(uint64_t data, bool &valid_msr) case 0x2ff: // MTRRdefType break; case 0x250: // MTRRfix64K_00000 + m_msr_mtrrfix[0] = data; + parse_mtrrfix(data, 0, 64); break; case 0x258: // MTRRfix16K_80000 + m_msr_mtrrfix[1] = data; + parse_mtrrfix(data, 0x80000, 16); break; case 0x259: // MTRRfix16K_A0000 + m_msr_mtrrfix[2] = data; + parse_mtrrfix(data, 0xa0000, 16); break; case 0x268: // MTRRfix4K_C0000-F8000 case 0x269: @@ -412,6 +422,8 @@ void athlonxp_device::opcode_wrmsr(uint64_t data, bool &valid_msr) case 0x26d: case 0x26e: case 0x26f: + m_msr_mtrrfix[3 + offset - 0x268] = data; + parse_mtrrfix(data, 0xc0000 + (offset - 0x268) * 0x8000, 4); break; case 0x400: // MC0_CTL break; diff --git a/src/devices/cpu/i386/i386.cpp b/src/devices/cpu/i386/i386.cpp index fdc0561a1ab..f372dba1239 100644 --- a/src/devices/cpu/i386/i386.cpp +++ b/src/devices/cpu/i386/i386.cpp @@ -4721,6 +4721,10 @@ void athlonxp_device::device_reset() m_cpuid_id2 = ('D' << 24) | ('M' << 16) | ('A' << 8) | 'c'; // cAMD memset(m_processor_name_string, 0, 48); strcpy((char *)m_processor_name_string, "AMD Athlon(tm) Processor"); + for (int n = 0; n < 11; n++) + m_msr_mtrrfix[n] = 0; + for (int n = 0; n < (1024 / 4); n++) + m_memory_ranges_1m[n] = 0; // change the 0 to 6 to test the cache just after reset m_cpuid_max_input_value_eax = 0x01; m_cpu_version = REG32(EDX); @@ -4731,6 +4735,227 @@ void athlonxp_device::device_reset() CHANGE_PC(m_eip); } +void athlonxp_device::parse_mtrrfix(u64 mtrr, offs_t base, int kblock) +{ + int nb = kblock / 4; + int range = (int)(base >> 12); // base must never be higher than 1 megabyte + + for (int n = 0; n < 8; n++) + { + uint8_t type = mtrr & 0xff; + + for (int b = 0; b < nb; b++) + { + m_memory_ranges_1m[range] = type; + range++; + } + mtrr = mtrr >> 8; + } +} + +int athlonxp_device::check_cacheable(offs_t address) +{ + offs_t block; + int disabled; + + disabled = 0; + if (m_cr[0] & (1 << 30)) + disabled = 128; + if (address >= 0x100000) + return disabled; + block = address >> 12; + return m_memory_ranges_1m[block] | disabled; +} + +template +dt athlonxp_device::opcode_read_cache(offs_t address) +{ + int mode = check_cacheable(address); + bool nocache = false; + u8 *data; + + if ((mode & 7) == 0) + nocache = true; + if (mode & 1) + nocache = true; + if (nocache == false) + { + int offset = (address & 63) ^ xorle; + data = cache.search(address); + if (data) + return *(dt *)(data + offset); + if (!(mode & 128)) + { + bool dirty = cache.allocate(address, &data); + address = cache.base(address); + if (dirty) + { + offs_t old_address = cache.old(); + + for (int w = 0; w < 64; w += 4) + macache32->write_dword(old_address + w, *(u32 *)(data + w)); + } + for (int r = 0; r < 64; r += 4) + *(u32 *)(data + r) = macache32->read_dword(address + r); + return *(dt *)(data + offset); + } + else + { + if (sizeof(dt) == 1) + return macache32->read_byte(address); + else if (sizeof(dt) == 2) + return macache32->read_word(address); + else + return macache32->read_dword(address); + } + } + else + { + if (sizeof(dt) == 1) + return macache32->read_byte(address); + else if (sizeof(dt) == 2) + return macache32->read_word(address); + else + return macache32->read_dword(address); + } +} + +template +dt athlonxp_device::program_read_cache(offs_t address) +{ + int mode = check_cacheable(address); + bool nocache = false; + u8 *data; + + if ((mode & 7) == 0) + nocache = true; + if (mode & 1) + nocache = true; + if (nocache == false) + { + int offset = (address & 63) ^ xorle; + data = cache.search(address); + if (data) + return *(dt *)(data + offset); + if (!(mode & 128)) + { + bool dirty = cache.allocate(address, &data); + address = cache.base(address); + if (dirty) + { + offs_t old_address = cache.old(); + + for (int w = 0; w < 64; w += 4) + m_program->write_dword(old_address + w, *(u32 *)(data + w)); + } + for (int r = 0; r < 64; r += 4) + *(u32 *)(data + r) = m_program->read_dword(address + r); + return *(dt *)(data + offset); + } + else + { + if (sizeof(dt) == 1) + return m_program->read_byte(address); + else if (sizeof(dt) == 2) + return m_program->read_word(address); + else + return m_program->read_dword(address); + } + } + else + { + if (sizeof(dt) == 1) + return m_program->read_byte(address); + else if (sizeof(dt) == 2) + return m_program->read_word(address); + else + return m_program->read_dword(address); + } +} + +template +void athlonxp_device::program_write_cache(offs_t address, dt data) +{ + int mode = check_cacheable(address); + bool nocache = false; + u8 *dataw; + + if ((mode & 7) == 0) + nocache = true; + if (mode & 1) + nocache = true; + if (nocache == false) + { + int offset = (address & 63) ^ xorle; + dataw = cache.search(address); + if (dataw) + { + *(dt *)(dataw + offset) = data; + return; + } + if (!(mode & 128)) + { + bool dirty = cache.allocate(address, &dataw); + address = cache.base(address); + if (dirty) + { + offs_t old_address = cache.old(); + + for (int w = 0; w < 64; w += 4) + m_program->write_dword(old_address + w, *(u32 *)(dataw + w)); + } + for (int r = 0; r < 64; r += 4) + *(u32 *)(dataw + r) = m_program->read_dword(address + r); + *(dt *)(dataw + offset) = data; + } + else + { + if (sizeof(dt) == 1) + m_program->write_byte(address, data); + else if (sizeof(dt) == 2) + m_program->write_word(address, data); + else + m_program->write_dword(address, data); + } + } + else + { + if (sizeof(dt) == 1) + m_program->write_byte(address, data); + else if (sizeof(dt) == 2) + m_program->write_word(address, data); + else + m_program->write_dword(address, data); + } +} + +void athlonxp_device::invalidate_cache(bool writeback) +{ + u32 base; + u8 *data; + + data = cache.first_dirty(base, true); + while (data != nullptr) + { + if (writeback) + for (int w = 0; w < 64; w += 4) + m_program->write_dword(base + w, *(u32 *)(data + w)); + data = cache.next_dirty(base, true); + } + cache.reset(); +} + +void athlonxp_device::opcode_invd() +{ + invalidate_cache(false); +} + +void athlonxp_device::opcode_wbinvd() +{ + invalidate_cache(true); +} + + /*****************************************************************************/ /* Intel Pentium 4 */ diff --git a/src/devices/cpu/i386/i386.h b/src/devices/cpu/i386/i386.h index 52e3c4fec8f..d8422780710 100644 --- a/src/devices/cpu/i386/i386.h +++ b/src/devices/cpu/i386/i386.h @@ -16,6 +16,7 @@ #include "divtlb.h" #include "i386dasm.h" +#include "cache.h" #define INPUT_LINE_A20 1 #define INPUT_LINE_SMI 2 @@ -1631,10 +1632,34 @@ protected: virtual void opcode_cpuid() override; virtual uint64_t opcode_rdmsr(bool &valid_msr) override; virtual void opcode_wrmsr(uint64_t data, bool &valid_msr) override; + virtual void opcode_invd() override; + virtual void opcode_wbinvd() override; virtual void device_start() override; virtual void device_reset() override; + virtual u8 mem_pr8(offs_t address) override { return opcode_read_cache(address); } + virtual u16 mem_pr16(offs_t address) override { return opcode_read_cache(address); } + virtual u32 mem_pr32(offs_t address) override { return opcode_read_cache(address); } + virtual u8 mem_prd8(offs_t address) override { return program_read_cache(address); } + virtual u16 mem_prd16(offs_t address) override { return program_read_cache(address); } + virtual u32 mem_prd32(offs_t address) override { return program_read_cache(address); } + virtual void mem_pwd8(offs_t address, u8 data) override { program_write_cache(address, data); } + virtual void mem_pwd16(offs_t address, u16 data) override { program_write_cache(address, data); } + virtual void mem_pwd32(offs_t address, u32 data) override { program_write_cache(address, data); } + +private: + void parse_mtrrfix(u64 mtrr, offs_t base, int kblock); + int check_cacheable(offs_t address); + void invalidate_cache(bool writeback); + + template dt opcode_read_cache(offs_t address); + template dt program_read_cache(offs_t address); + template void program_write_cache(offs_t address, dt data); + uint8_t m_processor_name_string[48]; + uint64_t m_msr_mtrrfix[11]; + uint8_t m_memory_ranges_1m[1024 / 4]; + cpucache<17, 9, Cache2Way, CacheLineBytes64> cache; // 512 sets, 2 ways (cachelines per set), 64 bytes per cacheline };