i386: add a simple cache to the athlon xp processor (nw)

A single 64K 2-way set associative cache:
- used for both instructions and data
- enabled only for addresses in the first megabyte
- always operates in writeback mode

It is needed by the nForce motherboard BIOS, which uses it to simulate a
block of RAM at address 0xd0000 before the DDR RAM is configured.
yz70s 2019-03-14 20:29:23 +01:00
parent f1dacfc337
commit a81cb8469e
5 changed files with 521 additions and 0 deletions
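For reference, the geometry described in the commit message matches the instantiation added to i386.h further down, cpucache<17, 9, Cache2Way, CacheLineBytes64>: 512 sets, 2 ways, 64-byte lines, 64K in total. The following is a minimal standalone sketch (not part of the commit, address 0xd1234 chosen arbitrarily) of how an address splits into tag, set and offset under those parameters:

// Standalone sketch: address decomposition for a 512-set, 2-way, 64-byte-line cache
// (LineBits = 6, SetBits = 9, TagShift = LineBits + SetBits = 15).
#include <cstdio>
#include <cstdint>

int main()
{
    const uint32_t address = 0xd1234;                // inside the first megabyte
    const uint32_t offset  = address & 0x3f;         // LineBits = 6 -> 64-byte line
    const uint32_t set     = (address >> 6) & 0x1ff; // SetBits = 9 -> 512 sets
    const uint32_t tag     = address >> 15;          // remaining upper bits
    std::printf("tag=%x set=%x offset=%x\n", tag, set, offset);
    return 0;
}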


@@ -1087,6 +1087,7 @@ if (CPUS["I386"]~=null) then
files {
MAME_DIR .. "src/devices/cpu/i386/i386.cpp",
MAME_DIR .. "src/devices/cpu/i386/i386.h",
MAME_DIR .. "src/devices/cpu/i386/cache.h",
MAME_DIR .. "src/devices/cpu/i386/cycles.h",
MAME_DIR .. "src/devices/cpu/i386/i386op16.hxx",
MAME_DIR .. "src/devices/cpu/i386/i386op32.hxx",


@@ -0,0 +1,258 @@
// license:BSD-3-Clause
// copyright-holders:Samuele Zannoli
#ifndef MAME_CPU_I386_CACHE_H
#define MAME_CPU_I386_CACHE_H
#pragma once
/* To test it outside of MAME
#include <cstdlib>
typedef unsigned char u8;
typedef unsigned int u32;
*/
enum { // values are log2 of the cache line size in bytes
CacheLineBytes16 = 4,
CacheLineBytes32 = 5,
CacheLineBytes64 = 6,
CacheLineBytes128 = 7,
CacheLineBytes256 = 8,
};
enum { // values are log2 of the number of ways
CacheDirectMapped = 0,
Cache2Way = 1,
Cache4Way = 2,
Cache8Way = 3,
Cache16Way = 4
};
enum {
CacheRead = 0,
CacheWrite = 1
};
template<int TagBits, int SetBits, int WayBits, int LineBits>
class cpucache {
public:
// Constructor
cpucache();
// Reset the cache
void reset();
// Find the cacheline containing data at address
template <int ReadWrite> u8* search(u32 address);
// Allocate a cacheline for data at address
template <int ReadWrite> bool allocate(u32 address, u8 **data);
// Get the address where the cacheline data should be written back to
u32 old();
// Get the address of the first byte of the cacheline that contains data at address
u32 base(u32 address);
// Compose the cacheline parameters into an address
u32 address(u32 tag, u32 set, u32 offset);
// Get the data of the first cacheline marked as dirty
u8* first_dirty(u32 &base, bool clean);
// Get the data of the next cacheline marked as dirty
u8* next_dirty(u32 &base, bool clean);
private:
static const int Ways = 1 << WayBits;
static const int LineBytes = 1 << LineBits;
static const int Sets = 1 << SetBits;
static const u32 LineMask = (1 << LineBits) - 1;
static const u32 SetMask = ((1 << SetBits) - 1) << LineBits;
static const u32 WayMask = (1 << WayBits) - 1;
static const int TagShift = LineBits + SetBits;
struct cacheline {
u8 data[LineBytes];
bool allocated;
bool dirty;
u32 tag;
u32 debug_address;
};
struct cacheset {
cacheline lines[Ways];
int nextway;
};
cacheset sets[Sets];
u32 writeback_base;
int last_set;
int last_way;
};
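The member functions above are meant to be used together. The sketch below (not part of the commit) shows the read-miss protocol the way the Athlon XP read handlers later in this commit use it; readline() and writeline() stand for caller-supplied backing-store helpers, like the ones in the commented-out test harness at the end of this file:

// Sketch of the read-miss protocol; readline()/writeline() are assumed helpers.
template <int TagBits, int SetBits, int WayBits, int LineBits>
u8 read_byte_through(cpucache<TagBits, SetBits, WayBits, LineBits> &cache, u32 address)
{
    u8 *line = cache.search<CacheRead>(address);
    if (line == nullptr)
    {
        // miss: pick a victim line; a true return means the victim held dirty
        // data that must first be written back to the address reported by old()
        if (cache.allocate<CacheRead>(address, &line))
            writeline(line, cache.old());
        // then fill the fresh line from memory, starting at the line base
        readline(line, cache.base(address));
    }
    return line[address & ((1 << LineBits) - 1)];
}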
template<int TagBits, int SetBits, int WayBits, int LineBits>
cpucache<TagBits, SetBits, WayBits, LineBits>::cpucache()
{
reset();
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
void cpucache<TagBits, SetBits, WayBits, LineBits>::reset()
{
for (int s = 0; s < Sets; s++)
for (int w = 0; w < Ways; w++)
{
sets[s].nextway = 0;
sets[s].lines[w].allocated = false;
sets[s].lines[w].dirty = false;
sets[s].lines[w].debug_address = 0;
}
last_set = -1;
last_way = -1;
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
template<int ReadWrite>
u8* cpucache<TagBits, SetBits, WayBits, LineBits>::search(u32 address)
{
const int addresset = (address & SetMask) >> LineBits;
const int addrestag = address >> TagShift;
for (int w = 0; w < Ways; w++)
if ((sets[addresset].lines[w].allocated) && (sets[addresset].lines[w].tag == addrestag))
{
if (ReadWrite != 0)
sets[addresset].lines[w].dirty = true;
return sets[addresset].lines[w].data;
}
return nullptr;
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
template<int ReadWrite>
bool cpucache<TagBits, SetBits, WayBits, LineBits>::allocate(u32 address, u8 **data)
{
const int addresset = (address & SetMask) >> LineBits;
const int addrestag = address >> TagShift;
const int victimway = sets[addresset].nextway;
bool old_allocated, old_dirty;
bool ret;
sets[addresset].nextway = (victimway + 1) & WayMask; // decide which way will be allocated next
old_allocated = sets[addresset].lines[victimway].allocated;
old_dirty = sets[addresset].lines[victimway].dirty;
writeback_base = (sets[addresset].lines[victimway].tag << TagShift) | (address & SetMask);
sets[addresset].lines[victimway].tag = addrestag;
sets[addresset].lines[victimway].allocated = true;
if (ReadWrite == 0)
sets[addresset].lines[victimway].dirty = false; // line allocated for reading starts clean, the caller fills it from memory
else
sets[addresset].lines[victimway].dirty = true; // line is allocated to write into it
*data = sets[addresset].lines[victimway].data;
sets[addresset].lines[victimway].debug_address = address;
ret = old_allocated; // ret = old_allocated && old_dirty
if (!old_dirty)
ret = false;
return ret; // true if caller must write back the cacheline
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
u32 cpucache<TagBits, SetBits, WayBits, LineBits>::old()
{
return writeback_base;
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
u32 cpucache<TagBits, SetBits, WayBits, LineBits>::base(u32 address)
{
return address & ~LineMask;
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
u32 cpucache<TagBits, SetBits, WayBits, LineBits>::address(u32 tag, u32 set, u32 offset)
{
return (tag << TagShift) | (set << LineBits) | offset;
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
u8* cpucache<TagBits, SetBits, WayBits, LineBits>::first_dirty(u32 &base, bool clean)
{
for (int s = 0; s < Sets; s++)
for (int w = 0; w < Ways; w++)
if (sets[s].lines[w].dirty == true)
{
if (clean)
sets[s].lines[w].dirty = false;
last_set = s;
last_way = w;
base = address(sets[s].lines[w].tag, s, 0);
return sets[s].lines[w].data;
}
return nullptr;
}
template<int TagBits, int SetBits, int WayBits, int LineBits>
u8* cpucache<TagBits, SetBits, WayBits, LineBits>::next_dirty(u32 &base, bool clean)
{
if (last_set < 0)
return nullptr;
while (true)
{
last_way++;
if (last_way == Ways)
{
last_way = 0;
last_set++;
if (last_set == Sets)
{
last_set = -1;
last_way = -1;
return nullptr;
}
}
if (sets[last_set].lines[last_way].dirty == true)
{
if (clean)
sets[last_set].lines[last_way].dirty = false;
base = address(sets[last_set].lines[last_way].tag, last_set, 0);
return sets[last_set].lines[last_way].data;
}
}
}
#endif
/* To test it outside of MAME
#include <cstdio>
const int memorysize = 256 * 1024;
u8 memory[memorysize];
void readline(u8 *data, u32 address)
{
for (int n = 0; n < 64; n++)
data[n] = memory[address + n];
}
void writeline(u8 *data, u32 address)
{
for (int n = 0; n < 64; n++)
memory[address + n] = data[n];
}
void cache_tester()
{
cpucache<18, 8, Cache2Way, CacheLineBytes64> cache; // 256 sets, 2 ways, 64 bytes per cacheline
bool r;
u8 *data;
int address;
u8 value;
for (int n = 0; n < memorysize; n++)
memory[n] = 0xaa ^ n;
address = std::rand() & (memorysize - 1);
data = cache.search<CacheRead>(address);
if (data == nullptr)
{
r = cache.allocate<CacheRead>(address, &data);
if (r == true)
writeline(data, cache.old()); // write back the evicted dirty line to its old address
readline(data, cache.base(address)); // then fill the new line from memory
}
value = data[address & 63];
if (value != memory[address])
printf("Error reading address %d\n", address);
}
*/


@@ -314,12 +314,15 @@ uint64_t athlonxp_device::opcode_rdmsr(bool &valid_msr)
break;
case 0x250: // MTRRfix64K_00000
// 8 bits for each 64k block starting at address 0
ret = m_msr_mtrrfix[0];
break;
case 0x258: // MTRRfix16K_80000
// 8 bits for each 16k block starting at address 0x80000
ret = m_msr_mtrrfix[1];
break;
case 0x259: // MTRRfix16K_A0000
// 8 bits for each 16k block starting at address 0xa0000
ret = m_msr_mtrrfix[2];
break;
case 0x268: // MTRRfix4K_C0000
case 0x269: // MTRRfix4K_C8000
@@ -330,6 +333,7 @@ uint64_t athlonxp_device::opcode_rdmsr(bool &valid_msr)
case 0x26e: // MTRRfix4K_F0000
case 0x26f: // MTRRfix4K_F8000
// 8 bits for each 4k block
ret = m_msr_mtrrfix[3 + offset - 0x268];
break;
case 0x400: // MC0_CTL
break;
@@ -399,10 +403,16 @@ void athlonxp_device::opcode_wrmsr(uint64_t data, bool &valid_msr)
case 0x2ff: // MTRRdefType
break;
case 0x250: // MTRRfix64K_00000
m_msr_mtrrfix[0] = data;
parse_mtrrfix(data, 0, 64);
break;
case 0x258: // MTRRfix16K_80000
m_msr_mtrrfix[1] = data;
parse_mtrrfix(data, 0x80000, 16);
break;
case 0x259: // MTRRfix16K_A0000
m_msr_mtrrfix[2] = data;
parse_mtrrfix(data, 0xa0000, 16);
break;
case 0x268: // MTRRfix4K_C0000-F8000
case 0x269:
@@ -412,6 +422,8 @@ void athlonxp_device::opcode_wrmsr(uint64_t data, bool &valid_msr)
case 0x26d:
case 0x26e:
case 0x26f:
m_msr_mtrrfix[3 + offset - 0x268] = data;
parse_mtrrfix(data, 0xc0000 + (offset - 0x268) * 0x8000, 4);
break;
case 0x400: // MC0_CTL
break;


@@ -4721,6 +4721,10 @@ void athlonxp_device::device_reset()
m_cpuid_id2 = ('D' << 24) | ('M' << 16) | ('A' << 8) | 'c'; // cAMD
memset(m_processor_name_string, 0, 48);
strcpy((char *)m_processor_name_string, "AMD Athlon(tm) Processor");
for (int n = 0; n < 11; n++)
m_msr_mtrrfix[n] = 0;
for (int n = 0; n < (1024 / 4); n++)
m_memory_ranges_1m[n] = 0; // change the 0 to 6 to test the cache just after reset
m_cpuid_max_input_value_eax = 0x01;
m_cpu_version = REG32(EDX);
@@ -4731,6 +4735,227 @@ void athlonxp_device::device_reset()
CHANGE_PC(m_eip);
}
void athlonxp_device::parse_mtrrfix(u64 mtrr, offs_t base, int kblock)
{
int nb = kblock / 4;
int range = (int)(base >> 12); // base must never be higher than 1 megabyte
for (int n = 0; n < 8; n++)
{
uint8_t type = mtrr & 0xff;
for (int b = 0; b < nb; b++)
{
m_memory_ranges_1m[range] = type;
range++;
}
mtrr = mtrr >> 8;
}
}
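As a worked example (a sketch, not part of the commit), assuming the architectural MTRR memory type value 6 (writeback): programming MTRRfix16K_A0000 with 0x0606060606060606 calls parse_mtrrfix(data, 0xa0000, 16), and each of the eight bytes covers a 16K block, i.e. four 4K entries of m_memory_ranges_1m, so the 128K region 0xa0000-0xbffff is marked writeback. A standalone replica of the expansion:

// Standalone sketch replicating the expansion performed by parse_mtrrfix().
#include <cstdio>
#include <cstdint>

static uint8_t memory_ranges_1m[1024 / 4]; // one byte per 4K page of the first megabyte

static void expand_mtrrfix(uint64_t mtrr, uint32_t base, int kblock)
{
    const int nb = kblock / 4; // 4K pages covered by each of the 8 bytes in the MSR
    int range = base >> 12;
    for (int n = 0; n < 8; n++)
    {
        const uint8_t type = mtrr & 0xff;
        for (int b = 0; b < nb; b++)
            memory_ranges_1m[range++] = type;
        mtrr >>= 8;
    }
}

int main()
{
    // MTRRfix16K_A0000 = 0x0606060606060606 marks 0xa0000-0xbffff as writeback
    expand_mtrrfix(0x0606060606060606ULL, 0xa0000, 16);
    std::printf("page 0xa0000 type=%d, page 0xbf000 type=%d\n",
        memory_ranges_1m[0xa0], memory_ranges_1m[0xbf]);
    return 0;
}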
int athlonxp_device::check_cacheable(offs_t address)
{
offs_t block;
int disabled;
disabled = 0;
if (m_cr[0] & (1 << 30))
disabled = 128;
if (address >= 0x100000)
return disabled;
block = address >> 12;
return m_memory_ranges_1m[block] | disabled;
}
template <class dt, offs_t xorle>
dt athlonxp_device::opcode_read_cache(offs_t address)
{
int mode = check_cacheable(address);
bool nocache = false;
u8 *data;
if ((mode & 7) == 0)
nocache = true;
if (mode & 1)
nocache = true;
if (nocache == false)
{
int offset = (address & 63) ^ xorle;
data = cache.search<CacheRead>(address);
if (data)
return *(dt *)(data + offset);
if (!(mode & 128))
{
bool dirty = cache.allocate<CacheRead>(address, &data);
address = cache.base(address);
if (dirty)
{
offs_t old_address = cache.old();
for (int w = 0; w < 64; w += 4)
macache32->write_dword(old_address + w, *(u32 *)(data + w));
}
for (int r = 0; r < 64; r += 4)
*(u32 *)(data + r) = macache32->read_dword(address + r);
return *(dt *)(data + offset);
}
else
{
if (sizeof(dt) == 1)
return macache32->read_byte(address);
else if (sizeof(dt) == 2)
return macache32->read_word(address);
else
return macache32->read_dword(address);
}
}
else
{
if (sizeof(dt) == 1)
return macache32->read_byte(address);
else if (sizeof(dt) == 2)
return macache32->read_word(address);
else
return macache32->read_dword(address);
}
}
template <class dt, offs_t xorle>
dt athlonxp_device::program_read_cache(offs_t address)
{
int mode = check_cacheable(address);
bool nocache = false;
u8 *data;
if ((mode & 7) == 0)
nocache = true;
if (mode & 1)
nocache = true;
if (nocache == false)
{
int offset = (address & 63) ^ xorle;
data = cache.search<CacheRead>(address);
if (data)
return *(dt *)(data + offset);
if (!(mode & 128))
{
bool dirty = cache.allocate<CacheRead>(address, &data);
address = cache.base(address);
if (dirty)
{
offs_t old_address = cache.old();
for (int w = 0; w < 64; w += 4)
m_program->write_dword(old_address + w, *(u32 *)(data + w));
}
for (int r = 0; r < 64; r += 4)
*(u32 *)(data + r) = m_program->read_dword(address + r);
return *(dt *)(data + offset);
}
else
{
if (sizeof(dt) == 1)
return m_program->read_byte(address);
else if (sizeof(dt) == 2)
return m_program->read_word(address);
else
return m_program->read_dword(address);
}
}
else
{
if (sizeof(dt) == 1)
return m_program->read_byte(address);
else if (sizeof(dt) == 2)
return m_program->read_word(address);
else
return m_program->read_dword(address);
}
}
template <class dt, offs_t xorle>
void athlonxp_device::program_write_cache(offs_t address, dt data)
{
int mode = check_cacheable(address);
bool nocache = false;
u8 *dataw;
if ((mode & 7) == 0)
nocache = true;
if (mode & 1)
nocache = true;
if (nocache == false)
{
int offset = (address & 63) ^ xorle;
dataw = cache.search<CacheWrite>(address);
if (dataw)
{
*(dt *)(dataw + offset) = data;
return;
}
if (!(mode & 128))
{
bool dirty = cache.allocate<CacheWrite>(address, &dataw);
address = cache.base(address);
if (dirty)
{
offs_t old_address = cache.old();
for (int w = 0; w < 64; w += 4)
m_program->write_dword(old_address + w, *(u32 *)(dataw + w));
}
for (int r = 0; r < 64; r += 4)
*(u32 *)(dataw + r) = m_program->read_dword(address + r);
*(dt *)(dataw + offset) = data;
}
else
{
if (sizeof(dt) == 1)
m_program->write_byte(address, data);
else if (sizeof(dt) == 2)
m_program->write_word(address, data);
else
m_program->write_dword(address, data);
}
}
else
{
if (sizeof(dt) == 1)
m_program->write_byte(address, data);
else if (sizeof(dt) == 2)
m_program->write_word(address, data);
else
m_program->write_dword(address, data);
}
}
void athlonxp_device::invalidate_cache(bool writeback)
{
u32 base;
u8 *data;
data = cache.first_dirty(base, true);
while (data != nullptr)
{
if (writeback)
for (int w = 0; w < 64; w += 4)
m_program->write_dword(base + w, *(u32 *)(data + w));
data = cache.next_dirty(base, true);
}
cache.reset();
}
void athlonxp_device::opcode_invd()
{
invalidate_cache(false);
}
void athlonxp_device::opcode_wbinvd()
{
invalidate_cache(true);
}
/*****************************************************************************/
/* Intel Pentium 4 */


@@ -16,6 +16,7 @@
#include "divtlb.h"
#include "i386dasm.h"
#include "cache.h"
#define INPUT_LINE_A20 1
#define INPUT_LINE_SMI 2
@@ -1631,10 +1632,34 @@ protected:
virtual void opcode_cpuid() override;
virtual uint64_t opcode_rdmsr(bool &valid_msr) override;
virtual void opcode_wrmsr(uint64_t data, bool &valid_msr) override;
virtual void opcode_invd() override;
virtual void opcode_wbinvd() override;
virtual void device_start() override;
virtual void device_reset() override;
virtual u8 mem_pr8(offs_t address) override { return opcode_read_cache<u8, NATIVE_ENDIAN_VALUE_LE_BE(0, 3)>(address); }
virtual u16 mem_pr16(offs_t address) override { return opcode_read_cache<u16, NATIVE_ENDIAN_VALUE_LE_BE(0, 2)>(address); }
virtual u32 mem_pr32(offs_t address) override { return opcode_read_cache<u32, 0>(address); }
virtual u8 mem_prd8(offs_t address) override { return program_read_cache<u8, NATIVE_ENDIAN_VALUE_LE_BE(0, 3)>(address); }
virtual u16 mem_prd16(offs_t address) override { return program_read_cache<u16, NATIVE_ENDIAN_VALUE_LE_BE(0, 2)>(address); }
virtual u32 mem_prd32(offs_t address) override { return program_read_cache<u32, 0>(address); }
virtual void mem_pwd8(offs_t address, u8 data) override { program_write_cache<u8, NATIVE_ENDIAN_VALUE_LE_BE(0, 3)>(address, data); }
virtual void mem_pwd16(offs_t address, u16 data) override { program_write_cache<u16, NATIVE_ENDIAN_VALUE_LE_BE(0, 2)>(address, data); }
virtual void mem_pwd32(offs_t address, u32 data) override { program_write_cache<u32, 0>(address, data); }
private:
void parse_mtrrfix(u64 mtrr, offs_t base, int kblock);
int check_cacheable(offs_t address);
void invalidate_cache(bool writeback);
template <class dt, offs_t xorle> dt opcode_read_cache(offs_t address);
template <class dt, offs_t xorle> dt program_read_cache(offs_t address);
template <class dt, offs_t xorle> void program_write_cache(offs_t address, dt data);
uint8_t m_processor_name_string[48];
uint64_t m_msr_mtrrfix[11];
uint8_t m_memory_ranges_1m[1024 / 4];
cpucache<17, 9, Cache2Way, CacheLineBytes64> cache; // 512 sets, 2 ways (cachelines per set), 64 bytes per cacheline
};
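A note on the xorle template argument used in the overrides above: the cacheline is filled with 32-bit read_dword accesses, so on a big-endian host the bytes of each dword are stored in reversed order and sub-dword accesses XOR their offset to compensate (3 for bytes, 2 for words); NATIVE_ENDIAN_VALUE_LE_BE(0, 3) evaluates to 0 on little-endian hosts and 3 on big-endian ones. A minimal sketch of the effect, outside MAME:

// Standalone sketch (not part of the commit) of the offset ^ 3 correction.
#include <cstdio>
#include <cstdint>
#include <cstring>

int main()
{
    uint8_t line[4];
    const uint32_t dword = 0x11223344;   // value a little-endian guest expects at offsets 0..3
    std::memcpy(line, &dword, 4);        // same layout as *(u32 *)(data + r) = read_dword(...)
    // On a little-endian host the guest's byte 0 (0x44) is at line[0];
    // on a big-endian host the same byte sits at line[0 ^ 3].
    std::printf("line[0] = %02x\n", line[0]);
    return 0;
}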