Slight DRC code generation optimisation:

cpu/drcbex64.cpp: Consider a RIP-relative LEA for generating 64-bit
values (7-byte instruction versus a 10-byte MOV instruction).  Seems to
work pretty well for heap pointers.

cpu/drcbearm64.cpp: Changed the near cache base pointer offset from 0x80
to 0x100.  The value 0x80 was copied from the x86-64 back-end where it
was chosen to allow an 8-bit signed displacement to reach as much of the
top of the near cache as possible.  However, AArch64 uses 9-bit signed
displacements, so a quarter of the range was being wasted.

cpu/drcbearm64.cpp: Consider an adrp/add sequence for generating
immediate values as well as memory references.
This commit is contained in:
Vas Crabb 2025-01-17 00:46:34 +11:00
parent 12c491b13f
commit 375877823c
2 changed files with 24 additions and 4 deletions

View File

@ -47,10 +47,10 @@ FP -> SP + 0x00 previous FP
Stack layout in nested generated code subroutine call frame:
SP -> SP + 0x00 previous FP
SP -> SP + 0x00 saved FP
SP + 0x08 return address
...
FP - 0x10 previous FP
FP - 0x10 saved FP
FP - 0x08 return address
FP -> FP + 0x00 previous FP
FP + 0x08 top-level return address
@ -492,6 +492,20 @@ void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const
return;
}
const uint64_t pagebase = codeoffs & ~make_bitmask<uint64_t>(12);
const int64_t pagerel = (int64_t)ptr - pagebase;
if (is_valid_immediate_signed(pagerel, 33))
{
const uint64_t targetpage = (uint64_t)ptr & ~make_bitmask<uint64_t>(12);
const uint64_t pageoffs = (uint64_t)ptr & util::make_bitmask<uint64_t>(12);
a.adrp(reg, targetpage);
if (pageoffs != 0)
a.add(reg, reg, pageoffs);
return;
}
a.mov(reg, ptr);
}
@ -953,7 +967,7 @@ drcbe_arm64::drcbe_arm64(drcuml_state &drcuml, device_t &device, drc_cache &cach
, m_entry(nullptr)
, m_exit(nullptr)
, m_nocode(nullptr)
, m_baseptr(cache.near() + 0x80)
, m_baseptr(cache.near() + 0x100)
, m_near(*(near_state *)cache.alloc_near(sizeof(m_near)))
{
m_near.emulated_flags = 0;

View File

@ -1287,7 +1287,13 @@ void drcbe_x64::mov_r64_imm(Assembler &a, Gp const &reg, uint64_t const imm)
else if (s32(imm) == imm)
a.mov(reg.r64(), s32(imm));
else
a.mov(reg.r64(), imm);
{
const int64_t delta = imm - (a.code()->baseAddress() + a.offset() + 7);
if (short_immediate(delta))
a.lea(reg.r64(), ptr(rip, delta));
else
a.mov(reg.r64(), imm);
}
}