mirror of
https://github.com/holub/mame
synced 2025-04-21 07:52:35 +03:00
nw, more RSP opts ported from CEN64
This commit is contained in:
parent
b6385b1c6e
commit
ae67c6b0ad
@ -163,6 +163,8 @@ public:
|
||||
void ccfunc_sp_set_status_cb();
|
||||
void ccfunc_unimplemented();
|
||||
|
||||
UINT8* get_dmem() { return m_dmem8; }
|
||||
|
||||
protected:
|
||||
// device-level overrides
|
||||
virtual void device_start();
|
||||
|
@ -178,10 +178,58 @@ const rsp_cop2::vec_helpers_t rsp_cop2::m_vec_helpers = {
|
||||
{ 0x0d0e, 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08 },
|
||||
{ 0x0c0d, 0x0203, 0x0001, 0x0607, 0x0405, 0x0c0b, 0x0809, 0x0e0f },
|
||||
{ 0x030c, 0x0102, 0x0700, 0x0506, 0x0b04, 0x090a, 0x0f08, 0x0d0e }
|
||||
},
|
||||
{ // qr_lut
|
||||
{ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
|
||||
{ 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
|
||||
{ 0x0000, 0x0000, 0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0xff00, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0xff00, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff00, 0xffff, 0xffff },
|
||||
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff00, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff },
|
||||
{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff00 }
|
||||
},
|
||||
{ // bdls_lut - mask to denote which part of the vector to load/store.
|
||||
{ 0x00ff, 0x0000, 0x0000, 0x0000 }, // B
|
||||
{ 0xffff, 0x0000, 0x0000, 0x0000 }, // S
|
||||
{ 0xffff, 0xffff, 0x0000, 0x0000 }, // L
|
||||
{ 0xffff, 0xffff, 0xffff, 0xffff } // D
|
||||
}
|
||||
};
|
||||
|
||||
#ifndef __SSSE3__
|
||||
// TODO: Highly optimized. More of a stopgap measure.
|
||||
static inline rsp_vec_t sse2_pshufb(rsp_vec_t v, const UINT16 *keys)
|
||||
{
|
||||
UINT8 dest[16];
|
||||
UINT8 temp[16];
|
||||
|
||||
_mm_storeu_si128((rsp_vec_t *) temp, v);
|
||||
|
||||
for (UINT32 j = 0; j < 8; j++)
|
||||
{
|
||||
UINT16 key = keys[j];
|
||||
UINT8 key_hi = key >> 8;
|
||||
UINT8 key_lo = key >> 0;
|
||||
|
||||
dest[(j << 1) + 1] = key_hi == 0x80 ? 0x00 : temp[key_hi];
|
||||
dest[(j << 1) + 0] = key_lo == 0x80 ? 0x00 : temp[key_lo];
|
||||
}
|
||||
|
||||
return _mm_loadu_si128((rsp_vec_t *) dest);
|
||||
}
|
||||
|
||||
rsp_vec_t rsp_cop2::vec_load_and_shuffle_operand(const UINT16* src, UINT32 element)
|
||||
{
|
||||
if (element >= 8) // element => 0w ... 7w
|
||||
@ -234,6 +282,292 @@ rsp_vec_t rsp_cop2::vec_load_and_shuffle_operand(const UINT16* src, UINT32 eleme
|
||||
return _mm_shuffle_epi8(operand, key);
|
||||
}
|
||||
#endif
|
||||
//
|
||||
// SSSE3+ accelerated loads for group I. Byteswap big-endian to 2-byte
|
||||
// little-endian vector. Start at vector element offset, discarding any
|
||||
// wraparound as necessary.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when loads to vector elements must
|
||||
// wraparound. Do we just discard the data, as below, or does the
|
||||
// data effectively get rotated around the edge of the vector?
|
||||
//
|
||||
void rsp_cop2::vec_load_group1(UINT32 addr, UINT32 element, UINT16 *regp, rsp_vec_t reg, rsp_vec_t dqm)
|
||||
{
|
||||
UINT32 offset = addr & 0x7;
|
||||
UINT32 ror = offset - element;
|
||||
|
||||
// Always load in 8-byte chunks to emulate wraparound.
|
||||
rsp_vec_t data;
|
||||
if (offset) {
|
||||
UINT32 aligned_addr_lo = addr & ~0x7;
|
||||
UINT32 aligned_addr_hi = (aligned_addr_lo + 8) & 0xFFF;
|
||||
|
||||
data = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr_lo));
|
||||
rsp_vec_t temp = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr_hi));
|
||||
data = _mm_unpacklo_epi64(data, temp);
|
||||
}
|
||||
else
|
||||
{
|
||||
data = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + addr));
|
||||
}
|
||||
|
||||
// Shift the DQM up to the point where we mux in the data.
|
||||
#ifndef __SSSE3__
|
||||
dqm = sse2_pshufb(dqm, m_vec_helpers.sll_b2l_keys[element]);
|
||||
#else
|
||||
rsp_vec_t ekey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.sll_b2l_keys[element]));
|
||||
dqm = _mm_shuffle_epi8(dqm, ekey);
|
||||
#endif
|
||||
|
||||
// Align the data to the DQM so we can mask it in.
|
||||
#ifndef __SSSE3__
|
||||
data = sse2_pshufb(data, m_vec_helpers.ror_b2l_keys[ror & 0xF]);
|
||||
#else
|
||||
ekey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.ror_b2l_keys[ror & 0xF]));
|
||||
data = _mm_shuffle_epi8(data, ekey);
|
||||
#endif
|
||||
|
||||
// Mask and mux in the data.
|
||||
#ifdef __SSE4_1__
|
||||
reg = _mm_blendv_epi8(reg, data, dqm);
|
||||
#else
|
||||
data = _mm_and_si128(dqm, data);
|
||||
reg = _mm_andnot_si128(dqm, reg);
|
||||
reg = _mm_or_si128(data, reg);
|
||||
#endif
|
||||
|
||||
_mm_store_si128((rsp_vec_t *) regp, reg);
|
||||
}
|
||||
|
||||
//
|
||||
// SSSE3+ accelerated loads for group II.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when loads to vector elements must
|
||||
// wraparound. Do we just discard the data, as below, or does the
|
||||
// data effectively get rotated around the edge of the vector?
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when element != 0.
|
||||
//
|
||||
void rsp_cop2::vec_load_group2(UINT32 addr, UINT32 element, UINT16 *regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type) {
|
||||
UINT32 offset = addr & 0x7;
|
||||
rsp_vec_t data;
|
||||
|
||||
// Always load in 8-byte chunks to emulate wraparound.
|
||||
if (offset) {
|
||||
UINT32 aligned_addr_lo = addr & ~0x7;
|
||||
UINT32 aligned_addr_hi = (aligned_addr_lo + 8) & 0xFFF;
|
||||
UINT64 datalow, datahigh;
|
||||
|
||||
memcpy(&datalow, m_rsp.get_dmem() + aligned_addr_lo, sizeof(datalow));
|
||||
memcpy(&datahigh, m_rsp.get_dmem() + aligned_addr_hi, sizeof(datahigh));
|
||||
|
||||
// TODO: Test for endian issues?
|
||||
datahigh >>= ((8 - offset) << 3);
|
||||
datalow <<= (offset << 3);
|
||||
datalow = datahigh | datalow;
|
||||
|
||||
data = _mm_loadl_epi64((rsp_vec_t *) &datalow);
|
||||
}
|
||||
else
|
||||
{
|
||||
data = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + addr));
|
||||
}
|
||||
|
||||
// "Unpack" the data.
|
||||
data = _mm_unpacklo_epi8(_mm_setzero_si128(), data);
|
||||
|
||||
if (request_type != RSP_MEM_REQUEST_PACK)
|
||||
{
|
||||
data = _mm_srli_epi16(data, 1);
|
||||
}
|
||||
|
||||
_mm_store_si128((rsp_vec_t *) regp, data);
|
||||
}
|
||||
|
||||
//
|
||||
// SSSE3+ accelerated loads for group IV. Byteswap big-endian to 2-byte
|
||||
// little-endian vector. Stop loading at quadword boundaries.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when loads from vector elements
|
||||
// must wraparound (i.e., the address offset is small, starting
|
||||
// element is large).
|
||||
//
|
||||
void rsp_cop2::vec_load_group4(UINT32 addr, UINT32 element, UINT16 *regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type)
|
||||
{
|
||||
UINT32 aligned_addr = addr & 0xFF0;
|
||||
UINT32 offset = addr & 0xF;
|
||||
|
||||
rsp_vec_t data = _mm_load_si128((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr));
|
||||
|
||||
UINT32 ror;
|
||||
if (request_type == RSP_MEM_REQUEST_QUAD)
|
||||
{
|
||||
ror = 16 - element + offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: How is this adjusted for LRV when e != 0?
|
||||
dqm = _mm_cmpeq_epi8(_mm_setzero_si128(), dqm);
|
||||
ror = 16 - offset;
|
||||
}
|
||||
|
||||
#ifndef __SSSE3__
|
||||
data = sse2_pshufb(data, m_vec_helpers.ror_b2l_keys[ror & 0xF]);
|
||||
dqm = sse2_pshufb(dqm, m_vec_helpers.ror_b2l_keys[ror & 0xF]);
|
||||
#else
|
||||
rsp_vec_t dkey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.ror_b2l_keys[ror & 0xF]));
|
||||
data = _mm_shuffle_epi8(data, dkey);
|
||||
dqm = _mm_shuffle_epi8(dqm, dkey);
|
||||
#endif
|
||||
|
||||
// Mask and mux in the data.
|
||||
#ifdef __SSE4_1__
|
||||
data = _mm_blendv_epi8(reg, data, dqm);
|
||||
#else
|
||||
data = _mm_and_si128(dqm, data);
|
||||
reg = _mm_andnot_si128(dqm, reg);
|
||||
data = _mm_or_si128(data, reg);
|
||||
#endif
|
||||
|
||||
_mm_store_si128((rsp_vec_t *) regp, data);
|
||||
}
|
||||
|
||||
//
|
||||
// SSE3+ accelerated stores for group I. Byteswap 2-byte little-endian
|
||||
// vector back to big-endian. Start at vector element offset, wrapping
|
||||
// around the edge of the vector as necessary.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when stores from vector elements
|
||||
// must wraparound. Do we just stop storing the data, or do we
|
||||
// continue storing from the front of the vector, as below?
|
||||
//
|
||||
void rsp_cop2::vec_store_group1(UINT32 addr, UINT32 element, UINT16 *regp, rsp_vec_t reg, rsp_vec_t dqm)
|
||||
{
|
||||
UINT32 offset = addr & 0x7;
|
||||
UINT32 ror = element - offset;
|
||||
|
||||
// Shift the DQM up to the point where we mux in the data.
|
||||
#ifndef __SSSE3__
|
||||
dqm = sse2_pshufb(dqm, m_vec_helpers.sll_l2b_keys[offset]);
|
||||
#else
|
||||
__m182i ekey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.sll_l2b_keys[offset]));
|
||||
dqm = _mm_shuffle_epi8(dqm, ekey);
|
||||
#endif
|
||||
|
||||
// Rotate the reg to align with the DQM.
|
||||
#ifndef __SSSE3__
|
||||
reg = sse2_pshufb(reg, m_vec_helpers.ror_l2b_keys[ror & 0xF]);
|
||||
#else
|
||||
ekey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.ror_l2b_keys[ror & 0xF]));
|
||||
reg = _mm_shuffle_epi8(reg, ekey);
|
||||
#endif
|
||||
|
||||
// Always load in 8-byte chunks to emulate wraparound.
|
||||
rsp_vec_t data;
|
||||
if (offset)
|
||||
{
|
||||
UINT32 aligned_addr_lo = addr & ~0x7;
|
||||
UINT32 aligned_addr_hi = (aligned_addr_lo + 8) & 0xFFF;
|
||||
|
||||
data = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr_lo));
|
||||
rsp_vec_t temp = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr_hi));
|
||||
data = _mm_unpacklo_epi64(data, temp);
|
||||
|
||||
// Mask and mux in the data.
|
||||
#ifdef __SSE4_1__
|
||||
data = _mm_blendv_epi8(data, reg, dqm);
|
||||
#else
|
||||
data = _mm_andnot_si128(dqm, data);
|
||||
reg = _mm_and_si128(dqm, reg);
|
||||
data = _mm_or_si128(data, reg);
|
||||
#endif
|
||||
|
||||
_mm_storel_epi64((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr_lo), data);
|
||||
|
||||
data = _mm_srli_si128(data, 8);
|
||||
_mm_storel_epi64((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr_hi), data);
|
||||
}
|
||||
else
|
||||
{
|
||||
data = _mm_loadl_epi64((rsp_vec_t *) (m_rsp.get_dmem() + addr));
|
||||
|
||||
// Mask and mux in the data.
|
||||
#ifdef __SSE4_1__
|
||||
data = _mm_blendv_epi8(data, reg, dqm);
|
||||
#else
|
||||
data = _mm_andnot_si128(dqm, data);
|
||||
reg = _mm_and_si128(dqm, reg);
|
||||
data = _mm_or_si128(data, reg);
|
||||
#endif
|
||||
|
||||
_mm_storel_epi64((rsp_vec_t *) (m_rsp.get_dmem() + addr), data);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// SSE3+ accelerated stores for group II. Byteswap 2-byte little-endian
|
||||
// vector back to big-endian. Start at vector element offset, wrapping
|
||||
// around the edge of the vector as necessary.
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when stores from vector elements
|
||||
// must wraparound. Do we just stop storing the data, or do we
|
||||
// continue storing from the front of the vector, as below?
|
||||
//
|
||||
// TODO: Reverse-engineer what happens when element != 0.
|
||||
//
|
||||
void rsp_cop2::vec_store_group2(UINT32 addr, UINT32 element, UINT16 *regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type) {
|
||||
// "Pack" the data.
|
||||
if (request_type != RSP_MEM_REQUEST_PACK)
|
||||
{
|
||||
reg = _mm_slli_epi16(reg, 1);
|
||||
}
|
||||
|
||||
reg = _mm_srai_epi16(reg, 8);
|
||||
reg = _mm_packs_epi16(reg, reg);
|
||||
|
||||
// TODO: Always store in 8-byte chunks to emulate wraparound.
|
||||
_mm_storel_epi64((rsp_vec_t *) (m_rsp.get_dmem() + addr), reg);
|
||||
}
|
||||
|
||||
//
|
||||
// SSE3+ accelerated stores for group IV. Byteswap 2-byte little-endian
|
||||
// vector back to big-endian. Stop storing at quadword boundaries.
|
||||
//
|
||||
void rsp_cop2::vec_store_group4(UINT32 addr, UINT32 element, UINT16 *regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type) {
|
||||
UINT32 aligned_addr = addr & 0xFF0;
|
||||
UINT32 offset = addr & 0xF;
|
||||
UINT32 rol = offset;
|
||||
|
||||
rsp_vec_t data = _mm_load_si128((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr));
|
||||
|
||||
if (request_type == RSP_MEM_REQUEST_QUAD)
|
||||
{
|
||||
rol -= element;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: How is this adjusted for SRV when e != 0?
|
||||
dqm = _mm_cmpeq_epi8(_mm_setzero_si128(), dqm);
|
||||
}
|
||||
|
||||
#ifndef __SSSE3__
|
||||
reg = sse2_pshufb(reg, m_vec_helpers.rol_l2b_keys[rol & 0xF]);
|
||||
#else
|
||||
rsp_vec_t ekey = _mm_load_si128((rsp_vec_t *) (m_vec_helpers.rol_l2b_keys[rol & 0xF]));
|
||||
reg = _mm_shuffle_epi8(reg, ekey);
|
||||
#endif
|
||||
|
||||
// Mask and mux out the data, write.
|
||||
#ifdef __SSE4_1__
|
||||
data = _mm_blendv_epi8(data, reg, dqm);
|
||||
#else
|
||||
reg = _mm_and_si128(dqm, reg);
|
||||
data = _mm_andnot_si128(dqm, data);
|
||||
data = _mm_or_si128(data, reg);
|
||||
#endif
|
||||
|
||||
_mm_store_si128((rsp_vec_t *) (m_rsp.get_dmem() + aligned_addr), data);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op);
|
||||
@ -498,14 +832,16 @@ void rsp_cop2::state_string_export(const int index, std::string &str)
|
||||
|
||||
void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
{
|
||||
int base = (op >> 21) & 0x1f;
|
||||
#if !USE_SIMD
|
||||
int i, end;
|
||||
UINT32 ea;
|
||||
int dest = (op >> 16) & 0x1f;
|
||||
int base = (op >> 21) & 0x1f;
|
||||
int index = (op >> 7) & 0xf;
|
||||
int offset = (op & 0x7f);
|
||||
if (offset & 0x40)
|
||||
offset |= 0xffffffc0;
|
||||
#endif
|
||||
|
||||
switch ((op >> 11) & 0x1f)
|
||||
{
|
||||
@ -518,8 +854,12 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Load 1 byte to vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset;
|
||||
VREG_B(dest, index) = m_rsp.READ8(ea);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x01: /* LSV */
|
||||
@ -531,6 +871,9 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads 2 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2);
|
||||
|
||||
end = index + 2;
|
||||
@ -540,6 +883,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_B(dest, i) = m_rsp.READ8(ea);
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x02: /* LLV */
|
||||
@ -551,6 +895,9 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads 4 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4);
|
||||
|
||||
end = index + 4;
|
||||
@ -560,6 +907,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_B(dest, i) = m_rsp.READ8(ea);
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x03: /* LDV */
|
||||
@ -571,6 +919,9 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads 8 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8);
|
||||
|
||||
end = index + 8;
|
||||
@ -580,6 +931,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_B(dest, i) = m_rsp.READ8(ea);
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x04: /* LQV */
|
||||
@ -591,6 +943,9 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads up to 16 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lqrv_sqrv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
end = index + (16 - (ea & 0xf));
|
||||
@ -601,6 +956,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_B(dest, i) = m_rsp.READ8(ea);
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x05: /* LRV */
|
||||
@ -612,6 +968,9 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Stores up to 16 bytes starting from right side until 16-byte boundary
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lqrv_sqrv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
index = 16 - ((ea & 0xf) - index);
|
||||
@ -623,6 +982,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_B(dest, i) = m_rsp.READ8(ea);
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x06: /* LPV */
|
||||
@ -634,12 +994,16 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads a byte as the upper 8 bits of each element
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8);
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + i) & 0xf)) << 8;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x07: /* LUV */
|
||||
@ -651,12 +1015,16 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads a byte as the bits 14-7 of each element
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8);
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + i) & 0xf)) << 7;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x08: /* LHV */
|
||||
@ -668,12 +1036,16 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads a byte as the bits 14-7 of each element, with 2-byte stride
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
{
|
||||
VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x09: /* LFV */
|
||||
@ -685,6 +1057,9 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
//
|
||||
// Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
// not sure what happens if 16-byte boundary is crossed...
|
||||
@ -696,6 +1071,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_S(dest, i) = m_rsp.READ8(ea) << 7;
|
||||
ea += 4;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x0a: /* LWV */
|
||||
@ -708,6 +1084,8 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
// Loads the full 128-bit vector starting from vector byte index and wrapping to index 0
|
||||
// after byte index 15
|
||||
|
||||
#if USE_SIMD
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
end = (16 - index) + 16;
|
||||
@ -717,6 +1095,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
VREG_B(dest, i & 0xf) = m_rsp.READ8(ea);
|
||||
ea += 4;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x0b: /* LTV */
|
||||
@ -730,6 +1109,8 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
|
||||
// FIXME: has a small problem with odd indices
|
||||
|
||||
#if USE_SIMD
|
||||
#else
|
||||
int element;
|
||||
int vs = dest;
|
||||
int ve = dest + 8;
|
||||
@ -751,6 +1132,7 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
|
||||
ea += 2;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
@ -769,15 +1151,17 @@ void rsp_cop2::handle_lwc2(UINT32 op)
|
||||
|
||||
void rsp_cop2::handle_swc2(UINT32 op)
|
||||
{
|
||||
int base = (op >> 21) & 0x1f;
|
||||
#if !USE_SIMD
|
||||
int i, end;
|
||||
int eaoffset;
|
||||
UINT32 ea;
|
||||
int dest = (op >> 16) & 0x1f;
|
||||
int base = (op >> 21) & 0x1f;
|
||||
int index = (op >> 7) & 0xf;
|
||||
int offset = (op & 0x7f);
|
||||
if (offset & 0x40)
|
||||
offset |= 0xffffffc0;
|
||||
#endif
|
||||
|
||||
switch ((op >> 11) & 0x1f)
|
||||
{
|
||||
@ -790,8 +1174,12 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores 1 byte from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset;
|
||||
m_rsp.WRITE8(ea, VREG_B(dest, index));
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x01: /* SSV */
|
||||
@ -803,6 +1191,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores 2 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2);
|
||||
|
||||
end = index + 2;
|
||||
@ -812,6 +1203,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea, VREG_B(dest, i));
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x02: /* SLV */
|
||||
@ -823,6 +1215,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores 4 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4);
|
||||
|
||||
end = index + 4;
|
||||
@ -832,6 +1227,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea, VREG_B(dest, i));
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x03: /* SDV */
|
||||
@ -843,6 +1239,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores 8 bytes starting from vector byte index
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lbdlsv_sbdlsv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8);
|
||||
|
||||
end = index + 8;
|
||||
@ -852,6 +1251,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea, VREG_B(dest, i));
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x04: /* SQV */
|
||||
@ -863,6 +1263,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores up to 16 bytes starting from vector byte index until 16-byte boundary
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lqrv_sqrv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
end = index + (16 - (ea & 0xf));
|
||||
@ -872,6 +1275,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea, VREG_B(dest, i & 0xf));
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x05: /* SRV */
|
||||
@ -883,6 +1287,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores up to 16 bytes starting from right side until 16-byte boundary
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lqrv_sqrv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
int o;
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
@ -895,6 +1302,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea, VREG_B(dest, ((i + o) & 0xf)));
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x06: /* SPV */
|
||||
@ -906,6 +1314,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores upper 8 bits of each element
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8);
|
||||
end = index + 8;
|
||||
|
||||
@ -921,6 +1332,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
}
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x07: /* SUV */
|
||||
@ -932,6 +1344,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores bits 14-7 of each element
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8);
|
||||
end = index + 8;
|
||||
|
||||
@ -947,6 +1362,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
}
|
||||
ea++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x08: /* SHV */
|
||||
@ -958,6 +1374,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores bits 14-7 of each element, with 2-byte stride
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
for (i=0; i < 8; i++)
|
||||
@ -968,6 +1387,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea, d);
|
||||
ea += 2;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x09: /* SFV */
|
||||
@ -981,6 +1401,9 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
|
||||
// FIXME: only works for index 0 and index 8
|
||||
|
||||
#if USE_SIMD
|
||||
vec_lfhpuv_sfhpuv(op, m_rsp.m_rsp_state->r[base]);
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
eaoffset = ea & 0xf;
|
||||
@ -993,6 +1416,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7);
|
||||
eaoffset += 4;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x0a: /* SWV */
|
||||
@ -1005,6 +1429,8 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
// Stores the full 128-bit vector starting from vector byte index and wrapping to index 0
|
||||
// after byte index 15
|
||||
|
||||
#if USE_SIMD
|
||||
#else
|
||||
ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16);
|
||||
|
||||
eaoffset = ea & 0xf;
|
||||
@ -1017,6 +1443,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
m_rsp.WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf));
|
||||
eaoffset++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 0x0b: /* STV */
|
||||
@ -1028,6 +1455,8 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
//
|
||||
// Stores one element from maximum of 8 vectors, while incrementing element index
|
||||
|
||||
#if USE_SIMD
|
||||
#else
|
||||
int element;
|
||||
int vs = dest;
|
||||
int ve = dest + 8;
|
||||
@ -1047,6 +1476,7 @@ void rsp_cop2::handle_swc2(UINT32 op)
|
||||
eaoffset += 2;
|
||||
element++;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1981,16 +2411,21 @@ void rsp_cop2::handle_vector_ops(UINT32 op)
|
||||
// Stores high, middle or low slice of accumulator to destination vector
|
||||
|
||||
#if USE_SIMD
|
||||
UINT16 *acc = m_acc.s;
|
||||
switch (EL)
|
||||
{
|
||||
case 8:
|
||||
m_v[VDREG].v = read_acc_hi(acc);
|
||||
break;
|
||||
case 9:
|
||||
m_v[VDREG].v = read_acc_mid(acc);
|
||||
break;
|
||||
case 10:
|
||||
m_v[VDREG].v = read_acc_lo(acc);
|
||||
break;
|
||||
|
||||
default:
|
||||
m_v[VDREG].v = _mm_setzero_si128();
|
||||
break;
|
||||
}
|
||||
#else
|
||||
|
@ -199,6 +199,23 @@ protected:
|
||||
RSP_ACC_HI = 0,
|
||||
};
|
||||
|
||||
enum rsp_mem_request_type {
|
||||
RSP_MEM_REQUEST_NONE,
|
||||
RSP_MEM_REQUEST_INT_MEM,
|
||||
RSP_MEM_REQUEST_VECTOR,
|
||||
RSP_MEM_REQUEST_FOURTH,
|
||||
RSP_MEM_REQUEST_HALF,
|
||||
RSP_MEM_REQUEST_PACK,
|
||||
RSP_MEM_REQUEST_QUAD,
|
||||
RSP_MEM_REQUEST_REST,
|
||||
RSP_MEM_REQUEST_UPACK
|
||||
};
|
||||
|
||||
union aligned_rsp_1vect_t {
|
||||
rsp_vec_t __align;
|
||||
UINT16 s[8];
|
||||
};
|
||||
|
||||
union aligned_rsp_2vect_t {
|
||||
rsp_vec_t __align[2];
|
||||
UINT16 s[16];
|
||||
@ -209,6 +226,7 @@ protected:
|
||||
UINT16 s[24];
|
||||
};
|
||||
|
||||
aligned_rsp_1vect_t m_vdqm;
|
||||
aligned_rsp_2vect_t m_flags[3];
|
||||
aligned_rsp_3vect_t m_acc;
|
||||
UINT32 m_dp_flag;
|
||||
@ -225,11 +243,16 @@ protected:
|
||||
const UINT16 ror_b2l_keys[16][8];
|
||||
const UINT16 rol_l2b_keys[16][8];
|
||||
const UINT16 ror_l2b_keys[16][8];
|
||||
const UINT16 qr_lut[16][8];
|
||||
const UINT16 bdls_lut[4][4];
|
||||
} vec_helpers_t;
|
||||
|
||||
static const vec_helpers_t m_vec_helpers;
|
||||
|
||||
rsp_vec_t vec_load_and_shuffle_operand(const UINT16* src, UINT32 element);
|
||||
static inline UINT32 sign_extend_6(INT32 i) {
|
||||
return (i << (32 - 7)) >> (32 - 7);
|
||||
}
|
||||
static inline rsp_vec_t vec_load_unshuffled_operand(const UINT16* src)
|
||||
{
|
||||
return _mm_load_si128((rsp_vec_t*) src);
|
||||
@ -319,11 +342,11 @@ protected:
|
||||
}
|
||||
|
||||
void vec_load_group1(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm);
|
||||
void vec_load_group2(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm);
|
||||
void vec_load_group4(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm);
|
||||
void vec_load_group2(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
|
||||
void vec_load_group4(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
|
||||
void vec_store_group1(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm);
|
||||
void vec_store_group2(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm);
|
||||
void vec_store_group4(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm);
|
||||
void vec_store_group2(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
|
||||
void vec_store_group4(UINT32 addr, UINT32 element, UINT16* regp, rsp_vec_t reg, rsp_vec_t dqm, rsp_mem_request_type request_type);
|
||||
|
||||
#include "clamp.h"
|
||||
#include "vabs.h"
|
||||
@ -349,6 +372,7 @@ protected:
|
||||
#include "vsub.h"
|
||||
#include "vsubc.h"
|
||||
#include "vxor.h"
|
||||
#include "vldst.h"
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
67
src/emu/cpu/rsp/vldst.h
Normal file
67
src/emu/cpu/rsp/vldst.h
Normal file
@ -0,0 +1,67 @@
|
||||
// license:BSD-3-Clause
|
||||
// copyright-holders:Tyler J. Stachecki,Ryan Holtz
|
||||
|
||||
// LBV, LDV, LLV, LSV, SBV, SDV, SLV, SSV
|
||||
inline void vec_lbdlsv_sbdlsv(UINT32 iw, UINT32 rs)
|
||||
{
|
||||
const UINT32 shift_and_idx = (iw >> 11) & 0x3;
|
||||
rsp_vec_t dqm = _mm_loadl_epi64((rsp_vec_t *) (m_vec_helpers.bdls_lut[shift_and_idx]));
|
||||
|
||||
const UINT32 addr = rs + (sign_extend_6(iw) << shift_and_idx);
|
||||
const UINT32 element = (iw >> 21) & 0xf;
|
||||
UINT16* regp = m_v[(iw >> 16) & 0x1f].s;
|
||||
|
||||
if (iw >> 29 & 0x1)
|
||||
{
|
||||
vec_store_group1(addr, element, regp, vec_load_unshuffled_operand(regp), dqm);
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_load_group1(addr, element, regp, vec_load_unshuffled_operand(regp), dqm);
|
||||
}
|
||||
}
|
||||
|
||||
// LPV, LUV, SPV, SUV
|
||||
inline void vec_lfhpuv_sfhpuv(UINT32 iw, UINT32 rs)
|
||||
{
|
||||
static const enum rsp_mem_request_type fhpu_type_lut[4] = {
|
||||
RSP_MEM_REQUEST_PACK,
|
||||
RSP_MEM_REQUEST_UPACK,
|
||||
RSP_MEM_REQUEST_HALF,
|
||||
RSP_MEM_REQUEST_FOURTH
|
||||
};
|
||||
|
||||
const UINT32 addr = rs + (sign_extend_6(iw) << 3);
|
||||
const UINT32 element = (iw >> 21) & 0xf;
|
||||
UINT16* regp = m_v[(iw >> 16) & 0x1f].s;
|
||||
|
||||
rsp_mem_request_type request_type = fhpu_type_lut[((iw >> 11) & 0x1f) - 6];
|
||||
if ((iw >> 29) && 0x1)
|
||||
{
|
||||
vec_store_group2(addr, element, regp, vec_load_unshuffled_operand(regp), _mm_setzero_si128(), request_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_load_group2(addr, element, regp, vec_load_unshuffled_operand(regp), _mm_setzero_si128(), request_type);
|
||||
}
|
||||
}
|
||||
|
||||
// LQV, LRV, SQV, SRV
|
||||
inline void vec_lqrv_sqrv(UINT32 iw, UINT32 rs)
|
||||
{
|
||||
const UINT32 addr = rs + (sign_extend_6(iw) << 4);
|
||||
const UINT32 element = (iw >> 21) & 0xf;
|
||||
UINT16* regp = m_v[(iw >> 16) & 0x1f].s;
|
||||
|
||||
memcpy(m_vdqm.s, m_vec_helpers.qr_lut[addr & 0xf], sizeof(m_vdqm.s));
|
||||
|
||||
rsp_mem_request_type request_type = (iw >> 11 & 0x1) ? RSP_MEM_REQUEST_REST : RSP_MEM_REQUEST_QUAD;
|
||||
if ((iw >> 29) & 0x1)
|
||||
{
|
||||
vec_store_group4(addr, element, regp, vec_load_unshuffled_operand(regp), vec_load_unshuffled_operand(m_vdqm.s), request_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_load_group4(addr, element, regp, vec_load_unshuffled_operand(regp), vec_load_unshuffled_operand(m_vdqm.s), request_type);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user