mirror of
https://github.com/holub/mame
synced 2025-04-24 09:20:02 +03:00
Merge pull request #3811 from snickerbockers/powervr2_performance
powervr2.cpp: change some per-pixel branches into per-polygon branches
This commit is contained in:
commit
0b134c3fd3
@ -2301,7 +2301,54 @@ void powervr2_device::computedilated()
|
||||
dilatechose[(b << 3) + a]=3+(a < b ? a : b);
|
||||
}
|
||||
|
||||
void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr, float const bl_in[4], float const br_in[4], float const offl_in[4], float const offr_in[4])
|
||||
/*
 * Pixel sampler for untextured polygons.
 *
 * ti, u and v are accepted only so the signature matches the textured
 * samplers; they are not read here.  The result is bls24(base_color,
 * offset_color) OR'd with the top byte (bits 24-31) of base_color.
 */
inline uint32_t powervr2_device::sample_nontextured(texinfo *ti, float u, float v, uint32_t offset_color, uint32_t base_color)
{
	uint32_t const top_byte = base_color & 0xff000000;
	uint32_t const combined = bls24(base_color, offset_color);

	return combined | top_byte;
}
|
||||
|
||||
template <int tsinst, bool bilinear>
|
||||
inline uint32_t powervr2_device::sample_textured(texinfo *ti, float u, float v, uint32_t offset_color, uint32_t base_color)
|
||||
{
|
||||
uint32_t tmp;
|
||||
uint32_t c = (this->*(ti->r))(ti, u, v);
|
||||
if (bilinear)
|
||||
{
|
||||
uint32_t c1 = (this->*(ti->r))(ti, u+1.0f, v);
|
||||
uint32_t c2 = (this->*(ti->r))(ti, u+1.0f, v+1.0f);
|
||||
uint32_t c3 = (this->*(ti->r))(ti, u, v+1.0f);
|
||||
c = bilinear_filter(c, c1, c2, c3, u, v);
|
||||
}
|
||||
|
||||
switch (tsinst) {
|
||||
case 0:
|
||||
// decal
|
||||
c = bls24(c, offset_color) | (c & 0xff000000);
|
||||
break;
|
||||
case 1:
|
||||
// modulate
|
||||
tmp = blc(c, base_color);
|
||||
tmp = bls24(tmp, offset_color);
|
||||
tmp |= c & 0xff000000;
|
||||
c = tmp;
|
||||
break;
|
||||
case 2:
|
||||
// decal with alpha
|
||||
tmp = bls24(bla(c, c), blia(base_color, c));
|
||||
c = bls24(tmp, offset_color) | (base_color & 0xff000000);
|
||||
break;
|
||||
case 3:
|
||||
// modulate with alpha
|
||||
tmp = blc(c, base_color);
|
||||
tmp = bls24(tmp, offset_color);
|
||||
tmp |= (((c >> 24) * (base_color >> 24)) >> 8) << 24;
|
||||
c = tmp;
|
||||
break;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
template <powervr2_device::pix_sample_fn sample_fn>
|
||||
inline void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr, float const bl_in[4], float const br_in[4], float const offl_in[4], float const offr_in[4])
|
||||
{
|
||||
int idx;
|
||||
int xxl, xxr;
|
||||
@ -2309,12 +2356,7 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
|
||||
uint32_t *tdata;
|
||||
float *wbufline;
|
||||
|
||||
// untextured cases aren't handled
|
||||
// if (!ti->textured) return;
|
||||
|
||||
float bl[4], offl[4];
|
||||
memcpy(bl, bl_in, sizeof(bl));
|
||||
memcpy(offl, offl_in, sizeof(offl));
|
||||
|
||||
if(xr < 0 || xl >= 640)
|
||||
return;
|
||||
@ -2325,23 +2367,28 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
|
||||
if(xxl == xxr)
|
||||
return;
|
||||
|
||||
memcpy(bl, bl_in, sizeof(bl));
|
||||
memcpy(offl, offl_in, sizeof(offl));
|
||||
|
||||
dx = xr-xl;
|
||||
dudx = (ur-ul)/dx;
|
||||
dvdx = (vr-vl)/dx;
|
||||
dwdx = (wr-wl)/dx;
|
||||
float dx_recip = 1.0f / dx;
|
||||
|
||||
dudx = (ur-ul) * dx_recip;
|
||||
dvdx = (vr-vl) * dx_recip;
|
||||
dwdx = (wr-wl) * dx_recip;
|
||||
|
||||
float dbdx[4] = {
|
||||
(br_in[0] - bl[0]) / dx,
|
||||
(br_in[1] - bl[1]) / dx,
|
||||
(br_in[2] - bl[2]) / dx,
|
||||
(br_in[3] - bl[3]) / dx
|
||||
(br_in[0] - bl[0]) * dx_recip,
|
||||
(br_in[1] - bl[1]) * dx_recip,
|
||||
(br_in[2] - bl[2]) * dx_recip,
|
||||
(br_in[3] - bl[3]) * dx_recip
|
||||
};
|
||||
|
||||
float dodx[4] = {
|
||||
(offr_in[0] - offl[0]) / dx,
|
||||
(offr_in[1] - offl[1]) / dx,
|
||||
(offr_in[2] - offl[2]) / dx,
|
||||
(offr_in[3] - offl[3]) / dx
|
||||
(offr_in[0] - offl[0]) * dx_recip,
|
||||
(offr_in[1] - offl[1]) * dx_recip,
|
||||
(offr_in[2] - offl[2]) * dx_recip,
|
||||
(offr_in[3] - offl[3]) * dx_recip
|
||||
};
|
||||
|
||||
if(xxl < 0)
|
||||
@ -2364,64 +2411,15 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
|
||||
|
||||
while(xxl < xxr) {
|
||||
if((wl >= *wbufline)) {
|
||||
uint32_t c;
|
||||
float u = ul/wl;
|
||||
float v = vl/wl;
|
||||
|
||||
/*
|
||||
* TODO: Not sure if blending should be done in
|
||||
* floating point or fixed point, or if it even matters.
|
||||
*/
|
||||
uint32_t c;
|
||||
uint32_t offset_color = float_argb_to_packed_argb(offl);
|
||||
uint32_t base_color = float_argb_to_packed_argb(bl);
|
||||
|
||||
if (ti->textured) {
|
||||
c = (this->*(ti->r))(ti, u, v);
|
||||
// debug dip to turn on/off bilinear filtering, it's slooooow
|
||||
if (debug_dip_status&0x1)
|
||||
{
|
||||
if(ti->filter_mode >= TEX_FILTER_BILINEAR)
|
||||
{
|
||||
uint32_t c1 = (this->*(ti->r))(ti, u+1.0f, v);
|
||||
uint32_t c2 = (this->*(ti->r))(ti, u+1.0f, v+1.0f);
|
||||
uint32_t c3 = (this->*(ti->r))(ti, u, v+1.0f);
|
||||
c = bilinear_filter(c, c1, c2, c3, u, v);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t tmp;
|
||||
switch (ti->tsinstruction) {
|
||||
case 0:
|
||||
// decal
|
||||
c = bls24(c, offset_color) | (c & 0xff000000);
|
||||
break;
|
||||
case 1:
|
||||
// modulate
|
||||
tmp = blc(c, base_color);
|
||||
tmp = bls24(tmp, offset_color);
|
||||
tmp |= c & 0xff000000;
|
||||
c = tmp;
|
||||
break;
|
||||
case 2:
|
||||
// decal with alpha
|
||||
tmp = bls24(bla(c, c), blia(base_color, c));
|
||||
c = bls24(tmp, offset_color) | (base_color & 0xff000000);
|
||||
break;
|
||||
case 3:
|
||||
// modulate with alpha
|
||||
tmp = blc(c, base_color);
|
||||
tmp = bls24(tmp, offset_color);
|
||||
tmp |= (((c >> 24) * (base_color >> 24)) >> 8) << 24;
|
||||
c = tmp;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
c = bls24(base_color, offset_color) | (base_color & 0xff000000);
|
||||
}
|
||||
|
||||
c = (this->*sample_fn)(ti, u, v, offset_color, base_color);
|
||||
if(c & 0xff000000) {
|
||||
*tdata = ti->blend(c, *tdata);
|
||||
*wbufline = wl;
|
||||
*tdata = ti->blend(c, *tdata);
|
||||
}
|
||||
}
|
||||
wbufline++;
|
||||
@ -2438,7 +2436,8 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
|
||||
}
|
||||
}
|
||||
|
||||
void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
template <powervr2_device::pix_sample_fn sample_fn>
|
||||
inline void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
float y0, float y1,
|
||||
float xl, float xr,
|
||||
float ul, float ur,
|
||||
@ -2516,7 +2515,7 @@ void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
}
|
||||
|
||||
while(yy0 < yy1) {
|
||||
render_hline(bitmap, ti, yy0, xl, xr, ul, ur, vl, vr, wl, wr, bl, br, offl, offr);
|
||||
render_hline<sample_fn>(bitmap, ti, yy0, xl, xr, ul, ur, vl, vr, wl, wr, bl, br, offl, offr);
|
||||
|
||||
xl += dxldy;
|
||||
xr += dxrdy;
|
||||
@ -2568,7 +2567,8 @@ void powervr2_device::sort_vertices(const vert *v, int *i0, int *i1, int *i2)
|
||||
}
|
||||
|
||||
|
||||
void powervr2_device::render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v0, const vert *v1, const vert *v2)
|
||||
template <powervr2_device::pix_sample_fn sample_fn>
|
||||
inline void powervr2_device::render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v0, const vert *v1, const vert *v2)
|
||||
{
|
||||
float dy01, dy02, dy12;
|
||||
|
||||
@ -2686,15 +2686,15 @@ void powervr2_device::render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const
|
||||
return;
|
||||
|
||||
if(v1->x > v0->x)
|
||||
render_span(bitmap, ti, v1->y, v2->y, v0->x, v1->x, v0->u, v1->u, v0->v, v1->v, v0->w, v1->w, v0->b, v1->b, v0->o, v1->o, dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy, db02dy, db12dy, do02dy, do12dy);
|
||||
render_span<sample_fn>(bitmap, ti, v1->y, v2->y, v0->x, v1->x, v0->u, v1->u, v0->v, v1->v, v0->w, v1->w, v0->b, v1->b, v0->o, v1->o, dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy, db02dy, db12dy, do02dy, do12dy);
|
||||
else
|
||||
render_span(bitmap, ti, v1->y, v2->y, v1->x, v0->x, v1->u, v0->u, v1->v, v0->v, v1->w, v0->w, v1->b, v0->b, v1->o, v0->o, dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy, db12dy, db02dy, do12dy, do02dy);
|
||||
render_span<sample_fn>(bitmap, ti, v1->y, v2->y, v1->x, v0->x, v1->u, v0->u, v1->v, v0->v, v1->w, v0->w, v1->b, v0->b, v1->o, v0->o, dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy, db12dy, db02dy, do12dy, do02dy);
|
||||
|
||||
} else if(!dy12) {
|
||||
if(v2->x > v1->x)
|
||||
render_span(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o, dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy, db01dy, db02dy, do01dy, do02dy);
|
||||
render_span<sample_fn>(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o, dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy, db01dy, db02dy, do01dy, do02dy);
|
||||
else
|
||||
render_span(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o, dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy, db02dy, db01dy, do02dy, do01dy);
|
||||
render_span<sample_fn>(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o, dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy, db02dy, db01dy, do02dy, do01dy);
|
||||
|
||||
} else {
|
||||
float idk_b[4] = {
|
||||
@ -2710,17 +2710,17 @@ void powervr2_device::render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const
|
||||
v0->o[3] + do02dy[3] * dy01
|
||||
};
|
||||
if(dx01dy < dx02dy) {
|
||||
render_span(bitmap, ti, v0->y, v1->y,
|
||||
render_span<sample_fn>(bitmap, ti, v0->y, v1->y,
|
||||
v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o,
|
||||
dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy, db01dy, db02dy, do01dy, do02dy);
|
||||
render_span(bitmap, ti, v1->y, v2->y,
|
||||
render_span<sample_fn>(bitmap, ti, v1->y, v2->y,
|
||||
v1->x, v0->x + dx02dy*dy01, v1->u, v0->u + du02dy*dy01, v1->v, v0->v + dv02dy*dy01, v1->w, v0->w + dw02dy*dy01, v1->b, idk_b, v1->o, idk_o,
|
||||
dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy, db12dy, db02dy, do12dy, do02dy);
|
||||
} else {
|
||||
render_span(bitmap, ti, v0->y, v1->y,
|
||||
render_span<sample_fn>(bitmap, ti, v0->y, v1->y,
|
||||
v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o,
|
||||
dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy, db02dy, db01dy, do02dy, do01dy);
|
||||
render_span(bitmap, ti, v1->y, v2->y,
|
||||
render_span<sample_fn>(bitmap, ti, v1->y, v2->y,
|
||||
v0->x + dx02dy*dy01, v1->x, v0->u + du02dy*dy01, v1->u, v0->v + dv02dy*dy01, v1->v, v0->w + dw02dy*dy01, v1->w, idk_b, v1->b, idk_o, v1->o,
|
||||
dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy, db02dy, db12dy, do02dy, do12dy);
|
||||
}
|
||||
@ -2732,7 +2732,59 @@ void powervr2_device::render_tri(bitmap_rgb32 &bitmap, texinfo *ti, const vert *
|
||||
int i0, i1, i2;
|
||||
|
||||
sort_vertices(v, &i0, &i1, &i2);
|
||||
render_tri_sorted(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
|
||||
bool textured = ti->textured;
|
||||
if (textured) {
|
||||
bool bilinear = (debug_dip_status & 1) &&
|
||||
(ti->filter_mode >= TEX_FILTER_BILINEAR);
|
||||
if (bilinear) {
|
||||
switch (ti->tsinstruction) {
|
||||
case 0:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<0,true>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
case 1:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<1,true>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
case 2:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<2,true>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
case 3:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<3,true>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* This should be impossible because tsinstruction was previously
|
||||
* AND'd with 3
|
||||
*/
|
||||
logerror("%s - tsinstruction is 0x%08x\n", (unsigned)ti->tsinstruction);
|
||||
render_tri_sorted<&powervr2_device::sample_nontextured>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
}
|
||||
} else {
|
||||
switch (ti->tsinstruction) {
|
||||
case 0:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<0,false>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
case 1:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<1,false>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
case 2:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<2,false>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
case 3:
|
||||
render_tri_sorted<&powervr2_device::sample_textured<3,false>>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* This should be impossible because tsinstruction was previously
|
||||
* AND'd with 3
|
||||
*/
|
||||
logerror("%s - tsinstruction is 0x%08x\n", (unsigned)ti->tsinstruction);
|
||||
render_tri_sorted<&powervr2_device::sample_nontextured>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
render_tri_sorted<&powervr2_device::sample_nontextured>(bitmap, ti, v+i0, v+i1, v+i2);
|
||||
}
|
||||
}
|
||||
|
||||
void powervr2_device::render_to_accumulation_buffer(bitmap_rgb32 &bitmap,const rectangle &cliprect)
|
||||
|
@ -305,6 +305,14 @@ public:
|
||||
void pvr_scanline_timer(int vpos);
|
||||
uint32_t screen_update(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect);
|
||||
|
||||
typedef uint32_t(powervr2_device::*pix_sample_fn)(texinfo*,float,float,uint32_t,uint32_t);
|
||||
typedef std::function<uint32_t(texinfo*,float,float,uint32_t,uint32_t)> sample_fn;
|
||||
|
||||
inline uint32_t sample_nontextured(texinfo *ti, float u, float v, uint32_t offset_color, uint32_t base_color);
|
||||
|
||||
template <int tsinst, bool bilinear>
|
||||
inline uint32_t sample_textured(texinfo *ti, float u, float v, uint32_t offset_color, uint32_t base_color);
|
||||
|
||||
protected:
|
||||
virtual void device_start() override;
|
||||
virtual void device_reset() override;
|
||||
@ -462,23 +470,36 @@ private:
|
||||
uint32_t tex_r_default(texinfo *t, float x, float y);
|
||||
void tex_get_info(texinfo *t);
|
||||
|
||||
void render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr, float const bl[4], float const br[4], float const offl[4], float const offr[4]);
|
||||
void render_span(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
float y0, float y1,
|
||||
float xl, float xr,
|
||||
float ul, float ur,
|
||||
float vl, float vr,
|
||||
float wl, float wr,
|
||||
float const bl[4], float const br[4],
|
||||
float const offl[4], float const offr[4],
|
||||
float dxldy, float dxrdy,
|
||||
float duldy, float durdy,
|
||||
float dvldy, float dvrdy,
|
||||
float dwldy, float dwrdy,
|
||||
float const dbldy[4], float const dbrdy[4],
|
||||
float const doldy[4], float const dordy[4]);
|
||||
template <pix_sample_fn sample_fn>
|
||||
inline void render_hline(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
int y, float xl, float xr,
|
||||
float ul, float ur, float vl, float vr,
|
||||
float wl, float wr,
|
||||
float const bl[4], float const br[4],
|
||||
float const offl[4], float const offr[4]);
|
||||
|
||||
template <pix_sample_fn sample_fn>
|
||||
inline void render_span(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
float y0, float y1,
|
||||
float xl, float xr,
|
||||
float ul, float ur,
|
||||
float vl, float vr,
|
||||
float wl, float wr,
|
||||
float const bl[4], float const br[4],
|
||||
float const offl[4], float const offr[4],
|
||||
float dxldy, float dxrdy,
|
||||
float duldy, float durdy,
|
||||
float dvldy, float dvrdy,
|
||||
float dwldy, float dwrdy,
|
||||
float const dbldy[4], float const dbrdy[4],
|
||||
float const doldy[4], float const dordy[4]);
|
||||
|
||||
template <pix_sample_fn sample_fn>
|
||||
inline void render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti,
|
||||
const vert *v0,
|
||||
const vert *v1, const vert *v2);
|
||||
|
||||
void sort_vertices(const vert *v, int *i0, int *i1, int *i2);
|
||||
void render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v0, const vert *v1, const vert *v2);
|
||||
void render_tri(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v);
|
||||
void render_to_accumulation_buffer(bitmap_rgb32 &bitmap, const rectangle &cliprect);
|
||||
void pvr_accumulationbuffer_to_framebuffer(address_space &space, int x, int y);
|
||||
|
Loading…
Reference in New Issue
Block a user