Merge pull request #3738 from snickerbockers/pvr2_color_fix

PowerVR2: improved color handling
This commit is contained in:
R. Belmont 2018-07-10 17:43:01 -04:00 committed by GitHub
commit 40fb85b6db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 409 additions and 74 deletions

View File

@ -187,6 +187,54 @@ inline uint32_t powervr2_device::bls(uint32_t c1, uint32_t c2)
return cr1|(cr2 << 8); return cr1|(cr2 << 8);
} }
/*
 * Saturating add of two packed colors that covers only the low 24 bits,
 * i.e. the three 8-bit color channels. Each channel is summed on its own
 * and pinned at 0xff when the sum overflows a byte.
 *
 * This is the alpha-less counterpart of bls: the top byte of both inputs
 * is ignored and the top byte of the returned value is always zero.
 */
inline uint32_t powervr2_device::bls24(uint32_t c1, uint32_t c2)
{
    uint32_t sum = 0;

    // walk the three color bytes, lowest first; bits 24-31 are never touched
    for (int shift = 0; shift < 24; shift += 8)
    {
        uint32_t channel = ((c1 >> shift) & 0xff) + ((c2 >> shift) & 0xff);
        if (channel > 0xff)
            channel = 0xff;
        sum |= channel << shift;
    }
    return sum;
}
/*
 * Pack a color held as four floats into one 32-bit word.
 *
 * argb[0] ends up in bits 24-31, argb[1] in bits 16-23, argb[2] in bits
 * 8-15 and argb[3] in bits 0-7. Each component is scaled by 256, truncated
 * toward zero and limited to [0, 255], so 1.0 and anything above it map
 * to 255 and anything at or below zero maps to 0.
 *
 * The limiting is done while the value is still a float. Converting a
 * float to an integer type that cannot represent it (very large
 * magnitudes, infinities, NaN) is undefined behaviour, so the range check
 * has to come before the conversion. NaN is treated as 0.
 */
inline uint32_t powervr2_device::float_argb_to_packed_argb(float argb[4]) {
    uint32_t packed = 0;

    for (int idx = 0; idx < 4; idx++) {
        const float scaled = argb[idx] * 256.0f;
        uint32_t channel;

        if (!(scaled > 0.0f))
            channel = 0;                    // negative, zero or NaN
        else if (scaled >= 255.0f)
            channel = 255;                  // saturate, also covers +inf
        else
            channel = (uint32_t)scaled;     // in range: truncate toward zero

        // unsigned arithmetic, so moving a set bit into bit 31 is well defined
        packed = (packed << 8) | channel;
    }
    return packed;
}
/*
 * Split a packed 32-bit color into four floats.
 *
 * dst[0] receives bits 24-31, dst[1] bits 16-23, dst[2] bits 8-15 and
 * dst[3] bits 0-7. Every byte is divided by 256, so each output lies in
 * the range [0, 255/256].
 */
inline void powervr2_device::packed_argb_to_float_argb(float dst[4], uint32_t in) {
    // start at the most significant byte and step down one byte per element
    int shift = 24;
    for (int idx = 0; idx < 4; idx++, shift -= 8)
        dst[idx] = ((in >> shift) & 0xff) / 256.0f;
}
// All 64 blending modes, 3 top bits are source mode, 3 bottom bits are destination mode // All 64 blending modes, 3 top bits are source mode, 3 bottom bits are destination mode
uint32_t powervr2_device::bl00(uint32_t s, uint32_t d) { return 0; } uint32_t powervr2_device::bl00(uint32_t s, uint32_t d) { return 0; }
uint32_t powervr2_device::bl01(uint32_t s, uint32_t d) { return d; } uint32_t powervr2_device::bl01(uint32_t s, uint32_t d) { return d; }
@ -456,16 +504,6 @@ uint32_t powervr2_device::tex_r_4444_vq(texinfo *t, float x, float y)
return cv_4444(*(uint16_t *)((reinterpret_cast<uint8_t *>(dc_texture_ram)) + WORD_XOR_LE(addrp))); return cv_4444(*(uint16_t *)((reinterpret_cast<uint8_t *>(dc_texture_ram)) + WORD_XOR_LE(addrp)));
} }
// Texel-read handler that ignores the coordinates and hands back the
// packed color stored in the texinfo (nontextured_pal_int).
uint32_t powervr2_device::tex_r_nt_palint(texinfo *t, float x, float y)
{
    const uint32_t flat_color = t->nontextured_pal_int;
    return flat_color;
}
// Texel-read handler that ignores the coordinates and assembles a packed
// color from the four nontextured_fpal_* components stored in the texinfo:
// a in bits 24-31, r in bits 16-23, g in bits 8-15, b in bits 0-7.
uint32_t powervr2_device::tex_r_nt_palfloat(texinfo *t, float x, float y)
{
    const auto alpha = t->nontextured_fpal_a << 24;
    const auto red   = t->nontextured_fpal_r << 16;
    const auto green = t->nontextured_fpal_g << 8;
    const auto blue  = t->nontextured_fpal_b;
    return alpha | red | green | blue;
}
uint32_t powervr2_device::tex_r_p4_1555_tw(texinfo *t, float x, float y) uint32_t powervr2_device::tex_r_p4_1555_tw(texinfo *t, float x, float y)
{ {
int xt = t->u_func(x, t->sizex); int xt = t->u_func(x, t->sizex);
@ -686,24 +724,13 @@ void powervr2_device::tex_get_info(texinfo *t)
t->vqbase = t->address; t->vqbase = t->address;
t->blend = use_alpha ? blend_functions[t->blend_mode] : bl10; t->blend = use_alpha ? blend_functions[t->blend_mode] : bl10;
t->coltype = coltype;
t->tsinstruction = tsinstruction;
// fprintf(stderr, "tex %d %d %d %d\n", t->pf, t->mode, pal_ram_ctrl, t->mipmapped); // fprintf(stderr, "tex %d %d %d %d\n", t->pf, t->mode, pal_ram_ctrl, t->mipmapped);
if(!t->textured) if(!t->textured)
{ {
t->coltype = coltype; t->r = NULL;
switch(t->coltype) {
case 0: // packed color
t->nontextured_pal_int = nontextured_pal_int;
t->r = &powervr2_device::tex_r_nt_palint;
break;
case 1: // floating color
/* TODO: might be converted even earlier I believe */
t->nontextured_fpal_a = (uint8_t)(nontextured_fpal_a * 255.0f);
t->nontextured_fpal_r = (uint8_t)(nontextured_fpal_r * 255.0f);
t->nontextured_fpal_g = (uint8_t)(nontextured_fpal_g * 255.0f);
t->nontextured_fpal_b = (uint8_t)(nontextured_fpal_b * 255.0f);
t->r = &powervr2_device::tex_r_nt_palfloat;
break;
}
} }
else else
{ {
@ -1762,7 +1789,7 @@ void powervr2_device::process_ta_fifo()
volume=(objcontrol >> 6) & 1; volume=(objcontrol >> 6) & 1;
coltype=(objcontrol >> 4) & 3; coltype=(objcontrol >> 4) & 3;
texture=(objcontrol >> 3) & 1; texture=(objcontrol >> 3) & 1;
offfset=(objcontrol >> 2) & 1; offset_color_enable=(objcontrol >> 2) & 1;
gouraud=(objcontrol >> 1) & 1; gouraud=(objcontrol >> 1) & 1;
uv16bit=(objcontrol >> 0) & 1; uv16bit=(objcontrol >> 0) & 1;
} }
@ -1793,9 +1820,47 @@ void powervr2_device::process_ta_fifo()
return; return;
} }
} }
bool have_16_byte_header = tafifo_mask != 7;
tafifo_mask = 7; tafifo_mask = 7;
// now we heve all the needed words // now we heve all the needed words
/*
* load per-polygon colors if color type is 2 or 3 or parameter type is
* 5 (quad). For color types 0 and 1, color is determined entirely on a
* per-vertex basis.
*/
if (paratype == 4)
{
switch (coltype) {
case 2:
if (offset_color_enable) {
memcpy(poly_base_color, tafifo_buff + 8, 4 * sizeof(float));
memcpy(poly_offs_color, tafifo_buff + 12, 4 * sizeof(float));
} else {
memcpy(poly_base_color, tafifo_buff + 4, 4 * sizeof(float));
memset(poly_offs_color, 0, sizeof(poly_offs_color));
}
memcpy(poly_last_mode_2_base_color, poly_base_color, sizeof(poly_last_mode_2_base_color));
break;
case 3:
memcpy(poly_base_color, poly_last_mode_2_base_color, sizeof(poly_base_color));
memset(poly_offs_color, 0, sizeof(poly_offs_color));
break;
default:
memset(poly_base_color, 0, sizeof(poly_base_color));
memset(poly_offs_color, 0, sizeof(poly_offs_color));
break;
}
} else if (paratype == 5) {
packed_argb_to_float_argb(poly_base_color, tafifo_buff[4]);
if (offset_color_enable) {
packed_argb_to_float_argb(poly_offs_color, tafifo_buff[5]);
} else {
memset(poly_offs_color, 0, sizeof(poly_offs_color));
}
}
// here we should generate the data for the various tiles // here we should generate the data for the various tiles
// for now, just interpret their meaning // for now, just interpret their meaning
if (paratype == 0) if (paratype == 0)
@ -1967,6 +2032,14 @@ void powervr2_device::process_ta_fifo()
tv[2].u = tv[0].u+tv[3].u-tv[1].u; tv[2].u = tv[0].u+tv[3].u-tv[1].u;
tv[2].v = tv[0].v+tv[3].v-tv[1].v; tv[2].v = tv[0].v+tv[3].v-tv[1].v;
int idx;
for (idx = 0; idx < 4; idx++) {
memcpy(tv[idx].b, poly_base_color,
sizeof(tv[idx].b));
memcpy(tv[idx].o, poly_offs_color,
sizeof(tv[idx].o));
}
ts = &rd->strips[rd->strips_size++]; ts = &rd->strips[rd->strips_size++];
tex_get_info(&ts->ti); tex_get_info(&ts->ti);
ts->svert = rd->verts_size; ts->svert = rd->verts_size;
@ -1985,6 +2058,56 @@ void powervr2_device::process_ta_fifo()
#endif #endif
if (rd->verts_size <= 65530) if (rd->verts_size <= 65530)
{ {
/*
 * Decode this vertex's base and offset colors into four floats each.
 * The encoding inside the vertex parameter is selected by coltype; the
 * offset color is left at zero unless the selected encoding supplies one.
 */
float vert_offset_color[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
float vert_base_color[4];
float base_intensity, offs_intensity;

switch (coltype) {
case 0:
    // packed color: the base color is the 32-bit word at index 6
    packed_argb_to_float_argb(vert_base_color, tafifo_buff[6]);
    break;
case 1:
    // floating-point color: four floats per color, copied verbatim
    if (have_16_byte_header) {
        // long form: base color at word 8, offset color at word 12
        memcpy(vert_base_color, tafifo_buff + 8,
                sizeof(vert_base_color));
        memcpy(vert_offset_color, tafifo_buff + 12,
                sizeof(vert_offset_color));
    } else {
        // short form: base color only, starting at word 4
        memcpy(vert_base_color, tafifo_buff + 4,
                sizeof(vert_base_color));
    }
    break;
case 2:
case 3:
    /*
     * base/offset color were previously
     * specified on a per-polygon basis.
     * To get the per-vertex base and
     * offset colors, they are scaled by
     * per-vertex scalar values.
     */
    // memcpy reinterprets the raw 32-bit words as floats
    memcpy(&base_intensity, tafifo_buff + 6, sizeof(base_intensity));
    memcpy(&offs_intensity, tafifo_buff + 7, sizeof(offs_intensity));
    for (int ch = 0; ch < 4; ch++) {
        vert_base_color[ch] = poly_base_color[ch] * base_intensity;
        if (offset_color_enable)
            vert_offset_color[ch] = poly_offs_color[ch] * offs_intensity;
    }
    break;
default:
    // This will never actually happen, coltype is 2-bits.
    // The format string takes three arguments: line, file and coltype.
    logerror("line %d of %s - coltype is %d\n", __LINE__, __FILE__, coltype);
    memset(vert_base_color, 0, sizeof(vert_base_color));
    break;
}
/* add a vertex to our list */ /* add a vertex to our list */
/* this is used for 3d stuff, ie most of the graphics (see guilty gear, confidential mission, maze of the kings etc.) */ /* this is used for 3d stuff, ie most of the graphics (see guilty gear, confidential mission, maze of the kings etc.) */
/* -- this is also wildly inaccurate! */ /* -- this is also wildly inaccurate! */
@ -1995,18 +2118,8 @@ void powervr2_device::process_ta_fifo()
tv->w=u2f(tafifo_buff[3]); tv->w=u2f(tafifo_buff[3]);
tv->u=u2f(tafifo_buff[4]); tv->u=u2f(tafifo_buff[4]);
tv->v=u2f(tafifo_buff[5]); tv->v=u2f(tafifo_buff[5]);
if (texture == 0) memcpy(tv->b, vert_base_color, sizeof(tv->b));
{ memcpy(tv->o, vert_offset_color, sizeof(tv->o));
if(coltype == 0)
nontextured_pal_int=tafifo_buff[6];
else if(coltype == 1)
{
nontextured_fpal_a=u2f(tafifo_buff[4]);
nontextured_fpal_r=u2f(tafifo_buff[5]);
nontextured_fpal_g=u2f(tafifo_buff[6]);
nontextured_fpal_b=u2f(tafifo_buff[7]);
}
}
if((!rd->strips_size) || if((!rd->strips_size) ||
rd->strips[rd->strips_size-1].evert != -1) rd->strips[rd->strips_size-1].evert != -1)
@ -2188,8 +2301,9 @@ void powervr2_device::computedilated()
dilatechose[(b << 3) + a]=3+(a < b ? a : b); dilatechose[(b << 3) + a]=3+(a < b ? a : b);
} }
void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr) void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr, float const bl_in[4], float const br_in[4], float const offl_in[4], float const offr_in[4])
{ {
int idx;
int xxl, xxr; int xxl, xxr;
float dx, ddx, dudx, dvdx, dwdx; float dx, ddx, dudx, dvdx, dwdx;
uint32_t *tdata; uint32_t *tdata;
@ -2198,6 +2312,10 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
// untextured cases aren't handled // untextured cases aren't handled
// if (!ti->textured) return; // if (!ti->textured) return;
float bl[4], offl[4];
memcpy(bl, bl_in, sizeof(bl));
memcpy(offl, offl_in, sizeof(offl));
if(xr < 0 || xl >= 640) if(xr < 0 || xl >= 640)
return; return;
@ -2212,6 +2330,20 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
dvdx = (vr-vl)/dx; dvdx = (vr-vl)/dx;
dwdx = (wr-wl)/dx; dwdx = (wr-wl)/dx;
float dbdx[4] = {
(br_in[0] - bl[0]) / dx,
(br_in[1] - bl[1]) / dx,
(br_in[2] - bl[2]) / dx,
(br_in[3] - bl[3]) / dx
};
float dodx[4] = {
(offr_in[0] - offl[0]) / dx,
(offr_in[1] - offl[1]) / dx,
(offr_in[2] - offl[2]) / dx,
(offr_in[3] - offl[3]) / dx
};
if(xxl < 0) if(xxl < 0)
xxl = 0; xxl = 0;
if(xxr > 640) if(xxr > 640)
@ -2222,29 +2354,69 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
ul += ddx*dudx; ul += ddx*dudx;
vl += ddx*dvdx; vl += ddx*dvdx;
wl += ddx*dwdx; wl += ddx*dwdx;
for (idx = 0; idx < 4; idx++) {
bl[idx] += ddx * dbdx[idx];
offl[idx] += ddx * dodx[idx];
}
tdata = &bitmap.pix32(y, xxl); tdata = &bitmap.pix32(y, xxl);
wbufline = &wbuffer[y][xxl]; wbufline = &wbuffer[y][xxl];
while(xxl < xxr) { while(xxl < xxr) {
if((wl >= *wbufline)) { if((wl >= *wbufline)) {
uint32_t c;
float u = ul/wl; float u = ul/wl;
float v = vl/wl; float v = vl/wl;
c = (this->*(ti->r))(ti, u, v); /*
* TODO: Not sure if blending should be done in
* floating point or fixed point, or if it even matters.
*/
uint32_t c;
uint32_t offset_color = float_argb_to_packed_argb(offl);
uint32_t base_color = float_argb_to_packed_argb(bl);
// debug dip to turn on/off bilinear filtering, it's slooooow if (ti->textured) {
if (debug_dip_status&0x1) c = (this->*(ti->r))(ti, u, v);
{ // debug dip to turn on/off bilinear filtering, it's slooooow
if(ti->filter_mode >= TEX_FILTER_BILINEAR) if (debug_dip_status&0x1)
{ {
uint32_t c1 = (this->*(ti->r))(ti, u+1.0f, v); if(ti->filter_mode >= TEX_FILTER_BILINEAR)
uint32_t c2 = (this->*(ti->r))(ti, u+1.0f, v+1.0f); {
uint32_t c3 = (this->*(ti->r))(ti, u, v+1.0f); uint32_t c1 = (this->*(ti->r))(ti, u+1.0f, v);
c = bilinear_filter(c, c1, c2, c3, u, v); uint32_t c2 = (this->*(ti->r))(ti, u+1.0f, v+1.0f);
uint32_t c3 = (this->*(ti->r))(ti, u, v+1.0f);
c = bilinear_filter(c, c1, c2, c3, u, v);
}
} }
uint32_t tmp;
switch (ti->tsinstruction) {
case 0:
// decal
c = bls24(c, offset_color) | (c & 0xff000000);
break;
case 1:
// modulate
tmp = blc(c, base_color);
tmp = bls24(tmp, offset_color);
tmp |= c & 0xff000000;
c = tmp;
break;
case 2:
// decal with alpha
tmp = bls24(bla(c, c), blia(base_color, c));
c = bls24(tmp, offset_color) | (base_color & 0xff000000);
break;
case 3:
// modulate with alpha
tmp = blc(c, base_color);
tmp = bls24(tmp, offset_color);
tmp |= (((c >> 24) * (base_color >> 24)) >> 8) << 24;
c = tmp;
break;
}
} else {
c = bls24(base_color, offset_color) | (base_color & 0xff000000);
} }
if(c & 0xff000000) { if(c & 0xff000000) {
@ -2258,6 +2430,10 @@ void powervr2_device::render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, flo
ul += dudx; ul += dudx;
vl += dvdx; vl += dvdx;
wl += dwdx; wl += dwdx;
for (idx = 0; idx < 4; idx++) {
bl[idx] += dbdx[idx];
offl[idx] += dodx[idx];
}
xxl ++; xxl ++;
} }
} }
@ -2268,11 +2444,16 @@ void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
float ul, float ur, float ul, float ur,
float vl, float vr, float vl, float vr,
float wl, float wr, float wl, float wr,
float const bl_in[4], float const br_in[4],
float const offl_in[4], float const offr_in[4],
float dxldy, float dxrdy, float dxldy, float dxrdy,
float duldy, float durdy, float duldy, float durdy,
float dvldy, float dvrdy, float dvldy, float dvrdy,
float dwldy, float dwrdy) float dwldy, float dwrdy,
float const dbldy[4], float const dbrdy[4],
float const doldy[4], float const dordy[4])
{ {
int idx;
float dy; float dy;
int yy0, yy1; int yy0, yy1;
@ -2281,6 +2462,12 @@ void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
if(y1 > 480) if(y1 > 480)
y1 = 480; y1 = 480;
float bl[4], br[4], offl[4], offr[4];
memcpy(bl, bl_in, sizeof(bl));
memcpy(br, br_in, sizeof(br));
memcpy(offl, offl_in, sizeof(offl));
memcpy(offr, offr_in, sizeof(offr));
if(y0 < 0) { if(y0 < 0) {
xl += -dxldy*y0; xl += -dxldy*y0;
xr += -dxrdy*y0; xr += -dxrdy*y0;
@ -2290,6 +2477,13 @@ void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
vr += -dvrdy*y0; vr += -dvrdy*y0;
wl += -dwldy*y0; wl += -dwldy*y0;
wr += -dwrdy*y0; wr += -dwrdy*y0;
for (idx = 0; idx < 4; idx++) {
bl[idx] += -dbldy[idx] * y0;
br[idx] += -dbrdy[idx] * y0;
offl[idx] += -doldy[idx] * y0;
offr[idx] += -dordy[idx] * y0;
}
y0 = 0; y0 = 0;
} }
@ -2314,9 +2508,15 @@ void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
vr += dy*dvrdy; vr += dy*dvrdy;
wl += dy*dwldy; wl += dy*dwldy;
wr += dy*dwrdy; wr += dy*dwrdy;
for (idx = 0; idx < 4; idx++) {
bl[idx] += dy * dbldy[idx];
br[idx] += dy * dbrdy[idx];
offl[idx] += dy * doldy[idx];
offr[idx] += dy * dordy[idx];
}
while(yy0 < yy1) { while(yy0 < yy1) {
render_hline(bitmap, ti, yy0, xl, xr, ul, ur, vl, vr, wl, wr); render_hline(bitmap, ti, yy0, xl, xr, ul, ur, vl, vr, wl, wr, bl, br, offl, offr);
xl += dxldy; xl += dxldy;
xr += dxrdy; xr += dxrdy;
@ -2326,6 +2526,13 @@ void powervr2_device::render_span(bitmap_rgb32 &bitmap, texinfo *ti,
vr += dvrdy; vr += dvrdy;
wl += dwldy; wl += dwldy;
wr += dwrdy; wr += dwrdy;
for (idx = 0; idx < 4; idx++) {
bl[idx] += dbldy[idx];
br[idx] += dbrdy[idx];
offl[idx] += doldy[idx];
offr[idx] += dordy[idx];
}
yy0 ++; yy0 ++;
} }
} }
@ -2370,10 +2577,94 @@ void powervr2_device::render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const
if(v0->y >= 480 || v2->y < 0) if(v0->y >= 480 || v2->y < 0)
return; return;
float db01[4] = {
v1->b[0] - v0->b[0],
v1->b[1] - v0->b[1],
v1->b[2] - v0->b[2],
v1->b[3] - v0->b[3]
};
float db02[4] = {
v2->b[0] - v0->b[0],
v2->b[1] - v0->b[1],
v2->b[2] - v0->b[2],
v2->b[3] - v0->b[3]
};
float db12[4] = {
v2->b[0] - v1->b[0],
v2->b[1] - v1->b[1],
v2->b[2] - v1->b[2],
v2->b[3] - v1->b[3]
};
float do01[4] = {
v1->o[0] - v0->o[0],
v1->o[1] - v0->o[1],
v1->o[2] - v0->o[2],
v1->o[3] - v0->o[3]
};
float do02[4] = {
v2->o[0] - v0->o[0],
v2->o[1] - v0->o[1],
v2->o[2] - v0->o[2],
v2->o[3] - v0->o[3]
};
float do12[4] = {
v2->o[0] - v1->o[0],
v2->o[1] - v1->o[1],
v2->o[2] - v1->o[2],
v2->o[3] - v1->o[3]
};
dy01 = v1->y - v0->y; dy01 = v1->y - v0->y;
dy02 = v2->y - v0->y; dy02 = v2->y - v0->y;
dy12 = v2->y - v1->y; dy12 = v2->y - v1->y;
float db01dy[4] = {
dy01 ? db01[0]/dy01 : 0,
dy01 ? db01[1]/dy01 : 0,
dy01 ? db01[2]/dy01 : 0,
dy01 ? db01[3]/dy01 : 0
};
float db02dy[4] = {
dy01 ? db02[0]/dy02 : 0,
dy01 ? db02[1]/dy02 : 0,
dy01 ? db02[2]/dy02 : 0,
dy01 ? db02[3]/dy02 : 0
};
float db12dy[4] = {
dy01 ? db12[0]/dy12 : 0,
dy01 ? db12[1]/dy12 : 0,
dy01 ? db12[2]/dy12 : 0,
dy01 ? db12[3]/dy12 : 0
};
float do01dy[4] = {
dy01 ? do01[0]/dy01 : 0,
dy01 ? do01[1]/dy01 : 0,
dy01 ? do01[2]/dy01 : 0,
dy01 ? do01[3]/dy01 : 0
};
float do02dy[4] = {
dy01 ? do02[0]/dy02 : 0,
dy01 ? do02[1]/dy02 : 0,
dy01 ? do02[2]/dy02 : 0,
dy01 ? do02[3]/dy02 : 0
};
float do12dy[4] = {
dy01 ? do12[0]/dy12 : 0,
dy01 ? do12[1]/dy12 : 0,
dy01 ? do12[2]/dy12 : 0,
dy01 ? do12[3]/dy12 : 0
};
dx01dy = dy01 ? (v1->x-v0->x)/dy01 : 0; dx01dy = dy01 ? (v1->x-v0->x)/dy01 : 0;
dx02dy = dy02 ? (v2->x-v0->x)/dy02 : 0; dx02dy = dy02 ? (v2->x-v0->x)/dy02 : 0;
dx12dy = dy12 ? (v2->x-v1->x)/dy12 : 0; dx12dy = dy12 ? (v2->x-v1->x)/dy12 : 0;
@ -2395,31 +2686,43 @@ void powervr2_device::render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const
return; return;
if(v1->x > v0->x) if(v1->x > v0->x)
render_span(bitmap, ti, v1->y, v2->y, v0->x, v1->x, v0->u, v1->u, v0->v, v1->v, v0->w, v1->w, dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy); render_span(bitmap, ti, v1->y, v2->y, v0->x, v1->x, v0->u, v1->u, v0->v, v1->v, v0->w, v1->w, v0->b, v1->b, v0->o, v1->o, dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy, db02dy, db12dy, do02dy, do12dy);
else else
render_span(bitmap, ti, v1->y, v2->y, v1->x, v0->x, v1->u, v0->u, v1->v, v0->v, v1->w, v0->w, dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy); render_span(bitmap, ti, v1->y, v2->y, v1->x, v0->x, v1->u, v0->u, v1->v, v0->v, v1->w, v0->w, v1->b, v0->b, v1->o, v0->o, dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy, db12dy, db02dy, do12dy, do02dy);
} else if(!dy12) { } else if(!dy12) {
if(v2->x > v1->x) if(v2->x > v1->x)
render_span(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy); render_span(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o, dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy, db01dy, db02dy, do01dy, do02dy);
else else
render_span(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy); render_span(bitmap, ti, v0->y, v1->y, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o, dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy, db02dy, db01dy, do02dy, do01dy);
} else { } else {
float idk_b[4] = {
v0->b[0] + db02dy[0] * dy01,
v0->b[1] + db02dy[1] * dy01,
v0->b[2] + db02dy[2] * dy01,
v0->b[3] + db02dy[3] * dy01
};
float idk_o[4] = {
v0->o[0] + do02dy[0] * dy01,
v0->o[1] + do02dy[1] * dy01,
v0->o[2] + do02dy[2] * dy01,
v0->o[3] + do02dy[3] * dy01
};
if(dx01dy < dx02dy) { if(dx01dy < dx02dy) {
render_span(bitmap, ti, v0->y, v1->y, render_span(bitmap, ti, v0->y, v1->y,
v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o,
dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy); dx01dy, dx02dy, du01dy, du02dy, dv01dy, dv02dy, dw01dy, dw02dy, db01dy, db02dy, do01dy, do02dy);
render_span(bitmap, ti, v1->y, v2->y, render_span(bitmap, ti, v1->y, v2->y,
v1->x, v0->x + dx02dy*dy01, v1->u, v0->u + du02dy*dy01, v1->v, v0->v + dv02dy*dy01, v1->w, v0->w + dw02dy*dy01, v1->x, v0->x + dx02dy*dy01, v1->u, v0->u + du02dy*dy01, v1->v, v0->v + dv02dy*dy01, v1->w, v0->w + dw02dy*dy01, v1->b, idk_b, v1->o, idk_o,
dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy); dx12dy, dx02dy, du12dy, du02dy, dv12dy, dv02dy, dw12dy, dw02dy, db12dy, db02dy, do12dy, do02dy);
} else { } else {
render_span(bitmap, ti, v0->y, v1->y, render_span(bitmap, ti, v0->y, v1->y,
v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->x, v0->x, v0->u, v0->u, v0->v, v0->v, v0->w, v0->w, v0->b, v0->b, v0->o, v0->o,
dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy); dx02dy, dx01dy, du02dy, du01dy, dv02dy, dv01dy, dw02dy, dw01dy, db02dy, db01dy, do02dy, do01dy);
render_span(bitmap, ti, v1->y, v2->y, render_span(bitmap, ti, v1->y, v2->y,
v0->x + dx02dy*dy01, v1->x, v0->u + du02dy*dy01, v1->u, v0->v + dv02dy*dy01, v1->v, v0->w + dw02dy*dy01, v1->w, v0->x + dx02dy*dy01, v1->x, v0->u + du02dy*dy01, v1->u, v0->v + dv02dy*dy01, v1->v, v0->w + dw02dy*dy01, v1->w, idk_b, v1->b, idk_o, v1->o,
dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy); dx02dy, dx12dy, du02dy, du12dy, dv02dy, dv12dy, dw02dy, dw12dy, db02dy, db12dy, do02dy, do12dy);
} }
} }
} }

View File

@ -59,10 +59,35 @@ public:
// our implementation is not currently tile based, and thus the accumulation buffer is screen sized // our implementation is not currently tile based, and thus the accumulation buffer is screen sized
std::unique_ptr<bitmap_rgb32> fake_accumulationbuffer_bitmap; std::unique_ptr<bitmap_rgb32> fake_accumulationbuffer_bitmap;
/*
* Per-polygon base and offset colors. These are scaled by per-vertex
* weights.
*
* These are used if the color type in the polygon header is 2 or 3;
* they are also loaded from the header when the parameter type is 5
* (quad). If the color type is 0 or 1, then each vertex's base and
* offset colors are specified completely independently of one another
* in the per-vertex parameters.
*
* The base color is combined with the texture sample (if any) according
* to one of four fixed functions. The offset color is then added to
* the combined texture sample and base color with the exception of
* alpha.
*
* poly_offs_color is not always used. Not specifying a poly_offs_color
* is equivalent to using a poly_offs_color of 0.
*
* poly_last_mode_2_base_color is used to hold the last base color
* specified using color type 2. Color type 3 will always use the last
* base color specified using color type 2.
*/
float poly_base_color[4], poly_offs_color[4],
poly_last_mode_2_base_color[4];
struct texinfo { struct texinfo {
uint32_t address, vqbase; uint32_t address, vqbase;
uint32_t nontextured_pal_int;
uint8_t nontextured_fpal_a,nontextured_fpal_r,nontextured_fpal_g,nontextured_fpal_b; uint32_t tsinstruction;
int textured, sizex, sizey, stride, sizes, pf, palette, mode, mipmapped, blend_mode, filter_mode; int textured, sizex, sizey, stride, sizes, pf, palette, mode, mipmapped, blend_mode, filter_mode;
int coltype; int coltype;
@ -76,6 +101,9 @@ public:
typedef struct typedef struct
{ {
float x, y, w, u, v; float x, y, w, u, v;
// base and offset colors
float b[4], o[4];
} vert; } vert;
struct strip struct strip
@ -113,13 +141,11 @@ public:
int grabsellast; int grabsellast;
uint32_t paracontrol,paratype,endofstrip,listtype,global_paratype,parameterconfig; uint32_t paracontrol,paratype,endofstrip,listtype,global_paratype,parameterconfig;
uint32_t groupcontrol,groupen,striplen,userclip; uint32_t groupcontrol,groupen,striplen,userclip;
uint32_t objcontrol,shadow,volume,coltype,texture,offfset,gouraud,uv16bit; uint32_t objcontrol,shadow,volume,coltype,texture,offset_color_enable,gouraud,uv16bit;
uint32_t texturesizes,textureaddress,scanorder,pixelformat; uint32_t texturesizes,textureaddress,scanorder,pixelformat;
uint32_t blend_mode, srcselect,dstselect,fogcontrol,colorclamp, use_alpha; uint32_t blend_mode, srcselect,dstselect,fogcontrol,colorclamp, use_alpha;
uint32_t ignoretexalpha,flipuv,clampuv,filtermode,sstexture,mmdadjust,tsinstruction; uint32_t ignoretexalpha,flipuv,clampuv,filtermode,sstexture,mmdadjust,tsinstruction;
uint32_t depthcomparemode,cullingmode,zwritedisable,cachebypass,dcalcctrl,volumeinstruction,mipmapped,vqcompressed,strideselect,paletteselector; uint32_t depthcomparemode,cullingmode,zwritedisable,cachebypass,dcalcctrl,volumeinstruction,mipmapped,vqcompressed,strideselect,paletteselector;
uint32_t nontextured_pal_int;
float nontextured_fpal_a,nontextured_fpal_r,nontextured_fpal_g,nontextured_fpal_b;
uint64_t *dc_texture_ram; uint64_t *dc_texture_ram;
uint64_t *dc_framebuffer_ram; uint64_t *dc_framebuffer_ram;
@ -321,6 +347,9 @@ private:
static int uv_flip(float uv, int size); static int uv_flip(float uv, int size);
static int uv_clamp(float uv, int size); static int uv_clamp(float uv, int size);
static inline uint32_t float_argb_to_packed_argb(float argb[4]);
static inline void packed_argb_to_float_argb(float dst[4], uint32_t in);
static inline int32_t clamp(int32_t in, int32_t min, int32_t max); static inline int32_t clamp(int32_t in, int32_t min, int32_t max);
static inline uint32_t bilinear_filter(uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3, float u, float v); static inline uint32_t bilinear_filter(uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3, float u, float v);
static inline uint32_t bla(uint32_t c, uint32_t a); static inline uint32_t bla(uint32_t c, uint32_t a);
@ -328,6 +357,8 @@ private:
static inline uint32_t blc(uint32_t c1, uint32_t c2); static inline uint32_t blc(uint32_t c1, uint32_t c2);
static inline uint32_t blic(uint32_t c1, uint32_t c2); static inline uint32_t blic(uint32_t c1, uint32_t c2);
static inline uint32_t bls(uint32_t c1, uint32_t c2); static inline uint32_t bls(uint32_t c1, uint32_t c2);
static inline uint32_t bls24(uint32_t c1, uint32_t c2);
static uint32_t bl00(uint32_t s, uint32_t d); static uint32_t bl00(uint32_t s, uint32_t d);
static uint32_t bl01(uint32_t s, uint32_t d); static uint32_t bl01(uint32_t s, uint32_t d);
static uint32_t bl02(uint32_t s, uint32_t d); static uint32_t bl02(uint32_t s, uint32_t d);
@ -428,23 +459,24 @@ private:
uint32_t tex_r_p8_8888_tw(texinfo *t, float x, float y); uint32_t tex_r_p8_8888_tw(texinfo *t, float x, float y);
uint32_t tex_r_p8_8888_vq(texinfo *t, float x, float y); uint32_t tex_r_p8_8888_vq(texinfo *t, float x, float y);
uint32_t tex_r_nt_palint(texinfo *t, float x, float y);
uint32_t tex_r_nt_palfloat(texinfo *t, float x, float y);
uint32_t tex_r_default(texinfo *t, float x, float y); uint32_t tex_r_default(texinfo *t, float x, float y);
void tex_get_info(texinfo *t); void tex_get_info(texinfo *t);
void render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr); void render_hline(bitmap_rgb32 &bitmap, texinfo *ti, int y, float xl, float xr, float ul, float ur, float vl, float vr, float wl, float wr, float const bl[4], float const br[4], float const offl[4], float const offr[4]);
void render_span(bitmap_rgb32 &bitmap, texinfo *ti, void render_span(bitmap_rgb32 &bitmap, texinfo *ti,
float y0, float y1, float y0, float y1,
float xl, float xr, float xl, float xr,
float ul, float ur, float ul, float ur,
float vl, float vr, float vl, float vr,
float wl, float wr, float wl, float wr,
float const bl[4], float const br[4],
float const offl[4], float const offr[4],
float dxldy, float dxrdy, float dxldy, float dxrdy,
float duldy, float durdy, float duldy, float durdy,
float dvldy, float dvrdy, float dvldy, float dvrdy,
float dwldy, float dwrdy); float dwldy, float dwrdy,
float const dbldy[4], float const dbrdy[4],
float const doldy[4], float const dordy[4]);
void sort_vertices(const vert *v, int *i0, int *i1, int *i2); void sort_vertices(const vert *v, int *i0, int *i1, int *i2);
void render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v0, const vert *v1, const vert *v2); void render_tri_sorted(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v0, const vert *v1, const vert *v2);
void render_tri(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v); void render_tri(bitmap_rgb32 &bitmap, texinfo *ti, const vert *v);