mirror of
https://github.com/holub/mame
synced 2025-06-03 19:36:26 +03:00
1884 lines
62 KiB
C++
1884 lines
62 KiB
C++
// license:BSD-3-Clause
|
|
// copyright-holders:Aaron Giles
|
|
/***************************************************************************
|
|
|
|
rendersw.hxx
|
|
|
|
Software-only rasterization system.
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
#include "emucore.h"
|
|
#include "eminline.h"
|
|
#include "video/rgbutil.h"
|
|
#include "render.h"
|
|
|
|
|
|
template <typename PixelType, int SrcShiftR, int SrcShiftG, int SrcShiftB, int DstShiftR, int DstShiftG, int DstShiftB, bool NoDestRead = false, bool BilinearFilter = false>
|
|
class software_renderer
|
|
{
|
|
private:
|
|
// internal structs
|
|
struct quad_setup_data
|
|
{
|
|
s32 dudx, dvdx, dudy, dvdy;
|
|
s32 startu, startv;
|
|
s32 startx, starty;
|
|
s32 endx, endy;
|
|
};
|
|
|
|
// internal helpers
|
|
static constexpr bool is_opaque(float alpha) { return (alpha >= (NoDestRead ? 0.5f : 1.0f)); }
|
|
static constexpr bool is_transparent(float alpha) { return (alpha < (NoDestRead ? 0.5f : 0.0001f)); }
|
|
static inline rgb_t apply_intensity(int intensity, rgb_t color) { return color.scale8(intensity); }
|
|
static inline float round_nearest(float f) { return floor(f + 0.5f); }
|
|
|
|
// destination pixels are written based on the values of the template parameters
|
|
static constexpr PixelType dest_assemble_rgb(u32 r, u32 g, u32 b) { return (r << DstShiftR) | (g << DstShiftG) | (b << DstShiftB); }
|
|
static constexpr PixelType dest_rgb_to_pixel(u32 r, u32 g, u32 b) { return dest_assemble_rgb(r >> SrcShiftR, g >> SrcShiftG, b >> SrcShiftB); }
|
|
|
|
// source 32-bit pixels are in MAME standardized format
|
|
static constexpr u32 source32_r(u32 pixel) { return (pixel >> (16 + SrcShiftR)) & (0xff >> SrcShiftR); }
|
|
static constexpr u32 source32_g(u32 pixel) { return (pixel >> ( 8 + SrcShiftG)) & (0xff >> SrcShiftG); }
|
|
static constexpr u32 source32_b(u32 pixel) { return (pixel >> ( 0 + SrcShiftB)) & (0xff >> SrcShiftB); }
|
|
|
|
// destination pixel masks are based on the template parameters as well
|
|
static constexpr u32 dest_r(PixelType pixel) { return (pixel >> DstShiftR) & (0xff >> SrcShiftR); }
|
|
static constexpr u32 dest_g(PixelType pixel) { return (pixel >> DstShiftG) & (0xff >> SrcShiftG); }
|
|
static constexpr u32 dest_b(PixelType pixel) { return (pixel >> DstShiftB) & (0xff >> SrcShiftB); }
|
|
|
|
// generic conversion with special optimization for destinations in the standard format
|
|
static constexpr PixelType source32_to_dest(u32 pixel)
|
|
{
|
|
if (SrcShiftR == 0 && SrcShiftG == 0 && SrcShiftB == 0 && DstShiftR == 16 && DstShiftG == 8 && DstShiftB == 0)
|
|
return pixel;
|
|
else
|
|
return dest_assemble_rgb(source32_r(pixel), source32_g(pixel), source32_b(pixel));
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// ycc_to_rgb - convert YCC to RGB; the YCC pixel
|
|
// contains Y in the LSB, Cb << 8, and Cr << 16
|
|
// This actually a YCbCr conversion,
|
|
// details my be found in chapter 6.4 ff of
|
|
// http://softwarecommunity.intel.com/isn/downloads/softwareproducts/pdfs/346495.pdf
|
|
// The document also contains the constants below as floats.
|
|
//-------------------------------------------------
|
|
|
|
static constexpr u32 clamp16_shift8(u32 x)
|
|
{
|
|
return (s32(x) < 0) ? 0 : (x > 65535) ? 255 : (x >> 8);
|
|
}
|
|
|
|
static constexpr u32 ycc_to_rgb(u32 ycc)
|
|
{
|
|
// original equations:
|
|
//
|
|
// C = Y - 16
|
|
// D = Cb - 128
|
|
// E = Cr - 128
|
|
//
|
|
// R = clip(( 298 * C + 409 * E + 128) >> 8)
|
|
// G = clip(( 298 * C - 100 * D - 208 * E + 128) >> 8)
|
|
// B = clip(( 298 * C + 516 * D + 128) >> 8)
|
|
//
|
|
// R = clip(( 298 * (Y - 16) + 409 * (Cr - 128) + 128) >> 8)
|
|
// G = clip(( 298 * (Y - 16) - 100 * (Cb - 128) - 208 * (Cr - 128) + 128) >> 8)
|
|
// B = clip(( 298 * (Y - 16) + 516 * (Cb - 128) + 128) >> 8)
|
|
//
|
|
// R = clip(( 298 * Y - 298 * 16 + 409 * Cr - 409 * 128 + 128) >> 8)
|
|
// G = clip(( 298 * Y - 298 * 16 - 100 * Cb + 100 * 128 - 208 * Cr + 208 * 128 + 128) >> 8)
|
|
// B = clip(( 298 * Y - 298 * 16 + 516 * Cb - 516 * 128 + 128) >> 8)
|
|
//
|
|
// R = clip(( 298 * Y - 298 * 16 + 409 * Cr - 409 * 128 + 128) >> 8)
|
|
// G = clip(( 298 * Y - 298 * 16 - 100 * Cb + 100 * 128 - 208 * Cr + 208 * 128 + 128) >> 8)
|
|
// B = clip(( 298 * Y - 298 * 16 + 516 * Cb - 516 * 128 + 128) >> 8)
|
|
//
|
|
// Now combine constants:
|
|
//
|
|
// R = clip(( 298 * Y + 409 * Cr - 56992) >> 8)
|
|
// G = clip(( 298 * Y - 100 * Cb - 208 * Cr + 34784) >> 8)
|
|
// B = clip(( 298 * Y + 516 * Cb - 70688) >> 8)
|
|
//
|
|
// Define common = 298 * y - 56992. This will save one addition
|
|
//
|
|
// R = clip(( common + 409 * Cr - 0) >> 8)
|
|
// G = clip(( common - 100 * Cb - 208 * Cr + 91776) >> 8)
|
|
// B = clip(( common + 516 * Cb - 13696) >> 8)
|
|
//
|
|
|
|
u8 y = ycc;
|
|
u8 cb = ycc >> 8;
|
|
u8 cr = ycc >> 16;
|
|
|
|
u32 common = 298 * y - 56992;
|
|
u32 r = (common + 409 * cr);
|
|
u32 g = (common - 100 * cb - 208 * cr + 91776);
|
|
u32 b = (common + 516 * cb - 13696);
|
|
|
|
// Now clamp and shift back
|
|
return rgb_t(clamp16_shift8(r), clamp16_shift8(g), clamp16_shift8(b));
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// get_texel_palette16 - return a texel from a
|
|
// palettized 16bpp source
|
|
//-------------------------------------------------
|
|
|
|
static inline u32 get_texel_palette16(const render_texinfo &texture, s32 curu, s32 curv)
|
|
{
|
|
rgb_t const *const palbase = texture.palette;
|
|
if constexpr (BilinearFilter)
|
|
{
|
|
s32 u0 = curu >> 16;
|
|
s32 u1 = 1;
|
|
if (u0 < 0) u0 = u1 = 0;
|
|
else if (u0 + 1 >= texture.width) u0 = texture.width - 1, u1 = 0;
|
|
s32 v0 = curv >> 16;
|
|
s32 v1 = texture.rowpixels;
|
|
if (v0 < 0) v0 = v1 = 0;
|
|
else if (v0 + 1 >= texture.height) v0 = texture.height - 1, v1 = 0;
|
|
|
|
u16 const *texbase = reinterpret_cast<u16 const *>(texture.base);
|
|
texbase += v0 * texture.rowpixels + u0;
|
|
|
|
u32 pix00 = palbase[texbase[0]];
|
|
u32 pix01 = palbase[texbase[u1]];
|
|
u32 pix10 = palbase[texbase[v1]];
|
|
u32 pix11 = palbase[texbase[u1 + v1]];
|
|
return rgbaint_t::bilinear_filter(pix00, pix01, pix10, pix11, curu >> 8, curv >> 8);
|
|
}
|
|
else
|
|
{
|
|
s32 u = std::clamp<s32>(curu >> 16, 0, texture.width - 1);
|
|
s32 v = std::clamp<s32>(curv >> 16, 0, texture.height - 1);
|
|
|
|
u16 const *const texbase = reinterpret_cast<u16 const *>(texture.base) + v * texture.rowpixels + u;
|
|
return palbase[texbase[0]];
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// get_texel_palette16a - return a texel from a
|
|
// palettized 16bpp source with alpha
|
|
//-------------------------------------------------
|
|
|
|
static inline u32 get_texel_palette16a(const render_texinfo &texture, s32 curu, s32 curv)
|
|
{
|
|
rgb_t const *const palbase = texture.palette;
|
|
if constexpr (BilinearFilter)
|
|
{
|
|
s32 u0 = curu >> 16;
|
|
s32 u1 = 1;
|
|
if (u0 < 0) u0 = u1 = 0;
|
|
else if (u0 + 1 >= texture.width) u0 = texture.width - 1, u1 = 0;
|
|
s32 v0 = curv >> 16;
|
|
s32 v1 = texture.rowpixels;
|
|
if (v0 < 0) v0 = v1 = 0;
|
|
else if (v0 + 1 >= texture.height) v0 = texture.height - 1, v1 = 0;
|
|
|
|
u16 const *texbase = reinterpret_cast<u16 const *>(texture.base);
|
|
texbase += v0 * texture.rowpixels + u0;
|
|
|
|
return rgbaint_t::bilinear_filter(palbase[texbase[0]], palbase[texbase[u1]], palbase[texbase[v1]], palbase[texbase[u1 + v1]], curu >> 8, curv >> 8);
|
|
}
|
|
else
|
|
{
|
|
s32 u = std::clamp<s32>(curu >> 16, 0, texture.width - 1);
|
|
s32 v = std::clamp<s32>(curv >> 16, 0, texture.height - 1);
|
|
|
|
u16 const *const texbase = reinterpret_cast<u16 const *>(texture.base) + v * texture.rowpixels + u;
|
|
return palbase[texbase[0]];
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// get_texel_yuy16 - return a texel from a 16bpp
|
|
// YCbCr source (pixel is returned as Cr-Cb-Y)
|
|
//-------------------------------------------------
|
|
|
|
static inline u32 get_texel_yuy16(const render_texinfo &texture, s32 curu, s32 curv)
|
|
{
|
|
if constexpr (BilinearFilter)
|
|
{
|
|
s32 u0 = curu >> 16;
|
|
s32 u1 = 1;
|
|
if (u0 < 0) u0 = u1 = 0;
|
|
else if (u0 + 1 >= texture.width) u0 = texture.width - 1, u1 = 0;
|
|
s32 v0 = curv >> 16;
|
|
s32 v1 = texture.rowpixels;
|
|
if (v0 < 0) v0 = v1 = 0;
|
|
else if (v0 + 1 >= texture.height) v0 = texture.height - 1, v1 = 0;
|
|
|
|
const u16 *texbase = reinterpret_cast<const u16 *>(texture.base);
|
|
texbase += v0 * texture.rowpixels + (u0 & ~1);
|
|
|
|
u32 pix00, pix01, pix10, pix11;
|
|
if ((curu & 0x10000) == 0)
|
|
{
|
|
u32 cbcr = ((texbase[0] & 0xff) << 8) | ((texbase[1] & 0xff) << 16);
|
|
pix00 = (texbase[0] >> 8) | cbcr;
|
|
pix01 = (texbase[u1] >> 8) | cbcr;
|
|
cbcr = ((texbase[v1 + 0] & 0xff) << 8) | ((texbase[v1 + 1] & 0xff) << 16);
|
|
pix10 = (texbase[v1 + 0] >> 8) | cbcr;
|
|
pix11 = (texbase[v1 + u1] >> 8) | cbcr;
|
|
}
|
|
else
|
|
{
|
|
u32 cbcr = ((texbase[0] & 0xff) << 8) | ((texbase[1] & 0xff) << 16);
|
|
pix00 = (texbase[1] >> 8) | cbcr;
|
|
if (u1 != 0)
|
|
{
|
|
cbcr = ((texbase[2] & 0xff) << 8) | ((texbase[3] & 0xff) << 16);
|
|
pix01 = (texbase[2] >> 8) | cbcr;
|
|
}
|
|
else
|
|
pix01 = pix00;
|
|
cbcr = ((texbase[v1 + 0] & 0xff) << 8) | ((texbase[v1 + 1] & 0xff) << 16);
|
|
pix10 = (texbase[v1 + 1] >> 8) | cbcr;
|
|
if (u1 != 0)
|
|
{
|
|
cbcr = ((texbase[v1 + 2] & 0xff) << 8) | ((texbase[v1 + 3] & 0xff) << 16);
|
|
pix11 = (texbase[v1 + 2] >> 8) | cbcr;
|
|
}
|
|
else
|
|
pix11 = pix10;
|
|
}
|
|
return rgbaint_t::bilinear_filter(pix00, pix01, pix10, pix11, curu >> 8, curv >> 8);
|
|
}
|
|
else
|
|
{
|
|
s32 u = std::clamp<s32>(curu >> 16, 0, texture.width - 1);
|
|
s32 v = std::clamp<s32>(curv >> 16, 0, texture.height - 1);
|
|
|
|
const u16 *texbase = reinterpret_cast<const u16 *>(texture.base) + v * texture.rowpixels + (u >> 1) * 2;
|
|
return (texbase[u & 1] >> 8) | ((texbase[0] & 0xff) << 8) | ((texbase[1] & 0xff) << 16);
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// get_texel_rgb32 - return a texel from a 32bpp
|
|
// RGB source
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static inline u32 get_texel_rgb32(const render_texinfo &texture, s32 curu, s32 curv)
|
|
{
|
|
if constexpr (BilinearFilter)
|
|
{
|
|
s32 u0, u1, v0, v1;
|
|
if constexpr (Wrap)
|
|
{
|
|
u0 = (curu >> 16) % texture.width;
|
|
if (0 > u0)
|
|
u0 += texture.width;
|
|
u1 = (u0 + 1) % texture.width;
|
|
|
|
v0 = (curv >> 16) % texture.height;
|
|
if (0 > v0)
|
|
v0 += texture.height;
|
|
v1 = (v0 + 1) % texture.height;
|
|
}
|
|
else
|
|
{
|
|
u0 = curu >> 16;
|
|
if (u0 < 0)
|
|
u0 = u1 = 0;
|
|
else if (texture.width <= (u0 + 1))
|
|
u0 = u1 = texture.width - 1;
|
|
else
|
|
u1 = u0 + 1;
|
|
|
|
v0 = curv >> 16;
|
|
if (v0 < 0)
|
|
v0 = v1 = 0;
|
|
else if (texture.height <= (v0 + 1))
|
|
v0 = v1 = texture.height - 1;
|
|
else
|
|
v1 = v0 + 1;
|
|
}
|
|
u32 const *const texbase = reinterpret_cast<u32 const *>(texture.base);
|
|
u32 const *const row0base = texbase + (v0 * texture.rowpixels);
|
|
u32 const *const row1base = texbase + (v1 * texture.rowpixels);
|
|
return rgbaint_t::bilinear_filter(row0base[u0], row0base[u1], row1base[u0], row1base[u1], curu >> 8, curv >> 8);
|
|
}
|
|
else
|
|
{
|
|
s32 u, v;
|
|
if constexpr (Wrap)
|
|
{
|
|
u = (curu >> 16) % texture.width;
|
|
if (0 > u)
|
|
u += texture.width;
|
|
|
|
v = (curv >> 16) % texture.height;
|
|
if (0 > v)
|
|
v += texture.height;
|
|
}
|
|
else
|
|
{
|
|
u = std::clamp<s32>(curu >> 16, 0, texture.width - 1);
|
|
v = std::clamp<s32>(curv >> 16, 0, texture.height - 1);
|
|
}
|
|
u32 const *const rowbase = reinterpret_cast<u32 const *>(texture.base) + (v * texture.rowpixels);
|
|
return rowbase[u];
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// get_texel_argb32 - return a texel from a 32bpp
|
|
// ARGB source
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static inline u32 get_texel_argb32(render_texinfo const &texture, s32 curu, s32 curv)
|
|
{
|
|
if constexpr (BilinearFilter)
|
|
{
|
|
s32 u0, u1, v0, v1;
|
|
if constexpr (Wrap)
|
|
{
|
|
u0 = (curu >> 16) % texture.width;
|
|
if (0 > u0)
|
|
u0 += texture.width;
|
|
u1 = (u0 + 1) % texture.width;
|
|
|
|
v0 = (curv >> 16) % texture.height;
|
|
if (0 > v0)
|
|
v0 += texture.height;
|
|
v1 = (v0 + 1) % texture.height;
|
|
}
|
|
else
|
|
{
|
|
u0 = curu >> 16;
|
|
if (u0 < 0)
|
|
u0 = u1 = 0;
|
|
else if (texture.width <= (u0 + 1))
|
|
u0 = u1 = texture.width - 1;
|
|
else
|
|
u1 = u0 + 1;
|
|
|
|
v0 = curv >> 16;
|
|
if (v0 < 0)
|
|
v0 = v1 = 0;
|
|
else if (texture.height <= (v0 + 1))
|
|
v0 = v1 = texture.height - 1;
|
|
else
|
|
v1 = v0 + 1;
|
|
}
|
|
u32 const *const texbase = reinterpret_cast<u32 const *>(texture.base);
|
|
u32 const *const row0base = texbase + (v0 * texture.rowpixels);
|
|
u32 const *const row1base = texbase + (v1 * texture.rowpixels);
|
|
return rgbaint_t::bilinear_filter(row0base[u0], row0base[u1], row1base[u0], row1base[u1], curu >> 8, curv >> 8);
|
|
}
|
|
else
|
|
{
|
|
s32 u, v;
|
|
if constexpr (Wrap)
|
|
{
|
|
u = (curu >> 16) % texture.width;
|
|
if (0 > u)
|
|
u += texture.width;
|
|
|
|
v = (curv >> 16) % texture.height;
|
|
if (0 > v)
|
|
v += texture.height;
|
|
}
|
|
else
|
|
{
|
|
u = std::clamp<s32>(curu >> 16, 0, texture.width - 1);
|
|
v = std::clamp<s32>(curv >> 16, 0, texture.height - 1);
|
|
}
|
|
u32 const *const rowbase = reinterpret_cast<u32 const *>(texture.base) + (v * texture.rowpixels);
|
|
return rowbase[u];
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_aa_pixel - draw an antialiased pixel
|
|
//-------------------------------------------------
|
|
|
|
static inline void draw_aa_pixel(PixelType *dstdata, u32 pitch, int x, int y, u32 col)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + x;
|
|
u32 dpix = NoDestRead ? 0 : *dest;
|
|
u32 dr = source32_r(col) + dest_r(dpix);
|
|
u32 dg = source32_g(col) + dest_g(dpix);
|
|
u32 db = source32_b(col) + dest_b(dpix);
|
|
dr = (dr | -(dr >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
dg = (dg | -(dg >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
db = (db | -(db >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest = dest_assemble_rgb(dr, dg, db);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_line - draw a line or point
|
|
//-------------------------------------------------
|
|
|
|
static void draw_line(render_primitive const &prim, PixelType *dstdata, s32 width, s32 height, u32 pitch)
|
|
{
|
|
// internal tables
|
|
static u32 s_cosine_table[2049];
|
|
|
|
// compute the start/end coordinates
|
|
int x1 = int(prim.bounds.x0 * 65536.0f);
|
|
int y1 = int(prim.bounds.y0 * 65536.0f);
|
|
int x2 = int(prim.bounds.x1 * 65536.0f);
|
|
int y2 = int(prim.bounds.y1 * 65536.0f);
|
|
|
|
// handle color and intensity
|
|
u32 col = rgb_t(int(255.0f * prim.color.r * prim.color.a), int(255.0f * prim.color.g * prim.color.a), int(255.0f * prim.color.b * prim.color.a));
|
|
|
|
if (PRIMFLAG_GET_ANTIALIAS(prim.flags))
|
|
{
|
|
// build up the cosine table if we haven't yet
|
|
if (s_cosine_table[0] == 0)
|
|
for (int entry = 0; entry <= 2048; entry++)
|
|
s_cosine_table[entry] = int(double(1.0 / cos(atan(double(entry) / 2048.0))) * 0x10000000 + 0.5);
|
|
|
|
int beam = prim.width * 65536.0f;
|
|
if (beam < 0x00010000)
|
|
beam = 0x00010000;
|
|
|
|
// draw an anti-aliased line
|
|
int dx = abs(x1 - x2);
|
|
int dy = abs(y1 - y2);
|
|
|
|
if (dx >= dy)
|
|
{
|
|
int sx = ((x1 <= x2) ? 1 : -1);
|
|
int sy = (dy == 0) ? 0 : div_32x32_shift(y2 - y1, dx, 16);
|
|
if (sy < 0)
|
|
dy--;
|
|
x1 >>= 16;
|
|
int xx = x2 >> 16;
|
|
int bwidth = mul_32x32_hi(beam << 4, s_cosine_table[abs(sy) >> 5]);
|
|
y1 -= bwidth >> 1; // start back half the diameter
|
|
for (;;)
|
|
{
|
|
if (x1 >= 0 && x1 < width)
|
|
{
|
|
dx = bwidth; // init diameter of beam
|
|
dy = y1 >> 16;
|
|
if (dy >= 0 && dy < height)
|
|
draw_aa_pixel(dstdata, pitch, x1, dy, apply_intensity(0xff & (~y1 >> 8), col));
|
|
dy++;
|
|
dx -= 0x10000 - (0xffff & y1); // take off amount plotted
|
|
u8 a1 = (dx >> 8) & 0xff; // calc remainder pixel
|
|
dx >>= 16; // adjust to pixel (solid) count
|
|
while (dx--) // plot rest of pixels
|
|
{
|
|
if (dy >= 0 && dy < height)
|
|
draw_aa_pixel(dstdata, pitch, x1, dy, col);
|
|
dy++;
|
|
}
|
|
if (dy >= 0 && dy < height)
|
|
draw_aa_pixel(dstdata, pitch, x1, dy, apply_intensity(a1,col));
|
|
}
|
|
if (x1 == xx) break;
|
|
x1 += sx;
|
|
y1 += sy;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int sy = ((y1 <= y2) ? 1: -1);
|
|
int sx = (dx == 0) ? 0 : div_32x32_shift(x2 - x1, dy, 16);
|
|
if (sx < 0)
|
|
dx--;
|
|
y1 >>= 16;
|
|
int yy = y2 >> 16;
|
|
int bwidth = mul_32x32_hi(beam << 4,s_cosine_table[abs(sx) >> 5]);
|
|
x1 -= bwidth >> 1; // start back half the width
|
|
for (;;)
|
|
{
|
|
if (y1 >= 0 && y1 < height)
|
|
{
|
|
dy = bwidth; // calc diameter of beam
|
|
dx = x1 >> 16;
|
|
if (dx >= 0 && dx < width)
|
|
draw_aa_pixel(dstdata, pitch, dx, y1, apply_intensity(0xff & (~x1 >> 8), col));
|
|
dx++;
|
|
dy -= 0x10000 - (0xffff & x1); // take off amount plotted
|
|
u8 a1 = (dy >> 8) & 0xff; // remainder pixel
|
|
dy >>= 16; // adjust to pixel (solid) count
|
|
while (dy--) // plot rest of pixels
|
|
{
|
|
if (dx >= 0 && dx < width)
|
|
draw_aa_pixel(dstdata, pitch, dx, y1, col);
|
|
dx++;
|
|
}
|
|
if (dx >= 0 && dx < width)
|
|
draw_aa_pixel(dstdata, pitch, dx, y1, apply_intensity(a1, col));
|
|
}
|
|
if (y1 == yy) break;
|
|
y1 += sy;
|
|
x1 += sx;
|
|
}
|
|
}
|
|
}
|
|
else // use good old Bresenham for non-antialiasing 980317 BW
|
|
{
|
|
x1 = (x1 + 0x8000) >> 16;
|
|
y1 = (y1 + 0x8000) >> 16;
|
|
x2 = (x2 + 0x8000) >> 16;
|
|
y2 = (y2 + 0x8000) >> 16;
|
|
|
|
int dx = abs(x1 - x2);
|
|
int dy = abs(y1 - y2);
|
|
int sx = (x1 <= x2) ? 1 : -1;
|
|
int sy = (y1 <= y2) ? 1 : -1;
|
|
int cx = dx / 2;
|
|
int cy = dy / 2;
|
|
|
|
if (dx >= dy)
|
|
{
|
|
for (;;)
|
|
{
|
|
if (x1 >= 0 && x1 < width && y1 >= 0 && y1 < height)
|
|
draw_aa_pixel(dstdata, pitch, x1, y1, col);
|
|
if (x1 == x2) break;
|
|
x1 += sx;
|
|
cx -= dy;
|
|
if (cx < 0)
|
|
{
|
|
y1 += sy;
|
|
cx += dx;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (;;)
|
|
{
|
|
if (x1 >= 0 && x1 < width && y1 >= 0 && y1 < height)
|
|
draw_aa_pixel(dstdata, pitch, x1, y1, col);
|
|
if (y1 == y2) break;
|
|
y1 += sy;
|
|
cy -= dx;
|
|
if (cy < 0)
|
|
{
|
|
x1 += sx;
|
|
cy += dy;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//**************************************************************************
|
|
// RECT RASTERIZERS
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// draw_rect - draw a solid rectangle
|
|
//-------------------------------------------------
|
|
|
|
static void draw_rect(render_primitive const &prim, PixelType *dstdata, s32 width, s32 height, u32 pitch)
|
|
{
|
|
render_bounds const fpos = prim.bounds;
|
|
assert(fpos.x0 <= fpos.x1);
|
|
assert(fpos.y0 <= fpos.y1);
|
|
|
|
// clamp to integers and ensure we fit
|
|
s32 const startx = std::clamp<s32>(round_nearest(fpos.x0), 0, width);
|
|
s32 const starty = std::clamp<s32>(round_nearest(fpos.y0), 0, height);
|
|
s32 const endx = std::clamp<s32>(round_nearest(fpos.x1), 0, width);
|
|
s32 const endy = std::clamp<s32>(round_nearest(fpos.y1), 0, height);
|
|
|
|
// bail if nothing left
|
|
if ((startx > endx) || (starty > endy))
|
|
return;
|
|
|
|
// only support alpha and "none" blendmodes
|
|
assert(PRIMFLAG_GET_BLENDMODE(prim.flags) == BLENDMODE_NONE ||
|
|
PRIMFLAG_GET_BLENDMODE(prim.flags) == BLENDMODE_ALPHA);
|
|
|
|
if ((PRIMFLAG_GET_BLENDMODE(prim.flags) == BLENDMODE_NONE) || is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no alpha
|
|
|
|
// clamp R,G,B to 0-256 range
|
|
u32 const r = u32(std::clamp(256.0f * prim.color.r, 0.0f, 255.0f));
|
|
u32 const g = u32(std::clamp(256.0f * prim.color.g, 0.0f, 255.0f));
|
|
u32 const b = u32(std::clamp(256.0f * prim.color.b, 0.0f, 255.0f));
|
|
u32 const pix = dest_rgb_to_pixel(r, g, b);
|
|
|
|
// loop over rows
|
|
for (s32 y = starty; y < endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + startx;
|
|
|
|
// loop over cols
|
|
for (s32 x = startx; x < endx; x++)
|
|
*dest++ = pix;
|
|
}
|
|
}
|
|
else if (!is_transparent(prim.color.a))
|
|
{
|
|
// alpha and/or coloring case
|
|
u32 const rmask = dest_rgb_to_pixel(0xff,0x00,0x00);
|
|
u32 const gmask = dest_rgb_to_pixel(0x00,0xff,0x00);
|
|
u32 const bmask = dest_rgb_to_pixel(0x00,0x00,0xff);
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 r = u32(std::clamp(256.0f * prim.color.r * prim.color.a, 0.0f, 255.0f));
|
|
u32 g = u32(std::clamp(256.0f * prim.color.g * prim.color.a, 0.0f, 255.0f));
|
|
u32 b = u32(std::clamp(256.0f * prim.color.b * prim.color.a, 0.0f, 255.0f));
|
|
u32 const inva = u32(std::clamp(256.0f * (1.0f - prim.color.a), 0.0f, 256.0f));
|
|
|
|
// pre-shift the RGBA pieces
|
|
r = dest_rgb_to_pixel(r, 0, 0) << 8;
|
|
g = dest_rgb_to_pixel(0, g, 0) << 8;
|
|
b = dest_rgb_to_pixel(0, 0, b) << 8;
|
|
|
|
// loop over rows
|
|
for (s32 y = starty; y < endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + startx;
|
|
|
|
// loop over cols
|
|
for (s32 x = startx; x < endx; x++)
|
|
{
|
|
u32 dpix = NoDestRead ? 0 : *dest;
|
|
u32 dr = (r + ((dpix & rmask) * inva)) & (rmask << 8);
|
|
u32 dg = (g + ((dpix & gmask) * inva)) & (gmask << 8);
|
|
u32 db = (b + ((dpix & bmask) * inva)) & (bmask << 8);
|
|
*dest++ = (dr | dg | db) >> 8;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//**************************************************************************
|
|
// 16-BIT PALETTE RASTERIZERS
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_palette16_none - perform
|
|
// rasterization of a 16bpp palettized texture
|
|
//-------------------------------------------------
|
|
|
|
static void draw_quad_palette16_none(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
// ensure all parameters are valid
|
|
assert(prim.texture.palette != nullptr);
|
|
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_palette16(prim.texture, curu, curv);
|
|
*dest++ = source32_to_dest(pix);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
else if (is_opaque(prim.color.a))
|
|
{
|
|
// coloring-only case
|
|
|
|
// clamp R,G,B to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_palette16(prim.texture, curu, curv);
|
|
u32 const r = (source32_r(pix) * sr) >> 8;
|
|
u32 const g = (source32_g(pix) * sg) >> 8;
|
|
u32 const b = (source32_b(pix) * sb) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
else if (!is_transparent(prim.color.a))
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b * prim.color.a, 0.0f, 256.0f));
|
|
u32 const invsa = u32(std::clamp(256.0f * (1.0f - prim.color.a), 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_palette16(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (source32_r(pix) * sr + dest_r(dpix) * invsa) >> 8;
|
|
u32 const g = (source32_g(pix) * sg + dest_g(dpix) * invsa) >> 8;
|
|
u32 const b = (source32_b(pix) * sb + dest_b(dpix) * invsa) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_palette16_add - perform
|
|
// rasterization of a 16bpp palettized texture
|
|
//-------------------------------------------------
|
|
|
|
static void draw_quad_palette16_add(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
// ensure all parameters are valid
|
|
assert(prim.texture.palette != nullptr);
|
|
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
const u32 pix = get_texel_palette16(prim.texture, curu, curv);
|
|
if ((pix & 0xffffff) != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = source32_r(pix) + dest_r(dpix);
|
|
u32 g = source32_g(pix) + dest_g(dpix);
|
|
u32 b = source32_b(pix) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b * prim.color.a, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_palette16(prim.texture, curu, curv);
|
|
if ((pix & 0xffffff) != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((source32_r(pix) * sr) >> 8) + dest_r(dpix);
|
|
u32 g = ((source32_g(pix) * sg) >> 8) + dest_g(dpix);
|
|
u32 b = ((source32_b(pix) * sb) >> 8) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//**************************************************************************
|
|
// 16-BIT YUY RASTERIZERS
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_yuy16_none - perform
|
|
// rasterization of a 16bpp YUY image
|
|
//-------------------------------------------------
|
|
|
|
static void draw_quad_yuy16_none(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = ycc_to_rgb(get_texel_yuy16(prim.texture, curu, curv));
|
|
*dest++ = source32_to_dest(pix);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
else if (is_opaque(prim.color.a))
|
|
{
|
|
// coloring-only case
|
|
|
|
// clamp R,G,B to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = ycc_to_rgb(get_texel_yuy16(prim.texture, curu, curv));
|
|
u32 const r = (source32_r(pix) * sr) >> 8;
|
|
u32 const g = (source32_g(pix) * sg) >> 8;
|
|
u32 const b = (source32_b(pix) * sb) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
else if (!is_transparent(prim.color.a))
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b * prim.color.a, 0.0f, 256.0f));
|
|
u32 const invsa = u32(std::clamp(256.0f * (1.0f - prim.color.a), 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = ycc_to_rgb(get_texel_yuy16(prim.texture, curu, curv));
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (source32_r(pix) * sr + dest_r(dpix) * invsa) >> 8;
|
|
u32 const g = (source32_g(pix) * sg + dest_g(dpix) * invsa) >> 8;
|
|
u32 const b = (source32_b(pix) * sb + dest_b(dpix) * invsa) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_yuy16_add - perform
|
|
// rasterization by using RGB add after YUY
|
|
// conversion
|
|
//-------------------------------------------------
|
|
|
|
static void draw_quad_yuy16_add(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
// simply can't do this without reading from the dest
|
|
if constexpr (NoDestRead)
|
|
return;
|
|
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = ycc_to_rgb(get_texel_yuy16(prim.texture, curu, curv));
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = source32_r(pix) + dest_r(dpix);
|
|
u32 g = source32_g(pix) + dest_g(dpix);
|
|
u32 b = source32_b(pix) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
u32 const sa = u32(std::clamp(256.0f * prim.color.a, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
const u32 pix = ycc_to_rgb(get_texel_yuy16(prim.texture, curu, curv));
|
|
const u32 dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((source32_r(pix) * sr * sa) >> 16) + dest_r(dpix);
|
|
u32 g = ((source32_g(pix) * sg * sa) >> 16) + dest_g(dpix);
|
|
u32 b = ((source32_b(pix) * sb * sa) >> 16) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//**************************************************************************
|
|
// 32-BIT RGB QUAD RASTERIZERS
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_rgb32 - perform rasterization of
|
|
// a 32bpp RGB texture
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static void draw_quad_rgb32(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
rgb_t const *const palbase = prim.texture.palette;
|
|
|
|
// fast case: no coloring, no alpha
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_rgb32<Wrap>(prim.texture, curu, curv);
|
|
*dest++ = source32_to_dest(pix);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_rgb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const r = palbase[(pix >> 16) & 0xff] >> SrcShiftR;
|
|
u32 const g = palbase[(pix >> 8) & 0xff] >> SrcShiftG;
|
|
u32 const b = palbase[(pix >> 0) & 0xff] >> SrcShiftB;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (is_opaque(prim.color.a))
|
|
{
|
|
// coloring-only case
|
|
|
|
// clamp R,G,B to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_rgb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const r = (source32_r(pix) * sr) >> 8;
|
|
u32 const g = (source32_g(pix) * sg) >> 8;
|
|
u32 const b = (source32_b(pix) * sb) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_rgb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const r = (palbase[(pix >> 16) & 0xff] * sr) >> (8 + SrcShiftR);
|
|
u32 const g = (palbase[(pix >> 8) & 0xff] * sg) >> (8 + SrcShiftG);
|
|
u32 const b = (palbase[(pix >> 0) & 0xff] * sb) >> (8 + SrcShiftB);
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (!is_transparent(prim.color.a))
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b * prim.color.a, 0.0f, 256.0f));
|
|
u32 const invsa = u32(std::clamp(256.0f * (1.0f - prim.color.a), 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_rgb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (source32_r(pix) * sr + dest_r(dpix) * invsa) >> 8;
|
|
u32 const g = (source32_g(pix) * sg + dest_g(dpix) * invsa) >> 8;
|
|
u32 const b = (source32_b(pix) * sb + dest_b(dpix) * invsa) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_rgb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = ((palbase[(pix >> 16) & 0xff] >> SrcShiftR) * sr + dest_r(dpix) * invsa) >> 8;
|
|
u32 const g = ((palbase[(pix >> 8) & 0xff] >> SrcShiftG) * sg + dest_g(dpix) * invsa) >> 8;
|
|
u32 const b = ((palbase[(pix >> 0) & 0xff] >> SrcShiftB) * sb + dest_b(dpix) * invsa) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_rgb32_add - perform
|
|
// rasterization by using RGB add
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static void draw_quad_rgb32_add(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
// simply can't do this without reading from the dest
|
|
if (NoDestRead)
|
|
return;
|
|
|
|
rgb_t const *const palbase = prim.texture.palette;
|
|
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = source32_r(pix) + dest_r(dpix);
|
|
u32 g = source32_g(pix) + dest_g(dpix);
|
|
u32 b = source32_b(pix) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = (palbase[(pix >> 16) & 0xff] >> SrcShiftR) + dest_r(dpix);
|
|
u32 g = (palbase[(pix >> 8) & 0xff] >> SrcShiftG) + dest_g(dpix);
|
|
u32 b = (palbase[(pix >> 0) & 0xff] >> SrcShiftB) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
u32 const sa = u32(std::clamp(256.0f * prim.color.a, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((source32_r(pix) * sr * sa) >> 16) + dest_r(dpix);
|
|
u32 g = ((source32_g(pix) * sg * sa) >> 16) + dest_g(dpix);
|
|
u32 b = ((source32_b(pix) * sb * sa) >> 16) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((palbase[(pix >> 16) & 0xff] * sr * sa) >> (16 + SrcShiftR)) + dest_r(dpix);
|
|
u32 g = ((palbase[(pix >> 8) & 0xff] * sr * sa) >> (16 + SrcShiftR)) + dest_g(dpix);
|
|
u32 b = ((palbase[(pix >> 0) & 0xff] * sr * sa) >> (16 + SrcShiftR)) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_rgb32_multiply - perform
|
|
// rasterization using RGB multiply
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static void draw_quad_rgb32_multiply(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
// simply can't do this without reading from the dest
|
|
if (NoDestRead)
|
|
return;
|
|
|
|
rgb_t const *const palbase = prim.texture.palette;
|
|
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (source32_r(pix) * dest_r(dpix)) >> (8 - SrcShiftR);
|
|
u32 const g = (source32_g(pix) * dest_g(dpix)) >> (8 - SrcShiftG);
|
|
u32 const b = (source32_b(pix) * dest_b(dpix)) >> (8 - SrcShiftB);
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (palbase[(pix >> 16) & 0xff] * dest_r(dpix)) >> 8;
|
|
u32 const g = (palbase[(pix >> 8) & 0xff] * dest_g(dpix)) >> 8;
|
|
u32 const b = (palbase[(pix >> 0) & 0xff] * dest_b(dpix)) >> 8;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g * prim.color.a, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b * prim.color.a, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (source32_r(pix) * sr * dest_r(dpix)) >> (16 - SrcShiftR);
|
|
u32 const g = (source32_g(pix) * sg * dest_g(dpix)) >> (16 - SrcShiftG);
|
|
u32 const b = (source32_b(pix) * sb * dest_b(dpix)) >> (16 - SrcShiftB);
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const r = (palbase[(pix >> 16) & 0xff] * sr * dest_r(dpix)) >> 16;
|
|
u32 const g = (palbase[(pix >> 8) & 0xff] * sg * dest_g(dpix)) >> 16;
|
|
u32 const b = (palbase[(pix >> 0) & 0xff] * sb * dest_b(dpix)) >> 16;
|
|
|
|
*dest++ = dest_assemble_rgb(r, g, b);
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//**************************************************************************
|
|
// 32-BIT ARGB QUAD RASTERIZERS
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_argb32_alpha - perform
|
|
// rasterization using standard alpha blending
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static void draw_quad_argb32_alpha(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
rgb_t const *const palbase = prim.texture.palette;
|
|
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// fast case: no coloring, no alpha
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = pix >> 24;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const invta = 0x100 - ta;
|
|
u32 const r = (source32_r(pix) * ta + dest_r(dpix) * invta) >> 8;
|
|
u32 const g = (source32_g(pix) * ta + dest_g(dpix) * invta) >> 8;
|
|
u32 const b = (source32_b(pix) * ta + dest_b(dpix) * invta) >> 8;
|
|
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = pix >> 24;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const invta = 0x100 - ta;
|
|
u32 const r = ((palbase[(pix >> 16) & 0xff] >> SrcShiftR) * ta + dest_r(dpix) * invta) >> 8;
|
|
u32 const g = ((palbase[(pix >> 8) & 0xff] >> SrcShiftG) * ta + dest_g(dpix) * invta) >> 8;
|
|
u32 const b = ((palbase[(pix >> 0) & 0xff] >> SrcShiftB) * ta + dest_b(dpix) * invta) >> 8;
|
|
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
u32 const sa = u32(std::clamp(256.0f * prim.color.a, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = (pix >> 24) * sa;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const invsta = (0x10000 - ta) << 8;
|
|
u32 const r = (source32_r(pix) * sr * ta + dest_r(dpix) * invsta) >> 24;
|
|
u32 const g = (source32_g(pix) * sg * ta + dest_g(dpix) * invsta) >> 24;
|
|
u32 const b = (source32_b(pix) * sb * ta + dest_b(dpix) * invsta) >> 24;
|
|
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = (pix >> 24) * sa;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 const invsta = (0x10000 - ta) << 8;
|
|
u32 const r = ((palbase[(pix >> 16) & 0xff] >> SrcShiftR) * sr * ta + dest_r(dpix) * invsta) >> 24;
|
|
u32 const g = ((palbase[(pix >> 8) & 0xff] >> SrcShiftG) * sg * ta + dest_g(dpix) * invsta) >> 24;
|
|
u32 const b = ((palbase[(pix >> 0) & 0xff] >> SrcShiftB) * sb * ta + dest_b(dpix) * invsta) >> 24;
|
|
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// draw_quad_argb32_add - perform
|
|
// rasterization by using RGB add
|
|
//-------------------------------------------------
|
|
|
|
template <bool Wrap>
|
|
static void draw_quad_argb32_add(render_primitive const &prim, PixelType *dstdata, u32 pitch, quad_setup_data const &setup)
|
|
{
|
|
// simply can't do this without reading from the dest
|
|
if (NoDestRead)
|
|
return;
|
|
|
|
rgb_t const *const palbase = prim.texture.palette;
|
|
|
|
// fast case: no coloring, no alpha
|
|
if (prim.color.r >= 1.0f && prim.color.g >= 1.0f && prim.color.b >= 1.0f && is_opaque(prim.color.a))
|
|
{
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = pix >> 24;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((source32_r(pix) * ta) >> 8) + dest_r(dpix);
|
|
u32 g = ((source32_g(pix) * ta) >> 8) + dest_g(dpix);
|
|
u32 b = ((source32_b(pix) * ta) >> 8) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = pix >> 24;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((palbase[(pix >> 16) & 0xff] * ta) >> (8 + SrcShiftR)) + dest_r(dpix);
|
|
u32 g = ((palbase[(pix >> 8) & 0xff] * ta) >> (8 + SrcShiftG)) + dest_g(dpix);
|
|
u32 b = ((palbase[(pix >> 0) & 0xff] * ta) >> (8 + SrcShiftB)) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// alpha and/or coloring case
|
|
|
|
// clamp R,G,B and inverse A to 0-256 range
|
|
u32 const sr = u32(std::clamp(256.0f * prim.color.r, 0.0f, 256.0f));
|
|
u32 const sg = u32(std::clamp(256.0f * prim.color.g, 0.0f, 256.0f));
|
|
u32 const sb = u32(std::clamp(256.0f * prim.color.b, 0.0f, 256.0f));
|
|
u32 const sa = u32(std::clamp(256.0f * prim.color.a, 0.0f, 256.0f));
|
|
|
|
// loop over rows
|
|
for (s32 y = setup.starty; y < setup.endy; y++)
|
|
{
|
|
PixelType *dest = dstdata + y * pitch + setup.startx;
|
|
s32 curu = setup.startu + (y - setup.starty) * setup.dudy;
|
|
s32 curv = setup.startv + (y - setup.starty) * setup.dvdy;
|
|
|
|
if (!palbase)
|
|
{
|
|
// no lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = (pix >> 24) * sa;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((source32_r(pix) * sr * ta) >> 24) + dest_r(dpix);
|
|
u32 g = ((source32_g(pix) * sg * ta) >> 24) + dest_g(dpix);
|
|
u32 b = ((source32_b(pix) * sb * ta) >> 24) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// lookup case
|
|
|
|
// loop over cols
|
|
for (s32 x = setup.startx; x < setup.endx; x++)
|
|
{
|
|
u32 const pix = get_texel_argb32<Wrap>(prim.texture, curu, curv);
|
|
u32 const ta = (pix >> 24) * sa;
|
|
if (ta != 0)
|
|
{
|
|
u32 const dpix = NoDestRead ? 0 : *dest;
|
|
u32 r = ((palbase[(pix >> 16) & 0xff] * sr * ta) >> (24 + SrcShiftR)) + dest_r(dpix);
|
|
u32 g = ((palbase[(pix >> 8) & 0xff] * sr * ta) >> (24 + SrcShiftR)) + dest_g(dpix);
|
|
u32 b = ((palbase[(pix >> 0) & 0xff] * sr * ta) >> (24 + SrcShiftR)) + dest_b(dpix);
|
|
r = (r | -(r >> (8 - SrcShiftR))) & (0xff >> SrcShiftR);
|
|
g = (g | -(g >> (8 - SrcShiftG))) & (0xff >> SrcShiftG);
|
|
b = (b | -(b >> (8 - SrcShiftB))) & (0xff >> SrcShiftB);
|
|
*dest = dest_assemble_rgb(r, g, b);
|
|
}
|
|
dest++;
|
|
curu += setup.dudx;
|
|
curv += setup.dvdx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//**************************************************************************
|
|
// CORE QUAD RASTERIZERS
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// setup_and_draw_textured_quad - perform setup
|
|
// and then dispatch to a texture-mode-specific
|
|
// drawing routine
|
|
//-------------------------------------------------
|
|
|
|
static void setup_and_draw_textured_quad(render_primitive const &prim, PixelType *dstdata, s32 width, s32 height, u32 pitch)
|
|
{
|
|
assert(prim.bounds.x0 <= prim.bounds.x1);
|
|
assert(prim.bounds.y0 <= prim.bounds.y1);
|
|
|
|
// determine U/V deltas
|
|
float const fdudx = (prim.texcoords.tr.u - prim.texcoords.tl.u) / (prim.bounds.x1 - prim.bounds.x0);
|
|
float const fdvdx = (prim.texcoords.tr.v - prim.texcoords.tl.v) / (prim.bounds.x1 - prim.bounds.x0);
|
|
float const fdudy = (prim.texcoords.bl.u - prim.texcoords.tl.u) / (prim.bounds.y1 - prim.bounds.y0);
|
|
float const fdvdy = (prim.texcoords.bl.v - prim.texcoords.tl.v) / (prim.bounds.y1 - prim.bounds.y0);
|
|
|
|
// clamp to integers
|
|
quad_setup_data setup;
|
|
setup.startx = round_nearest(prim.bounds.x0);
|
|
setup.starty = round_nearest(prim.bounds.y0);
|
|
setup.endx = round_nearest(prim.bounds.x1);
|
|
setup.endy = round_nearest(prim.bounds.y1);
|
|
|
|
// ensure we fit
|
|
if (setup.startx < 0) setup.startx = 0;
|
|
if (setup.startx >= width) setup.startx = width;
|
|
if (setup.endx < 0) setup.endx = 0;
|
|
if (setup.endx >= width) setup.endx = width;
|
|
if (setup.starty < 0) setup.starty = 0;
|
|
if (setup.starty >= height) setup.starty = height;
|
|
if (setup.endy < 0) setup.endy = 0;
|
|
if (setup.endy >= height) setup.endy = height;
|
|
|
|
// compute start and delta U,V coordinates now
|
|
setup.dudx = round_nearest(65536.0f * float(prim.texture.width) * fdudx);
|
|
setup.dvdx = round_nearest(65536.0f * float(prim.texture.height) * fdvdx);
|
|
setup.dudy = round_nearest(65536.0f * float(prim.texture.width) * fdudy);
|
|
setup.dvdy = round_nearest(65536.0f * float(prim.texture.height) * fdvdy);
|
|
setup.startu = round_nearest(65536.0f * float(prim.texture.width) * prim.texcoords.tl.u);
|
|
setup.startv = round_nearest(65536.0f * float(prim.texture.height) * prim.texcoords.tl.v);
|
|
|
|
// advance U/V to the middle of the first texel
|
|
setup.startu += (setup.dudx + setup.dudy) / 2;
|
|
setup.startv += (setup.dvdx + setup.dvdy) / 2;
|
|
|
|
// if we're bilinear filtering, we need to offset u/v by half a texel
|
|
if constexpr (BilinearFilter)
|
|
{
|
|
setup.startu -= 0x8000;
|
|
setup.startv -= 0x8000;
|
|
}
|
|
|
|
// render based on the texture coordinates
|
|
switch (prim.flags & (PRIMFLAG_TEXFORMAT_MASK | PRIMFLAG_BLENDMODE_MASK))
|
|
{
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_PALETTE16) | PRIMFLAG_BLENDMODE(BLENDMODE_NONE):
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_PALETTE16) | PRIMFLAG_BLENDMODE(BLENDMODE_ALPHA):
|
|
draw_quad_palette16_none(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_PALETTE16) | PRIMFLAG_BLENDMODE(BLENDMODE_ADD):
|
|
draw_quad_palette16_add(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_YUY16) | PRIMFLAG_BLENDMODE(BLENDMODE_NONE):
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_YUY16) | PRIMFLAG_BLENDMODE(BLENDMODE_ALPHA):
|
|
draw_quad_yuy16_none(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_YUY16) | PRIMFLAG_BLENDMODE(BLENDMODE_ADD):
|
|
draw_quad_yuy16_add(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_RGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_NONE):
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_RGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_ALPHA):
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_ARGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_NONE):
|
|
if (PRIMFLAG_GET_TEXWRAP(prim.flags))
|
|
draw_quad_rgb32<true>(prim, dstdata, pitch, setup);
|
|
else
|
|
draw_quad_rgb32<false>(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_RGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_RGB_MULTIPLY):
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_ARGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_RGB_MULTIPLY):
|
|
if (PRIMFLAG_GET_TEXWRAP(prim.flags))
|
|
draw_quad_rgb32_multiply<true>(prim, dstdata, pitch, setup);
|
|
else
|
|
draw_quad_rgb32_multiply<false>(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_RGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_ADD):
|
|
if (PRIMFLAG_GET_TEXWRAP(prim.flags))
|
|
draw_quad_rgb32_add<true>(prim, dstdata, pitch, setup);
|
|
else
|
|
draw_quad_rgb32_add<false>(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_ARGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_ALPHA):
|
|
if (PRIMFLAG_GET_TEXWRAP(prim.flags))
|
|
draw_quad_argb32_alpha<true>(prim, dstdata, pitch, setup);
|
|
else
|
|
draw_quad_argb32_alpha<false>(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
case PRIMFLAG_TEXFORMAT(TEXFORMAT_ARGB32) | PRIMFLAG_BLENDMODE(BLENDMODE_ADD):
|
|
if (PRIMFLAG_GET_TEXWRAP(prim.flags))
|
|
draw_quad_argb32_add<true>(prim, dstdata, pitch, setup);
|
|
else
|
|
draw_quad_argb32_add<false>(prim, dstdata, pitch, setup);
|
|
break;
|
|
|
|
default:
|
|
fatalerror("Unknown texformat(%d)/blendmode(%d) combo\n", PRIMFLAG_GET_TEXFORMAT(prim.flags), PRIMFLAG_GET_BLENDMODE(prim.flags));
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
//**************************************************************************
|
|
// PRIMARY ENTRY POINT
|
|
//**************************************************************************
|
|
|
|
//-------------------------------------------------
|
|
// draw_primitives - draw a series of primitives
|
|
// using a software rasterizer
|
|
//-------------------------------------------------
|
|
|
|
public:
|
|
static void draw_primitives(render_primitive_list const &primlist, void *dstdata, u32 width, u32 height, u32 pitch)
|
|
{
|
|
// loop over the list and render each element
|
|
for (render_primitive const *prim = primlist.first(); prim != nullptr; prim = prim->next())
|
|
switch (prim->type)
|
|
{
|
|
case render_primitive::LINE:
|
|
draw_line(*prim, reinterpret_cast<PixelType *>(dstdata), width, height, pitch);
|
|
break;
|
|
|
|
case render_primitive::QUAD:
|
|
if (!prim->texture.base)
|
|
draw_rect(*prim, reinterpret_cast<PixelType *>(dstdata), width, height, pitch);
|
|
else
|
|
setup_and_draw_textured_quad(*prim, reinterpret_cast<PixelType *>(dstdata), width, height, pitch);
|
|
break;
|
|
|
|
default:
|
|
throw emu_fatalerror("Unexpected render_primitive type");
|
|
}
|
|
}
|
|
};
|