From 84aa21184bf11fd709ac7ea678ed4db14f94dd3d Mon Sep 17 00:00:00 2001 From: Vas Crabb Date: Mon, 22 Jun 2015 05:04:10 +1000 Subject: [PATCH] Pick the low-hanging fruit (nw) Implemented most of MooglyGuy's new RGB intrinsics for VMX/Altivec Still need to do blend, bilinear filter and merge alpha --- scripts/src/emu.lua | 1 + src/emu/video/rgbsse.c | 4 + src/emu/video/rgbutil.c | 146 +------ src/emu/video/rgbvmx.c | 238 +++++++++++ src/emu/video/rgbvmx.h | 868 +++++++++++++++++++++------------------- 5 files changed, 701 insertions(+), 556 deletions(-) create mode 100644 src/emu/video/rgbvmx.c diff --git a/scripts/src/emu.lua b/scripts/src/emu.lua index 78d7fccf0a0..4a6d1ad6bd5 100644 --- a/scripts/src/emu.lua +++ b/scripts/src/emu.lua @@ -314,6 +314,7 @@ files { MAME_DIR .. "src/emu/video/rgbgen.h", MAME_DIR .. "src/emu/video/rgbsse.c", MAME_DIR .. "src/emu/video/rgbsse.h", + MAME_DIR .. "src/emu/video/rgbvmx.c", MAME_DIR .. "src/emu/video/rgbvmx.h", MAME_DIR .. "src/emu/video/vector.c", MAME_DIR .. 
"src/emu/video/vector.h", diff --git a/src/emu/video/rgbsse.c b/src/emu/video/rgbsse.c index 5845ee2dae8..2340c0a6925 100644 --- a/src/emu/video/rgbsse.c +++ b/src/emu/video/rgbsse.c @@ -10,6 +10,8 @@ ***************************************************************************/ +#if defined(__SSE2__) || defined(_MSC_VER) + #include "emu.h" #include #include "rgbutil.h" @@ -90,3 +92,5 @@ UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT color01 = _mm_packus_epi16(color01, color01); return _mm_cvtsi128_si32(color01); } + +#endif // defined(__SSE2__) || defined(_MSC_VER) diff --git a/src/emu/video/rgbutil.c b/src/emu/video/rgbutil.c index 641221ba44d..9dfc4f02e2f 100644 --- a/src/emu/video/rgbutil.c +++ b/src/emu/video/rgbutil.c @@ -22,7 +22,7 @@ const struct _rgbsse_statics rgbsse_statics = { { 0 }, { 255, 255, 255, 255, 255, 255, 255, 255 }, - { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000}, + { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000 }, { 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff }, { 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff }, @@ -158,147 +158,3 @@ const struct _rgbsse_statics rgbsse_statics = } }; #endif // defined(__SSE2__) - - - -/*************************************************************************** - VMX/ALTIVEC TABLES -***************************************************************************/ - -#if defined(__ALTIVEC__) -#include -const struct _rgbvmx_statics rgbvmx_statics = -{ - { 255, 255, 255, 255, 255, 255, 255, 255 }, - { - { 0, 256, 0, 256, 0, 256, 0, 256 }, { 1, 255, 1, 255, 1, 255, 1, 255 }, - { 2, 254, 2, 254, 2, 254, 2, 254 }, { 3, 253, 3, 253, 3, 253, 3, 253 }, - { 4, 252, 4, 252, 4, 252, 4, 252 }, { 5, 251, 5, 251, 5, 251, 5, 251 }, - { 6, 250, 6, 250, 6, 250, 6, 250 }, { 7, 249, 7, 249, 7, 249, 7, 249 }, - { 8, 248, 8, 248, 8, 248, 8, 248 }, { 9, 247, 9, 247, 
9, 247, 9, 247 }, - { 10, 246, 10, 246, 10, 246, 10, 246 }, { 11, 245, 11, 245, 11, 245, 11, 245 }, - { 12, 244, 12, 244, 12, 244, 12, 244 }, { 13, 243, 13, 243, 13, 243, 13, 243 }, - { 14, 242, 14, 242, 14, 242, 14, 242 }, { 15, 241, 15, 241, 15, 241, 15, 241 }, - { 16, 240, 16, 240, 16, 240, 16, 240 }, { 17, 239, 17, 239, 17, 239, 17, 239 }, - { 18, 238, 18, 238, 18, 238, 18, 238 }, { 19, 237, 19, 237, 19, 237, 19, 237 }, - { 20, 236, 20, 236, 20, 236, 20, 236 }, { 21, 235, 21, 235, 21, 235, 21, 235 }, - { 22, 234, 22, 234, 22, 234, 22, 234 }, { 23, 233, 23, 233, 23, 233, 23, 233 }, - { 24, 232, 24, 232, 24, 232, 24, 232 }, { 25, 231, 25, 231, 25, 231, 25, 231 }, - { 26, 230, 26, 230, 26, 230, 26, 230 }, { 27, 229, 27, 229, 27, 229, 27, 229 }, - { 28, 228, 28, 228, 28, 228, 28, 228 }, { 29, 227, 29, 227, 29, 227, 29, 227 }, - { 30, 226, 30, 226, 30, 226, 30, 226 }, { 31, 225, 31, 225, 31, 225, 31, 225 }, - { 32, 224, 32, 224, 32, 224, 32, 224 }, { 33, 223, 33, 223, 33, 223, 33, 223 }, - { 34, 222, 34, 222, 34, 222, 34, 222 }, { 35, 221, 35, 221, 35, 221, 35, 221 }, - { 36, 220, 36, 220, 36, 220, 36, 220 }, { 37, 219, 37, 219, 37, 219, 37, 219 }, - { 38, 218, 38, 218, 38, 218, 38, 218 }, { 39, 217, 39, 217, 39, 217, 39, 217 }, - { 40, 216, 40, 216, 40, 216, 40, 216 }, { 41, 215, 41, 215, 41, 215, 41, 215 }, - { 42, 214, 42, 214, 42, 214, 42, 214 }, { 43, 213, 43, 213, 43, 213, 43, 213 }, - { 44, 212, 44, 212, 44, 212, 44, 212 }, { 45, 211, 45, 211, 45, 211, 45, 211 }, - { 46, 210, 46, 210, 46, 210, 46, 210 }, { 47, 209, 47, 209, 47, 209, 47, 209 }, - { 48, 208, 48, 208, 48, 208, 48, 208 }, { 49, 207, 49, 207, 49, 207, 49, 207 }, - { 50, 206, 50, 206, 50, 206, 50, 206 }, { 51, 205, 51, 205, 51, 205, 51, 205 }, - { 52, 204, 52, 204, 52, 204, 52, 204 }, { 53, 203, 53, 203, 53, 203, 53, 203 }, - { 54, 202, 54, 202, 54, 202, 54, 202 }, { 55, 201, 55, 201, 55, 201, 55, 201 }, - { 56, 200, 56, 200, 56, 200, 56, 200 }, { 57, 199, 57, 199, 57, 199, 57, 199 }, - { 58, 198, 
58, 198, 58, 198, 58, 198 }, { 59, 197, 59, 197, 59, 197, 59, 197 }, - { 60, 196, 60, 196, 60, 196, 60, 196 }, { 61, 195, 61, 195, 61, 195, 61, 195 }, - { 62, 194, 62, 194, 62, 194, 62, 194 }, { 63, 193, 63, 193, 63, 193, 63, 193 }, - { 64, 192, 64, 192, 64, 192, 64, 192 }, { 65, 191, 65, 191, 65, 191, 65, 191 }, - { 66, 190, 66, 190, 66, 190, 66, 190 }, { 67, 189, 67, 189, 67, 189, 67, 189 }, - { 68, 188, 68, 188, 68, 188, 68, 188 }, { 69, 187, 69, 187, 69, 187, 69, 187 }, - { 70, 186, 70, 186, 70, 186, 70, 186 }, { 71, 185, 71, 185, 71, 185, 71, 185 }, - { 72, 184, 72, 184, 72, 184, 72, 184 }, { 73, 183, 73, 183, 73, 183, 73, 183 }, - { 74, 182, 74, 182, 74, 182, 74, 182 }, { 75, 181, 75, 181, 75, 181, 75, 181 }, - { 76, 180, 76, 180, 76, 180, 76, 180 }, { 77, 179, 77, 179, 77, 179, 77, 179 }, - { 78, 178, 78, 178, 78, 178, 78, 178 }, { 79, 177, 79, 177, 79, 177, 79, 177 }, - { 80, 176, 80, 176, 80, 176, 80, 176 }, { 81, 175, 81, 175, 81, 175, 81, 175 }, - { 82, 174, 82, 174, 82, 174, 82, 174 }, { 83, 173, 83, 173, 83, 173, 83, 173 }, - { 84, 172, 84, 172, 84, 172, 84, 172 }, { 85, 171, 85, 171, 85, 171, 85, 171 }, - { 86, 170, 86, 170, 86, 170, 86, 170 }, { 87, 169, 87, 169, 87, 169, 87, 169 }, - { 88, 168, 88, 168, 88, 168, 88, 168 }, { 89, 167, 89, 167, 89, 167, 89, 167 }, - { 90, 166, 90, 166, 90, 166, 90, 166 }, { 91, 165, 91, 165, 91, 165, 91, 165 }, - { 92, 164, 92, 164, 92, 164, 92, 164 }, { 93, 163, 93, 163, 93, 163, 93, 163 }, - { 94, 162, 94, 162, 94, 162, 94, 162 }, { 95, 161, 95, 161, 95, 161, 95, 161 }, - { 96, 160, 96, 160, 96, 160, 96, 160 }, { 97, 159, 97, 159, 97, 159, 97, 159 }, - { 98, 158, 98, 158, 98, 158, 98, 158 }, { 99, 157, 99, 157, 99, 157, 99, 157 }, - { 100, 156, 100, 156, 100, 156, 100, 156 }, { 101, 155, 101, 155, 101, 155, 101, 155 }, - { 102, 154, 102, 154, 102, 154, 102, 154 }, { 103, 153, 103, 153, 103, 153, 103, 153 }, - { 104, 152, 104, 152, 104, 152, 104, 152 }, { 105, 151, 105, 151, 105, 151, 105, 151 }, - { 106, 150, 106, 
150, 106, 150, 106, 150 }, { 107, 149, 107, 149, 107, 149, 107, 149 }, - { 108, 148, 108, 148, 108, 148, 108, 148 }, { 109, 147, 109, 147, 109, 147, 109, 147 }, - { 110, 146, 110, 146, 110, 146, 110, 146 }, { 111, 145, 111, 145, 111, 145, 111, 145 }, - { 112, 144, 112, 144, 112, 144, 112, 144 }, { 113, 143, 113, 143, 113, 143, 113, 143 }, - { 114, 142, 114, 142, 114, 142, 114, 142 }, { 115, 141, 115, 141, 115, 141, 115, 141 }, - { 116, 140, 116, 140, 116, 140, 116, 140 }, { 117, 139, 117, 139, 117, 139, 117, 139 }, - { 118, 138, 118, 138, 118, 138, 118, 138 }, { 119, 137, 119, 137, 119, 137, 119, 137 }, - { 120, 136, 120, 136, 120, 136, 120, 136 }, { 121, 135, 121, 135, 121, 135, 121, 135 }, - { 122, 134, 122, 134, 122, 134, 122, 134 }, { 123, 133, 123, 133, 123, 133, 123, 133 }, - { 124, 132, 124, 132, 124, 132, 124, 132 }, { 125, 131, 125, 131, 125, 131, 125, 131 }, - { 126, 130, 126, 130, 126, 130, 126, 130 }, { 127, 129, 127, 129, 127, 129, 127, 129 }, - { 128, 128, 128, 128, 128, 128, 128, 128 }, { 129, 127, 129, 127, 129, 127, 129, 127 }, - { 130, 126, 130, 126, 130, 126, 130, 126 }, { 131, 125, 131, 125, 131, 125, 131, 125 }, - { 132, 124, 132, 124, 132, 124, 132, 124 }, { 133, 123, 133, 123, 133, 123, 133, 123 }, - { 134, 122, 134, 122, 134, 122, 134, 122 }, { 135, 121, 135, 121, 135, 121, 135, 121 }, - { 136, 120, 136, 120, 136, 120, 136, 120 }, { 137, 119, 137, 119, 137, 119, 137, 119 }, - { 138, 118, 138, 118, 138, 118, 138, 118 }, { 139, 117, 139, 117, 139, 117, 139, 117 }, - { 140, 116, 140, 116, 140, 116, 140, 116 }, { 141, 115, 141, 115, 141, 115, 141, 115 }, - { 142, 114, 142, 114, 142, 114, 142, 114 }, { 143, 113, 143, 113, 143, 113, 143, 113 }, - { 144, 112, 144, 112, 144, 112, 144, 112 }, { 145, 111, 145, 111, 145, 111, 145, 111 }, - { 146, 110, 146, 110, 146, 110, 146, 110 }, { 147, 109, 147, 109, 147, 109, 147, 109 }, - { 148, 108, 148, 108, 148, 108, 148, 108 }, { 149, 107, 149, 107, 149, 107, 149, 107 }, - { 150, 106, 150, 106, 150, 106, 150, 
106 }, { 151, 105, 151, 105, 151, 105, 151, 105 }, - { 152, 104, 152, 104, 152, 104, 152, 104 }, { 153, 103, 153, 103, 153, 103, 153, 103 }, - { 154, 102, 154, 102, 154, 102, 154, 102 }, { 155, 101, 155, 101, 155, 101, 155, 101 }, - { 156, 100, 156, 100, 156, 100, 156, 100 }, { 157, 99, 157, 99, 157, 99, 157, 99 }, - { 158, 98, 158, 98, 158, 98, 158, 98 }, { 159, 97, 159, 97, 159, 97, 159, 97 }, - { 160, 96, 160, 96, 160, 96, 160, 96 }, { 161, 95, 161, 95, 161, 95, 161, 95 }, - { 162, 94, 162, 94, 162, 94, 162, 94 }, { 163, 93, 163, 93, 163, 93, 163, 93 }, - { 164, 92, 164, 92, 164, 92, 164, 92 }, { 165, 91, 165, 91, 165, 91, 165, 91 }, - { 166, 90, 166, 90, 166, 90, 166, 90 }, { 167, 89, 167, 89, 167, 89, 167, 89 }, - { 168, 88, 168, 88, 168, 88, 168, 88 }, { 169, 87, 169, 87, 169, 87, 169, 87 }, - { 170, 86, 170, 86, 170, 86, 170, 86 }, { 171, 85, 171, 85, 171, 85, 171, 85 }, - { 172, 84, 172, 84, 172, 84, 172, 84 }, { 173, 83, 173, 83, 173, 83, 173, 83 }, - { 174, 82, 174, 82, 174, 82, 174, 82 }, { 175, 81, 175, 81, 175, 81, 175, 81 }, - { 176, 80, 176, 80, 176, 80, 176, 80 }, { 177, 79, 177, 79, 177, 79, 177, 79 }, - { 178, 78, 178, 78, 178, 78, 178, 78 }, { 179, 77, 179, 77, 179, 77, 179, 77 }, - { 180, 76, 180, 76, 180, 76, 180, 76 }, { 181, 75, 181, 75, 181, 75, 181, 75 }, - { 182, 74, 182, 74, 182, 74, 182, 74 }, { 183, 73, 183, 73, 183, 73, 183, 73 }, - { 184, 72, 184, 72, 184, 72, 184, 72 }, { 185, 71, 185, 71, 185, 71, 185, 71 }, - { 186, 70, 186, 70, 186, 70, 186, 70 }, { 187, 69, 187, 69, 187, 69, 187, 69 }, - { 188, 68, 188, 68, 188, 68, 188, 68 }, { 189, 67, 189, 67, 189, 67, 189, 67 }, - { 190, 66, 190, 66, 190, 66, 190, 66 }, { 191, 65, 191, 65, 191, 65, 191, 65 }, - { 192, 64, 192, 64, 192, 64, 192, 64 }, { 193, 63, 193, 63, 193, 63, 193, 63 }, - { 194, 62, 194, 62, 194, 62, 194, 62 }, { 195, 61, 195, 61, 195, 61, 195, 61 }, - { 196, 60, 196, 60, 196, 60, 196, 60 }, { 197, 59, 197, 59, 197, 59, 197, 59 }, - { 198, 58, 198, 58, 198, 58, 198, 58 }, 
{ 199, 57, 199, 57, 199, 57, 199, 57 }, - { 200, 56, 200, 56, 200, 56, 200, 56 }, { 201, 55, 201, 55, 201, 55, 201, 55 }, - { 202, 54, 202, 54, 202, 54, 202, 54 }, { 203, 53, 203, 53, 203, 53, 203, 53 }, - { 204, 52, 204, 52, 204, 52, 204, 52 }, { 205, 51, 205, 51, 205, 51, 205, 51 }, - { 206, 50, 206, 50, 206, 50, 206, 50 }, { 207, 49, 207, 49, 207, 49, 207, 49 }, - { 208, 48, 208, 48, 208, 48, 208, 48 }, { 209, 47, 209, 47, 209, 47, 209, 47 }, - { 210, 46, 210, 46, 210, 46, 210, 46 }, { 211, 45, 211, 45, 211, 45, 211, 45 }, - { 212, 44, 212, 44, 212, 44, 212, 44 }, { 213, 43, 213, 43, 213, 43, 213, 43 }, - { 214, 42, 214, 42, 214, 42, 214, 42 }, { 215, 41, 215, 41, 215, 41, 215, 41 }, - { 216, 40, 216, 40, 216, 40, 216, 40 }, { 217, 39, 217, 39, 217, 39, 217, 39 }, - { 218, 38, 218, 38, 218, 38, 218, 38 }, { 219, 37, 219, 37, 219, 37, 219, 37 }, - { 220, 36, 220, 36, 220, 36, 220, 36 }, { 221, 35, 221, 35, 221, 35, 221, 35 }, - { 222, 34, 222, 34, 222, 34, 222, 34 }, { 223, 33, 223, 33, 223, 33, 223, 33 }, - { 224, 32, 224, 32, 224, 32, 224, 32 }, { 225, 31, 225, 31, 225, 31, 225, 31 }, - { 226, 30, 226, 30, 226, 30, 226, 30 }, { 227, 29, 227, 29, 227, 29, 227, 29 }, - { 228, 28, 228, 28, 228, 28, 228, 28 }, { 229, 27, 229, 27, 229, 27, 229, 27 }, - { 230, 26, 230, 26, 230, 26, 230, 26 }, { 231, 25, 231, 25, 231, 25, 231, 25 }, - { 232, 24, 232, 24, 232, 24, 232, 24 }, { 233, 23, 233, 23, 233, 23, 233, 23 }, - { 234, 22, 234, 22, 234, 22, 234, 22 }, { 235, 21, 235, 21, 235, 21, 235, 21 }, - { 236, 20, 236, 20, 236, 20, 236, 20 }, { 237, 19, 237, 19, 237, 19, 237, 19 }, - { 238, 18, 238, 18, 238, 18, 238, 18 }, { 239, 17, 239, 17, 239, 17, 239, 17 }, - { 240, 16, 240, 16, 240, 16, 240, 16 }, { 241, 15, 241, 15, 241, 15, 241, 15 }, - { 242, 14, 242, 14, 242, 14, 242, 14 }, { 243, 13, 243, 13, 243, 13, 243, 13 }, - { 244, 12, 244, 12, 244, 12, 244, 12 }, { 245, 11, 245, 11, 245, 11, 245, 11 }, - { 246, 10, 246, 10, 246, 10, 246, 10 }, { 247, 9, 247, 9, 247, 9, 247, 
9 }, - { 248, 8, 248, 8, 248, 8, 248, 8 }, { 249, 7, 249, 7, 249, 7, 249, 7 }, - { 250, 6, 250, 6, 250, 6, 250, 6 }, { 251, 5, 251, 5, 251, 5, 251, 5 }, - { 252, 4, 252, 4, 252, 4, 252, 4 }, { 253, 3, 253, 3, 253, 3, 253, 3 }, - { 254, 2, 254, 2, 254, 2, 254, 2 }, { 255, 1, 255, 1, 255, 1, 255, 1 } - } -}; -#endif // defined(__ALTIVEC__) diff --git a/src/emu/video/rgbvmx.c b/src/emu/video/rgbvmx.c new file mode 100644 index 00000000000..13f4c19cccc --- /dev/null +++ b/src/emu/video/rgbvmx.c @@ -0,0 +1,238 @@ +// license:BSD-3-Clause +// copyright-holders:Vas Crabb, Ryan Holtz +/*************************************************************************** + + rgbvmx.c + + VMX/Altivec optimised RGB utilities. + +***************************************************************************/ + +#if defined(__ALTIVEC__) + +#include "emu.h" +#include +#include "rgbutil.h" + +/*************************************************************************** + TABLES +***************************************************************************/ + +const rgbaint_t::VECU16 rgbaint_t::maxbyte = { 255, 255, 255, 255, 255, 255, 255, 255 }; +const rgbaint_t::VECU32 rgbaint_t::alpha_mask = { 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff }; +const rgbaint_t::VECU32 rgbaint_t::red_mask = { 0xffffffff, 0x00000000, 0xffffffff, 0xffffffff }; +const rgbaint_t::VECU32 rgbaint_t::green_mask = { 0xffffffff, 0xffffffff, 0x00000000, 0xffffffff }; +const rgbaint_t::VECU32 rgbaint_t::blue_mask = { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000 }; +const rgbaint_t::VECU16 rgbaint_t::scale_table[256] = { + { 0, 256, 0, 256, 0, 256, 0, 256 }, { 1, 255, 1, 255, 1, 255, 1, 255 }, + { 2, 254, 2, 254, 2, 254, 2, 254 }, { 3, 253, 3, 253, 3, 253, 3, 253 }, + { 4, 252, 4, 252, 4, 252, 4, 252 }, { 5, 251, 5, 251, 5, 251, 5, 251 }, + { 6, 250, 6, 250, 6, 250, 6, 250 }, { 7, 249, 7, 249, 7, 249, 7, 249 }, + { 8, 248, 8, 248, 8, 248, 8, 248 }, { 9, 247, 9, 247, 9, 247, 9, 247 }, + { 10, 246, 10, 246, 10, 246, 10, 
246 }, { 11, 245, 11, 245, 11, 245, 11, 245 }, + { 12, 244, 12, 244, 12, 244, 12, 244 }, { 13, 243, 13, 243, 13, 243, 13, 243 }, + { 14, 242, 14, 242, 14, 242, 14, 242 }, { 15, 241, 15, 241, 15, 241, 15, 241 }, + { 16, 240, 16, 240, 16, 240, 16, 240 }, { 17, 239, 17, 239, 17, 239, 17, 239 }, + { 18, 238, 18, 238, 18, 238, 18, 238 }, { 19, 237, 19, 237, 19, 237, 19, 237 }, + { 20, 236, 20, 236, 20, 236, 20, 236 }, { 21, 235, 21, 235, 21, 235, 21, 235 }, + { 22, 234, 22, 234, 22, 234, 22, 234 }, { 23, 233, 23, 233, 23, 233, 23, 233 }, + { 24, 232, 24, 232, 24, 232, 24, 232 }, { 25, 231, 25, 231, 25, 231, 25, 231 }, + { 26, 230, 26, 230, 26, 230, 26, 230 }, { 27, 229, 27, 229, 27, 229, 27, 229 }, + { 28, 228, 28, 228, 28, 228, 28, 228 }, { 29, 227, 29, 227, 29, 227, 29, 227 }, + { 30, 226, 30, 226, 30, 226, 30, 226 }, { 31, 225, 31, 225, 31, 225, 31, 225 }, + { 32, 224, 32, 224, 32, 224, 32, 224 }, { 33, 223, 33, 223, 33, 223, 33, 223 }, + { 34, 222, 34, 222, 34, 222, 34, 222 }, { 35, 221, 35, 221, 35, 221, 35, 221 }, + { 36, 220, 36, 220, 36, 220, 36, 220 }, { 37, 219, 37, 219, 37, 219, 37, 219 }, + { 38, 218, 38, 218, 38, 218, 38, 218 }, { 39, 217, 39, 217, 39, 217, 39, 217 }, + { 40, 216, 40, 216, 40, 216, 40, 216 }, { 41, 215, 41, 215, 41, 215, 41, 215 }, + { 42, 214, 42, 214, 42, 214, 42, 214 }, { 43, 213, 43, 213, 43, 213, 43, 213 }, + { 44, 212, 44, 212, 44, 212, 44, 212 }, { 45, 211, 45, 211, 45, 211, 45, 211 }, + { 46, 210, 46, 210, 46, 210, 46, 210 }, { 47, 209, 47, 209, 47, 209, 47, 209 }, + { 48, 208, 48, 208, 48, 208, 48, 208 }, { 49, 207, 49, 207, 49, 207, 49, 207 }, + { 50, 206, 50, 206, 50, 206, 50, 206 }, { 51, 205, 51, 205, 51, 205, 51, 205 }, + { 52, 204, 52, 204, 52, 204, 52, 204 }, { 53, 203, 53, 203, 53, 203, 53, 203 }, + { 54, 202, 54, 202, 54, 202, 54, 202 }, { 55, 201, 55, 201, 55, 201, 55, 201 }, + { 56, 200, 56, 200, 56, 200, 56, 200 }, { 57, 199, 57, 199, 57, 199, 57, 199 }, + { 58, 198, 58, 198, 58, 198, 58, 198 }, { 59, 197, 59, 197, 59, 
197, 59, 197 }, + { 60, 196, 60, 196, 60, 196, 60, 196 }, { 61, 195, 61, 195, 61, 195, 61, 195 }, + { 62, 194, 62, 194, 62, 194, 62, 194 }, { 63, 193, 63, 193, 63, 193, 63, 193 }, + { 64, 192, 64, 192, 64, 192, 64, 192 }, { 65, 191, 65, 191, 65, 191, 65, 191 }, + { 66, 190, 66, 190, 66, 190, 66, 190 }, { 67, 189, 67, 189, 67, 189, 67, 189 }, + { 68, 188, 68, 188, 68, 188, 68, 188 }, { 69, 187, 69, 187, 69, 187, 69, 187 }, + { 70, 186, 70, 186, 70, 186, 70, 186 }, { 71, 185, 71, 185, 71, 185, 71, 185 }, + { 72, 184, 72, 184, 72, 184, 72, 184 }, { 73, 183, 73, 183, 73, 183, 73, 183 }, + { 74, 182, 74, 182, 74, 182, 74, 182 }, { 75, 181, 75, 181, 75, 181, 75, 181 }, + { 76, 180, 76, 180, 76, 180, 76, 180 }, { 77, 179, 77, 179, 77, 179, 77, 179 }, + { 78, 178, 78, 178, 78, 178, 78, 178 }, { 79, 177, 79, 177, 79, 177, 79, 177 }, + { 80, 176, 80, 176, 80, 176, 80, 176 }, { 81, 175, 81, 175, 81, 175, 81, 175 }, + { 82, 174, 82, 174, 82, 174, 82, 174 }, { 83, 173, 83, 173, 83, 173, 83, 173 }, + { 84, 172, 84, 172, 84, 172, 84, 172 }, { 85, 171, 85, 171, 85, 171, 85, 171 }, + { 86, 170, 86, 170, 86, 170, 86, 170 }, { 87, 169, 87, 169, 87, 169, 87, 169 }, + { 88, 168, 88, 168, 88, 168, 88, 168 }, { 89, 167, 89, 167, 89, 167, 89, 167 }, + { 90, 166, 90, 166, 90, 166, 90, 166 }, { 91, 165, 91, 165, 91, 165, 91, 165 }, + { 92, 164, 92, 164, 92, 164, 92, 164 }, { 93, 163, 93, 163, 93, 163, 93, 163 }, + { 94, 162, 94, 162, 94, 162, 94, 162 }, { 95, 161, 95, 161, 95, 161, 95, 161 }, + { 96, 160, 96, 160, 96, 160, 96, 160 }, { 97, 159, 97, 159, 97, 159, 97, 159 }, + { 98, 158, 98, 158, 98, 158, 98, 158 }, { 99, 157, 99, 157, 99, 157, 99, 157 }, + { 100, 156, 100, 156, 100, 156, 100, 156 }, { 101, 155, 101, 155, 101, 155, 101, 155 }, + { 102, 154, 102, 154, 102, 154, 102, 154 }, { 103, 153, 103, 153, 103, 153, 103, 153 }, + { 104, 152, 104, 152, 104, 152, 104, 152 }, { 105, 151, 105, 151, 105, 151, 105, 151 }, + { 106, 150, 106, 150, 106, 150, 106, 150 }, { 107, 149, 107, 149, 107, 
149, 107, 149 }, + { 108, 148, 108, 148, 108, 148, 108, 148 }, { 109, 147, 109, 147, 109, 147, 109, 147 }, + { 110, 146, 110, 146, 110, 146, 110, 146 }, { 111, 145, 111, 145, 111, 145, 111, 145 }, + { 112, 144, 112, 144, 112, 144, 112, 144 }, { 113, 143, 113, 143, 113, 143, 113, 143 }, + { 114, 142, 114, 142, 114, 142, 114, 142 }, { 115, 141, 115, 141, 115, 141, 115, 141 }, + { 116, 140, 116, 140, 116, 140, 116, 140 }, { 117, 139, 117, 139, 117, 139, 117, 139 }, + { 118, 138, 118, 138, 118, 138, 118, 138 }, { 119, 137, 119, 137, 119, 137, 119, 137 }, + { 120, 136, 120, 136, 120, 136, 120, 136 }, { 121, 135, 121, 135, 121, 135, 121, 135 }, + { 122, 134, 122, 134, 122, 134, 122, 134 }, { 123, 133, 123, 133, 123, 133, 123, 133 }, + { 124, 132, 124, 132, 124, 132, 124, 132 }, { 125, 131, 125, 131, 125, 131, 125, 131 }, + { 126, 130, 126, 130, 126, 130, 126, 130 }, { 127, 129, 127, 129, 127, 129, 127, 129 }, + { 128, 128, 128, 128, 128, 128, 128, 128 }, { 129, 127, 129, 127, 129, 127, 129, 127 }, + { 130, 126, 130, 126, 130, 126, 130, 126 }, { 131, 125, 131, 125, 131, 125, 131, 125 }, + { 132, 124, 132, 124, 132, 124, 132, 124 }, { 133, 123, 133, 123, 133, 123, 133, 123 }, + { 134, 122, 134, 122, 134, 122, 134, 122 }, { 135, 121, 135, 121, 135, 121, 135, 121 }, + { 136, 120, 136, 120, 136, 120, 136, 120 }, { 137, 119, 137, 119, 137, 119, 137, 119 }, + { 138, 118, 138, 118, 138, 118, 138, 118 }, { 139, 117, 139, 117, 139, 117, 139, 117 }, + { 140, 116, 140, 116, 140, 116, 140, 116 }, { 141, 115, 141, 115, 141, 115, 141, 115 }, + { 142, 114, 142, 114, 142, 114, 142, 114 }, { 143, 113, 143, 113, 143, 113, 143, 113 }, + { 144, 112, 144, 112, 144, 112, 144, 112 }, { 145, 111, 145, 111, 145, 111, 145, 111 }, + { 146, 110, 146, 110, 146, 110, 146, 110 }, { 147, 109, 147, 109, 147, 109, 147, 109 }, + { 148, 108, 148, 108, 148, 108, 148, 108 }, { 149, 107, 149, 107, 149, 107, 149, 107 }, + { 150, 106, 150, 106, 150, 106, 150, 106 }, { 151, 105, 151, 105, 151, 105, 151, 105 }, + 
{ 152, 104, 152, 104, 152, 104, 152, 104 }, { 153, 103, 153, 103, 153, 103, 153, 103 }, + { 154, 102, 154, 102, 154, 102, 154, 102 }, { 155, 101, 155, 101, 155, 101, 155, 101 }, + { 156, 100, 156, 100, 156, 100, 156, 100 }, { 157, 99, 157, 99, 157, 99, 157, 99 }, + { 158, 98, 158, 98, 158, 98, 158, 98 }, { 159, 97, 159, 97, 159, 97, 159, 97 }, + { 160, 96, 160, 96, 160, 96, 160, 96 }, { 161, 95, 161, 95, 161, 95, 161, 95 }, + { 162, 94, 162, 94, 162, 94, 162, 94 }, { 163, 93, 163, 93, 163, 93, 163, 93 }, + { 164, 92, 164, 92, 164, 92, 164, 92 }, { 165, 91, 165, 91, 165, 91, 165, 91 }, + { 166, 90, 166, 90, 166, 90, 166, 90 }, { 167, 89, 167, 89, 167, 89, 167, 89 }, + { 168, 88, 168, 88, 168, 88, 168, 88 }, { 169, 87, 169, 87, 169, 87, 169, 87 }, + { 170, 86, 170, 86, 170, 86, 170, 86 }, { 171, 85, 171, 85, 171, 85, 171, 85 }, + { 172, 84, 172, 84, 172, 84, 172, 84 }, { 173, 83, 173, 83, 173, 83, 173, 83 }, + { 174, 82, 174, 82, 174, 82, 174, 82 }, { 175, 81, 175, 81, 175, 81, 175, 81 }, + { 176, 80, 176, 80, 176, 80, 176, 80 }, { 177, 79, 177, 79, 177, 79, 177, 79 }, + { 178, 78, 178, 78, 178, 78, 178, 78 }, { 179, 77, 179, 77, 179, 77, 179, 77 }, + { 180, 76, 180, 76, 180, 76, 180, 76 }, { 181, 75, 181, 75, 181, 75, 181, 75 }, + { 182, 74, 182, 74, 182, 74, 182, 74 }, { 183, 73, 183, 73, 183, 73, 183, 73 }, + { 184, 72, 184, 72, 184, 72, 184, 72 }, { 185, 71, 185, 71, 185, 71, 185, 71 }, + { 186, 70, 186, 70, 186, 70, 186, 70 }, { 187, 69, 187, 69, 187, 69, 187, 69 }, + { 188, 68, 188, 68, 188, 68, 188, 68 }, { 189, 67, 189, 67, 189, 67, 189, 67 }, + { 190, 66, 190, 66, 190, 66, 190, 66 }, { 191, 65, 191, 65, 191, 65, 191, 65 }, + { 192, 64, 192, 64, 192, 64, 192, 64 }, { 193, 63, 193, 63, 193, 63, 193, 63 }, + { 194, 62, 194, 62, 194, 62, 194, 62 }, { 195, 61, 195, 61, 195, 61, 195, 61 }, + { 196, 60, 196, 60, 196, 60, 196, 60 }, { 197, 59, 197, 59, 197, 59, 197, 59 }, + { 198, 58, 198, 58, 198, 58, 198, 58 }, { 199, 57, 199, 57, 199, 57, 199, 57 }, + { 200, 56, 
200, 56, 200, 56, 200, 56 }, { 201, 55, 201, 55, 201, 55, 201, 55 }, + { 202, 54, 202, 54, 202, 54, 202, 54 }, { 203, 53, 203, 53, 203, 53, 203, 53 }, + { 204, 52, 204, 52, 204, 52, 204, 52 }, { 205, 51, 205, 51, 205, 51, 205, 51 }, + { 206, 50, 206, 50, 206, 50, 206, 50 }, { 207, 49, 207, 49, 207, 49, 207, 49 }, + { 208, 48, 208, 48, 208, 48, 208, 48 }, { 209, 47, 209, 47, 209, 47, 209, 47 }, + { 210, 46, 210, 46, 210, 46, 210, 46 }, { 211, 45, 211, 45, 211, 45, 211, 45 }, + { 212, 44, 212, 44, 212, 44, 212, 44 }, { 213, 43, 213, 43, 213, 43, 213, 43 }, + { 214, 42, 214, 42, 214, 42, 214, 42 }, { 215, 41, 215, 41, 215, 41, 215, 41 }, + { 216, 40, 216, 40, 216, 40, 216, 40 }, { 217, 39, 217, 39, 217, 39, 217, 39 }, + { 218, 38, 218, 38, 218, 38, 218, 38 }, { 219, 37, 219, 37, 219, 37, 219, 37 }, + { 220, 36, 220, 36, 220, 36, 220, 36 }, { 221, 35, 221, 35, 221, 35, 221, 35 }, + { 222, 34, 222, 34, 222, 34, 222, 34 }, { 223, 33, 223, 33, 223, 33, 223, 33 }, + { 224, 32, 224, 32, 224, 32, 224, 32 }, { 225, 31, 225, 31, 225, 31, 225, 31 }, + { 226, 30, 226, 30, 226, 30, 226, 30 }, { 227, 29, 227, 29, 227, 29, 227, 29 }, + { 228, 28, 228, 28, 228, 28, 228, 28 }, { 229, 27, 229, 27, 229, 27, 229, 27 }, + { 230, 26, 230, 26, 230, 26, 230, 26 }, { 231, 25, 231, 25, 231, 25, 231, 25 }, + { 232, 24, 232, 24, 232, 24, 232, 24 }, { 233, 23, 233, 23, 233, 23, 233, 23 }, + { 234, 22, 234, 22, 234, 22, 234, 22 }, { 235, 21, 235, 21, 235, 21, 235, 21 }, + { 236, 20, 236, 20, 236, 20, 236, 20 }, { 237, 19, 237, 19, 237, 19, 237, 19 }, + { 238, 18, 238, 18, 238, 18, 238, 18 }, { 239, 17, 239, 17, 239, 17, 239, 17 }, + { 240, 16, 240, 16, 240, 16, 240, 16 }, { 241, 15, 241, 15, 241, 15, 241, 15 }, + { 242, 14, 242, 14, 242, 14, 242, 14 }, { 243, 13, 243, 13, 243, 13, 243, 13 }, + { 244, 12, 244, 12, 244, 12, 244, 12 }, { 245, 11, 245, 11, 245, 11, 245, 11 }, + { 246, 10, 246, 10, 246, 10, 246, 10 }, { 247, 9, 247, 9, 247, 9, 247, 9 }, + { 248, 8, 248, 8, 248, 8, 248, 8 }, { 249, 7, 
249, 7, 249, 7, 249, 7 }, + { 250, 6, 250, 6, 250, 6, 250, 6 }, { 251, 5, 251, 5, 251, 5, 251, 5 }, + { 252, 4, 252, 4, 252, 4, 252, 4 }, { 253, 3, 253, 3, 253, 3, 253, 3 }, + { 254, 2, 254, 2, 254, 2, 254, 2 }, { 255, 1, 255, 1, 255, 1, 255, 1 } +}; + +extern const struct _rgbvmx_statics +{ +} rgbvmx_statics; + +/*************************************************************************** + HIGHER LEVEL OPERATIONS +***************************************************************************/ + +void rgbaint_t::blend(const rgbaint_t& other, UINT8 factor) +{ + m_value = _mm_unpacklo_epi16(m_value, other.m_value); + m_value = vec_add((VECU16)m_value, scale_table[factor]); + m_value = vec_sr(m_value, vec_splat_u32(8)); +} + +void rgbaint_t::scale_and_clamp(const rgbaint_t& scale) +{ + mul(scale); + shr(8); + min(255); +} + +void rgbaint_t::scale_imm_and_clamp(const INT32 scale) +{ + mul_imm(scale); + shr(8); + min(255); +} + +void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) +{ + mul(scale); + rgbaint_t color2(other); + color2.mul(scale2); + + mul(scale); + add(color2); + shr(8); + min(255); +} + +void rgbaint_t::scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other) +{ + mul_imm(scale); + add(other); + shr(8); + min(255); +} + +void rgbaint_t::scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) +{ + mul(scale); + add(other); + shr(8); + min(255); +} + +UINT32 rgbaint_t::bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) +{ + __m128i color00 = _mm_cvtsi32_si128(rgb00); + __m128i color01 = _mm_cvtsi32_si128(rgb01); + __m128i color10 = _mm_cvtsi32_si128(rgb10); + __m128i color11 = _mm_cvtsi32_si128(rgb11); + + /* interleave color01 and color00 at the byte level */ + color01 = _mm_unpacklo_epi8(color01, color00); + color11 = _mm_unpacklo_epi8(color11, color10); + color01 = _mm_unpacklo_epi8(color01, _mm_setzero_si128()); + color11 = 
_mm_unpacklo_epi8(color11, _mm_setzero_si128()); + color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[u][0]); + color11 = _mm_madd_epi16(color11, *(__m128i *)&rgbsse_statics.scale_table[u][0]); + color01 = _mm_slli_epi32(color01, 15); + color11 = _mm_srli_epi32(color11, 1); + color01 = _mm_max_epi16(color01, color11); + color01 = _mm_madd_epi16(color01, *(__m128i *)&rgbsse_statics.scale_table[v][0]); + color01 = _mm_srli_epi32(color01, 15); + color01 = _mm_packs_epi32(color01, color01); + color01 = _mm_packus_epi16(color01, color01); + return _mm_cvtsi128_si32(color01); +} + +#endif // defined(__ALTIVEC__) diff --git a/src/emu/video/rgbvmx.h b/src/emu/video/rgbvmx.h index a0f40eee330..ea92ee7af5f 100644 --- a/src/emu/video/rgbvmx.h +++ b/src/emu/video/rgbvmx.h @@ -1,5 +1,5 @@ // license:BSD-3-Clause -// copyright-holders:Vas Crabb +// copyright-holders:Vas Crabb, Ryan Holtz /*************************************************************************** rgbvmx.h @@ -11,425 +11,471 @@ #ifndef __RGBVMX__ #define __RGBVMX__ -#if defined(__ALTIVEC__) #include -#endif - /*************************************************************************** TYPE DEFINITIONS ***************************************************************************/ -/* intermediate RGB values are stored in a vector */ -typedef vector signed short rgbint; - -/* intermediate RGB values are stored in a vector */ -typedef vector signed short rgbaint; - - - -/*************************************************************************** - BASIC CONVERSIONS -***************************************************************************/ - -/*------------------------------------------------- - rgb_comp_to_rgbint - converts a trio of RGB - components to an rgbint type --------------------------------------------------*/ - -INLINE void rgb_comp_to_rgbint(rgbint *rgb, INT16 r, INT16 g, INT16 b) +class rgbaint_t { - rgbint result = { 0, r, g, b, 0, 0, 0, 0 }; - *rgb = result; -} - - 
-/*------------------------------------------------- - rgba_comp_to_rgbint - converts a quad of RGB - components to an rgbint type --------------------------------------------------*/ - -INLINE void rgba_comp_to_rgbaint(rgbaint *rgb, INT16 a, INT16 r, INT16 g, INT16 b) -{ - rgbaint result = { a, r, g, b, 0, 0, 0, 0 }; - *rgb = result; -} - - -/*------------------------------------------------- - rgb_to_rgbint - converts a packed trio of RGB - components to an rgbint type --------------------------------------------------*/ - -INLINE void rgb_to_rgbint(rgbint *rgb, rgb_t const &color) -{ - vector signed char temp = (vector signed char)vec_perm((vector signed int)vec_lde(0, color.ptr()), vec_splat_s32(0), vec_lvsl(0, color.ptr())); - *rgb = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), temp); -} +public: + inline rgbaint_t() { } + inline rgbaint_t(UINT32 rgba) { set(rgba); } + inline rgbaint_t(UINT32 a, UINT32 r, UINT32 g, UINT32 b) { set(a, r, g, b); } + inline rgbaint_t(rgb_t& rgb) { set(rgb); } + + inline void set(rgbaint_t& other) { m_value = other.m_value; } + + inline void set(UINT32 rgba) + { + const vector unsigned int zero = vec_splat_u32(0); + const vector unsigned char temp = vec_perm(vec_lde(0, &rgba), zero, vec_lvsl(0, &rgba)); + m_value = vec_mergeh((vector unsigned short)zero, (vector unsigned short)vec_mergeh((vector unsigned char)zero, temp)); + } + + inline void set(UINT32 a, UINT32 r, UINT32 g, UINT32 b) + { + vector unsigned int result = { a, r, g, b }; + m_value = result; + } + + inline void set(rgb_t& rgb) + { + const vector unsigned int zero = vec_splat_u32(0); + const vector unsigned char temp = vec_perm(vec_lde(0, rgb.ptr()), zero, vec_lvsl(0, rgb.ptr())); + m_value = vec_mergeh((vector unsigned short)zero, (vector unsigned short)vec_mergeh((vector unsigned char)zero, temp)); + } + + inline rgb_t to_rgba() + { + const vector unsigned int temp = vec_splat((vector unsigned int)vec_pack(vec_pack(m_value, m_value), vec_splat_u16(0)), 
0); + UINT32 result; + vec_ste(temp, 0, &result); + return result; + } + + /* Narrows the four 32-bit lanes to bytes with two saturating vec_packsu steps, then stores element 0 of the packed vector into a UINT32 and returns it. */ inline rgb_t to_rgba_clamp() + { + const vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(vec_packsu(m_value, m_value), vec_splat_u16(0)), 0); + UINT32 result; + vec_ste(temp, 0, &result); + return result; + } + + /* Lane-wise add: this += color2. */ inline void add(const rgbaint_t& color2) + { + m_value = vec_add(m_value, color2.m_value); + } + + /* Adds imm to each of the four lanes. */ inline void add_imm(const UINT32 imm) + { + const vector unsigned int temp = { imm, imm, imm, imm }; + m_value = vec_add(m_value, temp); + } + + /* Adds a separate immediate per lane; the vector is built in a, r, g, b order. */ inline void add_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_add(m_value, temp); + } + + /* Lane-wise subtract: this -= color2. */ inline void sub(const rgbaint_t& color2) + { + m_value = vec_sub(m_value, color2.m_value); + } + + /* Subtracts imm from each lane. */ inline void sub_imm(const UINT32 imm) + { + const vector unsigned int temp = { imm, imm, imm, imm }; + m_value = vec_sub(m_value, temp); + } + + /* Subtracts a separate immediate per lane (a, r, g, b order). */ inline void sub_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_sub(m_value, temp); + } + + /* Reverse subtract: this = color2 - this. */ inline void subr(rgbaint_t& color2) + { + m_value = vec_sub(color2.m_value, m_value); + } + + /* Reverse subtract with one immediate: each lane becomes imm - lane. */ inline void subr_imm(const UINT32 imm) + { + const vector unsigned int temp = { imm, imm, imm, imm }; + m_value = vec_sub(temp, m_value); + } + + /* Reverse subtract with per-lane immediates (a, r, g, b order). */ inline void subr_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_sub(temp, m_value); + } + + /* set_a/set_r/set_g/set_b: AND with the channel's mask, then OR in value placed in lane 0/1/2/3 respectively. The masks are defined outside this view; presumably each is zero only in its own lane - TODO confirm. */ inline void set_a(const UINT32 value) + { + const vector unsigned int temp = { value, 0, 0, 0 }; + m_value = vec_or(vec_and(m_value, alpha_mask), temp); + } + + inline void set_r(const UINT32 value) + { + const vector unsigned int temp = { 0, value, 0, 0 }; + m_value = vec_or(vec_and(m_value, red_mask), temp); + } + + inline void set_g(const UINT32 value) + { + const vector unsigned int temp = 
{ 0, 0, value, 0 }; + m_value = vec_or(vec_and(m_value, green_mask), temp); + } + + inline void set_b(const UINT32 value) + { + const vector unsigned int temp = { 0, 0, 0, value }; + m_value = vec_or(vec_and(m_value, blue_mask), temp); + } + + /* get_a/get_r/get_g/get_b: splat byte 3/7/11/15 of the vector and store that single byte into the result. Presumably this is the low byte of each 32-bit lane in big-endian element order - TODO confirm. */ inline UINT8 get_a() + { + UINT8 result; + vec_ste(vec_splat((vector unsigned char)m_value, 3), 0, &result); + return result; + } + + inline UINT8 get_r() + { + UINT8 result; + vec_ste(vec_splat((vector unsigned char)m_value, 7), 0, &result); + return result; + } + + inline UINT8 get_g() + { + UINT8 result; + vec_ste(vec_splat((vector unsigned char)m_value, 11), 0, &result); + return result; + } + + inline UINT8 get_b() + { + UINT8 result; + vec_ste(vec_splat((vector unsigned char)m_value, 15), 0, &result); + return result; + } + + /* get_a32/get_r32/get_g32/get_b32: return the full 32-bit lane 0/1/2/3. */ inline UINT32 get_a32() + { + UINT32 result; + vec_ste(vec_splat(m_value, 0), 0, &result); + return result; + } + + inline UINT32 get_r32() + { + UINT32 result; + vec_ste(vec_splat(m_value, 1), 0, &result); + return result; + } + + inline UINT32 get_g32() + { + UINT32 result; + vec_ste(vec_splat(m_value, 2), 0, &result); + return result; + } + + inline UINT32 get_b32() + { + UINT32 result; + vec_ste(vec_splat(m_value, 3), 0, &result); + return result; + } + + /* Lane-wise 32-bit multiply assembled from 16-bit partial products: the two vec_mule cross terms are summed and shifted up 16 bits, then the vec_mulo term is added. shift is vec_splat_u32(-16); vec_sl uses only the low 5 bits of each count, so it acts as a shift by 16. */ inline void mul(const rgbaint_t& color) + { + const vector unsigned int shift = vec_splat_u32(-16); + const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(color.m_value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)color.m_value)); + m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)color.m_value)); + } + + /* Multiplies every lane by imm using the same partial-product scheme as mul. */ inline void mul_imm(const UINT32 imm) + { + const vector unsigned int value = { imm, imm, imm, imm }; + const vector unsigned int shift = vec_splat_u32(-16); + const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), 
vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value)); + m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value)); + } + + /* Multiplies each lane by its own immediate (a, r, g, b order): the vec_mule cross terms are summed and shifted up 16 bits, then the vec_mulo term is added. */ inline void mul_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int value = { a, r, g, b }; + const vector unsigned int shift = vec_splat_u32(-16); + const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)value)); + m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value)); + } + + /* Shifts every lane left by the count held in lane 3 of shift; the result is zeroed unless that count is below 32. */ inline void shl(const rgbaint_t& shift) + { + const vector unsigned int limit = { 32, 32, 32, 32 }; + const vector unsigned int temp = vec_splat(shift.m_value, 3); + m_value = vec_and(vec_sl(m_value, temp), vec_cmpgt(limit, temp)); + } + + /* Shifts every lane left by the immediate count; no range check is applied here. */ inline void shl_imm(const UINT8 shift) + { + const vector unsigned int temp = { shift, shift, shift, shift }; + m_value = vec_sl(m_value, temp); + } + + /* Shifts the whole 128-bit value left with vec_slo (octet granularity); the result is zeroed unless shift is below 128. All 16 bytes of limit and temp are filled because vec_slo reads its count from the last byte and the vec_cmpgt mask is computed per byte. */ inline void shl_imm_all(const UINT8 shift) + { + const vector unsigned char limit = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + const vector unsigned char temp = { shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift }; + m_value = vec_and(vec_slo(m_value, temp), (vector unsigned int)vec_cmpgt(limit, temp)); + } + + /* Logical right shift of every lane by the count held in lane 3 of shift; the result is zeroed unless that count is below 32. */ inline void shr(const rgbaint_t& shift) + { + const vector unsigned int limit = { 32, 32, 32, 32 }; + const vector unsigned int temp = vec_splat(shift.m_value, 3); + m_value = vec_and(vec_sr(m_value, temp), vec_cmpgt(limit, temp)); + } + + /* Logical right shift of every lane by the immediate count; no range check is applied here. */ inline void shr_imm(const UINT8 shift) + { + const vector unsigned int temp = { shift, shift, shift, shift }; + m_value = vec_sr(m_value, temp); + } + + /* Shifts the whole 128-bit value right with vec_sro (octet granularity); the result is zeroed unless shift is below 128. All 16 bytes of limit and temp are filled because vec_sro reads its count from the last byte and the vec_cmpgt mask is computed per byte. */ inline void shr_imm_all(const UINT8 shift) + { + const vector unsigned char limit = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + const 
vector unsigned char temp = { shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift, shift }; + m_value = vec_and(vec_sro(m_value, temp), (vector unsigned int)vec_cmpgt(limit, temp)); + } + + /* Arithmetic right shift of every lane by the count held in lane 3 of shift, with the count capped at 31. */ inline void sra(const rgbaint_t& shift) + { + const vector unsigned int limit = { 31, 31, 31, 31 }; + m_value = vec_sra(m_value, vec_min(vec_splat(shift.m_value, 3), limit)); + } + + /* Arithmetic right shift of every lane by the immediate count. */ inline void sra_imm(const UINT8 shift) + { + const vector unsigned int temp = { shift, shift, shift, shift }; + m_value = vec_sra(m_value, temp); + } + + /* Bitwise OR with another value. */ inline void or_reg(const rgbaint_t& color2) + { + m_value = vec_or(m_value, color2.m_value); + } + + /* Bitwise OR of every lane with the same immediate. */ inline void or_imm(const UINT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_or(m_value, temp); + } + + /* Bitwise OR with per-lane immediates (a, r, g, b order). */ inline void or_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_or(m_value, temp); + } + + /* Bitwise AND with another value. */ inline void and_reg(const rgbaint_t& color) + { + m_value = vec_and(m_value, color.m_value); + } + + /* Bitwise AND of every lane with the same immediate. */ inline void and_imm(const UINT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_and(m_value, temp); + } + + /* Bitwise AND with per-lane immediates (a, r, g, b order). */ inline void and_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_and(m_value, temp); + } + + /* Bitwise XOR with another value. */ inline void xor_reg(const rgbaint_t& color2) + { + m_value = vec_xor(m_value, color2.m_value); + } + + /* Bitwise XOR of every lane with the same immediate. */ inline void xor_imm(const INT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_xor(m_value, temp); + } + + /* Bitwise XOR with per-lane immediates (a, r, g, b order). */ inline void xor_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_xor(m_value, temp); + } + + /* Zeroes every lane that has any bit of sign set, then caps the remaining lanes at ~(sign >> 1). */ inline void clamp_and_clear(const UINT32 sign) + { + const vector unsigned int vzero = vec_splat_u32(0); + vector unsigned int vsign = { sign, 
sign, sign, sign }; + /* clear lanes with any sign bit set */ m_value = vec_and(m_value, vec_cmpeq(vec_and(m_value, vsign), vzero)); + /* vec_nor with zero is a bitwise NOT: vsign becomes ~(sign >> 1) */ vsign = vec_nor(vec_sra(vsign, vec_splat_u32(1)), vzero); + const vector unsigned int mask = vec_cmpgt(m_value, vsign); + /* lanes above the cap take the cap, the others keep their value */ m_value = vec_or(vec_and(vsign, mask), vec_and(m_value, vec_nor(mask, vzero))); + } + + /* ORs sign into every lane in which all bits of compare are set. */ inline void sign_extend(const UINT32 compare, const UINT32 sign) + { + const vector unsigned int compare_vec = { compare, compare, compare, compare }; + const vector unsigned int compare_mask = vec_cmpeq(vec_and(m_value, compare_vec), compare_vec); + const vector unsigned int sign_vec = { sign, sign, sign, sign }; + m_value = vec_or(m_value, vec_and(sign_vec, compare_mask)); + } + + /* Lane-wise minimum against value; both operands are vector unsigned int, so the comparison is unsigned. */ inline void min(const UINT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_min(m_value, temp); + } + + /* Declared here only; the definitions are not in this header section. */ void blend(const rgbaint_t& other, UINT8 factor); + + void scale_and_clamp(const rgbaint_t& scale); + void scale_imm_and_clamp(const INT32 scale); + void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2); + void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other); + void scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other); + + /* cmpeq*: replace each lane with the result of vec_cmpeq against the operand (all ones where equal, zero otherwise). */ inline void cmpeq(const rgbaint_t& value) + { + m_value = vec_cmpeq(m_value, value.m_value); + } + + inline void cmpeq_imm(const UINT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_cmpeq(m_value, temp); + } + + inline void cmpeq_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_cmpeq(m_value, temp); + } + + /* cmpgt*: replace each lane with the result of vec_cmpgt(this, operand); operands are vector unsigned int, so the comparison is unsigned. */ inline void cmpgt(const rgbaint_t& value) + { + m_value = vec_cmpgt(m_value, value.m_value); + } + + inline void cmpgt_imm(const UINT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_cmpgt(m_value, temp); + } + + inline void cmpgt_imm_rgba(const UINT32 a, 
const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_cmpgt(m_value, temp); + } + + /* cmplt*: replace each lane with the result of vec_cmplt(this, operand); operands are vector unsigned int, so the comparison is unsigned. */ inline void cmplt(const rgbaint_t& value) + { + m_value = vec_cmplt(m_value, value.m_value); + } + + inline void cmplt_imm(const UINT32 value) + { + const vector unsigned int temp = { value, value, value, value }; + m_value = vec_cmplt(m_value, temp); + } + + inline void cmplt_imm_rgba(const UINT32 a, const UINT32 r, const UINT32 g, const UINT32 b) + { + const vector unsigned int temp = { a, r, g, b }; + m_value = vec_cmplt(m_value, temp); + } + + /* Copy assignment; returns a reference to this object, like the compound assignment operators below, instead of a temporary copy. */ inline rgbaint_t& operator=(const rgbaint_t& other) + { + m_value = other.m_value; + return *this; + } + + /* Lane-wise add of another value. */ inline rgbaint_t& operator+=(const rgbaint_t& other) + { + m_value = vec_add(m_value, other.m_value); + return *this; + } + + /* Adds the same immediate to every lane. */ inline rgbaint_t& operator+=(const INT32 other) + { + const vector unsigned int temp = { other, other, other, other }; + m_value = vec_add(m_value, temp); + return *this; + } + + /* Lane-wise subtract of another value. */ inline rgbaint_t& operator-=(const rgbaint_t& other) + { + m_value = vec_sub(m_value, other.m_value); + return *this; + } + + /* Lane-wise 32-bit multiply assembled from 16-bit partial products: the vec_mule cross terms are summed and shifted up 16 bits, then the vec_mulo term is added. vec_sl uses only the low 5 bits of each count, so vec_splat_u32(-16) acts as a shift by 16. */ inline rgbaint_t& operator*=(const rgbaint_t& other) + { + const vector unsigned int shift = vec_splat_u32(-16); + const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(other.m_value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), (vector unsigned short)other.m_value)); + m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)other.m_value)); + return *this; + } + + /* Multiplies every lane by the same immediate using the same partial-product scheme. */ inline rgbaint_t& operator*=(const INT32 other) + { + const vector unsigned int value = { other, other, other, other }; + const vector unsigned int shift = vec_splat_u32(-16); + const vector unsigned int temp = vec_add(vec_mule((vector unsigned short)m_value, (vector unsigned short)vec_sl(value, shift)), vec_mule((vector unsigned short)vec_sl(m_value, shift), 
(vector unsigned short)value)); + m_value = vec_add(vec_sl(temp, shift), vec_mulo((vector unsigned short)m_value, (vector unsigned short)value)); + return *this; + } + + /* Arithmetic right shift of every lane by the same count. */ inline rgbaint_t& operator>>=(const INT32 shift) + { + const vector unsigned int temp = { shift, shift, shift, shift }; + m_value = vec_sra(m_value, temp); + return *this; + } + + /* Replaces this value's alpha lane with the one from alpha and leaves the other lanes untouched. */ inline void merge_alpha(rgbaint_t& alpha) + { + /* vec_sel keeps m_value bits where alpha_mask is set and takes alpha's bits elsewhere; assumes alpha_mask is zero only in the alpha lane, as set_a relies on - TODO confirm */ + m_value = vec_sel(alpha.m_value, m_value, alpha_mask); + } + + /* Declared here only; the definition is not in this header section. */ static UINT32 bilinear_filter(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v); + +protected: + typedef vector unsigned char VECU8; + typedef vector unsigned short VECU16; + typedef vector unsigned int VECU32; + + /* four 32-bit lanes, used in a, r, g, b order by the *_rgba methods */ VECU32 m_value; + + static const VECU16 maxbyte; + static const VECU32 alpha_mask; + static const VECU32 red_mask; + static const VECU32 green_mask; + static const VECU32 blue_mask; + static const VECU16 scale_table[256]; +}; -/*------------------------------------------------- - rgba_to_rgbaint - converts a packed quad of RGB - components to an rgbint type --------------------------------------------------*/ - -INLINE void rgba_to_rgbaint(rgbaint *rgb, rgb_t const &color) -{ - vector signed char temp = (vector signed char)vec_perm((vector signed int)vec_lde(0, color.ptr()), vec_splat_s32(0), vec_lvsl(0, color.ptr())); - *rgb = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), temp); -} - - -/*------------------------------------------------- - rgbint_to_rgb - converts an rgbint back to - a packed trio of RGB values --------------------------------------------------*/ - -INLINE rgb_t rgbint_to_rgb(const rgbint *color) -{ - vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0); - UINT32 result; - vec_ste(temp, 0, &result); - return result; -} - - -/*------------------------------------------------- - rgbaint_to_rgba - converts an 
rgbint back to - a packed quad of RGB values --------------------------------------------------*/ - -INLINE rgb_t rgbaint_to_rgba(const rgbaint *color) -{ - vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0); - UINT32 result; - vec_ste(temp, 0, &result); - return result; -} - - -/*------------------------------------------------- - rgbint_to_rgb_clamp - converts an rgbint back - to a packed trio of RGB values, clamping them - to bytes first --------------------------------------------------*/ - -INLINE rgb_t rgbint_to_rgb_clamp(const rgbint *color) -{ - vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0); - UINT32 result; - vec_ste(temp, 0, &result); - return result; -} - - -/*------------------------------------------------- - rgbaint_to_rgba_clamp - converts an rgbint back - to a packed quad of RGB values, clamping them - to bytes first --------------------------------------------------*/ - -INLINE rgb_t rgbaint_to_rgba_clamp(const rgbaint *color) -{ - vector unsigned int temp = vec_splat((vector unsigned int)vec_packsu(*color, *color), 0); - UINT32 result; - vec_ste(temp, 0, &result); - return result; -} - - - -/*************************************************************************** - CORE MATH -***************************************************************************/ - -/*------------------------------------------------- - rgbint_add - add two rgbint values --------------------------------------------------*/ - -INLINE void rgbint_add(rgbint *color1, const rgbint *color2) -{ - *color1 = vec_add(*color1, *color2); -} - - -/*------------------------------------------------- - rgbaint_add - add two rgbaint values --------------------------------------------------*/ - -INLINE void rgbaint_add(rgbaint *color1, const rgbaint *color2) -{ - *color1 = vec_add(*color1, *color2); -} - - -/*------------------------------------------------- - rgbint_sub - subtract two rgbint values 
--------------------------------------------------*/ - -INLINE void rgbint_sub(rgbint *color1, const rgbint *color2) -{ - *color1 = vec_sub(*color1, *color2); -} - - -/*------------------------------------------------- - rgbaint_sub - subtract two rgbaint values --------------------------------------------------*/ - -INLINE void rgbaint_sub(rgbaint *color1, const rgbaint *color2) -{ - *color1 = vec_sub(*color1, *color2); -} - - -/*------------------------------------------------- - rgbint_subr - reverse subtract two rgbint - values --------------------------------------------------*/ - -INLINE void rgbint_subr(rgbint *color1, const rgbint *color2) -{ - *color1 = vec_sub(*color2, *color1); -} - - -/*------------------------------------------------- - rgbaint_subr - reverse subtract two rgbaint - values --------------------------------------------------*/ - -INLINE void rgbaint_subr(rgbaint *color1, const rgbaint *color2) -{ - *color1 = vec_sub(*color2, *color1); -} - - - -/*************************************************************************** - TABLES -***************************************************************************/ - -extern const struct _rgbvmx_statics -{ - rgbaint maxbyte; - rgbaint scale_table[256]; -} rgbvmx_statics; - - - -/*************************************************************************** - HIGHER LEVEL OPERATIONS -***************************************************************************/ - -/*------------------------------------------------- - rgbint_blend - blend two colors by the given - scale factor --------------------------------------------------*/ - -INLINE void rgbint_blend(rgbint *color1, const rgbint *color2, UINT8 color1scale) -{ - vector signed int temp; - *color1 = vec_mergeh(*color1, *color2); - temp = vec_msum(*color1, rgbvmx_statics.scale_table[color1scale], vec_splat_s32(0)); - temp = (vector signed int)vec_sr(temp, vec_splat_u32(8)); - *color1 = vec_packs(temp, temp); -} - - 
-/*------------------------------------------------- - rgbaint_blend - blend two colors by the given - scale factor --------------------------------------------------*/ - -INLINE void rgbaint_blend(rgbaint *color1, const rgbaint *color2, UINT8 color1scale) -{ - vector signed int temp; - *color1 = vec_mergeh(*color1, *color2); - temp = vec_msum(*color1, rgbvmx_statics.scale_table[color1scale], vec_splat_s32(0)); - temp = (vector signed int)vec_sr(temp, vec_splat_u32(8)); - *color1 = vec_packs(temp, temp); -} - - -/*------------------------------------------------- - rgbint_scale_and_clamp - scale the given - color by an 8.8 scale factor, immediate or - per channel, and clamp to byte values --------------------------------------------------*/ - -INLINE void rgbint_scale_immediate_and_clamp(rgbint *color, INT16 colorscale) -{ - rgbint splatmap = vec_splat((rgbint)vec_lvsl(0, &colorscale), 0); - rgbint vecscale = vec_lde(0, &colorscale); - vector signed int temp; - vecscale = (rgbint)vec_perm(vecscale, vecscale, (vector unsigned char)splatmap); - *color = (rgbint)vec_mergeh(*color, (rgbint)vec_splat_s32(0)); - temp = vec_msum(*color, vecscale, vec_splat_s32(0)); - temp = (vector signed int)vec_sr(temp, vec_splat_u32(8)); - *color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte); -} - -INLINE void rgbint_scale_channel_and_clamp(rgbint *color, const rgbint *colorscale) -{ - rgbint vecscale = (rgbint)vec_mergeh(*colorscale, (rgbint)vec_splat_s32(0)); - vector signed int temp; - *color = (rgbint)vec_mergeh(*color, (rgbint)vec_splat_s32(0)); - temp = vec_msum(*color, vecscale, vec_splat_s32(0)); - temp = (vector signed int)vec_sr(temp, vec_splat_u32(8)); - *color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte); -} - - -/*------------------------------------------------- - rgbaint_scale_and_clamp - scale the given - color by an 8.8 scale factor, immediate or - per channel, and clamp to byte values --------------------------------------------------*/ - 
-INLINE void rgbaint_scale_immediate_and_clamp(rgbaint *color, INT16 colorscale) -{ - rgbaint splatmap = vec_splat((rgbaint)vec_lvsl(0, &colorscale), 0); - rgbaint vecscale = vec_lde(0, &colorscale); - vector signed int temp; - vecscale = (rgbaint)vec_perm(vecscale, vecscale, (vector unsigned char)splatmap); - *color = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0)); - temp = vec_msum(*color, vecscale, vec_splat_s32(0)); - temp = (vector signed int)vec_sr(temp, vec_splat_u32(8)); - *color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte); -} - -INLINE void rgbaint_scale_channel_and_clamp(rgbaint *color, const rgbint *colorscale) -{ - rgbaint vecscale = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0)); - vector signed int temp; - *color = (rgbaint)vec_mergeh(*color, (rgbaint)vec_splat_s32(0)); - temp = vec_msum(*color, vecscale, vec_splat_s32(0)); - temp = (vector signed int)vec_sr(temp, vec_splat_u32(8)); - *color = vec_min(vec_packs(temp, temp), rgbvmx_statics.maxbyte); -} - - -/*------------------------------------------------- - rgb_bilinear_filter - bilinear filter between - four pixel values --------------------------------------------------*/ - -INLINE rgb_t rgb_bilinear_filter(rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v) -{ - rgbint color00 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr())); - rgbint color01 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr())); - rgbint color10 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr())); - rgbint color11 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr())); - - /* interleave color01 and color00 at the byte level */ - color01 = (rgbint)vec_mergeh((vector signed char)color01, (vector signed char)color00); - color11 = 
(rgbint)vec_mergeh((vector signed char)color11, (vector signed char)color10); - color01 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01); - color11 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11); - color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color11 = (rgbint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(1)); - color11 = (rgbint)vec_sl((vector signed int)color11, vec_splat_u32(15)); - color01 = vec_max(color01, color11); - color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0)); - color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(15)); - color01 = vec_packs((vector signed int)color01, (vector signed int)color01); - color01 = (rgbint)vec_packsu(color01, color01); - - UINT32 result; - vec_ste((vector unsigned int)color01, 0, &result); - return result; -} - - -/*------------------------------------------------- - rgba_bilinear_filter - bilinear filter between - four pixel values --------------------------------------------------*/ - -INLINE rgb_t rgba_bilinear_filter(rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v) -{ - rgbaint color00 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr())); - rgbaint color01 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr())); - rgbaint color10 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr())); - rgbaint color11 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr())); - - /* interleave color01 and color00 at the byte level */ - color01 = (rgbaint)vec_mergeh((vector signed char)color01, (vector signed char)color00); 
- color11 = (rgbaint)vec_mergeh((vector signed char)color11, (vector signed char)color10); - color01 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01); - color11 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11); - color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color11 = (rgbaint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(1)); - color11 = (rgbaint)vec_sl((vector signed int)color11, vec_splat_u32(15)); - color01 = vec_max(color01, color11); - color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0)); - color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(15)); - color01 = vec_packs((vector signed int)color01, (vector signed int)color01); - color01 = (rgbaint)vec_packsu(color01, color01); - - UINT32 result; - vec_ste((vector unsigned int)color01, 0, &result); - return result; -} - - -/*------------------------------------------------- - rgbint_bilinear_filter - bilinear filter between - four pixel values --------------------------------------------------*/ - -INLINE void rgbint_bilinear_filter(rgbint *color, rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v) -{ - rgbint color00 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr())); - rgbint color01 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr())); - rgbint color10 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr())); - rgbint color11 = (rgbint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr())); - - /* interleave color01 and color00 at the byte level */ - color01 = (rgbint)vec_mergeh((vector signed char)color01, 
(vector signed char)color00); - color11 = (rgbint)vec_mergeh((vector signed char)color11, (vector signed char)color10); - color01 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01); - color11 = (rgbint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11); - color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color11 = (rgbint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(1)); - color11 = (rgbint)vec_sl((vector signed int)color11, vec_splat_u32(15)); - color01 = vec_max(color01, color11); - color01 = (rgbint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0)); - color01 = (rgbint)vec_sr((vector signed int)color01, vec_splat_u32(15)); - *color = vec_packs((vector signed int)color01, (vector signed int)color01); -} - - -/*------------------------------------------------- - rgbaint_bilinear_filter - bilinear filter between - four pixel values --------------------------------------------------*/ - -INLINE void rgbaint_bilinear_filter(rgbaint *color, rgb_t const &rgb00, rgb_t const &rgb01, rgb_t const &rgb10, rgb_t const &rgb11, UINT8 u, UINT8 v) -{ - rgbaint color00 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb00.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb00.ptr())); - rgbaint color01 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb01.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb01.ptr())); - rgbaint color10 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb10.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb10.ptr())); - rgbaint color11 = (rgbaint)vec_perm((vector signed int)vec_lde(0, rgb11.ptr()), vec_splat_s32(0), vec_lvsl(0, rgb11.ptr())); - - /* interleave color01 and color00 at the byte level */ - color01 = (rgbaint)vec_mergeh((vector signed char)color01, (vector signed char)color00); - color11 = (rgbaint)vec_mergeh((vector signed char)color11, (vector signed 
char)color10); - color01 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color01); - color11 = (rgbaint)vec_mergeh((vector signed char)vec_splat_s32(0), (vector signed char)color11); - color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color11 = (rgbaint)vec_msum(color11, rgbvmx_statics.scale_table[u], vec_splat_s32(0)); - color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(1)); - color11 = (rgbaint)vec_sl((vector signed int)color11, vec_splat_u32(15)); - color01 = vec_max(color01, color11); - color01 = (rgbaint)vec_msum(color01, rgbvmx_statics.scale_table[v], vec_splat_s32(0)); - color01 = (rgbaint)vec_sr((vector signed int)color01, vec_splat_u32(15)); - *color = vec_packs((vector signed int)color01, (vector signed int)color01); -} // altivec.h somehow redefines "bool" in a bad way on PowerPC Mac OS X. really. #ifdef OSX_PPC