mame/3rdparty/lzma/C/AesOpt.c
Vas Crabb 97b6717027 (nw) Clean up the mess on master
This effectively reverts b380514764 and
c24473ddff, restoring the state at
598cd52272.

Before pushing, please check that what you're about to push is sane.
Check your local commit log and ensure there isn't anything out-of-place
before pushing to mainline.  When things like this happen, it wastes
everyone's time.  I really don't need this in a week when real work™ is
busting my balls and I'm behind where I want to be with preparing for
MAME release.
2019-03-26 11:13:37 +11:00

185 lines
4.4 KiB
C

/* AesOpt.c -- Intel's AES
2013-11-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if _MSC_VER >= 1500 && !defined(__clang__)
#define USE_INTEL_AES
#endif
#endif
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
__m128i m = *p;
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p + 3;
m = _mm_xor_si128(m, *data);
m = _mm_xor_si128(m, p[2]);
do
{
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenc_si128(m, w[1]);
w += 2;
}
while (--numRounds2 != 0);
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenclast_si128(m, w[1]);
*data = m;
}
*p = m;
}
#define NUM_WAYS 3
#define AES_OP_W(op, n) { \
const __m128i t = w[n]; \
m0 = op(m0, t); \
m1 = op(m1, t); \
m2 = op(m2, t); \
}
#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
__m128i iv = *p;
for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1);
const __m128i *w = p + numRounds2 * 2;
__m128i m0, m1, m2;
{
const __m128i t = w[2];
m0 = _mm_xor_si128(t, data[0]);
m1 = _mm_xor_si128(t, data[1]);
m2 = _mm_xor_si128(t, data[2]);
}
numRounds2--;
do
{
AES_DEC(1)
AES_DEC(0)
w -= 2;
}
while (--numRounds2 != 0);
AES_DEC(1)
AES_DEC_LAST(0)
{
__m128i t;
t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
}
}
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1);
const __m128i *w = p + numRounds2 * 2;
__m128i m = _mm_xor_si128(w[2], *data);
numRounds2--;
do
{
m = _mm_aesdec_si128(m, w[1]);
m = _mm_aesdec_si128(m, w[0]);
w -= 2;
}
while (--numRounds2 != 0);
m = _mm_aesdec_si128(m, w[1]);
m = _mm_aesdeclast_si128(m, w[0]);
m = _mm_xor_si128(m, iv);
iv = *data;
*data = m;
}
*p = iv;
}
void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
{
__m128i ctr = *p;
__m128i one;
one.m128i_u64[0] = 1;
one.m128i_u64[1] = 0;
for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p;
__m128i m0, m1, m2;
{
const __m128i t = w[2];
ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
}
w += 3;
do
{
AES_ENC(0)
AES_ENC(1)
w += 2;
}
while (--numRounds2 != 0);
AES_ENC(0)
AES_ENC_LAST(1)
data[0] = _mm_xor_si128(data[0], m0);
data[1] = _mm_xor_si128(data[1], m1);
data[2] = _mm_xor_si128(data[2], m2);
}
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p;
__m128i m;
ctr = _mm_add_epi64(ctr, one);
m = _mm_xor_si128(ctr, p[2]);
w += 3;
do
{
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenc_si128(m, w[1]);
w += 2;
}
while (--numRounds2 != 0);
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenclast_si128(m, w[1]);
*data = _mm_xor_si128(*data, m);
}
*p = ctr;
}
#else
void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
AesCbc_Encode(p, data, numBlocks);
}
void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
AesCbc_Decode(p, data, numBlocks);
}
void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
AesCtr_Code(p, data, numBlocks);
}
#endif