/* Contrary to what the next comment says, this is now an amd64 CPU
test. */
/*
* x86 CPU test
*
* Copyright (c) 2003 Fabrice Bellard
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* (This is a heavily cut-down "BSD license".)
*
* This differs from Colin Plumb's older public domain implementation in that
* no exactly 32-bit integer data type is required (any 32-bit or wider
* unsigned integer data type will do), there's no compile-time endianness
* configuration, and the function prototypes match OpenSSL's. No code from
* Colin Plumb's implementation has been reused; this comment merely compares
* the properties of the two independent implementations.
*
* The primary goals of this implementation are portability and ease of use.
* It is meant to be fast, but not as fast as possible. Some known
* optimizations are not included to reduce source code size and avoid
* compile-time configuration.
*/
#include <string.h>
// BEGIN #include "md5.h"
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;

/*
 * State of one in-progress MD5 computation.
 *
 * lo, hi   message length so far: lo holds the low 29 bits of the byte
 *          count (MD5_Update masks it with 0x1fffffff, MD5_Final shifts it
 *          left by 3 to obtain bits); hi accumulates the carry and the
 *          bits of each chunk size above bit 28.
 * a..d     the four chaining variables.
 * buffer   bytes of the current, not yet complete, 64-byte block.
 * block    decoded 32-bit words; written only by the portable (non-x86)
 *          variant of the SET macro.
 */
typedef struct {
    MD5_u32plus lo, hi;
    MD5_u32plus a, b, c, d;
    unsigned char buffer[64];
    MD5_u32plus block[16];
} MD5_CTX;

/* Reset ctx to the start-of-message state. */
void MD5_Init(MD5_CTX *ctx);
/* Feed size bytes starting at data into the hash. */
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
/* Finish the hash, write the 16-byte digest to result and wipe ctx. */
void MD5_Final(unsigned char *result, MD5_CTX *ctx);
// END #include "md5.h"
/*
* The basic MD5 functions.
*
* F and G are optimized compared to their RFC 1321 definitions for
* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
* implementation.
*/
/* F: bitwise select -- where x has a 1 take y, otherwise take z. */
#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
/* G: bitwise select -- where z has a 1 take x, otherwise take y. */
#define G(x, y, z) ((((x) ^ (y)) & (z)) ^ (y))
/* H and H2 compute the same three-way XOR; they differ only in which pair
 * is combined first (H: x^y, H2: y^z). */
#define H(x, y, z) ((z) ^ ((x) ^ (y)))
#define H2(x, y, z) (((y) ^ (z)) ^ (x))
/* I: y XOR (x OR NOT z). */
#define I(x, y, z) ((~(z) | (x)) ^ (y))
/*
* The MD5 transformation for all four rounds.
*
* One step computes a = b + rotate_left_32(a + f(b, c, d) + x + t, s).
* The "& 0xffffffff" before the right shift keeps the rotate correct when
* MD5_u32plus is wider than 32 bits.
*
* The expansion is three statements that carry their own semicolons, which
* is why the call sites in body() are written without a trailing ';'.  For
* the same reason it must not be used as the unbraced body of an if/loop.
* The argument "a" is evaluated several times.
*/
#define STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
(a) += (b);
/*
* SET reads 4 input bytes in little-endian byte order and stores them in a
* properly aligned word in host byte order.
*
* The check for little-endian architectures that tolerate unaligned memory
* accesses is just an optimization. Nothing will break if it fails to detect
* a suitable architecture.
*
* Unfortunately, this optimization may be a C strict aliasing rules violation
* if the caller's data buffer has effective type that cannot be aliased by
* MD5_u32plus. In practice, this problem may occur if these MD5 routines are
* inlined into a calling function, or with future and dangerously advanced
* link-time optimizations. For the time being, keeping these MD5 routines in
* their own translation unit avoids the problem.
*/
/* Both macros rely on locals named "ptr" (current input block) and, in the
 * portable variant, "ctx" being in scope at the point of use. */
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
/* Fast path: read the n-th 32-bit word straight out of the input buffer
 * through a cast pointer (unaligned, little-endian load). */
#define SET(n) \
(*(MD5_u32plus *)&ptr[(n) * 4])
/* Nothing is cached on this path, so GET simply re-reads the input. */
#define GET(n) \
SET(n)
#else
/* Portable path: assemble the word byte by byte (little-endian) and cache
 * it in ctx->block[n]; the value of the expression is the stored word. */
#define SET(n) \
(ctx->block[(n)] = \
(MD5_u32plus)ptr[(n) * 4] | \
((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
/* Fetch the word previously cached by SET(n). */
#define GET(n) \
(ctx->block[(n)])
#endif
/*
* This processes one or more 64-byte data blocks, but does NOT update the bit
* counters. There are no alignment requirements.
*/
/*
 * Run the MD5 compression function over consecutive 64-byte blocks.
 *
 * ctx   hash state; a, b, c, d are read on entry and written back on exit
 *       (ctx->block is also written when the portable SET macro is in use).
 * data  first byte of the input blocks.
 * size  number of bytes to process.  Must be a non-zero multiple of 64: the
 *       do/while below runs at least once and only stops when "size -= 64"
 *       reaches exactly zero.
 *
 * Returns a pointer to the first byte after the last processed block.
 */
static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
{
const unsigned char *ptr;
MD5_u32plus a, b, c, d;
MD5_u32plus saved_a, saved_b, saved_c, saved_d;
ptr = (const unsigned char *)data;
/* Work on local copies of the chaining variables. */
a = ctx->a;
b = ctx->b;
c = ctx->c;
d = ctx->d;
do {
/* Remember the state at the start of this block; it is added back in
 * after the 64 steps. */
saved_a = a;
saved_b = b;
saved_c = c;
saved_d = d;
/* Round 1 */
/* SET(n) decodes input word n; the later rounds re-read it via GET(n). */
STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
/* Round 2 */
STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
/* Round 3 */
/* Alternates H and H2, which compute the same XOR with different grouping. */
STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
/* Round 4 */
STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
/* Fold this block's result into the running state. */
a += saved_a;
b += saved_b;
c += saved_c;
d += saved_d;
/* Advance to the next 64-byte block. */
ptr += 64;
} while (size -= 64);
/* Publish the updated chaining variables. */
ctx->a = a;
ctx->b = b;
ctx->c = c;
ctx->d = d;
return ptr;
}
/*
 * Put ctx into the start-of-message state: zero length counters and the
 * four fixed initial chaining values.  The data buffer is left untouched.
 */
void MD5_Init(MD5_CTX *ctx)
{
    /* Nothing has been hashed yet. */
    ctx->lo = 0;
    ctx->hi = 0;

    /* Initial chaining values. */
    ctx->a = 0x67452301;
    ctx->b = 0xefcdab89;
    ctx->c = 0x98badcfe;
    ctx->d = 0x10325476;
}
/*
 * Absorb size bytes starting at data into the hash.
 *
 * Updates the length counters, tops up and flushes any partially filled
 * block held in ctx->buffer, hashes all complete 64-byte blocks directly
 * from the caller's memory, and stashes the remaining tail (0..63 bytes) in
 * ctx->buffer for the next call.
 */
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
{
    const unsigned char *src = (const unsigned char *)data;
    const MD5_u32plus prev_lo = ctx->lo;
    unsigned long buffered;

    /* lo keeps the low 29 bits of the byte count; a wrap carries into hi. */
    ctx->lo = (prev_lo + size) & 0x1fffffff;
    if (ctx->lo < prev_lo)
        ctx->hi++;
    ctx->hi += size >> 29;

    /* Bytes already waiting in the buffer from earlier calls. */
    buffered = prev_lo & 0x3f;
    if (buffered != 0) {
        unsigned long room = 64 - buffered;

        if (size < room) {
            /* Still not a full block: just append and wait for more. */
            memcpy(&ctx->buffer[buffered], src, size);
            return;
        }

        /* Complete the pending block and hash it. */
        memcpy(&ctx->buffer[buffered], src, room);
        src += room;
        size -= room;
        body(ctx, ctx->buffer, 64);
    }

    /* Hash every whole block in place, without copying. */
    if (size >= 64) {
        src = body(ctx, src, size & ~(unsigned long)0x3f);
        size &= 0x3f;
    }

    /* Keep the leftover tail for the next call. */
    memcpy(ctx->buffer, src, size);
}
/*
 * Store the low 32 bits of src into dst[0..3], least significant byte
 * first.  Expands to four statements with their own semicolons (call sites
 * omit the trailing ';'); both arguments are evaluated four times.
 */
#define OUT(dst, src) \
(dst)[0] = (unsigned char)(src); \
(dst)[1] = (unsigned char)((src) >> 8); \
(dst)[2] = (unsigned char)((src) >> 16); \
(dst)[3] = (unsigned char)((src) >> 24);
/*
 * Finish the hash: append the 0x80 marker, zero padding and the 64-bit
 * message length in bits, process the final block(s), write the 16-byte
 * digest to result, then zero the whole context.
 */
void MD5_Final(unsigned char *result, MD5_CTX *ctx)
{
    unsigned long fill = ctx->lo & 0x3f;
    unsigned long room;

    /* Mandatory padding marker right after the data. */
    ctx->buffer[fill] = 0x80;
    fill++;
    room = 64 - fill;

    /* Not enough space left for the 8-byte length: pad this block out,
     * hash it, and continue in a fresh one. */
    if (room < 8) {
        memset(&ctx->buffer[fill], 0, room);
        body(ctx, ctx->buffer, 64);
        fill = 0;
        room = 64;
    }

    /* Zero everything up to the length field. */
    memset(&ctx->buffer[fill], 0, room - 8);

    /* Convert the low byte count into a bit count and append lo, hi. */
    ctx->lo <<= 3;
    OUT(&ctx->buffer[56], ctx->lo)
    OUT(&ctx->buffer[60], ctx->hi)
    body(ctx, ctx->buffer, 64);

    /* Emit the digest, little-endian word by word. */
    OUT(&result[0], ctx->a)
    OUT(&result[4], ctx->b)
    OUT(&result[8], ctx->c)
    OUT(&result[12], ctx->d)

    /* Leave no state behind. */
    memset(ctx, 0, sizeof(*ctx));
}
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
/* Single hash context that accumulates everything passed to xxprintf(). */
static MD5_CTX md5ctx;

/* Begin a new output digest; call before the first xxprintf(). */
void xxprintf_start(void)
{
    MD5_Init(&md5ctx);
}
/*
 * Finalise the digest of everything sent through xxprintf() and print it
 * to stdout as "final MD5 = " followed by 32 lowercase hex digits and a
 * newline.  The hash context is wiped by MD5_Final().
 */
void xxprintf_done(void)
{
    const char hexchar[16] = "0123456789abcdef";
    unsigned char result[100];
    int i;

    /* Oversized scratch area; only the first 16 bytes carry the digest. */
    memset(result, 0, sizeof(result));
    MD5_Final(&result[0], &md5ctx);

    printf("final MD5 = ");
    for (i = 0; i != 16; ++i) {
        const unsigned char byte = result[i];
        const char hi = hexchar[0xF & (byte >> 4)];
        const char lo = hexchar[0xF & (byte >> 0)];

        printf("%c%c", hi, lo);
    }
    printf("\n");
}
/*
 * printf-style sink: formats the arguments into a local buffer and feeds
 * the resulting text into the global MD5 context instead of printing it.
 *
 * Each formatted message must be shorter than 126 characters; longer output
 * (or a vsnprintf error, whose negative return converts to a huge unsigned
 * value in the comparison) trips the first assert.
 */
__attribute__((format(__printf__, 1, 2)))
void xxprintf (const char *format, ...)
{
    char buf[128];
    va_list vargs;
    int n;

    /* Pre-zero so the last two bytes can serve as overflow canaries. */
    memset(buf, 0, sizeof(buf));

    va_start(vargs, format);
    n = vsnprintf(buf, sizeof(buf)-1, format, vargs);
    va_end(vargs);

    /* No truncation, and the tail of the buffer is still untouched. */
    assert(n < sizeof(buf)-1);
    assert(buf[sizeof(buf)-1] == 0);
    assert(buf[sizeof(buf)-2] == 0);

    MD5_Update(&md5ctx, buf, strlen(buf));

    /* Flip to 1 to see the raw text while debugging. */
    if (0) printf("QQQ %s", buf);
}
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
/* Setting this to 1 creates a very comprehensive test of
integer condition codes. */
/* NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably consumed by the included fb_test_amd64*.h templates -- confirm. */
#define TEST_INTEGER_VERBOSE 1
/* 64-bit signed integer used for register-sized operands and flags. */
typedef long long int int64;
//#define LINUX_VM86_IOPL_FIX
/* When defined, the mul/imul tests compare all six arithmetic flags instead
 * of only O and C (see the CC_MASK selection before the mul templates). */
//#define TEST_P4_FLAGS
/* Token pasting / stringification helpers; the two-level form lets macro
 * arguments be expanded before they are pasted or stringified. */
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s) tostring(s)
#define tostring(s) #s
/* Bit positions of the arithmetic condition codes within RFLAGS. */
#define CC_C 0x0001
#define CC_P 0x0004
#define CC_A 0x0010
#define CC_Z 0x0040
#define CC_S 0x0080
#define CC_O 0x0800
/* Flags that are compared in test output.  CC_MASK is redefined several
 * times further down; each use picks up the definition in force at the
 * point where the using macro is expanded. */
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
/*
 * Instantiate the integer test templates, once per instruction.
 *
 * Each group sets OP (the mnemonic) plus optional feature switches and then
 * includes a template header.  NOTE(review): the headers are not visible
 * here.  Since main() calls test_add(), test_adc(), test_shl(), ... and OP
 * is re-#defined without an intervening #undef, each inclusion presumably
 * defines test_<OP>() and #undefs OP / OP_CC / OP1 / NSH before returning --
 * confirm against fb_test_amd64.h and fb_test_amd64_shift.h.
 * Presumed switch meanings (confirm): OP_CC = result depends on incoming
 * flags, OP1 = single-operand instruction, NSH = template-specific switch
 * used only for adcx/adox.
 */
/* Two-operand ALU instructions, all six flags compared. */
#define OP add
#include "fb_test_amd64.h"
#define OP sub
#include "fb_test_amd64.h"
#define OP xor
#include "fb_test_amd64.h"
#define OP and
#include "fb_test_amd64.h"
#define OP or
#include "fb_test_amd64.h"
#define OP cmp
#include "fb_test_amd64.h"
#define OP adc
#define OP_CC
#include "fb_test_amd64.h"
#define OP sbb
#define OP_CC
#include "fb_test_amd64.h"
#define OP adcx
#define NSH
#define OP_CC
#include "fb_test_amd64.h"
#define OP adox
#define NSH
#define OP_CC
#include "fb_test_amd64.h"
/* Single-operand ALU instructions. */
#define OP inc
#define OP_CC
#define OP1
#include "fb_test_amd64.h"
#define OP dec
#define OP_CC
#define OP1
#include "fb_test_amd64.h"
#define OP neg
#define OP_CC
#define OP1
#include "fb_test_amd64.h"
#define OP not
#define OP_CC
#define OP1
#include "fb_test_amd64.h"
/* Shifts and rotates: the auxiliary-carry flag (CC_A) is dropped from the
 * comparison mask. */
#undef CC_MASK
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
#define OP shl
#include "fb_test_amd64_shift.h"
#define OP shr
#include "fb_test_amd64_shift.h"
#define OP sar
#include "fb_test_amd64_shift.h"
#define OP rol
#include "fb_test_amd64_shift.h"
#define OP ror
#include "fb_test_amd64_shift.h"
#define OP rcr
#define OP_CC
#include "fb_test_amd64_shift.h"
#define OP rcl
#define OP_CC
#include "fb_test_amd64_shift.h"
/* XXX: should be more precise ? */
/* From here on only the carry flag is part of the mask. */
#undef CC_MASK
#define CC_MASK (CC_C)
/* lea test (modrm support) */
/*
 * TEST_LEA(STR): execute "leaq STR, <reg>" with rax/rbx/rcx/rdx/rsi/rdi
 * pinned to the like-named locals of the enclosing function, and record the
 * computed address.  STR is an AT&T-syntax memory operand; register names
 * must be written with "%%" because the string is an asm template.  Expects
 * int64 locals rax, rbx, rcx, rdx, rsi, rdi and res in scope.
 */
#define TEST_LEA(STR)\
{\
asm("leaq " STR ", %0"\
: "=r" (res)\
: "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\
xxprintf("lea %s = %016llx\n", STR, res);\
}
/*
 * TEST_LEA16(STR): 16-bit addressing variant.
 * NOTE(review): not used anywhere in the visible part of this file.  It
 * refers to locals eax..edi (test_lea declares rax..rdi) and switches the
 * assembler between .code16 and .code32, so it looks like a leftover from a
 * 32-bit version of this test -- confirm before reusing.
 */
#define TEST_LEA16(STR)\
{\
asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
: "=wq" (res)\
: "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
xxprintf("lea %s = %08x\n", STR, res);\
}
/*
 * Exercise LEA address computation across the ModRM/SIB addressing forms:
 * absolute, base, base+disp8, base+disp32, base+index, scaled index with and
 * without base, and negative displacements.  Each register holds a distinct
 * power of two so every term's contribution is visible in the result.
 * The order of the cases is part of the hashed output.
 */
void test_lea(void)
{
int64 rax, rbx, rcx, rdx, rsi, rdi, res;
rax = 0x0001;
rbx = 0x0002;
rcx = 0x0004;
rdx = 0x0008;
rsi = 0x0010;
rdi = 0x0020;
/* absolute address */
TEST_LEA("0x4000");
/* base register only */
TEST_LEA("(%%rax)");
TEST_LEA("(%%rbx)");
TEST_LEA("(%%rcx)");
TEST_LEA("(%%rdx)");
TEST_LEA("(%%rsi)");
TEST_LEA("(%%rdi)");
/* base + small displacement */
TEST_LEA("0x40(%%rax)");
TEST_LEA("0x40(%%rbx)");
TEST_LEA("0x40(%%rcx)");
TEST_LEA("0x40(%%rdx)");
TEST_LEA("0x40(%%rsi)");
TEST_LEA("0x40(%%rdi)");
/* base + large displacement */
TEST_LEA("0x4000(%%rax)");
TEST_LEA("0x4000(%%rbx)");
TEST_LEA("0x4000(%%rcx)");
TEST_LEA("0x4000(%%rdx)");
TEST_LEA("0x4000(%%rsi)");
TEST_LEA("0x4000(%%rdi)");
/* base + index */
TEST_LEA("(%%rax, %%rcx)");
TEST_LEA("(%%rbx, %%rdx)");
TEST_LEA("(%%rcx, %%rcx)");
TEST_LEA("(%%rdx, %%rcx)");
TEST_LEA("(%%rsi, %%rcx)");
TEST_LEA("(%%rdi, %%rcx)");
/* base + index + displacement */
TEST_LEA("0x40(%%rax, %%rcx)");
TEST_LEA("0x4000(%%rbx, %%rdx)");
/* base + scaled index */
TEST_LEA("(%%rcx, %%rcx, 2)");
TEST_LEA("(%%rdx, %%rcx, 4)");
TEST_LEA("(%%rsi, %%rcx, 8)");
/* scaled index without base */
TEST_LEA("(,%%rax, 2)");
TEST_LEA("(,%%rbx, 4)");
TEST_LEA("(,%%rcx, 8)");
TEST_LEA("0x40(,%%rax, 2)");
TEST_LEA("0x40(,%%rbx, 4)");
TEST_LEA("0x40(,%%rcx, 8)");
/* negative displacement + base + scaled index */
TEST_LEA("-10(%%rcx, %%rcx, 2)");
TEST_LEA("-10(%%rdx, %%rcx, 4)");
TEST_LEA("-10(%%rsi, %%rcx, 8)");
/* large displacement + base + scaled index */
TEST_LEA("0x4000(%%rcx, %%rcx, 2)");
TEST_LEA("0x4000(%%rdx, %%rcx, 4)");
TEST_LEA("0x4000(%%rsi, %%rcx, 8)");
}
/*
 * TEST_JCC(JCC, v1, v2): compare v1 with v2 ("cmpl v2, v1" in AT&T order,
 * i.e. flags of v1 - v2) and record how condition JCC evaluates through four
 * instruction families:
 *   jJCC      - 1 if the branch is taken, 0 otherwise
 *   setJCC    - byte result of the set instruction
 *   cmovJCCl  - 32-bit conditional move of 1 over 0x12345678
 *   cmovJCCw  - 16-bit conditional move (only the low half is replaced)
 *
 * JCC is a string literal holding the condition suffix ("ne", "l", ...).
 *
 * Every asm writes its output register before it reads the input operands,
 * so the output carries the earlyclobber modifier ("=&r").  Without it the
 * compiler is free to place the output in the same register as v1, v2 or
 * the address of "one", which would corrupt the comparison depending on
 * register allocation (and hence on optimisation level).  "cc" documents
 * that the flags are modified.
 */
#define TEST_JCC(JCC, v1, v2)\
{   int one = 1; \
    int res;\
    asm("movl $1, %0\n\t"\
        "cmpl %2, %1\n\t"\
        "j" JCC " 1f\n\t"\
        "movl $0, %0\n\t"\
        "1:\n\t"\
        : "=&r" (res)\
        : "r" (v1), "r" (v2)\
        : "cc");\
    xxprintf("%-10s %d\n", "j" JCC, res);\
\
    asm("movl $0, %0\n\t"\
        "cmpl %2, %1\n\t"\
        "set" JCC " %b0\n\t"\
        : "=&r" (res)\
        : "r" (v1), "r" (v2)\
        : "cc");\
    xxprintf("%-10s %d\n", "set" JCC, res);\
    {\
        asm("movl $0x12345678, %0\n\t"\
            "cmpl %2, %1\n\t"\
            "cmov" JCC "l %3, %0\n\t"\
            : "=&r" (res)\
            : "r" (v1), "r" (v2), "m" (one)\
            : "cc");\
        xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\
        asm("movl $0x12345678, %0\n\t"\
            "cmpl %2, %1\n\t"\
            "cmov" JCC "w %w3, %w0\n\t"\
            : "=&r" (res)\
            : "r" (v1), "r" (v2), "r" (one)\
            : "cc");\
        xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\
    } \
}
/* various jump tests */
/*
 * Run TEST_JCC for every condition code with operand pairs chosen to hit
 * both the taken and the not-taken case (equal, greater, and a negative /
 * large-unsigned second operand).  Each TEST_JCC(cc, v1, v2) compares v1
 * against v2.  Currently not called from main().
 */
void test_jcc(void)
{
/* equality */
TEST_JCC("ne", 1, 1);
TEST_JCC("ne", 1, 0);
TEST_JCC("e", 1, 1);
TEST_JCC("e", 1, 0);
/* signed comparisons */
TEST_JCC("l", 1, 1);
TEST_JCC("l", 1, 0);
TEST_JCC("l", 1, -1);
TEST_JCC("le", 1, 1);
TEST_JCC("le", 1, 0);
TEST_JCC("le", 1, -1);
TEST_JCC("ge", 1, 1);
TEST_JCC("ge", 1, 0);
TEST_JCC("ge", -1, 1);
TEST_JCC("g", 1, 1);
TEST_JCC("g", 1, 0);
TEST_JCC("g", 1, -1);
/* unsigned comparisons (-1 acts as the largest unsigned value) */
TEST_JCC("b", 1, 1);
TEST_JCC("b", 1, 0);
TEST_JCC("b", 1, -1);
TEST_JCC("be", 1, 1);
TEST_JCC("be", 1, 0);
TEST_JCC("be", 1, -1);
TEST_JCC("ae", 1, 1);
TEST_JCC("ae", 1, 0);
TEST_JCC("ae", 1, -1);
TEST_JCC("a", 1, 1);
TEST_JCC("a", 1, 0);
TEST_JCC("a", 1, -1);
/* parity */
TEST_JCC("p", 1, 1);
TEST_JCC("p", 1, 0);
TEST_JCC("np", 1, 1);
TEST_JCC("np", 1, 0);
/* overflow: 0x7fffffff - (-1) overflows the signed range */
TEST_JCC("o", 0x7fffffff, 0);
TEST_JCC("o", 0x7fffffff, -1);
TEST_JCC("no", 0x7fffffff, 0);
TEST_JCC("no", 0x7fffffff, -1);
/* sign */
TEST_JCC("s", 0, 1);
TEST_JCC("s", 0, -1);
TEST_JCC("s", 0, 0);
TEST_JCC("ns", 0, 1);
TEST_JCC("ns", 0, -1);
TEST_JCC("ns", 0, 0);
}
/*
 * Flag mask for the multiply tests: only O and C are compared unless
 * TEST_P4_FLAGS is defined, in which case all six arithmetic flags are.
 * This definition is the one in force while the mul/imul templates and
 * test_imulw2()/test_imull2() below are compiled.
 */
#undef CC_MASK
#ifdef TEST_P4_FLAGS
#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
#else
#define CC_MASK (CC_O | CC_C)
#endif
/* NOTE(review): presumably each inclusion defines test_<OP>b/w/l/q(), as
 * called from test_mul() -- the header is not visible here; confirm. */
#define OP mul
#include "fb_test_amd64_muldiv.h"
#define OP imul
#include "fb_test_amd64_muldiv.h"
/*
 * Test the two-operand 16-bit form "imulw src, dst".
 *
 * op0  initial destination value (only its low 16 bits take part; the upper
 *      bits of the result variable are left as they were).
 * op1  source operand.
 *
 * Records both inputs, the resulting 64-bit variable and the flags masked
 * with the CC_MASK in force at this point of the file.
 */
void test_imulw2(int64 op0, int64 op1)
{
int64 res, s1, s0, flags;
s0 = op0;
s1 = op1;
res = s0;
flags = 0;
/* Load a known (all-clear) flags image, multiply, then read the flags
 * back.  Operands 3 and 4 are the inputs tied to outputs 0 (res) and
 * 1 (flags). */
asm ("pushq %4\n\t"
"popfq\n\t"
"imulw %w2, %w0\n\t"
"pushfq\n\t"
"popq %1\n\t"
: "=q" (res), "=g" (flags)
: "q" (s1), "0" (res), "1" (flags));
xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n",
"imulw", s0, s1, res, flags & CC_MASK);
}
/*
 * Test the two-operand 32-bit form "imull src, dst".
 *
 * op0  initial destination value; truncated to int for the multiply but
 *      printed in full as A.
 * op1  source operand, truncated to int.
 *
 * Records the inputs, the 32-bit result and the flags masked with the
 * CC_MASK in force at this point of the file.
 */
void test_imull2(int64 op0, int64 op1)
{
int res, s1;
int64 s0, flags;
s0 = op0;
s1 = op1;
res = s0;
flags = 0;
/* Load an all-clear flags image, multiply, read the flags back. */
asm ("pushq %4\n\t"
"popfq\n\t"
"imull %2, %0\n\t"
"pushfq\n\t"
"popq %1\n\t"
: "=q" (res), "=g" (flags)
: "q" (s1), "0" (res), "1" (flags));
xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n",
"imull", s0, s1, res, flags & CC_MASK);
}
/*
 * TEST_IMUL_IM(size, size1, op0, op1): test the three-operand immediate
 * form "imul<size> $op0, op1, res".
 *   size   instruction suffix as a string ("w", ...)
 *   size1  operand-size modifier as a string, inserted after '%' in the
 *          operand references
 *   op0    immediate; must be a literal because it is stringified into the
 *          instruction text
 *   op1    register source operand
 * res starts at 0, so for sub-64-bit sizes the untouched upper bits print
 * as zero.  The flags are masked with whatever CC_MASK is defined where the
 * macro is expanded (in test_mul() that is 0, so CC always prints as 0000).
 */
#define TEST_IMUL_IM(size, size1, op0, op1)\
{\
int64 res, flags;\
flags = 0;\
res = 0;\
asm ("pushq %3\n\t"\
"popfq\n\t"\
"imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \
"pushfq\n\t"\
"popq %1\n\t"\
: "=r" (res), "=g" (flags)\
: "r" (op1), "1" (flags), "0" (res));\
xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\
"imul" size, op0, op1, res, flags & CC_MASK);\
}
/*
 * TEST_IMUL_IM_L(op0, op1): 32-bit variant of TEST_IMUL_IM, executing
 * "imul $op0, op1, <reg>" with int operands.
 *
 * NOTE(review): the asm writes its result to res64 (which, despite the
 * name, is a plain int), but the xxprintf below prints res, which is only an
 * input and is still 0.  The recorded R= value is therefore always
 * 00000000 and the product is never checked.  This looks like a bug, but
 * the macro is used by test_mul(), whose output is part of the expected
 * MD5 noted in main(); fixing it requires regenerating that reference.
 * As with TEST_IMUL_IM, CC_MASK is resolved at the expansion site.
 */
#define TEST_IMUL_IM_L(op0, op1)\
{\
int64 flags = 0;\
int res = 0;\
int res64 = 0;\
asm ("pushq %3\n\t"\
"popfq\n\t"\
"imul $" #op0 ", %2, %0\n\t" \
"pushfq\n\t"\
"popq %1\n\t"\
: "=r" (res64), "=g" (flags)\
: "r" (op1), "1" (flags), "0" (res));\
xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\
"imull", op0, op1, res, flags & CC_MASK);\
}
/*
 * Division tests compare no flags at all, hence the empty mask.  Because
 * test_mul() is defined after this point, the TEST_IMUL_IM* macros expanded
 * inside it also see CC_MASK == 0.
 */
#undef CC_MASK
#define CC_MASK (0)
/* NOTE(review): presumably defines test_div*/test_idiv* for the b/w/l/q
 * operand sizes, as called from test_mul() -- header not visible; confirm. */
#define OP div
#include "fb_test_amd64_muldiv.h"
#define OP idiv
#include "fb_test_amd64_muldiv.h"
/*
 * Drive all multiply and divide tests with a fixed set of operands:
 * zero, small values, mixed signs, values with garbage above the operand
 * width, and values at the signed/unsigned boundary of each width.
 *
 * The test_{imul,mul,idiv,div}{b,w,l,q} functions are not defined in the
 * visible part of this file (presumably generated by the
 * fb_test_amd64_muldiv.h inclusions above -- confirm); the byte forms take
 * two arguments and the wider forms three.  The order of the calls is part
 * of the hashed output.
 */
void test_mul(void)
{
/* one-operand imul, 8/16/32-bit */
test_imulb(0x1234561d, 4);
test_imulb(3, -4);
test_imulb(0x80, 0x80);
test_imulb(0x10, 0x10);
test_imulw(0, 0, 0);
test_imulw(0, 0xFF, 0xFF);
test_imulw(0, 0xFF, 0x100);
test_imulw(0, 0x1234001d, 45);
test_imulw(0, 23, -45);
test_imulw(0, 0x8000, 0x8000);
test_imulw(0, 0x100, 0x100);
test_imull(0, 0, 0);
test_imull(0, 0xFFFF, 0xFFFF);
test_imull(0, 0xFFFF, 0x10000);
test_imull(0, 0x1234001d, 45);
test_imull(0, 23, -45);
test_imull(0, 0x80000000, 0x80000000);
test_imull(0, 0x10000, 0x10000);
/* unsigned mul, 8/16/32-bit */
test_mulb(0x1234561d, 4);
test_mulb(3, -4);
test_mulb(0x80, 0x80);
test_mulb(0x10, 0x10);
test_mulw(0, 0x1234001d, 45);
test_mulw(0, 23, -45);
test_mulw(0, 0x8000, 0x8000);
test_mulw(0, 0x100, 0x100);
test_mull(0, 0x1234001d, 45);
test_mull(0, 23, -45);
test_mull(0, 0x80000000, 0x80000000);
test_mull(0, 0x10000, 0x10000);
/* two-operand imul (register, register) */
test_imulw2(0x1234001d, 45);
test_imulw2(23, -45);
test_imulw2(0x8000, 0x8000);
test_imulw2(0x100, 0x100);
test_imull2(0x1234001d, 45);
test_imull2(23, -45);
test_imull2(0x80000000, 0x80000000);
test_imull2(0x10000, 0x10000);
/* three-operand imul with immediate; CC_MASK is 0 here, so no flags are
 * recorded for these cases */
TEST_IMUL_IM("w", "w", 45, 0x1234);
TEST_IMUL_IM("w", "w", -45, 23);
TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
TEST_IMUL_IM_L(45, 0x1234);
TEST_IMUL_IM_L(-45, 23);
TEST_IMUL_IM_L(0x8000, 0x80000000);
TEST_IMUL_IM_L(0x7fff, 0x1000);
/* signed division, 8/16/32/64-bit */
test_idivb(0x12341678, 0x127e);
test_idivb(0x43210123, -5);
test_idivb(0x12340004, -1);
test_idivw(0, 0x12345678, 12347);
test_idivw(0, -23223, -45);
test_idivw(0, 0x12348000, -1);
test_idivw(0x12343, 0x12345678, 0x81238567);
test_idivl(0, 0x12345678, 12347);
test_idivl(0, -233223, -45);
test_idivl(0, 0x80000000, -1);
test_idivl(0x12343, 0x12345678, 0x81234567);
test_idivq(0, 0x12345678, 12347);
test_idivq(0, -233223, -45);
test_idivq(0, 0x80000000, -1);
test_idivq(0x12343, 0x12345678, 0x81234567);
/* unsigned division, 8/16/32/64-bit */
test_divb(0x12341678, 0x127e);
test_divb(0x43210123, -5);
test_divb(0x12340004, -1);
test_divw(0, 0x12345678, 12347);
test_divw(0, -23223, -45);
test_divw(0, 0x12348000, -1);
test_divw(0x12343, 0x12345678, 0x81238567);
test_divl(0, 0x12345678, 12347);
test_divl(0, -233223, -45);
test_divl(0, 0x80000000, -1);
test_divl(0x12343, 0x12345678, 0x81234567);
test_divq(0, 0x12345678, 12347);
test_divq(0, -233223, -45);
test_divq(0, 0x80000000, -1);
test_divq(0x12343, 0x12345678, 0x81234567);
}
/*
 * TEST_BSX(op, size, op0): run a bit-scan instruction on op0 and record the
 * result register together with ZF.
 *   op    mnemonic token (bsrw, bsrl, bsfw, bsfl)
 *   size  operand-size modifier string inserted after '%' ("w" or "")
 *   op0   source value
 * The destination is pre-loaded with 0x12345678 so that a scan which leaves
 * it untouched (zero source) is visible; resz is zeroed first and then set
 * from ZF with setz.
 *
 * Both outputs are written before the source operand is read, so they carry
 * the earlyclobber modifier ("=&r" / "=&q").  Without it the compiler may
 * assign val to the same register as res or resz, destroying the source
 * before the scan executes -- the outcome then depends on register
 * allocation and therefore on optimisation level.
 */
#define TEST_BSX(op, size, op0)\
{\
    int res, val, resz;\
    val = op0;\
    asm("xorl %1, %1\n"\
        "movl $0x12345678, %0\n"\
        #op " %" size "2, %" size "0 ; setz %b1" \
        : "=&r" (res), "=&q" (resz)\
        : "r" (val)\
        : "cc");\
    xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\
}
/*
 * Bit-scan reverse/forward tests at 16- and 32-bit operand size, each with a
 * zero source (ZF case) and a non-zero source.  Currently not called from
 * main().
 */
void test_bsx(void)
{
TEST_BSX(bsrw, "w", 0);
TEST_BSX(bsrw, "w", 0x12340128);
TEST_BSX(bsrl, "", 0);
TEST_BSX(bsrl, "", 0x00340128);
TEST_BSX(bsfw, "w", 0);
TEST_BSX(bsfw, "w", 0x12340128);
TEST_BSX(bsfl, "", 0);
TEST_BSX(bsfl, "", 0x00340128);
}
/**********************************************/
/*
 * Record the results of basic double-precision arithmetic and a selection
 * of <math.h> functions applied to a and b.  Everything goes through
 * xxprintf with "%f", so the results are hashed at six decimal places.
 */
void test_fops(double a, double b)
{
/* the four basic operations */
xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b);
xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b);
xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b);
xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b);
/* library functions */
xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a));
xxprintf("a=%f sin(a)=%f\n", a, sin(a));
xxprintf("a=%f cos(a)=%f\n", a, cos(a));
xxprintf("a=%f tan(a)=%f\n", a, tan(a));
xxprintf("a=%f log(a)=%f\n", a, log(a));
xxprintf("a=%f exp(a)=%f\n", a, exp(a));
xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
/* just to test some op combining */
xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
}
/*
 * Record the outcome of the C comparison operators on a and b, then the
 * Z/P/C flags produced by an explicit fcomi of the two values.
 */
void test_fcmp(double a, double b)
{
xxprintf("(%f<%f)=%d\n",
a, b, a < b);
xxprintf("(%f<=%f)=%d\n",
a, b, a <= b);
xxprintf("(%f==%f)=%d\n",
a, b, a == b);
xxprintf("(%f>%f)=%d\n",
a, b, a > b);
/* NOTE(review): the label below says "<=" but the value printed is
 * a >= b.  The text is part of the hashed output (see the expected MD5 in
 * main), so correcting the label requires regenerating that reference. */
xxprintf("(%f<=%f)=%d\n",
a, b, a >= b);
{
unsigned long long int rflags;
/* test f(u)comi instruction */
/* "t" places a on top of the x87 stack and "u" places b in the slot
 * below it; the flags are then copied out via pushfq/popq. */
asm("fcomi %2, %1\n"
"pushfq\n"
"popq %0\n"
: "=r" (rflags)
: "t" (a), "u" (b));
xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C));
}
}
/*
 * Record conversions of a: to float and long double, its raw bit patterns,
 * and -- for each of the four values of the x87 control-word field selected
 * by mask 0x0c00 -- its conversion to 16/32/64-bit integers and the result
 * of frndint.
 */
void test_fcvt(double a)
{
float fa;
long double la;
int16_t fpuc;
int i;
int64 lla;
int ia;
int16_t wa;
double ra;
fa = a;
la = a;
xxprintf("(float)%f = %f\n", a, fa);
xxprintf("(long double)%f = %Lf\n", a, la);
/* Raw bits via pointer casts: the 64 bits of the double, then the first 8
 * bytes of the long double followed by the 16 bits at byte offset 8.
 * NOTE(review): these casts read objects through incompatible pointer
 * types. */
xxprintf("a=%016llx\n", *(unsigned long long int *) &a);
xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la,
*(unsigned short *) ((char *)(&la) + 8));
/* test all roundings */
/* Save the current control word so it can be restored after each pass. */
asm volatile ("fstcw %0" : "=m" (fpuc));
for(i=0;i<4;i++) {
/* Replace bits 10-11 of the control word with i. */
short zz = (fpuc & ~0x0c00) | (i << 10);
asm volatile ("fldcw %0" : : "m" (zz));
/* Store a as 16-, 32- and 64-bit integers; the 64-bit store pops the
 * x87 stack, which the "st" clobber declares. */
asm volatile ("fists %0" : "=m" (wa) : "t" (a));
asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
/* Round to integral value in the current mode and store as double. */
asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
/* Restore the saved control word before calling into C code. */
asm volatile ("fldcw %0" : : "m" (fpuc));
xxprintf("(short)a = %d\n", wa);
xxprintf("(int)a = %d\n", ia);
xxprintf("(int64_t)a = %lld\n", lla);
xxprintf("rint(a) = %f\n", ra);
}
}
/*
 * TEST(N): execute the x87 constant-load instruction "fld<N>" (fld1, fldpi,
 * ...), take the value from the top of the x87 stack into the local double
 * "a", and record it.  Expands to two complete statements and expects "a"
 * to be declared by the caller.
 */
#define TEST(N) \
asm("fld" #N : "=t" (a)); \
xxprintf("fld" #N "= %f\n", a);
/*
 * Record the value produced by each x87 load-constant instruction:
 * fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2 and fldz.
 */
void test_fconst(void)
{
double a;
TEST(1);
TEST(l2t);
TEST(l2e);
TEST(pi);
TEST(lg2);
TEST(ln2);
TEST(z);
}
/*
 * Round-trip a through the x87 packed-BCD store (fbstp) and load (fbld)
 * and record the five 16-bit words of the BCD image plus the reloaded
 * value.  Not called anywhere in the visible part of this file (the calls
 * in test_floats are commented out).
 *
 * NOTE(review): the memory operands name only bcd[0], although the
 * instructions access the whole 10-byte array, so the compiler is not told
 * that bcd[1..4] are written/read -- confirm before reinstating the test.
 */
void test_fbcd(double a)
{
unsigned short bcd[5];
double b;
/* Store a as packed BCD and pop it (hence the "st" clobber). */
asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
/* Load the BCD image back onto the x87 stack. */
asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
}
/*
 * TEST_ENV(env, save, restore): push five values from dtab[] onto the x87
 * stack, run the given environment save instruction followed by the
 * matching restore instruction on *env, pop the five values into rtab[],
 * and record them together with the control, status (upper byte only) and
 * tag words captured in *env.
 *   env      pointer to a struct with fpuc, fpus and fptag members
 *   save     instruction mnemonic string, e.g. "fnstenv" or "fnsave"
 *   restore  instruction mnemonic string, e.g. "fldenv" or "frstor"
 * Expects locals i, dtab and rtab in the enclosing function.
 *
 * NOTE(review): *env is passed to the save instruction as an input operand
 * although that instruction writes to it; the compiler is therefore not
 * told that the memory changes.  This may be why results differ between
 * optimisation levels -- confirm.
 */
#define TEST_ENV(env, save, restore)\
{\
memset((env), 0xaa, sizeof(*(env)));\
for(i=0;i<5;i++)\
asm volatile ("fldl %0" : : "m" (dtab[i]));\
asm(save " %0\n" : : "m" (*(env)));\
asm(restore " %0\n": : "m" (*(env)));\
for(i=0;i<5;i++)\
asm volatile ("fstpl %0" : "=m" (rtab[i]));\
for(i=0;i<5;i++)\
xxprintf("res[%d]=%f\n", i, rtab[i]);\
xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\
(env)->fpuc,\
(env)->fpus & 0xff00,\
(env)->fptag);\
}
/*
 * Test saving and restoring the x87 environment (fnstenv/fldenv and
 * fnsave/frstor) and the effect of ffree on the tag word.
 */
void test_fenv(void)
{
/* Save area for the environment / full state image.
 * NOTE(review): the field layout presumably mirrors the 32-bit
 * protected-mode image format (control, status and tag words each padded
 * to 32 bits) -- confirm against the architecture manual. */
struct __attribute__((packed)) {
uint16_t fpuc;
uint16_t dummy1;
uint16_t fpus;
uint16_t dummy2;
uint16_t fptag;
uint16_t dummy3;
uint32_t ignored[4];
long double fpregs[8];
} float_env32;
double dtab[8];
double rtab[8];
int i;
/* Test values 1.0 .. 8.0; TEST_ENV uses the first five. */
for(i=0;i<8;i++)
dtab[i] = i + 1;
TEST_ENV(&float_env32, "fnstenv", "fldenv");
TEST_ENV(&float_env32, "fnsave", "frstor");
/* test for ffree */
/* Push five values, free st(2), capture the environment, then reset the
 * FPU so the values left on the stack do not leak into later tests. */
for(i=0;i<5;i++)
asm volatile ("fldl %0" : : "m" (dtab[i]));
asm volatile("ffree %st(2)");
asm volatile ("fnstenv %0\n" : : "m" (float_env32));
asm volatile ("fninit");
xxprintf("fptag=%04x\n", float_env32.fptag);
}
/*
 * TEST_FCMOV(a, b, rflags, CC): load rflags into the flags register, then
 * execute "fcmov<CC>" with a on top of the x87 stack and b in the slot
 * below, and record the value left on top.
 *   a, b    doubles
 *   rflags  64-bit flags image (pushed with pushq, so it must be an int64)
 *   CC      condition suffix as a string ("b", "e", "nbe", "u", ...)
 */
#define TEST_FCMOV(a, b, rflags, CC)\
{\
double res;\
asm("pushq %3\n"\
"popfq\n"\
"fcmov" CC " %2, %0\n"\
: "=t" (res)\
: "0" (a), "u" (b), "g" (rflags));\
xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \
CC, rflags, res);\
}
/*
 * Test every fcmov condition: the six C/Z-based conditions under all four
 * combinations of the carry and zero flags, then the two parity-based
 * conditions ("u"/"nu") with the parity flag clear and set.
 */
void test_fcmov(void)
{
double a, b;
int64 rflags, i;
a = 1.0;
b = 2.0;
for(i = 0; i < 4; i++) {
/* bit 0 of i selects CF, bit 1 selects ZF */
rflags = 0;
if (i & 1)
rflags |= CC_C;
if (i & 2)
rflags |= CC_Z;
TEST_FCMOV(a, b, rflags, "b");
TEST_FCMOV(a, b, rflags, "e");
TEST_FCMOV(a, b, rflags, "be");
TEST_FCMOV(a, b, rflags, "nb");
TEST_FCMOV(a, b, rflags, "ne");
TEST_FCMOV(a, b, rflags, "nbe");
}
/* parity-based conditions */
TEST_FCMOV(a, b, (int64)0, "u");
TEST_FCMOV(a, b, (int64)CC_P, "u");
TEST_FCMOV(a, b, (int64)0, "nu");
TEST_FCMOV(a, b, (int64)CC_P, "nu");
}
/*
 * Run the floating-point sub-tests with fixed operands: arithmetic and
 * library functions, comparisons, conversions and constant loads.
 *
 * The BCD, environment and fcmov tests are disabled here.  Note that
 * main() calls test_fenv(), test_fcmov() and test_fconst() directly, so the
 * last of these runs twice per program run.
 */
void test_floats(void)
{
test_fops(2, 3);
test_fops(1.4, -5);
test_fcmp(2, -1);
test_fcmp(2, 2);
test_fcmp(2, 3);
test_fcvt(0.5);
test_fcvt(-0.5);
test_fcvt(1.0/7.0);
test_fcvt(-1.0/9.0);
test_fcvt(32768);
test_fcvt(-1e20);
test_fconst();
// REINSTATE (maybe): test_fbcd(1234567890123456);
// REINSTATE (maybe): test_fbcd(-123451234567890);
// REINSTATE: test_fenv();
// REINSTATE: test_fcmov();
}
/**********************************************/
/*
 * TEST_XCHG(op, size, opconst): execute a two-operand exchange-style
 * instruction on two fixed values and record both operands afterwards.
 *   op       mnemonic token
 *   size     operand-size modifier string inserted after '%'
 *   opconst  constraint string for the second operand (e.g. a register or
 *            memory output constraint)
 * NOTE(review): not used anywhere in the visible part of this file.
 */
#define TEST_XCHG(op, size, opconst)\
{\
int op0, op1;\
op0 = 0x12345678;\
op1 = 0xfbca7654;\
asm(#op " %" size "0, %" size "1" \
: "=q" (op0), opconst (op1) \
: "0" (op0), "1" (op1));\
xxprintf("%-10s A=%08x B=%08x\n",\
#op, op0, op1);\
}
/*
 * TEST_CMPXCHG(op, size, opconst, eax): like TEST_XCHG, but additionally
 * pins the value "eax" to the accumulator register as an input and records
 * it alongside the two operands.
 * NOTE(review): not used anywhere in the visible part of this file; the
 * accumulator is declared as input only, so any change the instruction makes
 * to it is not reported -- confirm before use.
 */
#define TEST_CMPXCHG(op, size, opconst, eax)\
{\
int op0, op1;\
op0 = 0x12345678;\
op1 = 0xfbca7654;\
asm(#op " %" size "0, %" size "1" \
: "=q" (op0), opconst (op1) \
: "0" (op0), "1" (op1), "a" (eax));\
xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\
#op, eax, op0, op1);\
}
/**********************************************/
/* segmentation tests */
/* NOTE(review): these two symbols are declared but never referenced in the
 * visible part of this file -- presumably leftovers from the segmentation
 * tests of the 32-bit original; confirm before removing. */
extern char func_lret32;
extern char func_iret32;
/* Scratch memory for the string-instruction tests; the tests point rsi at
 * its midpoint and rdi 16 bytes beyond that. */
uint8_t str_buffer[4096];
/*
 * TEST_STRING1(OP, size, DF, REP): execute one string instruction on
 * str_buffer and record the resulting register and flag state.
 *   OP    mnemonic token without size suffix (stos, lods, movs, scas, cmps)
 *   size  size suffix string ("b", "w", "l")
 *   DF    "" to run with the direction flag clear, "std" to set it first
 *   REP   repeat prefix string including trailing space, or ""
 *
 * Setup: rsi points at the middle of str_buffer, rdi 16 bytes further,
 * rax = 0x12345678, rcx = 17.  The flags are cleared with pushq $0/popfq
 * before the instruction, the direction flag is cleared again afterwards
 * (cld) so the surrounding C code sees the state it expects, and the
 * flags are then read back.
 *
 * The string instructions read and write str_buffer behind the compiler's
 * back, so the asm declares a "memory" clobber: without it the compiler may
 * move or drop the C stores that initialise the buffer relative to the asm,
 * which makes the recorded values depend on the optimisation level.  "cc"
 * documents the flag changes.
 */
#define TEST_STRING1(OP, size, DF, REP)\
{\
    int64 rsi, rdi, rax, rcx, rflags;\
\
    rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\
    rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
    rax = 0x12345678;\
    rcx = 17;\
\
    asm volatile ("pushq $0\n\t"\
                  "popfq\n\t"\
                  DF "\n\t"\
                  REP #OP size "\n\t"\
                  "cld\n\t"\
                  "pushfq\n\t"\
                  "popq %4\n\t"\
                  : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\
                  : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx)\
                  : "memory", "cc");\
    xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\
             REP #OP size, rsi, rdi, rax, rcx,\
             rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
}
/*
 * TEST_STRING(OP, REP): run TEST_STRING1 for byte, word and long operand
 * sizes, first with the direction flag clear and then with it set.  The
 * last expansion has no trailing ';', so the call site supplies it.
 */
#define TEST_STRING(OP, REP)\
TEST_STRING1(OP, "b", "", REP);\
TEST_STRING1(OP, "w", "", REP);\
TEST_STRING1(OP, "l", "", REP);\
TEST_STRING1(OP, "b", "std", REP);\
TEST_STRING1(OP, "w", "std", REP);\
TEST_STRING1(OP, "l", "std", REP)
/*
 * String-instruction tests: fill str_buffer with a known byte pattern, then
 * run stos, lods, movs, scas and cmps with and without repeat prefixes.
 * Later tests observe the memory modified by earlier ones, so the order
 * matters.  Currently not called from main().
 */
void test_string(void)
{
int64 i;
/* Byte i gets the low 8 bits of i + 0x56. */
for(i = 0;i < sizeof(str_buffer); i++)
str_buffer[i] = i + 0x56;
TEST_STRING(stos, "");
TEST_STRING(stos, "rep ");
TEST_STRING(lods, ""); /* to verify stos */
// TEST_STRING(lods, "rep ");
TEST_STRING(movs, "");
TEST_STRING(movs, "rep ");
TEST_STRING(lods, ""); /* to verify stos */
/* XXX: better tests */
TEST_STRING(scas, "");
TEST_STRING(scas, "repz ");
TEST_STRING(scas, "repnz ");
// REINSTATE? TEST_STRING(cmps, "");
TEST_STRING(cmps, "repz ");
// REINSTATE? TEST_STRING(cmps, "repnz ");
}
/*
 * Run every enabled test group in a fixed (alphabetical) order, hashing all
 * of their output, and print the final MD5 digest.  The order of the calls
 * determines the digest, so it must not be changed without updating the
 * expected value noted at the end.  argc and argv are unused.
 */
int main(int argc, char **argv)
{
// The three commented out test cases produce different results at different
// compiler optimisation levels. This suggests to me that their inline
// assembly is incorrect. I don't have time to investigate now, though. So
// they are disabled.
xxprintf_start();
test_adc();
test_adcx();
test_add();
test_adox();
test_and();
// test_bsx();
test_cmp();
test_dec();
test_fcmov();
test_fconst();
test_fenv();
// test_floats() calls test_fconst() again as its last step.
test_floats();
test_inc();
// test_jcc();
test_lea();
test_mul();
test_neg();
test_not();
test_or();
test_rcl();
test_rcr();
test_rol();
test_ror();
test_sar();
test_sbb();
test_shl();
test_shr();
// test_string();
test_sub();
test_xor();
xxprintf_done();
// the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3
return 0;
}