/*-
 * Copyright (c) 2004 Sam Leffler, Errno Consulting
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
 *    redistribution must be conditioned upon including a substantially
 *    similar Disclaimer requirement for further binary redistribution.
 * 3. Neither the names of the above-listed copyright holders nor the names
 *    of any contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGES.
 *
 * $FreeBSD: src/tools/tools/crypto/cryptotest.c,v 1.8 2006/05/24 15:40:46 mr Exp $
 */

/*
 * Simple tool for testing hardware/system crypto support.
 *
 * cryptotest [-czsbvp] [-t n] [-a algorithm] [count] [size ...]
 *
 * Run count iterations of a crypt+decrypt or mac operation on a buffer of
 * size bytes.  A random key and iv are used.  Options:
 *	-c	check the results
 *	-z	run all available algorithms on a variety of buffer sizes
 *	-v	be verbose
 *	-b	mark operations for batching
 *	-p	profile kernel crypto operations (must be root)
 *	-t n	fork n threads and run tests concurrently
 * Known algorithms are:
 *	null	null cbc
 *	des	des cbc
 *	3des	3des cbc
 *	blf	blowfish cbc
 *	cast	cast cbc
 *	skj	skipjack cbc
 *	aes	rijndael/aes 128-bit cbc
 *	aes192	rijndael/aes 192-bit cbc
 *	aes256	rijndael/aes 256-bit cbc
 *	md5	md5 hmac
 *	sha1	sha1 hmac
 *	sha256	256-bit sha2 hmac
 *	sha384	384-bit sha2 hmac
 *	sha512	512-bit sha2 hmac
 *
 * For a test of how fast a crypto card is, use something like:
 *	cryptotest -z 1024
 * This will run a series of tests using the available crypto/cipher
 * algorithms over a variety of buffer sizes.  The 1024 says to do 1024
 * iterations.  Extra arguments can be used to specify one or more buffer
 * sizes to use in doing tests.
 *
 * To fork multiple processes all doing the same work, specify -t X on the
 * command line to get X "threads" running simultaneously.  No effort is made
 * to synchronize the threads or otherwise maximize load.
 *
 * If the kernel crypto code is built with CRYPTO_TIMING and you run as root,
 * then you can specify the -p option to get a "profile" of the time spent
 * processing crypto operations.  At present this data is only meaningful for
 * symmetric operations.  To get meaningful numbers you must run on an idle
 * machine.
 *
 * Expect ~400 Mb/s for a Broadcom 582x for 8K buffers on a reasonable CPU
 * (64-bit PCI helps).  Hifn 7811 parts top out at ~110 Mb/s.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <paths.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>

#ifdef LINUX
#include <linux/ipc.h>
#include <linux/sem.h>
#else
#include <sys/ipc.h>
#include <sys/sem.h>
#endif

#include <sys/sysctl.h>
#include <sys/time.h>
#include <opencrypto/cryptodev.h>

#define HIFN_ONLY

#if defined(__FreeBSD__) || (defined(__GNU_LIBRARY__) && !defined(_SEM_SEMUN_UNDEFINED))
/* union semun is defined by including <sys/sem.h> */
#else
/* according to X/OPEN we have to define it ourselves */
/*
 * Argument union handed to semctl(); this file only uses the `array'
 * member (with SETALL) to initialise its semaphore set.
 */
union semun {
	int val;                  /* value for SETVAL */
	struct semid_ds *buf;     /* buffer for IPC_STAT, IPC_SET */
	unsigned short *array;    /* array for GETALL, SETALL */
	/* Linux specific part: */
	struct seminfo *__buf;    /* buffer for IPC_INFO */
};
#endif

#define	CHUNK	64	/* how much to display */
#define	N(a)		(sizeof (a) / sizeof (a[0]))
#define	streq(a,b)	(strcasecmp(a,b) == 0)

/* Forward declaration; hexdump() is defined later in this file. */
static void hexdump(const char *, int);

/*
 * Hex-dump the start of a buffer: at most CHUNK bytes of the s bytes at p
 * are displayed.
 */
static void
chunkdump(const char *p, int s)
{
	int shown = MIN(s, CHUNK);

	hexdump(p, shown);
}

struct alg;

/* Selects how generate_data() fills the test buffer. */
enum {
	GEN_RANDOM = 0,	/* bytes produced by rdigit() */
	GEN_RAMP   = 1,	/* byte i holds the value i (truncated to char) */
	GEN_ZERO   = 2,	/* all bytes 0x00 */
	GEN_FFFF   = 3,	/* all bytes 0xFF */
	GEN_MAX    = 4	/* used as "read from file" in generate_data */
};


// configuration and defaults
//
// One instance (global_conf) is filled in by main() from the command line
// and then passed read-only to runtests()/runtest()/generate_data().
typedef struct {
        int verbose;            // incremented once per -v
        int opflags;            // copied into crypt_op.flags for every operation
        int verify;             // -c: compare results against the original data

        int swap_iv;            // -V: run swap_buf() over the iv around each ioctl
        int swap_key;           // -K: run swap_buf() over the generated key
        int swap_data;          // -D: run swap_buf() over the data buffers around each ioctl

        unsigned data_gen_type; // one of the GEN_* constants
        int fd_read_data_from;  // descriptor opened by -G, or -1
        
        struct alg *alg;        // algorithm under test
        int count;              // iterations per test
        int size;               // buffer size in bytes for the current run
        int cmd;                // ioctl request used to create the session
        int thread_count;       // -t: number of forked worker processes
        int do_profile;         // -p: enables the FreeBSD-only profiling calls
	char req_devicename[CRYPTO_NAME_LEN];   // -d: requested device name

} config_t;
/*
 * The single configuration instance used by main().  Members that are not
 * listed here (alg, size, cmd, do_profile) are zero-initialised; main()
 * assigns cmd and alg/size before any test is run.
 */
static config_t global_conf = {
        .verbose = 0,
        .opflags = 0,
        .verify = 0,

        .swap_iv = 0,
        .swap_key = 0,
        .swap_data = 0,

        .data_gen_type = GEN_RANDOM,
        .fd_read_data_from = -1,        /* no -G file opened */

        .count = 1,
        .thread_count = 1,

        .req_devicename = "",
};

/*
 * Reverse the byte order of every complete 4-byte word in buf, in place.
 *
 * buf - buffer to modify
 * len - number of valid bytes in buf
 *
 * Only whole words are swapped.  When len is not a multiple of 4 the
 * trailing 1-3 bytes are left untouched, so nothing past buf[len - 1] is
 * ever read or written (several key lengths used by this tool, e.g. 10,
 * are not multiples of 4).
 */
static void swap_buf(char *buf, int len)
{
	int i;
	char tmp[4];

	for (i = 0; len - i >= 4; i += 4) {
		memcpy(tmp, &buf[i], 4);
		buf[i + 0] = tmp[3];
		buf[i + 1] = tmp[2];
		buf[i + 2] = tmp[1];
		buf[i + 3] = tmp[0];
	}
}

/*
 * Print a printf-style message on stderr and terminate the process with
 * exit status -1.  Pending stdout output is flushed first so that it
 * appears ahead of the error text.
 */
static void bail (const char *fmt, ...)
{
	va_list args;

	fflush (stdout);

	va_start (args, fmt);
	vfprintf (stderr, fmt, args);
	va_end (args);

	exit (-1);
}

#define MAX_IV_SIZE 16

/* Algorithm classes, stored in the `type' member of struct alg. */
enum {
	CIPHER,		/* symmetric cipher: a key is generated for the session */
	HASH,		/* mac: a mac key is generated for the session */
	COMP		/* compression: set up like a cipher, without a key */
};

/*
 * Description of one testable algorithm, and the table of all algorithms
 * this build knows about.  Which rows are compiled in depends on HIFN_ONLY
 * and on the constants provided by <opencrypto/cryptodev.h>.
 */
struct alg {
	const char* name;	/* name accepted by -a (compared case-insensitively) */
	int	type;		/* CIPHER, HASH or COMP */
	int	blocksize;	/* test sizes must be a multiple of this */
	int	minkeylen;	/* the generated key length is the average ... */
	int	maxkeylen;	/* ... of minkeylen and maxkeylen */
	int	code;		/* value placed in session_op.cipher or .mac */
} algorithms[] = {
#if defined(CRYPTO_NULL_CBC) && !defined(HIFN_ONLY)
	{ "null",	CIPHER,	8,	1,	256,	CRYPTO_NULL_CBC },
#endif
	{ "des",	CIPHER,	8,	8,	8,	CRYPTO_DES_CBC },
	{ "3des",	CIPHER,	8,	24,	24,	CRYPTO_3DES_CBC },
#if !defined(HIFN_ONLY)
	{ "blf",	CIPHER,	8,	5,	56,	CRYPTO_BLF_CBC },
	{ "cast",	CIPHER,	8,	5,	16,	CRYPTO_CAST_CBC },
	{ "skj",	CIPHER,	8,	10,	10,	CRYPTO_SKIPJACK_CBC },
#endif
	/* the three aes rows share one code and differ only in key length */
	{ "aes",	CIPHER,	16,	16,	16,	CRYPTO_RIJNDAEL128_CBC},
	{ "aes192",	CIPHER,	16,	24,	24,	CRYPTO_RIJNDAEL128_CBC},
	{ "aes256",	CIPHER,	16,	32,	32,	CRYPTO_RIJNDAEL128_CBC},
#ifdef notdef
	{ "arc4",	CIPHER,	8,	1,	32,	CRYPTO_ARC4 },
#endif
#if !defined(HIFN_ONLY)
	{ "md5",	HASH,	8,	16,	16,	CRYPTO_MD5 },
#endif
	{ "md5_hmac",	HASH,	8,	16,	16,	CRYPTO_MD5_HMAC },
#if !defined(HIFN_ONLY)
	{ "sha1",	HASH,	8,	20,	20,	CRYPTO_SHA1 },
#endif
	{ "sha1_hmac",	HASH,	8,	20,	20,	CRYPTO_SHA1_HMAC },
#if !defined(HIFN_ONLY)
	{ "sha256",	HASH,	8,	32,	32,	CRYPTO_SHA2_256_HMAC },
	{ "sha384",	HASH,	8,	48,	48,	CRYPTO_SHA2_384_HMAC },
	{ "sha512",	HASH,	8,	64,	64,	CRYPTO_SHA2_512_HMAC },
#endif
        { "lzs",        COMP,   1,      0,      0,      CRYPTO_LZS_COMP },
};

/*
 * Print the command line help on stdout and exit with status -1.
 *
 * cmd - program name to show in the synopsis (argv[0])
 *
 * The algorithm list uses the names that -a actually accepts; which of
 * them are available depends on how the algorithm table was compiled.
 */
static void
usage(const char* cmd)
{
	printf ("usage: %s [-c] [-z] [-s] [-b] [-v] [-p] [-V] [-K] [-D] [-t threadcount] [-d devicename] [-a algorithm] [-S seed] [-P id] [-g type] [-G file] [count] [size ...]\n",
		cmd);
	printf ("where devicename is one of:\n"
		"    anydevice   - any device,\n"
		"    anyhardware - any hardware device,\n"
		"    anysoftware - any software device,\n"
		"    software    - use cryptosoft device;\n"
		"    ... or any hardware device name.\n"
		"where algorithm is one of (subject to build options):\n"
		"    null des 3des (default) blf cast skj\n"
		"    aes (aka rijndael) aes192 aes256\n"
		"    md5 md5_hmac sha1 sha1_hmac sha256 sha384 sha512 lzs\n"
		"count is the number of encrypt/decrypt ops to do\n"
		"size is the number of bytes of text to encrypt+decrypt\n"
		"\n"
		"-c check the results (slows timing)\n"
		"-z run all available algorithms on a variety of sizes\n"
		"-v be verbose (can be specified multiple times)\n"
		"-b mark operations for batching\n"
		"-t set number of threads to start\n"
		"-p profile kernel crypto operation (must be root)\n"
		"-P <id> - 0 only start profiling capture which returns id,\n"
		"          >0 finish and show details for previous capture\n"
		"-S <seed> - seed the pseudo random number generator\n"
		"-V, -K, -D - byte swap each 32-bit word of the iv, key, data\n"
                "-g <type> - generate data using one of the following methods:\n"
                "    0 | random - pseudo random block\n"
                "    1 | ramp   - 0, 1, 2, 3, ... 255, 0, 1, ...\n"
                "    2 | zero   - all zero\n"
                "    3 | ffff   - all 0xFF\n"
                "-G <file> - seed buffers with contents of <file>\n"
                );
	exit(-1);
}

static struct alg*
getalgbycode(int cipher)
{
	int i;

	for (i = 0; i < N(algorithms); i++)
		if (cipher == algorithms[i].code)
			return &algorithms[i];
	return NULL;
}

static struct alg*
getalgbyname(const char* name)
{
	int i;

	for (i = 0; i < N(algorithms); i++)
		if (streq(name, algorithms[i].name))
			return &algorithms[i];
	return NULL;
}

/*
 * Return a descriptor for the crypto device (_PATH_DEV "crypto").
 *
 * The device is opened on the first call and the descriptor is cached in a
 * function-static variable for later calls.  F_SETFD is applied with the
 * value 1 right after opening.  Any failure terminates the program via
 * err().
 */
static int
devcrypto(void)
{
	static int fd = -1;

	if (fd >= 0)
		return fd;

	fd = open(_PATH_DEV "crypto", O_RDWR, 0);
	if (fd < 0)
		err(1, _PATH_DEV "crypto");
	if (fcntl(fd, F_SETFD, 1) == -1)
		err(1, "fcntl(F_SETFD) (devcrypto)");

	return fd;
}

/*
 * Obtain a new descriptor from the crypto device using the CRIOGET ioctl
 * and apply F_SETFD with the value 1 to it.  Any failure terminates the
 * program via err().  The caller closes the returned descriptor.
 */
static int
crget(void)
{
	int newfd;
	int rc;

	rc = ioctl(devcrypto(), CRIOGET, &newfd);
	if (rc == -1)
		err(1, "ioctl(CRIOGET)");

	rc = fcntl(newfd, F_SETFD, 1);
	if (rc == -1)
		err(1, "fcntl(F_SETFD) (crget)");

	return newfd;
}

/*
 * Return one pseudo random byte for keys, ivs and test data.
 *
 * The byte is 0x20 plus an entry picked with random() from a small fixed
 * table.  The disabled alternative below returns an incrementing counter
 * instead.
 */
static char
rdigit(void)
{
#if 1
	static const char table[] = {
		0x10,0x54,0x11,0x48,0x45,0x12,0x4f,0x13,0x49,0x53,0x14,0x41,
		0x15,0x16,0x4e,0x55,0x54,0x17,0x18,0x4a,0x4f,0x42,0x19,0x01
	};
	size_t pick = random() % (sizeof (table) / sizeof (table[0]));

	return 0x20 + table[pick];
#else
	static unsigned char c = 0;
	return (c++);
#endif
}

/*
 * Fill buffer with conf->size bytes of test data.
 *
 * buffer - destination, at least conf->size bytes long
 * conf   - supplies the size and the fill method (conf->data_gen_type):
 *            GEN_RANDOM  bytes from rdigit()
 *            GEN_RAMP    byte i is set to i (truncated to char)
 *            GEN_ZERO    all 0x00
 *            GEN_FFFF    all 0xFF
 *            GEN_MAX     contents of conf->fd_read_data_from, read from
 *                        offset 0; anything the file cannot supply stays
 *                        zero
 *
 * Any other value leaves the buffer untouched.
 */
static void
generate_data (char *buffer, const config_t *conf)
{
        int i, fd, filled;
        ssize_t got;

        switch (conf->data_gen_type) {
        case GEN_RANDOM:
                for (i = 0; i < conf->size; i++)
                        buffer[i] = rdigit();
                break;
        case GEN_RAMP:
                for (i = 0; i < conf->size; i++)
                        buffer[i] = i;
                break;
        case GEN_ZERO:
                memset (buffer, 0, conf->size);
                break;
        case GEN_FFFF:
                memset (buffer, -1, conf->size);
                break;
        case GEN_MAX:
                memset (buffer, 0, conf->size);
                fd = conf->fd_read_data_from;
                if (fd < 0)
                        break;          /* no file was opened: keep the zero fill */

                /* a failed seek is ignored on purpose: a source that cannot
                 * seek is simply read from its current position */
                (void) lseek (fd, 0, SEEK_SET);

                /* read() may return less than requested; keep going until
                 * the buffer is full, the file ends, or a real error occurs */
                filled = 0;
                while (filled < conf->size) {
                        got = read (fd, buffer + filled,
                                        (size_t)(conf->size - filled));
                        if (got < 0) {
                                if (errno == EINTR)
                                        continue;
                                perror ("read (data file)");
                                break;
                        }
                        if (got == 0)
                                break;  /* end of file */
                        filled += (int) got;
                }
                break;
        default:
                break;
        }
}

#define COMP_EXTRA_OUT_BYTES 1024
#define COMP_EXTRA_IN_BYTES 1024
#define COMP_EXTRA_BYTES (COMP_EXTRA_IN_BYTES + COMP_EXTRA_OUT_BYTES)
/*
 * Run one timed test of conf->alg on a buffer of conf->size bytes.
 *
 * A fresh crypto descriptor is obtained with crget(), a session is created
 * with the ioctl request in conf->cmd, and conf->count iterations are run:
 *   CIPHER - encrypt followed by decrypt
 *   HASH   - one mac operation
 *   COMP   - compress followed by decompress
 * With conf->verify set, results are compared against the original data and
 * mismatches are reported on stdout.
 *
 * conf             - test parameters (algorithm, size, count, flags, ...)
 * tv               - receives the time spent in the operation loop; it is
 *                    left untouched when a CIOCGSESSION request fails (the
 *                    algorithm is then skipped)
 * using_devicename - in: requested device name; out: device name copied
 *                    back from the session_op after the session ioctl.
 *                    The buffer must hold at least
 *                    sizeof(sop.crypto_device_name) bytes.
 *
 * Unrecoverable errors terminate the process through err()/bail().
 */
static void
runtest(const config_t *conf, struct timeval *tv, char *using_devicename)
{
	int i, fd = crget();
	struct timeval start, stop;
	char *cleartext, *ciphertext, *originaltext;
	char *key = NULL, *mackey = NULL;	/* our own copies, freed on exit */
	struct session_op sop;
	struct crypt_op cop;
	int bsize, asize;
	char iv[MAX_IV_SIZE];
	struct alg *alg = conf->alg;

	/* set a bigger size for hashes that have to return data */
	bsize = MAX(conf->size, alg->maxkeylen);

	if (conf->verbose>=3) {
		printf("opened cryptodev, fd=%d..", fd);
	}
	bzero(&sop, sizeof(sop));
	/* never hand uninitialised request members to the kernel */
	bzero(&cop, sizeof(cop));

	switch (alg->type) {
	case CIPHER:
		sop.keylen = (alg->minkeylen + alg->maxkeylen)/2;
		key = malloc(sop.keylen);
		if (key == NULL)
			err(1, "malloc (key)");
		sop.key = key;
		for (i = 0; i < sop.keylen; i++)
			key[i] = rdigit();
		if (conf->swap_key)
			swap_buf(key, sop.keylen);
		/* fallthrough desired */

	case COMP: // compression is a cipher without a key :)
		sop.cipher = alg->code;
		break;

	case HASH:
		sop.mackeylen = (alg->minkeylen + alg->maxkeylen)/2;
		mackey = malloc(sop.mackeylen);
		if (mackey == NULL)
			err(1, "malloc (mac)");
		sop.mackey = mackey;
		for (i = 0; i < sop.mackeylen; i++)
			mackey[i] = rdigit();
		sop.mac = alg->code;
		if (conf->swap_key)
			swap_buf(mackey, sop.mackeylen);
		break;

	default:
		/* alg->name is a string: print it with %s */
		bail ("invalid alg[%s] type of %d\n", alg->name, alg->type);
	}

	strncpy (sop.crypto_device_name, using_devicename, sizeof (sop.crypto_device_name));
	/* strncpy() does not terminate when the source fills the field */
	sop.crypto_device_name[sizeof (sop.crypto_device_name) - 1] = '\0';

	if (ioctl(fd, conf->cmd, &sop) < 0) {
		if (conf->cmd == CIOCGSESSION) {
			int saved_errno = errno;	/* close() may change errno */

			close(fd);
			if (conf->verbose>=3) {
				printf("cipher %s", alg->name);
				switch (alg->type) {
				case HASH:
					printf(" mackeylen %u\n", sop.mackeylen);
					break;
				case CIPHER:
					printf(" keylen %u\n", sop.keylen);
					break;
				}
				errno = saved_errno;
				perror("CIOCGSESSION");
			}
			free(key);
			free(mackey);
			/* hardware doesn't support algorithm; skip it */
			return;
		}
		printf("cipher %s keylen %u mackeylen %u\n",
			alg->name, sop.keylen, sop.mackeylen);
		err(1, "CIOCGSESSION");
	}

	/* report back which device the session is using */
	using_devicename[0]='\0';
	strncpy(using_devicename, sop.crypto_device_name, sizeof(sop.crypto_device_name));
	using_devicename[sizeof(sop.crypto_device_name) - 1] = '\0';

	// allocate enough space for 3 buffers of the required size
	// in case of compression we need to have some more room in case
	// compression fails
	asize = 3*bsize;
	if (alg->type == COMP)
		asize += COMP_EXTRA_BYTES;

	/* one allocation, laid out as: originaltext | cleartext | ciphertext */
	originaltext = malloc(asize);
	if (originaltext == NULL)
		err(1, "malloc (text)");
	cleartext = originaltext+bsize;
	ciphertext = cleartext+bsize;
	generate_data (cleartext, conf);
	memcpy(originaltext, cleartext, conf->size);
	for (i = 0; i < N(iv); i++)
		iv[i] = rdigit();

	if (conf->verbose>=3) {
		printf("session = 0x%x\n", sop.ses);
		printf("count = %d, size = %d\n", conf->count, conf->size);
		if (alg->type == CIPHER) {
			printf("iv[%d]:\n", (int) sizeof iv);
			chunkdump(iv, sizeof iv);
		}
		printf("cleartext[%d]:\n", conf->size);
		chunkdump(cleartext, conf->size);
	}

	gettimeofday(&start, NULL);
	switch (alg->type) {
	case CIPHER:
		for (i = 0; i < conf->count; i++) {
			cop.ses = sop.ses;
			cop.op = COP_ENCRYPT;
			cop.flags = conf->opflags;
			cop.slen = conf->size;
			cop.dlen = conf->size;
			cop.src = cleartext;
			cop.dst = ciphertext;
			cop.mac = 0;
			cop.iv = iv;

			/* optional byte swapping before the operation ... */
			if (conf->swap_data) {
				swap_buf(cleartext, conf->size);
				swap_buf(ciphertext, conf->size);
			}
			if (conf->swap_iv)
				swap_buf(iv, N(iv));

			if (ioctl(fd, CIOCCRYPT, &cop) < 0)
				err(1, "ioctl(CIOCCRYPT:%08x)", CIOCCRYPT);

			/* ... and swapped back afterwards */
			if (conf->swap_data) {
				swap_buf(cleartext, conf->size);
				swap_buf(ciphertext, conf->size);
			}
			if (conf->swap_iv)
				swap_buf(iv, N(iv));

			if (conf->verify && bcmp(ciphertext, cleartext, conf->size) == 0) {
				printf("cipher text unchanged:\n");
				chunkdump(ciphertext, conf->size);
			}

			if (conf->verbose>=3) {
				printf("ciphertext[%d]:\n", conf->size);
				chunkdump(ciphertext, conf->size);
				/* the iv buffer holds sizeof iv bytes, not cop.dlen */
				printf("cipheriv[%d]:\n", (int) sizeof iv);
				chunkdump(iv, sizeof iv);
			}

			/* scribble over the cleartext so a decrypt that does
			 * nothing is detected */
			memset(cleartext, 'x', MIN(conf->size, CHUNK));
			cop.ses = sop.ses;
			cop.op = COP_DECRYPT;
			cop.flags = conf->opflags;
			cop.slen = conf->size;
			cop.dlen = conf->size;
			cop.src = ciphertext;
			cop.dst = cleartext;
			cop.mac = 0;
			cop.iv = iv;

			if (conf->swap_data) {
				swap_buf(cleartext, conf->size);
				swap_buf(ciphertext, conf->size);
			}
			if (conf->swap_iv)
				swap_buf(iv, N(iv));

			if (ioctl(fd, CIOCCRYPT, &cop) < 0)
				err(1, "ioctl(CIOCCRYPT)");

			if (conf->swap_data) {
				swap_buf(cleartext, conf->size);
				swap_buf(ciphertext, conf->size);
			}
			if (conf->swap_iv)
				swap_buf(iv, N(iv));

			if (conf->verify && bcmp(cleartext, originaltext, conf->size) != 0) {
				printf("decrypt mismatch:\n");
				printf("original[%d]:\n", conf->size);
				chunkdump(originaltext, conf->size);
				printf("cleartext[%d]:\n", conf->size);
				chunkdump(cleartext, conf->size);
			}

			if (conf->verbose>=3) {
				printf("cleartext[%d]:\n", conf->size);
				chunkdump(cleartext, conf->size);
			}
		}
		break;

	case HASH:
#if TEST_ENC_RESULT
		memset (originaltext, -1, conf->size);
#endif
		for (i = 0; i < conf->count; i++) {
			memset (&cop,0,sizeof(cop));
			cop.ses = sop.ses;
			cop.op = 0;
			cop.flags = conf->opflags;
			cop.slen = conf->size;
			cop.dlen = conf->size;
			cop.src = cleartext;
			cop.dst = 0;
			cop.mac = ciphertext;	/* the mac is returned here */
			cop.iv = 0;

#if TEST_ENC_RESULT
			if (!i) {
				cop.mac = originaltext;
				bzero (ciphertext, conf->size);
			}
#endif

			if (ioctl(fd, CIOCCRYPT, &cop) < 0)
				err(1, "ioctl(CIOCCRYPT) mac: %s", alg->name);

			if (conf->verbose>=3) {
				printf("hash[%d]:\n", conf->size);
				chunkdump(ciphertext, conf->size);
			}

#if TEST_ENC_RESULT
			if (i && memcmp (originaltext, ciphertext, conf->size))
				err (1, "failed encryption pass %d/%d\n", i, conf->count);
#endif
		}
		break;

	case COMP:

		if (conf->verbose>3)
			printf ("compression, len=%d\n", conf->size);

		for (i = 0; i < conf->count; i++) {
			int comp_len;
			cop.ses = sop.ses;
			cop.op = COP_ENCRYPT;
			cop.flags = conf->opflags;
			cop.slen = conf->size;
			cop.dlen = conf->size + COMP_EXTRA_IN_BYTES;
			cop.src = cleartext;
			cop.dst = ciphertext;
			cop.mac = 0;
			cop.iv = 0;

			if (conf->swap_data) {
				swap_buf(cleartext, cop.slen);
				swap_buf(ciphertext, cop.dlen);
			}

			if (ioctl(fd, CIOCCRYPT, &cop) < 0)
				err(1, "ioctl(CIOCCRYPT)");

			if (conf->swap_data) {
				swap_buf(cleartext, cop.slen);
				swap_buf(ciphertext, cop.dlen);
			}

			/* cop.dlen as left by the ioctl is taken as the
			 * compressed length */
			comp_len = cop.dlen;

			if (conf->verbose>=2) {
				printf ("compressed slen=%d dlen=%d\n", conf->size, comp_len);
			}

			if (conf->verify && comp_len==conf->size
					&& bcmp(ciphertext, cleartext, conf->size) == 0) {
				printf("compressed text unchanged:\n");
				chunkdump(ciphertext, comp_len);
			}

			if (conf->verbose>=3) {
				printf("compressed[%d]:\n", comp_len);
				chunkdump(ciphertext, comp_len);
			}

			// make sure we can tell that the decompressed data was updated
			memset(cleartext, 'x', MIN(conf->size, CHUNK));

			// nuke a bit after what we got as compressed data
			if (comp_len < (conf->size+COMP_EXTRA_BYTES)) {
				memset(ciphertext + comp_len, 'x',
						MIN(conf->size + COMP_EXTRA_BYTES - comp_len, CHUNK));
			}

			cop.ses = sop.ses;
			cop.op = COP_DECRYPT;
			cop.flags = conf->opflags;
			cop.slen = comp_len;
			cop.dlen = conf->size + COMP_EXTRA_OUT_BYTES;
			cop.src = ciphertext;
			cop.dst = cleartext;
			cop.mac = 0;
			cop.iv = iv;

			if (conf->swap_data) {
				swap_buf(cleartext, conf->size);
				swap_buf(ciphertext, conf->size);
			}

			if (ioctl(fd, CIOCCRYPT, &cop) < 0)
				err(1, "ioctl(CIOCCRYPT)");

			if (conf->swap_data) {
				swap_buf(cleartext, conf->size);
				swap_buf(ciphertext, conf->size);
			}

			if (conf->verbose>=2) {
				printf ("decompressed slen=%d dlen=%d (original=%d)\n",
						comp_len, cop.dlen, conf->size);
			}

			if (conf->verify && (cop.dlen!=conf->size
					|| bcmp(cleartext, originaltext, conf->size) != 0)) {
				printf("decompression mismatch:\n");
				printf("original[%d]:\n", conf->size);
				chunkdump(originaltext, conf->size);
				printf("cleartext[%d]:\n", cop.dlen);
				chunkdump(cleartext, cop.dlen);
			}

			if (conf->verbose>=3) {
				printf("cleartext[%d]:\n", conf->size);
				chunkdump(cleartext, conf->size);
			}
		}
		break;
	}
	gettimeofday(&stop, NULL);

	if (ioctl(fd, CIOCFSESSION, &sop.ses) < 0)
		perror("ioctl(CIOCFSESSION)");

	timersub(&stop, &start, tv);

	free(originaltext);
	free(key);
	free(mackey);

	close(fd);
}

#ifdef __FreeBSD__
/*
 * Reset the kernel crypto timing statistics (FreeBSD only).
 *
 * Reads the current kern.crypto_stats, clears the four timing records
 * (invoke, done, cb, finis), presets their `min' fields to 10000 seconds
 * and writes the structure back.  Failures are reported with perror() and
 * otherwise ignored.
 */
static void
resetstats(void)
{
	struct cryptostats stats;
	size_t slen;

	slen = sizeof (stats);
	if (sysctlbyname("kern.crypto_stats", &stats, &slen, NULL, 0) < 0) {
		perror("kern.crypto_stats");
		return;
	}
	bzero(&stats.cs_invoke, sizeof (stats.cs_invoke));
	bzero(&stats.cs_done, sizeof (stats.cs_done));
	bzero(&stats.cs_cb, sizeof (stats.cs_cb));
	bzero(&stats.cs_finis, sizeof (stats.cs_finis));
	stats.cs_invoke.min.tv_sec = 10000;
	stats.cs_done.min.tv_sec = 10000;
	stats.cs_cb.min.tv_sec = 10000;
	stats.cs_finis.min.tv_sec = 10000;
	/* label the error with the sysctl name that was actually used */
	if (sysctlbyname("kern.crypto_stats", NULL, NULL, &stats, sizeof (stats)) < 0)
		perror("kern.crypto_stats");
}

/*
 * Print one line of timing statistics (FreeBSD only).
 *
 * tag - label for the line
 * ts  - timing record; nothing is printed when it holds no samples
 *
 * The average, minimum and maximum are shown in nanoseconds.
 */
static void
printt(const char* tag, struct cryptotstat *ts)
{
	uint64_t avg, min, max;

	if (ts->count == 0)
		return;
	avg = (1000000000LL*ts->acc.tv_sec + ts->acc.tv_nsec) / ts->count;
	min = 1000000000LL*ts->min.tv_sec + ts->min.tv_nsec;
	max = 1000000000LL*ts->max.tv_sec + ts->max.tv_nsec;
	/* uint64_t need not be unsigned long long: cast to match %llu */
	printf("%16.16s: avg %6llu ns : min %6llu ns : max %7llu ns [%u samps]\n",
		tag, (unsigned long long) avg, (unsigned long long) min,
		(unsigned long long) max, ts->count);
}

/*
 * Start a kernel crypto profiling capture (FreeBSD only).
 *
 * Resets the statistics and sets debug.crypto_timing to 1.
 *
 * Returns the previous value of debug.crypto_timing, or 0 when the sysctl
 * fails (the failure is reported with perror()).
 */
static int
profile_start(void)
{
	int otiming = 0;	/* defined result even if the sysctl fails */
	size_t tlen = sizeof (otiming);
	int timing = 1;

	resetstats();
	if (sysctlbyname("debug.crypto_timing", &otiming, &tlen,
				&timing, sizeof (timing)) < 0)
		perror("debug.crypto_timing");

	return otiming;
}

/*
 * Finish a kernel crypto profiling capture (FreeBSD only).
 *
 * otiming - value to write back to debug.crypto_timing
 *
 * Afterwards kern.crypto_stats is read and, if any invocations were
 * recorded, the four timing records are printed with printt().
 */
static void
profile_end(int otiming)
{
	struct cryptostats stats;
	size_t slen = sizeof (stats);

	if (sysctlbyname("debug.crypto_timing", NULL, NULL,
				&otiming, sizeof (otiming)) < 0)
		perror("debug.crypto_timing");
	if (sysctlbyname("kern.crypto_stats", &stats, &slen, NULL, 0) < 0) {
		perror("kern.crypto_stats");
		return;		/* stats was not filled in; nothing to print */
	}
	if (stats.cs_invoke.count) {
		printt("dispatch->invoke", &stats.cs_invoke);
		printt("invoke->done", &stats.cs_done);
		printt("done->cb", &stats.cs_cb);
		printt("cb->finis", &stats.cs_finis);
	}
}
#endif

/*
 * Print "waiting" and block until a line (or end of file) arrives on
 * stdin; whatever was read is discarded.
 */
void	waitforinput(void)
{
	char line[16];

	printf("waiting");
	fgets(line, sizeof line, stdin);
}

#if 0
#define thread_debug(fmt,x...) printf(fmt,##x)
#else
#define thread_debug(fmt,x...) do { /* nothing */ } while (0)
#endif

/*
 * Run the test described by conf in conf->thread_count processes and print
 * throughput statistics.
 *
 * With one "thread" runtest() is called directly.  With more, one child
 * process per thread is forked; a private System V semaphore set is used
 * to start all children together and to learn when they are done.  Each
 * child stores its elapsed time in a shared anonymous mapping.
 *
 * Sizes that are not a multiple of the algorithm's block size are skipped.
 */
static void
runtests(const config_t *conf)
{
	int i, status;
	double avgtime, ttltime;
	void *region;
	size_t region_len;
	struct timeval *tvp;
#ifdef __FreeBSD__
	int otiming = 0;
#endif
	char using_devicename[CRYPTO_NAME_LEN];
	struct timeval all_start, all_stop, all_duration;
	struct alg *alg = conf->alg;
	int nops = (alg->type==CIPHER) ? 2 * conf->count : conf->count;

	/* a zero or negative count would mean a zero-length mapping and a
	 * division by zero further down */
	if (conf->thread_count < 1) {
		fprintf(stderr, "invalid thread count %d\n", conf->thread_count);
		return;
	}

	if (conf->verbose>=2)
		printf("# %s starting %d thread%s for %d %s crypts of %7d bytes%s\n",
			conf->req_devicename, conf->thread_count,
			conf->thread_count>1 ? "s" : "",
			nops*conf->thread_count, alg->name, conf->size,
			conf->thread_count>1 ? " per thread" : "");

	strncpy (using_devicename, conf->req_devicename, CRYPTO_NAME_LEN);
	/* the name is printed with %s below, so it must be terminated */
	using_devicename[CRYPTO_NAME_LEN - 1] = '\0';

	if (conf->size % alg->blocksize) {
		if (conf->verbose)
			printf("skipping blocksize %u 'cuz not a multiple of "
				"%s blocksize %u\n",
				conf->size, alg->name, alg->blocksize);
		return;
	}

	/* shared between parent and children: one timeval per thread */
	region_len = conf->thread_count * sizeof (struct timeval);
	region = mmap(NULL, region_len,
			PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0);
	if (region == MAP_FAILED) {
		perror("mmap");
		return;
	}
	tvp = (struct timeval *) region;
	bzero (tvp, region_len);

#ifdef __FreeBSD__
	if (conf->do_profile) {
		otiming = profile_start();
	}
#endif

	if (conf->thread_count > 1) {
		// we need multiple processes, and semaphores
		int sem_id, rc;
		union semun semun;
		struct sembuf sembuf;
		enum {
			WAIT_FOR_THREAD,
			WAIT_FOR_PARENT,
			SEMSET_COUNT
		};
		u_short semvalues[SEMSET_COUNT];

		/* IPC_PRIVATE is a key, not a flag */
		sem_id = semget (IPC_PRIVATE, SEMSET_COUNT, IPC_CREAT | 0600);
		if (sem_id<0) err (1, "semget failed");

		semvalues[WAIT_FOR_THREAD] = 0;
		semvalues[WAIT_FOR_PARENT] = 0;

		semun.array = semvalues;
		rc = semctl(sem_id, 0, SETALL, semun);
		if (rc<0) err (1, "semctl(SETALL) failed, %d", rc);

		/* children call exit(), which flushes stdio: make sure they do
		 * not inherit (and print again) output buffered by the parent */
		fflush(stdout);

		// start all threads and make them wait on their sema
		for (i = 0; i < conf->thread_count; i++) {
			pid_t pid = fork();

			if (pid < 0) {
				/* removing the set makes the semop() of children
				 * that were already started fail, so they exit
				 * instead of waiting forever */
				int saved_errno = errno;

				semctl (sem_id, 0, IPC_RMID);
				errno = saved_errno;
				err (1, "fork");
			}
			if (pid == 0) {
				// first, let the parent know we are running
				thread_debug ("%d started\n", i);
				sembuf.sem_num = WAIT_FOR_THREAD;
				sembuf.sem_op = 1;
				sembuf.sem_flg = 0;
				rc = semop (sem_id, &sembuf, 1);
				if (rc<0) err (1, "thread %d: returning sema resource failed\n", i);

				// wait for signal to start (get sema)
				thread_debug ("%d waiting for start\n", i);
				sembuf.sem_num = WAIT_FOR_PARENT;
				sembuf.sem_op = -1;
				sembuf.sem_flg = 0;
				rc = semop (sem_id, &sembuf, 1);
				if (rc<0) err (1, "thread %d: waiting on sema failed\n", i);

				// run the test
				thread_debug ("%d running\n", i);
				runtest(conf, &tvp[i], using_devicename);

				// let the parent process know we are done
				thread_debug ("%d finished\n", i);
				sembuf.sem_num = WAIT_FOR_THREAD;
				sembuf.sem_op = 1;
				sembuf.sem_flg = 0;
				rc = semop (sem_id, &sembuf, 1);
				if (rc<0) err (1, "thread %d: returning sema resource failed\n", i);

				// make sure that parent gets to run before we terminate
				usleep(10);
				exit(0);
			}
		}

		// first, wait to make sure all threads are running
		thread_debug ("--- wait for all threads to run\n");
		sembuf.sem_num = WAIT_FOR_THREAD;
		sembuf.sem_op = -(conf->thread_count);
		sembuf.sem_flg = 0;
		rc = semop (sem_id, &sembuf, 1);
		if (rc<0) err (1, "waiting on sema failed\n");

		usleep(100);
		gettimeofday(&all_start, NULL);

		// wake them all up
		thread_debug ("--- wake up!\n");
		sembuf.sem_num = WAIT_FOR_PARENT;
		sembuf.sem_op = conf->thread_count;
		sembuf.sem_flg = 0;
		rc = semop (sem_id, &sembuf, 1);
		if (rc<0) err (1, "releasing sema failed\n");

		// then wait until every thread has reported that it finished
		thread_debug ("--- wait for all threads to finish\n");
		sembuf.sem_num = WAIT_FOR_THREAD;
		sembuf.sem_op = -(conf->thread_count);
		sembuf.sem_flg = 0;
		rc = semop (sem_id, &sembuf, 1);
		if (rc<0) err (1, "waiting on sema failed\n");

		thread_debug ("--- alldone!\n");

		// take end time
		gettimeofday(&all_stop, NULL);

		// wait for completion
		while (waitpid(WAIT_MYPGRP, &status, 0) != -1)
			;

		semctl (sem_id, 0, IPC_RMID);

	} else {
		// single threaded case
		gettimeofday(&all_start, NULL);
		runtest(conf, tvp, using_devicename);
		gettimeofday(&all_stop, NULL);
	}
	timersub(&all_stop, &all_start, &all_duration);

	// calculate average time taken by each thread to perform the task
	avgtime = 0;
	for (i = 0; i < conf->thread_count; i++) {
		if (!tvp[i].tv_sec && !tvp[i].tv_usec)
			warn ("thread %d took zero time to complete %s", i, alg->name);
		avgtime += (((double)tvp[i].tv_sec * 1000000 + tvp[i].tv_usec) / 1000000);
	}
	avgtime /= conf->thread_count;

	// calculate how much time the whole test took
	ttltime = (((double)all_duration.tv_sec * 1000000 + all_duration.tv_usec) / 1000000);

	if (avgtime) {
		printf("%8s %6.3lf sec, %d thread%s, %7d %6s crypts, %7d bytes, %8.0lf byte/sec, %7.1lf Mb/sec%s\n",
		    using_devicename,
		    avgtime, conf->thread_count, conf->thread_count>1 ? "s" : "",
		    nops * conf->thread_count, alg->name,
		    conf->size * conf->thread_count,
		    (double)nops * conf->size * conf->thread_count / avgtime,
		    (double)nops * conf->size * conf->thread_count / avgtime * 8 / 1024 / 1024,
		    conf->verbose ? " (thread agregate stats)" : "");

		if (conf->verbose) {
		printf("%8s %6.3lf sec, %d thread%s, %7d %6s crypts, %7d bytes, %8.0lf byte/sec, %7.1lf Mb/sec (with thread overhead)\n",
		    using_devicename,
		    ttltime, conf->thread_count, conf->thread_count>1 ? "s" : "",
		    nops * conf->thread_count, alg->name,
		    conf->size * conf->thread_count,
		    (double)nops * conf->size * conf->thread_count / ttltime,
		    (double)nops * conf->size * conf->thread_count / ttltime * 8 / 1024 / 1024);
		}

	} else if (conf->verbose) {
		printf ("average time was zero; statistics are invalid\n");
	}
#ifdef __FreeBSD__
	if (conf->do_profile) {
		profile_end(otiming);
	}
#endif
	/* the per-thread timings are no longer needed */
	munmap(region, region_len);
	fflush(stdout);
}

/*
 * Entry point: parse the command line into global_conf and run the
 * requested tests.
 *
 * After the options, the first positional argument is the iteration count
 * and any further ones are buffer sizes.  Without explicit sizes the block
 * size of the chosen algorithm (or 8) is used; with -z that size is doubled
 * repeatedly up to 8K and every algorithm in the table is run.  Without
 * -a and -z the 3des entry is tested.
 */
int
main(int argc, char **argv)
{
	int sizes[128], nsizes = 0;
	int testall = 0;
	int i, ch;
	config_t *conf = &global_conf;

	conf->cmd = CIOCGSESSION;

	srandom(time(0));

	while ((ch = getopt(argc, argv, "VKDcpzsva:d:bt:S:P:g:G:")) != -1) {
		switch (ch) {
		case 'V': conf->swap_iv = 1; break;
		case 'K': conf->swap_key = 1; break;
		case 'D': conf->swap_data = 1; break;
#ifdef CIOCGSSESSION
		case 's':
			conf->cmd = CIOCGSSESSION;
			break;
#endif
		case 'v':
			conf->verbose++;
			conf->opflags |= COP_LOG_DEBUG;
			break;
		case 'a':
			conf->alg = getalgbyname(optarg);
			if (conf->alg == NULL)
				usage(argv[0]);
			break;
		case 'd':
			/* leave room for, and always store, the terminator */
			strncpy (conf->req_devicename, optarg, CRYPTO_NAME_LEN - 1);
			conf->req_devicename[CRYPTO_NAME_LEN - 1] = '\0';
			break;
		case 'S':
			srandom(atoi(optarg));
			break;
		case 't':
			conf->thread_count = atoi(optarg);
			if (conf->thread_count < 1) {
				fprintf (stderr,
					"# invalid -t option given: %s\n",
					optarg);
				usage(argv[0]);
			}
			break;
		case 'z':
			testall = 1;
			break;
		case 'p':
			conf->do_profile = 1;
			break;
		case 'g':
			/* GEN_MAX marks "not recognised" until proven otherwise */
			conf->data_gen_type = GEN_MAX;
			if (!strcasecmp(optarg,"random")) {
				conf->data_gen_type = GEN_RANDOM;

			} else if (!strcasecmp(optarg,"ramp")) {
				conf->data_gen_type = GEN_RAMP;

			} else if (!strcasecmp(optarg,"zero")) {
				conf->data_gen_type = GEN_ZERO;

			} else if (!strcasecmp(optarg,"ffff")) {
				conf->data_gen_type = GEN_FFFF;

			} else if (isdigit((unsigned char) optarg[0])) {
				conf->data_gen_type = atoi(optarg);
			}

			/* GEN_MAX itself means "read from file" and is only
			 * valid when selected through -G */
			if (conf->data_gen_type >= GEN_MAX) {
				fprintf (stderr,
					"# invalid -g option given: %s\n",
					optarg);
				usage(argv[0]);
			}
			break;
		case 'G':
			conf->data_gen_type = GEN_MAX;
			conf->fd_read_data_from = open (optarg, O_RDONLY);
			if (conf->fd_read_data_from == -1) {
				fprintf (stderr, "%s: %s\n",
					optarg, strerror(errno));
				usage(argv[0]);
			}
			break;

		case 'b':
			conf->opflags |= COP_F_BATCH;
			break;
		case 'c':
			conf->verify = 1;
			break;
		case 'P':
#ifdef __FreeBSD__
		{
			int otiming = atoi(optarg);

			if (!otiming) {
				fprintf (stderr, "# crypto profiling started\n");
				otiming = profile_start();
				printf ("cryptotest_time=%d\n", otiming);
			} else {
				fprintf (stderr, "# crypto profiling stopped (arg=%d)\n", otiming);
				profile_end(otiming);
			}
			exit (0);
		}
#else
			bail ("option -P only available on FreeBSD\n");
			break;
#endif

		default:
			usage(argv[0]);
		}
	}
	argc -= optind, argv += optind;
	if (argc > 0)
		conf->count = atoi(argv[0]);
	while (argc > 1) {
		int s = atoi(argv[1]);

		/* a non-positive size cannot be tested (a negative one would
		 * be used as a huge length by the buffer handling) */
		if (s <= 0)
			bail ("invalid size: %s\n", argv[1]);
		if (nsizes < N(sizes)) {
			sizes[nsizes++] = s;
		} else {
			printf("Too many sizes, ignoring %d\n", s);
		}
		argc--, argv++;
	}
	if (nsizes == 0) {
		if (conf->alg)
			sizes[nsizes++] = conf->alg->blocksize;
		else
			sizes[nsizes++] = 8;
		if (testall) {
			/* keep doubling until 8K is reached */
			while (sizes[nsizes-1] < 8*1024 && nsizes < (int) N(sizes)) {
				sizes[nsizes] = sizes[nsizes-1]<<1;
				nsizes++;
			}
		}
	}

	if (testall) {
		for (i = 0; i < N(algorithms); i++) {
			int j;
			conf->alg = &algorithms[i];
			for (j = 0; j < nsizes; j++) {
				conf->size = sizes[j];
				runtests(conf);
			}
		}
	} else {
		if (conf->alg == NULL)
			conf->alg = getalgbycode(CRYPTO_3DES_CBC);
		for (i = 0; i < nsizes; i++) {
			conf->size = sizes[i];
			runtests(conf);
		}
	}

	return (0);
}

/*
 * Print a hex + ASCII dump of a buffer on stdout, 16 bytes per row.
 *
 * data - first byte to dump
 * size - number of bytes to dump
 *
 * Each row shows the offset of its first byte, the bytes in hex (padded
 * with blanks on a short last row) and the same bytes as characters, with
 * '.' standing in for anything that is not printable.
 */
static void
hexdump(const char *data, int size)
{
        unsigned char d;
        int g,i;

        /* g is the offset of the current row; every byte is addressed as
         * data[g+i], so the base pointer must not be advanced as well */
        for (g=0; g<size; g+=16) {

                printf("%04x:", g);

                for (i=0; i<16 && (g+i)<size; i++) {
                        d = (unsigned char) data[g+i];
                        printf(" %02x", d);
                }
                for (;i<16;i++) {
                        printf("   ");
                }

                printf(" ");

                for (i=0; i<16 && (g+i)<size; i++) {
                        d = (unsigned char) data[g+i];
                        if (!isprint(d))
                                d = '.';
                        printf("%c", d);
                }

                printf("\n");
        }
}
