[linux-seccomp.git] / libseccomp / src / hash.c

/**
 * Seccomp Library hash code
 *
 * Release under the Public Domain
 * Author: Bob Jenkins <bob_jenkins@burtleburtle.net>
 */

/*
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * These are functions for producing 32-bit hashes for hash table lookup.
 * jhash_word(), jhash_le(), jhash_be(), mix(), and final() are externally useful
 * functions.  Routines to test the hash are included if SELF_TEST is defined.
 * You can use this free for any purpose.  It's in the public domain.  It has
 * no warranty.
 *
 * You probably want to use jhash_le().  jhash_le() and jhash_be() hash byte
 * arrays.  jhash_le() is faster than jhash_be() on little-endian machines.
 * Intel and AMD are little-endian machines.
 *
 * If you want to find a hash of, say, exactly 7 integers, do
 *   a = i1;  b = i2;  c = i3;
 *   mix(a,b,c);
 *   a += i4; b += i5; c += i6;
 *   mix(a,b,c);
 *   a += i7;
 *   final(a,b,c);
 *
 * then use c as the hash value.  If you have a variable length array of
 * 4-byte integers to hash, use jhash_word().  If you have a byte array (like
 * a character string), use jhash_le().  If you have several byte arrays, or
 * a mix of things, see the comments above jhash_le().
 *
 * Why is this so big?  I read 12 bytes at a time into 3 4-byte integers, then
 * mix those integers.  This is fast (you can do a lot more thorough mixing
 * with 12*3 instructions on 3 integers than you can with 3 instructions on 1
 * byte), but shoehorning those bytes into integers efficiently is messy.
 */

#include <stdint.h>

#include "arch.h"
#include "hash.h"

/* number of buckets in a table indexed by an n-bit hash */
#define hashsize(n)	((uint32_t)1<<(n))
/* bitmask that reduces a 32-bit hash to its low n bits */
#define hashmask(n)	(hashsize(n)-1)
/* rotate the 32-bit value x left by k bits, 0 < k < 32; the operand is
 * forced to uint32_t so a narrower argument is not rotated at int width */
#define rot(x,k) \
	((((uint32_t)(x)) << (k)) | (((uint32_t)(x)) >> (32 - (k))))

/**
 * Mix 3 32-bit values reversibly
 * @param a 32-bit value
 * @param b 32-bit value
 * @param c 32-bit value
 *
 * The arguments must be modifiable uint32_t lvalues; all three are updated in
 * place.  The macro expands to a single statement (do/while(0)), so it can be
 * used safely as the body of an unbraced if/else.
 *
 * This is reversible, so any information in (a,b,c) before mix() is still
 * in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through mix() in
 * reverse, there are at least 32 bits of the output that are sometimes the
 * same for one pair and different for another pair.
 *
 * This was tested for:
 * - pairs that differed by one bit, by two bits, in any combination of top
 *   bits of (a,b,c), or in any combination of bottom bits of (a,b,c).
 * - "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed the
 *   output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
 *   produced by subtraction) look like a single 1-bit difference.
 * - the base values were pseudorandom, all zero but one bit set, or all zero
 *   plus a counter that starts at zero.
 *
 * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
 * satisfy this are
 *     4  6  8 16 19  4
 *     9 15  3 18 27 15
 *    14  9  3  7 17  3
 *
 * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ"
 * defined as + with a one-bit base and a two-bit delta.  I used
 * http://burtleburtle.net/bob/hash/avalanche.html to choose the operations,
 * constants, and arrangements of the variables.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c) that fail
 * to affect some output bits of (a,b,c), especially of a.  The most thoroughly
 * mixed value is c, but it doesn't really even achieve avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling the
 * number of bits affected, so the goal of mixing pulls in the opposite
 * direction as the goal of parallelism.  I did what I could.  Rotates seem to
 * cost as much as shifts on every machine I could lay my hands on, and rotates
 * are much kinder to the top and bottom bits, so I used rotates.
 *
 */
#define mix(a,b,c) \
	do { \
		(a) -= (c);  (a) ^= rot((c), 4);  (c) += (b); \
		(b) -= (a);  (b) ^= rot((a), 6);  (a) += (c); \
		(c) -= (b);  (c) ^= rot((b), 8);  (b) += (a); \
		(a) -= (c);  (a) ^= rot((c),16);  (c) += (b); \
		(b) -= (a);  (b) ^= rot((a),19);  (a) += (c); \
		(c) -= (b);  (c) ^= rot((b), 4);  (b) += (a); \
	} while (0)

/**
 * Final mixing of 3 32-bit values (a,b,c) into c
 * @param a 32-bit value
 * @param b 32-bit value
 * @param c 32-bit value
 *
 * The arguments must be modifiable uint32_t lvalues; all three are updated in
 * place and the hash result is left in c.  The macro expands to a single
 * statement (do/while(0)), so it can be used safely as the body of an
 * unbraced if/else.
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually produce
 * values of c that look totally different.  This was tested for:
 * - pairs that differed by one bit, by two bits, in any combination of top
 *   bits of (a,b,c), or in any combination of bottom bits of (a,b,c).
 * - "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed the
 *   output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
 *   produced by subtraction) look like a single 1-bit difference.
 * - the base values were pseudorandom, all zero but one bit set, or all zero
 *   plus a counter that starts at zero.
 *
 * These constants passed:
 *  14 11 25 16 4 14 24
 *  12 14 25 16 4 14 24
 * and these came close:
 *   4  8 15 26 3 22 24
 *  10  8 15 26 3 22 24
 *  11  8 15 26 3 22 24
 *
 */
#define final(a,b,c) \
	do { \
		(c) ^= (b); (c) -= rot((b),14); \
		(a) ^= (c); (a) -= rot((c),11); \
		(b) ^= (a); (b) -= rot((a),25); \
		(c) ^= (b); (c) -= rot((b),16); \
		(a) ^= (c); (a) -= rot((c),4);  \
		(b) ^= (a); (b) -= rot((a),14); \
		(c) ^= (b); (c) -= rot((b),24); \
	} while (0)

/**
 * Hash an array of 32-bit values
 * @param k the key, an array of uint32_t values
 * @param length the number of array elements
 * @param initval the previous hash, or an arbitrary value
 *
 * The key is consumed three words at a time; each full triple is folded into
 * the state with mix().  The trailing one to three words are added and the
 * state is finished with final().  An empty key returns the seeded state
 * without any mixing.
 *
 * The seed uses the length converted to bytes (length << 2), which is why
 * this routine matches the byte-oriented hashes for whole-word keys: it is
 * identical to jhash_le() on little-endian machines and to jhash_be() on
 * big-endian machines, except that the length is counted in uint32_t's
 * rather than in bytes.
 *
 */
static uint32_t jhash_word(const uint32_t *k, size_t length, uint32_t initval)
{
	const uint32_t seed = 0xdeadbeef + (((uint32_t)length) << 2) + initval;
	uint32_t a = seed;
	uint32_t b = seed;
	uint32_t c = seed;
	size_t remaining;

	/* fold in every full triple except the last one */
	for (remaining = length; remaining > 3; remaining -= 3, k += 3) {
		a += k[0];
		b += k[1];
		c += k[2];
		mix(a, b, c);
	}

	/* an empty key has nothing to add and is not finalized */
	if (remaining == 0)
		return c;

	/* add the last 1..3 words, highest index first */
	if (remaining == 3)
		c += k[2];
	if (remaining >= 2)
		b += k[1];
	a += k[0];

	final(a, b, c);
	return c;
}

/**
 * Hash a variable-length key into a 32-bit value
 * @param key the key (the unaligned variable-length array of bytes)
 * @param length the length of the key, counting by bytes
 * @param initval can be any 4-byte value
 *
 * Returns a 32-bit value.  Every bit of the key affects every bit of the
 * return value.  Two keys differing by one or two bits will have totally
 * different hash values.  A zero-length key returns the seeded state without
 * any mixing.
 *
 * The key bytes are always combined in little-endian order.  Three read
 * strategies are used, chosen from the native endianness and the alignment of
 * the key pointer: 32-bit reads (little-endian host, 4-byte aligned key),
 * 16-bit reads (little-endian host, 2-byte aligned key), or byte reads
 * (everything else).
 *
 * When VALGRIND is not defined, the 32-bit path reads the whole final word
 * and masks off the bytes beyond the key; define VALGRIND to build the
 * variant that only touches bytes inside the key.
 *
 * The best hash table sizes are powers of 2.  There is no need to do mod a
 * prime (mod is sooo slow!).  If you need less than 32 bits, use a bitmask.
 * For example, if you need only 10 bits, do:
 *   h = (h & hashmask(10));
 * In which case, the hash table should have hashsize(10) elements.
 *
 * If you are hashing n strings (uint8_t **)k, do it like this:
 *   for (i=0, h=0; i<n; ++i) h = jhash_le( k[i], len[i], h);
 *
 */
static uint32_t jhash_le(const void *key, size_t length, uint32_t initval)
{
	uint32_t a, b, c;
	union {
		const void *ptr;
		size_t i;
	} u;     /* needed for Mac Powerbook G4 */

	/* set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

	/* view the pointer as an integer to test its alignment */
	u.ptr = key;
	if ((arch_def_native->endian == ARCH_ENDIAN_LITTLE) &&
	    ((u.i & 0x3) == 0)) {
		/* read 32-bit chunks */
		const uint32_t *k = (const uint32_t *)key;
#ifdef VALGRIND
		/* byte view of the tail, only used by the valgrind-safe switch */
		const uint8_t *k8;
#endif

		while (length > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			mix(a, b, c);
			length -= 12;
			k += 3;
		}

		/* "k[2]&0xffffff" actually reads beyond the end of the string,
		 * but then masks off the part it's not allowed to read.
		 * Because the string is aligned, the masked-off tail is in the
		 * same word as the rest of the string.  Every machine with
		 * memory protection I've seen does it on word boundaries, so
		 * is OK with this.  But VALGRIND will still catch it and
		 * complain.  The masking trick does make the hash noticeably
		 * faster for short strings (like English words). */
#ifndef VALGRIND

		switch(length) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += k[2] & 0xffffff;
			b += k[1];
			a += k[0];
			break;
		case 10:
			c += k[2] & 0xffff;
			b += k[1];
			a += k[0];
			break;
		case 9 :
			c += k[2] & 0xff;
			b += k[1];
			a += k[0];
			break;
		case 8 :
			b += k[1];
			a += k[0];
			break;
		case 7 :
			b += k[1] & 0xffffff;
			a += k[0];
			break;
		case 6 :
			b += k[1] & 0xffff;
			a += k[0];
			break;
		case 5 :
			b += k[1] & 0xff;
			a += k[0];
			break;
		case 4 :
			a += k[0];
			break;
		case 3 :
			a += k[0] & 0xffffff;
			break;
		case 2 :
			a += k[0] & 0xffff;
			break;
		case 1 :
			a += k[0] & 0xff;
			break;
		case 0 :
			/* zero length strings require no mixing */
			return c;
		}

#else /* make valgrind happy */

		k8 = (const uint8_t *)k;
		switch(length) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += ((uint32_t)k8[10]) << 16;
			/* fallthrough */
		case 10:
			c += ((uint32_t)k8[9]) << 8;
			/* fallthrough */
		case 9 :
			c += k8[8];
			/* fallthrough */
		case 8 :
			b += k[1];
			a += k[0];
			break;
		case 7 :
			b += ((uint32_t)k8[6]) << 16;
			/* fallthrough */
		case 6 :
			b += ((uint32_t)k8[5]) << 8;
			/* fallthrough */
		case 5 :
			b += k8[4];
			/* fallthrough */
		case 4 :
			a += k[0];
			break;
		case 3 :
			a += ((uint32_t)k8[2]) << 16;
			/* fallthrough */
		case 2 :
			a += ((uint32_t)k8[1]) << 8;
			/* fallthrough */
		case 1 :
			a += k8[0];
			break;
		case 0 :
			/* zero length strings require no mixing */
			return c;
		}

#endif /* !valgrind */

	} else if ((arch_def_native->endian == ARCH_ENDIAN_LITTLE) &&
		   ((u.i & 0x1) == 0)) {
		/* read 16-bit chunks */
		const uint16_t *k = (const uint16_t *)key;
		const uint8_t  *k8;

		while (length > 12) {
			a += k[0] + (((uint32_t)k[1]) << 16);
			b += k[2] + (((uint32_t)k[3]) << 16);
			c += k[4] + (((uint32_t)k[5]) << 16);
			mix(a, b, c);
			length -= 12;
			k += 6;
		}

		/* odd trailing bytes are picked up through the byte view */
		k8 = (const uint8_t *)k;
		switch(length) {
		case 12:
			c += k[4] + (((uint32_t)k[5]) << 16);
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 11:
			c += ((uint32_t)k8[10]) << 16;
			/* fallthrough */
		case 10:
			c += k[4];
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 9 :
			c += k8[8];
			/* fallthrough */
		case 8 :
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 7 :
			b += ((uint32_t)k8[6]) << 16;
			/* fallthrough */
		case 6 :
			b += k[2];
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 5 :
			b += k8[4];
			/* fallthrough */
		case 4 :
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 3 :
			a += ((uint32_t)k8[2]) << 16;
			/* fallthrough */
		case 2 :
			a += k[0];
			break;
		case 1 :
			a += k8[0];
			break;
		case 0 :
			/* zero length requires no mixing */
			return c;
		}

	} else {
		/* need to read the key one byte at a time */
		const uint8_t *k = (const uint8_t *)key;

		while (length > 12) {
			a += k[0];
			a += ((uint32_t)k[1]) << 8;
			a += ((uint32_t)k[2]) << 16;
			a += ((uint32_t)k[3]) << 24;
			b += k[4];
			b += ((uint32_t)k[5]) << 8;
			b += ((uint32_t)k[6]) << 16;
			b += ((uint32_t)k[7]) << 24;
			c += k[8];
			c += ((uint32_t)k[9]) << 8;
			c += ((uint32_t)k[10]) << 16;
			c += ((uint32_t)k[11]) << 24;
			mix(a, b, c);
			length -= 12;
			k += 12;
		}

		/* every case deliberately falls into the next one */
		switch(length) {
		case 12:
			c += ((uint32_t)k[11]) << 24;
			/* fallthrough */
		case 11:
			c += ((uint32_t)k[10]) << 16;
			/* fallthrough */
		case 10:
			c += ((uint32_t)k[9]) << 8;
			/* fallthrough */
		case 9 :
			c += k[8];
			/* fallthrough */
		case 8 :
			b += ((uint32_t)k[7]) << 24;
			/* fallthrough */
		case 7 :
			b += ((uint32_t)k[6]) << 16;
			/* fallthrough */
		case 6 :
			b += ((uint32_t)k[5]) << 8;
			/* fallthrough */
		case 5 :
			b += k[4];
			/* fallthrough */
		case 4 :
			a += ((uint32_t)k[3]) << 24;
			/* fallthrough */
		case 3 :
			a += ((uint32_t)k[2]) << 16;
			/* fallthrough */
		case 2 :
			a += ((uint32_t)k[1]) << 8;
			/* fallthrough */
		case 1 :
			a += k[0];
			break;
		case 0 :
			/* zero length requires no mixing */
			return c;
		}
	}

	final(a, b, c);
	return c;
}

/**
 * Hash a variable-length key into a 32-bit value
 * @param key the key (the unaligned variable-length array of bytes)
 * @param length the length of the key, counting by bytes
 * @param initval can be any 4-byte value
 *
 * This is the same as jhash_word() on big-endian machines.  It is different
 * from jhash_le() on all machines.  jhash_be() takes advantage of big-endian
 * byte ordering.
 *
 * The key bytes are always combined in big-endian order.  On a big-endian
 * host with a 4-byte aligned key the data is read as 32-bit words; in every
 * other case it is read one byte at a time.  A zero-length key returns the
 * seeded state without any mixing.
 *
 * When VALGRIND is not defined, the 32-bit path reads the whole final word
 * and masks off the bytes beyond the key; define VALGRIND to build the
 * variant that only touches bytes inside the key.
 *
 */
static uint32_t jhash_be( const void *key, size_t length, uint32_t initval)
{
	uint32_t a, b, c;
	union {
		const void *ptr;
		size_t i;
	} u; /* to cast key to (size_t) happily */

	/* set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

	/* view the pointer as an integer to test its alignment */
	u.ptr = key;
	if ((arch_def_native->endian == ARCH_ENDIAN_BIG) &&
	    ((u.i & 0x3) == 0)) {
		/* read 32-bit chunks */
		const uint32_t *k = (const uint32_t *)key;
#ifdef VALGRIND
		/* byte view of the tail, only used by the valgrind-safe switch */
		const uint8_t *k8;
#endif

		while (length > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			mix(a, b, c);
			length -= 12;
			k += 3;
		}

		/* "k[2]<<8" actually reads beyond the end of the string, but
		 * then shifts out the part it's not allowed to read.  Because
		 * the string is aligned, the illegal read is in the same word
		 * as the rest of the string.  Every machine with memory
		 * protection I've seen does it on word boundaries, so is OK
		 * with this.  But VALGRIND will still catch it and complain.
		 * The masking trick does make the hash noticeably faster for
		 * short strings (like English words). */
#ifndef VALGRIND

		switch(length) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += k[2] & 0xffffff00;
			b += k[1];
			a += k[0];
			break;
		case 10:
			c += k[2] & 0xffff0000;
			b += k[1];
			a += k[0];
			break;
		case 9 :
			c += k[2] & 0xff000000;
			b += k[1];
			a += k[0];
			break;
		case 8 :
			b += k[1];
			a += k[0];
			break;
		case 7 :
			b += k[1] & 0xffffff00;
			a += k[0];
			break;
		case 6 :
			b += k[1] & 0xffff0000;
			a += k[0];
			break;
		case 5 :
			b += k[1] & 0xff000000;
			a += k[0];
			break;
		case 4 :
			a += k[0];
			break;
		case 3 :
			a += k[0] & 0xffffff00;
			break;
		case 2 :
			a += k[0] & 0xffff0000;
			break;
		case 1 :
			a += k[0] & 0xff000000;
			break;
		case 0 :
			/* zero length strings require no mixing */
			return c;
		}

#else  /* make valgrind happy */

		k8 = (const uint8_t *)k;
		switch(length) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += ((uint32_t)k8[10]) << 8;
			/* fallthrough */
		case 10:
			c += ((uint32_t)k8[9]) << 16;
			/* fallthrough */
		case 9 :
			c += ((uint32_t)k8[8]) << 24;
			/* fallthrough */
		case 8 :
			b += k[1];
			a += k[0];
			break;
		case 7 :
			b += ((uint32_t)k8[6]) << 8;
			/* fallthrough */
		case 6 :
			b += ((uint32_t)k8[5]) << 16;
			/* fallthrough */
		case 5 :
			b += ((uint32_t)k8[4]) << 24;
			/* fallthrough */
		case 4 :
			a += k[0];
			break;
		case 3 :
			a += ((uint32_t)k8[2]) << 8;
			/* fallthrough */
		case 2 :
			a += ((uint32_t)k8[1]) << 16;
			/* fallthrough */
		case 1 :
			a += ((uint32_t)k8[0]) << 24;
			break;
		case 0 :
			/* zero length strings require no mixing */
			return c;
		}

#endif /* !VALGRIND */

	} else {
		/* need to read the key one byte at a time */
		const uint8_t *k = (const uint8_t *)key;

		while (length > 12) {
			a += ((uint32_t)k[0]) << 24;
			a += ((uint32_t)k[1]) << 16;
			a += ((uint32_t)k[2]) << 8;
			a += ((uint32_t)k[3]);
			b += ((uint32_t)k[4]) << 24;
			b += ((uint32_t)k[5]) << 16;
			b += ((uint32_t)k[6]) << 8;
			b += ((uint32_t)k[7]);
			c += ((uint32_t)k[8]) << 24;
			c += ((uint32_t)k[9]) << 16;
			c += ((uint32_t)k[10]) << 8;
			c += ((uint32_t)k[11]);
			mix(a, b, c);
			length -= 12;
			k += 12;
		}

		/* every case deliberately falls into the next one */
		switch(length) {
		case 12:
			c += k[11];
			/* fallthrough */
		case 11:
			c += ((uint32_t)k[10]) << 8;
			/* fallthrough */
		case 10:
			c += ((uint32_t)k[9]) << 16;
			/* fallthrough */
		case 9 :
			c += ((uint32_t)k[8]) << 24;
			/* fallthrough */
		case 8 :
			b += k[7];
			/* fallthrough */
		case 7 :
			b += ((uint32_t)k[6]) << 8;
			/* fallthrough */
		case 6 :
			b += ((uint32_t)k[5]) << 16;
			/* fallthrough */
		case 5 :
			b += ((uint32_t)k[4]) << 24;
			/* fallthrough */
		case 4 :
			a += k[3];
			/* fallthrough */
		case 3 :
			a += ((uint32_t)k[2]) << 8;
			/* fallthrough */
		case 2 :
			a += ((uint32_t)k[1]) << 16;
			/* fallthrough */
		case 1 :
			a += ((uint32_t)k[0]) << 24;
			break;
		case 0 :
			/* zero length requires no mixing */
			return c;
		}
	}

	final(a, b, c);
	return c;
}

/**
 * Hash a variable-length key into a 32-bit value
 * @param key the key (the unaligned variable-length array of bytes)
 * @param length the length of the key, counting by bytes
 * @param initval can be any 4-byte value
 *
 * A small wrapper function that selects the proper hash function based on the
 * key's size and alignment and the native machine's byte-ordering.
 *
 * The word-based jhash_word() is only used when the key is a whole number of
 * 32-bit words AND is 32-bit aligned, because it dereferences the key as
 * uint32_t.  All other keys go to jhash_be() on big-endian machines and to
 * jhash_le() otherwise; both of those handle unaligned keys themselves.  For
 * whole-word keys the byte-order routine matching the host endianness
 * produces the same value as jhash_word(), so the result does not depend on
 * the key's alignment.
 *
 */
uint32_t jhash(const void *key, size_t length, uint32_t initval)
{
	const int whole_words = (length % sizeof(uint32_t) == 0);
	const int word_aligned = (((uintptr_t)key % sizeof(uint32_t)) == 0);

	if (whole_words && word_aligned)
		return jhash_word(key, (length / sizeof(uint32_t)), initval);

	if (arch_def_native->endian == ARCH_ENDIAN_BIG)
		return jhash_be(key, length, initval);

	return jhash_le(key, length, initval);
}
Commit	Line	Data
8befd5cc MG	1	/**
	2	* Seccomp Library hash code
	3	*
	4	* Release under the Public Domain
	5	* Author: Bob Jenkins <bob_jenkins@burtleburtle.net>
	6	*/
	7
	8	/*
	9	* lookup3.c, by Bob Jenkins, May 2006, Public Domain.
	10	*
	11	* These are functions for producing 32-bit hashes for hash table lookup.
	12	* jhash_word(), jhash_le(), jhash_be(), mix(), and final() are externally useful
	13	* functions. Routines to test the hash are included if SELF_TEST is defined.
	14	* You can use this free for any purpose. It's in the public domain. It has
	15	* no warranty.
	16	*
	17	* You probably want to use jhash_le(). jhash_le() and jhash_be() hash byte
	18	* arrays. jhash_le() is is faster than jhash_be() on little-endian machines.
	19	* Intel and AMD are little-endian machines.
	20	*
	21	* If you want to find a hash of, say, exactly 7 integers, do
	22	* a = i1; b = i2; c = i3;
	23	* mix(a,b,c);
	24	* a += i4; b += i5; c += i6;
	25	* mix(a,b,c);
	26	* a += i7;
	27	* final(a,b,c);
	28	*
	29	* then use c as the hash value. If you have a variable length array of
	30	* 4-byte integers to hash, use jhash_word(). If you have a byte array (like
	31	* a character string), use jhash_le(). If you have several byte arrays, or
	32	* a mix of things, see the comments above jhash_le().
	33	*
	34	* Why is this so big? I read 12 bytes at a time into 3 4-byte integers, then
	35	* mix those integers. This is fast (you can do a lot more thorough mixing
	36	* with 12*3 instructions on 3 integers than you can with 3 instructions on 1
	37	* byte), but shoehorning those bytes into integers efficiently is messy.
	38	*/
	39
	40	#include <stdint.h>
	41
	42	#include "arch.h"
	43	#include "hash.h"
	44
	45	#define hashsize(n) ((uint32_t)1<<(n))
	46	#define hashmask(n) (hashsize(n)-1)
	47	#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
	48
	49	/**
	50	* Mix 3 32-bit values reversibly
	51	* @param a 32-bit value
	52	* @param b 32-bit value
	53	* @param c 32-bit value
	54	*
	55	* This is reversible, so any information in (a,b,c) before mix() is still
	56	* in (a,b,c) after mix().
	57	*
	58	* If four pairs of (a,b,c) inputs are run through mix(), or through mix() in
	59	* reverse, there are at least 32 bits of the output that are sometimes the
	60	* same for one pair and different for another pair.
	61	*
	62	* This was tested for:
	63	* - pairs that differed by one bit, by two bits, in any combination of top
	64	* bits of (a,b,c), or in any combination of bottom bits of (a,b,c).
65	* - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the
66	* output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
67	* produced by subtraction) look like a single 1-bit difference.
68	* - the base values were pseudorandom, all zero but one bit set, or all zero
69	* plus a counter that starts at zero.
70	*
71	* Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
72	* satisfy this are
73	* 4 6 8 16 19 4
74	* 9 15 3 18 27 15
75	* 14 9 3 7 17 3
76	*
77	* Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ"
78	* defined as + with a one-bit base and a two-bit delta. I used
79	* http://burtleburtle.net/bob/hash/avalanche.html to choose the operations,
80	* constants, and arrangements of the variables.
81	*
82	* This does not achieve avalanche. There are input bits of (a,b,c) that fail
83	* to affect some output bits of (a,b,c), especially of a. The most thoroughly
84	* mixed value is c, but it doesn't really even achieve avalanche in c.
85	*
86	* This allows some parallelism. Read-after-writes are good at doubling the
87	* number of bits affected, so the goal of mixing pulls in the opposite
88	* direction as the goal of parallelism. I did what I could. Rotates seem to
89	* cost as much as shifts on every machine I could lay my hands on, and rotates
90	* are much kinder to the top and bottom bits, so I used rotates.
91	*
92	*/
93	#define mix(a,b,c) \
94	{ \
95	a -= c; a ^= rot(c, 4); c += b; \
96	b -= a; b ^= rot(a, 6); a += c; \
97	c -= b; c ^= rot(b, 8); b += a; \
98	a -= c; a ^= rot(c,16); c += b; \
99	b -= a; b ^= rot(a,19); a += c; \
100	c -= b; c ^= rot(b, 4); b += a; \
101	}
102
103	/**
104	* Final mixing of 3 32-bit values (a,b,c) into c
105	* @param a 32-bit value
106	* @param b 32-bit value
107	* @param c 32-bit value
108	*
109	* Pairs of (a,b,c) values differing in only a few bits will usually produce
110	* values of c that look totally different. This was tested for:
111	* - pairs that differed by one bit, by two bits, in any combination of top
112	* bits of (a,b,c), or in any combination of bottom bits of (a,b,c).
113	* - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the
114	* output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
115	* produced by subtraction) look like a single 1-bit difference.
116	* - the base values were pseudorandom, all zero but one bit set, or all zero
117	* plus a counter that starts at zero.
118	*
119	* These constants passed:
120	* 14 11 25 16 4 14 24
121	* 12 14 25 16 4 14 24
122	* and these came close:
123	* 4 8 15 26 3 22 24
124	* 10 8 15 26 3 22 24
125	* 11 8 15 26 3 22 24
126	*
127	*/
128	#define final(a,b,c) \
129	{ \
130	c ^= b; c -= rot(b,14); \
131	a ^= c; a -= rot(c,11); \
132	b ^= a; b -= rot(a,25); \
133	c ^= b; c -= rot(b,16); \
134	a ^= c; a -= rot(c,4); \
135	b ^= a; b -= rot(a,14); \
136	c ^= b; c -= rot(b,24); \
137	}
138
139	/**
140	* Hash an array of 32-bit values
141	* @param k the key, an array of uint32_t values
142	* @param length the number of array elements
143	* @param initval the previous hash, or an arbitrary value
144	*
145	* This works on all machines. To be useful, it requires:
146	* - that the key be an array of uint32_t's, and
147	* - that the length be the number of uint32_t's in the key
148	*
149	* The function jhash_word() is identical to jhash_le() on little-endian
150	* machines, and identical to jhash_be() on big-endian machines, except that
151	* the length has to be measured in uint32_ts rather than in bytes. jhash_le()
152	* is more complicated than jhash_word() only because jhash_le() has to dance
153	* around fitting the key bytes into registers.
154	*
155	*/
156	static uint32_t jhash_word(const uint32_t *k, size_t length, uint32_t initval)
157	{
158	uint32_t a, b, c;
159
160	/* set up the internal state */
161	a = b = c = 0xdeadbeef + (((uint32_t)length) << 2) + initval;
162
163	/* handle most of the key */
164	while (length > 3) {
165	a += k[0];
166	b += k[1];
167	c += k[2];
168	mix(a, b, c);
169	length -= 3;
170	k += 3;
171	}
172
173	/* handle the last 3 uint32_t's */
174	switch(length) {
175	case 3 :
176	c += k[2];
177	case 2 :
178	b += k[1];
179	case 1 :
180	a += k[0];
181	final(a, b, c);
182	case 0:
183	/* nothing left to add */
184	break;
185	}
186
187	return c;
188	}
189
190	/**
191	* Hash a variable-length key into a 32-bit value
192	* @param key the key (the unaligned variable-length array of bytes)
193	* @param length the length of the key, counting by bytes
194	* @param initval can be any 4-byte value
195	*
196	* Returns a 32-bit value. Every bit of the key affects every bit of the
197	* return value. Two keys differing by one or two bits will have totally
198	* different hash values.
199	*
200	* The best hash table sizes are powers of 2. There is no need to do mod a
201	* prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask.
202	* For example, if you need only 10 bits, do:
203	* h = (h & hashmask(10));
204	* In which case, the hash table should have hashsize(10) elements.
205	*
206	* If you are hashing n strings (uint8_t **)k, do it like this:
207	* for (i=0, h=0; i<n; ++i) h = jhash_le( k[i], len[i], h);
208	*
209	*/
210	static uint32_t jhash_le(const void *key, size_t length, uint32_t initval)
211	{
212	uint32_t a, b, c;
213	union {
214	const void *ptr;
215	size_t i;
216	} u; /* needed for Mac Powerbook G4 */
217
218	/* set up the internal state */
219	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
220
221	u.ptr = key;
222	if ((arch_def_native->endian == ARCH_ENDIAN_LITTLE) &&
223	((u.i & 0x3) == 0)) {
224	/* read 32-bit chunks */
	225	const uint32_t *k = (const uint32_t *)key;
226
227	while (length > 12) {
228	a += k[0];
229	b += k[1];
230	c += k[2];
231	mix(a, b, c);
232	length -= 12;
233	k += 3;
234	}
235
236	/* "k[2]&0xffffff" actually reads beyond the end of the string,
237	* but then masks off the part it's not allowed to read.
238	* Because the string is aligned, the masked-off tail is in the
239	* same word as the rest of the string. Every machine with
240	* memory protection I've seen does it on word boundaries, so
241	* is OK with this. But VALGRIND will still catch it and
242	* complain. The masking trick does make the hash noticably
243	* faster for short strings (like English words). */
244	#ifndef VALGRIND
245
246	switch(length) {
247	case 12:
248	c += k[2];
249	b += k[1];
250	a += k[0];
251	break;
252	case 11:
253	c += k[2] & 0xffffff;
254	b += k[1];
255	a += k[0];
256	break;
257	case 10:
258	c += k[2] & 0xffff;
259	b += k[1];
260	a += k[0];
261	break;
262	case 9 :
263	c += k[2] & 0xff;
264	b += k[1];
265	a += k[0];
266	break;
267	case 8 :
268	b += k[1];
269	a += k[0];
270	break;
271	case 7 :
272	b += k[1] & 0xffffff;
273	a += k[0];
274	break;
275	case 6 :
276	b += k[1] & 0xffff;
277	a += k[0];
278	break;
279	case 5 :
280	b += k[1] & 0xff;
281	a += k[0];
282	break;
283	case 4 :
284	a += k[0];
285	break;
286	case 3 :
287	a += k[0] & 0xffffff;
288	break;
289	case 2 :
290	a += k[0] & 0xffff;
291	break;
292	case 1 :
293	a += k[0] & 0xff;
294	break;
295	case 0 :
296	/* zero length strings require no mixing */
297	return c;
298	}
299
300	#else /* make valgrind happy */
301
302	k8 = (const uint8_t *)k;
303	switch(length) {
304	case 12:
305	c += k[2];
306	b += k[1];
307	a += k[0];
308	break;
309	case 11:
310	c += ((uint32_t)k8[10]) << 16;
311	case 10:
312	c += ((uint32_t)k8[9]) << 8;
313	case 9 :
314	c += k8[8];
315	case 8 :
316	b += k[1];
317	a += k[0];
318	break;
319	case 7 :
320	b += ((uint32_t)k8[6]) << 16;
321	case 6 :
322	b += ((uint32_t)k8[5]) << 8;
323	case 5 :
324	b += k8[4];
325	case 4 :
326	a += k[0];
327	break;
328	case 3 :
329	a += ((uint32_t)k8[2]) << 16;
330	case 2 :
331	a += ((uint32_t)k8[1]) << 8;
332	case 1 :
333	a += k8[0];
334	break;
335	case 0 :
336	return c;
337	}
338
339	#endif /* !valgrind */
340
341	} else if ((arch_def_native->endian == ARCH_ENDIAN_LITTLE) &&
342	((u.i & 0x1) == 0)) {
343	/* read 16-bit chunks */
	344	const uint16_t *k = (const uint16_t *)key;
345	const uint8_t *k8;
346
347	while (length > 12) {
348	a += k[0] + (((uint32_t)k[1]) << 16);
349	b += k[2] + (((uint32_t)k[3]) << 16);
350	c += k[4] + (((uint32_t)k[5]) << 16);
351	mix(a, b, c);
352	length -= 12;
353	k += 6;
354	}
355
356	k8 = (const uint8_t *)k;
357	switch(length) {
358	case 12:
359	c += k[4] + (((uint32_t)k[5]) << 16);
360	b += k[2] + (((uint32_t)k[3]) << 16);
361	a += k[0] + (((uint32_t)k[1]) << 16);
362	break;
363	case 11:
364	c += ((uint32_t)k8[10]) << 16;
365	case 10:
366	c += k[4];
367	b += k[2] + (((uint32_t)k[3]) << 16);
368	a += k[0] + (((uint32_t)k[1]) << 16);
369	break;
370	case 9 :
371	c += k8[8];
372	case 8 :
373	b += k[2] + (((uint32_t)k[3]) << 16);
374	a += k[0] + (((uint32_t)k[1]) << 16);
375	break;
376	case 7 :
377	b += ((uint32_t)k8[6]) << 16;
378	case 6 :
379	b += k[2];
380	a += k[0] + (((uint32_t)k[1]) << 16);
381	break;
382	case 5 :
383	b += k8[4];
384	case 4 :
385	a += k[0] + (((uint32_t)k[1]) << 16);
386	break;
387	case 3 :
388	a += ((uint32_t)k8[2]) << 16;
389	case 2 :
390	a += k[0];
391	break;
392	case 1 :
393	a += k8[0];
394	break;
395	case 0 :
396	/* zero length requires no mixing */
397	return c;
398	}
399
400	} else {
401	/* need to read the key one byte at a time */
	402	const uint8_t *k = (const uint8_t *)key;
403
404	while (length > 12) {
405	a += k[0];
406	a += ((uint32_t)k[1]) << 8;
407	a += ((uint32_t)k[2]) << 16;
408	a += ((uint32_t)k[3]) << 24;
409	b += k[4];
410	b += ((uint32_t)k[5]) << 8;
411	b += ((uint32_t)k[6]) << 16;
412	b += ((uint32_t)k[7]) << 24;
413	c += k[8];
414	c += ((uint32_t)k[9]) << 8;
415	c += ((uint32_t)k[10]) << 16;
416	c += ((uint32_t)k[11]) << 24;
417	mix(a, b, c);
418	length -= 12;
419	k += 12;
420	}
421
422	switch(length) {
423	case 12:
424	c += ((uint32_t)k[11]) << 24;
425	case 11:
426	c += ((uint32_t)k[10]) << 16;
427	case 10:
428	c += ((uint32_t)k[9]) << 8;
429	case 9 :
430	c += k[8];
431	case 8 :
432	b += ((uint32_t)k[7]) << 24;
433	case 7 :
434	b += ((uint32_t)k[6]) << 16;
435	case 6 :
436	b += ((uint32_t)k[5]) << 8;
437	case 5 :
438	b += k[4];
439	case 4 :
440	a += ((uint32_t)k[3]) << 24;
441	case 3 :
442	a += ((uint32_t)k[2]) << 16;
443	case 2 :
444	a += ((uint32_t)k[1]) << 8;
445	case 1 :
446	a += k[0];
447	break;
448	case 0 :
449	return c;
450	}
451	}
452
453	final(a, b, c);
454	return c;
455	}
456
457	/**
458	* Hash a variable-length key into a 32-bit value
459	* @param key the key (the unaligned variable-length array of bytes)
460	* @param length the length of the key, counting by bytes
461	* @param initval can be any 4-byte value
462	*
463	* This is the same as jhash_word() on big-endian machines. It is different
464	* from jhash_le() on all machines. jhash_be() takes advantage of big-endian
465	* byte ordering.
466	*
467	*/
468	static uint32_t jhash_be( const void *key, size_t length, uint32_t initval)
469	{
470	uint32_t a, b, c;
471	union {
472	const void *ptr;
473	size_t i;
474	} u; /* to cast key to (size_t) happily */
475
476	/* set up the internal state */
477	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
478
479	u.ptr = key;
480	if ((arch_def_native->endian == ARCH_ENDIAN_BIG) &&
481	((u.i & 0x3) == 0)) {
482	/* read 32-bit chunks */
	483	const uint32_t *k = (const uint32_t *)key;
484
485	while (length > 12) {
486	a += k[0];
487	b += k[1];
488	c += k[2];
489	mix(a, b, c);
490	length -= 12;
491	k += 3;
492	}
493
494	/* "k[2]<<8" actually reads beyond the end of the string, but
495	* then shifts out the part it's not allowed to read. Because
496	* the string is aligned, the illegal read is in the same word
497	* as the rest of the string. Every machine with memory
498	* protection I've seen does it on word boundaries, so is OK
499	* with this. But VALGRIND will still catch it and complain.
500	* The masking trick does make the hash noticably faster for
501	* short strings (like English words). */
502	#ifndef VALGRIND
503
504	switch(length) {
505	case 12:
506	c += k[2];
507	b += k[1];
508	a += k[0];
509	break;
510	case 11:
511	c += k[2] & 0xffffff00;
512	b += k[1];
513	a += k[0];
514	break;
515	case 10:
516	c += k[2] & 0xffff0000;
517	b += k[1];
518	a += k[0];
519	break;
520	case 9 :
521	c += k[2] & 0xff000000;
522	b += k[1];
523	a += k[0];
524	break;
525	case 8 :
526	b += k[1];
527	a += k[0];
528	break;
529	case 7 :
530	b += k[1] & 0xffffff00;
531	a += k[0];
532	break;
533	case 6 :
534	b += k[1] & 0xffff0000;
535	a += k[0];
536	break;
537	case 5 :
538	b += k[1] & 0xff000000;
539	a += k[0];
540	break;
541	case 4 :
542	a += k[0];
543	break;
544	case 3 :
545	a += k[0] & 0xffffff00;
546	break;
547	case 2 :
548	a += k[0] & 0xffff0000;
549	break;
550	case 1 :
551	a += k[0] & 0xff000000;
552	break;
553	case 0 :
554	/* zero length strings require no mixing */
555	return c;
556	}
557
558	#else /* make valgrind happy */
559
560	k8 = (const uint8_t *)k;
561	switch(length) {
562	case 12:
563	c += k[2];
564	b += k[1];
565	a += k[0];
566	break;
567	case 11:
568	c += ((uint32_t)k8[10]) << 8;
569	case 10:
570	c += ((uint32_t)k8[9]) << 16;
571	case 9 :
572	c += ((uint32_t)k8[8]) << 24;
573	case 8 :
574	b += k[1];
575	a += k[0];
576	break;
577	case 7 :
578	b += ((uint32_t)k8[6]) << 8;
579	case 6 :
580	b += ((uint32_t)k8[5]) << 16;
581	case 5 :
582	b += ((uint32_t)k8[4]) << 24;
583	case 4 :
584	a += k[0];
585	break;
586	case 3 :
587	a += ((uint32_t)k8[2]) << 8;
588	case 2 :
589	a += ((uint32_t)k8[1]) << 16;
590	case 1 :
591	a += ((uint32_t)k8[0]) << 24;
592	break;
593	case 0 :
594	return c;
595	}
596
597	#endif /* !VALGRIND */
598
599	} else {
600	/* need to read the key one byte at a time */
	601	const uint8_t *k = (const uint8_t *)key;
602
603	while (length > 12) {
604	a += ((uint32_t)k[0]) << 24;
605	a += ((uint32_t)k[1]) << 16;
606	a += ((uint32_t)k[2]) << 8;
607	a += ((uint32_t)k[3]);
608	b += ((uint32_t)k[4]) << 24;
609	b += ((uint32_t)k[5]) << 16;
610	b += ((uint32_t)k[6]) << 8;
611	b += ((uint32_t)k[7]);
612	c += ((uint32_t)k[8]) << 24;
613	c += ((uint32_t)k[9]) << 16;
614	c += ((uint32_t)k[10]) << 8;
615	c += ((uint32_t)k[11]);
616	mix(a, b, c);
617	length -= 12;
618	k += 12;
619	}
620
621	switch(length) {
622	case 12:
623	c += k[11];
624	case 11:
625	c += ((uint32_t)k[10]) << 8;
626	case 10:
627	c += ((uint32_t)k[9]) << 16;
628	case 9 :
629	c += ((uint32_t)k[8]) << 24;
630	case 8 :
631	b += k[7];
632	case 7 :
633	b += ((uint32_t)k[6]) << 8;
634	case 6 :
635	b += ((uint32_t)k[5]) << 16;
636	case 5 :
637	b += ((uint32_t)k[4]) << 24;
638	case 4 :
639	a += k[3];
640	case 3 :
641	a += ((uint32_t)k[2]) << 8;
642	case 2 :
643	a += ((uint32_t)k[1]) << 16;
644	case 1 :
645	a += ((uint32_t)k[0]) << 24;
646	break;
647	case 0 :
648	return c;
649	}
650	}
651
652	final(a, b, c);
653	return c;
654	}
655
656	/**
657	* Hash a variable-length key into a 32-bit value
658	* @param key the key (the unaligned variable-length array of bytes)
659	* @param length the length of the key, counting by bytes
660	* @param initval can be any 4-byte value
661	*
662	* A small wrapper function that selects the proper hash function based on the
663	* native machine's byte-ordering.
664	*
665	*/
666	uint32_t jhash(const void *key, size_t length, uint32_t initval)
667	{
668	if (length % sizeof(uint32_t) == 0)
669	return jhash_word(key, (length / sizeof(uint32_t)), initval);
670	else if (arch_def_native->endian == ARCH_ENDIAN_BIG)
671	return jhash_be(key, length, initval);
672	else
673	return jhash_le(key, length, initval);
674	}