Source file src/runtime/hash_test.go

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"internal/race"
    11  	"internal/testenv"
    12  	"math"
    13  	"math/rand"
    14  	"os"
    15  	. "runtime"
    16  	"slices"
    17  	"strings"
    18  	"testing"
    19  	"unsafe"
    20  )
    21  
    22  func TestMemHash32Equality(t *testing.T) {
    23  	if *UseAeshash {
    24  		t.Skip("skipping since AES hash implementation is used")
    25  	}
    26  	var b [4]byte
    27  	r := rand.New(rand.NewSource(1234))
    28  	seed := uintptr(r.Uint64())
    29  	for i := 0; i < 100; i++ {
    30  		randBytes(r, b[:])
    31  		got := MemHash32(unsafe.Pointer(&b), seed)
    32  		want := MemHash(unsafe.Pointer(&b), seed, 4)
    33  		if got != want {
    34  			t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
    35  		}
    36  	}
    37  }
    38  
    39  func TestMemHash64Equality(t *testing.T) {
    40  	if *UseAeshash {
    41  		t.Skip("skipping since AES hash implementation is used")
    42  	}
    43  	var b [8]byte
    44  	r := rand.New(rand.NewSource(1234))
    45  	seed := uintptr(r.Uint64())
    46  	for i := 0; i < 100; i++ {
    47  		randBytes(r, b[:])
    48  		got := MemHash64(unsafe.Pointer(&b), seed)
    49  		want := MemHash(unsafe.Pointer(&b), seed, 8)
    50  		if got != want {
    51  			t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
    52  		}
    53  	}
    54  }
    55  
    56  // Smhasher is a torture test for hash functions.
    57  // https://code.google.com/p/smhasher/
    58  // This code is a port of some of the Smhasher tests to Go.
    59  //
    60  // The current AES hash function passes Smhasher. Our fallback
    61  // hash functions don't, so we only enable the difficult tests when
    62  // we know the AES implementation is available.
    63  
    64  // Sanity checks.
    65  // hash should not depend on values outside key.
    66  // hash should not depend on alignment.
    67  func TestSmhasherSanity(t *testing.T) {
    68  	r := rand.New(rand.NewSource(1234))
    69  	const REP = 10
    70  	const KEYMAX = 128
    71  	const PAD = 16
    72  	const OFFMAX = 16
    73  	for k := 0; k < REP; k++ {
    74  		for n := 0; n < KEYMAX; n++ {
    75  			for i := 0; i < OFFMAX; i++ {
    76  				var b [KEYMAX + OFFMAX + 2*PAD]byte
    77  				var c [KEYMAX + OFFMAX + 2*PAD]byte
    78  				randBytes(r, b[:])
    79  				randBytes(r, c[:])
    80  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
    81  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
    82  					t.Errorf("hash depends on bytes outside key")
    83  				}
    84  			}
    85  		}
    86  	}
    87  }
    88  
    89  type HashSet struct {
    90  	list []uintptr // list of hashes added
    91  }
    92  
    93  func newHashSet() *HashSet {
    94  	return &HashSet{list: make([]uintptr, 0, 1024)}
    95  }
    96  func (s *HashSet) add(h uintptr) {
    97  	s.list = append(s.list, h)
    98  }
    99  func (s *HashSet) addS(x string) {
   100  	s.add(StringHash(x, 0))
   101  }
   102  func (s *HashSet) addB(x []byte) {
   103  	s.add(BytesHash(x, 0))
   104  }
   105  func (s *HashSet) addS_seed(x string, seed uintptr) {
   106  	s.add(StringHash(x, seed))
   107  }
   108  func (s *HashSet) check(t *testing.T) {
   109  	list := s.list
   110  	slices.Sort(list)
   111  
   112  	collisions := 0
   113  	for i := 1; i < len(list); i++ {
   114  		if list[i] == list[i-1] {
   115  			collisions++
   116  		}
   117  	}
   118  	n := len(list)
   119  
   120  	const SLOP = 50.0
   121  	pairs := int64(n) * int64(n-1) / 2
   122  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
   123  	stddev := math.Sqrt(expected)
   124  	if float64(collisions) > expected+SLOP*(3*stddev+1) {
   125  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions, expected, stddev, expected+SLOP*(3*stddev+1))
   126  	}
   127  	// Reset for reuse
   128  	s.list = s.list[:0]
   129  }
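
// collisionThreshold is an illustrative sketch (not used by the tests) of the
// bound that check applies above: n hashes drawn uniformly from a hashSize-bit
// space are expected to collide about pairs/2^hashSize times, and check allows
// SLOP*(3*stddev+1) above that mean. On a 64-bit platform, for example, a run
// of n = 1<<20 hashes gives pairs of about 5.5e11, an expected collision count
// of about 3e-8, and therefore a failure threshold of roughly 50 collisions.
func collisionThreshold(n int) float64 {
	const SLOP = 50.0 // same slack factor as check
	pairs := float64(n) * float64(n-1) / 2
	expected := pairs / math.Pow(2.0, float64(hashSize))
	stddev := math.Sqrt(expected)
	return expected + SLOP*(3*stddev+1)
}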
   130  
   131  // Appending zero bytes to a string must produce a distinct hash for each appended length.
   132  func TestSmhasherAppendedZeros(t *testing.T) {
   133  	s := "hello" + strings.Repeat("\x00", 256)
   134  	h := newHashSet()
   135  	for i := 0; i <= len(s); i++ {
   136  		h.addS(s[:i])
   137  	}
   138  	h.check(t)
   139  }
   140  
   141  // All 0-3 byte strings have distinct hashes.
   142  func TestSmhasherSmallKeys(t *testing.T) {
   143  	if race.Enabled {
   144  		t.Skip("Too long for race mode")
   145  	}
   146  	testenv.ParallelOn64Bit(t)
   147  	h := newHashSet()
   148  	var b [3]byte
   149  	for i := 0; i < 256; i++ {
   150  		b[0] = byte(i)
   151  		h.addB(b[:1])
   152  		for j := 0; j < 256; j++ {
   153  			b[1] = byte(j)
   154  			h.addB(b[:2])
   155  			if !testing.Short() {
   156  				for k := 0; k < 256; k++ {
   157  					b[2] = byte(k)
   158  					h.addB(b[:3])
   159  				}
   160  			}
   161  		}
   162  	}
   163  	h.check(t)
   164  }
   165  
   166  // Different length strings of all zeros have distinct hashes.
   167  func TestSmhasherZeros(t *testing.T) {
   168  	t.Parallel()
   169  	N := 256 * 1024
   170  	if testing.Short() {
   171  		N = 1024
   172  	}
   173  	h := newHashSet()
   174  	b := make([]byte, N)
   175  	for i := 0; i <= N; i++ {
   176  		h.addB(b[:i])
   177  	}
   178  	h.check(t)
   179  }
   180  
   181  // Strings with up to two nonzero bytes all have distinct hashes.
   182  func TestSmhasherTwoNonzero(t *testing.T) {
   183  	if GOARCH == "wasm" {
   184  		t.Skip("Too slow on wasm")
   185  	}
   186  	if testing.Short() {
   187  		t.Skip("Skipping in short mode")
   188  	}
   189  	if race.Enabled {
   190  		t.Skip("Too long for race mode")
   191  	}
   192  	testenv.ParallelOn64Bit(t)
   193  	h := newHashSet()
   194  	for n := 2; n <= 16; n++ {
   195  		twoNonZero(h, n)
   196  	}
   197  	h.check(t)
   198  }
   199  func twoNonZero(h *HashSet, n int) {
   200  	b := make([]byte, n)
   201  
   202  	// all zero
   203  	h.addB(b)
   204  
   205  	// one non-zero byte
   206  	for i := 0; i < n; i++ {
   207  		for x := 1; x < 256; x++ {
   208  			b[i] = byte(x)
   209  			h.addB(b)
   210  			b[i] = 0
   211  		}
   212  	}
   213  
   214  	// two non-zero bytes
   215  	for i := 0; i < n; i++ {
   216  		for x := 1; x < 256; x++ {
   217  			b[i] = byte(x)
   218  			for j := i + 1; j < n; j++ {
   219  				for y := 1; y < 256; y++ {
   220  					b[j] = byte(y)
   221  					h.addB(b)
   222  					b[j] = 0
   223  				}
   224  			}
   225  			b[i] = 0
   226  		}
   227  	}
   228  }
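
// twoNonZeroCount is a sketch of how many keys twoNonZero above feeds to the
// hash set for a given length n: one all-zero key, n*255 keys with a single
// non-zero byte, and C(n,2)*255*255 keys with two non-zero bytes. For n = 16
// that is already about 7.8 million keys, which is why the test stops at
// n = 16 and is skipped in short and race modes.
func twoNonZeroCount(n int) int {
	return 1 + n*255 + n*(n-1)/2*255*255
}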
   229  
   230  // Test strings with repeats, like "abcdabcdabcdabcd..."
   231  func TestSmhasherCyclic(t *testing.T) {
   232  	if testing.Short() {
   233  		t.Skip("Skipping in short mode")
   234  	}
   235  	if race.Enabled {
   236  		t.Skip("Too long for race mode")
   237  	}
   238  	t.Parallel()
   239  	r := rand.New(rand.NewSource(1234))
   240  	const REPEAT = 8
   241  	const N = 1000000
   242  	h := newHashSet()
   243  	for n := 4; n <= 12; n++ {
   244  		b := make([]byte, REPEAT*n)
   245  		for i := 0; i < N; i++ {
   246  			b[0] = byte(i * 79 % 97)
   247  			b[1] = byte(i * 43 % 137)
   248  			b[2] = byte(i * 151 % 197)
   249  			b[3] = byte(i * 199 % 251)
   250  			randBytes(r, b[4:n])
   251  			for j := n; j < n*REPEAT; j++ {
   252  				b[j] = b[j-n]
   253  			}
   254  			h.addB(b)
   255  		}
   256  		h.check(t)
   257  	}
   258  }
   259  
   260  // Test strings with only a few bits set
   261  func TestSmhasherSparse(t *testing.T) {
   262  	if GOARCH == "wasm" {
   263  		t.Skip("Too slow on wasm")
   264  	}
   265  	if testing.Short() {
   266  		t.Skip("Skipping in short mode")
   267  	}
   268  	t.Parallel()
   269  	h := newHashSet()
   270  	sparse(t, h, 32, 6)
   271  	sparse(t, h, 40, 6)
   272  	sparse(t, h, 48, 5)
   273  	sparse(t, h, 56, 5)
   274  	sparse(t, h, 64, 5)
   275  	sparse(t, h, 96, 4)
   276  	sparse(t, h, 256, 3)
   277  	sparse(t, h, 2048, 2)
   278  }
   279  func sparse(t *testing.T, h *HashSet, n int, k int) {
   280  	b := make([]byte, n/8)
   281  	setbits(h, b, 0, k)
   282  	h.check(t)
   283  }
   284  
   285  // setbits adds the hash of b, then recursively sets up to k more bits at bit positions i and higher.
   286  func setbits(h *HashSet, b []byte, i int, k int) {
   287  	h.addB(b)
   288  	if k == 0 {
   289  		return
   290  	}
   291  	for j := i; j < len(b)*8; j++ {
   292  		b[j/8] |= byte(1 << uint(j&7))
   293  		setbits(h, b, j+1, k-1)
   294  		b[j/8] &= byte(^(1 << uint(j&7)))
   295  	}
   296  }
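
// sparseCount sketches how many keys a call sparse(t, h, n, k) hashes:
// setbits enumerates every n-bit key with at most k bits set, which is the
// sum of C(n, j) for j = 0..k. The largest case above, sparse(t, h, 2048, 2),
// therefore hashes 1 + 2048 + 2048*2047/2, about 2.1 million keys.
func sparseCount(n, k int) int {
	total, binom := 0, 1 // binom tracks C(n, j)
	for j := 0; j <= k; j++ {
		total += binom
		binom = binom * (n - j) / (j + 1)
	}
	return total
}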
   297  
   298  // Test all possible combinations of n blocks from the set s.
   299  // "permutation" is a bad name here, but it is what Smhasher uses.
   300  func TestSmhasherPermutation(t *testing.T) {
   301  	if GOARCH == "wasm" {
   302  		t.Skip("Too slow on wasm")
   303  	}
   304  	if testing.Short() {
   305  		t.Skip("Skipping in short mode")
   306  	}
   307  	if race.Enabled {
   308  		t.Skip("Too long for race mode")
   309  	}
   310  	testenv.ParallelOn64Bit(t)
   311  	h := newHashSet()
   312  	permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
   313  	permutation(t, h, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
   314  	permutation(t, h, []uint32{0, 1}, 20)
   315  	permutation(t, h, []uint32{0, 1 << 31}, 20)
   316  	permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
   317  }
   318  func permutation(t *testing.T, h *HashSet, s []uint32, n int) {
   319  	b := make([]byte, n*4)
   320  	genPerm(h, b, s, 0)
   321  	h.check(t)
   322  }
   323  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
   324  	h.addB(b[:n])
   325  	if n == len(b) {
   326  		return
   327  	}
   328  	for _, v := range s {
   329  		b[n] = byte(v)
   330  		b[n+1] = byte(v >> 8)
   331  		b[n+2] = byte(v >> 16)
   332  		b[n+3] = byte(v >> 24)
   333  		genPerm(h, b, s, n+4)
   334  	}
   335  }
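
// permutationCount sketches the size of the key set that genPerm walks for a
// call permutation(t, h, s, n): every prefix of 0..n 4-byte blocks drawn from
// s is hashed, i.e. the sum of len(s)^i for i = 0..n. The first case above
// (8 block values, n = 8) therefore hashes (8^9-1)/7, about 19 million keys.
func permutationCount(s []uint32, n int) int {
	total, pow := 0, 1
	for i := 0; i <= n; i++ {
		total += pow
		pow *= len(s)
	}
	return total
}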
   336  
   337  type Key interface {
   338  	clear()              // set bits all to 0
   339  	random(r *rand.Rand) // set key to something random
   340  	bits() int           // how many bits key has
   341  	flipBit(i int)       // flip bit i of the key
   342  	hash() uintptr       // hash the key
   343  	name() string        // for error reporting
   344  }
   345  
   346  type BytesKey struct {
   347  	b []byte
   348  }
   349  
   350  func (k *BytesKey) clear() {
   351  	clear(k.b)
   352  }
   353  func (k *BytesKey) random(r *rand.Rand) {
   354  	randBytes(r, k.b)
   355  }
   356  func (k *BytesKey) bits() int {
   357  	return len(k.b) * 8
   358  }
   359  func (k *BytesKey) flipBit(i int) {
   360  	k.b[i>>3] ^= byte(1 << uint(i&7))
   361  }
   362  func (k *BytesKey) hash() uintptr {
   363  	return BytesHash(k.b, 0)
   364  }
   365  func (k *BytesKey) name() string {
   366  	return fmt.Sprintf("bytes%d", len(k.b))
   367  }
   368  
   369  type Int32Key struct {
   370  	i uint32
   371  }
   372  
   373  func (k *Int32Key) clear() {
   374  	k.i = 0
   375  }
   376  func (k *Int32Key) random(r *rand.Rand) {
   377  	k.i = r.Uint32()
   378  }
   379  func (k *Int32Key) bits() int {
   380  	return 32
   381  }
   382  func (k *Int32Key) flipBit(i int) {
   383  	k.i ^= 1 << uint(i)
   384  }
   385  func (k *Int32Key) hash() uintptr {
   386  	return Int32Hash(k.i, 0)
   387  }
   388  func (k *Int32Key) name() string {
   389  	return "int32"
   390  }
   391  
   392  type Int64Key struct {
   393  	i uint64
   394  }
   395  
   396  func (k *Int64Key) clear() {
   397  	k.i = 0
   398  }
   399  func (k *Int64Key) random(r *rand.Rand) {
   400  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
   401  }
   402  func (k *Int64Key) bits() int {
   403  	return 64
   404  }
   405  func (k *Int64Key) flipBit(i int) {
   406  	k.i ^= 1 << uint(i)
   407  }
   408  func (k *Int64Key) hash() uintptr {
   409  	return Int64Hash(k.i, 0)
   410  }
   411  func (k *Int64Key) name() string {
   412  	return "int64"
   413  }
   414  
   415  type EfaceKey struct {
   416  	i any
   417  }
   418  
   419  func (k *EfaceKey) clear() {
   420  	k.i = nil
   421  }
   422  func (k *EfaceKey) random(r *rand.Rand) {
   423  	k.i = uint64(r.Int63())
   424  }
   425  func (k *EfaceKey) bits() int {
   426  	// use 64 bits. This tests inlined interfaces
   427  	// on 64-bit targets and indirect interfaces on
   428  	// 32-bit targets.
   429  	return 64
   430  }
   431  func (k *EfaceKey) flipBit(i int) {
   432  	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
   433  }
   434  func (k *EfaceKey) hash() uintptr {
   435  	return EfaceHash(k.i, 0)
   436  }
   437  func (k *EfaceKey) name() string {
   438  	return "Eface"
   439  }
   440  
   441  type IfaceKey struct {
   442  	i interface {
   443  		F()
   444  	}
   445  }
   446  type fInter uint64
   447  
   448  func (x fInter) F() {
   449  }
   450  
   451  func (k *IfaceKey) clear() {
   452  	k.i = nil
   453  }
   454  func (k *IfaceKey) random(r *rand.Rand) {
   455  	k.i = fInter(r.Int63())
   456  }
   457  func (k *IfaceKey) bits() int {
   458  	// use 64 bits. This tests inlined interfaces
   459  	// on 64-bit targets and indirect interfaces on
   460  	// 32-bit targets.
   461  	return 64
   462  }
   463  func (k *IfaceKey) flipBit(i int) {
   464  	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
   465  }
   466  func (k *IfaceKey) hash() uintptr {
   467  	return IfaceHash(k.i, 0)
   468  }
   469  func (k *IfaceKey) name() string {
   470  	return "Iface"
   471  }
   472  
   473  // Flipping a single bit of a key should flip each output bit with 50% probability.
   474  func TestSmhasherAvalanche(t *testing.T) {
   475  	if GOARCH == "wasm" {
   476  		t.Skip("Too slow on wasm")
   477  	}
   478  	if testing.Short() {
   479  		t.Skip("Skipping in short mode")
   480  	}
   481  	if race.Enabled {
   482  		t.Skip("Too long for race mode")
   483  	}
   484  	t.Parallel()
   485  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
   486  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
   487  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
   488  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
   489  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
   490  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
   491  	avalancheTest1(t, &Int32Key{})
   492  	avalancheTest1(t, &Int64Key{})
   493  	avalancheTest1(t, &EfaceKey{})
   494  	avalancheTest1(t, &IfaceKey{})
   495  }
   496  func avalancheTest1(t *testing.T, k Key) {
   497  	const REP = 100000
   498  	r := rand.New(rand.NewSource(1234))
   499  	n := k.bits()
   500  
   501  	// grid[i][j] counts how many times flipping
   502  	// input bit i flipped output bit j.
   503  	grid := make([][hashSize]int, n)
   504  
   505  	for z := 0; z < REP; z++ {
   506  		// pick a random key, hash it
   507  		k.random(r)
   508  		h := k.hash()
   509  
   510  		// flip each bit, hash & compare the results
   511  		for i := 0; i < n; i++ {
   512  			k.flipBit(i)
   513  			d := h ^ k.hash()
   514  			k.flipBit(i)
   515  
   516  			// record the effects of that bit flip
   517  			g := &grid[i]
   518  			for j := 0; j < hashSize; j++ {
   519  				g[j] += int(d & 1)
   520  				d >>= 1
   521  			}
   522  		}
   523  	}
   524  
   525  	// Each entry in the grid should be about REP/2.
   526  	// More precisely, we did N = k.bits() * hashSize experiments where
   527  	// each is the sum of REP coin flips. We want to find bounds on the
   528  	// sum of coin flips such that a truly random experiment would have
   529  	// all sums inside those bounds with 99% probability.
   530  	N := n * hashSize
   531  	var c float64
   532  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
   533  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
   534  	}
   535  	c *= 11.0 // allowed slack: 40% to 60% - we don't need to be perfectly random
   536  	mean := .5 * REP
   537  	stddev := .5 * math.Sqrt(REP)
   538  	low := int(mean - c*stddev)
   539  	high := int(mean + c*stddev)
   540  	for i := 0; i < n; i++ {
   541  		for j := 0; j < hashSize; j++ {
   542  			x := grid[i][j]
   543  			if x < low || x > high {
   544  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
   545  			}
   546  		}
   547  	}
   548  }
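
// avalancheBounds is a sketch of the acceptance window that avalancheTest1
// computes for a key of n bits: each of the n*hashSize grid entries is a sum
// of rep fair coin flips, so it should land near rep/2 with standard deviation
// sqrt(rep)/2. c is grown until all entries of a truly random experiment would
// fall inside mean +/- c*stddev with probability .9999, then multiplied by the
// same 11x slack used above. For rep = 100000 the window works out to roughly
// 40000..60000, which is the "40% to 60%" the slack comment refers to.
func avalancheBounds(n, rep int) (low, high int) {
	N := n * hashSize
	var c float64
	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
	}
	c *= 11.0
	mean := .5 * float64(rep)
	stddev := .5 * math.Sqrt(float64(rep))
	return int(mean - c*stddev), int(mean + c*stddev)
}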
   549  
   550  // All bit rotations of a set of distinct keys
   551  func TestSmhasherWindowed(t *testing.T) {
   552  	if race.Enabled {
   553  		t.Skip("Too long for race mode")
   554  	}
   555  	t.Parallel()
   556  	h := newHashSet()
   557  	t.Logf("32 bit keys")
   558  	windowed(t, h, &Int32Key{})
   559  	t.Logf("64 bit keys")
   560  	windowed(t, h, &Int64Key{})
   561  	t.Logf("string keys")
   562  	windowed(t, h, &BytesKey{make([]byte, 128)})
   563  }
   564  func windowed(t *testing.T, h *HashSet, k Key) {
   565  	if GOARCH == "wasm" {
   566  		t.Skip("Too slow on wasm")
   567  	}
   568  	if PtrSize == 4 {
   569  		// This test tends to be flaky on 32-bit systems.
   570  		// There's not enough bits in the hash output, so we
   571  		// expect a nontrivial number of collisions, and it is
   572  		// often quite a bit higher than expected. See issue 43130.
   573  		t.Skip("Flaky on 32-bit systems")
   574  	}
   575  	if testing.Short() {
   576  		t.Skip("Skipping in short mode")
   577  	}
   578  	const BITS = 16
   579  
   580  	for r := 0; r < k.bits(); r++ {
   581  		for i := 0; i < 1<<BITS; i++ {
   582  			k.clear()
   583  			for j := 0; j < BITS; j++ {
   584  				if i>>uint(j)&1 != 0 {
   585  					k.flipBit((j + r) % k.bits())
   586  				}
   587  			}
   588  			h.add(k.hash())
   589  		}
   590  		h.check(t)
   591  	}
   592  }
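
// windowedExpectedCollisions sketches why windowed skips 32-bit targets
// (issue 43130 above): each window hashes 1<<BITS = 65536 keys, so even an
// ideal hash is expected to collide about 65536*65535/2 / 2^hashSize times
// per window. That is roughly 1e-10 for a 64-bit hash but about 0.5 for a
// 32-bit hash, and the real 32-bit fallback hash often collides quite a bit
// more than that.
func windowedExpectedCollisions() float64 {
	const keys = 1 << 16 // 1<<BITS keys per window
	pairs := float64(keys) * float64(keys-1) / 2
	return pairs / math.Pow(2.0, float64(hashSize))
}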
   593  
   594  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
   595  func TestSmhasherText(t *testing.T) {
   596  	if testing.Short() {
   597  		t.Skip("Skipping in short mode")
   598  	}
   599  	t.Parallel()
   600  	h := newHashSet()
   601  	text(t, h, "Foo", "Bar")
   602  	text(t, h, "FooBar", "")
   603  	text(t, h, "", "FooBar")
   604  }
   605  func text(t *testing.T, h *HashSet, prefix, suffix string) {
   606  	const N = 4
   607  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
   608  	const L = len(S)
   609  	b := make([]byte, len(prefix)+N+len(suffix))
   610  	copy(b, prefix)
   611  	copy(b[len(prefix)+N:], suffix)
   612  	c := b[len(prefix):]
   613  	for i := 0; i < L; i++ {
   614  		c[0] = S[i]
   615  		for j := 0; j < L; j++ {
   616  			c[1] = S[j]
   617  			for k := 0; k < L; k++ {
   618  				c[2] = S[k]
   619  				for x := 0; x < L; x++ {
   620  					c[3] = S[x]
   621  					h.addB(b)
   622  				}
   623  			}
   624  		}
   625  	}
   626  	h.check(t)
   627  }
   628  
   629  // Make sure different seed values generate different hashes.
   630  func TestSmhasherSeed(t *testing.T) {
   631  	h := newHashSet()
   632  	const N = 100000
   633  	s := "hello"
   634  	for i := 0; i < N; i++ {
   635  		h.addS_seed(s, uintptr(i))
   636  	}
   637  	h.check(t)
   638  }
   639  
   640  func TestIssue66841(t *testing.T) {
   641  	if *UseAeshash && os.Getenv("TEST_ISSUE_66841") == "" {
   642  		// We want to test the backup hash, so if we're running on a machine
   643  		// that uses aeshash, exec ourselves while turning aes off.
   644  		cmd := testenv.CleanCmdEnv(testenv.Command(t, testenv.Executable(t), "-test.run=^TestIssue66841$"))
   645  		cmd.Env = append(cmd.Env, "GODEBUG=cpu.aes=off", "TEST_ISSUE_66841=1")
   646  		out, err := cmd.CombinedOutput()
   647  		if err != nil {
   648  			t.Errorf("%s", string(out))
   649  		}
   650  		// Fall through. Might as well run this test when aeshash is on also.
   651  	}
   652  	h := newHashSet()
   653  	var b [16]byte
   654  	binary.LittleEndian.PutUint64(b[:8], 0xe7037ed1a0b428db) // runtime.m2
   655  	for i := 0; i < 1000; i++ {
   656  		binary.LittleEndian.PutUint64(b[8:], uint64(i))
   657  		h.addB(b[:])
   658  	}
   659  	h.check(t)
   660  }
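
// hashImplName is a small sketch of how these tests tell the two hash
// implementations apart: UseAeshash (exported for testing by package runtime)
// reports whether the hardware AES-based hash is in use; otherwise the
// portable fallback hash is active. As in TestIssue66841 above, setting
// GODEBUG=cpu.aes=off forces the fallback even on machines with AES support.
func hashImplName() string {
	if *UseAeshash {
		return "aeshash"
	}
	return "software fallback"
}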
   661  
   662  // size of the hash output (32 or 64 bits)
   663  const hashSize = 32 + int(^uintptr(0)>>63<<5)
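
// hashSizeAlt is an illustrative restatement of hashSize: ^uintptr(0)>>63 is
// 1 on platforms where uintptr is 64 bits wide and 0 where it is 32 bits wide,
// and <<5 scales that to 32 or 0, so hashSize is 64 or 32. Deriving the width
// directly from the size of uintptr (the type the hash functions return)
// yields the same value.
const hashSizeAlt = int(unsafe.Sizeof(uintptr(0))) * 8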
   664  
   665  func randBytes(r *rand.Rand, b []byte) {
   666  	for i := range b {
   667  		b[i] = byte(r.Uint32())
   668  	}
   669  }
   670  
   671  func benchmarkHash(b *testing.B, n int) {
   672  	s := strings.Repeat("A", n)
   673  
   674  	for i := 0; i < b.N; i++ {
   675  		StringHash(s, 0)
   676  	}
   677  	b.SetBytes(int64(n))
   678  }
   679  
   680  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
   681  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
   682  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
   683  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
   684  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
   685  
   686  func TestArrayHash(t *testing.T) {
   687  	// Make sure that "" in arrays hash correctly. The hash
   688  	// should at least scramble the input seed so that, e.g.,
   689  	// {"","foo"} and {"foo",""} have different hashes.
   690  
   691  	// If the hash is bad, then all (8 choose 4) = 70 keys
   692  	// have the same hash. If so, we allocate 70/8 = 8
   693  	// overflow buckets. If the hash is good we don't
   694  	// normally allocate any overflow buckets, and the
   695  	// probability of even one or two overflows goes down rapidly.
   696  	// (There is always 1 allocation of the bucket array. The map
   697  	// header is allocated on the stack.)
   698  	f := func() {
   699  		// Make the key type at most 128 bytes. Otherwise,
   700  		// we get an allocation per key.
   701  		type key [8]string
   702  		m := make(map[key]bool, 70)
   703  
   704  		// fill m with keys that have 4 "foo"s and 4 ""s.
   705  		for i := 0; i < 256; i++ {
   706  			var k key
   707  			cnt := 0
   708  			for j := uint(0); j < 8; j++ {
   709  				if i>>j&1 != 0 {
   710  					k[j] = "foo"
   711  					cnt++
   712  				}
   713  			}
   714  			if cnt == 4 {
   715  				m[k] = true
   716  			}
   717  		}
   718  		if len(m) != 70 {
   719  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
   720  		}
   721  	}
   722  	if n := testing.AllocsPerRun(10, f); n > 6 {
   723  		t.Errorf("too many allocs %f - hash not balanced", n)
   724  	}
   725  }
   726  func TestStructHash(t *testing.T) {
   727  	// See the comment in TestArrayHash.
   728  	f := func() {
   729  		type key struct {
   730  			a, b, c, d, e, f, g, h string
   731  		}
   732  		m := make(map[key]bool, 70)
   733  
   734  		// fill m with keys that have 4 "foo"s and 4 ""s.
   735  		for i := 0; i < 256; i++ {
   736  			var k key
   737  			cnt := 0
   738  			if i&1 != 0 {
   739  				k.a = "foo"
   740  				cnt++
   741  			}
   742  			if i&2 != 0 {
   743  				k.b = "foo"
   744  				cnt++
   745  			}
   746  			if i&4 != 0 {
   747  				k.c = "foo"
   748  				cnt++
   749  			}
   750  			if i&8 != 0 {
   751  				k.d = "foo"
   752  				cnt++
   753  			}
   754  			if i&16 != 0 {
   755  				k.e = "foo"
   756  				cnt++
   757  			}
   758  			if i&32 != 0 {
   759  				k.f = "foo"
   760  				cnt++
   761  			}
   762  			if i&64 != 0 {
   763  				k.g = "foo"
   764  				cnt++
   765  			}
   766  			if i&128 != 0 {
   767  				k.h = "foo"
   768  				cnt++
   769  			}
   770  			if cnt == 4 {
   771  				m[k] = true
   772  			}
   773  		}
   774  		if len(m) != 70 {
   775  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
   776  		}
   777  	}
   778  	if n := testing.AllocsPerRun(10, f); n > 6 {
   779  		t.Errorf("too many allocs %f - hash not balanced", n)
   780  	}
   781  }
   782  
   783  var sink uint64
   784  
   785  func BenchmarkAlignedLoad(b *testing.B) {
   786  	var buf [16]byte
   787  	p := unsafe.Pointer(&buf[0])
   788  	var s uint64
   789  	for i := 0; i < b.N; i++ {
   790  		s += ReadUnaligned64(p)
   791  	}
   792  	sink = s
   793  }
   794  
   795  func BenchmarkUnalignedLoad(b *testing.B) {
   796  	var buf [16]byte
   797  	p := unsafe.Pointer(&buf[1])
   798  	var s uint64
   799  	for i := 0; i < b.N; i++ {
   800  		s += ReadUnaligned64(p)
   801  	}
   802  	sink = s
   803  }
   804  
   805  func TestCollisions(t *testing.T) {
   806  	if testing.Short() {
   807  		t.Skip("Skipping in short mode")
   808  	}
   809  	t.Parallel()
   810  	for i := 0; i < 16; i++ {
   811  		for j := 0; j < 16; j++ {
   812  			if j == i {
   813  				continue
   814  			}
   815  			var a [16]byte
   816  			m := make(map[uint16]struct{}, 1<<16)
   817  			for n := 0; n < 1<<16; n++ {
   818  				a[i] = byte(n)
   819  				a[j] = byte(n >> 8)
   820  				m[uint16(BytesHash(a[:], 0))] = struct{}{}
   821  			}
   822  			// N balls in N bins, for N=65536
   823  			avg := 41427
   824  			stdDev := 123
   825  			if len(m) < avg-40*stdDev || len(m) > avg+40*stdDev {
   826  				t.Errorf("bad number of collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
   827  			}
   828  		}
   829  	}
   830  }
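
// collisionStats sketches where the constants in TestCollisions come from.
// Truncating the hash to 16 bits and feeding it 65536 distinct keys is the
// classic "N balls in N bins" experiment: a given bin stays empty with
// probability (1-1/N)^N, about 1/e, so the expected number of distinct
// outputs is N*(1-1/e), about 41427, and treating the bins as independent
// gives a standard deviation of about sqrt(N*(1/e)*(1-1/e)), about 123.
// The test then allows a very generous +/-40 standard deviations around
// that mean.
func collisionStats() (mean, stdDev float64) {
	const N = 1 << 16
	p := 1 / math.E // probability that a given bin stays empty
	return N * (1 - p), math.Sqrt(N * p * (1 - p))
}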
   831  
