|
# A Czech-Havas-Majewski perfect hash implementation.
# See "Perfect Hashing" (a Fundamental Study) by Czech, Havas, Majewski,
# Theoretical Computer Science 182 (1997) 1-143.
|
26 |
| -# |
27 |
| -# Modified by sorting input so as to always write densest rows first. |
28 | 26 |
|
29 | 27 | from rank import rank
|
30 | 28 |
|
|
print("Key count is %i." % (len(keys),))
print("Max key is %i." % (max_key,))

# Tuning parameters for the table layout.
M = 31                 # multiplier applied to every key by diffuse()
power = 9              # log2 of the number of cells in one row
side = (1 << power)    # cells per row; a power of two by construction
bits = 23              # diffuse() keeps only the low `bits` bits of k * M
print("Table will be of side %i." % (side,))

# Marker for "no rank stored in this cell".
# NOTE(review): assumes rank() never returns -1 -- confirm against rank.py.
NOT_A_VALUE = -1

# One displacement per row.  A diffused key d is below (1 << bits), so its
# row number d / side is below 1 << (bits - power).
offset = [0]*(1 << (bits - power))

# square[d % side][d / side] holds the rank of the key whose diffused value
# is d.  The second index is a row number, so len(offset) columns are exactly
# enough; sizing it from len(offset) keeps the allocation in step with
# `bits`/`power` instead of relying on a hand-picked over-estimate.
square = [[NOT_A_VALUE]*len(offset) for i in xrange(side)]

# The packed one-dimensional table that the rows are displaced into.
ranks = [NOT_A_VALUE]*max_key

# Maps diffused value -> original key.
diffused_keys = {}
def diffuse(k, multiplier=None, width=None):
    """Scatter key *k* by multiplying it and keeping the low-order bits.

    Args:
        k: Integer key to scatter.
        multiplier: Factor applied to *k*.  Defaults to the module-level
            ``M`` when omitted.
        width: Number of low-order bits of the product to keep.  Defaults
            to the module-level ``bits`` when omitted.

    Returns:
        ``(k * multiplier) & ((1 << width) - 1)``, an integer in
        ``[0, 1 << width)``.
    """
    # Resolve defaults at call time so the module-level settings are honoured
    # even if they are assigned after this function is defined.
    if multiplier is None:
        multiplier = M
    if width is None:
        width = bits
    return (k * multiplier) & ((1 << width) - 1)
63 | 69 |
|
# Diffuse every key and insist the mapping is collision-free: the table is
# addressed by the diffused value, so two keys sharing one could not be told
# apart.
for k in keys:
    d = diffuse(k)
    assert d not in diffused_keys
    diffused_keys[d] = k

# Scatter the ranks into the two-dimensional table: the row is d / side and
# the cell within the row is d % side.
for d, k in diffused_keys.iteritems():
    r = d // side
    assert square[d % side][r] == NOT_A_VALUE
    square[d % side][r] = rank(k)

length = 0

# First-fit packing: slide each row along `ranks` until none of its occupied
# cells lands on a slot holding a different value, then record that
# displacement in offset[] and copy the row's values in.
#
# A row covers `side` consecutive slots starting at j, so j must stop early
# enough that j + side - 1 is still a valid index into `ranks`.
last_start = len(ranks) - side
for i in xrange(0, len(offset)):
    for j in xrange(0, last_start + 1):
        collision = False
        for c in xrange(0, side):
            s = square[c][i]
            h = ranks[j+c]
            # Equal values may share a slot; only a mismatch is a conflict.
            collision = (s != NOT_A_VALUE and h != NOT_A_VALUE and s != h)
            if collision: break
        if not collision:
            offset[i] = j
            for c in xrange(0, side):
                s = square[c][i]
                if s != NOT_A_VALUE:
                    n = j+c
                    ranks[n] = s
                    length = max(length, n+1)
            print("Offset of row %i is %i (length %i)." % (i, j, length))
            break
    else:
        # No displacement fits: fail here with the row number rather than
        # leaving offset[i] at 0 and tripping the self-check below.
        raise RuntimeError("No collision-free offset for row %i." % (i,))

# Self-check: every key must look up to its own rank through the tables.
for k in keys:
    d = diffuse(k)
    assert rank(k) == ranks[offset[d // side] + (d % side)]

# Unused slots inside the emitted range are written out as 0.
for i in xrange(0, length):
    if ranks[i] == NOT_A_VALUE: ranks[i] = 0

with open('./ranks_%i_%i' % (side, M,), 'w') as f:
    f.write("%s\n" % (ranks[0:length],))

with open('./offset_%i_%i' % (side, M,), 'w') as f:
    f.write("%s\n" % (offset,))

print("Accepted hash table with length %i." % (length,))
0 commit comments