File tree 1 file changed +25
-4
lines changed
1 file changed +25
-4
lines changed Original file line number Diff line number Diff line change @@ -35,11 +35,32 @@ def HyperLogLog(hashes, k):
35
35
#calculate estimate
36
36
estimate = (m ** 2 ) * mean
37
37
38
- #bias can be approximated with the formula 0.7213 / (1 + (1.079/2^k)) for k > 5
39
- #since we want to be using more buckets anyway, and the precise value of the bias is costly to calculate, we will use this as an estimate of the bias
40
- BIAS = 0.7213 / (1 + (1.079 / m ))
38
+ #bias can be approximated with the formula 0.7213 / (1 + (1.079/2^k)) for k >= 6
39
+ #or for values of k < 6 we can use pre-calculated bias factors
40
+ if k <= 4 :
41
+ BIAS = 0.673
42
+ elif k == 5 :
43
+ BIAS == 0.697
44
+ else :
45
+ BIAS = 0.7213 / (1 + (1.079 / m ))
41
46
42
47
#correct for bias
43
48
estimate = BIAS * estimate
44
49
45
- return estimate
50
+ #small range correction
51
+ if estimate < ((5 / 2 ) * m ):
52
+ #get count of registers with rank of 0
53
+ zeros = 0
54
+ for i in buckets :
55
+ if buckets [i ] == 0 :
56
+ zeros += 1
57
+ #apply small range correction
58
+ if not zeros == 0 :
59
+ estimate = m * math .log (estimate , 2 )
60
+
61
+ #large range correction
62
+ elif estimate > ((2 ** 32 ) / 30 ):
63
+ estimate = - 1 * (2 ** 32 ) * math .log (1 - (estimate / (2 ** 32 )))
64
+
65
+ return estimate
66
+
You can’t perform that action at this time.
0 commit comments