You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
#get rank of the first 1 in the remaining bits (number of trailing zeros)
27
20
rank=1
28
21
forcinreversed(data):
@@ -37,19 +30,12 @@ def LogLog(hashes, off, k):
37
30
#get bucket average
38
31
average=sum(buckets) /m
39
32
#calculate estimate
40
-
estimate=m*2**average
33
+
estimate=m*(2**average)
41
34
42
-
#bias converges to 0.79402 for larger numbers of buckets (k > 5)
35
+
#bias converges to 0.397011808 for larger numbers of buckets (k > 5)
43
36
#since we want to be using more buckets anyway, and the precise value of the bias is costly to calculate, we will use this convergence value as an estimate of the bias
44
37
BIAS=0.397011808
45
38
#correct for bias
46
39
estimate=BIAS*estimate
47
40
48
-
returnestimate
49
-
50
-
#find the average estimate for n different offsets of the hash with k bits for buckets
print ("LogLog, SuperLogLog, and HyperLogLog Demo")
19
19
print ("by Dawsin Blanchard, Sam Braga, Brian Couture, and Ethan Trott\n")
20
20
21
-
# check to make sure the dataset has been downloaded and named properly
22
-
ifnotos.path.exists(filename):
23
-
print("Error: Please download the dataset from https://data.cityofnewyork.us/api/views/faiq-9dfq/rows.csv?accessType=DOWNLOAD and place it in this directory as "+filename)
24
-
exit()
25
-
26
21
plate_hashes= []
27
22
ifos.path.exists("hashes.csv"):
28
23
print("Loading plate hashes from file..")
29
24
plate_hashes=csv_parser.load_hashes()
30
25
else:
26
+
# check to make sure the dataset has been downloaded and named properly
27
+
ifnotos.path.exists(filename):
28
+
print("Error: Please download the dataset from https://data.cityofnewyork.us/api/views/faiq-9dfq/rows.csv?accessType=DOWNLOAD and place it in this directory as "+filename)
29
+
exit()
30
+
31
31
print("Creating plate hashes..")
32
32
plate_hashes=csv_parser.create_hashes(filename)
33
33
print(str(len(plate_hashes))+" hashes loaded.\n")
34
34
35
35
36
36
# directly tally the number of unique plates
37
-
print("Calculating exact amount of unique plates..")
38
-
start=time.time()
37
+
print("Calculating exact amount of unique plates..\n")
0 commit comments