-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbloom_filter.py
More file actions
31 lines (25 loc) · 885 Bytes
/
bloom_filter.py
File metadata and controls
31 lines (25 loc) · 885 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import math
import mmh3
from bitarray import bitarray
class Bloom_Filter:
def __init__(self, n, fpr):
self.n = n
self.fpr = fpr
self.m = int(-(n * math.log(fpr)) / (math.log(2) ** 2))
self.k = int((self.m / n) * math.log(2))
self.bitarr = bitarray(self.m)
self.bitarr.setall(0)
def hashed(self, item):
h1 = mmh3.hash(item, 41, signed=False)
h2 = mmh3.hash(item, 42, signed=False)
for i in range(self.k):
yield (h1 + i * h2) % self.m
def add(self, items):
for item in items:
for h in self.hashed(item):
self.bitarr[h] = 1
def contains(self, item):
return all(self.bitarr[h] for h in self.hashed(item))
def theoretical_fpr(self):
prob_bit_set = 1 - math.exp(-(self.k * self.n) / self.m)
return prob_bit_set ** self.k