1 原理
布隆过滤器的原理及数学推导可以参考:
https://www.cnblogs.com/xiaohuiduan/p/11488020.html
2 Python demo 实现
from bitarray import bitarray
# 3rd party
import mmh3
class BloomFilter(set):
def __init__(self, size, hash_count):
    """Create a filter backed by a bit array of ``size`` bits, all cleared.

    Args:
        size: Length of the backing bit array; ``add`` also uses it as the
            modulus when mapping a hash value to a bit position.
        hash_count: Number of seeded hash evaluations ``add`` performs
            for each item.
    """
    super(BloomFilter, self).__init__()
    bits = bitarray(size)
    # Clear every bit explicitly so the filter starts with nothing set.
    bits.setall(0)
    self.bit_array = bits
    self.size, self.hash_count = size, hash_count
def __len__(self):
return self.size
def __iter__(self):
return iter(self.bit_array)
def add(self, item):
    """Record ``item`` by setting one bit per hash seed.

    For every seed in ``range(self.hash_count)`` the item is hashed with
    ``mmh3.hash(item, seed)``, the result is reduced modulo ``self.size``,
    and the bit at that position is set to 1.

    Args:
        item: Value passed straight to ``mmh3.hash``.

    Returns:
        This filter instance, so calls can be chained.
    """
    for seed in range(self.hash_count):
        # Python's % yields a non-negative result for a positive modulus,
        # so a negative hash value still maps to a valid position.
        slot = mmh3.hash(item, seed) % self.size
        self.bit_array[slot] = 1
    return self
def __contains__(self,item):
out = True
for i in range(self.hash_count):
index = (mmh3.hash(item,i) % self.size)