goBloomFilter/bloomfilter.go
2025-04-15 14:27:26 +08:00

98 lines
1.8 KiB
Go

package bloomfilter
import (
"hash"
"hash/fnv"
"math"
)
// BloomFilter is a probabilistic set-membership structure: items are hashed
// to k bit positions inside an m-bit bitmap that lives behind the BitMapEr
// abstraction.
type BloomFilter struct {
	// bitmap is the bit storage the filter reads from and writes to.
	bitmap BitMapEr
	// m is the bitmap size in bits; every hash index is reduced modulo m.
	m uint64
	// k is the number of bit positions derived for each item.
	k uint64
	// hashFn holds the 64-bit hash functions that getHashIndex cycles through.
	hashFn []hash.Hash64
	// opt optionally points at the Options associated with this filter; it
	// may be nil.
	opt *Options
}
// BitMapEr abstracts the bit storage used by BloomFilter so that different
// backends can be plugged in.
type BitMapEr interface {
	// Create is called once by NewBloomFilter with the bitmap size in bits.
	Create(size uint64)
	// Set receives the bit positions computed for an item being added.
	// Presumably it turns each of those bits on; confirm against the
	// implementations.
	Set(positions []uint64) error
	// Get receives the bit positions computed for an item being looked up.
	// The boolean is what MightContain reports; presumably it is true only
	// when every position is set.
	Get(positions []uint64) (bool, error)
}
// optimaMapSize returns the bitmap size m, in bits, for n expected items and
// a target false-positive rate fpRate, using
//
//	m = -n * ln(fpRate) / (ln(2)^2)
//
// rounded up. The result is always in [1, math.MaxUint64]:
//
//   - when the formula yields less than one bit or NaN (n == 0, fpRate >= 1,
//     fpRate < 0, or fpRate is NaN) it returns 1, so callers can safely
//     reduce hash values modulo the result;
//   - when the formula overflows uint64 (for example fpRate == 0 with n > 0)
//     it saturates at math.MaxUint64 instead of relying on an out-of-range
//     float-to-integer conversion, whose result is implementation-dependent.
func optimaMapSize(n uint64, fpRate float64) uint64 {
	size := math.Ceil(-float64(n) * math.Log(fpRate) / math.Pow(math.Log(2), 2))
	switch {
	case math.IsNaN(size) || size < 1:
		return 1
	case size >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(size)
}
// optimaHashCount returns the number of hash positions k to use per item for
// n expected items stored in an m-bit bitmap, using
//
//	k = (m/n) * ln(2)
//
// rounded up.
//
// Degenerate inputs are answered explicitly rather than by converting a NaN
// or +Inf quotient to uint64, whose result is implementation-dependent:
//
//   - m == 0 returns 0: there are no bits to index (this matches what the
//     formula yields for n > 0 and also covers the 0/0 case);
//   - n == 0 with m > 0 returns 1, the smallest usable hash count.
func optimaHashCount(n, m uint64) uint64 {
	if m == 0 {
		return 0
	}
	if n == 0 {
		return 1
	}
	// With m > 0 and n > 0 the quotient is positive and finite, so the
	// rounded value is at least 1 and fits in a uint64.
	k := float64(m) / float64(n) * math.Log(2)
	return uint64(math.Ceil(k))
}
// getHashIndex computes the i-th bit position for data.
//
// The hash function is picked round-robin from bf.hashFn (i modulo its
// length), reset, and fed data followed by a single salt byte holding the
// low 8 bits of i. The 64-bit sum is then reduced modulo bf.m.
//
// The hash instances are stored on the filter and mutated here, so concurrent
// calls on the same BloomFilter need external synchronization. bf.m must be
// non-zero, otherwise the modulo panics.
func (bf *BloomFilter) getHashIndex(data []byte, i int) uint64 {
	hasher := bf.hashFn[i%len(bf.hashFn)]
	salt := []byte{byte(i)}

	hasher.Reset()
	// Write results are deliberately discarded, exactly as before.
	_, _ = hasher.Write(data)
	_, _ = hasher.Write(salt)

	return hasher.Sum64() % bf.m
}
// getSums returns the bf.k bit positions for data, in order: element i is
// getHashIndex(data, i).
//
// The result is allocated once at its final size instead of growing through
// repeated appends. When bf.k is zero it returns nil.
func (bf *BloomFilter) getSums(data []byte) []uint64 {
	if bf.k == 0 {
		return nil
	}
	sums := make([]uint64, 0, bf.k)
	for i := uint64(0); i < bf.k; i++ {
		sums = append(sums, bf.getHashIndex(data, int(i)))
	}
	return sums
}
// NewBloomFilter builds a Bloom filter dimensioned for n expected items at the
// target false-positive rate fpRate.
//
// opts are applied in order to a fresh Options value (nil entries are
// skipped), which is then passed through repairOption; NewBloomFilter panics
// with the returned error if that step fails.
//
// The bitmap size m comes from optimaMapSize and the hash count k from
// optimaHashCount. When options.mode is ModeLocal the bitmap is a Local;
// otherwise it is a Redis configured with the client and key from the
// options. The bitmap is created with m bits before the filter is returned.
//
// The resolved options are kept on the returned filter in its opt field.
func NewBloomFilter(n uint64, fpRate float64, opts ...Option) *BloomFilter {
	options := &Options{}
	for _, opt := range opts {
		if opt == nil {
			// Calling a nil Option would panic with an unhelpful nil
			// function call; treat it as "no change".
			continue
		}
		opt(options)
	}
	if err := options.repairOption(); err != nil {
		panic(err)
	}

	m := optimaMapSize(n, fpRate)
	k := optimaHashCount(n, m)

	var bitmap BitMapEr
	if options.mode == ModeLocal {
		bitmap = &Local{}
	} else {
		bitmap = &Redis{
			client: options.client,
			key:    options.key,
		}
	}
	bitmap.Create(m)

	return &BloomFilter{
		bitmap: bitmap,
		m:      m,
		k:      k,
		// getHashIndex alternates between these two FNV variants.
		hashFn: []hash.Hash64{fnv.New64(), fnv.New64a()},
		// Previously left nil, which silently dropped the configuration.
		opt: options,
	}
}
// Add records item in the filter: it derives the item's bit positions with
// getSums and hands them to the bitmap's Set method, returning whatever error
// Set reports.
func (bf *BloomFilter) Add(item []byte) error {
	positions := bf.getSums(item)
	return bf.bitmap.Set(positions)
}
// MightContain reports whether item may have been added to the filter.
//
// A false result means the bitmap answered that the item is absent. A true
// result means the item is possibly present: either the bitmap reported a
// hit, or the bitmap lookup failed.
//
// When the lookup returns an error the answer is unknown, so the method
// returns true. Returning false in that case would be a false negative: the
// caller would be told the item is definitely absent even though the bitmap
// was never successfully consulted.
func (bf *BloomFilter) MightContain(item []byte) bool {
	found, err := bf.bitmap.Get(bf.getSums(item))
	if err != nil {
		// Unknown state: "might contain" is the only answer that cannot
		// be wrong.
		return true
	}
	return found
}