go-zero/core/bloom/bloom.go

166 lines
3.8 KiB
Go
Raw Normal View History

2020-07-26 17:09:05 +08:00
package bloom
import (
"context"
_ "embed"
2020-07-26 17:09:05 +08:00
"errors"
"strconv"
"github.com/zeromicro/go-zero/core/hash"
"github.com/zeromicro/go-zero/core/stores/redis"
2020-07-26 17:09:05 +08:00
)
// maps is the number of hash locations computed for every element, i.e. the
// value k in the error rate table.
// For the detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
const maps = 14
var (
// ErrTooLargeOffset indicates the offset is too large in bitset.
ErrTooLargeOffset = errors.New("too large offset")
//go:embed setscript.lua
2024-04-17 23:37:35 +08:00
setLuaScript string
setScript = redis.NewScript(setLuaScript)
//go:embed testscript.lua
2024-04-17 23:37:35 +08:00
testLuaScript string
testScript = redis.NewScript(testLuaScript)
2020-07-26 17:09:05 +08:00
)
// A Filter is a bloom filter.
type Filter struct {
	// bits is the size of the bit set; hash values are reduced modulo bits.
	bits uint
	// bitSet is the storage the filter reads and writes its bits through.
	bitSet bitSetProvider
}

// bitSetProvider is the bit storage a Filter operates on.
type bitSetProvider interface {
	// check is called with the offsets computed for an element and reports
	// the lookup result.
	check(ctx context.Context, offsets []uint) (bool, error)
	// set is called with the offsets computed for an element being added.
	set(ctx context.Context, offsets []uint) error
}
2021-02-17 09:58:35 +08:00
// New create a Filter, store is the backed redis, key is the key for the bloom filter,
2020-07-26 17:09:05 +08:00
// bits is how many bits will be used, maps is how many hashes for each addition.
// best practices:
// elements - means how many actual elements
// when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
// for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
2021-02-17 09:58:35 +08:00
func New(store *redis.Redis, key string, bits uint) *Filter {
return &Filter{
2020-07-26 17:09:05 +08:00
bits: bits,
bitSet: newRedisBitSet(store, key, bits),
}
}
2021-02-17 09:58:35 +08:00
// Add adds data into f, using a background context.
func (f *Filter) Add(data []byte) error {
	ctx := context.Background()
	return f.AddCtx(ctx, data)
}
// AddCtx adds data into f with context.
func (f *Filter) AddCtx(ctx context.Context, data []byte) error {
2020-07-26 17:09:05 +08:00
locations := f.getLocations(data)
return f.bitSet.set(ctx, locations)
2020-07-26 17:09:05 +08:00
}
2021-02-17 09:58:35 +08:00
// Exists checks if data is in f, using a background context.
func (f *Filter) Exists(data []byte) (bool, error) {
	ctx := context.Background()
	return f.ExistsCtx(ctx, data)
}
// ExistsCtx checks if data is in f with context.
func (f *Filter) ExistsCtx(ctx context.Context, data []byte) (bool, error) {
2020-07-26 17:09:05 +08:00
locations := f.getLocations(data)
isSet, err := f.bitSet.check(ctx, locations)
2020-07-26 17:09:05 +08:00
if err != nil {
return false, err
}
2022-06-02 09:28:29 +08:00
2022-05-27 18:36:18 +08:00
return isSet, nil
2020-07-26 17:09:05 +08:00
}
2021-02-17 09:58:35 +08:00
func (f *Filter) getLocations(data []byte) []uint {
2020-07-26 17:09:05 +08:00
locations := make([]uint, maps)
for i := uint(0); i < maps; i++ {
hashValue := hash.Hash(append(data, byte(i)))
locations[i] = uint(hashValue % uint64(f.bits))
}
return locations
}
// redisBitSet is a bit set stored under a single redis key and accessed
// through the set and test scripts.
type redisBitSet struct {
	// store is the redis client the scripts are run on.
	store *redis.Redis
	// key is the redis key passed to the scripts.
	key string
	// bits is the exclusive upper bound for valid offsets.
	bits uint
}
// newRedisBitSet returns a redisBitSet that uses key on store and accepts
// offsets below bits.
func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
	bitSet := new(redisBitSet)
	bitSet.store = store
	bitSet.key = key
	bitSet.bits = bits

	return bitSet
}
// buildOffsetArgs converts offsets into their base-10 string form, in order,
// for use as script arguments.
// It returns ErrTooLargeOffset (and a nil slice) as soon as an offset is not
// below r.bits. Empty input yields a nil slice and a nil error.
func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
	if len(offsets) == 0 {
		// Keep the nil result for empty input rather than an empty slice.
		return nil, nil
	}

	// The final length is known, so allocate once instead of growing.
	args := make([]string, 0, len(offsets))
	for _, offset := range offsets {
		if offset >= r.bits {
			return nil, ErrTooLargeOffset
		}

		args = append(args, strconv.FormatUint(uint64(offset), 10))
	}

	return args, nil
}
func (r *redisBitSet) check(ctx context.Context, offsets []uint) (bool, error) {
2020-07-26 17:09:05 +08:00
args, err := r.buildOffsetArgs(offsets)
if err != nil {
return false, err
}
2024-04-17 23:37:35 +08:00
resp, err := r.store.ScriptRunCtx(ctx, testScript, []string{r.key}, args)
2024-03-08 22:35:17 +08:00
if errors.Is(err, redis.Nil) {
2020-07-26 17:09:05 +08:00
return false, nil
} else if err != nil {
return false, err
}
2021-02-09 13:50:21 +08:00
exists, ok := resp.(int64)
if !ok {
2020-07-26 17:09:05 +08:00
return false, nil
}
2021-02-09 13:50:21 +08:00
return exists == 1, nil
2020-07-26 17:09:05 +08:00
}
// del only use for testing.
2020-07-26 17:09:05 +08:00
func (r *redisBitSet) del() error {
_, err := r.store.Del(r.key)
return err
}
// expire only use for testing.
2020-07-26 17:09:05 +08:00
func (r *redisBitSet) expire(seconds int) error {
return r.store.Expire(r.key, seconds)
}
func (r *redisBitSet) set(ctx context.Context, offsets []uint) error {
2020-07-26 17:09:05 +08:00
args, err := r.buildOffsetArgs(offsets)
if err != nil {
return err
}
2024-04-17 23:37:35 +08:00
_, err = r.store.ScriptRunCtx(ctx, setScript, []string{r.key}, args)
2024-03-08 22:35:17 +08:00
if errors.Is(err, redis.Nil) {
2020-07-26 17:09:05 +08:00
return nil
}
2021-02-09 13:50:21 +08:00
return err
2020-07-26 17:09:05 +08:00
}