Initial commit

This commit is contained in:
Donny
2019-04-22 20:46:32 +08:00
commit 49ab8aadd1
25441 changed files with 4055000 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
# Bazel build rules for the vendored bloomfilter package.
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# Go library built from the three bloomfilter sources. It depends on the
# vendored murmur3 package and is visible to every package.
go_library(
name = "go_default_library",
srcs = [
"bitmap.go",
"bloomfilter.go",
"strategies.go",
],
importmap = "go-common/vendor/github.com/Dai0522/go-hash/bloomfilter",
importpath = "github.com/Dai0522/go-hash/bloomfilter",
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/Dai0522/go-hash/murmur3:go_default_library"],
)
# Every file under this directory; tagged automanaged and private to this package.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Publicly visible group that re-exports package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

134
vendor/github.com/Dai0522/go-hash/bloomfilter/bitmap.go generated vendored Normal file
View File

@@ -0,0 +1,134 @@
package bloomfilter
import (
"errors"
"math"
"sync/atomic"
)
const (
// AddrBits is the shift that turns a bit index into an index into the
// []uint64 backing array (1<<AddrBits == 64 bits per word).
AddrBits = 6
)
// Bitmap is the bit-set abstraction the bloom filter keeps its state in.
// LockFreeBitmap is the implementation provided by this package.
type Bitmap interface {
// Set turns on the bit at the given index. LockFreeBitmap returns true only
// when the bit was previously clear.
Set(uint64) bool
// Get reports whether the bit at the given index is on.
Get(uint64) bool
// BitSize returns the capacity in bits.
BitSize() uint64
// BitCount returns the implementation's count of bits that are on.
BitCount() uint64
// Size returns the number of uint64 words in the backing array.
Size() uint32
// Data returns a pointer to the backing word slice.
Data() *[]uint64
// Merge ORs the given words into the bitmap. LockFreeBitmap returns false
// when the word counts differ.
Merge(*[]uint64) bool
}
// LockFreeBitmap is a Bitmap backed by a []uint64 whose words are updated
// with compare-and-swap rather than under a mutex.
type LockFreeBitmap struct {
// data holds the bits, 64 per word; bit i lives in data[i>>AddrBits].
data []uint64
// bitCount is a running count of bits that are on; Set bumps it atomically.
bitCount uint64
}
// NewLockFreeBitmap allocates an all-zero bitmap able to hold at least bits
// bits; the capacity is rounded up to a whole number of 64-bit words.
//
// It returns an error when the computed word count is not positive, which is
// the case for bits == 0.
func NewLockFreeBitmap(bits uint64) (*LockFreeBitmap, error) {
	words := int(math.Ceil(float64(bits) / 64.0))
	if words > 0 {
		// bitCount starts at its zero value: no bit is on yet.
		return &LockFreeBitmap{data: make([]uint64, words)}, nil
	}
	return nil, errors.New("data length is zero")
}
// LoadLockFreeBitmap wraps an existing word slice in a LockFreeBitmap.
//
// The slice is adopted as-is (no copy is made), so later writes through d
// are visible to the bitmap. The set-bit counter is initialised by counting
// the bits that are on in every word. d must not be nil.
func LoadLockFreeBitmap(d *[]uint64) *LockFreeBitmap {
	bm := &LockFreeBitmap{data: *d}
	for i := range bm.data {
		bm.bitCount += bitCount(bm.data[i])
	}
	return bm
}
// Set turns on the bit at bitIndex and reports whether this call changed it:
// true when the bit was clear, false when it was already on.
//
// The word is read with an atomic load and updated with compare-and-swap, so
// concurrent callers never lose each other's bits and exactly one of several
// racing callers for the same bit gets true. bitCount is incremented once per
// bit that is newly turned on. bitIndex must be below BitSize(); a larger
// index panics on the slice access.
func (bits *LockFreeBitmap) Set(bitIndex uint64) bool {
	word := &bits.data[bitIndex>>AddrBits]
	mask := uint64(1) << (bitIndex & 63)
	for {
		// An atomic load pairs with the CAS below; a plain read here would be
		// a data race against concurrent writers.
		old := atomic.LoadUint64(word)
		if old&mask != 0 {
			return false
		}
		if atomic.CompareAndSwapUint64(word, old, old|mask) {
			atomic.AddUint64(&bits.bitCount, 1)
			return true
		}
		// Another writer changed the word in between; reload and retry.
	}
}
// Get reports whether the bit at bitIndex is on.
//
// The word is read with an atomic load because Set and Merge update it with
// compare-and-swap; a plain read would race with them. bitIndex must be below
// BitSize(); a larger index panics on the slice access.
func (bits *LockFreeBitmap) Get(bitIndex uint64) bool {
	word := atomic.LoadUint64(&bits.data[bitIndex>>AddrBits])
	return word&(uint64(1)<<(bitIndex&63)) != 0
}
// BitSize returns the capacity of the bitmap in bits: 64 per backing word.
func (bits *LockFreeBitmap) BitSize() uint64 {
	// Widen before multiplying so the product cannot overflow int on
	// platforms where int is 32 bits.
	return uint64(len(bits.data)) * 64
}
// BitCount returns the current value of the set-bit counter.
//
// The counter is incremented with atomic.AddUint64, so it is read with an
// atomic load here to avoid a data race with concurrent writers.
func (bits *LockFreeBitmap) BitCount() uint64 {
	return atomic.LoadUint64(&bits.bitCount)
}
// Size returns the number of 64-bit words in the backing array, converted
// to uint32.
func (bits *LockFreeBitmap) Size() uint32 {
	words := len(bits.data)
	return uint32(words)
}
// Data returns a pointer to the bitmap's own word slice. No copy is made:
// writes through the returned pointer modify the bitmap directly.
func (bits *LockFreeBitmap) Data() *[]uint64 {
	words := &bits.data
	return words
}
// Merge ORs every word of data into the bitmap and reports whether the merge
// was performed.
//
// It returns false, leaving the bitmap untouched, when data is nil or holds a
// different number of words. Each word is updated with a compare-and-swap
// retry loop, and bitCount is increased by the number of bits the merge newly
// turned on, so BitCount stays in step with the stored bits.
func (bits *LockFreeBitmap) Merge(data *[]uint64) bool {
	if data == nil || len(bits.data) != len(*data) {
		return false
	}
	for i, incoming := range *data {
		word := &bits.data[i]
		for {
			old := atomic.LoadUint64(word)
			merged := old | incoming
			if merged == old {
				break // nothing new in this word
			}
			if atomic.CompareAndSwapUint64(word, old, merged) {
				// merged^old is exactly the set of bits this CAS turned on.
				atomic.AddUint64(&bits.bitCount, bitCount(merged^old))
				break
			}
			// Lost a race with another writer; reload and retry.
		}
	}
	return true
}
// bitCount returns the number of one bits in i.
func bitCount(i uint64) uint64 {
	var ones uint64
	for i != 0 {
		i &= i - 1 // drop the lowest bit that is on
		ones++
	}
	return ones
}

View File

@@ -0,0 +1,173 @@
package bloomfilter
import (
"bytes"
"encoding/binary"
"errors"
"math"
"github.com/Dai0522/go-hash/murmur3"
)
// BloomFilter is a bloom filter that pairs a hashing Strategy with a Bitmap.
type BloomFilter struct {
// strategy maps an element's bytes to bit positions and sets/tests them.
strategy Strategy
// bits is the underlying bit storage.
bits Bitmap
// numHash is the number of bit positions probed per element.
numHash int
}
// New creates an empty BloomFilter sized for expect insertions at a target
// false-positive probability of fpp.
//
// The bit count comes from optimalNumOfBits and the number of hash probes
// from optimalNumOfHash; elements are hashed with a murmur3 128-bit strategy.
//
// An error is returned when fpp is NaN or outside [0, 1], and when the
// computed size is zero bits (for example expect == 0). fpp == 0 is accepted
// and treated as the smallest positive float64 by optimalNumOfBits.
func New(expect uint64, fpp float64) (*BloomFilter, error) {
	// Written as a negated conjunction so that NaN is rejected too. Without
	// this check the size computation converts a negative or NaN float to
	// uint64, whose result is implementation-dependent.
	if !(fpp >= 0 && fpp <= 1) {
		return nil, errors.New("false positive probability out of range [0, 1]")
	}
	m := optimalNumOfBits(expect, fpp)
	b, err := NewLockFreeBitmap(m)
	if err != nil {
		return nil, err
	}
	return &BloomFilter{
		strategy: &Murur3_128Strategy{
			hashFunc: murmur3.New(),
		},
		bits:    b,
		numHash: optimalNumOfHash(expect, m),
	}, nil
}
// Put adds the element whose raw bytes are b to the filter. The bytes are
// handed to the strategy unchanged (the integer helpers encode their value
// little-endian before calling the strategy the same way).
//
// The strategy's result is returned; for Murur3_128Strategy that is true
// when at least one bit changed.
func (bf *BloomFilter) Put(b []byte) bool {
	changed := bf.strategy.Put(b, bf.numHash, bf.bits)
	return changed
}
// PutUint16 adds i to the filter.
//
// The value is written little-endian into a 4-byte buffer (the upper two
// bytes stay zero), so it hashes exactly like the same number passed to
// PutUint32.
func (bf *BloomFilter) PutUint16(i uint16) bool {
	var key [4]byte
	binary.LittleEndian.PutUint16(key[:], i)
	return bf.strategy.Put(key[:], bf.numHash, bf.bits)
}
// PutUint32 adds i to the filter, hashing its 4-byte little-endian encoding.
func (bf *BloomFilter) PutUint32(i uint32) bool {
	var key [4]byte
	binary.LittleEndian.PutUint32(key[:], i)
	return bf.strategy.Put(key[:], bf.numHash, bf.bits)
}
// PutUint64 adds i to the filter, hashing its 8-byte little-endian encoding.
func (bf *BloomFilter) PutUint64(i uint64) bool {
	var key [8]byte
	binary.LittleEndian.PutUint64(key[:], i)
	return bf.strategy.Put(key[:], bf.numHash, bf.bits)
}
// MightContain asks the strategy whether the element whose raw bytes are b
// may have been added. The bytes are passed on unchanged.
//
// For Murur3_128Strategy a false result means at least one probed bit is
// clear; a true result means every probed bit is on.
func (bf *BloomFilter) MightContain(b []byte) bool {
	present := bf.strategy.MightContain(b, bf.numHash, bf.bits)
	return present
}
// MightContainUint16 reports whether i may have been added.
//
// It builds the same key as PutUint16: the value written little-endian into
// a 4-byte buffer whose upper two bytes stay zero.
func (bf *BloomFilter) MightContainUint16(i uint16) bool {
	var key [4]byte
	binary.LittleEndian.PutUint16(key[:], i)
	return bf.strategy.MightContain(key[:], bf.numHash, bf.bits)
}
// MightContainUint32 reports whether i may have been added, using its 4-byte
// little-endian encoding as the key (the same key PutUint32 builds).
func (bf *BloomFilter) MightContainUint32(i uint32) bool {
	var key [4]byte
	binary.LittleEndian.PutUint32(key[:], i)
	return bf.strategy.MightContain(key[:], bf.numHash, bf.bits)
}
// MightContainUint64 reports whether i may have been added, using its 8-byte
// little-endian encoding as the key (the same key PutUint64 builds).
func (bf *BloomFilter) MightContainUint64(i uint64) bool {
	var key [8]byte
	binary.LittleEndian.PutUint64(key[:], i)
	return bf.strategy.MightContain(key[:], bf.numHash, bf.bits)
}
// ExpectedFpp estimates the probability that MightContain returns true for
// an element that was never added. It is computed as the fraction of bits
// that are on, raised to the power of the number of hash probes.
func (bf *BloomFilter) ExpectedFpp() float64 {
	onBits := float64(bf.bits.BitCount())
	capacity := float64(bf.bits.BitSize())
	return math.Pow(onBits/capacity, float64(bf.numHash))
}
// ApproximateElementCount returns an estimate of the number of distinct
// elements that have been added to the filter. The estimate is reasonably
// accurate as long as it does not exceed the expected insertion count the
// filter was built for.
//
// With m total bits, x bits on and k hash probes the estimate is
//
//	-ln(1 - x/m) * m / k
//
// rounded to the nearest integer (halves round up). An empty filter yields
// 0. When every bit is on the formula diverges, so the largest int is
// returned instead of converting +Inf to an integer.
func (bf *BloomFilter) ApproximateElementCount() int {
	size := float64(bf.bits.BitSize())
	count := float64(bf.bits.BitCount())
	if count == 0 {
		return 0
	}
	if count >= size {
		return int(^uint(0) >> 1)
	}
	// Log1p(-x/m) is negative for 0 < x < m; the leading minus makes the
	// estimate positive.
	estimate := -math.Log1p(-count/size) * size / float64(bf.numHash)
	return int(math.Floor(estimate + 0.5))
}
// Serialized encodes the filter into a byte slice and returns a pointer to
// it.
//
// Layout:
//
//	1 byte   strategy identifier, always 1
//	1 byte   number of hash probes (numHash truncated to a byte)
//	4 bytes  big-endian word count of the bitmap
//	8*N      the bitmap words, each big-endian
func (bf *BloomFilter) Serialized() *[]byte {
	var out bytes.Buffer
	out.WriteByte(byte(1))
	out.WriteByte(byte(bf.numHash))

	// One scratch array serves both the 4-byte count and the 8-byte words.
	var scratch [8]byte
	binary.BigEndian.PutUint32(scratch[:4], bf.bits.Size())
	out.Write(scratch[:4])

	for _, word := range *bf.bits.Data() {
		binary.BigEndian.PutUint64(scratch[:], word)
		out.Write(scratch[:])
	}

	encoded := out.Bytes()
	return &encoded
}
// Load rebuilds a BloomFilter from the byte layout produced by Serialized:
// one strategy byte (not inspected), one byte holding the number of hash
// probes, a 4-byte big-endian word count, then that many 8-byte big-endian
// words.
//
// An error is returned when b is nil, shorter than 10 bytes, or holds fewer
// bitmap bytes than its header declares. The declared word count is checked
// before anything is allocated, so truncated or hostile input can neither
// trigger an out-of-range panic nor force a huge allocation.
func Load(b *[]byte) (*BloomFilter, error) {
	if b == nil || len(*b) < 10 {
		return nil, errors.New("invalid data")
	}
	raw := *b
	numHash := int(raw[1])
	length := binary.BigEndian.Uint32(raw[2:6])

	payload := raw[6:]
	if uint64(len(payload))/8 < uint64(length) {
		return nil, errors.New("invalid data: bitmap shorter than declared length")
	}

	data := make([]uint64, length)
	for i := range data {
		data[i] = binary.BigEndian.Uint64(payload[i*8:])
	}
	bits := LoadLockFreeBitmap(&data)
	bf := &BloomFilter{
		strategy: &Murur3_128Strategy{
			hashFunc: murmur3.New(),
		},
		bits:    bits,
		numHash: numHash,
	}
	return bf, nil
}
// Merge ORs the bits of src into dst and returns dst.
//
// When either argument is nil nothing is merged and dst is returned as
// given. The boolean reported by the bitmap merge is discarded, so a merge
// the bitmap refuses (LockFreeBitmap does so for differing word counts)
// leaves dst unchanged without any indication to the caller.
func Merge(src *BloomFilter, dst *BloomFilter) *BloomFilter {
	if src != nil && dst != nil {
		dst.bits.Merge(src.bits.Data())
	}
	return dst
}
// optimalNumOfHash returns the number of hash probes that minimises the
// false-positive rate for n expected insertions into m bits:
// round(m/n * ln 2), but never less than 1.
//
// The ratio m/n is computed in floating point; integer division would drop
// its fractional part and under-estimate the result. n == 0 returns 1
// instead of dividing by zero.
func optimalNumOfHash(n, m uint64) int {
	if n == 0 {
		return 1
	}
	rounded := math.Floor(float64(m)/float64(n)*math.Log(2) + 0.5)
	return int(math.Max(1, rounded))
}
func optimalNumOfBits(n uint64, p float64) uint64 {
if p == 0.0 {
p = math.SmallestNonzeroFloat64
}
return uint64(-1 * float64(n) * math.Log(p) / (math.Log(2) * math.Log(2)))
}

View File

@@ -0,0 +1,53 @@
package bloomfilter
import (
"encoding/binary"
"math"
"github.com/Dai0522/go-hash/murmur3"
)
// Strategy defines how an element's bytes are mapped onto bit positions.
// Both methods take the element bytes, the number of hash probes, and the
// Bitmap to operate on.
type Strategy interface {
// Put sets the element's bits. Murur3_128Strategy returns true when at
// least one bit changed.
Put([]byte, int, Bitmap) bool
// MightContain tests the element's bits. Murur3_128Strategy returns false
// as soon as one probed bit is clear.
MightContain([]byte, int, Bitmap) bool
}
// Murur3_128Strategy is a Strategy that derives every probe position from a
// single 128-bit murmur3 digest of the element.
type Murur3_128Strategy struct {
// hashFunc produces the 128-bit digest via its Murmur3_128 method.
hashFunc *murmur3.Murmur3
}
// Put sets n bits for element b and reports whether at least one of them
// was changed by this call.
//
// The 16-byte digest is split into two little-endian 64-bit halves h1 and
// h2. Probe i uses h1 + i*h2 (wrapping), with the top bit masked off, taken
// modulo the bitmap size.
func (s *Murur3_128Strategy) Put(b []byte, n int, bits Bitmap) bool {
	size := bits.BitSize()
	digest := s.hashFunc.Murmur3_128(b)
	probe := binary.LittleEndian.Uint64(digest[:8])
	step := binary.LittleEndian.Uint64(digest[8:])

	changed := false
	for round := 0; round < n; round++ {
		// Set must run for every probe, even after a change was recorded.
		if bits.Set((probe & math.MaxInt64) % size) {
			changed = true
		}
		probe += step
	}
	return changed
}
// MightContain reports whether all n bits derived from element b are on.
//
// Probe positions are derived exactly as in Put: the digest's two
// little-endian 64-bit halves h1 and h2 give h1 + i*h2 (wrapping), top bit
// masked off, modulo the bitmap size. Checking stops at the first clear bit.
func (s *Murur3_128Strategy) MightContain(b []byte, n int, bits Bitmap) bool {
	size := bits.BitSize()
	digest := s.hashFunc.Murmur3_128(b)
	probe := binary.LittleEndian.Uint64(digest[:8])
	step := binary.LittleEndian.Uint64(digest[8:])

	for remaining := n; remaining > 0; remaining-- {
		if on := bits.Get((probe & math.MaxInt64) % size); !on {
			return false
		}
		probe += step
	}
	return true
}

28
vendor/github.com/Dai0522/go-hash/murmur3/BUILD.bazel generated vendored Normal file
View File

@@ -0,0 +1,28 @@
# Bazel build rules for the vendored murmur3 package.
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# Go library built from the four murmur3 sources; visible to every package.
go_library(
name = "go_default_library",
srcs = [
"murmur3.go",
"murmur3_128.go",
"murmur3_32.go",
"murmur3_64.go",
],
importmap = "go-common/vendor/github.com/Dai0522/go-hash/murmur3",
importpath = "github.com/Dai0522/go-hash/murmur3",
visibility = ["//visibility:public"],
)
# Every file under this directory; tagged automanaged and private to this package.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Publicly visible group that re-exports package-srcs.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

39
vendor/github.com/Dai0522/go-hash/murmur3/murmur3.go generated vendored Normal file
View File

@@ -0,0 +1,39 @@
package murmur3
// Murmur3 is a murmur3 hasher configured with a fixed seed.
type Murmur3 struct {
// seed is passed to every hash computation made through this value.
seed uint32
}
// New returns a Murmur3 hasher that uses seed 0.
func New() *Murmur3 {
	const defaultSeed uint32 = 0
	return NewWithSeed(defaultSeed)
}
// NewWithSeed returns a Murmur3 hasher that uses s as its seed.
func NewWithSeed(s uint32) *Murmur3 {
	h := new(Murmur3)
	h.seed = s
	return h
}
// Murmur3_32 returns the result of murmur3_32 for b using the hasher's seed.
//
// NOTE: murmur3_32 is currently an unimplemented placeholder that returns
// its input unchanged, so this does not yet produce a real digest.
func (h *Murmur3) Murmur3_32(b []byte) []byte {
	seed := h.seed
	return murmur3_32(seed, b)
}
// Murmur3_64 returns the result of murmur3_64 for b using the hasher's seed.
//
// NOTE: murmur3_64 is currently an unimplemented placeholder that returns
// its input unchanged, so this does not yet produce a real digest.
func (h *Murmur3) Murmur3_64(b []byte) []byte {
	seed := h.seed
	return murmur3_64(seed, b)
}
// Murmur3_128 hashes b with the hasher's seed and returns the 128-bit digest
// as 16 bytes: the first 64-bit half in bytes 0-7 and the second half in
// bytes 8-15, each little-endian.
func (h *Murmur3) Murmur3_128(b []byte) []byte {
	lo, hi := murmur3_128(h.seed, b)
	digest := make([]byte, 16)
	for i := uint(0); i < 8; i++ {
		digest[i] = byte(lo >> (8 * i))
		digest[8+i] = byte(hi >> (8 * i))
	}
	return digest
}

View File

@@ -0,0 +1,133 @@
package murmur3
import (
"unsafe"
)
const (
// c1_128 and c2_128 are the multipliers used by the 128-bit mixing steps
// in murmur3_128 and bmix64.
c1_128 = 0x87c37b91114253d5
c2_128 = 0x4cf5ad432745937f
// size128 is presumably the digest size in bytes; it is not referenced by
// the code visible here.
size128 = 16
)
// murmur3_128 computes the 128-bit murmur3 hash of b and returns it as two
// 64-bit halves. Both halves of the state start out as seed.
//
// Full 16-byte blocks are consumed by bmix64. The remaining 0-15 bytes are
// folded in here: tail bytes 8-14 are assembled little-endian into k2 and
// mixed into h2, tail bytes 0-7 into k1 and mixed into h1. The input length
// is then XORed into both halves and they are finalised with fmix64.
func murmur3_128(seed uint32, b []byte) (uint64, uint64) {
	total := len(b)
	h1, h2 := bmix64(b, uint64(seed), uint64(seed))

	// Whatever is left after the last complete 16-byte block.
	rest := b[total-(total&15):]

	if len(rest) > 8 {
		var k2 uint64
		for i := len(rest) - 1; i >= 8; i-- {
			k2 ^= uint64(rest[i]) << (8 * uint(i-8))
		}
		k2 *= c2_128
		k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
		k2 *= c1_128
		h2 ^= k2
	}

	if len(rest) > 0 {
		low := len(rest)
		if low > 8 {
			low = 8
		}
		var k1 uint64
		for i := low - 1; i >= 0; i-- {
			k1 ^= uint64(rest[i]) << (8 * uint(i))
		}
		k1 *= c1_128
		k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
		k1 *= c2_128
		h1 ^= k1
	}

	// Finalisation.
	h1 ^= uint64(total)
	h2 ^= uint64(total)

	h1 += h2
	h2 += h1

	h1 = fmix64(h1)
	h2 = fmix64(h2)

	h1 += h2
	h2 += h1

	return h1, h2
}
// bmix64 folds every complete 16-byte block of b into the running state
// (h1, h2) and returns the updated state. Trailing bytes that do not fill a
// block are ignored here; murmur3_128 handles them.
//
// NOTE(review): each block is reinterpreted in place as two uint64 words via
// unsafe, so the words are read in the machine's native byte order, whereas
// the tail in murmur3_128 is assembled little-endian explicitly. The two
// agree on little-endian hardware only — confirm before relying on this on
// other targets.
func bmix64(b []byte, h1, h2 uint64) (uint64, uint64) {
	for off := 0; off+16 <= len(b); off += 16 {
		words := (*[2]uint64)(unsafe.Pointer(&b[off]))
		k1 := words[0]
		k2 := words[1]

		// First word goes into h1.
		k1 *= c1_128
		k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
		k1 *= c2_128
		h1 ^= k1
		h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27)
		h1 = (h1+h2)*5 + 0x52dce729

		// Second word goes into h2, which also absorbs the new h1.
		k2 *= c2_128
		k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
		k2 *= c1_128
		h2 ^= k2
		h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31)
		h2 = (h2+h1)*5 + 0x38495ab5
	}
	return h1, h2
}
// fmix64 is the 64-bit finalisation mix: three xor-shift-by-33 steps with a
// multiplication after each of the first two.
func fmix64(k uint64) uint64 {
	const (
		mulA = 0xff51afd7ed558ccd
		mulB = 0xc4ceb9fe1a85ec53
	)
	k = (k ^ k>>33) * mulA
	k = (k ^ k>>33) * mulB
	return k ^ k>>33
}

View File

@@ -0,0 +1,6 @@
package murmur3
// murmur3_32 is an unimplemented placeholder: it ignores seed and returns b
// unchanged instead of a 32-bit murmur3 digest.
func murmur3_32(seed uint32, b []byte) []byte {
// TODO: implement
return b
}

View File

@@ -0,0 +1,6 @@
package murmur3
// murmur3_64 is an unimplemented placeholder: it ignores seed and returns b
// unchanged instead of a 64-bit murmur3 digest.
func murmur3_64(seed uint32, b []byte) []byte {
// TODO: implement
return b
}