voidCreateFilter(const Slice* keys, int n, std::string* dst)constoverride{ // Compute bloom filter size (in both bits and bytes) size_t bits = n * bits_per_key_;
// For small n, we can see a very high false positive rate. Fix it // by enforcing a minimum bloom filter length. if (bits < 64) bits = 64;
size_t bytes = (bits + 7) / 8; bits = bytes * 8;
constsize_t init_size = dst->size(); dst->resize(init_size + bytes, 0); dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter char* array = &(*dst)[init_size]; for (int i = 0; i < n; i++) { // Use double-hashing to generate a sequence of hash values. // See analysis in [Kirsch,Mitzenmacher 2006]. uint32_t h = BloomHash(keys[i]); constuint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits for (size_t j = 0; j < k_; j++) { constuint32_t bitpos = h % bits; array[bitpos / 8] |= (1 << (bitpos % 8)); h += delta; } } }
boolKeyMayMatch(const Slice& key, const Slice& bloom_filter)constoverride{ constsize_t len = bloom_filter.size(); if (len < 2) returnfalse;
// Use the encoded k so that we can read filters generated by // bloom filters created using different parameters. constsize_t k = array[len - 1]; if (k > 30) { // Reserved for potentially new encodings for short bloom filters. // Consider it a match. returntrue; }
uint32_t h = BloomHash(key); constuint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits for (size_t j = 0; j < k; j++) { constuint32_t bitpos = h % bits; if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) returnfalse; h += delta; } returntrue; }