Skip to content

Commit 2b8b285

Browse files
committed
Introduce bloom_filter_size for BRIN bloom opclass
Move the calculation of Bloom filter parameters (for BRIN indexes) into a separate function to make reuse easier. At the moment it is only called from one place, but that may change, and the code is easier to read this way anyway.

Reviewed-by: Heikki Linnakangas
Discussion: https://postgr.es/m/0e1f3350-c9cf-ab62-43a5-5dae314de89c%40enterprisedb.com
1 parent 28d03fe commit 2b8b285

File tree

1 file changed

+47
-16
lines changed

1 file changed

+47
-16
lines changed

src/backend/access/brin/brin_bloom.c

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,48 @@ typedef struct BloomFilter
259259
char data[FLEXIBLE_ARRAY_MEMBER];
260260
} BloomFilter;
261261

262+
/*
263+
* bloom_filter_size
264+
* Calculate Bloom filter parameters (nbits, nbytes, nhashes).
265+
*
266+
* Given expected number of distinct values and desired false positive rate,
267+
* calculates the optimal parameters of the Bloom filter.
268+
*
269+
* The resulting parameters are returned through nbytesp (number of bytes),
270+
* nbitsp (number of bits) and nhashesp (number of hash functions). If a
271+
* pointer is NULL, the parameter is not returned.
272+
*/
273+
static void
274+
bloom_filter_size(int ndistinct, double false_positive_rate,
275+
int *nbytesp, int *nbitsp, int *nhashesp)
276+
{
277+
double k;
278+
int nbits,
279+
nbytes;
280+
281+
/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
282+
nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
283+
284+
/* round m to whole bytes */
285+
nbytes = ((nbits + 7) / 8);
286+
nbits = nbytes * 8;
287+
288+
/*
289+
* round(log(2.0) * m / ndistinct), but assume round() may not be
290+
* available on Windows
291+
*/
292+
k = log(2.0) * nbits / ndistinct;
293+
k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
294+
295+
if (nbytesp)
296+
*nbytesp = nbytes;
297+
298+
if (nbitsp)
299+
*nbitsp = nbits;
300+
301+
if (nhashesp)
302+
*nhashesp = (int) k;
303+
}
262304

263305
/*
264306
* bloom_init
@@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
275317

276318
int nbits; /* size of filter / number of bits */
277319
int nbytes; /* size of filter / number of bytes */
278-
279-
double k; /* number of hash functions */
320+
int nhashes; /* number of hash functions */
280321

281322
Assert(ndistinct > 0);
282323
Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) &&
283324
(false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE));
284325

285-
/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
286-
nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
287-
288-
/* round m to whole bytes */
289-
nbytes = ((nbits + 7) / 8);
290-
nbits = nbytes * 8;
326+
/* calculate bloom filter size / parameters */
327+
bloom_filter_size(ndistinct, false_positive_rate,
328+
&nbytes, &nbits, &nhashes);
291329

292330
/*
293331
* Reject filters that are obviously too large to store on a page.
@@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
310348
elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
311349
BloomMaxFilterSize);
312350

313-
/*
314-
* round(log(2.0) * m / ndistinct), but assume round() may not be
315-
* available on Windows
316-
*/
317-
k = log(2.0) * nbits / ndistinct;
318-
k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
319-
320351
/*
321352
* We allocate the whole filter. Most of it is going to be 0 bits, so the
322353
* varlena is easy to compress.
@@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
326357
filter = (BloomFilter *) palloc0(len);
327358

328359
filter->flags = 0;
329-
filter->nhashes = (int) k;
360+
filter->nhashes = nhashes;
330361
filter->nbits = nbits;
331362

332363
SET_VARSIZE(filter, len);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy