Skip to content

Commit 948c979

Browse files
committed
Add two HyperLogLog functions
New functions initHyperLogLogError() and freeHyperLogLog() simplify using this module from elsewhere. Author: Tomáš Vondra Review: Peter Geoghegan
1 parent 9ff6027 commit 948c979

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

src/backend/lib/hyperloglog.c

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
static inline uint8 rho(uint32 x, uint8 b);
5757

5858
/*
59-
* Initialize HyperLogLog track state
59+
* Initialize HyperLogLog track state, by bit width
6060
*
6161
* bwidth is bit width (so register size will be 2 to the power of bwidth).
6262
* Must be between 4 and 16 inclusive.
@@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
107107
cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters;
108108
}
109109

110+
/*
111+
* Initialize HyperLogLog track state, by error rate
112+
*
113+
* Instead of specifying bwidth (number of bits used for addressing the
114+
* register), this method allows sizing the counter for particular error
115+
* rate using a simple formula from the paper:
116+
*
117+
* e = 1.04 / sqrt(m)
118+
*
119+
* where 'm' is the number of registers, i.e. (2^bwidth). The method
120+
* finds the lowest bwidth with 'e' below the requested error rate, and
121+
* then uses it to initialize the counter.
122+
*
123+
* As bwidth has to be between 4 and 16, the worst possible error rate
124+
* is between ~25% (bwidth=4) and 0.4% (bwidth=16).
125+
*/
126+
void
127+
initHyperLogLogError(hyperLogLogState *cState, double error)
128+
{
129+
uint8 bwidth = 4;
130+
131+
while (bwidth < 16)
132+
{
133+
double m = (Size) 1 << bwidth;
134+
135+
if (1.04 / sqrt(m) < error)
136+
break;
137+
bwidth++;
138+
}
139+
140+
initHyperLogLog(cState, bwidth);
141+
}
142+
143+
/*
144+
* Free HyperLogLog track state
145+
*
146+
* Releases allocated resources, but not the state itself (in case it's not
147+
* allocated by palloc).
148+
*/
149+
void
150+
freeHyperLogLog(hyperLogLogState *cState)
151+
{
152+
Assert(cState->hashesArr != NULL);
153+
pfree(cState->hashesArr);
154+
}
155+
110156
/*
111157
* Adds element to the estimator, from caller-supplied hash.
112158
*

src/include/lib/hyperloglog.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,10 @@ typedef struct hyperLogLogState
6060
} hyperLogLogState;
6161

6262
extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
63+
extern void initHyperLogLogError(hyperLogLogState *cState, double error);
6364
extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
6465
extern double estimateHyperLogLog(hyperLogLogState *cState);
6566
extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
67+
extern void freeHyperLogLog(hyperLogLogState *cState);
6668

6769
#endif /* HYPERLOGLOG_H */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy