Skip to content

Commit bd5b63a

Browse files
committed
Add handling of the situation when the AQO shmem storage overflows.
Our tactic here: log the problem, switch the backend into CONTROLLED mode and carry on. TODO: 1) change aqo.mode for all backends; 2) switch to FROZEN mode if the data storage is full; 3) decide how to handle overflow of the DSM.
1 parent 7186c43 commit bd5b63a

File tree

5 files changed

+158
-29
lines changed

5 files changed

+158
-29
lines changed

aqo.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,34 @@ _PG_init(void)
208208
0,
209209
NULL,
210210
NULL,
211-
NULL);
211+
NULL
212+
);
213+
214+
DefineCustomIntVariable("aqo.fs_max_items",
215+
"Max number of feature spaces that AQO can operate with.",
216+
NULL,
217+
&fs_max_items,
218+
1000,
219+
1, INT_MAX,
220+
PGC_SUSET,
221+
0,
222+
NULL,
223+
NULL,
224+
NULL
225+
);
226+
227+
DefineCustomIntVariable("aqo.fss_max_items",
228+
"Max number of feature subspaces that AQO can operate with.",
229+
NULL,
230+
&fss_max_items,
231+
1000,
232+
0, INT_MAX,
233+
PGC_SUSET,
234+
0,
235+
NULL,
236+
NULL,
237+
NULL
238+
);
212239

213240
prev_shmem_startup_hook = shmem_startup_hook;
214241
shmem_startup_hook = aqo_init_shmem;

aqo_shared.c

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL;
2626
AQOSharedState *aqo_state = NULL;
2727
HTAB *fss_htab = NULL;
2828
static int aqo_htab_max_items = 1000;
29-
static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */
30-
static int fss_max_items = 10000;
29+
int fs_max_items = 1; /* Max number of different feature spaces in ML model */
30+
int fss_max_items = 1; /* Max number of different feature subspaces in ML model */
3131
static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */
3232
static dsm_segment *seg = NULL;
3333

@@ -217,30 +217,26 @@ aqo_init_shmem(void)
217217

218218
info.keysize = sizeof(((StatEntry *) 0)->queryid);
219219
info.entrysize = sizeof(StatEntry);
220-
stat_htab = ShmemInitHash("AQO Stat HTAB",
221-
fs_max_items, fs_max_items,
220+
stat_htab = ShmemInitHash("AQO Stat HTAB", 64, fs_max_items,
222221
&info, HASH_ELEM | HASH_BLOBS);
223222

224223
/* Init shared memory table for query texts */
225224
info.keysize = sizeof(((QueryTextEntry *) 0)->queryid);
226225
info.entrysize = sizeof(QueryTextEntry);
227-
qtexts_htab = ShmemInitHash("AQO Query Texts HTAB",
228-
fs_max_items, fs_max_items,
226+
qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", 64, fs_max_items,
229227
&info, HASH_ELEM | HASH_BLOBS);
230228

231229
/* Shared memory hash table for the data */
232230
info.keysize = sizeof(data_key);
233231
info.entrysize = sizeof(DataEntry);
234-
data_htab = ShmemInitHash("AQO Data HTAB",
235-
fss_max_items, fss_max_items,
232+
data_htab = ShmemInitHash("AQO Data HTAB", 64, fss_max_items,
236233
&info, HASH_ELEM | HASH_BLOBS);
237234

238235
/* Shared memory hash table for queries */
239236
info.keysize = sizeof(((QueriesEntry *) 0)->queryid);
240237
info.entrysize = sizeof(QueriesEntry);
241-
queries_htab = ShmemInitHash("AQO Queries HTAB",
242-
fs_max_items, fs_max_items,
243-
&info, HASH_ELEM | HASH_BLOBS);
238+
queries_htab = ShmemInitHash("AQO Queries HTAB", 64, fs_max_items,
239+
&info, HASH_ELEM | HASH_BLOBS);
244240

245241
LWLockRelease(AddinShmemInitLock);
246242
LWLockRegisterTranche(aqo_state->lock.tranche, "AQO");

aqo_shared.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ extern shmem_startup_hook_type prev_shmem_startup_hook;
5050
extern AQOSharedState *aqo_state;
5151
extern HTAB *fss_htab;
5252

53+
extern int fs_max_items; /* Max number of feature spaces that AQO can operate */
54+
extern int fss_max_items;
5355

5456
extern Size aqo_memsize(void);
5557
extern void reset_dsm_cache(void);

preprocessing.c

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -295,15 +295,34 @@ aqo_planner(Query *parse,
295295
* concurrent addition from another backend we will try to restart
296296
* preprocessing routine.
297297
*/
298-
aqo_queries_store(query_context.query_hash, query_context.fspace_hash,
298+
if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash,
299299
query_context.learn_aqo, query_context.use_aqo,
300-
query_context.auto_tuning);
300+
query_context.auto_tuning))
301+
{
302+
/*
303+
* Add query text into the ML-knowledge base. Just for further
304+
* analysis. In the case of cached plans we may have NULL query text.
305+
*/
306+
if (!aqo_qtext_store(query_context.query_hash, query_string))
307+
{
308+
Assert(0); /* panic only on debug installation */
309+
elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?");
310+
}
311+
}
312+
else
313+
{
314+
/*
315+
* In the case of problems (shmem overflow, as a typical issue) -
316+
* disable AQO for the query class.
317+
*/
318+
disable_aqo_for_query();
301319

302-
/*
303-
* Add query text into the ML-knowledge base. Just for further
304-
* analysis. In the case of cached plans we may have NULL query text.
305-
*/
306-
aqo_qtext_store(query_context.query_hash, query_string);
320+
/*
321+
* Switch AQO to controlled mode. In this mode we won't add new
322+
* query classes, just use and learn on the existing set.
323+
*/
324+
aqo_mode = AQO_MODE_CONTROLLED;
325+
}
307326
}
308327

309328
if (force_collect_stat)

storage.c

Lines changed: 95 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,10 @@ add_deactivated_query(uint64 queryid)
204204
/*
205205
* Update AQO statistics.
206206
*
207-
* Add a record (and replace old, if all stat slots is full) to stat slot for
208-
* a query class.
207+
* Add a record (or update an existing one) in the stat storage for the query class.
209208
* Returns a copy of stat entry, allocated in current memory context. Caller is
210209
* in charge of freeing this struct after use.
210+
* If the stat hash table is full, returns NULL and logs this fact.
211211
*/
212212
StatEntry *
213213
aqo_stat_store(uint64 queryid, bool use_aqo,
@@ -216,16 +216,36 @@ aqo_stat_store(uint64 queryid, bool use_aqo,
216216
StatEntry *entry;
217217
bool found;
218218
int pos;
219+
bool tblOverflow;
220+
HASHACTION action;
219221

220222
Assert(stat_htab);
221223

222224
LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE);
223-
entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found);
225+
tblOverflow = hash_get_num_entries(stat_htab) < fs_max_items ? false : true;
226+
action = tblOverflow ? HASH_FIND : HASH_ENTER;
227+
entry = (StatEntry *) hash_search(stat_htab, &queryid, action, &found);
224228

225229
/* Initialize entry on first usage */
226230
if (!found)
227231
{
228-
uint64 qid = entry->queryid;
232+
uint64 qid;
233+
234+
if (action == HASH_FIND)
235+
{
236+
/*
237+
* Hash table is full. To avoid possible problems - don't try to add
238+
* more, just exit
239+
*/
240+
LWLockRelease(&aqo_state->stat_lock);
241+
ereport(LOG,
242+
(errcode(ERRCODE_OUT_OF_MEMORY),
243+
errmsg("[AQO] Stat storage is full. No more feature spaces can be added."),
244+
errhint("Increase value of aqo.fs_max_items on restart of the instance")));
245+
return NULL;
246+
}
247+
248+
qid = entry->queryid;
229249
memset(entry, 0, sizeof(StatEntry));
230250
entry->queryid = qid;
231251
}
@@ -907,6 +927,8 @@ aqo_qtext_store(uint64 queryid, const char *query_string)
907927
{
908928
QueryTextEntry *entry;
909929
bool found;
930+
bool tblOverflow;
931+
HASHACTION action;
910932

911933
Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock));
912934

@@ -916,7 +938,12 @@ aqo_qtext_store(uint64 queryid, const char *query_string)
916938
dsa_init();
917939

918940
LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE);
919-
entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_ENTER,
941+
942+
/* Check hash table overflow */
943+
tblOverflow = hash_get_num_entries(qtexts_htab) < fs_max_items ? false : true;
944+
action = tblOverflow ? HASH_FIND : HASH_ENTER;
945+
946+
entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, action,
920947
&found);
921948

922949
/* Initialize entry on first usage */
@@ -925,6 +952,20 @@ aqo_qtext_store(uint64 queryid, const char *query_string)
925952
size_t size = strlen(query_string) + 1;
926953
char *strptr;
927954

955+
if (action == HASH_FIND)
956+
{
957+
/*
958+
* Hash table is full. To avoid possible problems - don't try to add
959+
* more, just exit
960+
*/
961+
LWLockRelease(&aqo_state->qtexts_lock);
962+
ereport(LOG,
963+
(errcode(ERRCODE_OUT_OF_MEMORY),
964+
errmsg("[AQO] Query texts storage is full. No more feature spaces can be added."),
965+
errhint("Increase value of aqo.fs_max_items on restart of the instance")));
966+
return false;
967+
}
968+
928969
entry->queryid = queryid;
929970
entry->qtext_dp = dsa_allocate(qtext_dsa, size);
930971
Assert(DsaPointerIsValid(entry->qtext_dp));
@@ -933,7 +974,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string)
933974
aqo_state->qtexts_changed = true;
934975
}
935976
LWLockRelease(&aqo_state->qtexts_lock);
936-
return !found;
977+
return true;
937978
}
938979

939980
Datum
@@ -1089,17 +1130,38 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids)
10891130
char *ptr;
10901131
ListCell *lc;
10911132
size_t size;
1133+
bool tblOverflow;
1134+
HASHACTION action;
10921135

10931136
Assert(!LWLockHeldByMe(&aqo_state->data_lock));
10941137

10951138
dsa_init();
10961139

10971140
LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE);
1098-
entry = (DataEntry *) hash_search(data_htab, &key, HASH_ENTER, &found);
1141+
1142+
/* Check hash table overflow */
1143+
tblOverflow = hash_get_num_entries(data_htab) < fss_max_items ? false : true;
1144+
action = tblOverflow ? HASH_FIND : HASH_ENTER;
1145+
1146+
entry = (DataEntry *) hash_search(data_htab, &key, action, &found);
10991147

11001148
/* Initialize entry on first usage */
11011149
if (!found)
11021150
{
1151+
if (action == HASH_FIND)
1152+
{
1153+
/*
1154+
* Hash table is full. To avoid possible problems - don't try to add
1155+
* more, just exit
1156+
*/
1157+
LWLockRelease(&aqo_state->data_lock);
1158+
ereport(LOG,
1159+
(errcode(ERRCODE_OUT_OF_MEMORY),
1160+
errmsg("[AQO] Data storage is full. No more data can be added."),
1161+
errhint("Increase value of aqo.fss_max_items on restart of the instance")));
1162+
return false;
1163+
}
1164+
11031165
entry->cols = data->cols;
11041166
entry->rows = data->rows;
11051167
entry->nrels = list_length(reloids);
@@ -1603,11 +1665,13 @@ aqo_queries_remove(PG_FUNCTION_ARGS)
16031665
}
16041666

16051667
bool
1606-
aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo,
1607-
bool use_aqo, bool auto_tuning)
1668+
aqo_queries_store(uint64 queryid,
1669+
uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning)
16081670
{
16091671
QueriesEntry *entry;
16101672
bool found;
1673+
bool tblOverflow;
1674+
HASHACTION action;
16111675

16121676
Assert(queries_htab);
16131677

@@ -1616,8 +1680,29 @@ aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo,
16161680
use_aqo == false && auto_tuning == false));
16171681

16181682
LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE);
1619-
entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER,
1683+
1684+
/* Check hash table overflow */
1685+
tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true;
1686+
action = tblOverflow ? HASH_FIND : HASH_ENTER;
1687+
1688+
entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action,
16201689
&found);
1690+
1691+
/* Initialize entry on first usage */
1692+
if (!found && action == HASH_FIND)
1693+
{
1694+
/*
1695+
* Hash table is full. To avoid possible problems - don't try to add
1696+
* more, just exit
1697+
*/
1698+
LWLockRelease(&aqo_state->queries_lock);
1699+
ereport(LOG,
1700+
(errcode(ERRCODE_OUT_OF_MEMORY),
1701+
errmsg("[AQO] Queries storage is full. No more feature spaces can be added."),
1702+
errhint("Increase value of aqo.fs_max_items on restart of the instance")));
1703+
return false;
1704+
}
1705+
16211706
entry->fs = fs;
16221707
entry->learn_aqo = learn_aqo;
16231708
entry->use_aqo = use_aqo;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy