Skip to content

Commit 2cd4e83

Browse files
committed
Fix contrib/auto_explain to not cause problems in parallel workers.
A parallel worker process should not be making any decisions of its own about whether to auto-explain. If the parent session process passed down flags asking for instrumentation data, do that, otherwise not. Trying to enable instrumentation anyway leads to bugs like the "could not find key N in shm TOC" failure reported in bug #15821 from Christian Hofstaedtler. We can implement this cheaply by piggybacking on the existing logic for not doing anything when we've chosen not to sample a statement. While at it, clean up some tin-eared coding related to the sampling feature, including an off-by-one error that meant that asking for 1.0 sampling rate didn't actually result in sampling every statement. Although the specific case reported here only manifested in >= v11, I believe that related misbehaviors can be demonstrated in any version that has parallel query; and the off-by-one error is certainly there back to 9.6 where that feature was added. So back-patch to 9.6. Discussion: https://postgr.es/m/15821-5eb422e980594075@postgresql.org
1 parent eaf0292 commit 2cd4e83

File tree

1 file changed

+28
-15
lines changed

1 file changed

+28
-15
lines changed

contrib/auto_explain/auto_explain.c

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <limits.h>
1616

17+
#include "access/parallel.h"
1718
#include "commands/explain.h"
1819
#include "executor/instrument.h"
1920
#include "jit/jit.h"
@@ -59,19 +60,20 @@ static const struct config_enum_entry loglevel_options[] = {
5960
/* Current nesting depth of ExecutorRun calls */
6061
static int nesting_level = 0;
6162

63+
/* Is the current top-level query to be sampled? */
64+
static bool current_query_sampled = false;
65+
66+
#define auto_explain_enabled() \
67+
(auto_explain_log_min_duration >= 0 && \
68+
(nesting_level == 0 || auto_explain_log_nested_statements) && \
69+
current_query_sampled)
70+
6271
/* Saved hook values in case of unload */
6372
static ExecutorStart_hook_type prev_ExecutorStart = NULL;
6473
static ExecutorRun_hook_type prev_ExecutorRun = NULL;
6574
static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
6675
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
6776

68-
/* Is the current query sampled, per backend */
69-
static bool current_query_sampled = true;
70-
71-
#define auto_explain_enabled() \
72-
(auto_explain_log_min_duration >= 0 && \
73-
(nesting_level == 0 || auto_explain_log_nested_statements))
74-
7577
void _PG_init(void);
7678
void _PG_fini(void);
7779

@@ -249,14 +251,25 @@ static void
249251
explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
250252
{
251253
/*
252-
* For rate sampling, randomly choose top-level statement. Either all
253-
* nested statements will be explained or none will.
254+
* At the beginning of each top-level statement, decide whether we'll
255+
* sample this statement. If nested-statement explaining is enabled,
256+
* either all nested statements will be explained or none will.
257+
*
258+
* When in a parallel worker, we should do nothing, which we can implement
259+
* cheaply by pretending we decided not to sample the current statement.
260+
* If EXPLAIN is active in the parent session, data will be collected and
261+
* reported back to the parent, and it's no business of ours to interfere.
254262
*/
255-
if (auto_explain_log_min_duration >= 0 && nesting_level == 0)
256-
current_query_sampled = (random() < auto_explain_sample_rate *
257-
MAX_RANDOM_VALUE);
263+
if (nesting_level == 0)
264+
{
265+
if (auto_explain_log_min_duration >= 0 && !IsParallelWorker())
266+
current_query_sampled = (random() < auto_explain_sample_rate *
267+
((double) MAX_RANDOM_VALUE + 1));
268+
else
269+
current_query_sampled = false;
270+
}
258271

259-
if (auto_explain_enabled() && current_query_sampled)
272+
if (auto_explain_enabled())
260273
{
261274
/* Enable per-node instrumentation iff log_analyze is required. */
262275
if (auto_explain_log_analyze && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
@@ -275,7 +288,7 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
275288
else
276289
standard_ExecutorStart(queryDesc, eflags);
277290

278-
if (auto_explain_enabled() && current_query_sampled)
291+
if (auto_explain_enabled())
279292
{
280293
/*
281294
* Set up to track total elapsed time in ExecutorRun. Make sure the
@@ -346,7 +359,7 @@ explain_ExecutorFinish(QueryDesc *queryDesc)
346359
static void
347360
explain_ExecutorEnd(QueryDesc *queryDesc)
348361
{
349-
if (queryDesc->totaltime && auto_explain_enabled() && current_query_sampled)
362+
if (queryDesc->totaltime && auto_explain_enabled())
350363
{
351364
double msec;
352365

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy