Skip to content

Commit 86c9ebf

Browse files
authored
Merge pull request bucardo#59 from kabalin/pgbouncer_maxwait
Add pgbouncer_maxwait check
2 parents 1bd5920 + 801821c commit 86c9ebf

File tree

1 file changed

+132
-0
lines changed

1 file changed

+132
-0
lines changed

check_postgres.pl

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,9 @@ package check_postgres;
211211
'pgb-backends-msg' => q{$1 of $2 connections ($3%)},
212212
'pgb-backends-none' => q{No connections},
213213
'pgb-backends-users' => q{$1 for number of users must be a number or percentage},
214+
'pgb-maxwait-msg' => q{longest wait: $1s},
215+
'pgb-maxwait-nomatch'=> q{No matching rows were found},
216+
'pgb-maxwait-skipped'=> q{No matching rows were found (skipped rows: $1)},
214217
'PID' => q{PID},
215218
'port' => q{port},
216219
'preptxn-none' => q{No prepared transactions found},
@@ -1913,6 +1916,7 @@ package check_postgres;
19131916
pgb_pool_maxwait => [1, 'Check the current maximum wait time for client connections in pgbouncer pools.'],
19141917
pgbouncer_backends => [0, 'Check how many clients are connected to pgbouncer compared to max_client_conn.'],
19151918
pgbouncer_checksum => [0, 'Check that no pgbouncer settings have changed since the last check.'],
1919+
pgbouncer_maxwait => [0, 'Check how long the first (oldest) client in queue has been waiting.'],
19161920
pgagent_jobs => [0, 'Check for no failed pgAgent jobs within a specified period of time.'],
19171921
prepared_txns => [1, 'Checks number and age of prepared transactions.'],
19181922
query_runtime => [0, 'Check how long a specific query takes to run.'],
@@ -2769,6 +2773,9 @@ sub finishup {
27692773
## Check the current maximum wait time for client connections in pgbouncer pools
27702774
check_pgb_pool('maxwait') if $action eq 'pgb_pool_maxwait';
27712775

2776+
## Check how long the first (oldest) client in queue has been waiting.
2777+
check_pgbouncer_maxwait() if $action eq 'pgbouncer_maxwait';
2778+
27722779
## Check how many clients are connected to pgbouncer compared to max_client_conn.
27732780
check_pgbouncer_backends() if $action eq 'pgbouncer_backends';
27742781

@@ -6758,6 +6765,107 @@ sub check_pgbouncer_checksum {
67586765

67596766
} ## end of check_pgbouncer_checksum
67606767

6768+
sub check_pgbouncer_maxwait {

    ## Check how long the first (oldest) client in queue has waited, in
    ## seconds, using the maxwait column of pgbouncer's SHOW POOLS output.
    ## Supports: Nagios, MRTG
    ## Warning and critical are time limits - defaults to seconds
    ## Valid units: s[econd], m[inute], h[our], d[ay]
    ## All above may be written as plural as well (e.g. "2 hours")
    ## Can also ignore databases with exclude and limit with include
    ## Takes no arguments and returns nothing: each target's result is
    ## reported via add_ok / add_warning / add_critical (or do_mrtg)

    my ($warning, $critical) = validate_range
        ({
          type => 'time',
          });

    ## One row per pool; each row is read below by column name
    $SQL = q{SHOW POOLS};

    my $info = run_command($SQL, { regex => qr{\d+}, emptyok => 1 } );

    for $db (@{$info->{db}}) {

        ## Stats for the winning row. These are reset for every target so
        ## that one target can never report a pool seen on a previous one.
        ## $maxwait stays undefined until a row survives the filters.
        my ($maxwait, $database, $user, $cl_active, $cl_waiting);
        my $skipped = 0;

        ROW: for my $r (@{$db->{slurp}}) {

            ## Apply --exclude and --include arguments to the database name
            if (skip_item($r->{database})) {
                $skipped++;
                next ROW;
            }

            ## The first surviving row always wins, so a pool with nobody
            ## waiting (maxwait of 0) is still reported. After that, only a
            ## strictly longer wait takes over, so ties keep the earlier row.
            if (! defined $maxwait or $r->{maxwait} > $maxwait) {
                $database   = $r->{database};
                $user       = $r->{user};
                $cl_active  = $r->{cl_active};
                $cl_waiting = $r->{cl_waiting};
                $maxwait    = $r->{maxwait};
            }
        }

        ## No pool survived the filters (or none was returned at all).
        ## We don't really care why as far as the final output goes,
        ## but it's nice to report what we can
        if (! defined $maxwait) {
            $MRTG and do_mrtg({one => 0, msg => 'No rows'});
            $db->{perf} = "0;$warning;$critical";

            if ($skipped) {
                add_ok msg('pgb-maxwait-skipped', $skipped);
            }
            else {
                add_ok msg('pgb-maxwait-nomatch');
            }

            ## Move on to the next target rather than abandoning the rest
            next;
        }

        ## Details on who the offender was
        my $whodunit = sprintf q{%s:%s %s:%s cl_active:%s cl_waiting:%s},
            msg('database'),
            $database,
            msg('username'),
            $user,
            $cl_active,
            $cl_waiting;

        $MRTG and do_mrtg({one => $maxwait, msg => $whodunit});

        $db->{perf} .= sprintf q{'%s'=%s;%s;%s},
            $whodunit,
            $maxwait,
            $warning,
            $critical;

        my $m = msg('pgb-maxwait-msg', $maxwait);
        my $msg = sprintf '%s (%s)', $m, $whodunit;

        ## An empty threshold means the option was not given
        if (length $critical and $maxwait >= $critical) {
            add_critical $msg;
        }
        elsif (length $warning and $maxwait >= $warning) {
            add_warning $msg;
        }
        else {
            add_ok $msg;
        }
    }

    return;

} ## end of check_pgbouncer_maxwait
6868+
67616869
sub check_pgbouncer_backends {
67626870

67636871
## Check the number of connections to pgbouncer compared to
@@ -10504,6 +10612,30 @@ =head2 B<pgbouncer_checksum>
1050410612
checksum must be provided as the C<--mrtg> argument. The fourth line always gives the
1050510613
current checksum.
1050610614
10615+
=head2 B<pgbouncer_maxwait>
10616+
10617+
(C<symlink: check_postgres_pgbouncer_maxwait>) Checks how long the first
10618+
(oldest) client in the queue has been waiting, in seconds. If this starts
10619+
increasing, then the current pool of servers does not handle requests quickly
10620+
enough. The reason may be either an overloaded server or just too small of a
10621+
pool_size setting in the pgbouncer config file. Databases can be filtered by use
10622+
of the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING">
10623+
section for more details. The values of the I<--warning> and I<--critical>
10624+
options are units of time, and must be provided (no default). Valid units are
10625+
'seconds', 'minutes', 'hours', or 'days'. Each may be written singular or
10626+
abbreviated to just the first letter. If no units are given, the units are
10627+
assumed to be seconds.
10628+
10629+
This action requires Postgres 8.3 or better.
10630+
10631+
Example 1: Give a critical if any client has been waiting in the queue for more than 10
10632+
minutes:
10633+
10634+
check_postgres_pgbouncer_maxwait -p 6432 -u pgbouncer --critical='10 minutes'
10635+
10636+
For MRTG output, returns the maximum time in seconds a client has been
10637+
waiting on the first line. The fourth line gives the database name, the user name, and the cl_active and cl_waiting counts of that pool.
10638+
1050710639
=head2 B<pgagent_jobs>
1050810640
1050910641
(C<symlink: check_postgres_pgagent_jobs>) Checks that all the pgAgent jobs

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy