@@ -163,6 +163,7 @@ package check_postgres;
163
163
' listening' => q{ listening} ,
164
164
' locks-msg' => q{ total "$1" locks: $2} ,
165
165
' locks-msg2' => q{ total locks: $1} ,
166
+ ' lockwait-msg' => q{ $1: $2($3) blocking $4($5) for $6 blocked statement "$7"} ,
166
167
' logfile-bad' => q{ Invalid logfile "$1"} ,
167
168
' logfile-debug' => q{ Final logfile: $1} ,
168
169
' logfile-dne' => q{ logfile $1 does not exist!} ,
@@ -1902,6 +1903,7 @@ package check_postgres;
1902
1903
last_autovacuum => [0, ' Check the maximum time in seconds since any one table has been autovacuumed.' ],
1903
1904
listener => [0, ' Checks for specific listeners.' ],
1904
1905
locks => [0, ' Checks the number of locks.' ],
1906
+ lockwait => [0, ' Checks for blocking locks.' ],
1905
1907
logfile => [1, ' Checks that the logfile is being written to correctly.' ],
1906
1908
new_version_bc => [0, ' Checks if a newer version of Bucardo is available.' ],
1907
1909
new_version_box => [0, ' Checks if a newer version of boxinfo is available.' ],
@@ -2709,6 +2711,9 @@ sub finishup {
2709
2711
# # Check number and type of locks
2710
2712
check_locks() if $action eq ' locks' ;
2711
2713
2714
+ # # Check lock wait
2715
+ check_lockwait() if $action eq ' lockwait' ;
2716
+
2712
2717
# # Logfile is being written to
2713
2718
check_logfile() if $action eq ' logfile' ;
2714
2719
@@ -6177,6 +6182,63 @@ sub check_locks {
6177
6182
6178
6183
} # # end of check_locks
6179
6184
6185
sub check_lockwait {

    ## Report sessions that are waiting on a lock held by another session
    ## The query yields one row per blocked session / blocking session pair
    ## Every database is considered unless narrowed with include / exclude
    ## Warning and critical are lengths of time
    ## Example: --warning='1 min' --critical='2 min'

    my ($warning, $critical) = validate_range({
        type             => ' time',
        default_warning  => ' 1 min',
        default_critical => ' 2 min',
    });

    ## "a" is the waiting session, "ka" is a session named by pg_blocking_pids
    ## waited_sec is measured from the start of the waiting session's query
    $SQL = qq{ SELECT a.datname AS datname,
 bl.pid AS blocked_pid,
 a.usename AS blocked_user,
 ka.pid AS blocking_pid,
 ka.usename AS blocking_user,
 round(extract (epoch from current_timestamp - a.query_start)) AS waited_sec,
 a.query AS blocked_statement
 FROM pg_catalog.pg_locks bl
 JOIN pg_catalog.pg_stat_activity a ON a.pid = bl.pid
 JOIN pg_catalog.pg_stat_activity ka ON (ka.pid = ANY(pg_blocking_pids(bl.pid)))
 WHERE NOT bl.granted
 };

    ## emptyok: getting no rows back is the healthy case, not an error
    my $result = run_command($SQL, { regex => qr{\w }, emptyok => 1 });

    ## Number of rows reported on, whatever their severity
    my $blocked_count = 0;

    ## NOTE(review): $db is intentionally not declared with "my" here; it is
    ## not declared in this sub at all, so it is presumably a file-level
    ## variable that the add_* helpers also read -- confirm before changing
    for $db (@{ $result->{db} }) {

        for my $row (@{ $db->{slurp} }) {

            my ($dbname, $blocked_pid, $blocked_user, $blocking_pid,
                $blocking_user, $waited_sec, $blocked_statement)
                = @{$row}{qw(datname blocked_pid blocked_user blocking_pid
                             blocking_user waited_sec blocked_statement)};

            ## May be forcibly skipping this database via arguments
            next if skip_item($dbname);

            ## The blocking session is named first in the message, the waiting one second
            my $msg = msg(' lockwait-msg',
                          $dbname,
                          $blocking_user, $blocking_pid,
                          $blocked_user,  $blocked_pid,
                          pretty_time($waited_sec),
                          $blocked_statement);

            ## An empty threshold is never tested; critical takes precedence
            if (length($critical) && $waited_sec >= $critical) {
                add_critical($msg);
            }
            elsif (length($warning) && $waited_sec >= $warning) {
                add_warning($msg);
            }
            else {
                add_ok($msg);
            }

            $blocked_count++;
        }
    }

    ## Nothing was reported at all, so say so explicitly
    if ($blocked_count == 0) {
        add_ok(' No blocking locks');
    }

    ## MRTG gets the number of rows reported on
    if ($MRTG) {
        do_mrtg({ one => $blocked_count });
    }

    return;

} ## end of check_lockwait
6180
6242
6181
6243
sub check_logfile {
6182
6244
@@ -10493,6 +10555,22 @@ =head2 B<locks>
10493
10555
10494
10556
For MRTG output, returns the number of locks on the first line, and the name of the database on the fourth line.
10495
10557
10558
+ =head2 B<lockwait >
10559
+
10560
+ (C<symlink: check_postgres_lockwait > ) Check if there are blocking locks and for how long. There is no
10561
+ need to run this more than once per database cluster. Databases can be filtered
10562
+ with the I<--include > and I<--exclude > options. See the L</"BASIC FILTERING"> section
10563
+ for more details.
10564
+
10565
+ The I<--warning > and I<--critical > options are given as units of time,
10566
+ which represent the time for which the lock has been blocking.
10567
+
10568
+ Example 1: Warn if a lock has been blocking for more than a minute, critical if for more than 2 minutes
10569
+
10570
+ check_postgres_lockwait --host=garrett --warning='1 min' --critical='2 min'
10571
+
10572
+ For MRTG output, returns the number of blocked sessions.
10573
+
10496
10574
=head2 B<logfile >
10497
10575
10498
10576
(C<symlink: check_postgres_logfile > ) Ensures that the logfile is in the expected location and is being logged to.
0 commit comments