@@ -211,6 +211,9 @@ package check_postgres;
211
211
' pgb-backends-msg' => q{ $1 of $2 connections ($3%)} ,
212
212
' pgb-backends-none' => q{ No connections} ,
213
213
' pgb-backends-users' => q{ $1 for number of users must be a number or percentage} ,
214
+ ' pgb-maxwait-msg' => q{ longest wait: $1s} ,
215
+ ' pgb-maxwait-nomatch' => q{ No matching rows were found} ,
216
+ ' pgb-maxwait-skipped' => q{ No matching rows were found (skipped rows: $1)} ,
214
217
' PID' => q{ PID} ,
215
218
' port' => q{ port} ,
216
219
' preptxn-none' => q{ No prepared transactions found} ,
@@ -1913,6 +1916,7 @@ package check_postgres;
1913
1916
pgb_pool_maxwait => [1, ' Check the current maximum wait time for client connections in pgbouncer pools.' ],
1914
1917
pgbouncer_backends => [0, ' Check how many clients are connected to pgbouncer compared to max_client_conn.' ],
1915
1918
pgbouncer_checksum => [0, ' Check that no pgbouncer settings have changed since the last check.' ],
1919
+ pgbouncer_maxwait => [0, ' Check how long the first (oldest) client in queue has been waiting.' ],
1916
1920
pgagent_jobs => [0, ' Check for no failed pgAgent jobs within a specified period of time.' ],
1917
1921
prepared_txns => [1, ' Checks number and age of prepared transactions.' ],
1918
1922
query_runtime => [0, ' Check how long a specific query takes to run.' ],
@@ -2769,6 +2773,9 @@ sub finishup {
2769
2773
# # Check the current maximum wait time for client connections in pgbouncer pools
2770
2774
check_pgb_pool(' maxwait' ) if $action eq ' pgb_pool_maxwait' ;
2771
2775
2776
+ # # Check how long the first (oldest) client in queue has been waiting.
2777
+ check_pgbouncer_maxwait() if $action eq ' pgbouncer_maxwait' ;
2778
+
2772
2779
# # Check how many clients are connected to pgbouncer compared to max_client_conn.
2773
2780
check_pgbouncer_backends() if $action eq ' pgbouncer_backends' ;
2774
2781
@@ -6758,6 +6765,107 @@ sub check_pgbouncer_checksum {
6758
6765
6759
6766
} # # end of check_pgbouncer_checksum
6760
6767
6768
sub check_pgbouncer_maxwait {

    ## Check how long the first (oldest) client in queue has waited, in
    ## seconds.
    ## Supports: Nagios, MRTG
    ## Warning and critical are time limits - defaults to seconds
    ## Valid units: s[econd], m[inute], h[our], d[ay]
    ## All above may be written as plural as well (e.g. "2 hours")
    ## Can also ignore databases with exclude and limit with include
    ## Takes no arguments; results are reported through add_ok,
    ## add_warning, add_critical (or do_mrtg when $MRTG is set)

    my ($warning, $critical) = validate_range
        ({
          type => ' time',
          });

    ## Ask pgbouncer for its pool list. Each returned row is read below
    ## for its database, user, cl_active, cl_waiting and maxwait columns
    $SQL = qq{ SHOW POOLS};

    my $info = run_command($SQL, { regex => qr{\d +}, emptyok => 1 } );

    for $db (@{$info->{db}}) {

        ## Start every target from a clean slate, so that a winner found
        ## on one target is never reported against the next one
        my ($maxwait, $database, $user, $cl_active, $cl_waiting) =
            (0,' ?',' ?',0,0);

        ## Walk the rows and remember the pool with the longest wait
        my $skipped = 0;
        ROW: for my $r (@{$db->{slurp}}) {

            ## Apply --exclude and --include arguments to the database name
            if (skip_item($r->{database})) {
                $skipped++;
                next ROW;
            }

            ## Assign stats if we have a new winner. The comparison is
            ## strict, so a row whose maxwait is zero never wins
            if ($r->{maxwait} > $maxwait) {
                $database   = $r->{database};
                $user       = $r->{user};
                $cl_active  = $r->{cl_active};
                $cl_waiting = $r->{cl_waiting};
                $maxwait    = $r->{maxwait};
            }
        }

        ## No row won: either everything was filtered out or no pool had
        ## a wait above zero. Report OK for this target and move on to
        ## the next one, so the remaining targets still get checked
        if ($database eq ' ?') {
            $MRTG and do_mrtg({one => 0, msg => ' No rows'});
            $db->{perf} = " 0;$warning ;$critical ";

            if ($skipped) {
                add_ok msg(' pgb-maxwait-skipped', $skipped);
            }
            else {
                add_ok msg(' pgb-maxwait-nomatch', $maxwait);
            }
            next;
        }

        ## Details on who the offender was
        my $whodunit = sprintf q{ %s:%s %s:%s cl_active:%s cl_waiting:%s},
            msg(' database'),
            $database,
            msg(' username'),
            $user,
            $cl_active,
            $cl_waiting;

        $MRTG and do_mrtg({one => $maxwait, msg => " $whodunit "});

        $db->{perf} .= sprintf q{ '%s'=%s;%s;%s},
            $whodunit,
            $maxwait,
            $warning,
            $critical;

        my $m = msg(' pgb-maxwait-msg', $maxwait);
        my $msg = sprintf ' %s (%s)', $m, $whodunit;

        ## Critical takes precedence over warning; an empty threshold
        ## (zero length) is treated as "not set"
        if (length $critical and $maxwait >= $critical) {
            add_critical $msg;
        }
        elsif (length $warning and $maxwait >= $warning) {
            add_warning $msg;
        }
        else {
            add_ok $msg;
        }
    }

    return;

} ## end of check_pgbouncer_maxwait
6868
+
6761
6869
sub check_pgbouncer_backends {
6762
6870
6763
6871
# # Check the number of connections to pgbouncer compared to
@@ -10504,6 +10612,30 @@ =head2 B<pgbouncer_checksum>
10504
10612
checksum must be provided as the C<--mrtg > argument. The fourth line always gives the
10505
10613
current checksum.
10506
10614
10615
+ =head2 B<pgbouncer_maxwait >
10616
+
10617
+ (C<symlink: check_postgres_pgbouncer_maxwait > ) Checks how long the first
10618
+ (oldest) client in the queue has been waiting, in seconds. If this starts
10619
+ increasing, then the current pool of servers does not handle requests quickly
10620
+ enough. Reason may be either overloaded server or just too small of a
10621
+ pool_size setting in pgbouncer config file. Databases can be filtered by use
10622
+ of the I<--include > and I<--exclude > options. See the L</"BASIC FILTERING">
10623
+ section for more details. The values of the I<--warning > and I<--critical >
10624
+ options are units of time, and must be provided (no default). Valid units are
10625
+ 'seconds', 'minutes', 'hours', or 'days'. Each may be written singular or
10626
+ abbreviated to just the first letter. If no units are given, the units are
10627
+ assumed to be seconds.
10628
+
10629
+ This action requires Postgres 8.3 or better.
10630
+
10631
+ Example 1: Give a critical if any client has been waiting for more than 10
10632
+ minutes:
10633
+
10634
+ check_postgres_pgbouncer_maxwait -p 6432 -u pgbouncer --critical='10 minutes'
10635
+
10636
+ For MRTG output, returns the maximum time in seconds a client has been
10637
+ waiting on the first line. The fourth line gives the name of the database.
10638
+
10507
10639
=head2 B<pgagent_jobs >
10508
10640
10509
10641
(C<symlink: check_postgres_pgagent_jobs > ) Checks that all the pgAgent jobs
0 commit comments