diff --git a/.github/workflows/mamonsu-tests-dev.yml b/.github/workflows/mamonsu-tests-dev.yml
index 0336b7c..2427a7d 100644
--- a/.github/workflows/mamonsu-tests-dev.yml
+++ b/.github/workflows/mamonsu-tests-dev.yml
@@ -86,7 +86,7 @@ jobs:
echo "zabbix_address=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT
id: zabbix_address
- name: Edit Zabbix address in agent.conf
- run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.12.conf
+ run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.13.conf
- name: Copy test scripts to container
run: docker exec $( echo "${{ matrix.docker_os }}" | sed 's/://' | sed 's/\.//' ) mkdir -p -m 755 /mamonsu/
diff --git a/.github/workflows/mamonsu-tests-master.yml b/.github/workflows/mamonsu-tests-master.yml
index 6dfb86d..e14042c 100644
--- a/.github/workflows/mamonsu-tests-master.yml
+++ b/.github/workflows/mamonsu-tests-master.yml
@@ -91,7 +91,7 @@ jobs:
echo "zabbix_address=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT
id: zabbix_address
- name: Edit Zabbix address in agent.conf
- run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.12.conf
+ run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.13.conf
- name: Copy test scripts to container
run: docker exec $( echo "${{ matrix.docker_os }}" | sed 's/://' | sed 's/\.//' ) mkdir -p -m 755 /mamonsu/
diff --git a/README.md b/README.md
index a4f3076..fd6b6bf 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,7 @@ Pre-built _mamonsu_ packages are provided in official Postgres Pro repository: [
**Install from repository using script:**
```shell
-$ wget https://repo.postgrespro.ru/mamonsu/keys/pgpro-repo-add.sh
+$ wget https://repo.postgrespro.ru/mamonsu/mamonsu/keys/pgpro-repo-add.sh
$ sudo chmod 700 ./pgpro-repo-add.sh
$ sudo ./pgpro-repo-add.sh
```
diff --git a/documentation/metrics.md b/documentation/metrics.md
index 15ef4a1..d507a8e 100644
--- a/documentation/metrics.md
+++ b/documentation/metrics.md
@@ -3691,7 +3691,8 @@ Default config:
### Replication
Default config:
- lag_more_than_in_sec = 300
+ lag_more_than_in_sec = 300\
+ critical_bytes_held_by_non_active_slot = 1073741824 bytes
### Items
@@ -3763,6 +3764,37 @@ Default config:
*Non-active Replication Slots* calculates as count of slots with `false` active status.
+- **Bytes Held By Non-active Replication Slots**
+
+ Zabbix item:
+
+
+ Name |
+ PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME} |
+
+
+ Key |
+ pgsql.replication.non_active_slots_held_bytes[{#NON_ACTIVE_SLOT_NAME}] |
+
+
+ Type |
+ Numeric (float) |
+
+
+ Units |
+ Bytes |
+
+
+ Delta |
+ As Is |
+
+
+ Supported Version |
+ 10+ |
+
+
+
+ *Bytes Held By Non-active Replication Slots* calculates as the amount of WAL (in bytes) between the current WAL position and the `restart_lsn` of each slot with `false` active status.
- **Streaming Replication Lag**
@@ -3861,12 +3893,40 @@ Default config:
+- **PostgreSQL Replication: Non Active Slots Discovery**
+
+ Items:
+
+
+ Name |
+ PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME} |
+
+
+ Key |
+ pgsql.replication.non_active_slots_held_bytes[{#NON_ACTIVE_SLOT_NAME}] |
+
+
+ Type |
+ Numeric (float) |
+
+
+ Units |
+ Bytes |
+
+
+ Delta |
+ As Is |
+
+
+
### Triggers
- **PostgreSQL Instance: server mode has been changed on {HOSTNAME} to {ITEM.LASTVALUE}**
- **PostgreSQL number of non-active replication slots on {HOSTNAME} (value={ITEM.LASTVALUE})**
-
+ Disabled by default
+- **PostgreSQL Replication: bytes held by slot {#NON_ACTIVE_SLOT_NAME} is too high (value={ITEM.LASTVALUE})**
+ Triggers if *PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}* exceeds `critical_bytes_held_by_non_active_slot`.
- **PostgreSQL streaming lag too high on {HOSTNAME} (value={ITEM.LASTVALUE})**
Triggers if *PostgreSQL Replication: Streaming Replication Lag* exceeds `lag_more_than_in_sec`.
diff --git a/github-actions-tests/mamonsu_build.sh b/github-actions-tests/mamonsu_build.sh
index a766806..6c24eb9 100644
--- a/github-actions-tests/mamonsu_build.sh
+++ b/github-actions-tests/mamonsu_build.sh
@@ -41,7 +41,7 @@ if [ "${OS%:*}" = "centos" ]; then
python3 setup.py build && python3 setup.py install
make rpm
sudo rpm -i ./mamonsu*.rpm
- cat /mamonsu/github-actions-tests/sources/agent_3.5.12.conf > /etc/mamonsu/agent.conf
+ cat /mamonsu/github-actions-tests/sources/agent_3.5.13.conf > /etc/mamonsu/agent.conf
# ensuring mamonsu can actually start
sudo su -s /bin/bash -c "mamonsu bootstrap -x --user postgres -d mamonsu_test_db" mamonsu
/etc/init.d/mamonsu restart
@@ -65,7 +65,7 @@ elif [ "${OS%:*}" = "ubuntu" ]; then
python3 setup.py build && python3 setup.py install
make deb
sudo dpkg -i ./mamonsu*.deb
- cat /mamonsu/github-actions-tests/sources/agent_3.5.12.conf > /etc/mamonsu/agent.conf
+ cat /mamonsu/github-actions-tests/sources/agent_3.5.13.conf > /etc/mamonsu/agent.conf
# ensuring mamonsu can actually start
sudo su -s /bin/bash -c "mamonsu bootstrap -x --user postgres -d mamonsu_test_db" mamonsu
service mamonsu restart
diff --git a/github-actions-tests/sources/agent_3.5.12.conf b/github-actions-tests/sources/agent_3.5.13.conf
similarity index 100%
rename from github-actions-tests/sources/agent_3.5.12.conf
rename to github-actions-tests/sources/agent_3.5.13.conf
diff --git a/mamonsu/__init__.py b/mamonsu/__init__.py
index b43c491..9264cb8 100644
--- a/mamonsu/__init__.py
+++ b/mamonsu/__init__.py
@@ -1,7 +1,7 @@
__author__ = 'Dmitry Vasilyev'
__author_email__ = 'info@postgrespro.ru'
__description__ = 'Monitoring agent for PostgreSQL'
-__version__ = '3.5.12'
+__version__ = '3.5.13'
__licence__ = 'BSD'
__url__ = 'https://github.com/postgrespro/mamonsu'
diff --git a/mamonsu/lib/default_config.py b/mamonsu/lib/default_config.py
index c7f2d98..12791a1 100644
--- a/mamonsu/lib/default_config.py
+++ b/mamonsu/lib/default_config.py
@@ -35,6 +35,8 @@ def default_host():
host = os.environ.get('PGHOST') or 'auto'
if platform.FREEBSD:
host = os.environ.get('PGHOST') or 'auto'
+ if platform.DARWIN:
+ host = os.environ.get('PGHOST') or 'auto'
return host
@staticmethod
diff --git a/mamonsu/lib/platform.py b/mamonsu/lib/platform.py
index 5ea5faa..279200d 100644
--- a/mamonsu/lib/platform.py
+++ b/mamonsu/lib/platform.py
@@ -3,5 +3,6 @@
LINUX = (sys.platform == 'linux' or sys.platform == 'linux2')
WINDOWS = (sys.platform == 'win32' or sys.platform == 'win64')
FREEBSD = ('freebsd' in sys.platform)
+DARWIN = sys.platform == 'darwin'
UNIX = LINUX or FREEBSD
INTEGER_TYPES = int,
diff --git a/mamonsu/lib/queue.py b/mamonsu/lib/queue.py
index 96ceadf..e348fc4 100644
--- a/mamonsu/lib/queue.py
+++ b/mamonsu/lib/queue.py
@@ -10,25 +10,21 @@ def __init__(self):
self.lock = threading.Lock()
def add(self, metric):
- self.lock.acquire()
- self.queue.insert(0, metric)
- self.lock.release()
+ with self.lock:
+ self.queue.insert(0, metric)
# replace last metric
def replace(self, metric):
- self.lock.acquire()
- self.queue.pop()
- self.queue.append(metric)
- self.lock.release()
+ with self.lock:
+ if self.queue:
+ self.queue.pop()
+ self.queue.append(metric)
def size(self):
- self.lock.acquire()
- result = len(self.queue)
- self.lock.release()
- return result
+ with self.lock:
+ return len(self.queue)
def flush(self):
- self.lock.acquire()
- result, self.queue = self.queue, []
- self.lock.release()
- return result
+ with self.lock:
+ result, self.queue = self.queue, []
+ return result
diff --git a/mamonsu/plugins/pgsql/driver/pool.py b/mamonsu/plugins/pgsql/driver/pool.py
index 6576f92..a8433d9 100644
--- a/mamonsu/plugins/pgsql/driver/pool.py
+++ b/mamonsu/plugins/pgsql/driver/pool.py
@@ -86,7 +86,7 @@ class Pool(object):
"""
SELECT application_name,
{0}
- coalesce((pg_{1}_{2}_diff(pg_current_{1}_{2}(), replay_lsn))::int, 0) AS total_lag
+ coalesce((pg_{1}_{2}_diff(pg_current_{1}_{2}(), replay_{2}))::int, 0) AS total_lag
FROM pg_stat_replication;
""",
"""
@@ -95,6 +95,30 @@ class Pool(object):
total_lag
FROM mamonsu.count_{1}_lag_lsn();
"""
+ ),
+ "wal_held_bytes_master": (  # (direct catalog query, mamonsu.* function query); bigint because held WAL can exceed the int4 range
+ """
+ SELECT slot_name,
+ coalesce((pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes
+ FROM pg_replication_slots WHERE active = 'false';
+ """,
+ """
+ SELECT slot_name,
+ wal_held_bytes
+ FROM mamonsu.bytes_held_by_inactive_slot_on_master();
+ """
+ ),
+ "wal_held_bytes_replica": (  # same pair for a replica: distance is measured from the last replayed LSN; only non-active slots
+ """
+ SELECT slot_name,
+ coalesce((pg_wal_lsn_diff(pg_last_wal_replay_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes
+ FROM pg_replication_slots WHERE active = 'false';
+ """,
+ """
+ SELECT slot_name,
+ wal_held_bytes
+ FROM mamonsu.bytes_held_by_inactive_slot_on_replica();
+ """
)
}
diff --git a/mamonsu/plugins/pgsql/replication.py b/mamonsu/plugins/pgsql/replication.py
index 8a51889..7ed701c 100644
--- a/mamonsu/plugins/pgsql/replication.py
+++ b/mamonsu/plugins/pgsql/replication.py
@@ -13,7 +13,8 @@ class Replication(Plugin):
AgentPluginType = "pg"
# key: (macro, value)
plugin_macros = {
- "critical_lag_seconds": [("macro", "{$CRITICAL_LAG_SECONDS}"), ("value", 60 * 5)]
+ "critical_lag_seconds": [("macro", "{$CRITICAL_LAG_SECONDS}"), ("value", 60 * 5)],
+ "critical_bytes_held_by_none_active_slot": [("macro", "{$CRITICAL_BYTES_HELD_BY_NON_ACTIVE_SLOT}"), ("value", 1024 * 1024 * 1024)]
}
# get time of replication lag
@@ -30,8 +31,15 @@ class Replication(Plugin):
WHERE active = 'false';
"""
+ query_bytes_held_by_non_active_slot = """
+ SELECT slot_name, coalesce(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::bigint, 0) AS wal_size_bytes
+ FROM pg_replication_slots
+ WHERE active = 'false';
+ """
+
# for discovery rule for name of each replica
key_lsn_replication_discovery = "pgsql.replication.discovery{0}"
+ key_replication_non_active_slots_discovery = "pgsql.replication.non_active_slots_discovery{0}"
key_total_lag = "pgsql.replication.total_lag{0}"
# for PG 10 and higher
key_flush = "pgsql.replication.flush_lag{0}"
@@ -42,6 +50,7 @@ class Replication(Plugin):
key_replication = "pgsql.replication_lag{0}"
key_non_active_slots = "pgsql.replication.non_active_slots{0}"
+ key_non_active_slots_held_bytes = "pgsql.replication.non_active_slots_held_bytes{0}"
def run(self, zbx):
@@ -79,6 +88,14 @@ def run(self, zbx):
zbx.send("pgsql.replication.replay_lag[{0}]".format(info[0]), float(info[5]))
zbx.send("pgsql.replication.discovery[]", zbx.json({"data": lags}))
del lags
+ bytes_held_by_non_active_slot = Pooler.run_sql_type("wal_held_bytes_master", args=[])
+ if bytes_held_by_non_active_slot:
+ discovery = []
+ for info in bytes_held_by_non_active_slot:
+ discovery.append({"{#NON_ACTIVE_SLOT_NAME}": info[0]})
+ zbx.send("pgsql.replication.non_active_slots_held_bytes[{0}]".format(info[0]), int(info[1]))
+ zbx.send("pgsql.replication.non_active_slots_discovery[]", zbx.json({"data": discovery}))
+ del discovery
elif Pooler.is_superuser() or Pooler.is_bootstraped():
result_lags = Pooler.run_sql_type("wal_lag_lsn", args=[" ", "xlog", "location"])
if result_lags:
@@ -90,7 +107,15 @@ def run(self, zbx):
del lags
else:
self.disable_and_exit_if_not_superuser()
-
+ else:
+ bytes_held_by_non_active_slot = Pooler.run_sql_type("wal_held_bytes_replica", args=[])
+ if bytes_held_by_non_active_slot:
+ discovery = []
+ for info in bytes_held_by_non_active_slot:
+ discovery.append({"{#NON_ACTIVE_SLOT_NAME}": info[0]})
+ zbx.send("pgsql.replication.non_active_slots_held_bytes[{0}]".format(info[0]), int(info[1]))
+ zbx.send("pgsql.replication.non_active_slots_discovery[]", zbx.json({"data": discovery}))
+ del discovery
non_active_slots = Pooler.query(self.query_non_active_slots)
zbx.send(self.key_non_active_slots.format("[]"), int(non_active_slots[0][0]))
@@ -132,7 +157,8 @@ def triggers(self, template, dashboard=False):
}) + template.trigger({
"name": "PostgreSQL Replication: number of non-active replication slots on {HOSTNAME} (value={ITEM.LASTVALUE})",
"expression": "{#TEMPLATE:" + self.right_type(self.key_non_active_slots) + ".last()}>" + str(
- NUMBER_NON_ACTIVE_SLOTS)
+ NUMBER_NON_ACTIVE_SLOTS),
+ "status": 1
})
return triggers
@@ -198,7 +224,42 @@ def discovery_rules(self, template, dashboard=False):
]
}
]
- return template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs)
+ active_slots_discovery_rule = template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs)
+
+ rule = {
+ "name": "PostgreSQL Replication: Non Active Slots Discovery",
+ "key": self.key_replication_non_active_slots_discovery.format("[{0}]".format(self.Macros[self.Type]))
+ }
+ if Plugin.old_zabbix:
+ conditions = []
+ rule["filter"] = "{#NON_ACTIVE_SLOT_NAME}:.*"
+ else:
+ conditions = [{
+ "condition": [
+ {"macro": "{#NON_ACTIVE_SLOT_NAME}",
+ "value": ".*",
+ "operator": 8,
+ "formulaid": "A"}
+ ]
+ }]
+ items = [
+ {"key": self.right_type(self.key_non_active_slots_held_bytes, var_discovery="{#NON_ACTIVE_SLOT_NAME},"),
+ "name": "PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}",
+ "value_type": Plugin.VALUE_TYPE.numeric_float,
+ "delay": self.plugin_config("interval"),
+ "drawtype": 2}
+ ]
+ graphs = []
+ triggers = [
+ {
+ "name": "PostgreSQL Replication: bytes held by slot {#NON_ACTIVE_SLOT_NAME} is too high (value={ITEM.LASTVALUE})",
+ "expression": "{#TEMPLATE:" + self.right_type(self.key_non_active_slots_held_bytes, var_discovery="{#NON_ACTIVE_SLOT_NAME},") + ".last()}>" +
+ self.plugin_macros["critical_bytes_held_by_none_active_slot"][0][1]
+ }
+ ]
+ non_active_slots_discovery_rule = template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs, triggers=triggers)
+
+ return active_slots_discovery_rule + non_active_slots_discovery_rule
def keys_and_queries(self, template_zabbix):
result = []
diff --git a/mamonsu/plugins/system/linux/disk_sizes.py b/mamonsu/plugins/system/linux/disk_sizes.py
index 898c2c0..d461812 100644
--- a/mamonsu/plugins/system/linux/disk_sizes.py
+++ b/mamonsu/plugins/system/linux/disk_sizes.py
@@ -20,7 +20,7 @@ class DiskSizes(Plugin):
ExcludeFsTypes = [
"none", "unknown", "rootfs", "iso9660", "squashfs", "udf", "romfs", "ramfs", "debugfs", "cgroup", "cgroup_root",
- "pstore", "devtmpfs", "autofs", "cgroup", "configfs", "devpts", "efivarfs", "fusectl", "fuse.gvfsd-fuse",
+ "pstore", "devtmpfs", "autofs", "cgroup2", "configfs", "devpts", "efivarfs", "fusectl", "fuse.gvfsd-fuse",
"hugetlbfs", "mqueue", "binfmt_misc", "nfsd", "proc", "pstore", "selinuxfs", "rpc_pipefs", "securityfs",
"sysfs", "nsfs", "tmpfs", "tracefs"
]
diff --git a/mamonsu/tools/bootstrap/sql.py b/mamonsu/tools/bootstrap/sql.py
index f37be0f..bf99442 100644
--- a/mamonsu/tools/bootstrap/sql.py
+++ b/mamonsu/tools/bootstrap/sql.py
@@ -236,6 +236,23 @@
coalesce((pg_{7}_diff(pg_current_{7}(), replay_{9}))::bigint, 0) AS total_lag
FROM pg_stat_replication
$$ LANGUAGE SQL SECURITY DEFINER;
+
+DROP FUNCTION IF EXISTS mamonsu.bytes_held_by_inactive_slot_on_master();
+CREATE OR REPLACE FUNCTION mamonsu.bytes_held_by_inactive_slot_on_master()
+RETURNS TABLE(slot_name TEXT, wal_held_bytes BIGINT) AS $$
+SELECT slot_name::TEXT, coalesce((pg_{7}_diff(pg_current_wal_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes
+FROM pg_replication_slots
+WHERE active = 'false'
+$$ LANGUAGE SQL SECURITY DEFINER;
+
+DROP FUNCTION IF EXISTS mamonsu.bytes_held_by_inactive_slot_on_replica();
+CREATE OR REPLACE FUNCTION mamonsu.bytes_held_by_inactive_slot_on_replica()
+RETURNS TABLE(slot_name TEXT, wal_held_bytes BIGINT) AS $$
+SELECT slot_name::TEXT, coalesce((pg_{7}_diff(pg_last_wal_replay_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes
+FROM pg_replication_slots
+WHERE active = 'false'
+$$ LANGUAGE SQL SECURITY DEFINER;
+
"""
CreatePgBuffercacheFunctionsSQL = """
diff --git a/packaging/debian/changelog b/packaging/debian/changelog
index 218931a..6efa097 100644
--- a/packaging/debian/changelog
+++ b/packaging/debian/changelog
@@ -1,3 +1,9 @@
+mamonsu (3.5.13-1) stable; urgency=low
+ * Added a new metric that displays the bytes held by non-active replication slots, along with the corresponding trigger;
+ * Set the trigger for 'number of non-active replication slots' to be disabled by default;
+ * Fixed the Linux plugin to ensure compatibility with recent Linux versions that use cgroups v2;
+ * Resolved a deadlock issue in the send queue that caused Mamonsu to hang after network problems;
+
mamonsu (3.5.12-1) stable; urgency=low
* Port version parser code from public archive of pypa/pkg_resources;
* Thread-safe implementation of connection cache;
diff --git a/packaging/rpm/SPECS/mamonsu.spec b/packaging/rpm/SPECS/mamonsu.spec
index dcc7c9f..dcfd2bd 100644
--- a/packaging/rpm/SPECS/mamonsu.spec
+++ b/packaging/rpm/SPECS/mamonsu.spec
@@ -1,5 +1,5 @@
Name: mamonsu
-Version: 3.5.12
+Version: 3.5.13
Release: 1%{?dist}
Summary: Monitoring agent for PostgreSQL
Group: Applications/Internet
@@ -73,6 +73,12 @@ chown -R mamonsu:mamonsu /var/log/mamonsu
chown -R mamonsu:mamonsu /etc/mamonsu
%changelog
+* Thu May 29 2025 Andrey Papsuyko - 3.5.13-1
+ - Added a new metric that displays the bytes held by non-active replication slots, along with the corresponding trigger;
+ - Set the trigger for 'number of non-active replication slots' to be disabled by default;
+ - Fixed the Linux plugin to ensure compatibility with recent Linux versions that use cgroups v2;
+ - Resolved a deadlock issue in the send queue that caused Mamonsu to hang after network problems;
+
* Wed Mar 5 2025 Maxim Styushin - 3.5.12-1
- Port version parser code from public archive of pypa/pkg_resources;
- Thread-safe implementation of connection cache;
diff --git a/packaging/win/mamonsu.def.nsh b/packaging/win/mamonsu.def.nsh
index 1b60f1c..5afbfdc 100644
--- a/packaging/win/mamonsu.def.nsh
+++ b/packaging/win/mamonsu.def.nsh
@@ -1,5 +1,5 @@
!define NAME Mamonsu
-!define VERSION 3.5.12
+!define VERSION 3.5.13
!define MAMONSU_REG_PATH "Software\PostgresPro\Mamonsu"
!define MAMONSU_REG_UNINSTALLER_PATH "Software\Microsoft\Windows\CurrentVersion\Uninstall"
!define EDB_REG "SOFTWARE\Postgresql"
pFad - Phonifier reborn
Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy