diff --git a/.github/workflows/mamonsu-tests-dev.yml b/.github/workflows/mamonsu-tests-dev.yml index 0336b7c..2427a7d 100644 --- a/.github/workflows/mamonsu-tests-dev.yml +++ b/.github/workflows/mamonsu-tests-dev.yml @@ -86,7 +86,7 @@ jobs: echo "zabbix_address=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT id: zabbix_address - name: Edit Zabbix address in agent.conf - run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.12.conf + run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.13.conf - name: Copy test scripts to container run: docker exec $( echo "${{ matrix.docker_os }}" | sed 's/://' | sed 's/\.//' ) mkdir -p -m 755 /mamonsu/ diff --git a/.github/workflows/mamonsu-tests-master.yml b/.github/workflows/mamonsu-tests-master.yml index 6dfb86d..e14042c 100644 --- a/.github/workflows/mamonsu-tests-master.yml +++ b/.github/workflows/mamonsu-tests-master.yml @@ -91,7 +91,7 @@ jobs: echo "zabbix_address=$(hostname -I | awk '{print $1}')" >> $GITHUB_OUTPUT id: zabbix_address - name: Edit Zabbix address in agent.conf - run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.12.conf + run: sed -i "s/\(address *= *\).*/\1 ${{ steps.zabbix_address.outputs.zabbix_address }}/" ${{ env.MAMONSU_PATH }}/github-actions-tests/sources/agent_3.5.13.conf - name: Copy test scripts to container run: docker exec $( echo "${{ matrix.docker_os }}" | sed 's/://' | sed 's/\.//' ) mkdir -p -m 755 /mamonsu/ diff --git a/README.md b/README.md index a4f3076..fd6b6bf 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ Pre-built _mamonsu_ packages are provided in official Postgres Pro repository: [ **Install from repository using script:** ```shell -$ wget 
https://repo.postgrespro.ru/mamonsu/keys/pgpro-repo-add.sh +$ wget https://repo.postgrespro.ru/mamonsu/mamonsu/keys/pgpro-repo-add.sh $ sudo chmod 700 ./pgpro-repo-add.sh $ sudo ./pgpro-repo-add.sh ``` diff --git a/documentation/metrics.md b/documentation/metrics.md index 15ef4a1..d507a8e 100644 --- a/documentation/metrics.md +++ b/documentation/metrics.md @@ -3691,7 +3691,8 @@ Default config: ### Replication Default config: -        lag_more_than_in_sec = 300 +        lag_more_than_in_sec = 300\ +        critical_bytes_held_by_non_active_slot = 1073741824 bytes ### Items @@ -3763,6 +3764,37 @@ Default config: *Non-active Replication Slots* calculates as count of slots with `false` active status. +- **Bytes Held By Non-active Replication Slots** + + Zabbix item: + + + + + + + + + + + + + + + + + + + + + + + + + +
Name: PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}
Key: pgsql.replication.non_active_slots_discovery[]
Type: Numeric (float)
Units: Bytes
Delta: As Is
Supported Version: 10+
+ + *Non-active Replication Slots* calculates as count of slots with `false` active status. - **Streaming Replication Lag** @@ -3861,12 +3893,40 @@ Default config: +- **PostgreSQL Replication: Non-active Slots Discovery** + + Items: + + + + + + + + + + + + + + + + + + + + + +
Name: PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}
Key: pgsql.replication.non_active_slots_discovery[]
Type: Numeric (float)
Units: Bytes
Delta: As Is
+ ### Triggers - **PostgreSQL Instance: server mode has been changed on {HOSTNAME} to {ITEM.LASTVALUE}** - **PostgreSQL number of non-active replication slots on {HOSTNAME} (value={ITEM.LASTVALUE})** - + Disabled by default +- **PostgreSQL Replication: bytes held by slot {#NON_ACTIVE_SLOT_NAME} is too high (value={ITEM.LASTVALUE})** + Triggers if *PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}* exceeds `critical_bytes_held_by_non_active_slot`. - **PostgreSQL streaming lag too high on {HOSTNAME} (value={ITEM.LASTVALUE})** Triggers if *PostgreSQL Replication: Streaming Replication Lag* exceeds `lag_more_than_in_sec`. diff --git a/github-actions-tests/mamonsu_build.sh b/github-actions-tests/mamonsu_build.sh index a766806..6c24eb9 100644 --- a/github-actions-tests/mamonsu_build.sh +++ b/github-actions-tests/mamonsu_build.sh @@ -41,7 +41,7 @@ if [ "${OS%:*}" = "centos" ]; then python3 setup.py build && python3 setup.py install make rpm sudo rpm -i ./mamonsu*.rpm - cat /mamonsu/github-actions-tests/sources/agent_3.5.12.conf > /etc/mamonsu/agent.conf + cat /mamonsu/github-actions-tests/sources/agent_3.5.13.conf > /etc/mamonsu/agent.conf # ensuring mamonsu can actually start sudo su -s /bin/bash -c "mamonsu bootstrap -x --user postgres -d mamonsu_test_db" mamonsu /etc/init.d/mamonsu restart @@ -65,7 +65,7 @@ elif [ "${OS%:*}" = "ubuntu" ]; then python3 setup.py build && python3 setup.py install make deb sudo dpkg -i ./mamonsu*.deb - cat /mamonsu/github-actions-tests/sources/agent_3.5.12.conf > /etc/mamonsu/agent.conf + cat /mamonsu/github-actions-tests/sources/agent_3.5.13.conf > /etc/mamonsu/agent.conf # ensuring mamonsu can actually start sudo su -s /bin/bash -c "mamonsu bootstrap -x --user postgres -d mamonsu_test_db" mamonsu service mamonsu restart diff --git a/github-actions-tests/sources/agent_3.5.12.conf b/github-actions-tests/sources/agent_3.5.13.conf similarity index 100% rename from github-actions-tests/sources/agent_3.5.12.conf 
rename to github-actions-tests/sources/agent_3.5.13.conf diff --git a/mamonsu/__init__.py b/mamonsu/__init__.py index b43c491..9264cb8 100644 --- a/mamonsu/__init__.py +++ b/mamonsu/__init__.py @@ -1,7 +1,7 @@ __author__ = 'Dmitry Vasilyev' __author_email__ = 'info@postgrespro.ru' __description__ = 'Monitoring agent for PostgreSQL' -__version__ = '3.5.12' +__version__ = '3.5.13' __licence__ = 'BSD' __url__ = 'https://github.com/postgrespro/mamonsu' diff --git a/mamonsu/lib/default_config.py b/mamonsu/lib/default_config.py index c7f2d98..12791a1 100644 --- a/mamonsu/lib/default_config.py +++ b/mamonsu/lib/default_config.py @@ -35,6 +35,8 @@ def default_host(): host = os.environ.get('PGHOST') or 'auto' if platform.FREEBSD: host = os.environ.get('PGHOST') or 'auto' + if platform.DARWIN: + host = os.environ.get('PGHOST') or 'auto' return host @staticmethod diff --git a/mamonsu/lib/platform.py b/mamonsu/lib/platform.py index 5ea5faa..279200d 100644 --- a/mamonsu/lib/platform.py +++ b/mamonsu/lib/platform.py @@ -3,5 +3,6 @@ LINUX = (sys.platform == 'linux' or sys.platform == 'linux2') WINDOWS = (sys.platform == 'win32' or sys.platform == 'win64') FREEBSD = ('freebsd' in sys.platform) +DARWIN = sys.platform == 'darwin' UNIX = LINUX or FREEBSD INTEGER_TYPES = int, diff --git a/mamonsu/lib/queue.py b/mamonsu/lib/queue.py index 96ceadf..e348fc4 100644 --- a/mamonsu/lib/queue.py +++ b/mamonsu/lib/queue.py @@ -10,25 +10,21 @@ def __init__(self): self.lock = threading.Lock() def add(self, metric): - self.lock.acquire() - self.queue.insert(0, metric) - self.lock.release() + with self.lock: + self.queue.insert(0, metric) # replace last metric def replace(self, metric): - self.lock.acquire() - self.queue.pop() - self.queue.append(metric) - self.lock.release() + with self.lock: + if self.queue: + self.queue.pop() + self.queue.append(metric) def size(self): - self.lock.acquire() - result = len(self.queue) - self.lock.release() - return result + with self.lock: + return 
len(self.queue) def flush(self): - self.lock.acquire() - result, self.queue = self.queue, [] - self.lock.release() - return result + with self.lock: + result, self.queue = self.queue, [] + return result diff --git a/mamonsu/plugins/pgsql/driver/pool.py b/mamonsu/plugins/pgsql/driver/pool.py index 6576f92..a8433d9 100644 --- a/mamonsu/plugins/pgsql/driver/pool.py +++ b/mamonsu/plugins/pgsql/driver/pool.py @@ -86,7 +86,7 @@ class Pool(object): """ SELECT application_name, {0} - coalesce((pg_{1}_{2}_diff(pg_current_{1}_{2}(), replay_lsn))::int, 0) AS total_lag + coalesce((pg_{1}_{2}_diff(pg_current_{1}_{2}(), replay_{2}))::int, 0) AS total_lag FROM pg_stat_replication; """, """ @@ -95,6 +95,30 @@ class Pool(object): total_lag FROM mamonsu.count_{1}_lag_lsn(); """ + ), + "wal_held_bytes_master": ( + """ + SELECT slot_name, + coalesce((pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))::int, 0) AS wal_held_bytes + FROM pg_replication_slots; + """, + """ + SELECT slot_name, + wal_held_bytes + FROM mamonsu.bytes_held_by_inactive_slot_on_master(); + """ + ), + "wal_held_bytes_replica": ( + """ + SELECT slot_name, + coalesce((pg_wal_lsn_diff(pg_last_wal_replay_lsn(), restart_lsn))::int, 0) AS wal_held_bytes + FROM pg_replication_slots; + """, + """ + SELECT slot_name, + wal_held_bytes + FROM mamonsu.bytes_held_by_inactive_slot_on_replica(); + """ ) } diff --git a/mamonsu/plugins/pgsql/replication.py b/mamonsu/plugins/pgsql/replication.py index 8a51889..7ed701c 100644 --- a/mamonsu/plugins/pgsql/replication.py +++ b/mamonsu/plugins/pgsql/replication.py @@ -13,7 +13,8 @@ class Replication(Plugin): AgentPluginType = "pg" # key: (macro, value) plugin_macros = { - "critical_lag_seconds": [("macro", "{$CRITICAL_LAG_SECONDS}"), ("value", 60 * 5)] + "critical_lag_seconds": [("macro", "{$CRITICAL_LAG_SECONDS}"), ("value", 60 * 5)], + "critical_bytes_held_by_none_active_slot": [("macro", "{$CRITICAL_BYTES_HELD_BY_NON_ACTIVE_SLOT}"), ("value", 1024 * 1024 * 1024)] } # get time of 
replication lag @@ -30,8 +31,15 @@ class Replication(Plugin): WHERE active = 'false'; """ + query_bytes_held_by_non_active_slot = """ + SELECT slot_name, coalesce(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::bigint, 0) AS wal_size_bytes + FROM pg_replication_slots + WHERE active = 'false'; + """ + # for discovery rule for name of each replica key_lsn_replication_discovery = "pgsql.replication.discovery{0}" + key_replication_non_active_slots_discovery = "pgsql.replication.non_active_slots_discovery{0}" key_total_lag = "pgsql.replication.total_lag{0}" # for PG 10 and higher key_flush = "pgsql.replication.flush_lag{0}" @@ -42,6 +50,7 @@ class Replication(Plugin): key_replication = "pgsql.replication_lag{0}" key_non_active_slots = "pgsql.replication.non_active_slots{0}" + key_non_active_slots_held_bytes = "pgsql.replication.non_active_slots_held_bytes{0}" def run(self, zbx): @@ -79,6 +88,14 @@ def run(self, zbx): zbx.send("pgsql.replication.replay_lag[{0}]".format(info[0]), float(info[5])) zbx.send("pgsql.replication.discovery[]", zbx.json({"data": lags})) del lags + bytes_held_by_non_active_slot = Pooler.run_sql_type("wal_held_bytes_master", args=[]) + if bytes_held_by_non_active_slot: + discovery = [] + for info in bytes_held_by_non_active_slot: + discovery.append({"{#NON_ACTIVE_SLOT_NAME}": info[0]}) + zbx.send("pgsql.replication.non_active_slots_held_bytes[{0}]".format(info[0]), int(info[1])) + zbx.send("pgsql.replication.non_active_slots_discovery[]", zbx.json({"data": discovery})) + del discovery elif Pooler.is_superuser() or Pooler.is_bootstraped(): result_lags = Pooler.run_sql_type("wal_lag_lsn", args=[" ", "xlog", "location"]) if result_lags: @@ -90,7 +107,15 @@ def run(self, zbx): del lags else: self.disable_and_exit_if_not_superuser() - + else: + bytes_held_by_non_active_slot = Pooler.run_sql_type("wal_held_bytes_replica", args=[]) + if bytes_held_by_non_active_slot: + discovery = [] + for info in bytes_held_by_non_active_slot: + 
discovery.append({"{#NON_ACTIVE_SLOT_NAME}": info[0]}) + zbx.send("pgsql.replication.non_active_slots_held_bytes[{0}]".format(info[0]), int(info[1])) + zbx.send("pgsql.replication.non_active_slots_discovery[]", zbx.json({"data": discovery})) + del discovery non_active_slots = Pooler.query(self.query_non_active_slots) zbx.send(self.key_non_active_slots.format("[]"), int(non_active_slots[0][0])) @@ -132,7 +157,8 @@ def triggers(self, template, dashboard=False): }) + template.trigger({ "name": "PostgreSQL Replication: number of non-active replication slots on {HOSTNAME} (value={ITEM.LASTVALUE})", "expression": "{#TEMPLATE:" + self.right_type(self.key_non_active_slots) + ".last()}>" + str( - NUMBER_NON_ACTIVE_SLOTS) + NUMBER_NON_ACTIVE_SLOTS), + "status": 1 }) return triggers @@ -198,7 +224,42 @@ def discovery_rules(self, template, dashboard=False): ] } ] - return template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs) + active_slots_discovery_rule = template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs) + + rule = { + "name": "PostgreSQL Replication: Non Active Slots Discovery", + "key": self.key_replication_non_active_slots_discovery.format("[{0}]".format(self.Macros[self.Type])) + } + if Plugin.old_zabbix: + conditions = [] + rule["filter"] = "{#NON_ACTIVE_SLOT_NAME}:.*" + else: + conditions = [{ + "condition": [ + {"macro": "{#NON_ACTIVE_SLOT_NAME}", + "value": ".*", + "operator": 8, + "formulaid": "A"} + ] + }] + items = [ + {"key": self.right_type(self.key_non_active_slots_held_bytes, var_discovery="{#NON_ACTIVE_SLOT_NAME},"), + "name": "PostgreSQL Replication: Bytes held by non-active slot {#NON_ACTIVE_SLOT_NAME}", + "value_type": Plugin.VALUE_TYPE.numeric_float, + "delay": self.plugin_config("interval"), + "drawtype": 2} + ] + graphs = [] + triggers = [ + { + "name": "PostgreSQL Replication: bytes held by slot {#NON_ACTIVE_SLOT_NAME} is too high (value={ITEM.LASTVALUE})", + "expression": "{#TEMPLATE:" 
+ self.right_type(self.key_non_active_slots_held_bytes, var_discovery="{#NON_ACTIVE_SLOT_NAME},") + ".last()}>" + + self.plugin_macros["critical_bytes_held_by_none_active_slot"][0][1] + } + ] + non_active_slots_discovery_rule = template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs, triggers=triggers) + + return active_slots_discovery_rule + non_active_slots_discovery_rule def keys_and_queries(self, template_zabbix): result = [] diff --git a/mamonsu/plugins/system/linux/disk_sizes.py b/mamonsu/plugins/system/linux/disk_sizes.py index 898c2c0..d461812 100644 --- a/mamonsu/plugins/system/linux/disk_sizes.py +++ b/mamonsu/plugins/system/linux/disk_sizes.py @@ -20,7 +20,7 @@ class DiskSizes(Plugin): ExcludeFsTypes = [ "none", "unknown", "rootfs", "iso9660", "squashfs", "udf", "romfs", "ramfs", "debugfs", "cgroup", "cgroup_root", - "pstore", "devtmpfs", "autofs", "cgroup", "configfs", "devpts", "efivarfs", "fusectl", "fuse.gvfsd-fuse", + "pstore", "devtmpfs", "autofs", "cgroup2", "configfs", "devpts", "efivarfs", "fusectl", "fuse.gvfsd-fuse", "hugetlbfs", "mqueue", "binfmt_misc", "nfsd", "proc", "pstore", "selinuxfs", "rpc_pipefs", "securityfs", "sysfs", "nsfs", "tmpfs", "tracefs" ] diff --git a/mamonsu/tools/bootstrap/sql.py b/mamonsu/tools/bootstrap/sql.py index f37be0f..bf99442 100644 --- a/mamonsu/tools/bootstrap/sql.py +++ b/mamonsu/tools/bootstrap/sql.py @@ -236,6 +236,23 @@ coalesce((pg_{7}_diff(pg_current_{7}(), replay_{9}))::bigint, 0) AS total_lag FROM pg_stat_replication $$ LANGUAGE SQL SECURITY DEFINER; + +DROP FUNCTION IF EXISTS mamonsu.bytes_held_by_inactive_slot_on_master(); +CREATE OR REPLACE FUNCTION mamonsu.bytes_held_by_inactive_slot_on_master() +RETURNS TABLE(slot_name TEXT, wal_held_bytes BIGINT) AS $$ +SELECT slot_name::TEXT, coalesce((pg_{7}_diff(pg_current_wal_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes +FROM pg_replication_slots +WHERE active = 'false' +$$ LANGUAGE SQL SECURITY DEFINER; + +DROP FUNCTION IF 
EXISTS mamonsu.bytes_held_by_inactive_slot_on_replica(); +CREATE OR REPLACE FUNCTION mamonsu.bytes_held_by_inactive_slot_on_replica() +RETURNS TABLE(slot_name TEXT, wal_held_bytes BIGINT) AS $$ +SELECT slot_name::TEXT, coalesce((pg_{7}_diff(pg_last_wal_replay_lsn(), restart_lsn))::bigint, 0) AS wal_held_bytes +FROM pg_replication_slots +WHERE active = 'false' +$$ LANGUAGE SQL SECURITY DEFINER; + """ CreatePgBuffercacheFunctionsSQL = """ diff --git a/packaging/debian/changelog b/packaging/debian/changelog index 218931a..6efa097 100644 --- a/packaging/debian/changelog +++ b/packaging/debian/changelog @@ -1,3 +1,9 @@ +mamonsu (3.5.13-1) stable; urgency=low + * Added a new metric that displays the bytes held by non-active replication slots, along with the corresponding trigger.; + * Set the trigger for 'number of non-active replication slots' to be disabled by default.; + * Fixed the Linux plugin to ensure compatibility with recent Linux versions that use cgroups2.; + * Resolved a deadlock issue in the send queue that caused Mamonsu to hang after network problems.; + mamonsu (3.5.12-1) stable; urgency=low * Port version parser code from public archive of pypa/pkg_resources; * Thread-safe implementation of connection cache; diff --git a/packaging/rpm/SPECS/mamonsu.spec b/packaging/rpm/SPECS/mamonsu.spec index dcc7c9f..dcfd2bd 100644 --- a/packaging/rpm/SPECS/mamonsu.spec +++ b/packaging/rpm/SPECS/mamonsu.spec @@ -1,5 +1,5 @@ Name: mamonsu -Version: 3.5.12 +Version: 3.5.13 Release: 1%{?dist} Summary: Monitoring agent for PostgreSQL Group: Applications/Internet @@ -73,6 +73,12 @@ chown -R mamonsu:mamonsu /var/log/mamonsu chown -R mamonsu:mamonsu /etc/mamonsu %changelog +* Thu May 29 2025 Andrey Papsuyko - 3.5.13-1 + - Added a new metric that displays the bytes held by non-active replication slots, along with the corresponding trigger.; + - Set the trigger for 'number of non-active replication slots' to be disabled by default.; + - Fixed the Linux plugin to ensure 
compatibility with recent Linux versions that use cgroups2.; + - Resolved a deadlock issue in the send queue that caused Mamonsu to hang after network problems.; + * Wed Mar 5 2025 Maxim Styushin - 3.5.12-1 - Port version parser code from public archive of pypa/pkg_resources; - Thread-safe implementation of connection cache; diff --git a/packaging/win/mamonsu.def.nsh b/packaging/win/mamonsu.def.nsh index 1b60f1c..5afbfdc 100644 --- a/packaging/win/mamonsu.def.nsh +++ b/packaging/win/mamonsu.def.nsh @@ -1,5 +1,5 @@ !define NAME Mamonsu -!define VERSION 3.5.12 +!define VERSION 3.5.13 !define MAMONSU_REG_PATH "Software\PostgresPro\Mamonsu" !define MAMONSU_REG_UNINSTALLER_PATH "Software\Microsoft\Windows\CurrentVersion\Uninstall" !define EDB_REG "SOFTWARE\Postgresql" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy