[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[ansible-taler-exchange] branch master updated: tighten alert rules
From: Admin
Subject: [ansible-taler-exchange] branch master updated: tighten alert rules
Date: Tue, 03 Jun 2025 01:07:46 +0200
This is an automated email from the git hooks/post-receive script.
grothoff pushed a commit to branch master
in repository ansible-taler-exchange.
The following commit(s) were added to refs/heads/master by this push:
new e1a4259 tighten alert rules
e1a4259 is described below
commit e1a42593a08bbe6e8e3dd7e491064fd7cd48fef7
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Tue Jun 3 01:07:41 2025 +0200
tighten alert rules
---
roles/monitoring/files/etc/prometheus/alert_rules.yml | 4 ++--
.../monitoring/files/etc/prometheus/node-exporter-rules.yml | 12 ++++++------
roles/monitoring/templates/etc/prometheus/alertmanager.yml | 2 +-
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/roles/monitoring/files/etc/prometheus/alert_rules.yml
b/roles/monitoring/files/etc/prometheus/alert_rules.yml
index 914fef6..21722d8 100644
--- a/roles/monitoring/files/etc/prometheus/alert_rules.yml
+++ b/roles/monitoring/files/etc/prometheus/alert_rules.yml
@@ -11,13 +11,13 @@ groups:
description: "CPU latency is above 80% for more than 1 minute."
- alert: LowDiskSpace
- expr: (node_filesystem_free_bytes / node_filesystem_size_bytes) * 100 < 10
+ expr: (node_filesystem_free_bytes / node_filesystem_size_bytes) * 100 < 50
for: 1m
labels:
severity: critical
annotations:
summary: "Low Disk Space detected"
- description: "Disk space is below 10% for more than 1 minute."
+ description: "Disk space is below 50% for more than 1 minute."
- alert: HighMemoryUsage
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes))
* 100 > 80
diff --git a/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
b/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
index 1e14044..cd3bac7 100644
--- a/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
+++ b/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
@@ -5,13 +5,13 @@ groups:
rules:
- alert: HostOutOfMemory
- expr: '(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes <
.10)'
+ expr: '(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes <
.20)'
for: 2m
labels:
severity: warning
annotations:
summary: Host out of memory (instance {{ $labels.instance }})
- description: "Node memory is filling up (< 10% left)\n VALUE = {{
$value }}\n LABELS = {{ $labels }}"
+ description: "Node memory is filling up (< 20% left)\n VALUE = {{
$value }}\n LABELS = {{ $labels }}"
- alert: HostMemoryUnderMemoryPressure
expr: '(rate(node_vmstat_pgmajfault[5m]) > 1000)'
@@ -59,13 +59,13 @@ groups:
description: "Disk is too busy (IO wait > 80%)\n VALUE = {{ $value
}}\n LABELS = {{ $labels }}"
- alert: HostOutOfDiskSpace
- expr: '(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}
/ node_filesystem_size_bytes < .10 and on (instance, device, mountpoint)
node_filesystem_readonly == 0)'
+ expr: '(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}
/ node_filesystem_size_bytes < .50 and on (instance, device, mountpoint)
node_filesystem_readonly == 0)'
for: 2m
labels:
severity: critical
annotations:
summary: Host out of disk space (instance {{ $labels.instance }})
- description: "Disk is almost full (< 10% left)\n VALUE = {{ $value
}}\n LABELS = {{ $labels }}"
+ description: "Disk is almost full (< 50% left)\n VALUE = {{ $value
}}\n LABELS = {{ $labels }}"
- alert: HostDiskMayFillIn24Hours
expr:
'predict_linear(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}[1h],
86400) <= 0 and node_filesystem_avail_bytes > 0'
@@ -77,13 +77,13 @@ groups:
description: "Filesystem will likely run out of space within the next
24 hours.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostOutOfInodes
- expr: '(node_filesystem_files_free / node_filesystem_files < .10 and ON
(instance, device, mountpoint) node_filesystem_readonly == 0)'
+ expr: '(node_filesystem_files_free / node_filesystem_files < .50 and ON
(instance, device, mountpoint) node_filesystem_readonly == 0)'
for: 2m
labels:
severity: critical
annotations:
summary: Host out of inodes (instance {{ $labels.instance }})
- description: "Disk is almost running out of available inodes (< 10%
left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+ description: "Disk is almost running out of available inodes (< 50%
left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostFilesystemDeviceError
expr: 'node_filesystem_device_error{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}
== 1'
diff --git a/roles/monitoring/templates/etc/prometheus/alertmanager.yml
b/roles/monitoring/templates/etc/prometheus/alertmanager.yml
index d7474d3..d662a65 100644
--- a/roles/monitoring/templates/etc/prometheus/alertmanager.yml
+++ b/roles/monitoring/templates/etc/prometheus/alertmanager.yml
@@ -3,7 +3,7 @@
global:
# The smarthost and SMTP sender used for mail notifications.
- smtp_smarthost: 'firefly.gnunet.org'
+ smtp_smarthost: 'firefly.gnunet.org:25'
smtp_from: 'alertmanager@taler.net'
smtp_require_tls: false
#smtp_auth_username: 'alertmanager'
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
[Prev in Thread] | Current Thread | [Next in Thread]
- [ansible-taler-exchange] branch master updated: tighten alert rules, Admin <=