gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[ansible-taler-exchange] branch master updated: tighten alert rules


From: Admin
Subject: [ansible-taler-exchange] branch master updated: tighten alert rules
Date: Tue, 03 Jun 2025 01:07:46 +0200

This is an automated email from the git hooks/post-receive script.

grothoff pushed a commit to branch master
in repository ansible-taler-exchange.

The following commit(s) were added to refs/heads/master by this push:
     new e1a4259  tighten alert rules
e1a4259 is described below

commit e1a42593a08bbe6e8e3dd7e491064fd7cd48fef7
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Tue Jun 3 01:07:41 2025 +0200

    tighten alert rules
---
 roles/monitoring/files/etc/prometheus/alert_rules.yml        |  4 ++--
 .../monitoring/files/etc/prometheus/node-exporter-rules.yml  | 12 ++++++------
 roles/monitoring/templates/etc/prometheus/alertmanager.yml   |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/roles/monitoring/files/etc/prometheus/alert_rules.yml 
b/roles/monitoring/files/etc/prometheus/alert_rules.yml
index 914fef6..21722d8 100644
--- a/roles/monitoring/files/etc/prometheus/alert_rules.yml
+++ b/roles/monitoring/files/etc/prometheus/alert_rules.yml
@@ -11,13 +11,13 @@ groups:
       description: "CPU latency is above 80% for more than 1 minute."
 
   - alert: LowDiskSpace
-    expr: (node_filesystem_free_bytes / node_filesystem_size_bytes) * 100 < 10
+    expr: (node_filesystem_free_bytes / node_filesystem_size_bytes) * 100 < 50
     for: 1m
     labels:
       severity: critical
     annotations:
       summary: "Low Disk Space detected"
-      description: "Disk space is below 10% for more than 1 minute."
+      description: "Disk space is below 50% for more than 1 minute."
 
   - alert: HighMemoryUsage
     expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) 
* 100 > 80
diff --git a/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml 
b/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
index 1e14044..cd3bac7 100644
--- a/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
+++ b/roles/monitoring/files/etc/prometheus/node-exporter-rules.yml
@@ -5,13 +5,13 @@ groups:
   rules:
 
     - alert: HostOutOfMemory
-      expr: '(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < 
.10)'
+      expr: '(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < 
.20)'
       for: 2m
       labels:
         severity: warning
       annotations:
         summary: Host out of memory (instance {{ $labels.instance }})
-        description: "Node memory is filling up (< 10% left)\n  VALUE = {{ 
$value }}\n  LABELS = {{ $labels }}"
+        description: "Node memory is filling up (< 20% left)\n  VALUE = {{ 
$value }}\n  LABELS = {{ $labels }}"
 
     - alert: HostMemoryUnderMemoryPressure
       expr: '(rate(node_vmstat_pgmajfault[5m]) > 1000)'
@@ -59,13 +59,13 @@ groups:
         description: "Disk is too busy (IO wait > 80%)\n  VALUE = {{ $value 
}}\n  LABELS = {{ $labels }}"
 
     - alert: HostOutOfDiskSpace
-      expr: '(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} 
/ node_filesystem_size_bytes < .10 and on (instance, device, mountpoint) 
node_filesystem_readonly == 0)'
+      expr: '(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} 
/ node_filesystem_size_bytes < .50 and on (instance, device, mountpoint) 
node_filesystem_readonly == 0)'
       for: 2m
       labels:
         severity: critical
       annotations:
         summary: Host out of disk space (instance {{ $labels.instance }})
-        description: "Disk is almost full (< 10% left)\n  VALUE = {{ $value 
}}\n  LABELS = {{ $labels }}"
+        description: "Disk is almost full (< 50% left)\n  VALUE = {{ $value 
}}\n  LABELS = {{ $labels }}"
 
     - alert: HostDiskMayFillIn24Hours
       expr: 
'predict_linear(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}[1h],
 86400) <= 0 and node_filesystem_avail_bytes > 0'
@@ -77,13 +77,13 @@ groups:
         description: "Filesystem will likely run out of space within the next 
24 hours.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: HostOutOfInodes
-      expr: '(node_filesystem_files_free / node_filesystem_files < .10 and ON 
(instance, device, mountpoint) node_filesystem_readonly == 0)'
+      expr: '(node_filesystem_files_free / node_filesystem_files < .50 and ON 
(instance, device, mountpoint) node_filesystem_readonly == 0)'
       for: 2m
       labels:
         severity: critical
       annotations:
         summary: Host out of inodes (instance {{ $labels.instance }})
-        description: "Disk is almost running out of available inodes (< 10% 
left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
+        description: "Disk is almost running out of available inodes (< 50% 
left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
 
     - alert: HostFilesystemDeviceError
       expr: 'node_filesystem_device_error{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} 
== 1'
diff --git a/roles/monitoring/templates/etc/prometheus/alertmanager.yml 
b/roles/monitoring/templates/etc/prometheus/alertmanager.yml
index d7474d3..d662a65 100644
--- a/roles/monitoring/templates/etc/prometheus/alertmanager.yml
+++ b/roles/monitoring/templates/etc/prometheus/alertmanager.yml
@@ -3,7 +3,7 @@
 
 global:
   # The smarthost and SMTP sender used for mail notifications.
-  smtp_smarthost: 'firefly.gnunet.org'
+  smtp_smarthost: 'firefly.gnunet.org:25'
   smtp_from: 'alertmanager@taler.net'
   smtp_require_tls: false
     #smtp_auth_username: 'alertmanager'

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]