[ansible-taler-exchange] branch master updated: use firefly for alerts


From: Admin
Subject: [ansible-taler-exchange] branch master updated: use firefly for alerts
Date: Mon, 02 Jun 2025 09:14:40 +0200

This is an automated email from the git hooks/post-receive script.

grothoff pushed a commit to branch master
in repository ansible-taler-exchange.

The following commit(s) were added to refs/heads/master by this push:
     new 613cd9b  use firefly for alerts
613cd9b is described below

commit 613cd9be4a8a28539f754a1f83a13f204e26cfac
Author: Christian Grothoff <christian@grothoff.org>
AuthorDate: Mon Jun 2 09:14:36 2025 +0200

    use firefly for alerts
---
 .../files/etc/prometheus/alert_rules.yml           | 29 ++++++++++++++++++++++
 .../monitoring/files/etc/prometheus/prometheus.yml |  8 +++---
 roles/monitoring/tasks/main.yml                    |  8 ++++++
 .../templates/etc/prometheus/alertmanager.yml      | 12 ++++-----
 4 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/roles/monitoring/files/etc/prometheus/alert_rules.yml b/roles/monitoring/files/etc/prometheus/alert_rules.yml
new file mode 100644
index 0000000..914fef6
--- /dev/null
+++ b/roles/monitoring/files/etc/prometheus/alert_rules.yml
@@ -0,0 +1,29 @@
+groups:
+- name: node_exporter_alerts
+  rules:
+  - alert: HighCPULatency
+    expr: sum(rate(node_cpu_seconds_total{mode="system"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 80
+    for: 1m
+    labels:
+      severity: warning
+    annotations:
+      summary: "High CPU Latency detected"
+      description: "CPU latency is above 80% for more than 1 minute."
+
+  - alert: LowDiskSpace
+    expr: (node_filesystem_free_bytes / node_filesystem_size_bytes) * 100 < 10
+    for: 1m
+    labels:
+      severity: critical
+    annotations:
+      summary: "Low Disk Space detected"
+      description: "Disk space is below 10% for more than 1 minute."
+
+  - alert: HighMemoryUsage
+    expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
+    for: 1m
+    labels:
+      severity: warning
+    annotations:
+      summary: "High Memory Usage detected"
+      description: "Memory usage is above 80% for more than 1 minute."
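Note on the new rules: the LowDiskSpace expression as committed matches every filesystem node_exporter reports, including tmpfs and other pseudo-filesystems, which adds noise. A minimal sketch of a narrower variant, assuming the standard node_exporter fstype label; this refinement is not part of the commit:

      # Hypothetical refinement, not in this commit: ignore pseudo-filesystems
      # that node_exporter also reports and that would otherwise trip the alert.
      - alert: LowDiskSpace
        expr: (node_filesystem_free_bytes{fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"}) * 100 < 10
        for: 1m
        labels:
          severity: critical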
diff --git a/roles/monitoring/files/etc/prometheus/prometheus.yml b/roles/monitoring/files/etc/prometheus/prometheus.yml
index bf121a3..10038d9 100644
--- a/roles/monitoring/files/etc/prometheus/prometheus.yml
+++ b/roles/monitoring/files/etc/prometheus/prometheus.yml
@@ -1,7 +1,7 @@
 # my global config
 global:
-  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
-  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
+  scrape_interval: 60s # Set the scrape interval to every 60 seconds. Default is every 1 minute.
+  evaluation_interval: 60s # Evaluate rules every 60 seconds. The default is every 1 minute.
   # scrape_timeout is set to the global default (10s).
 
 # Alertmanager configuration -- FIXME: not yet setup!
@@ -31,13 +31,13 @@ scrape_configs:
 
   # Job, for local node exporter
   - job_name: 'node_exporter_metrics'
-    scrape_interval: 5s
+    scrape_interval: 60s
     static_configs:
       - targets: ['localhost:9100']
 
   # Job, for local nginx exporter
   - job_name: 'nginx_exporter_metrics'
-    scrape_interval: 5s
+    scrape_interval: 60s
     static_configs:
       - targets: ['localhost:9113']
 
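The rules file installed above is only evaluated if prometheus.yml references it. This diff does not touch a rule_files stanza, so whether one already points at /etc/prometheus/alert_rules.yml is an assumption; a minimal sketch of the stanza it would need:

      # Sketch only: load the alert rules installed by the monitoring role.
      rule_files:
        - /etc/prometheus/alert_rules.yml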
diff --git a/roles/monitoring/tasks/main.yml b/roles/monitoring/tasks/main.yml
index 4aec691..c7a4df5 100644
--- a/roles/monitoring/tasks/main.yml
+++ b/roles/monitoring/tasks/main.yml
@@ -200,6 +200,14 @@
     group: root
     mode: "0644"
 
+- name: Configure node-exporter rules for alertmanager
+  copy:
+    src: etc/prometheus/alert_rules.yml
+    dest: /etc/prometheus/alert_rules.yml
+    owner: root
+    group: root
+    mode: "0644"
+
 - name: Ensure exporter services are enabled and started
   service:
     name: "{{ item }}"
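Copying the rules file into place does not make a running Prometheus re-read it. A minimal sketch of the same task with a change notification, assuming a restart handler exists or would be added in the role; the handler name is hypothetical:

      - name: Configure node-exporter rules for alertmanager
        copy:
          src: etc/prometheus/alert_rules.yml
          dest: /etc/prometheus/alert_rules.yml
          owner: root
          group: root
          mode: "0644"
        notify: Restart prometheus  # hypothetical handler, not defined in this commit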
diff --git a/roles/monitoring/templates/etc/prometheus/alertmanager.yml b/roles/monitoring/templates/etc/prometheus/alertmanager.yml
index cb68bf2..d7474d3 100644
--- a/roles/monitoring/templates/etc/prometheus/alertmanager.yml
+++ b/roles/monitoring/templates/etc/prometheus/alertmanager.yml
@@ -3,14 +3,14 @@
 
 global:
   # The smarthost and SMTP sender used for mail notifications.
-  smtp_smarthost: 'localhost:25'
+  smtp_smarthost: 'firefly.gnunet.org'
   smtp_from: 'alertmanager@taler.net'
   smtp_require_tls: false
     #smtp_auth_username: 'alertmanager'
     #smtp_auth_password: 'password'
 
 # The directory from which notification templates are read.
-templates: 
+templates:
 - '/etc/prometheus/alertmanager_templates/*.tmpl'
 
 # The root route on which each incoming alert enters.
@@ -23,7 +23,7 @@ route:
   # When a new group of alerts is created by an incoming alert, wait at
   # least 'group_wait' to send the initial notification.
   # This way ensures that you get multiple alerts for the same group that start
-  # firing shortly after another are batched together on the first 
+  # firing shortly after another are batched together on the first
   # notification.
   group_wait: 30s
 
@@ -33,12 +33,12 @@ route:
 
   # If an alert has successfully been sent, wait 'repeat_interval' to
   # resend them.
-  repeat_interval: 12h 
+  repeat_interval: 12h
 
   # A default receiver
   receiver: taler-warning-mails
 
-  # All the above attributes are inherited by all child routes and can 
+  # All the above attributes are inherited by all child routes and can
   # overwritten on each.
 
   # The child route trees.
@@ -50,7 +50,7 @@ route:
 
 # Inhibition rules allow to mute a set of alerts given that another alert is
 # firing.
-# We use this to mute any warning-level notifications if the same alert is 
+# We use this to mute any warning-level notifications if the same alert is
 # already critical.
 inhibit_rules:
 - source_match:
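One caveat on the smarthost change: Alertmanager expects smtp_smarthost in host:port form, so a bare hostname will typically fail once a mail notification is attempted. A minimal sketch of the global block with an explicit port, assuming firefly relays on the standard SMTP port 25:

      global:
        # Assumption: firefly.gnunet.org accepts submissions on port 25.
        smtp_smarthost: 'firefly.gnunet.org:25'
        smtp_from: 'alertmanager@taler.net'
        smtp_require_tls: false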

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.


