From f7dd86f1d394aa1aba9942ce6197b52ad9910413 Mon Sep 17 00:00:00 2001 From: ilyamak04 Date: Thu, 24 Jul 2025 21:19:57 +0300 Subject: [PATCH] add prometheus --- docs/monitoring/Prometheus.md | 946 +++++++++++++++++++++++++++++++++- 1 file changed, 945 insertions(+), 1 deletion(-) diff --git a/docs/monitoring/Prometheus.md b/docs/monitoring/Prometheus.md index 25dd845..7173cb9 100644 --- a/docs/monitoring/Prometheus.md +++ b/docs/monitoring/Prometheus.md @@ -259,4 +259,948 @@ increase(<метрика>[<интервал>]) - `promtool check config prometheus.yml`- проверка синтаксиса - `promtool check rules rules.yml` - проверка синтаксиса -- `promtool test rules rules_test.yml` \ No newline at end of file +- `promtool test rules rules_test.yml` + +### Telegraf + +- `telegraf --test --config /etc/telegraf/telegraf.conf --input-filter tail` - запустить телеграф в режиме отладки + + +### PostgreSQL Exporter + +- Создать пользователя для мониторинга +```sql +CREATE USER postgres_exporter WITH PASSWORD 'password'; +ALTER USER postgres_exporter SET SEARCH_PATH TO pg_catalog; +GRANT CONNECT ON DATABASE postgres TO postgres_exporter; +GRANT USAGE ON SCHEMA pg_catalog TO postgres_exporter; +GRANT EXECUTE ON FUNCTION pg_ls_waldir TO postgres_exporter; +GRANT pg_read_all_stats TO postgres_exporter; +``` + +- Установить Exporter +```bash +wget https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-amd64.tar.gz +tar -xzvf postgres_exporter-0.17.1.linux-amd64.tar.gz +rm postgres_exporter-0.17.1.linux-amd64.tar.gz +cd postgres_exporter-0.17.1.linux-amd64/ +mv postgres_exporter /usr/local/bin/ +cd .. +rm -rf postgres_exporter-0.17.1.linux-amd64/ +chmod +x /usr/local/bin/postgres_exporter +/usr/local/bin/postgres_exporter --version +``` + +- Добавить в `postgresql.conf` +```conf +ssl = on +ssl_cert_file = '/etc/postgresql/ssl/server.crt' +ssl_key_file = '/etc/postgresql/ssl/server.key' +ssl_ca_file = '/etc/postgresql/ssl/root.crt' +``` + +- Сгенирировать серты, включая корневой (самоподписные) +```bash +mkdir -p /etc/postgresql/ssl +chown postgres:postgres /etc/postgresql/ssl +chmod 700 /etc/postgresql/ssl + +openssl genrsa -out /etc/postgresql/ssl/root.key 4096 +openssl req -x509 -new -nodes -key /etc/postgresql/ssl/root.key -sha256 -days 3650 \ + -out /etc/postgresql/ssl/root.crt \ + -subj "/CN=PostgreSQL Root CA" +chmod 600 /etc/postgresql/ssl/root.key + + +openssl genrsa -out /etc/postgresql/ssl/server.key 2048 +openssl req -new -key /etc/postgresql/ssl/server.key -out /etc/postgresql/ssl/server.csr \ + -subj "/CN=$(hostname)" +openssl x509 -req -in /etc/postgresql/ssl/server.csr -CA /etc/postgresql/ssl/root.crt \ + -CAkey /etc/postgresql/ssl/root.key -CAcreateserial -out /etc/postgresql/ssl/server.crt \ + -days 3650 -sha256 +chmod 600 /etc/postgresql/ssl/server.key +chown postgres:postgres /etc/postgresql/ssl/server.* + +systemctl restart postgresql +``` + +- Ограничить в `pg_hba.conf` доступ для пользователя `postgres_exporter` +```conf +hostssl postgres postgres_exporter 127.0.0.1/32 scram-sha-256 +``` + +- Конфигурируем клиент +```bash +useradd -r -s /bin/false postgres_exporter +chown -R postgres_exporter:postgres_exporter /etc/postgres_exporter +chmod 600 /etc/postgres_exporter/* + +mkdir -p /etc/postgres_exporter +cp /etc/postgresql/ssl/root.crt /etc/postgres_exporter/ +chmod 700 /etc/postgres_exporter +chmod 400 /etc/postgres_exporter/root.crt +``` + +- Конфигурируем systemd +```bash +# Файл /etc/postgres_exporter/env +DATA_SOURCE_NAME=postgresql://postgres_exporter:password@127.0.0.1:5432/postgres?sslmode=verify-ca&sslrootcert=/etc/postgres_exporter/root.crt +``` +```bash +# Файл /etc/systemd/system/postgres_exporter.service +[Unit] +Description=PostgreSQL Exporter for Prometheus +After=network.target + +[Service] +User=postgres_exporter +Group=postgres_exporter +EnvironmentFile=/etc/postgres_exporter/env +ExecStart=/usr/local/bin/postgres_exporter + +[Install] +WantedBy=multi-user.target +``` + +- Добавить в prometheus.yml +```yml +- job_name: 'postgres_exporter' + static_configs: + - targets: ['localhost:9187'] +``` + +```bash +systemctl restart prometheus +``` + +### MariaDB Exporter + +#### На сервере MariaDB + +- Создать файл `openssl.cnf` +```conf +[ req ] +prompt = no +distinguished_name = req_distinguished_name +req_extensions = v3_req + +[ req_distinguished_name ] +CN = 89.22.228.13 + +[ v3_req ] +keyUsage = keyEncipherment, dataEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names + +[ alt_names ] +DNS.1 = mariadb.mcarov.pro +IP.1 = 89.22.228.13 +``` + +- Создаём серты +```bash +mkdir -p /etc/mysql/ssl + +openssl req -x509 -new -nodes -days 3650 \ + -subj "/CN=MyMariaDB-CA" \ + -keyout /etc/mysql/ssl/ca-key.pem \ + -out /etc/mysql/ssl/ca.pem \ + -sha256 -days 3650 + +openssl req -new -nodes -newkey rsa:2048 \ + -keyout /etc/mysql/ssl/server-key.pem \ + -out /etc/mysql/ssl/server.csr \ + -config openssl.cnf + +openssl x509 -req -in /etc/mysql/ssl/server.csr \ + -CA /etc/mysql/ssl/ca.pem -CAkey /etc/mysql/ssl/ca-key.pem \ + -CAcreateserial \ + -out /etc/mysql/ssl/server-cert.pem \ + -days 3650 -sha256 \ + -extensions v3_req -extfile openssl.cnf +``` + +- В конфиге MariaDB +```conf +[mysqld] +ssl-ca=/etc/mysql/ssl/ca.pem +ssl-cert=/etc/mysql/ssl/server-cert.pem +ssl-key=/etc/mysql/ssl/server-key.pem +``` +```bash +sudo systemctl restart mariadb +``` + +- Создать в базе пользователя экспортера +```sql +CREATE USER 'mariadb_exporter'@'192.109.139.92' IDENTIFIED BY 'mariadb_exporter'; +GRANT SELECT, PROCESS, REPLICATION CLIENT, RELOAD ON *.* TO 'mariadb_exporter'@'192.109.139.92' IDENTIFIED BY 'mariadb_exporter'; +FLUSH PRIVILEGES; +``` + +#### На сервере Exporter + +- Ставим экспортер на вм, где есть Pronetheus Server +```bash +wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.17.2/mysqld_exporter-0.17.2.linux-amd64.tar.gz +tar -xzvf mysqld_exporter-0.17.2.linux-amd64.tar.gz +rm mysqld_exporter-0.17.2.linux-amd64.tar.gz +mv mysqld_exporter-0.17.2.linux-amd64/mysqld_exporter /usr/local/bin + chmod +x /usr/local/bin/mysqld_exporter +``` + +- Создаём пользователя для экспортера +```bash +useradd -r -s /usr/sbin/nologin mariadb_exporter +mkdir -p /etc/mariadb_exporter +chown -R mariadb_exporter:mariadb_exporter /etc/mariadb_exporter +``` + +- Создать `/etc/mariadb_exporter/.my.cnf` +```conf +[client] +user=mariadb_exporter +password=mariadb_exporter +host=89.22.228.14 +ssl-ca=/etc/mariadb_exporter/ca.pem +ssl-verify-server-cert +``` +```bash +chown -R mariadb_exporter:mariadb_exporter /etc/mariadb_exporter +``` + +- Создаём Unit-файл +```conf +[Unit] +Description=Prometheus MariaDB Exporter +After=network.target + +[Service] +User=mariadb_exporter +Group=mariadb_exporter +ExecStart=/usr/local/bin/mysqld_exporter \ + --config.my-cnf=/etc/mariadb_exporter/.my.cnf +Restart=on-failure + +[Install] +WantedBy=multi-user.target +``` +```bash +systemctl daemon-reload +systemctl enable --now mariadb_exporter +``` + +- Добавляем в `prometheus.yml` +```yml +- job_name: 'mariadb_exporter' + static_configs: + - targets: ['localhost:9104'] +``` + +- Проверка +```bash +curl http://localhost:9104/metrics +``` + +## Alerts + +### Устанавливаем `Alertmanager` + +```bash +useradd -r -s /usr/sbin/nologin alertmanager + +wget https://github.com/prometheus/alertmanager/releases/download/v0.28.1/alertmanager-0.28.1.linux-amd64.tar.gz +tar -xzvf alertmanager-*.tar.gz +mv alertmanager-*/alertmanager /usr/local/bin/ +mv alertmanager-*/amtool /usr/local/bin/ +chown alertmanager:alertmanager /usr/local/bin/alertmanager +chown alertmanager:alertmanager /usr/local/bin/amtool +rm -rf alertmanager-* +``` + +- Подготовка +```bash +mkdir -p /etc/alertmanager +mkdir -p /var/lib/alertmanager +chown -R alertmanager:alertmanager /etc/alertmanager /var/lib/alertmanager +``` + +- Файл `/etc/alertmanager/alertmanager.yml` +```yml +global: + resolve_timeout: 5m + +route: + group_by: ['alertname'] + group_wait: 30s + group_interval: 5m + repeat_interval: 1h + receiver: 'default-receiver' + +receivers: +- name: 'default-receiver' + webhook_configs: + - url: 'http://localhost:9093/-/healthy' # временный URL для теста + send_resolved: true +``` + +- Файл `/etc/systemd/system/alertmanager.service` +```conf +[Unit] +Description=Alertmanager +Wants=network-online.target +After=network-online.target + +[Service] +User=alertmanager +Group=alertmanager +Type=simple +ExecStart=/usr/local/bin/alertmanager \ + --config.file=/etc/alertmanager/alertmanager.yml \ + --storage.path=/var/lib/alertmanager +Restart=always + +[Install] +WantedBy=multi-user.target +``` +```bash +systemctl daemon-reload +systemctl enable --now alertmanager +systemctl status alertmanager +``` + +#### HTTPS Alertmanager + +- `vi /etc/nginx/sites-available/alertmanager.mcarov.pro` +```conf +server { + listen 80; + server_name alertmanager.mcarov.pro; + + location / { + proxy_pass http://127.0.0.1:9093; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} +``` +```bash +ln -s /etc/nginx/sites-available/alertmanager.mcarov.pro /etc/nginx/sites-enabled/ +nginx -t +systemctl reload nginx +``` +```bash +certbot --nginx -d alertmanager.mcarov.pro +``` + +- Аутентификация +```bash +htpasswd -c /etc/nginx/alertmanager/.htpasswd admin +``` +```conf +# Добавить в кофиг nginx +auth_basic "Alertmanager"; +auth_basic_user_file /etc/nginx/alertmanager/.htpasswd; +``` +```bash +nginx -t +systemctl reload nginx +``` + +### Настройка алертов + +- Добавить `Alertmanager` в `prometheus.yml` +```yml +alerting: + alertmanagers: + - static_configs: + - targets: ['localhost:9093'] +``` +```bash +systemctl restart prometheus +``` + +- Проверка +``` +# Должен вернуть ОК +curl http://localhost:9093/-/healthy +amtool check-config /etc/alertmanager/alertmanager.yml +``` + + + +- Создай бота в телеграм, создай чат, добавь его в чат +- Получи `chat-id` +!!! warning "" + В чате должно быть минимум 1 сообщения +```bash +curl "https://api.telegram.org/bot/getUpdates" | jq +``` + +- `/etc/alertmanager/templates/telegram.tmpl` +``` +{{ define "telegram.critical.message" }} +{{ if eq .Status "firing" }} +🔥 *[CRITICAL ALERT]* {{ .CommonLabels.alertname }} +📌 **Instance**: {{ .CommonLabels.instance }} +🕒 **Firing since**: {{ (.Alerts.Firing | first).StartsAt.Format "2006-01-02 15:04:05" }} +📝 **Summary**: {{ .CommonAnnotations.summary }} + +{{ .CommonAnnotations.description }} +{{ if .CommonAnnotations.runbook }}🔗 **Runbook**: {{ .CommonAnnotations.runbook }}{{ end }} +{{ else }} +✅ *[CRITICAL RESOLVED]* {{ .CommonLabels.alertname }} +📌 **Instance**: {{ .CommonLabels.instance }} +🕒 **Resolved at**: {{ (.Alerts.Resolved | first).EndsAt.Format "2006-01-02 15:04:05" }} +📝 **Summary**: {{ .CommonAnnotations.summary }} +{{ end }} +{{ end }} + +{{ define "telegram.warning.message" }} +{{ if eq .Status "firing" }} +⚠️ *[WARNING]* {{ .CommonLabels.alertname }} + +{{ .CommonAnnotations.summary }} +**Details**: {{ .CommonAnnotations.description }} +{{ else }} +✅ *[WARNING RESOLVED]* {{ .CommonLabels.alertname }} +📝 {{ .CommonAnnotations.summary }} +{{ end }} +{{ end }} + +{{ define "telegram.db.message" }} +{{ if eq .Status "firing" }} +🛠 *[DB ALERT]* {{ .CommonLabels.alertname }} ({{ .CommonLabels.service }}) + +{{ .CommonAnnotations.description }} +**Action required**: {{ .CommonAnnotations.runbook }} +{{ else }} +✅ *[DB ALERT RESOLVED]* {{ .CommonLabels.alertname }} ({{ .CommonLabels.service }}) +📝 {{ .CommonAnnotations.summary }} +{{ end }} +{{ end }} + +{{ define "telegram.default.message" }} +{{ if eq .Status "firing" }} +ℹ️ *[ALERT]* {{ .CommonLabels.alertname }} + +{{ .CommonAnnotations.summary }} +{{ .CommonAnnotations.description }} +{{ else }} +✅ *[ALERT RESOLVED]* {{ .CommonLabels.alertname }} +📝 {{ .CommonAnnotations.summary }} +{{ end }} +{{ end }} +``` + +- `/etc/alertmanager/alertmanager.yml` +```yml +global: + resolve_timeout: 5m + http_config: + follow_redirects: true + +templates: + - '/etc/alertmanager/templates/*.tmpl' + +route: + group_by: ['alertname', 'severity'] + group_wait: 30s + group_interval: 5m + repeat_interval: 4h + receiver: 'telegram-default' + + routes: + - match: + severity: 'critical' + receiver: 'telegram-critical' + continue: false + group_interval: 15m + repeat_interval: 2h + + - match: + severity: 'warning' + receiver: 'telegram-warnings' + group_interval: 1h + repeat_interval: 12h + + - match_re: + service: 'mysql|postgres|influx' + receiver: 'telegram-db-team' + +inhibit_rules: +- source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['alertname'] + +receivers: +- name: 'telegram-critical' + telegram_configs: + - bot_token: 'BOT_TOKEN' + chat_id: CHAT_ID + parse_mode: 'Markdown' + message: '{{ template "telegram.critical.message" . }}' + send_resolved: true + +- name: 'telegram-warnings' + telegram_configs: + - bot_token: 'BOT_TOKEN' + chat_id: CHAT_ID + parse_mode: 'Markdown' + message: '{{ template "telegram.warning.message" . }}' + send_resolved: true + +- name: 'telegram-db-team' + telegram_configs: + - bot_token: 'BOT_TOKEN' + chat_id: CHAT_ID + parse_mode: 'Markdown' + message: '{{ template "telegram.db.message" . }}' + send_resolved: true + +- name: 'telegram-default' + telegram_configs: + - bot_token: 'BOT_TOKEN' + chat_id: CHAT_ID + parse_mode: 'Markdown' + message: '{{ template "telegram.default.message" . }}' + send_resolved: true +``` +```bash +# проверка конфига +amtool check-config /etc/alertmanager/alertmanager.yml +# проверка шаблонов +amtool check-config /etc/alertmanager/alertmanager.yml --template-files /etc/alertmanager/templates/*.tmpl +``` +```bash +systemctl restart prometheus alertmanager +``` + +- `/etc/prometheus/rules/*_rules.yml` +```yml +groups: +- name: Infrastructure + rules: + # CPU + - alert: HighCpuUsage + expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100 > 80 + for: 10m + labels: + severity: warning + category: infra + annotations: + summary: "High CPU usage on {{ $labels.instance }}" + description: "CPU usage is {{ $value }}% for 10 minutes." + + # Memory + - alert: HighMemoryUsage + expr: (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))) * 100 > 85 + for: 15m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.instance }}" + description: "Memory usage is {{ $value }}% for 15 minutes." + + # Disk + - alert: LowDiskSpace + expr: (node_filesystem_avail_bytes{mountpoint=~"/|/var", fstype!="tmpfs"} / node_filesystem_size_bytes{mountpoint=~"/|/var"} * 100) < 15 + for: 10m + labels: + severity: critical + annotations: + summary: "Low disk space on {{ $labels.mountpoint }} ({{ $labels.instance }})" + description: "Only {{ printf \"%.2f\" $value }}% space left on {{ $labels.mountpoint }}." + + # Network + - alert: HighNetworkErrors + expr: rate(node_network_transmit_errs_total[2m]) + rate(node_network_receive_errs_total[2m]) > 10 + for: 5m + labels: + severity: warning + annotations: + summary: "Network errors on {{ $labels.instance }}" + +- name: ServiceHealth + rules: + # Service Availability + - alert: ServiceDown + expr: up == 0 + for: 3m + labels: + severity: critical + annotations: + summary: "Service {{ $labels.job }} down on {{ $labels.instance }}" + description: "The service has been down for more than 3 minutes." + +- name: PrometheusMonitoring + rules: + # Prometheus self-monitoring + - alert: PrometheusDown + expr: up{job="prometheus"} == 0 + for: 5m + labels: + severity: critical + category: monitoring + annotations: + summary: "Prometheus is unreachable" + description: "Prometheus is up==0 for more than 5 minutes." + + # Exporter monitoring (Node Exporter) + - alert: NodeExporterDown + expr: up{job="node"} == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Node Exporter down on {{ $labels.instance }}" + + # Alertmanager monitoring + - alert: AlertmanagerDown + expr: up{job="alertmanager"} == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Alertmanager is unreachable" +``` +```bash +# проверить +promtool check rules /etc/prometheus/rules/*.yml +``` + +### Blackbox Exporter + +- Создаём пользователя +```bash +useradd --no-create-home --shell /usr/sbin/nologin blackbox_exporter +``` + +- Устанавливаем +```bash +wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.26.0/blackbox_exporter-0.26.0.linux-amd64.tar.gz +tar -xzvf blackbox_exporter-0.26.0.linux-amd64.tar.gz +mv blackbox_exporter-0.26.0.linux-amd64/blackbox_exporter /usr/local/bin/ +chown blackbox_exporter:blackbox_exporter /usr/local/bin/blackbox_exporter +rm -rf blackbox_exporter* +``` + +- Структура конфига `Blackbox exporter` +```bash +modules: + : + prober: + +``` + +- Конфигурируем экспортер +```bash +mkdir /etc/blackbox_exporter +``` + +- `vi /etc/blackbox_exporter/blackbox.yml` +```yml +modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_status_codes: + - 200 + - 201 + - 202 + - 203 + - 204 + - 205 + - 206 + - 207 + - 208 + - 226 + + http_auth_2xx: + prober: http + timeout: 5s + http: + headers: + # логин:пароль в base64 (echo -n login:pass | base64) + Authorization: "Basic YWRtRTc0fakehashfdVnZldWE=" + valid_status_codes: + - 200 + - 201 + - 202 + - 203 + - 204 + - 205 + - 206 + - 207 + - 208 + - 226 + + tcp_connect: + prober: tcp + timeout: 5s + + postgres_tcp: + prober: tcp + tcp: + tls: false + + mariadb_tcp: + prober: tcp + tcp: + query_response: + - expect: "^" + tls: false + + vm_icmp: + prober: icmp + timeout: 3s +``` + +```bash +chown -R blackbox_exporter:blackbox_exporter /etc/blackbox_exporter +``` + +- `vi /etc/systemd/system/blackbox_exporter.service` +```bash +[Unit] +Description=Prometheus Blackbox Exporter +Wants=network-online.target +After=network-online.target + +[Service] +User=blackbox_exporter +Group=blackbox_exporter +AmbientCapabilities=CAP_NET_RAW +CapabilityBoundingSet=CAP_NET_RAW +Type=simple +ExecStart=/usr/local/bin/blackbox_exporter \ + --config.file=/etc/blackbox_exporter/blackbox.yml \ + --web.listen-address="127.0.0.1:9115" +Restart=on-failure + +[Install] +WantedBy=multi-user.target +``` + +- Запускаем экспортер +```bash +systemctl daemon-reload +systemctl enable --now blackbox_exporter +systemctl status blackbox_exporter +``` + +- Проверка +```bash +curl http://127.0.0.1:9115/metrics +``` + +- Настроим доступ к экспортеру по HTTPS `vi /etc/nginx/sites-available/blackbox.mcarov.pro` +```bash +server { + server_name blackbox.mcarov.pro; + + location / { + proxy_pass http://127.0.0.1:9115; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + access_log /var/log/nginx/blackbox.mcarov.pro.access.log; + error_log /var/log/nginx/blackbox.mcarov.pro.error.log; +} +``` + +```bash +ln -s /etc/nginx/sites-available/blackbox.mcarov.pro /etc/nginx/sites-enabled/ +nginx -t +systemctl reload nginx +``` + +```bash +certbot --nginx -d blackbox.mcarov.pro +``` + +- Добавить в `/etc/crontab` для обновления сертов +```bash +0 0,12 * * * root /opt/certbot/bin/python -c 'import random; import time; time.sleep(random.random() * 3600)' && sudo certbot renew -q +``` + +- Базовая аутентификация +```bash +apt install apache2-utils +mkdir -p /etc/nginx/blackbox +htpasswd -c /etc/nginx/blackbox/.htpasswd admin +``` + +```conf +# Добавить в конфиг nginx +auth_basic "Blackbox auth"; +auth_basic_user_file /etc/nginx/blackbox/.htpasswd; +``` + +```bash +nginx -t +systemctl reload nginx +``` + +- `vi /etc/prometheus/prometheus.yml` +```yml + - job_name: 'blackbox-http' + metrics_path: /probe + scheme: https + basic_auth: + username: 'admin' + password: 'password' + tls_config: + insecure_skip_verify: false + params: + module: [http_2xx] + static_configs: + - targets: + - https://grafana.mcarov.pro + - https://git.mcarov.pro + - https://wiki.mcarov.pro + - https://minio.mcarov.pro + - https://influx.mcarov.pro/ping + relabel_configs: &relabel + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox.mcarov.pro + + - job_name: 'blackbox-http-auth' + metrics_path: /probe + scheme: https + basic_auth: + username: 'admin' + password: 'password' + tls_config: + insecure_skip_verify: false + params: + module: [http_auth_2xx] + static_configs: + - targets: + - https://alertmanager.mcarov.pro + - https://prometheus.mcarov.pro + - https://blackbox.mcarov.pro + relabel_configs: *relabel + + - job_name: 'postgres-check' + metrics_path: /probe + scheme: https + basic_auth: + username: 'admin' + password: 'password' + tls_config: + insecure_skip_verify: false + params: + module: [postgres_tcp] + static_configs: + - targets: + - 127.0.0.1:5432 + relabel_configs: *relabel + + - job_name: 'mariadb-check' + metrics_path: /probe + scheme: https + basic_auth: + username: 'admin' + password: 'password' + tls_config: + insecure_skip_verify: false + params: + module: [mariadb_tcp] + static_configs: + - targets: + - 89.22.28.13:3306 + relabel_configs: *relabel + + - job_name: 'vm-ping' + metrics_path: /probe + scheme: https + basic_auth: + username: 'admin' + password: 'password' + tls_config: + insecure_skip_verify: false + params: + module: [vm_icmp] + static_configs: + - targets: + - 192.10.139.92 + - 150.21.66.94 + - 89.2.228.13 + relabel_configs: *relabel +``` + +#### ура, Алерты для блэкбокс + +- `vi blackbox_alerts.yml` +```yml +groups: +- name: blackbox_exporter_alerts + rules: + - alert: ServiceDown + expr: probe_success == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Сервис {{ $labels.instance }} недоступен" + description: | + Сервис {{ $labels.instance }} (job={{ $labels.job }}) + не отвечает уже более 2 минут. + + - alert: HighLatency + expr: probe_success == 1 and avg_over_time(probe_duration_seconds[1m]) > 1 + for: 5m + labels: + severity: warning + annotations: + summary: "Высокая задержка у {{ $labels.instance }}" + description: | + Сервис {{ $labels.instance }} (job={{ $labels.job }}) отвечает медленно: + probe_duration_seconds={{ printf "%.3f" $value }}s (больше 1s) + уже более 5 минут. + + - alert: BlackboxSslCertificateWillExpireSoon + expr: 0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3 + for: 0m + labels: + severity: warning + annotations: + summary: "TLS-сертификат истёкает для {{ $labels.instance }}" + description: | + Срок действия TLS-сертификата для {{ $labels.instance }} истёкает через 3 дня. + + + - alert: TLSCertificateExpired + expr: round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 0 + for: 0m + labels: + severity: critical + annotations: + summary: "TLS-сертификат истёк для {{ $labels.instance }}" +``` + +```bash +chown prometheus:prometheus /etc/prometheus/rules/blackbox_alerts.yml +``` + +- Добавить в `prometheus.yml` +```bash +rule_files: + - 'rules/blackbox_alerts.yml' +``` + +- `promtool check config /etc/prometheus/prometheus.yml`- проверка синтаксиса +- `promtool check rules /etc/prometheus/rules/blackbox_alerts.yml` - проверка синтаксиса \ No newline at end of file