/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-alertmanager.rules.yaml > alertmanager.rules
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-etcd.yaml > etcd
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-general.rules.yaml > general.rules
Labels:       alertname="TargetDown" job="pmc-partner-api" namespace="pmc-production" service="pmc-partner-api" severity="warning"
State:        firing
Active Since: 2026-03-23 13:24:52.549584131 +0000 UTC
Value:        100
Annotations:
  message: 100% of the pmc-partner-api/pmc-partner-api targets in pmc-production namespace are down.

Labels:       alertname="TargetDown" job="kube-proxy" namespace="kube-system" service="kube-prometheus-stack-kube-proxy" severity="warning"
State:        firing
Active Since: 2026-03-23 13:24:52.549584131 +0000 UTC
Value:        100
Annotations:
  message: 100% of the kube-proxy/kube-prometheus-stack-kube-proxy targets in kube-system namespace are down.

Labels:       alertname="TargetDown" job="kube-state-metrics" namespace="monitoring" service="kube-prometheus-stack-kube-state-metrics" severity="warning"
State:        firing
Active Since: 2026-03-23 13:24:52.549584131 +0000 UTC
Value:        100
Annotations:
  message: 100% of the kube-state-metrics/kube-prometheus-stack-kube-state-metrics targets in monitoring namespace are down.

Labels:       alertname="TargetDown" job="kube-prometheus-stack-operator" namespace="monitoring" service="kube-prometheus-stack-operator" severity="warning"
State:        firing
Active Since: 2026-03-23 13:25:22.549584131 +0000 UTC
Value:        100
Annotations:
  message: 100% of the kube-prometheus-stack-operator/kube-prometheus-stack-operator targets in monitoring namespace are down.

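The TargetDown rule that produces these entries is not expanded on this page; in kube-prometheus it computes, per job/namespace/service, the percentage of scrape targets reporting up == 0. A sketch reconstructed from the upstream mixin (not read from this cluster's rule file):

alert: TargetDown
expr: 100 * (count(up == 0) by (job, namespace, service) / count(up) by (job, namespace, service)) > 10
for: 10m
labels:
  severity: warning

A value of 100 in the entries above therefore means every target of that service is down, not merely that the 10% threshold was crossed.
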
alert: Watchdog
expr: vector(1)
labels:
  severity: none
annotations:
  message: |
    This is an alert meant to ensure that the entire alerting pipeline is functional.
    This alert is always firing, therefore it should always be firing in Alertmanager
    and always fire against a receiver. There are integrations with various notification
    mechanisms that send a notification when this alert is not firing. For example the
    "DeadMansSnitch" integration in PagerDuty.

Labels:       alertname="Watchdog" severity="none"
State:        firing
Active Since: 2026-03-23 13:24:52.549584131 +0000 UTC
Value:        1
Annotations:
  message: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty.

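Watchdog firing is the healthy state; its value comes from routing it to a receiver that an external service expects to hear from continuously, so the external service pages when the notifications stop. A minimal Alertmanager sketch, assuming a Dead Man's Snitch-style webhook (receiver name and URL are hypothetical):

route:
  routes:
    - match:
        alertname: Watchdog
      receiver: deadmans-snitch     # hypothetical receiver name
      repeat_interval: 5m           # keep re-notifying while the pipeline is healthy
receivers:
  - name: deadmans-snitch
    webhook_configs:
      - url: https://nosnch.in/<snitch-id>   # hypothetical endpoint; it alerts when pings stop
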
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kube-apiserver-slos.yaml > kube-apiserver-slos
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kube-state-metrics.yaml > kube-state-metrics
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-apps.yaml > kubernetes-apps
alert: KubeJobFailed
expr: kube_job_failed{job="kube-state-metrics",namespace=~".*"} > 0
for: 15m
labels:
  severity: warning
annotations:
  description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
  summary: Job failed to complete.

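kube_job_failed reflects the Job's Failed status condition, so the alert keeps firing until the failed Job object is deleted or recreated successfully. A hypothetical Job that would trip it once its retry budget is spent:

apiVersion: batch/v1
kind: Job
metadata:
  name: example-migration          # hypothetical
  namespace: pmc-production        # hypothetical
spec:
  backoffLimit: 3                  # after 3 failed pods the Job is marked Failed
  ttlSecondsAfterFinished: 3600    # optional: auto-clean the Job, which also clears the alert
  template:
    spec:
      restartPolicy: Never
      containers:
        - name: migrate
          image: registry.example.org/migrate:latest   # hypothetical image
          command: ["/bin/sh", "-c", "exit 1"]         # always fails, for illustration
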
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-resources.yaml > kubernetes-resources
Labels:       alertname="CPUThrottlingHigh" container="fluent-bit" namespace="logging" pod="fluent-bit-9jxlr" severity="info"
State:        firing
Active Since: 2026-03-23 13:25:29.587197825 +0000 UTC
Value:        0.6947422154160285
Annotations:
  description: 69.47% throttling of CPU in namespace logging for container fluent-bit in pod fluent-bit-9jxlr.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
  summary: Processes experience elevated CPU throttling.

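CPUThrottlingHigh means the container hit its CFS quota in roughly 69% of scheduling periods, which throttles it even when the node has spare CPU. The usual remedy is to raise (or remove) the CPU limit while keeping the request; a hypothetical adjustment to the fluent-bit container spec (values are illustrative, not measured):

resources:
  requests:
    cpu: 100m        # still guarantees scheduling capacity
    memory: 128Mi
  limits:
    cpu: 500m        # raised so bursts no longer exhaust the CFS quota
    memory: 256Mi

Since the severity here is only info, chronic low-grade throttling of a log shipper may also be acceptable as-is.
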
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-storage.yaml > kubernetes-storage
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-system-apiserver.yaml > kubernetes-system-apiserver
Labels:       alertname="KubeClientCertificateExpiration" endpoint="https" instance="192.168.66.14:6443" job="apiserver" namespace="default" service="kubernetes" severity="warning"
State:        firing
Active Since: 2026-03-23 13:25:23.389000355 +0000 UTC
Value:        1.14291619e+08
Annotations:
  description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
  summary: Client certificate is about to expire.

Labels:       alertname="KubeClientCertificateExpiration" endpoint="https" instance="192.168.66.13:6443" job="apiserver" namespace="default" service="kubernetes" severity="warning"
State:        firing
Active Since: 2026-03-23 13:25:23.389000355 +0000 UTC
Value:        3.0431233e+07
Annotations:
  description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
  summary: Client certificate is about to expire.

Labels:       alertname="KubeClientCertificateExpiration" endpoint="https" instance="192.168.66.12:6443" job="apiserver" namespace="default" service="kubernetes" severity="warning"
State:        firing
Active Since: 2026-03-23 13:25:23.389000355 +0000 UTC
Value:        1.44898287e+08
Annotations:
  description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
  summary: Client certificate is about to expire.

Labels:       alertname="KubeClientCertificateExpiration" endpoint="https" instance="192.168.66.12:6443" job="apiserver" namespace="default" service="kubernetes" severity="critical"
State:        firing
Active Since: 2026-03-23 13:25:23.389000355 +0000 UTC
Value:        1.44898287e+08
Annotations:
  description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
  summary: Client certificate is about to expire.

Labels:       alertname="KubeClientCertificateExpiration" endpoint="https" instance="192.168.66.14:6443" job="apiserver" namespace="default" service="kubernetes" severity="critical"
State:        firing
Active Since: 2026-03-23 13:25:23.389000355 +0000 UTC
Value:        1.14291619e+08
Annotations:
  description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
  summary: Client certificate is about to expire.

Labels:       alertname="KubeClientCertificateExpiration" endpoint="https" instance="192.168.66.13:6443" job="apiserver" namespace="default" service="kubernetes" severity="critical"
State:        firing
Active Since: 2026-03-23 13:25:23.389000355 +0000 UTC
Value:        3.0431233e+07
Annotations:
  description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
  summary: Client certificate is about to expire.

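The rule definition behind these alerts is collapsed on this page; in the kubernetes-mixin it is derived from the apiserver's client-certificate expiry histogram, approximately as follows (warning variant shown; the critical one swaps 604800 for 86400):

alert: KubeClientCertificateExpiration
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0
      and on(job)
      histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
labels:
  severity: warning

Note that the Value column above carries the left-hand side of the and (the count of certificate observations), not a number of seconds until expiry, which is why it looks so large.
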
alert: AggregatedAPIErrors
expr: sum by(name, namespace) (increase(aggregator_unavailable_apiservice_count[5m])) > 2
labels:
  severity: warning
annotations:
  description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors has increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
  summary: An aggregated API has reported errors.

alert: KubeAPIDown
expr: absent(up{job="apiserver"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  description: KubeAPI has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
  summary: Target disappeared from Prometheus target discovery.

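The absent(... == 1) idiom used here (and by the Down rules below) is worth unpacking: up{job="apiserver"} == 1 keeps only healthy targets, and absent() returns an empty result while any such series exists, or a single 1-valued series once none do. The alert therefore also covers the case where the job vanished from service discovery entirely, which a plain up == 0 check would miss. A quick way to see the idiom behave (second job name hypothetical):

absent(up{job="apiserver"} == 1)        # empty while at least one apiserver target is up
absent(up{job="no-such-job"} == 1)      # returns a 1-valued series: nothing matches at all
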
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-system-controller-manager.yaml > kubernetes-system-controller-manager
alert: KubeControllerManagerDown
expr: absent(up{job="kube-controller-manager"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  description: KubeControllerManager has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
  summary: Target disappeared from Prometheus target discovery.

/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-system-kubelet.yaml > kubernetes-system-kubelet
alert: KubeletClientCertificateExpiration
expr: kubelet_certificate_manager_client_ttl_seconds < 86400
labels:
  severity: critical
annotations:
  description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
  summary: Kubelet client certificate is about to expire.

alert: KubeletClientCertificateExpiration
expr: kubelet_certificate_manager_client_ttl_seconds < 604800
labels:
  severity: warning
annotations:
  description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
  summary: Kubelet client certificate is about to expire.

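Both thresholds concern the certificate the kubelet presents to the API server. On clusters using TLS bootstrap, automatic rotation should make these alerts rare; a minimal KubeletConfiguration sketch (the fields are upstream kubelet options, but whether this cluster manages kubelet config this way is an assumption):

apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
rotateCertificates: true     # rotate the client certificate automatically
serverTLSBootstrap: true     # also request serving certificates via CSRs (relevant to the
                             # KubeletServerCertificateExpiration rules further down; the
                             # CSRs still require approval)
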
alert: KubeletDown
expr: absent(up{job="kubelet",metrics_path="/metrics"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  description: Kubelet has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
  summary: Target disappeared from Prometheus target discovery.

alert: KubeletServerCertificateExpiration
expr: kubelet_certificate_manager_server_ttl_seconds < 604800
labels:
  severity: warning
annotations:
  description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
  summary: Kubelet server certificate is about to expire.

alert: KubeletServerCertificateExpiration
expr: kubelet_certificate_manager_server_ttl_seconds < 86400
labels:
  severity: critical
annotations:
  description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
  summary: Kubelet server certificate is about to expire.

alert: KubeletServerCertificateRenewalErrors
expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0
for: 15m
labels:
  severity: warning
annotations:
  description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors
  summary: Kubelet has failed to renew its server certificate.

/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-system-scheduler.yaml > kubernetes-system-scheduler
alert: KubeSchedulerDown
expr: absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  description: KubeScheduler has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
  summary: Target disappeared from Prometheus target discovery.

/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-kubernetes-system.yaml > kubernetes-system
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-node-exporter.yaml > node-exporter
alert: NodeClockNotSynchronising
expr: min_over_time(node_timex_sync_status[5m]) == 0
for: 10m
labels:
  severity: warning
annotations:
  message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
  summary: Clock not synchronising.

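When this fires, the timex collector's offset gauge shows how far each affected clock has drifted; a sketch query joining the two (assuming the standard node-exporter metric names):

node_timex_offset_seconds
  and on(instance) (min_over_time(node_timex_sync_status[5m]) == 0)
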
alert: NodeNetworkReceiveErrs
expr: increase(node_network_receive_errs_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
  summary: Network interface is reporting many receive errors.

alert: NodeNetworkTransmitErrs
expr: increase(node_network_transmit_errs_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
  summary: Network interface is reporting many transmit errors.

alert: NodeRAIDDegraded
expr: node_md_disks_required - ignoring(state) (node_md_disks{state="active"}) > 0
for: 15m
labels:
  severity: critical
annotations:
  description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded
  summary: RAID Array is degraded

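The expression pairs two metrics with mismatched label sets: node_md_disks carries a state label that node_md_disks_required lacks, so ignoring(state) drops it to allow the one-to-one vector match. On hypothetical data:

# node_md_disks_required{device="md0"}         4
# node_md_disks{device="md0",state="active"}   3
node_md_disks_required - ignoring(state) (node_md_disks{state="active"})   # = 1, so degraded
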
alert: NodeRAIDDiskFailure
expr: node_md_disks{state="fail"} > 0
labels:
  severity: warning
annotations:
  description: At least one device in the RAID array on {{ $labels.instance }} has failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure
  summary: Failed device in RAID array

/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-node-network.yaml > node-network
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-prometheus-operator.yaml > prometheus-operator
/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0/monitoring-kube-prometheus-stack-prometheus.yaml > prometheus