Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ dist-ssr
deploy/docker-compose/mysql
deploy/docker-compose/redis

.codex
*.tar.gz
47 changes: 43 additions & 4 deletions alert/eval/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"github.com/zeromicro/go-zero/core/logc"
)

// ThresholdModeNodeOverride is the PrometheusConfig.ThresholdMode value under
// which selectPrometheusRules consults ThresholdOverrides before falling back
// to the default Rules.
const ThresholdModeNodeOverride = "node_override"

// Metrics Prometheus 数据源
func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.AlertRule) []string {
pools := ctx.Redis.ProviderPools()
Expand Down Expand Up @@ -57,16 +59,16 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
return nil
}

// 按优先级排序规则(P0 > P1 > P2)
rules := sortRulesByPriority(rule.PrometheusConfig.Rules)

for _, v := range resQuery {
// 避免共享引用导致的指纹不一致问题
metricLabels := make(map[string]interface{})
for k, val := range v.GetMetric() {
metricLabels[k] = val
}

// 按当前指标标签选择阈值规则,未命中覆盖配置时回退默认规则。
rules := sortRulesByPriority(selectPrometheusRules(rule.PrometheusConfig, metricLabels))

// 使用独立的标签副本来生成指纹,避免修改原始数据
fingerprintLabels := make(map[string]interface{})
for k, val := range metricLabels {
Expand Down Expand Up @@ -119,7 +121,7 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
event.Fingerprint = fingerprint
event.Severity = ruleExpr.Severity
event.SearchQL = fmt.Sprintf("%s %s %v", rule.PrometheusConfig.PromQL, operator, value)
event.ForDuration = rule.GetForDuration(ruleExpr.Severity)
event.ForDuration = ruleExpr.ForDuration
event.Annotations = tools.ParserVariables(rule.PrometheusConfig.Annotations, tools.ConvertStructToMap(event))
event.Status = models.StatePreAlert

Expand Down Expand Up @@ -168,6 +170,43 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
return curFingerprints
}

// selectPrometheusRules picks the threshold rules to evaluate for one metric
// series.
//
// Unless config.ThresholdMode equals ThresholdModeNodeOverride, the default
// config.Rules are returned. In override mode, config.ThresholdOverrides is
// scanned in order and the rules of the first entry that both carries at least
// one rule and whose MatchLabels match metricLabels are returned. When no
// entry qualifies, config.Rules is returned.
func selectPrometheusRules(config models.PrometheusConfig, metricLabels map[string]interface{}) []models.Rules {
	if config.ThresholdMode == ThresholdModeNodeOverride {
		for i := range config.ThresholdOverrides {
			candidate := config.ThresholdOverrides[i]
			// An override without rules cannot replace the defaults, so it is
			// passed over before its labels are even compared.
			if len(candidate.Rules) > 0 && matchMetricLabels(metricLabels, candidate.MatchLabels) {
				return candidate.Rules
			}
		}
	}

	return config.Rules
}

// matchMetricLabels reports whether metricLabels satisfies every entry of
// matchLabels.
//
// A label matches when it is present in metricLabels and its value, rendered
// with fmt's default formatting, equals the expected string. An empty or nil
// matchLabels never matches.
func matchMetricLabels(metricLabels map[string]interface{}, matchLabels map[string]string) bool {
	// Start from "matched" only when there is something to compare; the loop
	// below then flips the verdict on the first missing or differing label.
	matched := len(matchLabels) > 0

	for name, want := range matchLabels {
		got, present := metricLabels[name]
		if !present || fmt.Sprint(got) != want {
			matched = false
			break
		}
	}

	return matched
}

// sortRulesByPriority 按优先级排序规则
func sortRulesByPriority(rules []models.Rules) []models.Rules {
sortedRules := make([]models.Rules, len(rules))
Expand Down
118 changes: 118 additions & 0 deletions alert/eval/query_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package eval

import (
"reflect"
"testing"
"watchAlert/internal/models"
)

// TestSelectPrometheusRulesReturnsDefaultRulesWhenModeIsEmpty checks that a
// config with no ThresholdMode set yields its default rules.
func TestSelectPrometheusRulesReturnsDefaultRulesWhenModeIsEmpty(t *testing.T) {
	want := []models.Rules{
		{Severity: "P0", Expr: "> 95", ForDuration: 300},
	}
	cfg := models.PrometheusConfig{Rules: want}
	series := map[string]interface{}{"instance": "node-a:9100"}

	if got := selectPrometheusRules(cfg, series); !reflect.DeepEqual(got, want) {
		t.Fatalf("expected default rules, got %#v", got)
	}
}

// TestSelectPrometheusRulesReturnsOverrideRulesWhenLabelsMatch checks that, in
// node-override mode, an override whose labels match the series supplies the
// rules instead of the defaults.
func TestSelectPrometheusRulesReturnsOverrideRulesWhenLabelsMatch(t *testing.T) {
	fallback := []models.Rules{
		{Severity: "P0", Expr: "> 95", ForDuration: 300},
	}
	want := []models.Rules{
		{Severity: "P0", Expr: "> 98", ForDuration: 600},
	}

	nodeA := models.ThresholdOverride{
		MatchLabels: map[string]string{"instance": "node-a:9100"},
		Rules:       want,
	}
	cfg := models.PrometheusConfig{
		Rules:              fallback,
		ThresholdMode:      ThresholdModeNodeOverride,
		ThresholdOverrides: []models.ThresholdOverride{nodeA},
	}
	series := map[string]interface{}{"instance": "node-a:9100"}

	if got := selectPrometheusRules(cfg, series); !reflect.DeepEqual(got, want) {
		t.Fatalf("expected override rules, got %#v", got)
	}
}

// TestSelectPrometheusRulesFallsBackToDefaultRulesWhenLabelsDoNotMatch checks
// that, in node-override mode, a series matching no override is evaluated
// against the default rules.
func TestSelectPrometheusRulesFallsBackToDefaultRulesWhenLabelsDoNotMatch(t *testing.T) {
	want := []models.Rules{
		{Severity: "P0", Expr: "> 95", ForDuration: 300},
	}
	nodeAOnly := models.ThresholdOverride{
		MatchLabels: map[string]string{"instance": "node-a:9100"},
		Rules: []models.Rules{
			{Severity: "P0", Expr: "> 98", ForDuration: 600},
		},
	}
	cfg := models.PrometheusConfig{
		Rules:              want,
		ThresholdMode:      ThresholdModeNodeOverride,
		ThresholdOverrides: []models.ThresholdOverride{nodeAOnly},
	}
	// The series belongs to node-b, which the only override does not select.
	series := map[string]interface{}{"instance": "node-b:9100"}

	if got := selectPrometheusRules(cfg, series); !reflect.DeepEqual(got, want) {
		t.Fatalf("expected default rules, got %#v", got)
	}
}

// TestSelectPrometheusRulesSkipsEmptyOverrideRules checks that an override
// whose labels match but which carries no rules is ignored, so the default
// rules are returned.
func TestSelectPrometheusRulesSkipsEmptyOverrideRules(t *testing.T) {
	want := []models.Rules{
		{Severity: "P0", Expr: "> 95", ForDuration: 300},
	}
	// Labels match the series below, but Rules is deliberately left unset.
	ruleless := models.ThresholdOverride{
		MatchLabels: map[string]string{"instance": "node-a:9100"},
	}
	cfg := models.PrometheusConfig{
		Rules:              want,
		ThresholdMode:      ThresholdModeNodeOverride,
		ThresholdOverrides: []models.ThresholdOverride{ruleless},
	}
	series := map[string]interface{}{"instance": "node-a:9100"}

	if got := selectPrometheusRules(cfg, series); !reflect.DeepEqual(got, want) {
		t.Fatalf("expected default rules, got %#v", got)
	}
}

// TestMatchMetricLabelsRequiresAllLabelsToMatch checks that a selector matches
// only when every one of its labels equals the series' value.
func TestMatchMetricLabelsRequiresAllLabelsToMatch(t *testing.T) {
	series := map[string]interface{}{
		"instance": "node-a:9100",
		"os_type":  "Linux",
	}
	identical := map[string]string{
		"instance": "node-a:9100",
		"os_type":  "Linux",
	}
	// Same instance, different os_type: one differing label must be enough
	// to reject the selector.
	oneDiffers := map[string]string{
		"instance": "node-a:9100",
		"os_type":  "Windows",
	}

	if ok := matchMetricLabels(series, identical); !ok {
		t.Fatal("expected all labels to match")
	}

	if ok := matchMetricLabels(series, oneDiffers); ok {
		t.Fatal("expected mismatched label to fail")
	}
}
9 changes: 9 additions & 0 deletions internal/models/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ type PrometheusConfig struct {
PromQL string `json:"promQL"`
Annotations string `json:"annotations"`
//ForDuration int64 `json:"forDuration"`
Rules []Rules `json:"rules"`

ThresholdMode string `json:"thresholdMode"`
ThresholdOverrides []ThresholdOverride `json:"thresholdOverrides"`
Rules []Rules `json:"rules"`
CallbakPromQLs []CallbakPromQLs `json:"callbakPromQLs"`
}
Expand All @@ -112,6 +116,11 @@ type Rules struct {
Expr string `json:"expr"`
}

// ThresholdOverride pairs a label selector with the rules to use, in place of
// the default PrometheusConfig.Rules, for metric series matching that selector.
type ThresholdOverride struct {
// MatchLabels maps label names to the values a metric series must carry
// for this override to apply.
MatchLabels map[string]string `json:"matchLabels"`
// Rules are the threshold rules used when MatchLabels matches.
Rules []Rules `json:"rules"`
}

type EffectiveTime struct {
Week []string `json:"week"`
StartTime int `json:"startTime"`
Expand Down