diff --git a/.gitignore b/.gitignore
index 848f41e..8a37e8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,5 @@ dist-ssr
 
 deploy/docker-compose/mysql
 deploy/docker-compose/redis
+.codex
+*.tar.gz
\ No newline at end of file
diff --git a/alert/eval/query.go b/alert/eval/query.go
index 1350199..504e317 100644
--- a/alert/eval/query.go
+++ b/alert/eval/query.go
@@ -15,6 +15,10 @@ import (
 	"github.com/zeromicro/go-zero/core/logc"
 )
 
+// ThresholdModeNodeOverride is the PrometheusConfig.ThresholdMode value that
+// makes selectPrometheusRules consult ThresholdOverrides before the default rules.
+const ThresholdModeNodeOverride = "node_override"
+
 // Metrics Prometheus 数据源
 func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.AlertRule) []string {
 	pools := ctx.Redis.ProviderPools()
@@ -57,9 +61,6 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
 		return nil
 	}
 
-	// 按优先级排序规则(P0 > P1 > P2)
-	rules := sortRulesByPriority(rule.PrometheusConfig.Rules)
-
 	for _, v := range resQuery {
 		// 避免共享引用导致的指纹不一致问题
 		metricLabels := make(map[string]interface{})
@@ -67,6 +68,10 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
 			metricLabels[k] = val
 		}
 
+		// Select the threshold rules by this series' labels; the default rules
+		// are used when no override matches.
+		rules := sortRulesByPriority(selectPrometheusRules(rule.PrometheusConfig, metricLabels))
+
 		// 使用独立的标签副本来生成指纹,避免修改原始数据
 		fingerprintLabels := make(map[string]interface{})
 		for k, val := range metricLabels {
@@ -119,7 +124,7 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
 			event.Fingerprint = fingerprint
 			event.Severity = ruleExpr.Severity
 			event.SearchQL = fmt.Sprintf("%s %s %v", rule.PrometheusConfig.PromQL, operator, value)
-			event.ForDuration = rule.GetForDuration(ruleExpr.Severity)
+			event.ForDuration = ruleExpr.ForDuration
 			event.Annotations = tools.ParserVariables(rule.PrometheusConfig.Annotations, tools.ConvertStructToMap(event))
 			event.Status = models.StatePreAlert
 
@@ -168,6 +173,50 @@ func metrics(ctx *ctx.Context, datasourceId, datasourceType string, rule models.
 	return curFingerprints
 }
 
+// selectPrometheusRules returns the threshold rules that apply to one series.
+// Unless config.ThresholdMode is ThresholdModeNodeOverride, config.Rules is
+// returned as is. Otherwise the first override that has rules and whose
+// MatchLabels all match metricLabels wins; config.Rules is the fallback.
+func selectPrometheusRules(config models.PrometheusConfig, metricLabels map[string]interface{}) []models.Rules {
+	if config.ThresholdMode != ThresholdModeNodeOverride {
+		return config.Rules
+	}
+
+	for _, override := range config.ThresholdOverrides {
+		if len(override.Rules) == 0 {
+			continue
+		}
+
+		if matchMetricLabels(metricLabels, override.MatchLabels) {
+			return override.Rules
+		}
+	}
+
+	return config.Rules
+}
+
+// matchMetricLabels reports whether every entry of matchLabels is present in
+// metricLabels with an equal value, comparing the "%v" formatting of the metric
+// value against the expected string. An empty matchLabels never matches.
+func matchMetricLabels(metricLabels map[string]interface{}, matchLabels map[string]string) bool {
+	if len(matchLabels) == 0 {
+		return false
+	}
+
+	for key, expected := range matchLabels {
+		actual, ok := metricLabels[key]
+		if !ok {
+			return false
+		}
+
+		if fmt.Sprintf("%v", actual) != expected {
+			return false
+		}
+	}
+
+	return true
+}
+
 // sortRulesByPriority 按优先级排序规则
 func sortRulesByPriority(rules []models.Rules) []models.Rules {
 	sortedRules := make([]models.Rules, len(rules))
diff --git a/alert/eval/query_test.go b/alert/eval/query_test.go
new file mode 100644
index 0000000..26eada8
--- /dev/null
+++ b/alert/eval/query_test.go
@@ -0,0 +1,118 @@
+package eval
+
+import (
+	"reflect"
+	"testing"
+	"watchAlert/internal/models"
+)
+
+func TestSelectPrometheusRulesReturnsDefaultRulesWhenModeIsEmpty(t *testing.T) {
+	defaultRules := []models.Rules{
+		{Severity: "P0", Expr: "> 95", ForDuration: 300},
+	}
+
+	got := selectPrometheusRules(models.PrometheusConfig{
+		Rules: defaultRules,
+	}, map[string]interface{}{
+		"instance": "node-a:9100",
+	})
+
+	if !reflect.DeepEqual(got, defaultRules) {
+		t.Fatalf("expected default rules, got %#v", got)
+	}
+}
+
+func TestSelectPrometheusRulesReturnsOverrideRulesWhenLabelsMatch(t *testing.T) {
+	defaultRules := []models.Rules{
+		{Severity: "P0", Expr: "> 95", ForDuration: 300},
+	}
+	overrideRules := []models.Rules{
+		{Severity: "P0", Expr: "> 98", ForDuration: 600},
+	}
+
+	got := selectPrometheusRules(models.PrometheusConfig{
+		Rules:         defaultRules,
+		ThresholdMode: ThresholdModeNodeOverride,
+		ThresholdOverrides: []models.ThresholdOverride{
+			{
+				MatchLabels: map[string]string{"instance": "node-a:9100"},
+				Rules:       overrideRules,
+			},
+		},
+	}, map[string]interface{}{
+		"instance": "node-a:9100",
+	})
+
+	if !reflect.DeepEqual(got, overrideRules) {
+		t.Fatalf("expected override rules, got %#v", got)
+	}
+}
+
+func TestSelectPrometheusRulesFallsBackToDefaultRulesWhenLabelsDoNotMatch(t *testing.T) {
+	defaultRules := []models.Rules{
+		{Severity: "P0", Expr: "> 95", ForDuration: 300},
+	}
+	overrideRules := []models.Rules{
+		{Severity: "P0", Expr: "> 98", ForDuration: 600},
+	}
+
+	got := selectPrometheusRules(models.PrometheusConfig{
+		Rules:         defaultRules,
+		ThresholdMode: ThresholdModeNodeOverride,
+		ThresholdOverrides: []models.ThresholdOverride{
+			{
+				MatchLabels: map[string]string{"instance": "node-a:9100"},
+				Rules:       overrideRules,
+			},
+		},
+	}, map[string]interface{}{
+		"instance": "node-b:9100",
+	})
+
+	if !reflect.DeepEqual(got, defaultRules) {
+		t.Fatalf("expected default rules, got %#v", got)
+	}
+}
+
+func TestSelectPrometheusRulesSkipsEmptyOverrideRules(t *testing.T) {
+	defaultRules := []models.Rules{
+		{Severity: "P0", Expr: "> 95", ForDuration: 300},
+	}
+
+	got := selectPrometheusRules(models.PrometheusConfig{
+		Rules:         defaultRules,
+		ThresholdMode: ThresholdModeNodeOverride,
+		ThresholdOverrides: []models.ThresholdOverride{
+			{
+				MatchLabels: map[string]string{"instance": "node-a:9100"},
+			},
+		},
+	}, map[string]interface{}{
+		"instance": "node-a:9100",
+	})
+
+	if !reflect.DeepEqual(got, defaultRules) {
+		t.Fatalf("expected default rules, got %#v", got)
+	}
+}
+
+func TestMatchMetricLabelsRequiresAllLabelsToMatch(t *testing.T) {
+	metricLabels := map[string]interface{}{
+		"instance": "node-a:9100",
+		"os_type":  "Linux",
+	}
+
+	if !matchMetricLabels(metricLabels, map[string]string{
+		"instance": "node-a:9100",
+		"os_type":  "Linux",
+	}) {
+		t.Fatal("expected all labels to match")
+	}
+
+	if matchMetricLabels(metricLabels, map[string]string{
+		"instance": "node-a:9100",
+		"os_type":  "Windows",
+	}) {
+		t.Fatal("expected mismatched label to fail")
+	}
+}
diff --git a/internal/models/rule.go b/internal/models/rule.go
index 32507f0..09523d7 100644
--- a/internal/models/rule.go
+++ b/internal/models/rule.go
@@ -97,6 +97,11 @@ type PrometheusConfig struct {
 	PromQL      string `json:"promQL"`
 	Annotations string `json:"annotations"`
 	//ForDuration int64 `json:"forDuration"`
 	Rules          []Rules          `json:"rules"`
 	CallbakPromQLs []CallbakPromQLs `json:"callbakPromQLs"`
+
+	// ThresholdMode and ThresholdOverrides configure per-series thresholds: when
+	// ThresholdMode is "node_override", a matching override replaces Rules.
+	ThresholdMode      string              `json:"thresholdMode"`
+	ThresholdOverrides []ThresholdOverride `json:"thresholdOverrides"`
 }
@@ -112,6 +117,13 @@ type Rules struct {
 	Expr string `json:"expr"`
 }
 
+// ThresholdOverride replaces the default threshold rules for series whose
+// labels equal every key/value pair in MatchLabels.
+type ThresholdOverride struct {
+	MatchLabels map[string]string `json:"matchLabels"`
+	Rules       []Rules           `json:"rules"`
+}
+
 type EffectiveTime struct {
 	Week      []string `json:"week"`
 	StartTime int      `json:"startTime"`