From 62e1830a3705decb1085a445c35d7ef5c4b3fe17 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Fri, 24 Apr 2026 10:12:16 -0400 Subject: [PATCH 1/3] Performance improvements to CEL rule evaluation Three improvements to hot-path validation, inspired by protovalidate-go. Guard `_validate_cel` when `self._cel` is empty (bufbuild/protovalidate-go#261) Adds an early return in `CelRules._validate_cel` when there are no CEL runners, skipping activation dict creation and `datetime.now()` entirely. Also guards the `_msg_to_cel` call in `MessageRules.validate()` so the message-to-CEL conversion is skipped when there are no CEL rules to run. Benchmark (required-only field, 0 CEL runners): -37%. Skip `now` computation when unused (bufbuild/protovalidate-go#289) Adds `_uses_now` to `CelRules`, set at compile time in `add_rule` by checking whether `"now"` appears in the expression string. Only `timestamp.gt_now`, `timestamp.lt_now`, `timestamp.within`, and custom expressions referencing `now` are expected to call `datetime.datetime.now()`; because this is a plain substring check, any other expression whose text merely contains `now` (e.g. an identifier such as `known`) also sets the flag, which costs an unnecessary clock read per validation but never omits `now` when it is needed. Benchmarks: `int32.gt` (5 CEL runners) -25%, `timestamp.gt_now` -6%. Early-exit loop in `cel_unique` (bufbuild/protovalidate-go#289) Replaces `len(val) == len(set(val))` with a loop that returns `False` on the first duplicate, avoiding building the full set unnecessarily. 
--- protovalidate/internal/extra_func.py | 7 ++++++- protovalidate/internal/rules.py | 15 +++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 5d4b821..c386c5c 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -328,7 +328,12 @@ def cel_unique(val: celtypes.Value) -> celpy.Result: if not isinstance(val, celtypes.ListType | list): msg = "invalid argument, expected list" raise celpy.CELEvalError(msg) - return celtypes.BoolType(len(val) == len(set(val))) + seen: set[celtypes.Value] = set() + for item in val: + if item in seen: + return celtypes.BoolType(False) # noqa: FBT003 + seen.add(item) + return celtypes.BoolType(True) # noqa: FBT003 class Ipv4: diff --git a/protovalidate/internal/rules.py b/protovalidate/internal/rules.py index 5e1660c..18f577c 100644 --- a/protovalidate/internal/rules.py +++ b/protovalidate/internal/rules.py @@ -342,6 +342,7 @@ class CelRules(Rules): _cel: list[CelRunner] _rules: message.Message | None = None _rules_cel: celtypes.Value | None = None + _uses_now: bool = False def __init__(self, rules: message.Message | None): self._cel = [] @@ -357,11 +358,14 @@ def _validate_cel( this_cel: celtypes.Value | None = None, for_key: bool = False, ): + if not self._cel: + return activation: dict[str, celtypes.Value] = {} if this_cel is not None: activation["this"] = this_cel activation["rules"] = self._rules_cel - activation["now"] = celtypes.TimestampType(datetime.datetime.now(tz=datetime.timezone.utc)) + if self._uses_now: + activation["now"] = celtypes.TimestampType(datetime.datetime.now(tz=datetime.timezone.utc)) for cel in self._cel: activation["rule"] = cel.rule_cel result = cel.runner.evaluate(activation) @@ -409,6 +413,8 @@ def add_rule( rules = validate_pb2.Rule() rules.id = expression rules.expression = expression + if "now" in rules.expression: + self._uses_now = True ast = 
env.compile(rules.expression) prog = env.program(ast, functions=funcs) rule_value = None @@ -463,9 +469,10 @@ def __init__(self, rules: message.Message | None, desc: descriptor.Descriptor): self._desc = desc def validate(self, ctx: RuleContext, message: message.Message): - self._validate_cel(ctx, this_cel=_msg_to_cel(message)) - if ctx.done: - return + if self._cel: + self._validate_cel(ctx, this_cel=_msg_to_cel(message)) + if ctx.done: + return for oneof in self._oneofs: oneof.validate(ctx, message) if ctx.done: From f6b1c39f9eb1cedc7673f855d8062af3cce93aa9 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Fri, 24 Apr 2026 10:12:34 -0400 Subject: [PATCH 2/3] Use word-boundary regex to detect `now` in CEL expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The simple `"now" in expression` substring check could produce false positives for identifiers like `renown` or `knows`, setting `_uses_now` unnecessarily. Replace with `_NOW_RE = re.compile(r"\bnow\b")` so only the standalone identifier is matched. The `\b` assertion matches at the boundary between word characters (`\w`, i.e. `[a-zA-Z0-9_]` plus, for `str` patterns compiled without `re.ASCII`, other Unicode alphanumerics) and non-word characters. Since CEL identifiers are `[_a-zA-Z][_a-zA-Z0-9]*`, any occurrence of `now` as an identifier — including inside expressions like `timestamp(now)` — is always flanked by non-identifier characters, which are also non-word characters in the positions CEL syntax allows, so this approach produces no false negatives. 
--- protovalidate/internal/rules.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/protovalidate/internal/rules.py b/protovalidate/internal/rules.py index 18f577c..78fbf4c 100644 --- a/protovalidate/internal/rules.py +++ b/protovalidate/internal/rules.py @@ -14,6 +14,7 @@ import dataclasses import datetime +import re import typing from collections.abc import Callable, Container, Iterable, Mapping @@ -44,6 +45,9 @@ def _is_repeated(field: descriptor.FieldDescriptor) -> bool: return field.label == descriptor.FieldDescriptor.LABEL_REPEATED # type: ignore[attr-defined] +_NOW_RE = re.compile(r"\bnow\b") + + class CompilationError(Exception): pass @@ -413,7 +417,7 @@ def add_rule( rules = validate_pb2.Rule() rules.id = expression rules.expression = expression - if "now" in rules.expression: + if _NOW_RE.search(rules.expression): self._uses_now = True ast = env.compile(rules.expression) prog = env.program(ast, functions=funcs) From 6071c3f67e6156be8f6362abe53cb4492146730e Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Fri, 24 Apr 2026 10:26:07 -0400 Subject: [PATCH 3/3] Revert "Use word-boundary regex to detect `now` in CEL expressions" This reverts commit f6b1c39f9eb1cedc7673f855d8062af3cce93aa9. 
--- protovalidate/internal/rules.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/protovalidate/internal/rules.py b/protovalidate/internal/rules.py index 78fbf4c..18f577c 100644 --- a/protovalidate/internal/rules.py +++ b/protovalidate/internal/rules.py @@ -14,7 +14,6 @@ import dataclasses import datetime -import re import typing from collections.abc import Callable, Container, Iterable, Mapping @@ -45,9 +44,6 @@ def _is_repeated(field: descriptor.FieldDescriptor) -> bool: return field.label == descriptor.FieldDescriptor.LABEL_REPEATED # type: ignore[attr-defined] -_NOW_RE = re.compile(r"\bnow\b") - - class CompilationError(Exception): pass @@ -417,7 +413,7 @@ def add_rule( rules = validate_pb2.Rule() rules.id = expression rules.expression = expression - if _NOW_RE.search(rules.expression): + if "now" in rules.expression: self._uses_now = True ast = env.compile(rules.expression) prog = env.program(ast, functions=funcs)