DataDog · Eldolfin · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
@@ -147,6 +147,8 @@ redundant_else = "allow" # less clear
 option_if_let_else = "allow" # less clear
 unreadable_literal = "allow" # annoying for all the ids in the codebase
 cognitive_complexity = "allow" # using clippy::too_many_lines already
+manual_string_new = "allow" # prefer "".to_string() / "".to_owned() for clarity at call sites
+cast_possible_truncation = "allow" # intentional truncations are common (e.g. nanosecond timestamps)
 
 [workspace.lints.rust]
 future_incompatible = { level = "warn", priority = -1 }

@@ -29,3 +29,6 @@ criterion = "0.5"
 name = "normalization_utils"
 harness = false
 path = "benches/normalization_utils.rs"
+
+[lints]
+workspace = true
@@ -37,7 +37,7 @@ fn normalize_name_bench(c: &mut Criterion) {
 
 #[inline]
 fn normalize_fnmut_string<F>(
-    mut group: BenchmarkGroup<WallTime>,
+    mut group: BenchmarkGroup<'_, WallTime>,
     cases: &[&str],
     elements: usize,
     function_name: &str,
@@ -73,12 +73,12 @@ fn normalize_fnmut_string<F>(
                     },
                     |strings| {
                         #[allow(clippy::unit_arg)]
-                        strings.iter_mut().for_each(|string| {
+                        for string in strings.iter_mut() {
                             black_box(function(black_box(string)));
-                        });
+                        }
                     },
                     BatchSize::LargeInput,
-                )
+                );
             },
         );
     }
@@ -111,7 +111,7 @@ fn normalize_span_bench(c: &mut Criterion) {
             duration: 12000000,
             error: 1,
             resource: "GET /some/reblochon".to_string(),
-            service: "".to_string(),
+            service: String::new(),
             name: "django.controller".to_string(),
             span_id: 1456,
             start: 1448466849000000000,
@@ -137,7 +137,7 @@ fn normalize_span_bench(c: &mut Criterion) {
                 || case.to_owned(),
                 |t| black_box(normalize_trace(black_box(t))),
                 BatchSize::LargeInput,
-            )
+            );
         },
     );
 }

@@ -14,8 +14,10 @@ const MAX_META_STRUCT_SIZE: u8 = 100;
 const MAX_LINKS_SIZE: u8 = 10;
 const MAX_EVENTS_SIZE: u8 = 10;
 
-/// Helper function to generate an arbitrary AttributeAnyValue
-fn arbitrary_attribute_any_value(u: &mut Unstructured) -> arbitrary::Result<pb::AttributeAnyValue> {
+/// Helper function to generate an arbitrary `AttributeAnyValue`
+fn arbitrary_attribute_any_value(
+    u: &mut Unstructured<'_>,
+) -> arbitrary::Result<pb::AttributeAnyValue> {
     let value_type: u8 = u.arbitrary()?;
 
     match value_type % 4 {
@@ -73,6 +75,10 @@ pub struct FuzzSpan {
 }
 
 impl<'a> Arbitrary<'a> for FuzzSpan {
+    #[allow(
+        clippy::too_many_lines,
+        reason = "FIXME: generating all Span fields inline; splitting would require many helper fns"
+    )]
     fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
         // Generate all basic fields
         let service: String = u.arbitrary()?;
@@ -186,7 +192,7 @@ impl<'a> Arbitrary<'a> for FuzzSpan {
             span_events.push(event);
         }
 
-        Ok(FuzzSpan {
+        Ok(Self {
             span: pb::Span {
                 service,
                 name,
@@ -208,7 +214,7 @@ impl<'a> Arbitrary<'a> for FuzzSpan {
     }
 }
 
-/// Main fuzzing function that tests normalize_span with arbitrary data
+/// Main fuzzing function that tests `normalize_span` with arbitrary data
 pub fn fuzz_normalize_span(fuzz_span: FuzzSpan) {
     let mut span = fuzz_span.span;
 

@@ -8,17 +8,17 @@ pub(crate) const MAX_TYPE_LEN: usize = 100;
 /// an arbitrary cutoff to spot weird-looking values
 /// nanoseconds since epoch on Jan 1, 2000
 const YEAR_2000_NANOSEC_TS: i64 = 946684800000000000;
-/// DEFAULT_SPAN_NAME is the default name we assign a span if it's missing and we have no reasonable
-/// fallback
+/// `DEFAULT_SPAN_NAME` is the default name we assign a span if it's missing and we have no
+/// reasonable fallback
 pub(crate) const DEFAULT_SPAN_NAME: &str = "unnamed_operation";
-/// DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no
+/// `DEFAULT_SERVICE_NAME` is the default name we assign a service if it's missing and we have no
 /// reasonable fallback
 pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service";
-/// MAX_NAME_LEN the maximum length a name can have
+/// `MAX_NAME_LEN` the maximum length a name can have
 pub(crate) const MAX_NAME_LEN: usize = 100;
-/// MAX_SERVICE_LEN the maximum length a service can have
+/// `MAX_SERVICE_LEN` the maximum length a service can have
 const MAX_SERVICE_LEN: usize = 100;
-/// MAX_SERVICE_LEN the maximum length a tag can have
+/// `MAX_SERVICE_LEN` the maximum length a tag can have
 const MAX_TAG_LEN: usize = 200;
 
 // normalize_service normalizes a span service
@@ -72,7 +72,7 @@ pub fn normalize_span_start_duration(start: &mut i64, duration: &mut i64) {
     }
 }
 
-pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
+pub const fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
     // ParentID, TraceID and SpanID set in the client could be the same
     // Supporting the ParentID == TraceID == SpanID for the root span, is compliant
     // with the Zipkin implementation. Furthermore, as described in the PR
@@ -83,6 +83,17 @@ pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
     }
 }
 
+/// Normalizes `tag` in-place, lowercasing and replacing illegal characters with `_`.
+///
+/// # Panics
+///
+/// Panics if the tag contains bytes that are not valid UTF-8 continuation sequences; in practice
+/// this cannot happen because the function only processes bytes it has already validated via
+/// `next_code_point`.
+#[allow(
+    clippy::too_many_lines,
+    reason = "FIXME: split into sub-functions requires threading mutable cursor state, deferred"
+)]
 pub fn normalize_tag(tag: &mut String) {
     // Since we know that we're only going to write valid utf8 we can work with the Vec directly
     let bytes = unsafe { tag.as_mut_vec() };
@@ -249,7 +260,7 @@ fn normalize_metric_name(name: &mut String) {
                 // we don't go back to the beginning
                 write_cursor -= 1;
                 bytes[write_cursor] = b'.';
-                last_written_char = b'.'
+                last_written_char = b'.';
             }
             // If we've written a _ or a . last, do nothing
             (_, b'_' | b'.') => {}
@@ -381,6 +392,6 @@ mod tests {
     fn test_name() {
         let mut v = input.to_owned();
         normalize_tag(&mut v);
-        assert_eq!(v, expected)
+        assert_eq!(v, expected);
     }
 }
@@ -38,7 +38,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> {
         if !is_valid_status_code(code) {
             s.meta.remove("http.status_code");
         }
-    };
+    }
 
     Ok(())
 }
@@ -50,9 +50,14 @@ pub(crate) fn is_valid_status_code(sc: &str) -> bool {
     false
 }
 
-/// normalize_trace takes a trace and
+/// `normalize_trace` takes a trace and
 /// * returns an error if there is a trace ID discrepancy between 2 spans
 /// * returns an error if at least one span cannot be normalized
+///
+/// # Errors
+///
+/// Returns an error if the trace is empty, if spans have mismatched trace IDs, or if any span
+/// fails normalization.
 pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> {
     let first_trace_id = match trace.first() {
         Some(first_span) => first_span.trace_id,
@@ -71,17 +76,18 @@ pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> {
     Ok(())
 }
 
-/// normalize_chunk takes a trace chunk and
+/// `normalize_chunk` takes a trace chunk and
 /// * populates origin field if it wasn't populated
 /// * populates priority field if it wasn't populated the root span is used to populate these
-///   fields, and it's index in TraceChunk spans vec must be passed.
+///   fields, and it's index in `TraceChunk` spans vec must be passed.
+///
+/// # Errors
+///
+/// Returns an error if `root_span_index` is out of bounds for the chunk's spans.
 pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> anyhow::Result<()> {
     // check if priority is not populated
-    let root_span = match chunk.spans.get(root_span_index) {
-        Some(span) => span,
-        None => {
-            anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans")
-        }
+    let Some(root_span) = chunk.spans.get(root_span_index) else {
+        anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans")
     };
 
     if chunk.priority == SamplerPriority::None as i32 {
@@ -101,7 +107,7 @@ pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> an
     if chunk.origin.is_empty() {
         if let Some(origin) = root_span.meta.get(TAG_ORIGIN) {
             // Older tracers set origin in the root span.
-            chunk.origin = origin.to_string();
+            chunk.origin = origin.clone();
         }
     }
     Ok(())
@@ -512,7 +518,7 @@ mod tests {
         let mut root = new_test_span();
         root.metrics.insert(
             normalizer::TAG_SAMPLING_PRIORITY.to_string(),
-            normalizer::SamplerPriority::UserKeep as i32 as f64,
+            f64::from(normalizer::SamplerPriority::UserKeep as i32),
         );
 
         let mut chunk = new_test_chunk_with_span(root);
@@ -526,7 +532,7 @@ mod tests {
         let mut root = new_test_span();
         root.metrics.insert(
             normalizer::TAG_SAMPLING_PRIORITY.to_string(),
-            normalizer::SamplerPriority::UserKeep as i32 as f64,
+            f64::from(normalizer::SamplerPriority::UserKeep as i32),
         );
 
         let mut chunk = new_test_chunk_with_span(root);
@@ -554,7 +560,7 @@ mod tests {
         chunk.spans = vec![new_test_span(), new_test_span(), new_test_span()];
         chunk.spans[1].metrics.insert(
             normalizer::TAG_SAMPLING_PRIORITY.to_string(),
-            normalizer::SamplerPriority::UserKeep as i32 as f64,
+            f64::from(normalizer::SamplerPriority::UserKeep as i32),
         );
         assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
         assert_eq!(normalizer::SamplerPriority::UserKeep as i32, chunk.priority);

@@ -2,15 +2,19 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /// Taken from the source of
-/// https://doc.rust-lang.org/std/primitive.str.html#method.floor_char_boundary
-/// TODO remove when str::floor_char_boundary is stable
+/// <https://doc.rust-lang.org/std/primitive.str.html#method.floor_char_boundary>
+/// TODO remove when `str::floor_char_boundary` is stable
 #[inline]
-pub(crate) fn floor_char_boundary(s: &str, index: usize) -> usize {
+pub fn floor_char_boundary(s: &str, index: usize) -> usize {
     #[allow(clippy::unwrap_used)]
     if index >= s.len() {
         s.len()
     } else {
         let lower_bound = index.saturating_sub(3);
+        #[allow(
+            clippy::cast_possible_wrap,
+            reason = "intentional: checks UTF-8 continuation byte bitmask via signed comparison"
+        )]
         let new_index = s.as_bytes()[lower_bound..=index]
             .iter()
             .rposition(|b| (*b as i8) >= -0x40);
@@ -19,11 +23,11 @@ pub(crate) fn floor_char_boundary(s: &str, index: usize) -> usize {
     }
 }
 
-pub(crate) fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
+pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
     // Decode UTF-8
     let x = *bytes.next()?;
     if x < 128 {
-        return Some(x as u32);
+        return Some(u32::from(x));
     }
 
     // Multibyte case follows
@@ -40,7 +44,7 @@ pub(crate) fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) ->
         // SAFETY: `bytes` produces an UTF-8-like string,
         // so the iterator must produce a value here.
         let z = *bytes.next()?;
-        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
+        let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
         ch = (init << 12) | y_z;
         if x >= 0xF0 {
             // [x y z w] case