diff --git a/Cargo.toml b/Cargo.toml index dc3e374492..1801c1a208 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,6 +147,8 @@ redundant_else = "allow" # less clear option_if_let_else = "allow" # less clear unreadable_literal = "allow" # annoying for all the ids in the codebase cognitive_complexity = "allow" # using clippy::too_many_lines already +manual_string_new = "allow" # prefer "".to_string() / "".to_owned() for clarity at call sites +cast_possible_truncation = "allow" # intentional truncations are common (e.g. nanosecond timestamps) [workspace.lints.rust] future_incompatible = { level = "warn", priority = -1 } diff --git a/libdd-trace-normalization/Cargo.toml b/libdd-trace-normalization/Cargo.toml index 1579a5e299..5549a84a09 100644 --- a/libdd-trace-normalization/Cargo.toml +++ b/libdd-trace-normalization/Cargo.toml @@ -29,3 +29,6 @@ criterion = "0.5" name = "normalization_utils" harness = false path = "benches/normalization_utils.rs" + +[lints] +workspace = true diff --git a/libdd-trace-normalization/benches/normalization_utils.rs b/libdd-trace-normalization/benches/normalization_utils.rs index 6c69ff495f..c134788f0a 100644 --- a/libdd-trace-normalization/benches/normalization_utils.rs +++ b/libdd-trace-normalization/benches/normalization_utils.rs @@ -37,7 +37,7 @@ fn normalize_name_bench(c: &mut Criterion) { #[inline] fn normalize_fnmut_string( - mut group: BenchmarkGroup, + mut group: BenchmarkGroup<'_, WallTime>, cases: &[&str], elements: usize, function_name: &str, @@ -73,12 +73,12 @@ fn normalize_fnmut_string( }, |strings| { #[allow(clippy::unit_arg)] - strings.iter_mut().for_each(|string| { + for string in strings.iter_mut() { black_box(function(black_box(string))); - }); + } }, BatchSize::LargeInput, - ) + ); }, ); } @@ -111,7 +111,7 @@ fn normalize_span_bench(c: &mut Criterion) { duration: 12000000, error: 1, resource: "GET /some/reblochon".to_string(), - service: "".to_string(), + service: String::new(), name: "django.controller".to_string(), span_id: 1456, start: 1448466849000000000, @@ -137,7 +137,7 @@ fn normalize_span_bench(c: &mut Criterion) { || case.to_owned(), |t| black_box(normalize_trace(black_box(t))), BatchSize::LargeInput, - ) + ); }, ); } diff --git a/libdd-trace-normalization/src/fuzz.rs b/libdd-trace-normalization/src/fuzz.rs index 96762383be..8400d48b53 100644 --- a/libdd-trace-normalization/src/fuzz.rs +++ b/libdd-trace-normalization/src/fuzz.rs @@ -14,8 +14,10 @@ const MAX_META_STRUCT_SIZE: u8 = 100; const MAX_LINKS_SIZE: u8 = 10; const MAX_EVENTS_SIZE: u8 = 10; -/// Helper function to generate an arbitrary AttributeAnyValue -fn arbitrary_attribute_any_value(u: &mut Unstructured) -> arbitrary::Result { +/// Helper function to generate an arbitrary `AttributeAnyValue` +fn arbitrary_attribute_any_value( + u: &mut Unstructured<'_>, +) -> arbitrary::Result { let value_type: u8 = u.arbitrary()?; match value_type % 4 { @@ -73,6 +75,10 @@ pub struct FuzzSpan { } impl<'a> Arbitrary<'a> for FuzzSpan { + #[allow( + clippy::too_many_lines, + reason = "FIXME: generating all Span fields inline; splitting would require many helper fns" + )] fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { // Generate all basic fields let service: String = u.arbitrary()?; @@ -186,7 +192,7 @@ impl<'a> Arbitrary<'a> for FuzzSpan { span_events.push(event); } - Ok(FuzzSpan { + Ok(Self { span: pb::Span { service, name, @@ -208,7 +214,7 @@ impl<'a> Arbitrary<'a> for FuzzSpan { } } -/// Main fuzzing function that tests normalize_span with arbitrary data +/// Main fuzzing function that tests `normalize_span` with arbitrary data pub fn fuzz_normalize_span(fuzz_span: FuzzSpan) { let mut span = fuzz_span.span; diff --git a/libdd-trace-normalization/src/normalize_utils.rs b/libdd-trace-normalization/src/normalize_utils.rs index b70093c817..b066299224 100644 --- a/libdd-trace-normalization/src/normalize_utils.rs +++ b/libdd-trace-normalization/src/normalize_utils.rs @@ -8,17 +8,17 @@ pub(crate) const MAX_TYPE_LEN: usize = 100; /// an arbitrary cutoff to spot weird-looking values /// nanoseconds since epoch on Jan 1, 2000 const YEAR_2000_NANOSEC_TS: i64 = 946684800000000000; -/// DEFAULT_SPAN_NAME is the default name we assign a span if it's missing and we have no reasonable -/// fallback +/// `DEFAULT_SPAN_NAME` is the default name we assign a span if it's missing and we have no +/// reasonable fallback pub(crate) const DEFAULT_SPAN_NAME: &str = "unnamed_operation"; -/// DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no +/// `DEFAULT_SERVICE_NAME` is the default name we assign a service if it's missing and we have no /// reasonable fallback pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service"; -/// MAX_NAME_LEN the maximum length a name can have +/// `MAX_NAME_LEN` the maximum length a name can have pub(crate) const MAX_NAME_LEN: usize = 100; -/// MAX_SERVICE_LEN the maximum length a service can have +/// `MAX_SERVICE_LEN` the maximum length a service can have const MAX_SERVICE_LEN: usize = 100; -/// MAX_SERVICE_LEN the maximum length a tag can have +/// `MAX_SERVICE_LEN` the maximum length a tag can have const MAX_TAG_LEN: usize = 200; // normalize_service normalizes a span service @@ -72,7 +72,7 @@ pub fn normalize_span_start_duration(start: &mut i64, duration: &mut i64) { } } -pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) { +pub const fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) { // ParentID, TraceID and SpanID set in the client could be the same // Supporting the ParentID == TraceID == SpanID for the root span, is compliant // with the Zipkin implementation. Furthermore, as described in the PR @@ -83,6 +83,17 @@ pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) { } } +/// Normalizes `tag` in-place, lowercasing and replacing illegal characters with `_`. +/// +/// # Panics +/// +/// Panics if the tag contains bytes that are not valid UTF-8 continuation sequences; in practice +/// this cannot happen because the function only processes bytes it has already validated via +/// `next_code_point`. +#[allow( + clippy::too_many_lines, + reason = "FIXME: split into sub-functions requires threading mutable cursor state, deferred" +)] pub fn normalize_tag(tag: &mut String) { // Since we know that we're only going to write valid utf8 we can work with the Vec directly let bytes = unsafe { tag.as_mut_vec() }; @@ -249,7 +260,7 @@ fn normalize_metric_name(name: &mut String) { // we don't go back to the beginning write_cursor -= 1; bytes[write_cursor] = b'.'; - last_written_char = b'.' + last_written_char = b'.'; } // If we've written a _ or a . last, do nothing (_, b'_' | b'.') => {} @@ -381,6 +392,6 @@ mod tests { fn test_name() { let mut v = input.to_owned(); normalize_tag(&mut v); - assert_eq!(v, expected) + assert_eq!(v, expected); } } diff --git a/libdd-trace-normalization/src/normalizer.rs b/libdd-trace-normalization/src/normalizer.rs index 7450dad908..15b1ea8041 100644 --- a/libdd-trace-normalization/src/normalizer.rs +++ b/libdd-trace-normalization/src/normalizer.rs @@ -38,7 +38,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { if !is_valid_status_code(code) { s.meta.remove("http.status_code"); } - }; + } Ok(()) } @@ -50,9 +50,14 @@ pub(crate) fn is_valid_status_code(sc: &str) -> bool { false } -/// normalize_trace takes a trace and +/// `normalize_trace` takes a trace and /// * returns an error if there is a trace ID discrepancy between 2 spans /// * returns an error if at least one span cannot be normalized +/// +/// # Errors +/// +/// Returns an error if the trace is empty, if spans have mismatched trace IDs, or if any span +/// fails normalization. pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> { let first_trace_id = match trace.first() { Some(first_span) => first_span.trace_id, @@ -71,17 +76,18 @@ pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> { Ok(()) } -/// normalize_chunk takes a trace chunk and +/// `normalize_chunk` takes a trace chunk and /// * populates origin field if it wasn't populated /// * populates priority field if it wasn't populated the root span is used to populate these -/// fields, and it's index in TraceChunk spans vec must be passed. +/// fields, and it's index in `TraceChunk` spans vec must be passed. +/// +/// # Errors +/// +/// Returns an error if `root_span_index` is out of bounds for the chunk's spans. pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> anyhow::Result<()> { // check if priority is not populated - let root_span = match chunk.spans.get(root_span_index) { - Some(span) => span, - None => { - anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans") - } + let Some(root_span) = chunk.spans.get(root_span_index) else { + anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans") }; if chunk.priority == SamplerPriority::None as i32 { @@ -101,7 +107,7 @@ pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> an if chunk.origin.is_empty() { if let Some(origin) = root_span.meta.get(TAG_ORIGIN) { // Older tracers set origin in the root span. - chunk.origin = origin.to_string(); + chunk.origin = origin.clone(); } } Ok(()) @@ -512,7 +518,7 @@ mod tests { let mut root = new_test_span(); root.metrics.insert( normalizer::TAG_SAMPLING_PRIORITY.to_string(), - normalizer::SamplerPriority::UserKeep as i32 as f64, + f64::from(normalizer::SamplerPriority::UserKeep as i32), ); let mut chunk = new_test_chunk_with_span(root); @@ -526,7 +532,7 @@ mod tests { let mut root = new_test_span(); root.metrics.insert( normalizer::TAG_SAMPLING_PRIORITY.to_string(), - normalizer::SamplerPriority::UserKeep as i32 as f64, + f64::from(normalizer::SamplerPriority::UserKeep as i32), ); let mut chunk = new_test_chunk_with_span(root); @@ -554,7 +560,7 @@ mod tests { chunk.spans = vec![new_test_span(), new_test_span(), new_test_span()]; chunk.spans[1].metrics.insert( normalizer::TAG_SAMPLING_PRIORITY.to_string(), - normalizer::SamplerPriority::UserKeep as i32 as f64, + f64::from(normalizer::SamplerPriority::UserKeep as i32), ); assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok()); assert_eq!(normalizer::SamplerPriority::UserKeep as i32, chunk.priority); diff --git a/libdd-trace-normalization/src/utf8_helpers.rs b/libdd-trace-normalization/src/utf8_helpers.rs index 51f7f38105..3a11a6ed49 100644 --- a/libdd-trace-normalization/src/utf8_helpers.rs +++ b/libdd-trace-normalization/src/utf8_helpers.rs @@ -2,15 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 /// Taken from the source of -/// https://doc.rust-lang.org/std/primitive.str.html#method.floor_char_boundary -/// TODO remove when str::floor_char_boundary is stable +/// +/// TODO remove when `str::floor_char_boundary` is stable #[inline] -pub(crate) fn floor_char_boundary(s: &str, index: usize) -> usize { +pub fn floor_char_boundary(s: &str, index: usize) -> usize { #[allow(clippy::unwrap_used)] if index >= s.len() { s.len() } else { let lower_bound = index.saturating_sub(3); + #[allow( + clippy::cast_possible_wrap, + reason = "intentional: checks UTF-8 continuation byte bitmask via signed comparison" + )] let new_index = s.as_bytes()[lower_bound..=index] .iter() .rposition(|b| (*b as i8) >= -0x40); @@ -19,11 +23,11 @@ pub(crate) fn floor_char_boundary(s: &str, index: usize) -> usize { } } -pub(crate) fn next_code_point<'a, I: Iterator>(bytes: &mut I) -> Option { +pub fn next_code_point<'a, I: Iterator>(bytes: &mut I) -> Option { // Decode UTF-8 let x = *bytes.next()?; if x < 128 { - return Some(x as u32); + return Some(u32::from(x)); } // Multibyte case follows @@ -40,7 +44,7 @@ pub(crate) fn next_code_point<'a, I: Iterator>(bytes: &mut I) -> // SAFETY: `bytes` produces an UTF-8-like string, // so the iterator must produce a value here. let z = *bytes.next()?; - let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z); + let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z); ch = (init << 12) | y_z; if x >= 0xF0 { // [x y z w] case