Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ redundant_else = "allow" # less clear
option_if_let_else = "allow" # less clear
unreadable_literal = "allow" # annoying for all the ids in the codebase
cognitive_complexity = "allow" # using clippy::too_many_lines already
manual_string_new = "allow" # prefer "".to_string() / "".to_owned() for clarity at call sites
cast_possible_truncation = "allow" # intentional truncations are common (e.g. nanosecond timestamps)

[workspace.lints.rust]
future_incompatible = { level = "warn", priority = -1 }
Expand Down
3 changes: 3 additions & 0 deletions libdd-trace-normalization/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ criterion = "0.5"
name = "normalization_utils"
harness = false
path = "benches/normalization_utils.rs"

[lints]
workspace = true
12 changes: 6 additions & 6 deletions libdd-trace-normalization/benches/normalization_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn normalize_name_bench(c: &mut Criterion) {

#[inline]
fn normalize_fnmut_string<F>(
mut group: BenchmarkGroup<WallTime>,
mut group: BenchmarkGroup<'_, WallTime>,
cases: &[&str],
elements: usize,
function_name: &str,
Expand Down Expand Up @@ -73,12 +73,12 @@ fn normalize_fnmut_string<F>(
},
|strings| {
#[allow(clippy::unit_arg)]
strings.iter_mut().for_each(|string| {
for string in strings.iter_mut() {
black_box(function(black_box(string)));
});
}
},
BatchSize::LargeInput,
)
);
},
);
}
Expand Down Expand Up @@ -111,7 +111,7 @@ fn normalize_span_bench(c: &mut Criterion) {
duration: 12000000,
error: 1,
resource: "GET /some/reblochon".to_string(),
service: "".to_string(),
service: String::new(),
name: "django.controller".to_string(),
span_id: 1456,
start: 1448466849000000000,
Expand All @@ -137,7 +137,7 @@ fn normalize_span_bench(c: &mut Criterion) {
|| case.to_owned(),
|t| black_box(normalize_trace(black_box(t))),
BatchSize::LargeInput,
)
);
},
);
}
Expand Down
14 changes: 10 additions & 4 deletions libdd-trace-normalization/src/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ const MAX_META_STRUCT_SIZE: u8 = 100;
const MAX_LINKS_SIZE: u8 = 10;
const MAX_EVENTS_SIZE: u8 = 10;

/// Helper function to generate an arbitrary AttributeAnyValue
fn arbitrary_attribute_any_value(u: &mut Unstructured) -> arbitrary::Result<pb::AttributeAnyValue> {
/// Helper function to generate an arbitrary `AttributeAnyValue`
fn arbitrary_attribute_any_value(
u: &mut Unstructured<'_>,
) -> arbitrary::Result<pb::AttributeAnyValue> {
let value_type: u8 = u.arbitrary()?;

match value_type % 4 {
Expand Down Expand Up @@ -73,6 +75,10 @@ pub struct FuzzSpan {
}

impl<'a> Arbitrary<'a> for FuzzSpan {
#[allow(
clippy::too_many_lines,
reason = "FIXME: generating all Span fields inline; splitting would require many helper fns"
)]
fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
// Generate all basic fields
let service: String = u.arbitrary()?;
Expand Down Expand Up @@ -186,7 +192,7 @@ impl<'a> Arbitrary<'a> for FuzzSpan {
span_events.push(event);
}

Ok(FuzzSpan {
Ok(Self {
span: pb::Span {
service,
name,
Expand All @@ -208,7 +214,7 @@ impl<'a> Arbitrary<'a> for FuzzSpan {
}
}

/// Main fuzzing function that tests normalize_span with arbitrary data
/// Main fuzzing function that tests `normalize_span` with arbitrary data
pub fn fuzz_normalize_span(fuzz_span: FuzzSpan) {
let mut span = fuzz_span.span;

Expand Down
29 changes: 20 additions & 9 deletions libdd-trace-normalization/src/normalize_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@ pub(crate) const MAX_TYPE_LEN: usize = 100;
/// an arbitrary cutoff to spot weird-looking values
/// nanoseconds since epoch on Jan 1, 2000
const YEAR_2000_NANOSEC_TS: i64 = 946684800000000000;
/// DEFAULT_SPAN_NAME is the default name we assign a span if it's missing and we have no reasonable
/// fallback
/// `DEFAULT_SPAN_NAME` is the default name we assign a span if it's missing and we have no
/// reasonable fallback
pub(crate) const DEFAULT_SPAN_NAME: &str = "unnamed_operation";
/// DEFAULT_SERVICE_NAME is the default name we assign a service if it's missing and we have no
/// `DEFAULT_SERVICE_NAME` is the default name we assign a service if it's missing and we have no
/// reasonable fallback
pub(crate) const DEFAULT_SERVICE_NAME: &str = "unnamed-service";
/// MAX_NAME_LEN the maximum length a name can have
/// `MAX_NAME_LEN` the maximum length a name can have
pub(crate) const MAX_NAME_LEN: usize = 100;
/// MAX_SERVICE_LEN the maximum length a service can have
/// `MAX_SERVICE_LEN` the maximum length a service can have
const MAX_SERVICE_LEN: usize = 100;
/// MAX_SERVICE_LEN the maximum length a tag can have
/// `MAX_SERVICE_LEN` the maximum length a tag can have
const MAX_TAG_LEN: usize = 200;

// normalize_service normalizes a span service
Expand Down Expand Up @@ -72,7 +72,7 @@ pub fn normalize_span_start_duration(start: &mut i64, duration: &mut i64) {
}
}

pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
pub const fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
// ParentID, TraceID and SpanID set in the client could be the same
// Supporting the ParentID == TraceID == SpanID for the root span, is compliant
// with the Zipkin implementation. Furthermore, as described in the PR
Expand All @@ -83,6 +83,17 @@ pub fn normalize_parent_id(parent_id: &mut u64, trace_id: u64, span_id: u64) {
}
}

/// Normalizes `tag` in-place, lowercasing and replacing illegal characters with `_`.
///
/// # Panics
///
/// Panics if the tag contains bytes that are not valid UTF-8 continuation sequences; in practice
/// this cannot happen because the function only processes bytes it has already validated via
/// `next_code_point`.
#[allow(
clippy::too_many_lines,
reason = "FIXME: split into sub-functions requires threading mutable cursor state, deferred"
)]
pub fn normalize_tag(tag: &mut String) {
// Since we know that we're only going to write valid utf8 we can work with the Vec directly
let bytes = unsafe { tag.as_mut_vec() };
Expand Down Expand Up @@ -249,7 +260,7 @@ fn normalize_metric_name(name: &mut String) {
// we don't go back to the beginning
write_cursor -= 1;
bytes[write_cursor] = b'.';
last_written_char = b'.'
last_written_char = b'.';
}
// If we've written a _ or a . last, do nothing
(_, b'_' | b'.') => {}
Expand Down Expand Up @@ -381,6 +392,6 @@ mod tests {
fn test_name() {
let mut v = input.to_owned();
normalize_tag(&mut v);
assert_eq!(v, expected)
assert_eq!(v, expected);
}
}
32 changes: 19 additions & 13 deletions libdd-trace-normalization/src/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> {
if !is_valid_status_code(code) {
s.meta.remove("http.status_code");
}
};
}

Ok(())
}
Expand All @@ -50,9 +50,14 @@ pub(crate) fn is_valid_status_code(sc: &str) -> bool {
false
}

/// normalize_trace takes a trace and
/// `normalize_trace` takes a trace and
/// * returns an error if there is a trace ID discrepancy between 2 spans
/// * returns an error if at least one span cannot be normalized
///
/// # Errors
///
/// Returns an error if the trace is empty, if spans have mismatched trace IDs, or if any span
/// fails normalization.
pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> {
let first_trace_id = match trace.first() {
Some(first_span) => first_span.trace_id,
Expand All @@ -71,17 +76,18 @@ pub fn normalize_trace(trace: &mut [pb::Span]) -> anyhow::Result<()> {
Ok(())
}

/// normalize_chunk takes a trace chunk and
/// `normalize_chunk` takes a trace chunk and
/// * populates origin field if it wasn't populated
/// * populates priority field if it wasn't populated the root span is used to populate these
/// fields, and it's index in TraceChunk spans vec must be passed.
/// fields, and it's index in `TraceChunk` spans vec must be passed.
///
/// # Errors
///
/// Returns an error if `root_span_index` is out of bounds for the chunk's spans.
pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> anyhow::Result<()> {
// check if priority is not populated
let root_span = match chunk.spans.get(root_span_index) {
Some(span) => span,
None => {
anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans")
}
let Some(root_span) = chunk.spans.get(root_span_index) else {
anyhow::bail!("Normalize Chunk Error: root_span_index > length of trace chunk spans")
};

if chunk.priority == SamplerPriority::None as i32 {
Expand All @@ -101,7 +107,7 @@ pub fn normalize_chunk(chunk: &mut pb::TraceChunk, root_span_index: usize) -> an
if chunk.origin.is_empty() {
if let Some(origin) = root_span.meta.get(TAG_ORIGIN) {
// Older tracers set origin in the root span.
chunk.origin = origin.to_string();
chunk.origin = origin.clone();
}
}
Ok(())
Expand Down Expand Up @@ -512,7 +518,7 @@ mod tests {
let mut root = new_test_span();
root.metrics.insert(
normalizer::TAG_SAMPLING_PRIORITY.to_string(),
normalizer::SamplerPriority::UserKeep as i32 as f64,
f64::from(normalizer::SamplerPriority::UserKeep as i32),
);

let mut chunk = new_test_chunk_with_span(root);
Expand All @@ -526,7 +532,7 @@ mod tests {
let mut root = new_test_span();
root.metrics.insert(
normalizer::TAG_SAMPLING_PRIORITY.to_string(),
normalizer::SamplerPriority::UserKeep as i32 as f64,
f64::from(normalizer::SamplerPriority::UserKeep as i32),
);

let mut chunk = new_test_chunk_with_span(root);
Expand Down Expand Up @@ -554,7 +560,7 @@ mod tests {
chunk.spans = vec![new_test_span(), new_test_span(), new_test_span()];
chunk.spans[1].metrics.insert(
normalizer::TAG_SAMPLING_PRIORITY.to_string(),
normalizer::SamplerPriority::UserKeep as i32 as f64,
f64::from(normalizer::SamplerPriority::UserKeep as i32),
);
assert!(normalizer::normalize_chunk(&mut chunk, 0).is_ok());
assert_eq!(normalizer::SamplerPriority::UserKeep as i32, chunk.priority);
Expand Down
16 changes: 10 additions & 6 deletions libdd-trace-normalization/src/utf8_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
// SPDX-License-Identifier: Apache-2.0

/// Taken from the source of
/// https://doc.rust-lang.org/std/primitive.str.html#method.floor_char_boundary
/// TODO remove when str::floor_char_boundary is stable
/// <https://doc.rust-lang.org/std/primitive.str.html#method.floor_char_boundary>
/// TODO remove when `str::floor_char_boundary` is stable
#[inline]
pub(crate) fn floor_char_boundary(s: &str, index: usize) -> usize {
pub fn floor_char_boundary(s: &str, index: usize) -> usize {
#[allow(clippy::unwrap_used)]
if index >= s.len() {
s.len()
} else {
let lower_bound = index.saturating_sub(3);
#[allow(
clippy::cast_possible_wrap,
reason = "intentional: checks UTF-8 continuation byte bitmask via signed comparison"
)]
let new_index = s.as_bytes()[lower_bound..=index]
.iter()
.rposition(|b| (*b as i8) >= -0x40);
Expand All @@ -19,11 +23,11 @@ pub(crate) fn floor_char_boundary(s: &str, index: usize) -> usize {
}
}

pub(crate) fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
// Decode UTF-8
let x = *bytes.next()?;
if x < 128 {
return Some(x as u32);
return Some(u32::from(x));
}

// Multibyte case follows
Expand All @@ -40,7 +44,7 @@ pub(crate) fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) ->
// SAFETY: `bytes` produces an UTF-8-like string,
// so the iterator must produce a value here.
let z = *bytes.next()?;
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
ch = (init << 12) | y_z;
if x >= 0xF0 {
// [x y z w] case
Expand Down
Loading