From b18e13c51d5408c3c114a20c493c9c26c514fd4d Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 5 May 2026 15:16:01 +0200 Subject: [PATCH 01/14] refacto: cleanup v04 serializer --- .../src/trace_exporter/trace_serializer.rs | 15 ++++--- .../src/msgpack_encoder/v04/mod.rs | 6 +-- libdd-trace-utils/src/trace_utils.rs | 40 +++++++++---------- libdd-trace-utils/src/tracer_payload.rs | 7 +--- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs index 93b9909cdc..d0aaf98b35 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs @@ -17,7 +17,7 @@ use libdd_trace_utils::msgpack_encoder; use libdd_trace_utils::span::{v04::Span, TraceData}; use libdd_trace_utils::trace_utils::{self, TracerHeaderTags}; use libdd_trace_utils::tracer_metadata::TracerMetadata; -use libdd_trace_utils::tracer_payload; +use libdd_trace_utils::tracer_payload::{self, TraceEncoding}; /// Minimal capacity of fresh buffers allocated to encode traces, in bytes. const MIN_BUFFER_CAPACITY: usize = 1024; @@ -74,13 +74,16 @@ impl TraceSerializer { &self, traces: Vec>>, ) -> Result, TraceExporterError> { + let map_err = |e: anyhow::Error| { + TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) + }; match self.output_format { TraceExporterOutputFormat::V1 => Ok(tracer_payload::TraceChunks::V1(traces)), - format => { - let use_v05_format = matches!(format, TraceExporterOutputFormat::V05); - trace_utils::collect_trace_chunks(traces, use_v05_format).map_err(|e| { - TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) - }) + TraceExporterOutputFormat::V04 => { + trace_utils::collect_trace_chunks(traces, TraceEncoding::V04).map_err(map_err) + } + TraceExporterOutputFormat::V05 => { + trace_utils::collect_trace_chunks(traces, TraceEncoding::V05).map_err(map_err) } } } diff --git a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs index 1c4e0ec3af..5eeffb34a7 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs @@ -123,8 +123,7 @@ pub fn to_vec_with_capacity]>>( capacity: u32, ) -> Vec { let mut buf = ByteBuf::with_capacity(capacity as usize); - #[allow(clippy::expect_used)] - to_writer(&mut buf, traces).expect("infallible: the error is std::convert::Infallible"); + let _ = to_writer(&mut buf, traces); buf.into_vec() } @@ -158,7 +157,6 @@ pub fn to_vec_with_capacity]>>( /// ``` pub fn to_encoded_byte_len]>>(traces: &[S]) -> u32 { let mut counter = super::CountLength(0); - #[allow(clippy::expect_used)] - to_writer(&mut counter, traces).expect("infallible: CountLength never fails"); + let _ = to_writer(&mut counter, traces); counter.0 } diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index 0837e21bf9..f1e5e6da08 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -7,7 +7,7 @@ use crate::span::v05::dict::SharedDict; use crate::span::{v05, TraceData}; pub use crate::tracer_header_tags::TracerHeaderTags; use crate::tracer_payload::TracerPayloadCollection; -use crate::tracer_payload::{self, TraceChunks}; +use crate::tracer_payload::{self, TraceChunks, TraceEncoding}; use anyhow::anyhow; use bytes::buf::Reader; use bytes::Buf; @@ -592,26 +592,26 @@ pub fn enrich_span_with_azure_function_metadata(span: &mut pb::Span) { pub fn collect_trace_chunks( traces: Vec>>, - use_v05_format: bool, + format: TraceEncoding, ) -> anyhow::Result> { - if use_v05_format { - let mut shared_dict = SharedDict::default(); - let mut v05_traces: Vec> = Vec::with_capacity(traces.len()); - for trace in traces { - let trace_len = trace.len(); - let v05_trace = trace.into_iter().try_fold( - Vec::with_capacity(trace_len), - |mut acc, span| -> anyhow::Result> { - acc.push(v05::from_v04_span(span, &mut shared_dict)?); - Ok(acc) - }, - )?; - - v05_traces.push(v05_trace); + match format { + TraceEncoding::V05 => { + let mut shared_dict = SharedDict::default(); + let mut v05_traces: Vec> = Vec::with_capacity(traces.len()); + for trace in traces { + let trace_len = trace.len(); + let v05_trace = trace.into_iter().try_fold( + Vec::with_capacity(trace_len), + |mut acc, span| -> anyhow::Result> { + acc.push(v05::from_v04_span(span, &mut shared_dict)?); + Ok(acc) + }, + )?; + v05_traces.push(v05_trace); + } + Ok(TraceChunks::V05((shared_dict, v05_traces))) } - Ok(TraceChunks::V05((shared_dict, v05_traces))) - } else { - Ok(TraceChunks::V04(traces)) + TraceEncoding::V04 => Ok(TraceChunks::V04(traces)), } } @@ -1132,7 +1132,7 @@ mod tests { fn test_collect_trace_chunks_v05() { let chunk = vec![create_test_no_alloc_span(123, 456, 789, 1, true)]; - let collection = collect_trace_chunks(vec![chunk], true).unwrap(); + let collection = collect_trace_chunks(vec![chunk], TraceEncoding::V05).unwrap(); let (dict, traces) = match collection { TraceChunks::V05(payload) => payload, diff --git a/libdd-trace-utils/src/tracer_payload.rs b/libdd-trace-utils/src/tracer_payload.rs index 79e603ba75..37ac1a524d 100644 --- a/libdd-trace-utils/src/tracer_payload.rs +++ b/libdd-trace-utils/src/tracer_payload.rs @@ -228,16 +228,13 @@ pub fn decode_to_trace_chunks( data: libdd_tinybytes::Bytes, encoding_type: TraceEncoding, ) -> Result<(TraceChunks, usize), anyhow::Error> { - let (data, size) = match encoding_type { + let (data, size) = match &encoding_type { TraceEncoding::V04 => msgpack_decoder::v04::from_bytes(data), TraceEncoding::V05 => msgpack_decoder::v05::from_bytes(data), } .map_err(|e| anyhow::format_err!("Error deserializing trace from request body: {e}"))?; - Ok(( - collect_trace_chunks(data, matches!(encoding_type, TraceEncoding::V05))?, - size, - )) + Ok((collect_trace_chunks(data, encoding_type)?, size)) } #[cfg(test)] From f9e935a6d990986e7c8b5bce493d5a453c087640 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Fri, 22 May 2026 13:36:38 +0200 Subject: [PATCH 02/14] fix: address comments --- libdd-common/src/lib.rs | 34 +++++++++++++++++++ .../src/trace_exporter/trace_serializer.rs | 11 +++--- libdd-trace-utils/src/msgpack_encoder/mod.rs | 15 ++++++++ .../src/msgpack_encoder/v04/mod.rs | 12 ++++++- .../src/msgpack_encoder/v1/mod.rs | 14 ++++++-- libdd-trace-utils/src/trace_utils.rs | 33 +++++++++++++----- libdd-trace-utils/src/tracer_payload.rs | 4 +-- 7 files changed, 105 insertions(+), 18 deletions(-) diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index 83e913896e..dbd2e4a090 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -135,6 +135,40 @@ impl RwLockExt for RwLock { } } +/// Extension trait that extracts the value from a `Result` whose error type is uninhabited. +/// +/// The signature constrains callers at compile time: the method is only available when the +/// error type is [`core::convert::Infallible`]. No panics — the compiler proves the `Err` +/// arm unreachable from the type. +/// +/// # Examples +/// +/// ``` +/// use libdd_common::ResultInfallibleExt; +/// use std::convert::Infallible; +/// +/// let result: Result = Ok(42); +/// assert_eq!(result.unwrap_infallible(), 42); +/// ``` +pub trait ResultInfallibleExt: sealed::Sealed { + fn unwrap_infallible(self) -> T; +} + +impl ResultInfallibleExt for Result { + #[inline(always)] + fn unwrap_infallible(self) -> T { + match self { + Ok(value) => value, + Err(never) => match never {}, + } + } +} + +mod sealed { + pub trait Sealed {} + impl Sealed for Result {} +} + pub mod header { #![allow(clippy::declare_interior_mutable_const)] use http::{header::HeaderName, HeaderValue}; diff --git a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs index d0aaf98b35..e54af5f9d6 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs @@ -74,16 +74,17 @@ impl TraceSerializer { &self, traces: Vec>>, ) -> Result, TraceExporterError> { - let map_err = |e: anyhow::Error| { - TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) - }; match self.output_format { TraceExporterOutputFormat::V1 => Ok(tracer_payload::TraceChunks::V1(traces)), TraceExporterOutputFormat::V04 => { - trace_utils::collect_trace_chunks(traces, TraceEncoding::V04).map_err(map_err) + trace_utils::collect_trace_chunks(traces, TraceEncoding::V04).map_err(|e| { + TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) + }) } TraceExporterOutputFormat::V05 => { - trace_utils::collect_trace_chunks(traces, TraceEncoding::V05).map_err(map_err) + trace_utils::collect_trace_chunks(traces, TraceEncoding::V05).map_err(|e| { + TraceExporterError::Deserialization(DecodeError::InvalidFormat(e.to_string())) + }) } } } diff --git a/libdd-trace-utils/src/msgpack_encoder/mod.rs b/libdd-trace-utils/src/msgpack_encoder/mod.rs index 06898a33eb..2d54a03349 100644 --- a/libdd-trace-utils/src/msgpack_encoder/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/mod.rs @@ -4,6 +4,21 @@ pub mod v04; pub mod v1; +use rmp::encode::ValueWriteError; +use std::convert::Infallible; + +/// Flatten `ValueWriteError` (uninhabited because both variants wrap +/// `Infallible`) into the bare `Infallible` so callers can use +/// [`libdd_common::ResultInfallibleExt`]. +#[inline(always)] +pub(crate) fn flatten_value_write_infallible(err: ValueWriteError) -> Infallible { + match err { + ValueWriteError::InvalidMarkerWrite(never) | ValueWriteError::InvalidDataWrite(never) => { + never + } + } +} + /// A writer that counts bytes without storing them, used to compute encoded payload size. pub(crate) struct CountLength(u32); diff --git a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs index 5eeffb34a7..f216e28c37 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v04/mod.rs @@ -3,6 +3,7 @@ use crate::span::v04::Span; use crate::span::TraceData; +use libdd_common::ResultInfallibleExt; use rmp::encode::{write_array_len, ByteBuf, RmpWrite, ValueWriteError}; mod span; @@ -123,7 +124,11 @@ pub fn to_vec_with_capacity]>>( capacity: u32, ) -> Vec { let mut buf = ByteBuf::with_capacity(capacity as usize); - let _ = to_writer(&mut buf, traces); + // `ByteBuf`'s `RmpWrite::Error` is `Infallible`, so `to_writer` cannot fail. The compiler + // proves the `Err` arm unreachable through `unwrap_infallible`. + to_writer(&mut buf, traces) + .map_err(super::flatten_value_write_infallible) + .unwrap_infallible(); buf.into_vec() } @@ -157,6 +162,11 @@ pub fn to_vec_with_capacity]>>( /// ``` pub fn to_encoded_byte_len]>>(traces: &[S]) -> u32 { let mut counter = super::CountLength(0); + // `CountLength` impls `std::io::Write` (whose error type is `std::io::Error`, not + // `Infallible`), so we can't statically prove infallibility via `unwrap_infallible` + // the way we do for `ByteBuf`. In practice `CountLength::write*` only ever return + // `Ok`, so the error path here is unreachable today; should `CountLength` ever grow + // a fallible code path, fuzz tests on the msgpack encoded length would catch it. let _ = to_writer(&mut counter, traces); counter.0 } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 86cef2e027..479a908729 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -6,6 +6,7 @@ mod span_v04; use crate::span::v04::Span; use crate::span::TraceData; use crate::tracer_metadata::TracerMetadata; +use libdd_common::ResultInfallibleExt; use rmp::encode::{ write_array_len, write_bin, write_map_len, write_sint, write_str, write_uint, write_uint8, ByteBuf, RmpWrite, ValueWriteError, @@ -399,7 +400,11 @@ pub fn to_vec_with_capacity]>>( metadata: &TracerMetadata, ) -> Vec { let mut buf = ByteBuf::with_capacity(capacity as usize); - let _ = encode_payload(&mut buf, traces, metadata); // infallible: ByteBuf write never fails + // `ByteBuf`'s `RmpWrite::Error` is `Infallible`, so `encode_payload` cannot fail. The + // compiler proves the `Err` arm unreachable through `unwrap_infallible`. + encode_payload(&mut buf, traces, metadata) + .map_err(super::flatten_value_write_infallible) + .unwrap_infallible(); buf.into_vec() } @@ -409,7 +414,12 @@ pub fn to_encoded_byte_len]>>( metadata: &TracerMetadata, ) -> u32 { let mut counter = super::CountLength(0); - let _ = encode_payload(&mut counter, traces, metadata); // infallible: CountLength write never fails + // `CountLength` impls `std::io::Write` (whose error type is `std::io::Error`, not + // `Infallible`), so we can't statically prove infallibility via `unwrap_infallible` + // the way we do for `ByteBuf`. In practice `CountLength::write*` only ever return + // `Ok`, so the error path here is unreachable today; should `CountLength` ever grow + // a fallible code path, fuzz tests on the msgpack encoded length would catch it. + let _ = encode_payload(&mut counter, traces, metadata); counter.0 } diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index f1e5e6da08..3cd53b04d7 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -599,14 +599,10 @@ pub fn collect_trace_chunks( let mut shared_dict = SharedDict::default(); let mut v05_traces: Vec> = Vec::with_capacity(traces.len()); for trace in traces { - let trace_len = trace.len(); - let v05_trace = trace.into_iter().try_fold( - Vec::with_capacity(trace_len), - |mut acc, span| -> anyhow::Result> { - acc.push(v05::from_v04_span(span, &mut shared_dict)?); - Ok(acc) - }, - )?; + let v05_trace = trace + .into_iter() + .map(|span| v05::from_v04_span(span, &mut shared_dict)) + .collect::>>()?; v05_traces.push(v05_trace); } Ok(TraceChunks::V05((shared_dict, v05_traces))) @@ -1203,6 +1199,27 @@ mod tests { ); } + #[test] + fn test_collect_trace_chunks_v04() { + let chunk = vec![create_test_no_alloc_span(123, 456, 789, 1, true)]; + + let collection = collect_trace_chunks(vec![chunk], TraceEncoding::V04).unwrap(); + + let traces = match collection { + TraceChunks::V04(traces) => traces, + _ => panic!("Unexpected type"), + }; + + assert_eq!(traces.len(), 1); + assert_eq!(traces[0].len(), 1); + let span = &traces[0][0]; + assert_eq!(span.trace_id, 123); + assert_eq!(span.span_id, 456); + assert_eq!(span.parent_id, 789); + assert_eq!(span.start, 1); + assert_eq!(span.error, 0); + } + #[test] fn test_rmp_serde_deserialize_meta_with_null_values() { // Create a JSON representation with null value in meta diff --git a/libdd-trace-utils/src/tracer_payload.rs b/libdd-trace-utils/src/tracer_payload.rs index 37ac1a524d..e30f04df91 100644 --- a/libdd-trace-utils/src/tracer_payload.rs +++ b/libdd-trace-utils/src/tracer_payload.rs @@ -12,7 +12,7 @@ use std::iter::Iterator; pub type TracerPayloadV04 = Vec; pub type TracerPayloadV05 = Vec; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] /// Enumerates the different encoding types. pub enum TraceEncoding { /// v0.4 encoding (TracerPayloadV04). @@ -228,7 +228,7 @@ pub fn decode_to_trace_chunks( data: libdd_tinybytes::Bytes, encoding_type: TraceEncoding, ) -> Result<(TraceChunks, usize), anyhow::Error> { - let (data, size) = match &encoding_type { + let (data, size) = match encoding_type { TraceEncoding::V04 => msgpack_decoder::v04::from_bytes(data), TraceEncoding::V05 => msgpack_decoder::v05::from_bytes(data), } From 0656b67cbe9d4938c33fb13e19399b67f577a806 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 26 May 2026 14:05:54 +0200 Subject: [PATCH 03/14] feat(trace-exporter): add v1 span and its encoder --- .../src/msgpack_encoder/v1/mod.rs | 1029 +++++++++++++++++ .../src/msgpack_encoder/v1/span_v04.rs | 2 - .../src/msgpack_encoder/v1/span_v1.rs | 271 +++++ libdd-trace-utils/src/span/mod.rs | 1 + libdd-trace-utils/src/span/v1/mod.rs | 311 +++++ 5 files changed, 1612 insertions(+), 2 deletions(-) create mode 100644 libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs create mode 100644 libdd-trace-utils/src/span/v1/mod.rs diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 479a908729..ca3a97600f 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -2,8 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 mod span_v04; +mod span_v1; use crate::span::v04::Span; +use crate::span::v1::TracerPayload; use crate::span::TraceData; use crate::tracer_metadata::TracerMetadata; use libdd_common::ResultInfallibleExt; @@ -423,6 +425,162 @@ pub fn to_encoded_byte_len]>>( counter.0 } +/// Encodes a [`TracerPayload`] (V1 canonical data model) as a V1 msgpack payload. +/// +/// This is the M3 encoder. The byte layout matches [`encode_payload`] (the M1 v0.4 → V1 +/// encoder) so equivalent inputs produce byte-identical outputs. +fn encode_payload_v1( + writer: &mut W, + payload: &TracerPayload, +) -> Result<(), ValueWriteError> { + let mut table = StringTable::new(); + + let has_attributes = !payload.attributes.is_empty(); + + let map_len = 1u32 // chunks always present + + (!payload.language_name.borrow().is_empty()) as u32 + + (!payload.language_version.borrow().is_empty()) as u32 + + (!payload.tracer_version.borrow().is_empty()) as u32 + + (!payload.runtime_id.borrow().is_empty()) as u32 + + (!payload.env.borrow().is_empty()) as u32 + + (!payload.hostname.borrow().is_empty()) as u32 + + (!payload.app_version.borrow().is_empty()) as u32 + + has_attributes as u32; + + write_map_len(writer, map_len)?; + + if !payload.language_name.borrow().is_empty() { + write_uint8(writer, trace_key::LANGUAGE_NAME)?; + table.write_interned(writer, payload.language_name.borrow())?; + } + + if !payload.language_version.borrow().is_empty() { + write_uint8(writer, trace_key::LANGUAGE_VERSION)?; + table.write_interned(writer, payload.language_version.borrow())?; + } + + if !payload.tracer_version.borrow().is_empty() { + write_uint8(writer, trace_key::TRACER_VERSION)?; + table.write_interned(writer, payload.tracer_version.borrow())?; + } + + if !payload.runtime_id.borrow().is_empty() { + write_uint8(writer, trace_key::RUNTIME_ID)?; + table.write_interned(writer, payload.runtime_id.borrow())?; + } + + if !payload.env.borrow().is_empty() { + write_uint8(writer, trace_key::ENV_REF)?; + table.write_interned(writer, payload.env.borrow())?; + } + + if !payload.hostname.borrow().is_empty() { + write_uint8(writer, trace_key::HOSTNAME_REF)?; + table.write_interned(writer, payload.hostname.borrow())?; + } + + if !payload.app_version.borrow().is_empty() { + write_uint8(writer, trace_key::APP_VERSION_REF)?; + table.write_interned(writer, payload.app_version.borrow())?; + } + + if has_attributes { + write_uint8(writer, trace_key::ATTRIBUTES)?; + span_v1::encode_attributes_map(writer, &payload.attributes, &mut table)?; + } + + write_uint8(writer, trace_key::CHUNKS)?; + write_array_len(writer, payload.chunks.len() as u32)?; + for chunk in &payload.chunks { + encode_chunk_v1(writer, chunk, &mut table)?; + } + + Ok(()) +} + +fn encode_chunk_v1( + writer: &mut W, + chunk: &crate::span::v1::TraceChunk, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + let has_origin = chunk + .origin + .as_ref() + .is_some_and(|o| !>::borrow(o).is_empty()); + + let fields = 2u32 // trace_id + spans + + has_origin as u32 + + chunk.priority.is_some() as u32 + + chunk.sampling_mechanism.is_some() as u32; + + write_map_len(writer, fields)?; + + write_uint8(writer, chunk_key::TRACE_ID)?; + write_bin(writer, &chunk.trace_id)?; + + if let Some(origin) = chunk + .origin + .as_ref() + .filter(|o| !>::borrow(o).is_empty()) + { + write_uint8(writer, chunk_key::ORIGIN)?; + table.write_interned(writer, >::borrow(origin))?; + } + + if let Some(priority) = chunk.priority { + write_uint8(writer, chunk_key::PRIORITY)?; + write_sint(writer, priority as i64)?; + } + + if let Some(mechanism) = chunk.sampling_mechanism { + write_uint8(writer, chunk_key::SAMPLING_MECHANISM)?; + write_uint(writer, mechanism as u64)?; + } + + write_uint8(writer, chunk_key::SPANS)?; + write_array_len(writer, chunk.spans.len() as u32)?; + for span in &chunk.spans { + span_v1::encode_span(writer, span, table)?; + } + + Ok(()) +} + +/// Serializes a V1 [`TracerPayload`] into a `Vec` using the V1 msgpack format. +pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec { + to_vec_from_payload_with_capacity(payload, 0) +} + +/// Serializes a V1 [`TracerPayload`] into a `Vec` with a pre-allocated capacity. +pub fn to_vec_from_payload_with_capacity( + payload: &TracerPayload, + capacity: u32, +) -> Vec { + let mut buf = ByteBuf::with_capacity(capacity as usize); + encode_payload_v1(&mut buf, payload) + .map_err(super::flatten_value_write_infallible) + .unwrap_infallible(); + buf.into_vec() +} + +/// Serializes a V1 [`TracerPayload`] into a caller-provided slice. +/// +/// # Errors +/// Returns a `ValueWriteError` if the underlying writer fails (e.g. buffer too small). +pub fn write_payload_to_slice( + slice: &mut &mut [u8], + payload: &TracerPayload, +) -> Result<(), ValueWriteError> { + encode_payload_v1(slice, payload) +} + +/// Returns the number of bytes the V1 payload for `payload` would occupy when encoded. +pub fn to_encoded_byte_len_from_payload(payload: &TracerPayload) -> u32 { + let mut counter = super::CountLength(0); + let _ = encode_payload_v1(&mut counter, payload); + counter.0 +} + #[cfg(test)] mod tests { use super::*; @@ -866,3 +1024,874 @@ mod tests { ); } } + +#[cfg(test)] +mod v1_payload_tests { + //! Unit tests for the M3 encoder (`encode_payload_v1`). + //! + //! Verifies the encoder produces a valid V1 payload from the canonical + //! [`crate::span::v1::TracerPayload`] data model and that core invariants (interning, byte + //! length, optional fields) hold. + + use super::*; + use crate::span::v1::{ + AttributeValue, Span as V1Span, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, + TracerPayloadBytes, + }; + use libdd_tinybytes::BytesString; + + fn bs(s: &str) -> BytesString { + BytesString::from_slice(s.as_bytes()).unwrap_or_default() + } + + fn make_span(service: &str, name: &str, span_id: u64) -> V1SpanBytes { + V1Span { + service: bs(service), + name: bs(name), + resource: bs("res"), + span_id, + start: 1_000_000, + duration: 500, + ..Default::default() + } + } + + fn make_chunk(spans: Vec, trace_id: [u8; 16]) -> TraceChunkBytes { + TraceChunkBytes { + trace_id, + spans, + ..Default::default() + } + } + + #[test] + fn empty_payload_is_valid_msgpack_map() { + let payload = TracerPayloadBytes::default(); + let encoded = to_vec_from_payload(&payload); + // Map with a single entry (chunks), then an empty array. `0x81` = fixmap of length 1, + // followed by chunk key (0x0b), then `0x90` (fixarray length 0). + assert_eq!(encoded, vec![0x81, 0x0b, 0x90]); + } + + #[test] + fn payload_byte_len_matches_to_vec() { + let chunk = make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16]); + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + let len = to_encoded_byte_len_from_payload(&payload); + assert_eq!(encoded.len() as u32, len); + } + + #[test] + fn span_kind_is_always_emitted_as_uint() { + // Default SpanKind (Internal=1) must be emitted. The encoded payload contains + // `kind_key (0x10) | uint 1 (0x01)`. + let chunk = make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16]); + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + let pat = [0x10u8, 0x01u8]; + assert!( + encoded.windows(2).any(|w| w == pat), + "Kind (key=16) Internal (=1) must be emitted" + ); + } + + #[test] + fn typed_attributes_carry_correct_type_discriminants() { + let mut attrs = HashMap::new(); + attrs.insert(bs("k_str"), AttributeValue::String(bs("v"))); + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + attributes: attrs, + ..Default::default() + }; + let chunk = make_chunk(vec![span], [0u8; 16]); + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // String attribute → type discriminant = 1 (`AnyValueKey::String`). + assert!( + encoded.windows(b"k_str".len()).any(|w| w == b"k_str"), + "attribute key must appear" + ); + } + + #[test] + fn bytes_attribute_uses_bin_marker() { + // A Bytes attribute must use the msgpack `bin` family, not `str`. + let mut attrs = HashMap::new(); + attrs.insert( + bs("payload"), + AttributeValue::Bytes(libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad")), + ); + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + attributes: attrs, + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![make_chunk(vec![span], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // bin8 marker `0xc4` followed by length `0x02` and the bytes themselves. + let want = [0xc4u8, 0x02, 0xde, 0xad]; + assert!( + encoded.windows(4).any(|w| w == want), + "Bytes attribute must be encoded as msgpack bin" + ); + } + + #[test] + fn list_and_keyvalue_attributes_round_trip_through_recursion() { + let mut nested = HashMap::new(); + nested.insert(bs("nk"), AttributeValue::Int(7)); + let mut attrs = HashMap::new(); + attrs.insert( + bs("list"), + AttributeValue::List(vec![ + AttributeValue::String(bs("a")), + AttributeValue::Bool(true), + ]), + ); + attrs.insert(bs("kv"), AttributeValue::KeyValue(nested)); + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + attributes: attrs, + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![make_chunk(vec![span], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // The keys and the nested key must all appear at least once. + for s in &[b"list" as &[u8], b"kv", b"a", b"nk"] { + assert!( + encoded.windows(s.len()).any(|w| w == *s), + "{} should appear in payload", + std::str::from_utf8(s).unwrap() + ); + } + } + + #[test] + fn promoted_fields_at_payload_level() { + let payload = TracerPayloadBytes { + language_name: bs("python"), + language_version: bs("3.11"), + tracer_version: bs("2.0.0"), + runtime_id: bs("rt-1"), + env: bs("prod"), + hostname: bs("h"), + app_version: bs("1.2.3"), + chunks: vec![make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + for s in &[ + b"python" as &[u8], + b"3.11", + b"2.0.0", + b"rt-1", + b"prod", + b"1.2.3", + ] { + assert!( + encoded.windows(s.len()).any(|w| w == *s), + "{} should appear", + std::str::from_utf8(s).unwrap() + ); + } + } + + #[test] + fn chunk_level_attrs_emitted_when_set() { + let chunk = TraceChunkBytes { + trace_id: [0u8; 16], + priority: Some(1), + origin: Some(bs("lambda")), + sampling_mechanism: Some(4), + spans: vec![make_span("svc", "op", 1)], + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + assert!( + encoded.windows(b"lambda".len()).any(|w| w == b"lambda"), + "chunk origin should appear" + ); + // sampling_mechanism=4 → SAMPLING_MECHANISM (0x07) + positive fixint 0x04 + let want = [chunk_key::SAMPLING_MECHANISM, 0x04]; + assert!(encoded.windows(2).any(|w| w == want)); + } + + #[test] + fn span_kind_otel_values() { + for (kind, expected_byte) in [ + (SpanKind::Internal, 0x01u8), + (SpanKind::Server, 0x02), + (SpanKind::Client, 0x03), + (SpanKind::Producer, 0x04), + (SpanKind::Consumer, 0x05), + ] { + let span = V1Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1, + duration: 1, + span_kind: kind, + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![make_chunk(vec![span], [0u8; 16])], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + let want = [0x10u8, expected_byte]; + assert!( + encoded.windows(2).any(|w| w == want), + "SpanKind {kind:?} should produce byte {expected_byte:#x}" + ); + } + } + + #[test] + fn string_interning_works_across_chunks() { + // The string "shared" appears in two chunks. The second occurrence must be a uint ID, + // not a fresh str. Compare against a baseline with a single occurrence to verify. + let chunk_with_two = TracerPayloadBytes { + chunks: vec![ + make_chunk(vec![make_span("shared", "op1", 1)], [0u8; 16]), + make_chunk(vec![make_span("shared", "op2", 2)], [0u8; 16]), + ], + ..Default::default() + }; + let single = TracerPayloadBytes { + chunks: vec![make_chunk(vec![make_span("shared", "op1", 1)], [0u8; 16])], + ..Default::default() + }; + let two = to_vec_from_payload(&chunk_with_two); + let one = to_vec_from_payload(&single); + assert!( + two.len() < 2 * one.len(), + "interning should reduce repeated payload size" + ); + } +} + +#[cfg(test)] +mod cross_validation_tests { + //! Cross-validates that the M1 encoder (v0.4 spans → V1 payload) and the M3 encoder + //! (v1::Span → V1 payload) produce **byte-identical** output for equivalent inputs. + //! + //! All tests are limited to deterministic content (at most one attribute key per map) so the + //! `HashMap` iteration order cannot diverge between the two inputs. + + use super::*; + use crate::span::v04::SpanBytes as V04Span; + use crate::span::v1::{ + AttributeValue, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, TracerPayloadBytes, + }; + use libdd_tinybytes::BytesString; + + fn bs(s: &str) -> BytesString { + BytesString::from_slice(s.as_bytes()).unwrap_or_default() + } + + /// Builds a 128-bit big-endian trace_id from `(high, low)` 64-bit halves. + fn tid_bytes(high: u64, low: u64) -> [u8; 16] { + let mut out = [0u8; 16]; + out[..8].copy_from_slice(&high.to_be_bytes()); + out[8..].copy_from_slice(&low.to_be_bytes()); + out + } + + /// Asserts that encoding `v04` (with `metadata`) via M1 produces the same bytes as + /// encoding `v1` via M3. Includes a hex-diff message on mismatch. + #[track_caller] + fn assert_byte_equal( + v04_traces: &[Vec], + metadata: &TracerMetadata, + v1_payload: &TracerPayloadBytes, + ) { + let m1 = to_vec(v04_traces, metadata); + let m3 = to_vec_from_payload(v1_payload); + if m1 != m3 { + panic!( + "M1 and M3 encoders diverged:\n M1 ({:3} bytes): {}\n M3 ({:3} bytes): {}", + m1.len(), + hex_dump(&m1), + m3.len(), + hex_dump(&m3) + ); + } + } + + fn hex_dump(b: &[u8]) -> String { + b.iter().map(|c| format!("{c:02x}")).collect::() + } + + #[test] + fn empty_payload_byte_identical() { + let v04: Vec> = vec![]; + let v1 = TracerPayloadBytes::default(); + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn minimal_single_span_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 0x42, + span_id: 1, + start: 1_000_000, + duration: 500, + ..Default::default() + }]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 0x42), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1_000_000, + duration: 500, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn span_with_parent_and_error_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 2, + parent_id: 1, + start: 1000, + duration: 100, + error: 1, + ..Default::default() + }]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 2, + parent_id: 1, + start: 1000, + duration: 100, + error: true, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn promoted_fields_byte_identical() { + // M1 reads env/version/component/span.kind from v04 meta and promotes them; M3 takes + // them directly from the v1::Span fields. Both must produce the same bytes. + let mut meta = HashMap::new(); + meta.insert(bs("env"), bs("prod")); + meta.insert(bs("version"), bs("1.2.3")); + meta.insert(bs("component"), bs("flask")); + meta.insert(bs("span.kind"), bs("server")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + // metadata.env populated → M1 picks env from metadata first (it's set on the builder). + let metadata = TracerMetadata { + env: "prod".to_string(), + app_version: "1.2.3".to_string(), + ..Default::default() + }; + + let v1 = TracerPayloadBytes { + env: bs("prod"), + app_version: bs("1.2.3"), + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + env: bs("prod"), + version: bs("1.2.3"), + component: bs("flask"), + span_kind: SpanKind::Server, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &metadata, &v1); + } + + #[test] + fn single_string_meta_attribute_byte_identical() { + // One non-promoted meta tag → one attribute triplet. With a single entry the HashMap + // iteration order cannot vary. + let mut meta = HashMap::new(); + meta.insert(bs("custom.tag"), bs("hello")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert(bs("custom.tag"), AttributeValue::String(bs("hello"))); + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn single_float_metric_byte_identical() { + let mut metrics = HashMap::new(); + metrics.insert(bs("score"), 1.5f64); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + metrics, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert(bs("score"), AttributeValue::Float(1.5)); + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn single_bytes_meta_struct_byte_identical() { + let mut meta_struct = HashMap::new(); + meta_struct.insert( + bs("payload"), + libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad\xbe\xef"), + ); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta_struct, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert( + bs("payload"), + AttributeValue::Bytes(libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad\xbe\xef")), + ); + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn chunk_origin_only_byte_identical() { + // The M1 encoder's `is_promoted` filter only strips env/version/component/span.kind/ + // _dd.p.tid — it intentionally keeps `_dd.origin` in span attributes even though it's + // also lifted to the chunk. M3 must reproduce that duplication for byte equality. + let mut meta = HashMap::new(); + meta.insert(bs("_dd.origin"), bs("lambda")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + let mut attrs = HashMap::new(); + attrs.insert(bs("_dd.origin"), AttributeValue::String(bs("lambda"))); + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + origin: Some(bs("lambda")), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + attributes: attrs, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn trace_id_128_bit_from_dd_p_tid_byte_identical() { + let mut meta = HashMap::new(); + meta.insert(bs("_dd.p.tid"), bs("640cfd5400000000")); + + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 0x0123456789abcdef, + span_id: 1, + start: 1000, + duration: 100, + meta, + ..Default::default() + }]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0x640cfd5400000000, 0x0123456789abcdef), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn tracer_metadata_fields_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }]]; + let metadata = TracerMetadata { + language: "python".to_string(), + language_version: "3.11".to_string(), + tracer_version: "2.0.0".to_string(), + runtime_id: "abc-uuid".to_string(), + hostname: "h1".to_string(), + ..Default::default() + }; + + let v1 = TracerPayloadBytes { + language_name: bs("python"), + language_version: bs("3.11"), + tracer_version: bs("2.0.0"), + runtime_id: bs("abc-uuid"), + hostname: bs("h1"), + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &metadata, &v1); + } + + #[test] + fn payload_attribute_git_commit_sha_byte_identical() { + let v04 = vec![vec![V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }]]; + let metadata = TracerMetadata { + git_commit_sha: "abc123".to_string(), + ..Default::default() + }; + + let mut payload_attrs = HashMap::new(); + payload_attrs.insert( + bs("_dd.git.commit.sha"), + AttributeValue::String(bs("abc123")), + ); + + let v1 = TracerPayloadBytes { + attributes: payload_attrs, + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &metadata, &v1); + } + + #[test] + fn span_with_single_link_byte_identical() { + let v04_span = V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + span_links: vec![crate::span::v04::SpanLink { + trace_id: 0x0123456789abcdef, + trace_id_high: 0, + span_id: 99, + tracestate: bs("running"), + flags: 0, + attributes: HashMap::new(), + }], + ..Default::default() + }; + let v04 = vec![vec![v04_span]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + span_links: vec![crate::span::v1::SpanLinkBytes { + trace_id: tid_bytes(0, 0x0123456789abcdef), + span_id: 99, + tracestate: bs("running"), + flags: 0, + attributes: HashMap::new(), + }], + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } + + #[test] + fn span_with_single_event_byte_identical() { + use crate::span::v04::{AttributeAnyValue, AttributeArrayValue}; + + let v04_span = V04Span { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + trace_id: 1, + span_id: 1, + start: 1000, + duration: 100, + span_events: vec![crate::span::v04::SpanEvent { + time_unix_nano: 42, + name: bs("exception"), + attributes: HashMap::from([( + bs("exception.message"), + AttributeAnyValue::SingleValue(AttributeArrayValue::String(bs("boom"))), + )]), + }], + ..Default::default() + }; + let v04 = vec![vec![v04_span]]; + + let v1 = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs("svc"), + name: bs("op"), + resource: bs("res"), + span_id: 1, + start: 1000, + duration: 100, + span_events: vec![crate::span::v1::SpanEventBytes { + time_unix_nano: 42, + name: bs("exception"), + attributes: HashMap::from([( + bs("exception.message"), + AttributeValue::String(bs("boom")), + )]), + }], + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + assert_byte_equal(&v04, &TracerMetadata::default(), &v1); + } +} diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs index 2c5962f8f1..022f80a7af 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs @@ -61,8 +61,6 @@ pub(super) enum AnyValueKey { Int64 = 4, Bytes = 5, Array = 6, - /// Not used in V04→V1 conversion (V04 has no key-value list type), defined for completeness. - #[allow(dead_code)] KeyValueList = 7, } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs new file mode 100644 index 0000000000..e7af11fd84 --- /dev/null +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -0,0 +1,271 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! V1 msgpack encoder that consumes the canonical [`crate::span::v1`] data model. +//! +//! Mirrors the encoder in [`super::span_v04`] but takes pre-promoted fields directly from the +//! struct instead of extracting them from v0.4 meta. The byte layout is identical so payloads +//! produced from equivalent inputs by either encoder are byte-for-byte equal — see the +//! cross-validation tests in [`super::cross_validation_tests`]. + +use crate::span::v1::{AttributeValue, Span, SpanEvent, SpanLink}; +use crate::span::TraceData; +use rmp::encode::{ + write_array_len, write_bin, write_bool, write_f64, write_map_len, write_sint, write_u64, + write_uint, write_uint8, RmpWrite, ValueWriteError, +}; +use std::borrow::Borrow; + +use super::span_v04::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey}; +use super::StringTable; + +/// Encodes a typed [`AttributeValue`] as `[type_uint8, value]`. +/// +/// Recursive: [`AttributeValue::List`] and [`AttributeValue::KeyValue`] contain nested +/// [`AttributeValue`]s that are encoded in the same `[type, value]` shape. +pub(super) fn encode_attribute_value( + writer: &mut W, + value: &AttributeValue, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + match value { + AttributeValue::String(s) => { + write_uint8(writer, AnyValueKey::String as u8)?; + table.write_interned(writer, s.borrow())?; + } + AttributeValue::Bool(b) => { + write_uint8(writer, AnyValueKey::Bool as u8)?; + write_bool(writer, *b).map_err(ValueWriteError::InvalidDataWrite)?; + } + AttributeValue::Float(f) => { + write_uint8(writer, AnyValueKey::Double as u8)?; + write_f64(writer, *f)?; + } + AttributeValue::Int(i) => { + write_uint8(writer, AnyValueKey::Int64 as u8)?; + write_sint(writer, *i)?; + } + AttributeValue::Bytes(b) => { + write_uint8(writer, AnyValueKey::Bytes as u8)?; + write_bin(writer, b.borrow())?; + } + AttributeValue::List(arr) => { + write_uint8(writer, AnyValueKey::Array as u8)?; + write_array_len(writer, arr.len() as u32)?; + for v in arr { + encode_attribute_value(writer, v, table)?; + } + } + AttributeValue::KeyValue(map) => { + write_uint8(writer, AnyValueKey::KeyValueList as u8)?; + write_map_len(writer, map.len() as u32)?; + for (k, v) in map { + table.write_interned(writer, k.borrow())?; + encode_attribute_value(writer, v, table)?; + } + } + } + Ok(()) +} + +/// Encodes a flat triplet attributes array: `[key, type_uint8, value, key, type_uint8, value, ...]`. +/// +/// The array length is `3 * map.len()` per the V1 wire format. +pub(super) fn encode_attributes_map( + writer: &mut W, + map: &std::collections::HashMap>, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + write_array_len(writer, (map.len() as u32) * 3)?; + for (k, v) in map { + table.write_interned(writer, k.borrow())?; + encode_attribute_value(writer, v, table)?; + } + Ok(()) +} + +/// Encodes span links from the V1 data model. +pub(super) fn encode_span_links( + writer: &mut W, + span_links: &[SpanLink], + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + write_uint8(writer, SpanKey::SpanLinks as u8)?; + write_array_len(writer, span_links.len() as u32)?; + + for link in span_links { + let link_len = 1 // trace_id (always) + + (link.span_id != 0) as u32 + + (!link.attributes.is_empty()) as u32 + + (!link.tracestate.borrow().is_empty()) as u32 + + (link.flags != 0) as u32; + + write_map_len(writer, link_len)?; + + write_uint8(writer, SpanLinkKey::TraceId as u8)?; + write_bin(writer, &link.trace_id)?; + + if link.span_id != 0 { + write_uint8(writer, SpanLinkKey::SpanId as u8)?; + write_u64(writer, link.span_id)?; + } + + if !link.attributes.is_empty() { + write_uint8(writer, SpanLinkKey::Attributes as u8)?; + encode_attributes_map(writer, &link.attributes, table)?; + } + + if !link.tracestate.borrow().is_empty() { + write_uint8(writer, SpanLinkKey::TraceState as u8)?; + table.write_interned(writer, link.tracestate.borrow())?; + } + + if link.flags != 0 { + write_uint8(writer, SpanLinkKey::Flags as u8)?; + write_uint(writer, link.flags as u64)?; + } + } + + Ok(()) +} + +/// Encodes span events from the V1 data model. +pub(super) fn encode_span_events( + writer: &mut W, + span_events: &[SpanEvent], + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + write_uint8(writer, SpanKey::SpanEvents as u8)?; + write_array_len(writer, span_events.len() as u32)?; + + for event in span_events { + let event_len = 2 // time + name + + (!event.attributes.is_empty()) as u32; + + write_map_len(writer, event_len)?; + + write_uint8(writer, SpanEventKey::Time as u8)?; + write_u64(writer, event.time_unix_nano)?; + + write_uint8(writer, SpanEventKey::Name as u8)?; + table.write_interned(writer, event.name.borrow())?; + + if !event.attributes.is_empty() { + write_uint8(writer, SpanEventKey::Attributes as u8)?; + encode_attributes_map(writer, &event.attributes, table)?; + } + } + + Ok(()) +} + +/// Encodes a [`Span`] (V1 data model) into V1 msgpack. +/// +/// Field-write order matches [`super::span_v04::encode_span`] so equivalent inputs produce +/// byte-identical output across the two encoders. +pub(super) fn encode_span( + writer: &mut W, + span: &Span, + table: &mut StringTable, +) -> Result<(), ValueWriteError> { + let is_parent = span.parent_id != 0; + let has_duration = span.duration != 0; + let has_error = span.error; + let has_attributes = !span.attributes.is_empty(); + let has_env = !span.env.borrow().is_empty(); + let has_version = !span.version.borrow().is_empty(); + let has_component = !span.component.borrow().is_empty(); + + let span_len = 3 // span_id, start, kind — always present + + (!span.service.borrow().is_empty()) as u32 + + (!span.name.borrow().is_empty()) as u32 + + (!span.resource.borrow().is_empty()) as u32 + + (!span.r#type.borrow().is_empty()) as u32 + + is_parent as u32 + + has_duration as u32 + + has_error as u32 + + has_attributes as u32 + + (!span.span_links.is_empty()) as u32 + + (!span.span_events.is_empty()) as u32 + + has_env as u32 + + has_version as u32 + + has_component as u32; + + write_map_len(writer, span_len)?; + + if !span.service.borrow().is_empty() { + write_uint8(writer, SpanKey::Service as u8)?; + table.write_interned(writer, span.service.borrow())?; + } + + if !span.name.borrow().is_empty() { + write_uint8(writer, SpanKey::Name as u8)?; + table.write_interned(writer, span.name.borrow())?; + } + + if !span.resource.borrow().is_empty() { + write_uint8(writer, SpanKey::Resource as u8)?; + table.write_interned(writer, span.resource.borrow())?; + } + + write_uint8(writer, SpanKey::SpanId as u8)?; + write_u64(writer, span.span_id)?; + + write_uint8(writer, SpanKey::Start as u8)?; + // V1 normalization is the producer's responsibility: a negative `start` is not expected in + // the canonical data model. Cast preserves bits — callers that need wall-clock substitution + // should perform it before constructing the v1::Span. + write_u64(writer, span.start as u64)?; + + if is_parent { + write_uint8(writer, SpanKey::ParentId as u8)?; + write_u64(writer, span.parent_id)?; + } + + if has_duration { + write_uint8(writer, SpanKey::Duration as u8)?; + // Same rationale as for `start`: V1 inputs are expected to be normalized. + write_u64(writer, span.duration.max(0) as u64)?; + } + + if has_error { + write_uint8(writer, SpanKey::Error as u8)?; + write_bool(writer, true).map_err(ValueWriteError::InvalidDataWrite)?; + } + + if !span.r#type.borrow().is_empty() { + write_uint8(writer, SpanKey::Type as u8)?; + table.write_interned(writer, span.r#type.borrow())?; + } + + if has_attributes { + write_uint8(writer, SpanKey::Attributes as u8)?; + encode_attributes_map(writer, &span.attributes, table)?; + } + + if !span.span_links.is_empty() { + encode_span_links(writer, &span.span_links, table)?; + } + + if !span.span_events.is_empty() { + encode_span_events(writer, &span.span_events, table)?; + } + + if has_env { + write_uint8(writer, SpanKey::Env as u8)?; + table.write_interned(writer, span.env.borrow())?; + } + if has_version { + write_uint8(writer, SpanKey::Version as u8)?; + table.write_interned(writer, span.version.borrow())?; + } + if has_component { + write_uint8(writer, SpanKey::Component as u8)?; + table.write_interned(writer, span.component.borrow())?; + } + // SpanKind is always emitted per OTEL spec (default = Internal = 1). + write_uint8(writer, SpanKey::Kind as u8)?; + write_uint(writer, span.span_kind as u64)?; + + Ok(()) +} diff --git a/libdd-trace-utils/src/span/mod.rs b/libdd-trace-utils/src/span/mod.rs index e6358dfc7a..ceb68e5b53 100644 --- a/libdd-trace-utils/src/span/mod.rs +++ b/libdd-trace-utils/src/span/mod.rs @@ -4,6 +4,7 @@ pub mod trace_utils; pub mod v04; pub mod v05; +pub mod v1; use crate::msgpack_decoder::decode::buffer::read_string_ref_nomut; use crate::msgpack_decoder::decode::error::DecodeError; diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs new file mode 100644 index 0000000000..993aab7b05 --- /dev/null +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -0,0 +1,311 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Canonical internal representation of a V1 trace. +//! +//! See the design doc and `RFC: Efficient Trace Payload Protocol`. Compared to v0.4, V1: +//! - promotes `env`, `version`, `component`, and `span.kind` out of the meta map into dedicated +//! span fields; +//! - merges `meta`, `metrics`, and `meta_struct` into a single typed [`AttributeValue`] map; +//! - represents `error` as `bool` and `trace_id` as a 128-bit big-endian byte array carried at the +//! chunk level. + +use crate::span::{BytesData, SliceData, TraceData}; +use std::collections::HashMap; + +/// OpenTelemetry SpanKind values, encoded on the wire as a `uint32`. +/// +/// Unset / unknown kinds default to [`SpanKind::Internal`] to match the OTEL spec and the agent's +/// behavior in `pkg/trace/api/converter.go`. +#[repr(u32)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SpanKind { + #[default] + Internal = 1, + Server = 2, + Client = 3, + Producer = 4, + Consumer = 5, +} + +impl SpanKind { + /// Parses the legacy v0.4 `span.kind` meta string into a [`SpanKind`]. + /// + /// Unrecognized values map to [`SpanKind::Internal`] per OTEL semantics. This is the + /// infallible counterpart to [`FromStr::from_str`]: callers converting from v0.4 always have a + /// well-defined SpanKind, even if the upstream tag is missing or invalid. + pub fn from_meta(s: &str) -> Self { + match s { + "server" => SpanKind::Server, + "client" => SpanKind::Client, + "producer" => SpanKind::Producer, + "consumer" => SpanKind::Consumer, + _ => SpanKind::Internal, + } + } +} + +/// Typed V1 attribute value. +/// +/// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. The byte layout on the wire is a +/// `(key, type_uint8, value)` triplet — see `msgpack_encoder::v1::span_v1`. +#[derive(Debug, PartialEq)] +pub enum AttributeValue { + String(T::Text), + Float(f64), + Int(i64), + Bool(bool), + Bytes(T::Bytes), + KeyValue(HashMap>), + List(Vec>), +} + +impl Clone for AttributeValue +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + match self { + AttributeValue::String(v) => AttributeValue::String(v.clone()), + AttributeValue::Float(v) => AttributeValue::Float(*v), + AttributeValue::Int(v) => AttributeValue::Int(*v), + AttributeValue::Bool(v) => AttributeValue::Bool(*v), + AttributeValue::Bytes(v) => AttributeValue::Bytes(v.clone()), + AttributeValue::KeyValue(m) => AttributeValue::KeyValue(m.clone()), + AttributeValue::List(v) => AttributeValue::List(v.clone()), + } + } +} + +/// Canonical V1 span model. +/// +/// Generic over [`TraceData`] so the same type can be used with owned (`BytesData`) or borrowed +/// (`SliceData`) string buffers — matching the v0.4 [`crate::span::v04::Span`] pattern. +#[derive(Debug, PartialEq, Default)] +pub struct Span { + pub service: T::Text, + pub name: T::Text, + pub resource: T::Text, + pub r#type: T::Text, + /// 128-bit trace ID stored as big-endian bytes. Wire-level trace ID lives at the chunk; the + /// per-span copy lets callers route a span to its chunk without scanning siblings. + pub trace_id: [u8; 16], + pub span_id: u64, + pub parent_id: u64, + pub start: i64, + pub duration: i64, + pub error: bool, + pub span_kind: SpanKind, + pub env: T::Text, + pub version: T::Text, + pub component: T::Text, + pub attributes: HashMap>, + pub span_links: Vec>, + pub span_events: Vec>, +} + +impl Clone for Span +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + Span { + service: self.service.clone(), + name: self.name.clone(), + resource: self.resource.clone(), + r#type: self.r#type.clone(), + trace_id: self.trace_id, + span_id: self.span_id, + parent_id: self.parent_id, + start: self.start, + duration: self.duration, + error: self.error, + span_kind: self.span_kind, + env: self.env.clone(), + version: self.version.clone(), + component: self.component.clone(), + attributes: self.attributes.clone(), + span_links: self.span_links.clone(), + span_events: self.span_events.clone(), + } + } +} + +/// V1 span link. The 128-bit linked trace ID is stored in big-endian bytes. +#[derive(Debug, PartialEq, Default)] +pub struct SpanLink { + pub trace_id: [u8; 16], + pub span_id: u64, + pub attributes: HashMap>, + pub tracestate: T::Text, + pub flags: u32, +} + +impl Clone for SpanLink +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + SpanLink { + trace_id: self.trace_id, + span_id: self.span_id, + attributes: self.attributes.clone(), + tracestate: self.tracestate.clone(), + flags: self.flags, + } + } +} + +/// V1 span event. +#[derive(Debug, PartialEq, Default)] +pub struct SpanEvent { + pub time_unix_nano: u64, + pub name: T::Text, + pub attributes: HashMap>, +} + +impl Clone for SpanEvent +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + SpanEvent { + time_unix_nano: self.time_unix_nano, + name: self.name.clone(), + attributes: self.attributes.clone(), + } + } +} + +/// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata +/// promoted out of span meta (priority, origin, sampling mechanism). +#[derive(Debug, PartialEq, Default)] +pub struct TraceChunk { + pub trace_id: [u8; 16], + pub priority: Option, + pub origin: Option, + pub sampling_mechanism: Option, + pub dropped_trace: bool, + pub attributes: HashMap>, + pub spans: Vec>, +} + +impl Clone for TraceChunk +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + TraceChunk { + trace_id: self.trace_id, + priority: self.priority, + origin: self.origin.clone(), + sampling_mechanism: self.sampling_mechanism, + dropped_trace: self.dropped_trace, + attributes: self.attributes.clone(), + spans: self.spans.clone(), + } + } +} + +/// A V1 tracer payload: tracer-level metadata and the list of trace chunks it carries. +#[derive(Debug, PartialEq, Default)] +pub struct TracerPayload { + pub language_name: T::Text, + pub language_version: T::Text, + pub tracer_version: T::Text, + pub runtime_id: T::Text, + pub env: T::Text, + pub hostname: T::Text, + pub app_version: T::Text, + pub attributes: HashMap>, + pub chunks: Vec>, +} + +impl Clone for TracerPayload +where + T::Text: Clone, + T::Bytes: Clone, +{ + fn clone(&self) -> Self { + TracerPayload { + language_name: self.language_name.clone(), + language_version: self.language_version.clone(), + tracer_version: self.tracer_version.clone(), + runtime_id: self.runtime_id.clone(), + env: self.env.clone(), + hostname: self.hostname.clone(), + app_version: self.app_version.clone(), + attributes: self.attributes.clone(), + chunks: self.chunks.clone(), + } + } +} + +pub type SpanBytes = Span; +pub type SpanLinkBytes = SpanLink; +pub type SpanEventBytes = SpanEvent; +pub type AttributeValueBytes = AttributeValue; +pub type TraceChunkBytes = TraceChunk; +pub type TracerPayloadBytes = TracerPayload; + +pub type SpanSlice<'a> = Span>; +pub type SpanLinkSlice<'a> = SpanLink>; +pub type SpanEventSlice<'a> = SpanEvent>; +pub type AttributeValueSlice<'a> = AttributeValue>; +pub type TraceChunkSlice<'a> = TraceChunk>; +pub type TracerPayloadSlice<'a> = TracerPayload>; + +#[cfg(test)] +mod tests { + use super::*; + use libdd_tinybytes::BytesString; + + #[test] + fn span_kind_default_is_internal() { + assert_eq!(SpanKind::default(), SpanKind::Internal); + } + + #[test] + fn span_kind_from_meta() { + assert_eq!(SpanKind::from_meta("server"), SpanKind::Server); + assert_eq!(SpanKind::from_meta("client"), SpanKind::Client); + assert_eq!(SpanKind::from_meta("producer"), SpanKind::Producer); + assert_eq!(SpanKind::from_meta("consumer"), SpanKind::Consumer); + assert_eq!(SpanKind::from_meta("internal"), SpanKind::Internal); + assert_eq!(SpanKind::from_meta(""), SpanKind::Internal); + assert_eq!(SpanKind::from_meta("anything-else"), SpanKind::Internal); + } + + #[test] + fn span_kind_repr_matches_otel_spec() { + assert_eq!(SpanKind::Internal as u32, 1); + assert_eq!(SpanKind::Server as u32, 2); + assert_eq!(SpanKind::Client as u32, 3); + assert_eq!(SpanKind::Producer as u32, 4); + assert_eq!(SpanKind::Consumer as u32, 5); + } + + #[test] + fn span_default_has_zero_trace_id_and_internal_kind() { + let s = SpanBytes::default(); + assert_eq!(s.trace_id, [0u8; 16]); + assert_eq!(s.span_kind, SpanKind::Internal); + assert!(!s.error); + assert!(s.attributes.is_empty()); + } + + #[test] + fn attribute_value_clone_preserves_variants() { + let s = AttributeValueBytes::String(BytesString::from_static("v")); + assert_eq!(s.clone(), s); + let n = AttributeValueBytes::Int(42); + assert_eq!(n.clone(), n); + let list = AttributeValueBytes::List(vec![AttributeValueBytes::Bool(true)]); + assert_eq!(list.clone(), list); + } +} From 53ae7735c6eae6618bd0c818bdfc453218ddf5e6 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 26 May 2026 17:13:00 +0200 Subject: [PATCH 04/14] chore: align v1 doc comments with v04 style Tighten module-level docs, drop verbose design-doc references, and shorten per-item comments on the V1 data model and msgpack encoder to match the concise style used in v04. --- .../src/msgpack_encoder/v1/mod.rs | 16 +++--- .../src/msgpack_encoder/v1/span_v1.rs | 23 ++------- libdd-trace-utils/src/span/v1/mod.rs | 51 ++++++++----------- 3 files changed, 32 insertions(+), 58 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 9d14f8447c..2c24f400bc 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -423,10 +423,7 @@ pub fn to_encoded_byte_len]>>( counter.0 } -/// Encodes a [`TracerPayload`] (V1 canonical data model) as a V1 msgpack payload. -/// -/// This is the M3 encoder. The byte layout matches [`encode_payload`] (the M1 v0.4 → V1 -/// encoder) so equivalent inputs produce byte-identical outputs. +/// Encodes a [`TracerPayload`] (V1 data model) as a V1 msgpack payload. fn encode_payload_v1( writer: &mut W, payload: &TracerPayload, @@ -496,6 +493,7 @@ fn encode_payload_v1( Ok(()) } +/// Encodes one V1 chunk (a group of spans sharing a trace ID). fn encode_chunk_v1( writer: &mut W, chunk: &crate::span::v1::TraceChunk, @@ -544,12 +542,12 @@ fn encode_chunk_v1( Ok(()) } -/// Serializes a V1 [`TracerPayload`] into a `Vec` using the V1 msgpack format. +/// Serializes a [`TracerPayload`] into a `Vec` using the V1 msgpack format. pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec { to_vec_from_payload_with_capacity(payload, 0) } -/// Serializes a V1 [`TracerPayload`] into a `Vec` with a pre-allocated capacity. +/// Serializes a [`TracerPayload`] into a `Vec` with a pre-allocated capacity. pub fn to_vec_from_payload_with_capacity( payload: &TracerPayload, capacity: u32, @@ -561,10 +559,10 @@ pub fn to_vec_from_payload_with_capacity( buf.into_vec() } -/// Serializes a V1 [`TracerPayload`] into a caller-provided slice. +/// Serializes a [`TracerPayload`] into a caller-provided slice. /// /// # Errors -/// Returns a `ValueWriteError` if the underlying writer fails (e.g. buffer too small). +/// Returns a `ValueWriteError` if the underlying writer fails. pub fn write_payload_to_slice( slice: &mut &mut [u8], payload: &TracerPayload, @@ -572,7 +570,7 @@ pub fn write_payload_to_slice( encode_payload_v1(slice, payload) } -/// Returns the number of bytes the V1 payload for `payload` would occupy when encoded. +/// Returns the number of bytes `payload` would occupy when encoded. pub fn to_encoded_byte_len_from_payload(payload: &TracerPayload) -> u32 { let mut counter = super::CountLength(0); let _ = encode_payload_v1(&mut counter, payload); diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs index e7af11fd84..982342fc21 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -1,12 +1,9 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! V1 msgpack encoder that consumes the canonical [`crate::span::v1`] data model. +//! V1 msgpack encoder that consumes the [`crate::span::v1`] data model. //! -//! Mirrors the encoder in [`super::span_v04`] but takes pre-promoted fields directly from the -//! struct instead of extracting them from v0.4 meta. The byte layout is identical so payloads -//! produced from equivalent inputs by either encoder are byte-for-byte equal — see the -//! cross-validation tests in [`super::cross_validation_tests`]. +//! The byte layout matches [`super::span_v04`] so equivalent inputs produce byte-identical output. use crate::span::v1::{AttributeValue, Span, SpanEvent, SpanLink}; use crate::span::TraceData; @@ -20,9 +17,6 @@ use super::span_v04::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey}; use super::StringTable; /// Encodes a typed [`AttributeValue`] as `[type_uint8, value]`. -/// -/// Recursive: [`AttributeValue::List`] and [`AttributeValue::KeyValue`] contain nested -/// [`AttributeValue`]s that are encoded in the same `[type, value]` shape. pub(super) fn encode_attribute_value( writer: &mut W, value: &AttributeValue, @@ -68,9 +62,7 @@ pub(super) fn encode_attribute_value( Ok(()) } -/// Encodes a flat triplet attributes array: `[key, type_uint8, value, key, type_uint8, value, ...]`. -/// -/// The array length is `3 * map.len()` per the V1 wire format. +/// Encodes a flat triplet attributes array: `[key, type_uint8, value, ...]`. pub(super) fn encode_attributes_map( writer: &mut W, map: &std::collections::HashMap>, @@ -160,9 +152,6 @@ pub(super) fn encode_span_events( } /// Encodes a [`Span`] (V1 data model) into V1 msgpack. -/// -/// Field-write order matches [`super::span_v04::encode_span`] so equivalent inputs produce -/// byte-identical output across the two encoders. pub(super) fn encode_span( writer: &mut W, span: &Span, @@ -212,9 +201,6 @@ pub(super) fn encode_span( write_u64(writer, span.span_id)?; write_uint8(writer, SpanKey::Start as u8)?; - // V1 normalization is the producer's responsibility: a negative `start` is not expected in - // the canonical data model. Cast preserves bits — callers that need wall-clock substitution - // should perform it before constructing the v1::Span. write_u64(writer, span.start as u64)?; if is_parent { @@ -224,7 +210,6 @@ pub(super) fn encode_span( if has_duration { write_uint8(writer, SpanKey::Duration as u8)?; - // Same rationale as for `start`: V1 inputs are expected to be normalized. write_u64(writer, span.duration.max(0) as u64)?; } @@ -263,7 +248,7 @@ pub(super) fn encode_span( write_uint8(writer, SpanKey::Component as u8)?; table.write_interned(writer, span.component.borrow())?; } - // SpanKind is always emitted per OTEL spec (default = Internal = 1). + // SpanKind is always emitted (default = Internal). write_uint8(writer, SpanKey::Kind as u8)?; write_uint(writer, span.span_kind as u64)?; diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs index 993aab7b05..bef124ee0a 100644 --- a/libdd-trace-utils/src/span/v1/mod.rs +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -1,22 +1,11 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! Canonical internal representation of a V1 trace. -//! -//! See the design doc and `RFC: Efficient Trace Payload Protocol`. Compared to v0.4, V1: -//! - promotes `env`, `version`, `component`, and `span.kind` out of the meta map into dedicated -//! span fields; -//! - merges `meta`, `metrics`, and `meta_struct` into a single typed [`AttributeValue`] map; -//! - represents `error` as `bool` and `trace_id` as a 128-bit big-endian byte array carried at the -//! chunk level. - use crate::span::{BytesData, SliceData, TraceData}; use std::collections::HashMap; /// OpenTelemetry SpanKind values, encoded on the wire as a `uint32`. -/// -/// Unset / unknown kinds default to [`SpanKind::Internal`] to match the OTEL spec and the agent's -/// behavior in `pkg/trace/api/converter.go`. +/// Unset or unrecognized kinds default to [`SpanKind::Internal`]. #[repr(u32)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum SpanKind { @@ -29,11 +18,8 @@ pub enum SpanKind { } impl SpanKind { - /// Parses the legacy v0.4 `span.kind` meta string into a [`SpanKind`]. - /// - /// Unrecognized values map to [`SpanKind::Internal`] per OTEL semantics. This is the - /// infallible counterpart to [`FromStr::from_str`]: callers converting from v0.4 always have a - /// well-defined SpanKind, even if the upstream tag is missing or invalid. + /// Parses a v0.4 `span.kind` meta value into a [`SpanKind`]. + /// Unrecognized values map to [`SpanKind::Internal`]. pub fn from_meta(s: &str) -> Self { match s { "server" => SpanKind::Server, @@ -46,9 +32,7 @@ impl SpanKind { } /// Typed V1 attribute value. -/// -/// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. The byte layout on the wire is a -/// `(key, type_uint8, value)` triplet — see `msgpack_encoder::v1::span_v1`. +/// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. #[derive(Debug, PartialEq)] pub enum AttributeValue { String(T::Text), @@ -78,18 +62,24 @@ where } } -/// Canonical V1 span model. +/// The generic representation of a V1 span. /// -/// Generic over [`TraceData`] so the same type can be used with owned (`BytesData`) or borrowed -/// (`SliceData`) string buffers — matching the v0.4 [`crate::span::v04::Span`] pattern. +/// `T` is the type used to represent strings in the span, it can be either owned (e.g. +/// BytesString) or borrowed (e.g. &str). To define a generic function taking any `Span` you can +/// use the [`TraceData`] trait: +/// ``` +/// use libdd_trace_utils::span::{v1::Span, TraceData}; +/// fn foo(span: Span) { +/// let _ = span.attributes.get("foo"); +/// } +/// ``` #[derive(Debug, PartialEq, Default)] pub struct Span { pub service: T::Text, pub name: T::Text, pub resource: T::Text, pub r#type: T::Text, - /// 128-bit trace ID stored as big-endian bytes. Wire-level trace ID lives at the chunk; the - /// per-span copy lets callers route a span to its chunk without scanning siblings. + /// 128-bit trace ID stored as big-endian bytes. pub trace_id: [u8; 16], pub span_id: u64, pub parent_id: u64, @@ -133,7 +123,8 @@ where } } -/// V1 span link. The 128-bit linked trace ID is stored in big-endian bytes. +/// The generic representation of a V1 span link. +/// `T` is the type used to represent strings in the span link. #[derive(Debug, PartialEq, Default)] pub struct SpanLink { pub trace_id: [u8; 16], @@ -159,7 +150,8 @@ where } } -/// V1 span event. +/// The generic representation of a V1 span event. +/// `T` is the type used to represent strings in the span event. #[derive(Debug, PartialEq, Default)] pub struct SpanEvent { pub time_unix_nano: u64, @@ -181,8 +173,7 @@ where } } -/// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata -/// promoted out of span meta (priority, origin, sampling mechanism). +/// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata. #[derive(Debug, PartialEq, Default)] pub struct TraceChunk { pub trace_id: [u8; 16], @@ -212,7 +203,7 @@ where } } -/// A V1 tracer payload: tracer-level metadata and the list of trace chunks it carries. +/// A V1 tracer payload: tracer-level metadata and the trace chunks it carries. #[derive(Debug, PartialEq, Default)] pub struct TracerPayload { pub language_name: T::Text, From ed6a8a98259bc0cc741c9dfede8eecd1ff653818 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 2 Jun 2026 16:41:25 +0200 Subject: [PATCH 05/14] fix: added missing attributes --- .../src/msgpack_encoder/v1/mod.rs | 196 +++++++++++++++++- .../src/msgpack_encoder/v1/span_v1.rs | 89 +++++++- 2 files changed, 270 insertions(+), 15 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 2c24f400bc..e441157e87 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -34,7 +34,9 @@ mod trace_key { mod chunk_key { pub const PRIORITY: u8 = 1; pub const ORIGIN: u8 = 2; + pub const ATTRIBUTES: u8 = 3; pub const SPANS: u8 = 4; + pub const DROPPED_TRACE: u8 = 5; pub const TRACE_ID: u8 = 6; /// Sampling mechanism (previously the `_dd.p.dm` span tag). pub const SAMPLING_MECHANISM: u8 = 7; @@ -503,11 +505,15 @@ fn encode_chunk_v1( .origin .as_ref() .is_some_and(|o| !>::borrow(o).is_empty()); + let has_attributes = !chunk.attributes.is_empty(); + let has_dropped = chunk.dropped_trace; let fields = 2u32 // trace_id + spans + has_origin as u32 + chunk.priority.is_some() as u32 - + chunk.sampling_mechanism.is_some() as u32; + + chunk.sampling_mechanism.is_some() as u32 + + has_attributes as u32 + + has_dropped as u32; write_map_len(writer, fields)?; @@ -533,6 +539,16 @@ fn encode_chunk_v1( write_uint(writer, mechanism as u64)?; } + if has_attributes { + write_uint8(writer, chunk_key::ATTRIBUTES)?; + span_v1::encode_attributes_map(writer, &chunk.attributes, table)?; + } + + if has_dropped { + write_uint8(writer, chunk_key::DROPPED_TRACE)?; + rmp::encode::write_bool(writer, true).map_err(ValueWriteError::InvalidDataWrite)?; + } + write_uint8(writer, chunk_key::SPANS)?; write_array_len(writer, chunk.spans.len() as u32)?; for span in &chunk.spans { @@ -542,12 +558,59 @@ fn encode_chunk_v1( Ok(()) } -/// Serializes a [`TracerPayload`] into a `Vec` using the V1 msgpack format. +/// Serializes a `TracerPayload` into a vector of bytes with a default capacity of 0. +/// +/// # Arguments +/// +/// * `payload` - A reference to a `TracerPayload`. +/// +/// # Returns +/// +/// * `Vec` - A vector containing the encoded payload. +/// +/// # Examples +/// +/// ``` +/// use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload; +/// use libdd_trace_utils::span::v1::TracerPayloadSlice; +/// +/// let payload = TracerPayloadSlice { +/// language_name: "rust".into(), +/// ..Default::default() +/// }; +/// let encoded = to_vec_from_payload(&payload); +/// +/// assert!(!encoded.is_empty()); +/// ``` pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec { to_vec_from_payload_with_capacity(payload, 0) } -/// Serializes a [`TracerPayload`] into a `Vec` with a pre-allocated capacity. +/// Serializes a `TracerPayload` into a vector of bytes with specified capacity. +/// +/// # Arguments +/// +/// * `payload` - A reference to a `TracerPayload`. +/// * `capacity` - Desired initial capacity of the resulting vector. +/// +/// # Returns +/// +/// * `Vec` - A vector containing the encoded payload. +/// +/// # Examples +/// +/// ``` +/// use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload_with_capacity; +/// use libdd_trace_utils::span::v1::TracerPayloadSlice; +/// +/// let payload = TracerPayloadSlice { +/// language_name: "rust".into(), +/// ..Default::default() +/// }; +/// let encoded = to_vec_from_payload_with_capacity(&payload, 1024); +/// +/// assert!(encoded.capacity() >= 1024); +/// ``` pub fn to_vec_from_payload_with_capacity( payload: &TracerPayload, capacity: u32, @@ -559,10 +622,36 @@ pub fn to_vec_from_payload_with_capacity( buf.into_vec() } -/// Serializes a [`TracerPayload`] into a caller-provided slice. +/// Encodes a `TracerPayload` into a slice of bytes. +/// +/// # Arguments +/// +/// * `slice` - A mutable reference to a byte slice. +/// * `payload` - A reference to a `TracerPayload`. +/// +/// # Returns +/// +/// * `Ok(())` - If encoding succeeds. +/// * `Err(ValueWriteError)` - If encoding fails. /// /// # Errors -/// Returns a `ValueWriteError` if the underlying writer fails. +/// +/// This function will return an error if the underlying writer fails (e.g. buffer too small). +/// +/// # Examples +/// +/// ``` +/// use libdd_trace_utils::msgpack_encoder::v1::write_payload_to_slice; +/// use libdd_trace_utils::span::v1::TracerPayloadSlice; +/// +/// let mut buffer = vec![0u8; 1024]; +/// let payload = TracerPayloadSlice { +/// language_name: "rust".into(), +/// ..Default::default() +/// }; +/// +/// write_payload_to_slice(&mut &mut buffer[..], &payload).expect("Encoding failed"); +/// ``` pub fn write_payload_to_slice( slice: &mut &mut [u8], payload: &TracerPayload, @@ -570,7 +659,33 @@ pub fn write_payload_to_slice( encode_payload_v1(slice, payload) } -/// Returns the number of bytes `payload` would occupy when encoded. +/// Computes the number of bytes required to encode the given `TracerPayload`. +/// +/// This does not allocate any actual buffer, but simulates writing in order to measure +/// the encoded size of the payload. +/// +/// # Arguments +/// +/// * `payload` - A reference to a `TracerPayload`. +/// +/// # Returns +/// +/// * `u32` - The number of bytes that would be written by the encoder. +/// +/// # Examples +/// +/// ``` +/// use libdd_trace_utils::msgpack_encoder::v1::to_encoded_byte_len_from_payload; +/// use libdd_trace_utils::span::v1::TracerPayloadSlice; +/// +/// let payload = TracerPayloadSlice { +/// language_name: "rust".into(), +/// ..Default::default() +/// }; +/// let encoded_len = to_encoded_byte_len_from_payload(&payload); +/// +/// assert!(encoded_len > 0); +/// ``` pub fn to_encoded_byte_len_from_payload(payload: &TracerPayload) -> u32 { let mut counter = super::CountLength(0); let _ = encode_payload_v1(&mut counter, payload); @@ -1248,6 +1363,75 @@ mod v1_payload_tests { assert!(encoded.windows(2).any(|w| w == want)); } + #[test] + fn chunk_dropped_trace_emitted_when_true() { + let chunk = TraceChunkBytes { + trace_id: [0u8; 16], + dropped_trace: true, + spans: vec![make_span("svc", "op", 1)], + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // DROPPED_TRACE (0x05) + msgpack true marker (0xc3) + let want = [chunk_key::DROPPED_TRACE, 0xc3]; + assert!( + encoded.windows(2).any(|w| w == want), + "DROPPED_TRACE marker + true should appear in payload" + ); + } + + #[test] + fn chunk_dropped_trace_skipped_when_false() { + let chunk = TraceChunkBytes { + trace_id: [0u8; 16], + dropped_trace: false, + spans: vec![make_span("svc", "op", 1)], + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + assert!( + !encoded.contains(&chunk_key::DROPPED_TRACE), + "DROPPED_TRACE key should not be emitted when false" + ); + } + + #[test] + fn chunk_attributes_emitted_when_set() { + let mut attrs = std::collections::HashMap::new(); + attrs.insert(bs("region"), AttributeValue::String(bs("us-east-1"))); + let chunk = TraceChunkBytes { + trace_id: [0u8; 16], + attributes: attrs, + spans: vec![make_span("svc", "op", 1)], + ..Default::default() + }; + let payload = TracerPayloadBytes { + chunks: vec![chunk], + ..Default::default() + }; + let encoded = to_vec_from_payload(&payload); + // ATTRIBUTES (0x03) + msgpack fixarray header for 3 elements (0x93) + let want = [chunk_key::ATTRIBUTES, 0x93]; + assert!( + encoded.windows(2).any(|w| w == want), + "ATTRIBUTES key + flat-triplet array header should appear" + ); + assert!( + encoded + .windows(b"us-east-1".len()) + .any(|w| w == b"us-east-1"), + "chunk attribute value should be in the payload" + ); + } + #[test] fn span_kind_otel_values() { for (kind, expected_byte) in [ diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs index 982342fc21..6e38521fe0 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -1,10 +1,6 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! V1 msgpack encoder that consumes the [`crate::span::v1`] data model. -//! -//! The byte layout matches [`super::span_v04`] so equivalent inputs produce byte-identical output. - use crate::span::v1::{AttributeValue, Span, SpanEvent, SpanLink}; use crate::span::TraceData; use rmp::encode::{ @@ -16,7 +12,22 @@ use std::borrow::Borrow; use super::span_v04::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey}; use super::StringTable; -/// Encodes a typed [`AttributeValue`] as `[type_uint8, value]`. +/// Encodes a typed `AttributeValue` as `[type_uint8, value]`. +/// +/// # Arguments +/// +/// * `writer` - A RmpWriter compatible with rmp writing functions. +/// * `value` - The attribute value to encode. +/// * `table` - The streaming string intern table used for interning string values. +/// +/// # Returns +/// +/// * `Ok(())` - Nothing if successful. +/// * `Err(ValueWriteError)` - An error if the writing fails. +/// +/// # Errors +/// +/// This function will return any error emitted by the writer. pub(super) fn encode_attribute_value( writer: &mut W, value: &AttributeValue, @@ -62,7 +73,22 @@ pub(super) fn encode_attribute_value( Ok(()) } -/// Encodes a flat triplet attributes array: `[key, type_uint8, value, ...]`. +/// Encodes a map of attributes as a flat triplet array: `[key, type_uint8, value, ...]`. +/// +/// # Arguments +/// +/// * `writer` - A RmpWriter compatible with rmp writing functions. +/// * `map` - The attribute map to encode. +/// * `table` - The streaming string intern table used for interning keys and string values. +/// +/// # Returns +/// +/// * `Ok(())` - Nothing if successful. +/// * `Err(ValueWriteError)` - An error if the writing fails. +/// +/// # Errors +/// +/// This function will return any error emitted by the writer. pub(super) fn encode_attributes_map( writer: &mut W, map: &std::collections::HashMap>, @@ -76,7 +102,22 @@ pub(super) fn encode_attributes_map( Ok(()) } -/// Encodes span links from the V1 data model. +/// Encodes a `SpanLink` object into a slice of bytes. +/// +/// # Arguments +/// +/// * `writer` - A RmpWriter compatible with rmp writing functions. +/// * `span_links` - The span links to encode. +/// * `table` - The streaming string intern table. +/// +/// # Returns +/// +/// * `Ok(())` - Nothing if successful. +/// * `Err(ValueWriteError)` - An error if the writing fails. +/// +/// # Errors +/// +/// This function will return any error emitted by the writer. pub(super) fn encode_span_links( writer: &mut W, span_links: &[SpanLink], @@ -121,7 +162,22 @@ pub(super) fn encode_span_links( Ok(()) } -/// Encodes span events from the V1 data model. +/// Encodes a `SpanEvent` object into a slice of bytes. +/// +/// # Arguments +/// +/// * `writer` - A RmpWriter compatible with rmp writing functions. +/// * `span_events` - The span events to encode. +/// * `table` - The streaming string intern table. +/// +/// # Returns +/// +/// * `Ok(())` - Nothing if successful. +/// * `Err(ValueWriteError)` - An error if the writing fails. +/// +/// # Errors +/// +/// This function will return any error emitted by the writer. pub(super) fn encode_span_events( writer: &mut W, span_events: &[SpanEvent], @@ -151,7 +207,22 @@ pub(super) fn encode_span_events( Ok(()) } -/// Encodes a [`Span`] (V1 data model) into V1 msgpack. +/// Encodes a `Span` object into a slice of bytes. +/// +/// # Arguments +/// +/// * `writer` - A RmpWriter compatible with rmp writing functions. +/// * `span` - The span to encode. +/// * `table` - The streaming string intern table. +/// +/// # Returns +/// +/// * `Ok(())` - Nothing if successful. +/// * `Err(ValueWriteError)` - An error if the writing fails. +/// +/// # Errors +/// +/// This function will return any error emitted by the writer. pub(super) fn encode_span( writer: &mut W, span: &Span, From 98faf4fcb0c8ac25df9756f7770133df75407446 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 2 Jun 2026 17:02:36 +0200 Subject: [PATCH 06/14] fix: move span key to avoid dependancy --- .../src/msgpack_encoder/v1/mod.rs | 56 ++++++++++++++++++- .../src/msgpack_encoder/v1/span_v04.rs | 54 +----------------- .../src/msgpack_encoder/v1/span_v1.rs | 3 +- 3 files changed, 56 insertions(+), 57 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index e441157e87..e55d533e4c 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -42,6 +42,58 @@ mod chunk_key { pub const SAMPLING_MECHANISM: u8 = 7; } +/// Integer keys for V1 span fields. +#[repr(u8)] +pub(super) enum SpanKey { + Service = 1, + Name = 2, + Resource = 3, + SpanId = 4, + ParentId = 5, + Start = 6, + Duration = 7, + Error = 8, + Attributes = 9, + Type = 10, + SpanLinks = 11, + SpanEvents = 12, + Env = 13, + Version = 14, + Component = 15, + Kind = 16, +} + +/// Integer keys for V1 span link fields. +#[repr(u8)] +pub(super) enum SpanLinkKey { + TraceId = 1, + SpanId = 2, + Attributes = 3, + TraceState = 4, + Flags = 5, +} + +/// Integer keys for V1 span event fields. +#[repr(u8)] +pub(super) enum SpanEventKey { + Time = 1, + Name = 2, + Attributes = 3, +} + +/// Type discriminants for attribute values. +/// An attribute value is encoded as [type_uint8][actual_value]. +#[repr(u8)] +pub(super) enum AnyValueKey { + String = 1, + Bool = 2, + Double = 3, + Int64 = 4, + Bytes = 5, + Array = 6, + KeyValueList = 7, +} + /// Streaming string intern table. /// /// The first time a string is written, it is emitted as a msgpack `str` and assigned an @@ -305,12 +357,12 @@ fn encode_payload]>>( write_array_len(writer, attr_count * 3)?; if let Some(v) = payload_attrs.apm_mode { table.write_interned(writer, "_dd.apm_mode")?; - write_uint8(writer, span_v04::AnyValueKey::String as u8)?; + write_uint8(writer, AnyValueKey::String as u8)?; table.write_interned(writer, v)?; } if let Some(v) = payload_attrs.git_commit_sha { table.write_interned(writer, "_dd.git.commit.sha")?; - write_uint8(writer, span_v04::AnyValueKey::String as u8)?; + write_uint8(writer, AnyValueKey::String as u8)?; table.write_interned(writer, v)?; } } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs index 022f80a7af..aece6d73a4 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs @@ -10,59 +10,7 @@ use rmp::encode::{ use std::borrow::Borrow; use std::time; -use super::StringTable; - -/// Integer keys for V1 span fields. -#[repr(u8)] -pub(super) enum SpanKey { - Service = 1, - Name = 2, - Resource = 3, - SpanId = 4, - ParentId = 5, - Start = 6, - Duration = 7, - Error = 8, - Attributes = 9, - Type = 10, - SpanLinks = 11, - SpanEvents = 12, - Env = 13, - Version = 14, - Component = 15, - Kind = 16, -} - -/// Integer keys for V1 span link fields. -#[repr(u8)] -pub(super) enum SpanLinkKey { - TraceId = 1, - SpanId = 2, - Attributes = 3, - TraceState = 4, - Flags = 5, -} - -/// Integer keys for V1 span event fields. -#[repr(u8)] -pub(super) enum SpanEventKey { - Time = 1, - Name = 2, - Attributes = 3, -} - -/// Type discriminants for attribute values. -/// An attribute value is encoded as [type_uint8][actual_value]. -#[repr(u8)] -pub(super) enum AnyValueKey { - String = 1, - Bool = 2, - Double = 3, - Int64 = 4, - Bytes = 5, - Array = 6, - KeyValueList = 7, -} +use super::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey, StringTable}; /// Maps the `span.kind` string tag (from v0.4 meta) to the OTEL SpanKind uint32. /// diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs index 6e38521fe0..1c430f8fa6 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -9,8 +9,7 @@ use rmp::encode::{ }; use std::borrow::Borrow; -use super::span_v04::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey}; -use super::StringTable; +use super::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey, StringTable}; /// Encodes a typed `AttributeValue` as `[type_uint8, value]`. /// From c00fb1921a6bc5004018c9989747eeb499a86097 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Wed, 3 Jun 2026 14:29:12 +0200 Subject: [PATCH 07/14] fix: address comments --- .../src/msgpack_encoder/v1/mod.rs | 59 ++++++++++++------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index e55d533e4c..a0dd1148af 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -1190,7 +1190,7 @@ mod tests { #[cfg(test)] mod v1_payload_tests { - //! Unit tests for the M3 encoder (`encode_payload_v1`). + //! Unit tests for the v1::Span encoder (`encode_payload_v1`). //! //! Verifies the encoder produces a valid V1 payload from the canonical //! [`crate::span::v1::TracerPayload`] data model and that core invariants (interning, byte @@ -1204,7 +1204,7 @@ mod v1_payload_tests { use libdd_tinybytes::BytesString; fn bs(s: &str) -> BytesString { - BytesString::from_slice(s.as_bytes()).unwrap_or_default() + BytesString::from_slice(s.as_bytes()).expect("test string must fit in BytesString") } fn make_span(service: &str, name: &str, span_id: u64) -> V1SpanBytes { @@ -1519,7 +1519,9 @@ mod v1_payload_tests { #[test] fn string_interning_works_across_chunks() { // The string "shared" appears in two chunks. The second occurrence must be a uint ID, - // not a fresh str. Compare against a baseline with a single occurrence to verify. + // not a fresh str. Verify by (a) scanning the encoded bytes for the literal "shared" + // — it must appear exactly once — and (b) confirming the two-chunk payload is smaller + // than two independent single-chunk payloads. let chunk_with_two = TracerPayloadBytes { chunks: vec![ make_chunk(vec![make_span("shared", "op1", 1)], [0u8; 16]), @@ -1533,6 +1535,15 @@ mod v1_payload_tests { }; let two = to_vec_from_payload(&chunk_with_two); let one = to_vec_from_payload(&single); + let shared_occurrences = two + .windows(b"shared".len()) + .filter(|w| *w == b"shared") + .count(); + assert_eq!( + shared_occurrences, 1, + "the literal bytes \"shared\" must appear exactly once; subsequent uses must be \ + encoded as interning IDs" + ); assert!( two.len() < 2 * one.len(), "interning should reduce repeated payload size" @@ -1542,8 +1553,8 @@ mod v1_payload_tests { #[cfg(test)] mod cross_validation_tests { - //! Cross-validates that the M1 encoder (v0.4 spans → V1 payload) and the M3 encoder - //! (v1::Span → V1 payload) produce **byte-identical** output for equivalent inputs. + //! Cross-validates that the v0.4→V1 encoder and the v1::Span encoder produce + //! **byte-identical** output for equivalent inputs. //! //! All tests are limited to deterministic content (at most one attribute key per map) so the //! `HashMap` iteration order cannot diverge between the two inputs. @@ -1556,7 +1567,7 @@ mod cross_validation_tests { use libdd_tinybytes::BytesString; fn bs(s: &str) -> BytesString { - BytesString::from_slice(s.as_bytes()).unwrap_or_default() + BytesString::from_slice(s.as_bytes()).expect("test string must fit in BytesString") } /// Builds a 128-bit big-endian trace_id from `(high, low)` 64-bit halves. @@ -1567,23 +1578,24 @@ mod cross_validation_tests { out } - /// Asserts that encoding `v04` (with `metadata`) via M1 produces the same bytes as - /// encoding `v1` via M3. Includes a hex-diff message on mismatch. + /// Asserts that encoding `v04` (with `metadata`) via the v0.4→V1 encoder produces the + /// same bytes as encoding `v1` via the v1::Span encoder. Includes a hex-diff message on + /// mismatch. #[track_caller] fn assert_byte_equal( v04_traces: &[Vec], metadata: &TracerMetadata, v1_payload: &TracerPayloadBytes, ) { - let m1 = to_vec(v04_traces, metadata); - let m3 = to_vec_from_payload(v1_payload); - if m1 != m3 { + let v04_encoded = to_vec(v04_traces, metadata); + let v1_encoded = to_vec_from_payload(v1_payload); + if v04_encoded != v1_encoded { panic!( - "M1 and M3 encoders diverged:\n M1 ({:3} bytes): {}\n M3 ({:3} bytes): {}", - m1.len(), - hex_dump(&m1), - m3.len(), - hex_dump(&m3) + "v0.4→V1 and v1::Span encoders diverged:\n v0.4→V1 ({:3} bytes): {}\n v1::Span ({:3} bytes): {}", + v04_encoded.len(), + hex_dump(&v04_encoded), + v1_encoded.len(), + hex_dump(&v1_encoded) ); } } @@ -1671,8 +1683,9 @@ mod cross_validation_tests { #[test] fn promoted_fields_byte_identical() { - // M1 reads env/version/component/span.kind from v04 meta and promotes them; M3 takes - // them directly from the v1::Span fields. Both must produce the same bytes. + // The v0.4→V1 encoder reads env/version/component/span.kind from v04 meta and promotes + // them; the v1::Span encoder takes them directly from the v1::Span fields. Both must + // produce the same bytes. let mut meta = HashMap::new(); meta.insert(bs("env"), bs("prod")); meta.insert(bs("version"), bs("1.2.3")); @@ -1691,7 +1704,8 @@ mod cross_validation_tests { ..Default::default() }]]; - // metadata.env populated → M1 picks env from metadata first (it's set on the builder). + // metadata.env populated → the v0.4→V1 encoder picks env from metadata first (it's set + // on the builder). let metadata = TracerMetadata { env: "prod".to_string(), app_version: "1.2.3".to_string(), @@ -1857,9 +1871,10 @@ mod cross_validation_tests { #[test] fn chunk_origin_only_byte_identical() { - // The M1 encoder's `is_promoted` filter only strips env/version/component/span.kind/ - // _dd.p.tid — it intentionally keeps `_dd.origin` in span attributes even though it's - // also lifted to the chunk. M3 must reproduce that duplication for byte equality. + // The v0.4→V1 encoder's `is_promoted` filter only strips env/version/component/ + // span.kind/_dd.p.tid — it intentionally keeps `_dd.origin` in span attributes even + // though it's also lifted to the chunk. The v1::Span encoder must reproduce that + // duplication for byte equality. let mut meta = HashMap::new(); meta.insert(bs("_dd.origin"), bs("lambda")); From 8af9a8e43a59b1cd805e986448378145da65ef1f Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 9 Jun 2026 17:20:42 +0200 Subject: [PATCH 08/14] fix: address comments --- Cargo.lock | 7 + LICENSE-3rdparty.csv | 1 + libdd-trace-utils/Cargo.toml | 1 + .../src/msgpack_encoder/v1/mod.rs | 704 ++---------------- .../src/msgpack_encoder/v1/span_v04.rs | 16 +- .../src/msgpack_encoder/v1/span_v1.rs | 39 +- libdd-trace-utils/src/span/v1/mod.rs | 160 +--- ...nd_data_v1_native_trace_snapshot_test.json | 43 ++ libdd-trace-utils/tests/test_send_data.rs | 281 +++++++ 9 files changed, 445 insertions(+), 807 deletions(-) create mode 100644 libdd-trace-utils/tests/snapshots/compare_send_data_v1_native_trace_snapshot_test.json diff --git a/Cargo.lock b/Cargo.lock index f165a747a3..10966b248b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3452,6 +3452,7 @@ dependencies = [ "serde", "serde_json", "tempfile", + "thin-vec", "tokio", "tracing", "urlencoding", @@ -5748,6 +5749,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "thin-vec" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f7e269b48f0a7dd0146680fa24b50cc67fc0373f086a5b2f99bd084639b482" + [[package]] name = "thiserror" version = "1.0.68" diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 16d8a34b9e..35909364f9 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -429,6 +429,7 @@ target-triple,https://github.com/dtolnay/target-triple,MIT OR Apache-2.0,David T tempfile,https://github.com/Stebalien/tempfile,MIT OR Apache-2.0,"Steven Allen , The Rust Project Developers, Ashley Mannix , Jason White " termcolor,https://github.com/BurntSushi/termcolor,Unlicense OR MIT,Andrew Gallant test-case-macros,https://github.com/frondeus/test-case,MIT,"Marcin Sas-Szymanski , Wojciech Polak , Łukasz Biel " +thin-vec,https://github.com/mozilla/thin-vec,MIT OR Apache-2.0,Aria Beingessner thiserror,https://github.com/dtolnay/thiserror,MIT OR Apache-2.0,David Tolnay thiserror-impl,https://github.com/dtolnay/thiserror,MIT OR Apache-2.0,David Tolnay thread_local,https://github.com/Amanieu/thread_local-rs,MIT OR Apache-2.0,Amanieu d'Antras diff --git a/libdd-trace-utils/Cargo.toml b/libdd-trace-utils/Cargo.toml index a68994b50d..fcb7b4e40f 100644 --- a/libdd-trace-utils/Cargo.toml +++ b/libdd-trace-utils/Cargo.toml @@ -45,6 +45,7 @@ libdd-tinybytes = { version = "1.1.1", path = "../libdd-tinybytes", features = [ "serialization", ] } indexmap = "2.11" +thin-vec = "0.2" # Compression feature flate2 = { version = "1.0", optional = true } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index a0dd1148af..1963668688 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -94,6 +94,15 @@ pub(super) enum AnyValueKey { KeyValueList = 7, } +/// Number of msgpack items written per `[type, value]` pair when typed values are flattened +/// into a parent array (e.g. `AttributeValue::List`). +pub(super) const TYPED_VALUE_STRIDE: u32 = 2; + +/// Number of msgpack items written per `[key, type, value]` triplet when typed attribute +/// entries are flattened into a parent array (top-level attribute maps and +/// `AttributeValue::KeyValue`). +pub(super) const FLAT_ATTR_STRIDE: u32 = 3; + /// Streaming string intern table. /// /// The first time a string is written, it is emitted as a msgpack `str` and assigned an @@ -134,6 +143,21 @@ impl StringTable { } } +/// Returns the span start time in UNIX nanos, falling back to the current wall-clock time when +/// the input is negative. Matches the agent's `validateAndFixStartTime`, which substitutes +/// `time.Now().UnixNano()` for invalid start values; without this, a negative `i64` would wrap +/// to a near-`u64::MAX` timestamp on cast. +pub(super) fn span_start_unix_nanos(start: i64) -> u64 { + if start < 0 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos() as u64) + .unwrap_or(0) + } else { + start as u64 + } +} + /// Promoted fields extracted from the payload's spans, written at the top-level map. struct PayloadAttrs<'a> { env: Option<&'a str>, @@ -354,7 +378,7 @@ fn encode_payload]>>( // Encoded as a flat array of triplets: [key, type_uint, value, ...] // String values use type discriminant 1. write_uint8(writer, trace_key::ATTRIBUTES)?; - write_array_len(writer, attr_count * 3)?; + write_array_len(writer, attr_count * FLAT_ATTR_STRIDE)?; if let Some(v) = payload_attrs.apm_mode { table.write_interned(writer, "_dd.apm_mode")?; write_uint8(writer, AnyValueKey::String as u8)?; @@ -553,15 +577,12 @@ fn encode_chunk_v1( chunk: &crate::span::v1::TraceChunk, table: &mut StringTable, ) -> Result<(), ValueWriteError> { - let has_origin = chunk - .origin - .as_ref() - .is_some_and(|o| !>::borrow(o).is_empty()); + let origin = >::borrow(&chunk.origin); let has_attributes = !chunk.attributes.is_empty(); let has_dropped = chunk.dropped_trace; let fields = 2u32 // trace_id + spans - + has_origin as u32 + + !origin.is_empty() as u32 + chunk.priority.is_some() as u32 + chunk.sampling_mechanism.is_some() as u32 + has_attributes as u32 @@ -572,13 +593,9 @@ fn encode_chunk_v1( write_uint8(writer, chunk_key::TRACE_ID)?; write_bin(writer, &chunk.trace_id)?; - if let Some(origin) = chunk - .origin - .as_ref() - .filter(|o| !>::borrow(o).is_empty()) - { + if !origin.is_empty() { write_uint8(writer, chunk_key::ORIGIN)?; - table.write_interned(writer, >::borrow(origin))?; + table.write_interned(writer, origin)?; } if let Some(priority) = chunk.priority { @@ -623,19 +640,19 @@ fn encode_chunk_v1( /// # Examples /// /// ``` -/// use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload; +/// use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload_v1; /// use libdd_trace_utils::span::v1::TracerPayloadSlice; /// /// let payload = TracerPayloadSlice { /// language_name: "rust".into(), /// ..Default::default() /// }; -/// let encoded = to_vec_from_payload(&payload); +/// let encoded = to_vec_from_payload_v1(&payload); /// /// assert!(!encoded.is_empty()); /// ``` -pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec { - to_vec_from_payload_with_capacity(payload, 0) +pub fn to_vec_from_payload_v1(payload: &TracerPayload) -> Vec { + to_vec_from_payload_with_capacity_v1(payload, 0) } /// Serializes a `TracerPayload` into a vector of bytes with specified capacity. @@ -652,18 +669,18 @@ pub fn to_vec_from_payload(payload: &TracerPayload) -> Vec /// # Examples /// /// ``` -/// use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload_with_capacity; +/// use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload_with_capacity_v1; /// use libdd_trace_utils::span::v1::TracerPayloadSlice; /// /// let payload = TracerPayloadSlice { /// language_name: "rust".into(), /// ..Default::default() /// }; -/// let encoded = to_vec_from_payload_with_capacity(&payload, 1024); +/// let encoded = to_vec_from_payload_with_capacity_v1(&payload, 1024); /// /// assert!(encoded.capacity() >= 1024); /// ``` -pub fn to_vec_from_payload_with_capacity( +pub fn to_vec_from_payload_with_capacity_v1( payload: &TracerPayload, capacity: u32, ) -> Vec { @@ -693,7 +710,7 @@ pub fn to_vec_from_payload_with_capacity( /// # Examples /// /// ``` -/// use libdd_trace_utils::msgpack_encoder::v1::write_payload_to_slice; +/// use libdd_trace_utils::msgpack_encoder::v1::write_payload_to_slice_v1; /// use libdd_trace_utils::span::v1::TracerPayloadSlice; /// /// let mut buffer = vec![0u8; 1024]; @@ -702,9 +719,9 @@ pub fn to_vec_from_payload_with_capacity( /// ..Default::default() /// }; /// -/// write_payload_to_slice(&mut &mut buffer[..], &payload).expect("Encoding failed"); +/// write_payload_to_slice_v1(&mut &mut buffer[..], &payload).expect("Encoding failed"); /// ``` -pub fn write_payload_to_slice( +pub fn write_payload_to_slice_v1( slice: &mut &mut [u8], payload: &TracerPayload, ) -> Result<(), ValueWriteError> { @@ -727,18 +744,18 @@ pub fn write_payload_to_slice( /// # Examples /// /// ``` -/// use libdd_trace_utils::msgpack_encoder::v1::to_encoded_byte_len_from_payload; +/// use libdd_trace_utils::msgpack_encoder::v1::to_encoded_byte_len_from_payload_v1; /// use libdd_trace_utils::span::v1::TracerPayloadSlice; /// /// let payload = TracerPayloadSlice { /// language_name: "rust".into(), /// ..Default::default() /// }; -/// let encoded_len = to_encoded_byte_len_from_payload(&payload); +/// let encoded_len = to_encoded_byte_len_from_payload_v1(&payload); /// /// assert!(encoded_len > 0); /// ``` -pub fn to_encoded_byte_len_from_payload(payload: &TracerPayload) -> u32 { +pub fn to_encoded_byte_len_from_payload_v1(payload: &TracerPayload) -> u32 { let mut counter = super::CountLength(0); let _ = encode_payload_v1(&mut counter, payload); counter.0 @@ -1201,6 +1218,7 @@ mod v1_payload_tests { AttributeValue, Span as V1Span, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, TracerPayloadBytes, }; + use crate::span::vec_map::VecMap; use libdd_tinybytes::BytesString; fn bs(s: &str) -> BytesString { @@ -1230,7 +1248,7 @@ mod v1_payload_tests { #[test] fn empty_payload_is_valid_msgpack_map() { let payload = TracerPayloadBytes::default(); - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); // Map with a single entry (chunks), then an empty array. `0x81` = fixmap of length 1, // followed by chunk key (0x0b), then `0x90` (fixarray length 0). assert_eq!(encoded, vec![0x81, 0x0b, 0x90]); @@ -1243,8 +1261,8 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); - let len = to_encoded_byte_len_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); + let len = to_encoded_byte_len_from_payload_v1(&payload); assert_eq!(encoded.len() as u32, len); } @@ -1257,7 +1275,7 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); let pat = [0x10u8, 0x01u8]; assert!( encoded.windows(2).any(|w| w == pat), @@ -1267,7 +1285,7 @@ mod v1_payload_tests { #[test] fn typed_attributes_carry_correct_type_discriminants() { - let mut attrs = HashMap::new(); + let mut attrs = VecMap::new(); attrs.insert(bs("k_str"), AttributeValue::String(bs("v"))); let span = V1Span { service: bs("svc"), @@ -1284,7 +1302,7 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); // String attribute → type discriminant = 1 (`AnyValueKey::String`). assert!( encoded.windows(b"k_str".len()).any(|w| w == b"k_str"), @@ -1295,7 +1313,7 @@ mod v1_payload_tests { #[test] fn bytes_attribute_uses_bin_marker() { // A Bytes attribute must use the msgpack `bin` family, not `str`. - let mut attrs = HashMap::new(); + let mut attrs = VecMap::new(); attrs.insert( bs("payload"), AttributeValue::Bytes(libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad")), @@ -1314,7 +1332,7 @@ mod v1_payload_tests { chunks: vec![make_chunk(vec![span], [0u8; 16])], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); // bin8 marker `0xc4` followed by length `0x02` and the bytes themselves. let want = [0xc4u8, 0x02, 0xde, 0xad]; assert!( @@ -1325,12 +1343,12 @@ mod v1_payload_tests { #[test] fn list_and_keyvalue_attributes_round_trip_through_recursion() { - let mut nested = HashMap::new(); + let mut nested = VecMap::new(); nested.insert(bs("nk"), AttributeValue::Int(7)); - let mut attrs = HashMap::new(); + let mut attrs = VecMap::new(); attrs.insert( bs("list"), - AttributeValue::List(vec![ + AttributeValue::List(thin_vec::thin_vec![ AttributeValue::String(bs("a")), AttributeValue::Bool(true), ]), @@ -1350,7 +1368,7 @@ mod v1_payload_tests { chunks: vec![make_chunk(vec![span], [0u8; 16])], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); // The keys and the nested key must all appear at least once. for s in &[b"list" as &[u8], b"kv", b"a", b"nk"] { assert!( @@ -1374,7 +1392,7 @@ mod v1_payload_tests { chunks: vec![make_chunk(vec![make_span("svc", "op", 1)], [0u8; 16])], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); for s in &[ b"python" as &[u8], b"3.11", @@ -1396,7 +1414,7 @@ mod v1_payload_tests { let chunk = TraceChunkBytes { trace_id: [0u8; 16], priority: Some(1), - origin: Some(bs("lambda")), + origin: bs("lambda"), sampling_mechanism: Some(4), spans: vec![make_span("svc", "op", 1)], ..Default::default() @@ -1405,7 +1423,7 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); assert!( encoded.windows(b"lambda".len()).any(|w| w == b"lambda"), "chunk origin should appear" @@ -1427,7 +1445,7 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); // DROPPED_TRACE (0x05) + msgpack true marker (0xc3) let want = [chunk_key::DROPPED_TRACE, 0xc3]; assert!( @@ -1448,7 +1466,7 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); assert!( !encoded.contains(&chunk_key::DROPPED_TRACE), "DROPPED_TRACE key should not be emitted when false" @@ -1457,7 +1475,7 @@ mod v1_payload_tests { #[test] fn chunk_attributes_emitted_when_set() { - let mut attrs = std::collections::HashMap::new(); + let mut attrs = VecMap::new(); attrs.insert(bs("region"), AttributeValue::String(bs("us-east-1"))); let chunk = TraceChunkBytes { trace_id: [0u8; 16], @@ -1469,7 +1487,7 @@ mod v1_payload_tests { chunks: vec![chunk], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); // ATTRIBUTES (0x03) + msgpack fixarray header for 3 elements (0x93) let want = [chunk_key::ATTRIBUTES, 0x93]; assert!( @@ -1507,7 +1525,7 @@ mod v1_payload_tests { chunks: vec![make_chunk(vec![span], [0u8; 16])], ..Default::default() }; - let encoded = to_vec_from_payload(&payload); + let encoded = to_vec_from_payload_v1(&payload); let want = [0x10u8, expected_byte]; assert!( encoded.windows(2).any(|w| w == want), @@ -1533,8 +1551,8 @@ mod v1_payload_tests { chunks: vec![make_chunk(vec![make_span("shared", "op1", 1)], [0u8; 16])], ..Default::default() }; - let two = to_vec_from_payload(&chunk_with_two); - let one = to_vec_from_payload(&single); + let two = to_vec_from_payload_v1(&chunk_with_two); + let one = to_vec_from_payload_v1(&single); let shared_occurrences = two .windows(b"shared".len()) .filter(|w| *w == b"shared") @@ -1550,595 +1568,3 @@ mod v1_payload_tests { ); } } - -#[cfg(test)] -mod cross_validation_tests { - //! Cross-validates that the v0.4→V1 encoder and the v1::Span encoder produce - //! **byte-identical** output for equivalent inputs. - //! - //! All tests are limited to deterministic content (at most one attribute key per map) so the - //! `HashMap` iteration order cannot diverge between the two inputs. - - use super::*; - use crate::span::v04::SpanBytes as V04Span; - use crate::span::v1::{ - AttributeValue, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, TracerPayloadBytes, - }; - use libdd_tinybytes::BytesString; - - fn bs(s: &str) -> BytesString { - BytesString::from_slice(s.as_bytes()).expect("test string must fit in BytesString") - } - - /// Builds a 128-bit big-endian trace_id from `(high, low)` 64-bit halves. - fn tid_bytes(high: u64, low: u64) -> [u8; 16] { - let mut out = [0u8; 16]; - out[..8].copy_from_slice(&high.to_be_bytes()); - out[8..].copy_from_slice(&low.to_be_bytes()); - out - } - - /// Asserts that encoding `v04` (with `metadata`) via the v0.4→V1 encoder produces the - /// same bytes as encoding `v1` via the v1::Span encoder. Includes a hex-diff message on - /// mismatch. - #[track_caller] - fn assert_byte_equal( - v04_traces: &[Vec], - metadata: &TracerMetadata, - v1_payload: &TracerPayloadBytes, - ) { - let v04_encoded = to_vec(v04_traces, metadata); - let v1_encoded = to_vec_from_payload(v1_payload); - if v04_encoded != v1_encoded { - panic!( - "v0.4→V1 and v1::Span encoders diverged:\n v0.4→V1 ({:3} bytes): {}\n v1::Span ({:3} bytes): {}", - v04_encoded.len(), - hex_dump(&v04_encoded), - v1_encoded.len(), - hex_dump(&v1_encoded) - ); - } - } - - fn hex_dump(b: &[u8]) -> String { - b.iter().map(|c| format!("{c:02x}")).collect::() - } - - #[test] - fn empty_payload_byte_identical() { - let v04: Vec> = vec![]; - let v1 = TracerPayloadBytes::default(); - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn minimal_single_span_byte_identical() { - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 0x42, - span_id: 1, - start: 1_000_000, - duration: 500, - ..Default::default() - }]]; - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 0x42), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1_000_000, - duration: 500, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn span_with_parent_and_error_byte_identical() { - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 2, - parent_id: 1, - start: 1000, - duration: 100, - error: 1, - ..Default::default() - }]]; - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 2, - parent_id: 1, - start: 1000, - duration: 100, - error: true, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn promoted_fields_byte_identical() { - // The v0.4→V1 encoder reads env/version/component/span.kind from v04 meta and promotes - // them; the v1::Span encoder takes them directly from the v1::Span fields. Both must - // produce the same bytes. - let mut meta = HashMap::new(); - meta.insert(bs("env"), bs("prod")); - meta.insert(bs("version"), bs("1.2.3")); - meta.insert(bs("component"), bs("flask")); - meta.insert(bs("span.kind"), bs("server")); - - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - meta, - ..Default::default() - }]]; - - // metadata.env populated → the v0.4→V1 encoder picks env from metadata first (it's set - // on the builder). - let metadata = TracerMetadata { - env: "prod".to_string(), - app_version: "1.2.3".to_string(), - ..Default::default() - }; - - let v1 = TracerPayloadBytes { - env: bs("prod"), - app_version: bs("1.2.3"), - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - env: bs("prod"), - version: bs("1.2.3"), - component: bs("flask"), - span_kind: SpanKind::Server, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &metadata, &v1); - } - - #[test] - fn single_string_meta_attribute_byte_identical() { - // One non-promoted meta tag → one attribute triplet. With a single entry the HashMap - // iteration order cannot vary. - let mut meta = HashMap::new(); - meta.insert(bs("custom.tag"), bs("hello")); - - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - meta, - ..Default::default() - }]]; - - let mut attrs = HashMap::new(); - attrs.insert(bs("custom.tag"), AttributeValue::String(bs("hello"))); - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - attributes: attrs, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn single_float_metric_byte_identical() { - let mut metrics = HashMap::new(); - metrics.insert(bs("score"), 1.5f64); - - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - metrics, - ..Default::default() - }]]; - - let mut attrs = HashMap::new(); - attrs.insert(bs("score"), AttributeValue::Float(1.5)); - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - attributes: attrs, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn single_bytes_meta_struct_byte_identical() { - let mut meta_struct = HashMap::new(); - meta_struct.insert( - bs("payload"), - libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad\xbe\xef"), - ); - - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - meta_struct, - ..Default::default() - }]]; - - let mut attrs = HashMap::new(); - attrs.insert( - bs("payload"), - AttributeValue::Bytes(libdd_tinybytes::Bytes::copy_from_slice(b"\xde\xad\xbe\xef")), - ); - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - attributes: attrs, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn chunk_origin_only_byte_identical() { - // The v0.4→V1 encoder's `is_promoted` filter only strips env/version/component/ - // span.kind/_dd.p.tid — it intentionally keeps `_dd.origin` in span attributes even - // though it's also lifted to the chunk. The v1::Span encoder must reproduce that - // duplication for byte equality. - let mut meta = HashMap::new(); - meta.insert(bs("_dd.origin"), bs("lambda")); - - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - meta, - ..Default::default() - }]]; - - let mut attrs = HashMap::new(); - attrs.insert(bs("_dd.origin"), AttributeValue::String(bs("lambda"))); - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - origin: Some(bs("lambda")), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - attributes: attrs, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn trace_id_128_bit_from_dd_p_tid_byte_identical() { - let mut meta = HashMap::new(); - meta.insert(bs("_dd.p.tid"), bs("640cfd5400000000")); - - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 0x0123456789abcdef, - span_id: 1, - start: 1000, - duration: 100, - meta, - ..Default::default() - }]]; - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0x640cfd5400000000, 0x0123456789abcdef), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn tracer_metadata_fields_byte_identical() { - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - ..Default::default() - }]]; - let metadata = TracerMetadata { - language: "python".to_string(), - language_version: "3.11".to_string(), - tracer_version: "2.0.0".to_string(), - runtime_id: "abc-uuid".to_string(), - hostname: "h1".to_string(), - ..Default::default() - }; - - let v1 = TracerPayloadBytes { - language_name: bs("python"), - language_version: bs("3.11"), - tracer_version: bs("2.0.0"), - runtime_id: bs("abc-uuid"), - hostname: bs("h1"), - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &metadata, &v1); - } - - #[test] - fn payload_attribute_git_commit_sha_byte_identical() { - let v04 = vec![vec![V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - ..Default::default() - }]]; - let metadata = TracerMetadata { - git_commit_sha: "abc123".to_string(), - ..Default::default() - }; - - let mut payload_attrs = HashMap::new(); - payload_attrs.insert( - bs("_dd.git.commit.sha"), - AttributeValue::String(bs("abc123")), - ); - - let v1 = TracerPayloadBytes { - attributes: payload_attrs, - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &metadata, &v1); - } - - #[test] - fn span_with_single_link_byte_identical() { - let v04_span = V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - span_links: vec![crate::span::v04::SpanLink { - trace_id: 0x0123456789abcdef, - trace_id_high: 0, - span_id: 99, - tracestate: bs("running"), - flags: 0, - attributes: HashMap::new(), - }], - ..Default::default() - }; - let v04 = vec![vec![v04_span]]; - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - span_links: vec![crate::span::v1::SpanLinkBytes { - trace_id: tid_bytes(0, 0x0123456789abcdef), - span_id: 99, - tracestate: bs("running"), - flags: 0, - attributes: HashMap::new(), - }], - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } - - #[test] - fn span_with_single_event_byte_identical() { - use crate::span::v04::{AttributeAnyValue, AttributeArrayValue}; - - let v04_span = V04Span { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - trace_id: 1, - span_id: 1, - start: 1000, - duration: 100, - span_events: vec![crate::span::v04::SpanEvent { - time_unix_nano: 42, - name: bs("exception"), - attributes: HashMap::from([( - bs("exception.message"), - AttributeAnyValue::SingleValue(AttributeArrayValue::String(bs("boom"))), - )]), - }], - ..Default::default() - }; - let v04 = vec![vec![v04_span]]; - - let v1 = TracerPayloadBytes { - chunks: vec![TraceChunkBytes { - trace_id: tid_bytes(0, 1), - spans: vec![V1SpanBytes { - service: bs("svc"), - name: bs("op"), - resource: bs("res"), - span_id: 1, - start: 1000, - duration: 100, - span_events: vec![crate::span::v1::SpanEventBytes { - time_unix_nano: 42, - name: bs("exception"), - attributes: HashMap::from([( - bs("exception.message"), - AttributeValue::String(bs("boom")), - )]), - }], - ..Default::default() - }], - ..Default::default() - }], - ..Default::default() - }; - - assert_byte_equal(&v04, &TracerMetadata::default(), &v1); - } -} diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs index aece6d73a4..a62c35d632 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs @@ -8,9 +8,8 @@ use rmp::encode::{ ValueWriteError, }; use std::borrow::Borrow; -use std::time; -use super::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey, StringTable}; +use super::{span_start_unix_nanos, AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey, StringTable}; /// Maps the `span.kind` string tag (from v0.4 meta) to the OTEL SpanKind uint32. /// @@ -247,18 +246,7 @@ pub fn encode_span( write_u64(writer, span.span_id)?; write_uint8(writer, SpanKey::Start as u8)?; - if span.start < 0 { - // Fall back to wall-clock now (UNIX nanos). Matches the agent's - // `validateAndFixStartTime` which substitutes `time.Now().UnixNano()` - // for invalid start values. - let now = time::SystemTime::now() - .duration_since(time::UNIX_EPOCH) - .map(|d| d.as_nanos() as u64) - .unwrap_or(0); - write_u64(writer, now)?; - } else { - write_u64(writer, span.start as u64)?; - } + write_u64(writer, span_start_unix_nanos(span.start))?; if is_parent { write_uint8(writer, SpanKey::ParentId as u8)?; diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs index 1c430f8fa6..44792f2235 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v1.rs @@ -2,14 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 use crate::span::v1::{AttributeValue, Span, SpanEvent, SpanLink}; +use crate::span::vec_map::VecMap; use crate::span::TraceData; use rmp::encode::{ write_array_len, write_bin, write_bool, write_f64, write_map_len, write_sint, write_u64, write_uint, write_uint8, RmpWrite, ValueWriteError, }; use std::borrow::Borrow; +use std::collections::HashSet; -use super::{AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey, StringTable}; +use super::{ + span_start_unix_nanos, AnyValueKey, SpanEventKey, SpanKey, SpanLinkKey, StringTable, + FLAT_ATTR_STRIDE, TYPED_VALUE_STRIDE, +}; /// Encodes a typed `AttributeValue` as `[type_uint8, value]`. /// @@ -54,19 +59,18 @@ pub(super) fn encode_attribute_value( write_bin(writer, b.borrow())?; } AttributeValue::List(arr) => { + // Encoded as a flat array of `[type, value]` pairs. write_uint8(writer, AnyValueKey::Array as u8)?; - write_array_len(writer, arr.len() as u32)?; + write_array_len(writer, arr.len() as u32 * TYPED_VALUE_STRIDE)?; for v in arr { encode_attribute_value(writer, v, table)?; } } AttributeValue::KeyValue(map) => { + // Encoded as a flat array of `[key, type, value]` triplets — consistent with the + // top-level attributes map (`encode_attributes_map`). write_uint8(writer, AnyValueKey::KeyValueList as u8)?; - write_map_len(writer, map.len() as u32)?; - for (k, v) in map { - table.write_interned(writer, k.borrow())?; - encode_attribute_value(writer, v, table)?; - } + encode_attributes_map(writer, map, table)?; } } Ok(()) @@ -90,11 +94,24 @@ pub(super) fn encode_attribute_value( /// This function will return any error emitted by the writer. pub(super) fn encode_attributes_map( writer: &mut W, - map: &std::collections::HashMap>, + map: &VecMap>, table: &mut StringTable, ) -> Result<(), ValueWriteError> { - write_array_len(writer, (map.len() as u32) * 3)?; - for (k, v) in map { + // `VecMap` tolerates duplicate keys for fast insertion (later writes shadow earlier ones via + // `get`). Dedup here so the wire format carries each key once, with the last-written value. + // Walk in reverse keeping first-seen (= last-written), then reverse to restore insertion + // order. `T::Text: Hash + Eq + Borrow` per the `SpanText` trait. + let mut seen: HashSet<&str> = HashSet::with_capacity(map.len()); + let mut deduped: Vec<(&T::Text, &AttributeValue)> = map + .iter() + .rev() + .filter(|(k, _)| seen.insert(>::borrow(k))) + .map(|(k, v)| (k, v)) + .collect(); + deduped.reverse(); + + write_array_len(writer, deduped.len() as u32 * FLAT_ATTR_STRIDE)?; + for (k, v) in deduped { table.write_interned(writer, k.borrow())?; encode_attribute_value(writer, v, table)?; } @@ -271,7 +288,7 @@ pub(super) fn encode_span( write_u64(writer, span.span_id)?; write_uint8(writer, SpanKey::Start as u8)?; - write_u64(writer, span.start as u64)?; + write_u64(writer, span_start_unix_nanos(span.start))?; if is_parent { write_uint8(writer, SpanKey::ParentId as u8)?; diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs index bef124ee0a..a58fc233df 100644 --- a/libdd-trace-utils/src/span/v1/mod.rs +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -1,8 +1,9 @@ // Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::span::vec_map::VecMap; use crate::span::{BytesData, SliceData, TraceData}; -use std::collections::HashMap; +pub use thin_vec::ThinVec; /// OpenTelemetry SpanKind values, encoded on the wire as a `uint32`. /// Unset or unrecognized kinds default to [`SpanKind::Internal`]. @@ -40,33 +41,16 @@ pub enum AttributeValue { Int(i64), Bool(bool), Bytes(T::Bytes), - KeyValue(HashMap>), - List(Vec>), -} - -impl Clone for AttributeValue -where - T::Text: Clone, - T::Bytes: Clone, -{ - fn clone(&self) -> Self { - match self { - AttributeValue::String(v) => AttributeValue::String(v.clone()), - AttributeValue::Float(v) => AttributeValue::Float(*v), - AttributeValue::Int(v) => AttributeValue::Int(*v), - AttributeValue::Bool(v) => AttributeValue::Bool(*v), - AttributeValue::Bytes(v) => AttributeValue::Bytes(v.clone()), - AttributeValue::KeyValue(m) => AttributeValue::KeyValue(m.clone()), - AttributeValue::List(v) => AttributeValue::List(v.clone()), - } - } + KeyValue(VecMap>), + List(ThinVec>), } /// The generic representation of a V1 span. /// -/// `T` is the type used to represent strings in the span, it can be either owned (e.g. -/// BytesString) or borrowed (e.g. &str). To define a generic function taking any `Span` you can -/// use the [`TraceData`] trait: +/// `T: TraceData` carries the associated text type `T::Text` used for every string field in the +/// span; `T::Text` can be either owned (e.g. [`BytesString`](libdd_tinybytes::BytesString)) or +/// borrowed (e.g. `&str`). To define a generic function taking any `Span` you can use the +/// [`TraceData`] trait: /// ``` /// use libdd_trace_utils::span::{v1::Span, TraceData}; /// fn foo(span: Span) { @@ -79,8 +63,6 @@ pub struct Span { pub name: T::Text, pub resource: T::Text, pub r#type: T::Text, - /// 128-bit trace ID stored as big-endian bytes. - pub trace_id: [u8; 16], pub span_id: u64, pub parent_id: u64, pub start: i64, @@ -90,37 +72,9 @@ pub struct Span { pub env: T::Text, pub version: T::Text, pub component: T::Text, - pub attributes: HashMap>, - pub span_links: Vec>, - pub span_events: Vec>, -} - -impl Clone for Span -where - T::Text: Clone, - T::Bytes: Clone, -{ - fn clone(&self) -> Self { - Span { - service: self.service.clone(), - name: self.name.clone(), - resource: self.resource.clone(), - r#type: self.r#type.clone(), - trace_id: self.trace_id, - span_id: self.span_id, - parent_id: self.parent_id, - start: self.start, - duration: self.duration, - error: self.error, - span_kind: self.span_kind, - env: self.env.clone(), - version: self.version.clone(), - component: self.component.clone(), - attributes: self.attributes.clone(), - span_links: self.span_links.clone(), - span_events: self.span_events.clone(), - } - } + pub attributes: VecMap>, + pub span_links: ThinVec>, + pub span_events: ThinVec>, } /// The generic representation of a V1 span link. @@ -129,48 +83,18 @@ where pub struct SpanLink { pub trace_id: [u8; 16], pub span_id: u64, - pub attributes: HashMap>, + pub attributes: VecMap>, pub tracestate: T::Text, pub flags: u32, } -impl Clone for SpanLink -where - T::Text: Clone, - T::Bytes: Clone, -{ - fn clone(&self) -> Self { - SpanLink { - trace_id: self.trace_id, - span_id: self.span_id, - attributes: self.attributes.clone(), - tracestate: self.tracestate.clone(), - flags: self.flags, - } - } -} - /// The generic representation of a V1 span event. /// `T` is the type used to represent strings in the span event. #[derive(Debug, PartialEq, Default)] pub struct SpanEvent { pub time_unix_nano: u64, pub name: T::Text, - pub attributes: HashMap>, -} - -impl Clone for SpanEvent -where - T::Text: Clone, - T::Bytes: Clone, -{ - fn clone(&self) -> Self { - SpanEvent { - time_unix_nano: self.time_unix_nano, - name: self.name.clone(), - attributes: self.attributes.clone(), - } - } + pub attributes: VecMap>, } /// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata. @@ -178,31 +102,13 @@ where pub struct TraceChunk { pub trace_id: [u8; 16], pub priority: Option, - pub origin: Option, + pub origin: T::Text, pub sampling_mechanism: Option, pub dropped_trace: bool, - pub attributes: HashMap>, + pub attributes: VecMap>, pub spans: Vec>, } -impl Clone for TraceChunk -where - T::Text: Clone, - T::Bytes: Clone, -{ - fn clone(&self) -> Self { - TraceChunk { - trace_id: self.trace_id, - priority: self.priority, - origin: self.origin.clone(), - sampling_mechanism: self.sampling_mechanism, - dropped_trace: self.dropped_trace, - attributes: self.attributes.clone(), - spans: self.spans.clone(), - } - } -} - /// A V1 tracer payload: tracer-level metadata and the trace chunks it carries. #[derive(Debug, PartialEq, Default)] pub struct TracerPayload { @@ -213,30 +119,10 @@ pub struct TracerPayload { pub env: T::Text, pub hostname: T::Text, pub app_version: T::Text, - pub attributes: HashMap>, + pub attributes: VecMap>, pub chunks: Vec>, } -impl Clone for TracerPayload -where - T::Text: Clone, - T::Bytes: Clone, -{ - fn clone(&self) -> Self { - TracerPayload { - language_name: self.language_name.clone(), - language_version: self.language_version.clone(), - tracer_version: self.tracer_version.clone(), - runtime_id: self.runtime_id.clone(), - env: self.env.clone(), - hostname: self.hostname.clone(), - app_version: self.app_version.clone(), - attributes: self.attributes.clone(), - chunks: self.chunks.clone(), - } - } -} - pub type SpanBytes = Span; pub type SpanLinkBytes = SpanLink; pub type SpanEventBytes = SpanEvent; @@ -254,7 +140,6 @@ pub type TracerPayloadSlice<'a> = TracerPayload>; #[cfg(test)] mod tests { use super::*; - use libdd_tinybytes::BytesString; #[test] fn span_kind_default_is_internal() { @@ -282,21 +167,10 @@ mod tests { } #[test] - fn span_default_has_zero_trace_id_and_internal_kind() { + fn span_default_has_internal_kind() { let s = SpanBytes::default(); - assert_eq!(s.trace_id, [0u8; 16]); assert_eq!(s.span_kind, SpanKind::Internal); assert!(!s.error); assert!(s.attributes.is_empty()); } - - #[test] - fn attribute_value_clone_preserves_variants() { - let s = AttributeValueBytes::String(BytesString::from_static("v")); - assert_eq!(s.clone(), s); - let n = AttributeValueBytes::Int(42); - assert_eq!(n.clone(), n); - let list = AttributeValueBytes::List(vec![AttributeValueBytes::Bool(true)]); - assert_eq!(list.clone(), list); - } } diff --git a/libdd-trace-utils/tests/snapshots/compare_send_data_v1_native_trace_snapshot_test.json b/libdd-trace-utils/tests/snapshots/compare_send_data_v1_native_trace_snapshot_test.json new file mode 100644 index 0000000000..ba36402ca8 --- /dev/null +++ b/libdd-trace-utils/tests/snapshots/compare_send_data_v1_native_trace_snapshot_test.json @@ -0,0 +1,43 @@ +[[ + { + "name": "test_send_data_v1_native_snapshot_root", + "service": "test-service", + "resource": "/api/users", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "web", + "meta": { + "_dd.origin": "synthetics", + "_dd.p.dm": "-4", + "_dd.p.tid": "0123456789abcdef", + "component": "http", + "env": "test-env", + "http.method": "GET", + "span.kind": "server", + "version": "1.2.3" + }, + "metrics": { + "_sampling_priority_v1": 1, + "http.duration_ms": 12.5, + "http.status_code": 200, + "http.success": 1 + }, + "span_links": [ + { + "trace_id": 18364758544493064720, + "trace_id_high": 81985529216486895, + "span_id": 11574427654092267680, + "tracestate": "dd=t.tid:abc", + "flags": 1 + } + ], + "span_events": [ + { + "time_unix_nano": 1727211691770715042, + "name": "exception" + } + ], + "duration": 5000, + "start": 1000000 + }]] diff --git a/libdd-trace-utils/tests/test_send_data.rs b/libdd-trace-utils/tests/test_send_data.rs index 37ae09d691..c0dfb7bfe2 100644 --- a/libdd-trace-utils/tests/test_send_data.rs +++ b/libdd-trace-utils/tests/test_send_data.rs @@ -12,6 +12,7 @@ mod tracing_integration_tests { use libdd_common::{http_common, Endpoint}; use libdd_tinybytes::{Bytes, BytesString}; use libdd_trace_utils::send_data::SendData; + use libdd_trace_utils::span::vec_map::VecMap; use libdd_trace_utils::test_utils::datadog_test_agent::DatadogTestAgent; use libdd_trace_utils::test_utils::{create_test_json_span, create_test_no_alloc_span}; use libdd_trace_utils::trace_utils::TracerHeaderTags; @@ -392,4 +393,284 @@ mod tracing_integration_tests { "eyJ0cmFjaW5nX3NhbXBsaW5nX3J1bGVzIjogW3sic2VydmljZSI6ICJ0ZXN0LXNlcnZpY2UiLCAibmFtZSI6ICJ0ZXN0LW5hbWUiLCAic2FtcGxlX3JhdGUiOiAwLjV9XX0=" ); } + + // ───────────────────────── V1 integration tests ────────────────────────── + // + // These tests cover the v1::Span encoder end-to-end: the payload is built directly from the + // `TracerPayload` data model in Rust, encoded with `to_vec_from_payload_v1`, POSTed to the + // `dd-apm-test-agent`'s `/v1.0/traces`, and validated via snapshot. The test-agent is the V1 + // decoder, so this exercises the full round-trip without us having to maintain one in this + // crate. + + fn bs_v1(s: &str) -> BytesString { + BytesString::from_slice(s.as_bytes()).expect("test string must fit in BytesString") + } + + /// 128-bit big-endian trace_id from `(high, low)` 64-bit halves. + fn tid_bytes(high: u64, low: u64) -> [u8; 16] { + let mut out = [0u8; 16]; + out[..8].copy_from_slice(&high.to_be_bytes()); + out[8..].copy_from_slice(&low.to_be_bytes()); + out + } + + /// POSTs a raw V1 msgpack payload to the test-agent's `/v1.0/traces` and asserts the agent + /// returns 2xx. Headers are the minimum the agent needs to attach the payload to a snapshot + /// session (`X-Datadog-Test-Session-Token` query param + `Datadog-Meta-Lang*` for routing). + async fn post_v1_payload(uri: hyper::Uri, body: Vec) { + use libdd_capabilities_impl::HttpClientCapability; + let client = NativeCapabilities::new_client(); + let req = http::Request::builder() + .method(http::Method::POST) + .uri(uri) + .header("Content-type", "application/msgpack") + .header("Datadog-Meta-Lang", "test-lang") + .header("Datadog-Meta-Lang-Version", "2.0") + .header("Datadog-Meta-Lang-Interpreter", "interpreter") + .header("Datadog-Meta-Tracer-Version", "1.0") + .body(bytes::Bytes::from(body)) + .expect("failed to build request"); + let response = client.request(req).await.expect("request failed"); + assert!( + response.status().is_success(), + "test-agent rejected V1 payload: status={} body={:?}", + response.status(), + String::from_utf8_lossy(response.body()) + ); + } + + /// Builds a TracerPayload that exercises the multi-key attribute paths the v0.4→V1 encoder + /// can't cover on its own (HashMap iteration order makes byte-by-byte cross-validation flaky + /// for n > 1), plus the primitive `AttributeValue` variants the test-agent currently + /// supports. + /// + /// NOTE: `AttributeValue::List` and `AttributeValue::KeyValue` are deliberately omitted — + /// `ddapm-test-agent` v1.56.0 returns `400: Array of strings values are not supported yet` + /// for the `Array` variant. Once test-agent V1 support catches up, add them here too. + fn make_v1_payload(name_prefix: &str) -> libdd_trace_utils::span::v1::TracerPayloadBytes { + use libdd_trace_utils::span::v1::{ + AttributeValue, AttributeValueBytes, SpanBytes as V1SpanBytes, SpanEventBytes, + SpanKind, SpanLinkBytes, TraceChunkBytes, TracerPayloadBytes, + }; + + // Multi-key attribute map on the root span — primitive variants only. + let mut root_attrs: VecMap = VecMap::new(); + root_attrs.insert(bs_v1("http.method"), AttributeValue::String(bs_v1("GET"))); + root_attrs.insert(bs_v1("http.status_code"), AttributeValue::Int(200)); + root_attrs.insert(bs_v1("http.success"), AttributeValue::Bool(true)); + root_attrs.insert(bs_v1("http.duration_ms"), AttributeValue::Float(12.5)); + + let span_link = SpanLinkBytes { + trace_id: tid_bytes(0x0123_4567_89ab_cdef, 0xfedc_ba98_7654_3210), + span_id: 0xa0a0_a0a0_a0a0_a0a0, + tracestate: bs_v1("dd=t.tid:abc"), + flags: 1, + attributes: VecMap::new(), + }; + + let span_event = SpanEventBytes { + time_unix_nano: 1_727_211_691_770_715_042, + name: bs_v1("exception"), + attributes: VecMap::new(), + }; + + let root_span = V1SpanBytes { + service: bs_v1("test-service"), + name: bs_v1(&format!("{name_prefix}_root")), + resource: bs_v1("/api/users"), + r#type: bs_v1("web"), + span_id: 1, + parent_id: 0, + start: 1_000_000, + duration: 5_000, + span_kind: SpanKind::Server, + env: bs_v1("test-env"), + version: bs_v1("1.2.3"), + component: bs_v1("http"), + attributes: root_attrs, + span_links: thin_vec::thin_vec![span_link], + span_events: thin_vec::thin_vec![span_event], + ..Default::default() + }; + + // Multi-key chunk-level attributes. + let mut chunk_attrs = VecMap::new(); + chunk_attrs.insert(bs_v1("_dd.p.dm"), AttributeValue::String(bs_v1("-4"))); + chunk_attrs.insert( + bs_v1("_dd.p.tid"), + AttributeValue::String(bs_v1("0123456789abcdef")), + ); + + let chunk = TraceChunkBytes { + trace_id: tid_bytes(0, 0xdeadbeef), + priority: Some(1), + origin: bs_v1("synthetics"), + sampling_mechanism: Some(4), + attributes: chunk_attrs, + dropped_trace: false, + spans: vec![root_span], + }; + + TracerPayloadBytes { + language_name: bs_v1("test-lang"), + language_version: bs_v1("2.0"), + tracer_version: bs_v1("1.0"), + runtime_id: bs_v1("test-runtime-id"), + env: bs_v1("test-env"), + hostname: bs_v1("test-host"), + app_version: bs_v1("1.2.3"), + attributes: VecMap::new(), + chunks: vec![chunk], + } + } + + /// End-to-end round-trip: builds a V1 payload directly from `TracerPayload`, encodes it + /// with `to_vec_from_payload_v1`, POSTs to the test-agent, and asserts the snapshot. + #[cfg_attr(miri, ignore)] + #[tokio::test] + async fn compare_v1_native_trace_snapshot_test() { + use libdd_trace_utils::msgpack_encoder::v1::to_vec_from_payload_v1; + + let relative_snapshot_path = "libdd-trace-utils/tests/snapshots/"; + let snapshot_name = "compare_send_data_v1_native_trace_snapshot_test"; + let test_agent = DatadogTestAgent::new(Some(relative_snapshot_path), None, &[]).await; + + let uri = test_agent + .get_uri_for_endpoint("v1.0/traces", Some(snapshot_name)) + .await; + + test_agent.start_session(snapshot_name, None).await; + + let payload = make_v1_payload("test_send_data_v1_native_snapshot"); + let encoded = to_vec_from_payload_v1(&payload); + + post_v1_payload(uri, encoded).await; + + test_agent.assert_snapshot(snapshot_name).await; + } + + /// Recursively normalizes a JSON value by sorting every object's keys. Necessary because the + /// test-agent serializes maps in HashMap iteration order, which is non-deterministic in Rust + /// — two semantically-equivalent traces can decode to JSON with keys in different orders. + fn normalize_json(v: &mut serde_json::Value) { + match v { + serde_json::Value::Object(map) => { + let entries: Vec<(String, serde_json::Value)> = + map.iter().map(|(k, v)| (k.clone(), v.clone())).collect(); + let mut sorted: std::collections::BTreeMap = + entries.into_iter().collect(); + for child in sorted.values_mut() { + normalize_json(child); + } + *map = sorted.into_iter().collect(); + } + serde_json::Value::Array(arr) => { + for child in arr.iter_mut() { + normalize_json(child); + } + } + _ => {} + } + } + + /// Asserts the v0.4→V1 encoder and the v1::Span encoder produce decoded traces with the same + /// canonical content. Replaces the byte-equality cross-validation suite — instead of + /// comparing raw bytes (which forced n=1 collections due to HashMap order non-determinism), + /// we let the test-agent decode both payloads and compare the resulting structures after + /// recursive key sorting. + #[cfg_attr(miri, ignore)] + #[tokio::test] + async fn v04_and_v1_encoders_produce_equivalent_decoded_traces() { + use libdd_trace_utils::msgpack_encoder::v1::{to_vec, to_vec_from_payload_v1}; + use libdd_trace_utils::span::v04::SpanBytes as V04SpanBytes; + use libdd_trace_utils::span::v1::{ + AttributeValue, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, TracerPayloadBytes, + }; + use libdd_trace_utils::tracer_metadata::TracerMetadata; + + let test_agent = DatadogTestAgent::new(None, None, &[]).await; + let uri = test_agent.get_uri_for_endpoint("v1.0/traces", None).await; + + // ── v0.4 input ───────────────────────────────────────────────────────────── + let mut meta_v04 = HashMap::new(); + meta_v04.insert(bs_v1("env"), bs_v1("test-env")); + meta_v04.insert(bs_v1("http.method"), bs_v1("GET")); + let mut metrics_v04 = HashMap::new(); + metrics_v04.insert(bs_v1("http.duration_ms"), 12.5_f64); + let v04_traces: Vec> = vec![vec![V04SpanBytes { + service: bs_v1("svc"), + name: bs_v1("op"), + resource: bs_v1("res"), + trace_id: 1, + span_id: 1, + start: 1_000_000, + duration: 5_000, + meta: meta_v04, + metrics: metrics_v04, + ..Default::default() + }]]; + let metadata = TracerMetadata::default(); + + // ── v1::Span input — semantically equivalent to the v0.4 one ─────────────── + // `env` is promoted out of meta by the v0.4→V1 encoder; in the V1 model it lives on the + // span as a typed field. `http.method` and `http.duration_ms` go to span attributes. + let mut attrs_v1 = VecMap::new(); + attrs_v1.insert(bs_v1("http.method"), AttributeValue::String(bs_v1("GET"))); + attrs_v1.insert(bs_v1("http.duration_ms"), AttributeValue::Float(12.5)); + let v1_payload = TracerPayloadBytes { + chunks: vec![TraceChunkBytes { + trace_id: tid_bytes(0, 1), + spans: vec![V1SpanBytes { + service: bs_v1("svc"), + name: bs_v1("op"), + resource: bs_v1("res"), + span_id: 1, + start: 1_000_000, + duration: 5_000, + span_kind: SpanKind::Internal, + env: bs_v1("test-env"), + attributes: attrs_v1, + ..Default::default() + }], + ..Default::default() + }], + ..Default::default() + }; + + // ── Encode each via its dedicated encoder and POST both ──────────────────── + let bytes_v04 = to_vec(&v04_traces, &metadata); + let bytes_v1 = to_vec_from_payload_v1(&v1_payload); + post_v1_payload(uri.clone(), bytes_v04).await; + post_v1_payload(uri, bytes_v1).await; + + // ── Fetch what the test-agent decoded and compare structurally ───────────── + // The two POSTs share the same trace_id, so the test-agent groups them into a single + // "trace" containing both decoded spans. Equivalence means those two spans must match + // after recursive key normalization. + let traces = test_agent.get_sent_traces().await; + assert_eq!( + traces.len(), + 1, + "expected 1 merged trace, got {}", + traces.len() + ); + let spans = traces[0] + .as_array() + .expect("trace must be an array of spans"); + assert_eq!( + spans.len(), + 2, + "expected 2 spans (one per encoder), got {}", + spans.len() + ); + + let mut a = spans[0].clone(); + let mut b = spans[1].clone(); + normalize_json(&mut a); + normalize_json(&mut b); + assert_eq!( + a, b, + "v0.4→V1 and v1::Span encoders must decode to the same span" + ); + } } From d35c9606ed28d90cd37988ffa7601c37f3345d32 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Tue, 9 Jun 2026 17:41:49 +0200 Subject: [PATCH 09/14] fix: test --- libdd-trace-utils/src/span/v1/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs index a58fc233df..d1c6e5eeef 100644 --- a/libdd-trace-utils/src/span/v1/mod.rs +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -34,7 +34,7 @@ impl SpanKind { /// Typed V1 attribute value. /// Replaces v0.4's split `meta` / `metrics` / `meta_struct` maps. -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub enum AttributeValue { String(T::Text), Float(f64), @@ -57,7 +57,7 @@ pub enum AttributeValue { /// let _ = span.attributes.get("foo"); /// } /// ``` -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, Default)] pub struct Span { pub service: T::Text, pub name: T::Text, @@ -79,7 +79,7 @@ pub struct Span { /// The generic representation of a V1 span link. /// `T` is the type used to represent strings in the span link. -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, Default)] pub struct SpanLink { pub trace_id: [u8; 16], pub span_id: u64, @@ -90,7 +90,7 @@ pub struct SpanLink { /// The generic representation of a V1 span event. /// `T` is the type used to represent strings in the span event. -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, Default)] pub struct SpanEvent { pub time_unix_nano: u64, pub name: T::Text, @@ -98,7 +98,7 @@ pub struct SpanEvent { } /// A V1 trace chunk: a group of spans sharing the same `trace_id`, plus chunk-level metadata. -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, Default)] pub struct TraceChunk { pub trace_id: [u8; 16], pub priority: Option, @@ -110,7 +110,7 @@ pub struct TraceChunk { } /// A V1 tracer payload: tracer-level metadata and the trace chunks it carries. -#[derive(Debug, PartialEq, Default)] +#[derive(Debug, Default)] pub struct TracerPayload { pub language_name: T::Text, pub language_version: T::Text, From 0464b0b2ee78d27e27e652b34d4a4019db865cf3 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Thu, 11 Jun 2026 15:02:56 +0200 Subject: [PATCH 10/14] fix: comment --- libdd-trace-utils/src/msgpack_encoder/v1/mod.rs | 2 +- libdd-trace-utils/src/span/v1/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 1963668688..7ded93af93 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -1348,7 +1348,7 @@ mod v1_payload_tests { let mut attrs = VecMap::new(); attrs.insert( bs("list"), - AttributeValue::List(thin_vec::thin_vec![ + AttributeValue::List(vec![ AttributeValue::String(bs("a")), AttributeValue::Bool(true), ]), diff --git a/libdd-trace-utils/src/span/v1/mod.rs b/libdd-trace-utils/src/span/v1/mod.rs index d1c6e5eeef..459cbc5438 100644 --- a/libdd-trace-utils/src/span/v1/mod.rs +++ b/libdd-trace-utils/src/span/v1/mod.rs @@ -42,7 +42,7 @@ pub enum AttributeValue { Bool(bool), Bytes(T::Bytes), KeyValue(VecMap>), - List(ThinVec>), + List(Vec>), } /// The generic representation of a V1 span. From 075657ee3d7382eafcf4d782f0752558bc0f4660 Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Thu, 11 Jun 2026 16:19:04 +0200 Subject: [PATCH 11/14] fix: last comment --- .../src/msgpack_encoder/v1/mod.rs | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index 7ded93af93..3ffa3d9135 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -339,6 +339,12 @@ fn encode_payload]>>( write_map_len(writer, map_len)?; + write_uint8(writer, trace_key::CHUNKS)?; + write_array_len(writer, traces.len() as u32)?; + for trace in traces { + encode_chunk(writer, trace.as_ref(), &mut table)?; + } + if !metadata.language.is_empty() { write_uint8(writer, trace_key::LANGUAGE_NAME)?; table.write_interned(writer, &metadata.language)?; @@ -391,12 +397,6 @@ fn encode_payload]>>( } } - write_uint8(writer, trace_key::CHUNKS)?; - write_array_len(writer, traces.len() as u32)?; - for trace in traces { - encode_chunk(writer, trace.as_ref(), &mut table)?; - } - Ok(()) } @@ -428,6 +428,12 @@ fn encode_chunk( write_uint8(writer, chunk_key::TRACE_ID)?; write_bin(writer, &attrs.trace_id.to_be_bytes())?; + write_uint8(writer, chunk_key::SPANS)?; + write_array_len(writer, spans.len() as u32)?; + for span in spans { + span_v04::encode_span(writer, span, table)?; + } + if let Some(origin) = attrs.origin { write_uint8(writer, chunk_key::ORIGIN)?; table.write_interned(writer, origin)?; @@ -443,12 +449,6 @@ fn encode_chunk( write_uint(writer, mechanism as u64)?; } - write_uint8(writer, chunk_key::SPANS)?; - write_array_len(writer, spans.len() as u32)?; - for span in spans { - span_v04::encode_span(writer, span, table)?; - } - Ok(()) } @@ -522,6 +522,12 @@ fn encode_payload_v1( write_map_len(writer, map_len)?; + write_uint8(writer, trace_key::CHUNKS)?; + write_array_len(writer, payload.chunks.len() as u32)?; + for chunk in &payload.chunks { + encode_chunk_v1(writer, chunk, &mut table)?; + } + if !payload.language_name.borrow().is_empty() { write_uint8(writer, trace_key::LANGUAGE_NAME)?; table.write_interned(writer, payload.language_name.borrow())?; @@ -562,12 +568,6 @@ fn encode_payload_v1( span_v1::encode_attributes_map(writer, &payload.attributes, &mut table)?; } - write_uint8(writer, trace_key::CHUNKS)?; - write_array_len(writer, payload.chunks.len() as u32)?; - for chunk in &payload.chunks { - encode_chunk_v1(writer, chunk, &mut table)?; - } - Ok(()) } @@ -593,6 +593,12 @@ fn encode_chunk_v1( write_uint8(writer, chunk_key::TRACE_ID)?; write_bin(writer, &chunk.trace_id)?; + write_uint8(writer, chunk_key::SPANS)?; + write_array_len(writer, chunk.spans.len() as u32)?; + for span in &chunk.spans { + span_v1::encode_span(writer, span, table)?; + } + if !origin.is_empty() { write_uint8(writer, chunk_key::ORIGIN)?; table.write_interned(writer, origin)?; @@ -618,12 +624,6 @@ fn encode_chunk_v1( rmp::encode::write_bool(writer, true).map_err(ValueWriteError::InvalidDataWrite)?; } - write_uint8(writer, chunk_key::SPANS)?; - write_array_len(writer, chunk.spans.len() as u32)?; - for span in &chunk.spans { - span_v1::encode_span(writer, span, table)?; - } - Ok(()) } From 4d7f045f994ede6583cdb2817fc1f934a74d4d0b Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Thu, 11 Jun 2026 17:06:21 +0200 Subject: [PATCH 12/14] fix: conflicts --- Cargo.lock | 1 + libdd-trace-utils/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 10966b248b..3c17d048f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3449,6 +3449,7 @@ dependencies = [ "rmp", "rmp-serde", "rmpv", + "rustc-hash", "serde", "serde_json", "tempfile", diff --git a/libdd-trace-utils/Cargo.toml b/libdd-trace-utils/Cargo.toml index fcb7b4e40f..35d494ad48 100644 --- a/libdd-trace-utils/Cargo.toml +++ b/libdd-trace-utils/Cargo.toml @@ -45,6 +45,7 @@ libdd-tinybytes = { version = "1.1.1", path = "../libdd-tinybytes", features = [ "serialization", ] } indexmap = "2.11" +rustc-hash = "2" thin-vec = "0.2" # Compression feature From 25dbb4c14db5b8269612876b117ce00030bd027c Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Thu, 11 Jun 2026 17:34:16 +0200 Subject: [PATCH 13/14] fix: clippy --- libdd-trace-utils/tests/test_send_data.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libdd-trace-utils/tests/test_send_data.rs b/libdd-trace-utils/tests/test_send_data.rs index e9aa95fa40..8188f433ab 100644 --- a/libdd-trace-utils/tests/test_send_data.rs +++ b/libdd-trace-utils/tests/test_send_data.rs @@ -580,12 +580,15 @@ mod tracing_integration_tests { #[cfg_attr(miri, ignore)] #[tokio::test] async fn v04_and_v1_encoders_produce_equivalent_decoded_traces() { - use libdd_trace_utils::msgpack_encoder::v1::{to_vec, to_vec_from_payload_v1}; use libdd_trace_utils::span::v04::SpanBytes as V04SpanBytes; use libdd_trace_utils::span::v1::{ AttributeValue, SpanBytes as V1SpanBytes, SpanKind, TraceChunkBytes, TracerPayloadBytes, }; - use libdd_trace_utils::tracer_metadata::TracerMetadata; + use libdd_trace_utils::{ + msgpack_encoder::v1::{to_vec, to_vec_from_payload_v1}, + tracer_metadata::TracerMetadata, + }; + use std::collections::HashMap; let test_agent = DatadogTestAgent::new(None, None, &[]).await; let uri = test_agent.get_uri_for_endpoint("v1.0/traces", None).await; From b22e805433a256c4b06ef2db359298ee542bd62e Mon Sep 17 00:00:00 2001 From: Anais Raison Date: Thu, 11 Jun 2026 18:00:48 +0200 Subject: [PATCH 14/14] fix: clippy --- libdd-trace-utils/tests/test_send_data.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libdd-trace-utils/tests/test_send_data.rs b/libdd-trace-utils/tests/test_send_data.rs index 8188f433ab..67b87b0671 100644 --- a/libdd-trace-utils/tests/test_send_data.rs +++ b/libdd-trace-utils/tests/test_send_data.rs @@ -588,16 +588,15 @@ mod tracing_integration_tests { msgpack_encoder::v1::{to_vec, to_vec_from_payload_v1}, tracer_metadata::TracerMetadata, }; - use std::collections::HashMap; let test_agent = DatadogTestAgent::new(None, None, &[]).await; let uri = test_agent.get_uri_for_endpoint("v1.0/traces", None).await; // ── v0.4 input ───────────────────────────────────────────────────────────── - let mut meta_v04 = HashMap::new(); + let mut meta_v04 = VecMap::new(); meta_v04.insert(bs_v1("env"), bs_v1("test-env")); meta_v04.insert(bs_v1("http.method"), bs_v1("GET")); - let mut metrics_v04 = HashMap::new(); + let mut metrics_v04 = VecMap::new(); metrics_v04.insert(bs_v1("http.duration_ms"), 12.5_f64); let v04_traces: Vec> = vec![vec![V04SpanBytes { service: bs_v1("svc"),