From 477834f68adeae612780861952a738bf37d1979c Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 5 Jun 2026 10:05:31 -0400 Subject: [PATCH 1/6] test(profiling): add profiles dictionary benchmarks --- libdd-profiling/benches/main.rs | 7 +- .../benches/profiles_dictionary.rs | 84 +++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 libdd-profiling/benches/profiles_dictionary.rs diff --git a/libdd-profiling/benches/main.rs b/libdd-profiling/benches/main.rs index 3c29e7fef6..4561b539a8 100644 --- a/libdd-profiling/benches/main.rs +++ b/libdd-profiling/benches/main.rs @@ -5,5 +5,10 @@ use criterion::criterion_main; mod add_samples; mod interning_strings; +mod profiles_dictionary; -criterion_main!(interning_strings::benches, add_samples::benches); +criterion_main!( + interning_strings::benches, + add_samples::benches, + profiles_dictionary::benches +); diff --git a/libdd-profiling/benches/profiles_dictionary.rs b/libdd-profiling/benches/profiles_dictionary.rs new file mode 100644 index 0000000000..5e8ecfaa7d --- /dev/null +++ b/libdd-profiling/benches/profiles_dictionary.rs @@ -0,0 +1,84 @@ +// Copyright 2026-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{black_box, criterion_group, BatchSize, BenchmarkId, Criterion, Throughput}; +use libdd_profiling::profiles::datatypes::ProfilesDictionary; +use std::sync::Barrier; +use std::thread; +use std::time::Duration; + +const THREAD_COUNTS: [usize; 4] = [1, 2, 4, 16]; +const STRINGS_PER_THREAD: usize = 1024; + +fn make_strings(thread_count: usize) -> Vec> { + (0..thread_count) + .map(|thread_id| { + (0..STRINGS_PER_THREAD) + .map(|string_id| { + format!( + "/opt/datadog/profiler/thread-{thread_id}/module-{string_id:04}/function-{}::{}", + string_id % 97, + string_id.wrapping_mul(2_654_435_761usize) + ) + }) + .collect() + }) + .collect() +} + +fn insert_profile_strings(dict: &ProfilesDictionary, strings: &[String]) { + for string in strings { + black_box(dict.try_insert_str2(black_box(string.as_str())).unwrap()); + } +} + +fn insert_dictionary_strings_concurrently(strings: &[Vec]) -> ProfilesDictionary { + let dict = ProfilesDictionary::try_new().unwrap(); + + if let [strings] = strings { + insert_profile_strings(&dict, strings); + return dict; + } + + let barrier = Barrier::new(strings.len()); + thread::scope(|scope| { + for thread_strings in strings { + let dict = &dict; + let barrier = &barrier; + scope.spawn(move || { + barrier.wait(); + insert_profile_strings(dict, thread_strings); + }); + } + }); + + dict +} + +pub fn bench_profiles_dictionary(c: &mut Criterion) { + let mut group = c.benchmark_group("profiles_dictionary/unique_string_inserts"); + group.warm_up_time(Duration::from_secs(1)); + group.measurement_time(Duration::from_secs(5)); + group.sample_size(10); + + for thread_count in THREAD_COUNTS { + let strings = make_strings(thread_count); + let total_strings = thread_count * STRINGS_PER_THREAD; + group.throughput(Throughput::Elements(total_strings as u64)); + group.bench_with_input( + BenchmarkId::new("threads", thread_count), + &strings, + |b, strings| { + b.iter_batched( + 
|| strings, + |strings| black_box(insert_dictionary_strings_concurrently(strings)), + BatchSize::LargeInput, + ); + }, + ); + } + + group.finish(); +} + +criterion_group!(benches, bench_profiles_dictionary); From 79a61526fed966cfbe01f1508b332d5420cab8c2 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 9 Jun 2026 16:32:52 -0400 Subject: [PATCH 2/6] Define consts and update comments --- libdd-profiling/benches/profiles_dictionary.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libdd-profiling/benches/profiles_dictionary.rs b/libdd-profiling/benches/profiles_dictionary.rs index 5e8ecfaa7d..63583b91fd 100644 --- a/libdd-profiling/benches/profiles_dictionary.rs +++ b/libdd-profiling/benches/profiles_dictionary.rs @@ -9,6 +9,11 @@ use std::time::Duration; const THREAD_COUNTS: [usize; 4] = [1, 2, 4, 16]; const STRINGS_PER_THREAD: usize = 1024; +// Bound one generated function-name component so the input has repeated +// function-like fragments while each full string stays unique. +const FUNCTION_NAME_VARIANTS: usize = 97; +// Knuth/Fibonacci multiplicative hash constant, used only to vary synthetic input. 
+const KNUTH_MULTIPLICATIVE_HASH: usize = 2_654_435_761; fn make_strings(thread_count: usize) -> Vec> { (0..thread_count) @@ -17,8 +22,8 @@ fn make_strings(thread_count: usize) -> Vec> { .map(|string_id| { format!( "/opt/datadog/profiler/thread-{thread_id}/module-{string_id:04}/function-{}::{}", - string_id % 97, - string_id.wrapping_mul(2_654_435_761usize) + string_id % FUNCTION_NAME_VARIANTS, + string_id.wrapping_mul(KNUTH_MULTIPLICATIVE_HASH) ) }) .collect() From 90514f9748caf4f37b97770fb4ec477a6be75dc9 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 9 Jun 2026 17:03:54 -0400 Subject: [PATCH 3/6] drop inner black_box --- libdd-profiling/benches/profiles_dictionary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-profiling/benches/profiles_dictionary.rs b/libdd-profiling/benches/profiles_dictionary.rs index 63583b91fd..a4f2d115ec 100644 --- a/libdd-profiling/benches/profiles_dictionary.rs +++ b/libdd-profiling/benches/profiles_dictionary.rs @@ -33,7 +33,7 @@ fn make_strings(thread_count: usize) -> Vec> { fn insert_profile_strings(dict: &ProfilesDictionary, strings: &[String]) { for string in strings { - black_box(dict.try_insert_str2(black_box(string.as_str())).unwrap()); + black_box(dict.try_insert_str2(string.as_str()).unwrap()); } } From 8121b664ff3cee820a7518b1eff3335555fdfedd Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 9 Jun 2026 17:05:51 -0400 Subject: [PATCH 4/6] add comments --- libdd-profiling/benches/profiles_dictionary.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdd-profiling/benches/profiles_dictionary.rs b/libdd-profiling/benches/profiles_dictionary.rs index a4f2d115ec..194a1e6dcb 100644 --- a/libdd-profiling/benches/profiles_dictionary.rs +++ b/libdd-profiling/benches/profiles_dictionary.rs @@ -67,6 +67,8 @@ pub fn bench_profiles_dictionary(c: &mut Criterion) { group.sample_size(10); for thread_count in THREAD_COUNTS { + // Precompute input outside the measured closure so the benchmark measures 
+ // dictionary insertion rather than string formatting/allocation. let strings = make_strings(thread_count); let total_strings = thread_count * STRINGS_PER_THREAD; group.throughput(Throughput::Elements(total_strings as u64)); From b123964103659306d5ce31d836e8e0ac4ce990df Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 9 Jun 2026 17:09:29 -0400 Subject: [PATCH 5/6] impl varying length strings --- .../benches/profiles_dictionary.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/libdd-profiling/benches/profiles_dictionary.rs b/libdd-profiling/benches/profiles_dictionary.rs index 194a1e6dcb..75c78c11e0 100644 --- a/libdd-profiling/benches/profiles_dictionary.rs +++ b/libdd-profiling/benches/profiles_dictionary.rs @@ -12,6 +12,7 @@ const STRINGS_PER_THREAD: usize = 1024; // Bound one generated function-name component so the input has repeated // function-like fragments while each full string stays unique. const FUNCTION_NAME_VARIANTS: usize = 97; +const STRING_SHAPE_VARIANTS: usize = 4; // Knuth/Fibonacci multiplicative hash constant, used only to vary synthetic input. 
const KNUTH_MULTIPLICATIVE_HASH: usize = 2_654_435_761; @@ -20,11 +21,19 @@ fn make_strings(thread_count: usize) -> Vec> { .map(|thread_id| { (0..STRINGS_PER_THREAD) .map(|string_id| { - format!( - "/opt/datadog/profiler/thread-{thread_id}/module-{string_id:04}/function-{}::{}", - string_id % FUNCTION_NAME_VARIANTS, - string_id.wrapping_mul(KNUTH_MULTIPLICATIVE_HASH) - ) + let function_id = string_id % FUNCTION_NAME_VARIANTS; + let mixed_id = string_id.wrapping_mul(KNUTH_MULTIPLICATIVE_HASH); + + match string_id % STRING_SHAPE_VARIANTS { + 0 => format!("function_{function_id}::{mixed_id}"), + 1 => format!("/src/thread_{thread_id}/module_{function_id}/file_{string_id:04}.rs"), + 2 => { + format!("datadog::profiling::module_{function_id}::function_{mixed_id}") + } + _ => format!( + "/opt/datadog/profiler/thread-{thread_id}/module-{function_id}/src/file_{string_id:04}.rs::function_{function_id}::{mixed_id}", + ), + } }) .collect() }) From e4a91af692fb8f8d54102f01fc6346714de2e627 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 9 Jun 2026 17:13:42 -0400 Subject: [PATCH 6/6] add comment why the function partitions the strings --- libdd-profiling/benches/profiles_dictionary.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libdd-profiling/benches/profiles_dictionary.rs b/libdd-profiling/benches/profiles_dictionary.rs index 75c78c11e0..8de484d250 100644 --- a/libdd-profiling/benches/profiles_dictionary.rs +++ b/libdd-profiling/benches/profiles_dictionary.rs @@ -16,7 +16,9 @@ const STRING_SHAPE_VARIANTS: usize = 4; // Knuth/Fibonacci multiplicative hash constant, used only to vary synthetic input. const KNUTH_MULTIPLICATIVE_HASH: usize = 2_654_435_761; -fn make_strings(thread_count: usize) -> Vec> { +// The outer Vec partitions precomputed input by benchmark worker thread; each +// inner Vec is the set of strings inserted by one worker. 
+fn make_partitioned_strings(thread_count: usize) -> Vec> { (0..thread_count) .map(|thread_id| { (0..STRINGS_PER_THREAD) @@ -78,7 +80,7 @@ pub fn bench_profiles_dictionary(c: &mut Criterion) { for thread_count in THREAD_COUNTS { // Precompute input outside the measured closure so the benchmark measures // dictionary insertion rather than string formatting/allocation. - let strings = make_strings(thread_count); + let strings = make_partitioned_strings(thread_count); let total_strings = thread_count * STRINGS_PER_THREAD; group.throughput(Throughput::Elements(total_strings as u64)); group.bench_with_input(