From de740013a9f25a874cc223ca869ecaa223df2873 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Thu, 18 Jun 2026 08:16:00 +0200 Subject: [PATCH 1/2] [mathcore] Fix failure detection of testkdTreeBinning The test had no failure return code, so errors would go undetected. --- math/mathcore/test/testkdTreeBinning.cxx | 42 +++++++++++++++--------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/math/mathcore/test/testkdTreeBinning.cxx b/math/mathcore/test/testkdTreeBinning.cxx index ba763375a6bb4..0f202ef401d50 100644 --- a/math/mathcore/test/testkdTreeBinning.cxx +++ b/math/mathcore/test/testkdTreeBinning.cxx @@ -28,7 +28,13 @@ using std::cout; using std::cerr; using std::endl; -void testkdTreeBinning() { +// Returns the number of detected failures (0 on success) so that the caller can +// turn it into a non-zero process exit code: ROOT's Error() only prints, it does +// not by itself make the test fail under ctest. +int testkdTreeBinning() +{ + + int nfail = 0; // ----------------------------------------------------------------------------------------------- // C r e a t e r a n d o m s a m p l e @@ -83,14 +89,22 @@ void testkdTreeBinning() { std::cout << "Bin with minimum density: " << ibinMin << " density = " << kdBins->GetBinDensity(ibinMin) << " content = " << kdBins->GetBinContent(ibinMin) << std::endl; std::cout << "Bin with maximum density: " << ibinMax << " density = " << kdBins->GetBinDensity(ibinMax) << " content = " << kdBins->GetBinContent(ibinMax) << std::endl; - if (kdBins->GetBinContent(ibinMax) != DATASZ/NBINS) + if (kdBins->GetBinContent(ibinMax) != DATASZ / NBINS) { Error("testkdTreeBinning","Wrong bin content"); + ++nfail; + } // order bins by density kdBins->SortBinsByDensity(true); - if (kdBins->GetBinMinDensity() != 0) Error("testkdTreeBinning","Wrong minimum bin after sorting"); - if (kdBins->GetBinMaxDensity() != nbins-1) Error("testkdTreeBinning","Wrong maximum bin after sorting"); + if (kdBins->GetBinMinDensity() != 0) { + Error("testkdTreeBinning", "Wrong minimum bin after sorting"); + ++nfail; + } + if (kdBins->GetBinMaxDensity() != nbins - 1) { + Error("testkdTreeBinning", "Wrong maximum bin after sorting"); + ++nfail; + } if (showGraphics) { new TCanvas(); @@ -104,7 +118,6 @@ void testkdTreeBinning() { double point[2] = {0,0}; // double binCenter[2]; gRandom->SetSeed(0); - bool ok = true; for (int itimes = 0; itimes < ntimes; itimes++) { // generate a random point in 2D @@ -126,12 +139,12 @@ void testkdTreeBinning() { std::cout << " point y " << point[1] << " BIN CENTER is " << binCenter[1] << " min " << binMin[1] << " max " << binMax[1] << std::endl; } - ok &= point[0] > binMin[0] && point[0] < binMax[0]; - ok &= point[1] > binMin[1] && point[1] < binMax[1]; + bool ok = point[0] > binMin[0] && point[0] < binMax[0] && point[1] > binMin[1] && point[1] < binMax[1]; if (!ok) { Error ("testkdTreeBinning::FindBin"," Point is not in the right bin " ); std::cout << " point x " << point[0] << " BIN CENTER is " << binCenter[0] << " min " << binMin[0] << " max " << binMax[0] << std::endl; std::cout << " point y " << point[1] << " BIN CENTER is " << binCenter[1] << " min " << binMin[1] << " max " << binMax[1] << std::endl; + ++nfail; } if (itimes < 2 && showGraphics ) { @@ -143,15 +156,13 @@ void testkdTreeBinning() { } delete [] binCenter; - } - else + } else { Error("testkdTreeBinning::FindBin"," Bin %d is not existing ",ibin); - - + ++nfail; + } } - return; - + return nfail; } int main(int argc, char **argv) @@ -180,7 +191,7 @@ int main(int argc, char **argv) if ( showGraphics ) theApp = new TApplication("App",&argc,argv); - testkdTreeBinning(); + int nfail = testkdTreeBinning(); if ( showGraphics ) { @@ -189,7 +200,6 @@ int main(int argc, char **argv) theApp = nullptr; } - return 0; - + return nfail; } From 1c6eafcb2489ad868acfc1d8798a0f292d8ad102 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Thu, 18 Jun 2026 08:11:05 +0200 Subject: [PATCH 2/2] [mathcore] Fix TKDTreeBinning::FindBin returning out-of-range bins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TKDTreeBinning computed fNBins independently from the kd-tree it builds. SetNBins picks a bucket size of fDataSize / nBins (integer division), but the underlying TKDTree creates ceil(fNPoints / bucketSize) terminal nodes. When the requested number of bins does not divide the data size evenly, the bucket size is rounded down and the tree ends up with more terminal nodes than fNBins. Since FindBin returns a terminal-node index, it could return a value >= GetNBins(), pointing at a bin with no edges and no content: ```c++ binning.GetNBins() # 1001 binning.FindBin([1.2, 0.5, 0.5, 0.5, 0.5]) # 1004 binning.GetBinContent(1004) # RuntimeWarning: No such bin ``` Set fNBins to the actual number of terminal nodes (GetNNodes() + 1) after building the tree, so FindBin always returns an index within [0, GetNBins()) and every bin has valid edges and content. This also self-corrects existing objects on read-back, as the Streamer rebuilds the tree via SetNBins. Also fix SetBinsContent to read the per-node point counts directly from the tree (GetNPointsNode) instead of an incorrect last-bin heuristic, so bin contents are exact and sum to the data size. Add a regression test covering the non-evenly-dividing case. Closes #10786. 🤖 Done with the help of AI. --- math/mathcore/src/TKDTreeBinning.cxx | 15 ++++-- math/mathcore/test/testkdTreeBinning.cxx | 63 ++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/math/mathcore/src/TKDTreeBinning.cxx b/math/mathcore/src/TKDTreeBinning.cxx index 055add4c6fead..c7407b25fb0a3 100644 --- a/math/mathcore/src/TKDTreeBinning.cxx +++ b/math/mathcore/src/TKDTreeBinning.cxx @@ -136,6 +136,12 @@ void TKDTreeBinning::SetNBins(UInt_t bins) { fDataBins = new TKDTreeID(fDataSize, fDim, fDataSize / (fNBins - remainingData)); // TKDTree input is data size, data dimension and the content size of bins ("bucket" size) SetTreeData(); fDataBins->Build(); + // The kd-tree determines the actual number of bins: every terminal node + // is a bin and there are GetNNodes() + 1 of them. When the requested bin + // count does not divide the data size evenly, the bucket size is rounded + // down and the tree ends up with more terminal nodes than the requested + // number of bins. We must therefore use the real number of terminal nodes. + fNBins = fDataBins->GetNNodes() + 1; SetBinsEdges(); SetBinsContent(); } else { @@ -238,12 +244,13 @@ void TKDTreeBinning::SetTreeData() { } void TKDTreeBinning::SetBinsContent() { - // Sets the bins' content + // Sets the bins' content from the actual number of points in each terminal + // node of the kd-tree. All terminal nodes hold fBucketSize points except + // possibly the last one, so query the tree directly instead of guessing. fBinsContent.resize(fNBins); + UInt_t nTreeNodes = fDataBins->GetNNodes(); for (UInt_t i = 0; i < fNBins; ++i) - fBinsContent[i] = fDataBins->GetBucketSize(); - if ( fDataSize % fNBins != 0 ) - fBinsContent[fNBins - 1] = fDataSize % (fNBins-1); + fBinsContent[i] = fDataBins->GetNPointsNode(i + nTreeNodes); } void TKDTreeBinning::SetBinsEdges() { diff --git a/math/mathcore/test/testkdTreeBinning.cxx b/math/mathcore/test/testkdTreeBinning.cxx index 0f202ef401d50..e4eb67c2bf73a 100644 --- a/math/mathcore/test/testkdTreeBinning.cxx +++ b/math/mathcore/test/testkdTreeBinning.cxx @@ -165,6 +165,67 @@ int testkdTreeBinning() return nfail; } +// Regression test for the case where the requested number of bins does not +// divide the data size evenly. In that situation the kd-tree builds more +// terminal nodes (bins) than naively expected, and FindBin() must never return +// an index outside [0, GetNBins()). See +// https://github.com/root-project/root/issues/10784 about +// TKDTreeBinning::FindBin returning non-existent bins. Returns the number of +// detected failures (0 on success) so that the caller can turn it into a +// non-zero process exit code: ROOT's Error() only prints, it does not by +// itself make the test fail under ctest. +int testkdTreeBinningFindBinRange() +{ + + int nfail = 0; + + const UInt_t DATASZ = 100500; // deliberately NOT a multiple of NBINS + const UInt_t DATADIM = 5; + const UInt_t NBINS = 1000; + + std::vector smp(DATASZ * DATADIM); + TRandom3 r; + r.SetSeed(1); + for (UInt_t i = 0; i < DATADIM; ++i) + for (UInt_t j = 0; j < DATASZ; ++j) + smp[DATASZ * i + j] = r.Uniform(-1., 1.); + + TKDTreeBinning kdBins(DATASZ, DATADIM, smp, NBINS); + + const UInt_t nbins = kdBins.GetNBins(); + + // The number of bins must match the number of terminal nodes of the kd-tree. + if ((int)nbins != kdBins.GetTree()->GetNNodes() + 1) { + Error("testkdTreeBinningFindBinRange", "GetNBins() (%u) != number of kd-tree terminal nodes (%d)", nbins, + kdBins.GetTree()->GetNNodes() + 1); + ++nfail; + } + + // Every data point must be assigned to a valid bin. + std::vector point(DATADIM); + for (UInt_t j = 0; j < DATASZ; ++j) { + for (UInt_t i = 0; i < DATADIM; ++i) + point[i] = smp[DATASZ * i + j]; + UInt_t bin = kdBins.FindBin(point.data()); + if (bin >= nbins) { + Error("testkdTreeBinningFindBinRange", "FindBin returned out-of-range bin %u (NBins = %u)", bin, nbins); + ++nfail; + break; + } + } + + // The total bin content must add up to the data size. + Long64_t total = 0; + for (UInt_t i = 0; i < nbins; ++i) + total += kdBins.GetBinContent(i); + if (total != (Long64_t)DATASZ) { + Error("testkdTreeBinningFindBinRange", "Sum of bin contents (%lld) != data size (%u)", total, DATASZ); + ++nfail; + } + + return nfail; +} + int main(int argc, char **argv) { // Parse command line arguments @@ -193,6 +254,8 @@ int main(int argc, char **argv) int nfail = testkdTreeBinning(); + nfail += testkdTreeBinningFindBinRange(); + if ( showGraphics ) { theApp->Run();