Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions math/mathcore/src/TKDTreeBinning.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ void TKDTreeBinning::SetNBins(UInt_t bins) {
fDataBins = new TKDTreeID(fDataSize, fDim, fDataSize / (fNBins - remainingData)); // TKDTree input is data size, data dimension and the content size of bins ("bucket" size)
SetTreeData();
fDataBins->Build();
// The kd-tree determines the actual number of bins: every terminal node
// is a bin and there are GetNNodes() + 1 of them. When the requested bin
// count does not divide the data size evenly, the bucket size is rounded
// down and the tree ends up with more terminal nodes than the requested
// number of bins. We must therefore use the real number of terminal nodes.
fNBins = fDataBins->GetNNodes() + 1;
SetBinsEdges();
SetBinsContent();
} else {
Expand Down Expand Up @@ -238,12 +244,13 @@ void TKDTreeBinning::SetTreeData() {
}

void TKDTreeBinning::SetBinsContent() {
// Sets the bins' content
// Sets the bins' content from the actual number of points in each terminal
// node of the kd-tree. All terminal nodes hold fBucketSize points except
// possibly the last one, so query the tree directly instead of guessing.
fBinsContent.resize(fNBins);
UInt_t nTreeNodes = fDataBins->GetNNodes();
for (UInt_t i = 0; i < fNBins; ++i)
fBinsContent[i] = fDataBins->GetBucketSize();
if ( fDataSize % fNBins != 0 )
fBinsContent[fNBins - 1] = fDataSize % (fNBins-1);
fBinsContent[i] = fDataBins->GetNPointsNode(i + nTreeNodes);
}

void TKDTreeBinning::SetBinsEdges() {
Expand Down
99 changes: 86 additions & 13 deletions math/mathcore/test/testkdTreeBinning.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@ using std::cout;
using std::cerr;
using std::endl;

void testkdTreeBinning() {
// Returns the number of detected failures (0 on success) so that the caller can
// turn it into a non-zero process exit code: ROOT's Error() only prints, it does
// not by itself make the test fail under ctest.
int testkdTreeBinning()
{

int nfail = 0;

// -----------------------------------------------------------------------------------------------
// C r e a t e r a n d o m s a m p l e
Expand Down Expand Up @@ -83,14 +89,22 @@ void testkdTreeBinning() {
std::cout << "Bin with minimum density: " << ibinMin << " density = " << kdBins->GetBinDensity(ibinMin) << " content = " << kdBins->GetBinContent(ibinMin) << std::endl;
std::cout << "Bin with maximum density: " << ibinMax << " density = " << kdBins->GetBinDensity(ibinMax) << " content = " << kdBins->GetBinContent(ibinMax) << std::endl;

if (kdBins->GetBinContent(ibinMax) != DATASZ/NBINS)
if (kdBins->GetBinContent(ibinMax) != DATASZ / NBINS) {
Error("testkdTreeBinning","Wrong bin content");
++nfail;
}

// order bins by density
kdBins->SortBinsByDensity(true);

if (kdBins->GetBinMinDensity() != 0) Error("testkdTreeBinning","Wrong minimum bin after sorting");
if (kdBins->GetBinMaxDensity() != nbins-1) Error("testkdTreeBinning","Wrong maximum bin after sorting");
if (kdBins->GetBinMinDensity() != 0) {
Error("testkdTreeBinning", "Wrong minimum bin after sorting");
++nfail;
}
if (kdBins->GetBinMaxDensity() != nbins - 1) {
Error("testkdTreeBinning", "Wrong maximum bin after sorting");
++nfail;
}

if (showGraphics) {
new TCanvas();
Expand All @@ -104,7 +118,6 @@ void testkdTreeBinning() {
double point[2] = {0,0};
// double binCenter[2];
gRandom->SetSeed(0);
bool ok = true;
for (int itimes = 0; itimes < ntimes; itimes++) {

// generate a random point in 2D
Expand All @@ -126,12 +139,12 @@ void testkdTreeBinning() {
std::cout << " point y " << point[1] << " BIN CENTER is " << binCenter[1] << " min " << binMin[1] << " max " << binMax[1] << std::endl;
}

ok &= point[0] > binMin[0] && point[0] < binMax[0];
ok &= point[1] > binMin[1] && point[1] < binMax[1];
bool ok = point[0] > binMin[0] && point[0] < binMax[0] && point[1] > binMin[1] && point[1] < binMax[1];
if (!ok) {
Error ("testkdTreeBinning::FindBin"," Point is not in the right bin " );
std::cout << " point x " << point[0] << " BIN CENTER is " << binCenter[0] << " min " << binMin[0] << " max " << binMax[0] << std::endl;
std::cout << " point y " << point[1] << " BIN CENTER is " << binCenter[1] << " min " << binMin[1] << " max " << binMax[1] << std::endl;
++nfail;
}

if (itimes < 2 && showGraphics ) {
Expand All @@ -143,15 +156,74 @@ void testkdTreeBinning() {
}

delete [] binCenter;
}
else
} else {
Error("testkdTreeBinning::FindBin"," Bin %d is not existing ",ibin);
++nfail;
}
}

return nfail;
}

// Regression test for the case where the requested number of bins does not
// divide the data size evenly. In that situation the kd-tree builds more
// terminal nodes (bins) than naively expected, and FindBin() must never return
// an index outside [0, GetNBins()). See
// https://github.com/root-project/root/issues/10784 about
// TKDTreeBinning::FindBin returning non-existent bins. Returns the number of
// detected failures (0 on success) so that the caller can turn it into a
// non-zero process exit code: ROOT's Error() only prints, it does not by
// itself make the test fail under ctest.
int testkdTreeBinningFindBinRange()
{

int nfail = 0;

const UInt_t DATASZ = 100500; // deliberately NOT a multiple of NBINS
const UInt_t DATADIM = 5;
const UInt_t NBINS = 1000;

std::vector<Double_t> smp(DATASZ * DATADIM);
TRandom3 r;
r.SetSeed(1);
for (UInt_t i = 0; i < DATADIM; ++i)
for (UInt_t j = 0; j < DATASZ; ++j)
smp[DATASZ * i + j] = r.Uniform(-1., 1.);

TKDTreeBinning kdBins(DATASZ, DATADIM, smp, NBINS);

const UInt_t nbins = kdBins.GetNBins();

// The number of bins must match the number of terminal nodes of the kd-tree.
if ((int)nbins != kdBins.GetTree()->GetNNodes() + 1) {
Error("testkdTreeBinningFindBinRange", "GetNBins() (%u) != number of kd-tree terminal nodes (%d)", nbins,
kdBins.GetTree()->GetNNodes() + 1);
++nfail;
}

return;
// Every data point must be assigned to a valid bin.
std::vector<Double_t> point(DATADIM);
for (UInt_t j = 0; j < DATASZ; ++j) {
for (UInt_t i = 0; i < DATADIM; ++i)
point[i] = smp[DATASZ * i + j];
UInt_t bin = kdBins.FindBin(point.data());
if (bin >= nbins) {
Error("testkdTreeBinningFindBinRange", "FindBin returned out-of-range bin %u (NBins = %u)", bin, nbins);
++nfail;
break;
}
}

// The total bin content must add up to the data size.
Long64_t total = 0;
for (UInt_t i = 0; i < nbins; ++i)
total += kdBins.GetBinContent(i);
if (total != (Long64_t)DATASZ) {
Error("testkdTreeBinningFindBinRange", "Sum of bin contents (%lld) != data size (%u)", total, DATASZ);
++nfail;
}

return nfail;
}

int main(int argc, char **argv)
Expand Down Expand Up @@ -180,7 +252,9 @@ int main(int argc, char **argv)
if ( showGraphics )
theApp = new TApplication("App",&argc,argv);

testkdTreeBinning();
int nfail = testkdTreeBinning();

nfail += testkdTreeBinningFindBinRange();

if ( showGraphics )
{
Expand All @@ -189,7 +263,6 @@ int main(int argc, char **argv)
theApp = nullptr;
}

return 0;

return nfail;
}

Loading