From 92ef8ffbe5d17b538bacc4db181e1181df119dbc Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 28 Apr 2026 11:44:23 +0200 Subject: [PATCH 1/6] [ntuple] Fix RFieldFundamental::GenerateColumns using the wrong repr idx --- .../inc/ROOT/RField/RFieldFundamental.hxx | 4 +-- tree/ntuple/test/ntuple_multi_column.cxx | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx index 9caf202bd0171..cc0033ea523ee 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx @@ -399,11 +399,11 @@ protected: fAvailableColumns.emplace_back(ROOT::Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex)); if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Trunc) { const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId()); - const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]); column->SetBitsOnStorage(coldesc.GetBitsOnStorage()); } else if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Quant) { const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId()); - const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]); assert(coldesc.GetValueRange().has_value()); const auto [valMin, valMax] = *coldesc.GetValueRange(); column->SetBitsOnStorage(coldesc.GetBitsOnStorage()); diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx index 7a4e6bb6eb4a4..4cc69ec5cfeff 100644 --- a/tree/ntuple/test/ntuple_multi_column.cxx +++ b/tree/ntuple/test/ntuple_multi_column.cxx @@ -377,6 +377,36 @@ TEST(RNTuple, MultiColumnRepresentationBulk) EXPECT_FLOAT_EQ(2.0, arr[0]); } +TEST(RNTuple, MultiColumnRepresentationVariableBitWidth) +{ + FileRaii fileGuard("test_ntuple_multi_column_representation_varbitwidth.root"); + + { + auto model = RNTupleModel::Create(); + auto fldPx = std::make_unique>("px"); + fldPx->SetTruncated(26); + fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal32}, {ROOT::ENTupleColumnType::kReal32Trunc}}); + model->AddField(std::move(fldPx)); + auto ptrPx = model->GetDefaultEntry().GetPtr("px"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + *ptrPx = 1.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetConstField("px")), 1); + *ptrPx = 2.0; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + auto fldPx = reader->GetModel().GetDefaultEntry().GetPtr("px"); + + reader->LoadEntry(0); + EXPECT_FLOAT_EQ(1.0, *fldPx); + reader->LoadEntry(1); + EXPECT_FLOAT_EQ(2.0, *fldPx); +} + TEST(RNTuple, MultiColumnRepresentationDedup) { FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root"); From cf07e61e29c825994f7e72dd7d4e4598d0ae7ed5 Mon Sep 17 00:00:00 2001 From: silverweed Date: Fri, 17 Apr 2026 11:08:30 +0200 Subject: [PATCH 2/6] [ntuple] update merger test to make sure we test nRepetitions --- tree/ntuple/test/ntuple_merger.cxx | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tree/ntuple/test/ntuple_merger.cxx b/tree/ntuple/test/ntuple_merger.cxx index de27ead34d3f9..8e1279e3ed143 100644 --- a/tree/ntuple/test/ntuple_merger.cxx +++ b/tree/ntuple/test/ntuple_merger.cxx @@ -1015,12 +1015,13 @@ TEST(RNTupleMerger, MergeLateModelExtension) { auto model = RNTupleModel::Create(); auto fieldFoo = model->MakeField>("foo"); - auto fieldVfoo = model->MakeField>("vfoo"); + auto fieldVfoo = model->MakeField[3]>("vfoo"); auto fieldBar = model->MakeField("bar"); auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath(), RNTupleWriteOptions()); for (size_t i = 0; i < 10; ++i) { fieldFoo->insert(std::make_pair(std::to_string(i), i * 123)); - *fieldVfoo = {(int)i * 123}; + fieldVfoo[0] = {(int)i * 123}; + fieldVfoo[2] = {(int)i * 345}; *fieldBar = i * 321; ntuple->Fill(); } @@ -1031,14 +1032,15 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto model = RNTupleModel::Create(); auto fieldBaz = model->MakeField("baz"); auto fieldFoo = model->MakeField>("foo"); - auto fieldVfoo = model->MakeField>("vfoo"); + auto fieldVfoo = model->MakeField[3]>("vfoo"); auto wopts = RNTupleWriteOptions(); wopts.SetCompression(0); auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath(), wopts); for (size_t i = 0; i < 10; ++i) { *fieldBaz = i * 567; fieldFoo->insert(std::make_pair(std::to_string(i), i * 765)); - *fieldVfoo = {(int)i * 765}; + fieldVfoo[0] = {(int)i * 765}; + fieldVfoo[2] = {(int)i * 987}; ntuple->Fill(); } } @@ -1072,21 +1074,25 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto ntuple = RNTupleReader::Open("ntuple", fileGuard3.GetPath()); EXPECT_EQ(ntuple->GetNEntries(), 20); auto foo = ntuple->GetModel().GetDefaultEntry().GetPtr>("foo"); - auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr>("vfoo"); + auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr[3]>("vfoo"); auto bar = ntuple->GetModel().GetDefaultEntry().GetPtr("bar"); auto baz = ntuple->GetModel().GetDefaultEntry().GetPtr("baz"); for (int i = 0; i < 10; ++i) { ntuple->LoadEntry(i); ASSERT_EQ((*foo)[std::to_string(i)], i * 123); - ASSERT_EQ((*vfoo)[0], i * 123); + ASSERT_EQ(vfoo[0][0], i * 123); + ASSERT_EQ(vfoo[2][0], i * 345); + ASSERT_TRUE(vfoo[1].empty()); ASSERT_EQ(*bar, i * 321); ASSERT_EQ(*baz, 0); } for (int i = 10; i < 20; ++i) { ntuple->LoadEntry(i); ASSERT_EQ((*foo)[std::to_string(i - 10)], (i - 10) * 765); - ASSERT_EQ((*vfoo)[0], (i - 10) * 765); + ASSERT_EQ(vfoo[0][0], (i - 10) * 765); + ASSERT_EQ(vfoo[2][0], (i - 10) * 987); + ASSERT_TRUE(vfoo[1].empty()); ASSERT_EQ(*bar, 0); ASSERT_EQ(*baz, (i - 10) * 567); } From 0fbbf5ca540768180acff7e68382fe341fa73c47 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 22 Apr 2026 14:47:11 +0200 Subject: [PATCH 3/6] [ntuple] Clarify a bit RFieldBase::EntryToColumnElementIndex Also fix the type of result --- tree/ntuple/inc/ROOT/RFieldBase.hxx | 7 ++++--- tree/ntuple/src/RFieldBase.cxx | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RFieldBase.hxx b/tree/ntuple/inc/ROOT/RFieldBase.hxx index 6e691a9f98d95..c5ba2a98dba08 100644 --- a/tree/ntuple/inc/ROOT/RFieldBase.hxx +++ b/tree/ntuple/inc/ROOT/RFieldBase.hxx @@ -260,14 +260,15 @@ private: func(target); } - /// Translate an entry index to a column element index of the principal column and vice versa. These functions - /// take into account the role and number of repetitions on each level of the field hierarchy as follows: + /// Translate an entry index to a column element index of the principal column. This function + /// takes into account the role and number of repetitions on each level of the field hierarchy as follows: /// - Top level fields: element index == entry index /// - Record fields propagate their principal column index to the principal columns of direct descendant fields /// - Collection and variant fields set the principal column index of their children to 0 /// /// The column element index also depends on the number of repetitions of each field in the hierarchy, e.g., given a - /// field with type `std::array, 2>`, this function returns 8 for the innermost field. + /// field with type `std::array, 2>`, this function called with `globalIndex == 1` + /// returns 8 for the innermost field. ROOT::NTupleSize_t EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const; /// Flushes data from active columns diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx index e185389f1190a..0e833729882ab 100644 --- a/tree/ntuple/src/RFieldBase.cxx +++ b/tree/ntuple/src/RFieldBase.cxx @@ -683,14 +683,14 @@ void ROOT::RFieldBase::Attach(std::unique_ptr child, std::stri ROOT::NTupleSize_t ROOT::RFieldBase::EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const { - std::size_t result = globalIndex; + ROOT::NTupleSize_t result = globalIndex; for (auto f = this; f != nullptr; f = f->GetParent()) { auto parent = f->GetParent(); if (parent && (parent->GetStructure() == ROOT::ENTupleStructure::kCollection || parent->GetStructure() == ROOT::ENTupleStructure::kVariant)) { return 0U; } - result *= std::max(f->GetNRepetitions(), std::size_t{1U}); + result *= std::max(f->GetNRepetitions(), ROOT::NTupleSize_t{1U}); } return result; } From b34d344b429d19465b0ba3cb5aed492b93175708 Mon Sep 17 00:00:00 2001 From: silverweed Date: Mon, 4 May 2026 14:45:20 +0200 Subject: [PATCH 4/6] [ntuple] Introduce feature flag 0 (nested deferred columns) In code, this feature is represented by an update in the logic of RClusterDescriptorBuilder::AddExtendedColumnRanges(), which now handles properly the case where a column is added in a later cluster and therefore cannot rely on the (NEntries * NRepetitions) calculation, so it instead copies its FirstElementIndex and NElements from the 0th representation (which is guaranteed to have valid numbers for them). --- tree/ntuple/doc/BinaryFormatSpecification.md | 9 +++++++-- tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx | 6 ++++++ tree/ntuple/src/RNTupleDescriptor.cxx | 17 +++++++++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tree/ntuple/doc/BinaryFormatSpecification.md b/tree/ntuple/doc/BinaryFormatSpecification.md index 2a87e514a8752..a04f1520606f4 100644 --- a/tree/ntuple/doc/BinaryFormatSpecification.md +++ b/tree/ntuple/doc/BinaryFormatSpecification.md @@ -1,4 +1,4 @@ -# RNTuple Binary Format Specification 1.0.2.0 +# RNTuple Binary Format Specification 1.1.0.0 ## Versioning Notes @@ -167,7 +167,12 @@ That means that readers need to continue reading feature flags as long as their Readers should gracefully abort reading when they encounter unknown bits set. -At the moment, there are no feature flag bits defined. +Here is the list of all currently-defined feature flags. Note that the flag name is only for informational purposes +and is not normative. + +| Flag Bit | Introduced in | Name | Meaning | +|----------|---------------|-------------------------|----------------------------------------------| +| 0 | 1.1.0.0 | Nested Deferred Columns | Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended
(i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field
backed by columns with different encoding, e.g. a `vector` whose elements are represented by SplitReal32
in the first ntuple and by Real32 in the second. | ## Frames diff --git a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx index 6a1c35ea264ac..6bdc98ebce381 100644 --- a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx @@ -768,6 +768,12 @@ public: /// All known feature flags. /// Note that the flag values represent the bit _index_, not the already-bitshifted integer. enum EFeatureFlags { + /// Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended + /// (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field + /// backed by columns with different encoding, e.g. a vector whose elements are represented by SplitReal32 + /// in the first ntuple and by Real32 in the second. + /// Added in version 1.1.0.0 of the binary format. + kFeatureFlag_NestedDeferredColumns = 0, // Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature // flags values, the check in RNTupleSerialize must be updated. diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index b1e8407b86b68..2653460526ff6 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -931,8 +931,21 @@ ROOT::Internal::RClusterDescriptorBuilder::AddExtendedColumnRanges(const RNTuple // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the // field zero excluding subfields of collection and variant fields. if (c.IsDeferredColumn()) { - columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions); - columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions); + if (c.GetRepresentationIndex() == 0) { + columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions); + columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions); + } else { + // Deferred representations which are not the first cannot count on the number of elements being + // equal to Entries * nRepetitions because they might have been added in a later cluster. But they + // can rely on the first representation having the correct FirstElement/NElements (by definition + // the first representation cannot be an "extended" one), therefore they can just copy the value + // from it. + const auto &field = desc.GetFieldDescriptor(fieldId); + const auto firstReprColumnId = field.GetLogicalColumnIds()[c.GetIndex()]; + const auto &firstReprColumnRange = fCluster.fColumnRanges[firstReprColumnId]; + columnRange.SetFirstElementIndex(firstReprColumnRange.GetFirstElementIndex()); + columnRange.SetNElements(firstReprColumnRange.GetNElements()); + } if (!columnRange.IsSuppressed()) { auto &pageRange = fCluster.fPageRanges[physicalId]; pageRange.fPhysicalColumnId = physicalId; From 3ebd5e0b368de0a73a3a9ceb48d47fb793f2d679 Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 18 Jun 2026 10:25:33 +0200 Subject: [PATCH 5/6] [ntuple] patch up column ids in the header extension when shifting --- tree/ntuple/src/RNTupleDescriptor.cxx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index 2653460526ff6..145dee51c22b3 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -1363,6 +1363,14 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ShiftAliasColumns(std::uint32_t o R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0); fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c)); } + + // Patch up column ids in the header extension + if (auto &xHeader = fDescriptor.fHeaderExtension) { + for (auto &columnId : xHeader->fExtendedColumnRepresentations) { + if (columnId >= fDescriptor.GetNPhysicalColumns()) + columnId += offset; + } + } } ROOT::RResult ROOT::Internal::RNTupleDescriptorBuilder::AddCluster(RClusterDescriptor &&clusterDesc) From 46dbe4c8488d62cf97344bb147014b145b28f875 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 17 Jun 2026 17:02:01 +0200 Subject: [PATCH 6/6] [ntuple] Allow adding duplicate column representations This is necessary to support multiple representations that use the same column type but different metadata (e.g. different bit width on Real32Trunc columns) --- tree/ntuple/src/RFieldBase.cxx | 5 +---- tree/ntuple/test/ntuple_multi_column.cxx | 9 --------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx index 0e833729882ab..b7efc3a527b01 100644 --- a/tree/ntuple/src/RFieldBase.cxx +++ b/tree/ntuple/src/RFieldBase.cxx @@ -850,10 +850,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se if (itRepresentative == std::end(validTypes)) throw RException(R__FAIL("invalid column representative")); - // don't add a duplicate representation - if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(), - [&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end()) - fColumnRepresentatives.emplace_back(*itRepresentative); + fColumnRepresentatives.emplace_back(*itRepresentative); } } diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx index 4cc69ec5cfeff..963c5fabd9277 100644 --- a/tree/ntuple/test/ntuple_multi_column.cxx +++ b/tree/ntuple/test/ntuple_multi_column.cxx @@ -406,12 +406,3 @@ TEST(RNTuple, MultiColumnRepresentationVariableBitWidth) reader->LoadEntry(1); EXPECT_FLOAT_EQ(2.0, *fldPx); } - -TEST(RNTuple, MultiColumnRepresentationDedup) -{ - FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root"); - - auto fldPx = RFieldBase::Create("px", "float").Unwrap(); - fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}}); - EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1); -}