diff --git a/tree/ntuple/doc/BinaryFormatSpecification.md b/tree/ntuple/doc/BinaryFormatSpecification.md
index 2a87e514a8752..a04f1520606f4 100644
--- a/tree/ntuple/doc/BinaryFormatSpecification.md
+++ b/tree/ntuple/doc/BinaryFormatSpecification.md
@@ -1,4 +1,4 @@
-# RNTuple Binary Format Specification 1.0.2.0
+# RNTuple Binary Format Specification 1.1.0.0
## Versioning Notes
@@ -167,7 +167,12 @@ That means that readers need to continue reading feature flags as long as their
Readers should gracefully abort reading when they encounter unknown bits set.
-At the moment, there are no feature flag bits defined.
+Here is the list of all currently-defined feature flags. Note that the flag name is only for informational purposes
+and is not normative.
+
+| Flag Bit | Introduced in | Name | Meaning |
+|----------|---------------|-------------------------|----------------------------------------------|
+| 0 | 1.1.0.0 | Nested Deferred Columns | Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field backed by columns with different encodings, e.g. a `std::vector<float>` whose elements are represented by SplitReal32 in the first RNTuple and by Real32 in the second. |
## Frames
diff --git a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx
index 9caf202bd0171..cc0033ea523ee 100644
--- a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx
+++ b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx
@@ -399,11 +399,11 @@ protected:
fAvailableColumns.emplace_back(ROOT::Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex));
if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Trunc) {
const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId());
- const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
+ const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]);
column->SetBitsOnStorage(coldesc.GetBitsOnStorage());
} else if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Quant) {
const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId());
- const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
+ const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]);
assert(coldesc.GetValueRange().has_value());
const auto [valMin, valMax] = *coldesc.GetValueRange();
column->SetBitsOnStorage(coldesc.GetBitsOnStorage());
diff --git a/tree/ntuple/inc/ROOT/RFieldBase.hxx b/tree/ntuple/inc/ROOT/RFieldBase.hxx
index 6e691a9f98d95..c5ba2a98dba08 100644
--- a/tree/ntuple/inc/ROOT/RFieldBase.hxx
+++ b/tree/ntuple/inc/ROOT/RFieldBase.hxx
@@ -260,14 +260,15 @@ private:
func(target);
}
- /// Translate an entry index to a column element index of the principal column and vice versa. These functions
- /// take into account the role and number of repetitions on each level of the field hierarchy as follows:
+ /// Translate an entry index to a column element index of the principal column. This function
+ /// takes into account the role and number of repetitions on each level of the field hierarchy as follows:
/// - Top level fields: element index == entry index
/// - Record fields propagate their principal column index to the principal columns of direct descendant fields
/// - Collection and variant fields set the principal column index of their children to 0
///
/// The column element index also depends on the number of repetitions of each field in the hierarchy, e.g., given a
- /// field with type `std::array<std::array<float, 4>, 2>`, this function returns 8 for the innermost field.
+ /// field with type `std::array<std::array<float, 4>, 2>`, this function called with `globalIndex == 1`
+ /// returns 8 for the innermost field.
ROOT::NTupleSize_t EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const;
/// Flushes data from active columns
diff --git a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
index 6a1c35ea264ac..6bdc98ebce381 100644
--- a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
+++ b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
@@ -768,6 +768,12 @@ public:
/// All known feature flags.
/// Note that the flag values represent the bit _index_, not the already-bitshifted integer.
enum EFeatureFlags {
+ /// Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended
+ /// (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field
+ /// backed by columns with different encoding, e.g. a `std::vector<float>` whose elements are represented
+ /// by SplitReal32 in the first ntuple and by Real32 in the second.
+ /// Added in version 1.1.0.0 of the binary format.
+ kFeatureFlag_NestedDeferredColumns = 0,
// Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature
// flags values, the check in RNTupleSerialize must be updated.
diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx
index e185389f1190a..b7efc3a527b01 100644
--- a/tree/ntuple/src/RFieldBase.cxx
+++ b/tree/ntuple/src/RFieldBase.cxx
@@ -683,14 +683,14 @@ void ROOT::RFieldBase::Attach(std::unique_ptr child, std::stri
ROOT::NTupleSize_t ROOT::RFieldBase::EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const
{
- std::size_t result = globalIndex;
+ ROOT::NTupleSize_t result = globalIndex;
for (auto f = this; f != nullptr; f = f->GetParent()) {
auto parent = f->GetParent();
if (parent && (parent->GetStructure() == ROOT::ENTupleStructure::kCollection ||
parent->GetStructure() == ROOT::ENTupleStructure::kVariant)) {
return 0U;
}
- result *= std::max(f->GetNRepetitions(), std::size_t{1U});
+ // Name the template argument explicitly: GetNRepetitions() yields a std::size_t, which need not be the
+ // same type as ROOT::NTupleSize_t on every platform, and two-argument std::max cannot deduce a common
+ // type from mismatched arguments. A repetition count of 0 is treated as 1.
+ result *= std::max<ROOT::NTupleSize_t>(f->GetNRepetitions(), 1U);
}
return result;
}
@@ -850,10 +850,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se
if (itRepresentative == std::end(validTypes))
throw RException(R__FAIL("invalid column representative"));
- // don't add a duplicate representation
- if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(),
- [&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end())
- fColumnRepresentatives.emplace_back(*itRepresentative);
+ fColumnRepresentatives.emplace_back(*itRepresentative);
}
}
diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx
index b1e8407b86b68..145dee51c22b3 100644
--- a/tree/ntuple/src/RNTupleDescriptor.cxx
+++ b/tree/ntuple/src/RNTupleDescriptor.cxx
@@ -931,8 +931,21 @@ ROOT::Internal::RClusterDescriptorBuilder::AddExtendedColumnRanges(const RNTuple
// `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the
// field zero excluding subfields of collection and variant fields.
if (c.IsDeferredColumn()) {
- columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
- columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
+ if (c.GetRepresentationIndex() == 0) {
+ columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
+ columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
+ } else {
+ // Deferred representations which are not the first cannot count on the number of elements being
+ // equal to Entries * nRepetitions because they might have been added in a later cluster. But they
+ // can rely on the first representation having the correct FirstElement/NElements (by definition
+ // the first representation cannot be an "extended" one), therefore they can just copy the value
+ // from it.
+ const auto &field = desc.GetFieldDescriptor(fieldId);
+ const auto firstReprColumnId = field.GetLogicalColumnIds()[c.GetIndex()];
+ const auto &firstReprColumnRange = fCluster.fColumnRanges[firstReprColumnId];
+ columnRange.SetFirstElementIndex(firstReprColumnRange.GetFirstElementIndex());
+ columnRange.SetNElements(firstReprColumnRange.GetNElements());
+ }
if (!columnRange.IsSuppressed()) {
auto &pageRange = fCluster.fPageRanges[physicalId];
pageRange.fPhysicalColumnId = physicalId;
@@ -1350,6 +1363,14 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ShiftAliasColumns(std::uint32_t o
R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
}
+
+ // Patch up column ids in the header extension
+ if (auto &xHeader = fDescriptor.fHeaderExtension) {
+ for (auto &columnId : xHeader->fExtendedColumnRepresentations) {
+ if (columnId >= fDescriptor.GetNPhysicalColumns())
+ columnId += offset;
+ }
+ }
}
ROOT::RResult ROOT::Internal::RNTupleDescriptorBuilder::AddCluster(RClusterDescriptor &&clusterDesc)
diff --git a/tree/ntuple/test/ntuple_merger.cxx b/tree/ntuple/test/ntuple_merger.cxx
index de27ead34d3f9..8e1279e3ed143 100644
--- a/tree/ntuple/test/ntuple_merger.cxx
+++ b/tree/ntuple/test/ntuple_merger.cxx
@@ -1015,12 +1015,13 @@ TEST(RNTupleMerger, MergeLateModelExtension)
{
auto model = RNTupleModel::Create();
auto fieldFoo = model->MakeField>("foo");
- auto fieldVfoo = model->MakeField>("vfoo");
+ auto fieldVfoo = model->MakeField[3]>("vfoo");
auto fieldBar = model->MakeField("bar");
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath(), RNTupleWriteOptions());
for (size_t i = 0; i < 10; ++i) {
fieldFoo->insert(std::make_pair(std::to_string(i), i * 123));
- *fieldVfoo = {(int)i * 123};
+ fieldVfoo[0] = {(int)i * 123};
+ fieldVfoo[2] = {(int)i * 345};
*fieldBar = i * 321;
ntuple->Fill();
}
@@ -1031,14 +1032,15 @@ TEST(RNTupleMerger, MergeLateModelExtension)
auto model = RNTupleModel::Create();
auto fieldBaz = model->MakeField("baz");
auto fieldFoo = model->MakeField>("foo");
- auto fieldVfoo = model->MakeField>("vfoo");
+ auto fieldVfoo = model->MakeField[3]>("vfoo");
auto wopts = RNTupleWriteOptions();
wopts.SetCompression(0);
auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath(), wopts);
for (size_t i = 0; i < 10; ++i) {
*fieldBaz = i * 567;
fieldFoo->insert(std::make_pair(std::to_string(i), i * 765));
- *fieldVfoo = {(int)i * 765};
+ fieldVfoo[0] = {(int)i * 765};
+ fieldVfoo[2] = {(int)i * 987};
ntuple->Fill();
}
}
@@ -1072,21 +1074,25 @@ TEST(RNTupleMerger, MergeLateModelExtension)
auto ntuple = RNTupleReader::Open("ntuple", fileGuard3.GetPath());
EXPECT_EQ(ntuple->GetNEntries(), 20);
auto foo = ntuple->GetModel().GetDefaultEntry().GetPtr>("foo");
- auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr>("vfoo");
+ auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr[3]>("vfoo");
auto bar = ntuple->GetModel().GetDefaultEntry().GetPtr("bar");
auto baz = ntuple->GetModel().GetDefaultEntry().GetPtr("baz");
for (int i = 0; i < 10; ++i) {
ntuple->LoadEntry(i);
ASSERT_EQ((*foo)[std::to_string(i)], i * 123);
- ASSERT_EQ((*vfoo)[0], i * 123);
+ ASSERT_EQ(vfoo[0][0], i * 123);
+ ASSERT_EQ(vfoo[2][0], i * 345);
+ ASSERT_TRUE(vfoo[1].empty());
ASSERT_EQ(*bar, i * 321);
ASSERT_EQ(*baz, 0);
}
for (int i = 10; i < 20; ++i) {
ntuple->LoadEntry(i);
ASSERT_EQ((*foo)[std::to_string(i - 10)], (i - 10) * 765);
- ASSERT_EQ((*vfoo)[0], (i - 10) * 765);
+ ASSERT_EQ(vfoo[0][0], (i - 10) * 765);
+ ASSERT_EQ(vfoo[2][0], (i - 10) * 987);
+ ASSERT_TRUE(vfoo[1].empty());
ASSERT_EQ(*bar, 0);
ASSERT_EQ(*baz, (i - 10) * 567);
}
diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx
index 7a4e6bb6eb4a4..963c5fabd9277 100644
--- a/tree/ntuple/test/ntuple_multi_column.cxx
+++ b/tree/ntuple/test/ntuple_multi_column.cxx
@@ -377,11 +377,32 @@ TEST(RNTuple, MultiColumnRepresentationBulk)
EXPECT_FLOAT_EQ(2.0, arr[0]);
}
-TEST(RNTuple, MultiColumnRepresentationDedup)
+// Writes one entry, commits the cluster, switches the primary column representation of "px" to index 1
+// (kReal32Trunc, configured through SetTruncated(26)), writes a second entry, and then checks that both
+// entries read back with their original values.
+TEST(RNTuple, MultiColumnRepresentationVariableBitWidth)
{
- FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root");
+ FileRaii fileGuard("test_ntuple_multi_column_representation_varbitwidth.root");
- auto fldPx = RFieldBase::Create("px", "float").Unwrap();
- fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}});
- EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1);
+ {
+ auto model = RNTupleModel::Create();
+ auto fldPx = std::make_unique<ROOT::RField<float>>("px");
+ fldPx->SetTruncated(26);
+ fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal32}, {ROOT::ENTupleColumnType::kReal32Trunc}});
+ model->AddField(std::move(fldPx));
+ auto ptrPx = model->GetDefaultEntry().GetPtr<float>("px");
+ auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath());
+ *ptrPx = 1.0;
+ writer->Fill();
+ writer->CommitCluster();
+ // The writer only exposes the field as const; the modifier needs a mutable reference.
+ ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation(
+ const_cast<RFieldBase &>(writer->GetModel().GetConstField("px")), 1);
+ *ptrPx = 2.0;
+ writer->Fill();
+ }
+
+ auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath());
+ auto fldPx = reader->GetModel().GetDefaultEntry().GetPtr<float>("px");
+
+ reader->LoadEntry(0);
+ EXPECT_FLOAT_EQ(1.0, *fldPx);
+ reader->LoadEntry(1);
+ EXPECT_FLOAT_EQ(2.0, *fldPx);
}