-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[ntuple] Add RNTupleLocatorMulti class for kTypeMulti locator #22577
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -201,6 +201,28 @@ public: | |||||
| std::uint64_t GetLocation() const { return fLocation; } | ||||||
| }; | ||||||
|
|
||||||
| /// RNTupleLocator payload for the kTypeMulti locator (type 0x03). Used by storage | ||||||
| /// backends that pack multiple pages into shared objects (e.g., S3 Mode A). The two | ||||||
| /// 32-bit fields are written to disk as separate integers, so the wire format is | ||||||
| /// directly interpretable without knowledge of any internal bit packing. | ||||||
| class RNTupleLocatorMulti { | ||||||
| private: | ||||||
| std::uint32_t fObjectId = 0; | ||||||
| std::uint32_t fOffset = 0; | ||||||
|
|
||||||
| public: | ||||||
| RNTupleLocatorMulti() = default; | ||||||
| RNTupleLocatorMulti(std::uint32_t objectId, std::uint32_t offset) : fObjectId(objectId), fOffset(offset) {} | ||||||
|
|
||||||
| bool operator==(const RNTupleLocatorMulti &other) const | ||||||
| { | ||||||
| return fObjectId == other.fObjectId && fOffset == other.fOffset; | ||||||
| } | ||||||
|
|
||||||
| std::uint32_t GetObjectId() const { return fObjectId; } | ||||||
| std::uint32_t GetOffset() const { return fOffset; } | ||||||
| }; | ||||||
|
|
||||||
| // Workaround missing return type overloading | ||||||
| class RNTupleLocator; | ||||||
| namespace Internal { | ||||||
|
|
@@ -216,6 +238,11 @@ template <> | |||||
| struct RNTupleLocatorHelper<RNTupleLocatorObject64> { | ||||||
| static RNTupleLocatorObject64 Get(const RNTupleLocator &loc); | ||||||
| }; | ||||||
|
|
||||||
| template <> | ||||||
| struct RNTupleLocatorHelper<RNTupleLocatorMulti> { | ||||||
| static RNTupleLocatorMulti Get(const RNTupleLocator &loc); | ||||||
| }; | ||||||
| } // namespace Internal | ||||||
|
|
||||||
| /// Generic information about the physical location of data. Values depend on the concrete storage type. E.g., | ||||||
|
|
@@ -226,6 +253,7 @@ struct RNTupleLocatorHelper<RNTupleLocatorObject64> { | |||||
| class RNTupleLocator { | ||||||
| friend struct Internal::RNTupleLocatorHelper<std::uint64_t>; | ||||||
| friend struct Internal::RNTupleLocatorHelper<RNTupleLocatorObject64>; | ||||||
| friend struct Internal::RNTupleLocatorHelper<RNTupleLocatorMulti>; | ||||||
|
|
||||||
| public: | ||||||
| /// Values for the _Type_ field in non-disk locators. Serializable types must have the MSb == 0; see | ||||||
|
|
@@ -277,7 +305,8 @@ public: | |||||
| void SetType(ELocatorType type); | ||||||
| void SetReserved(std::uint8_t reserved); | ||||||
|
|
||||||
| /// Note that for GetPosition() / SetPosition(), the locator type must correspond (kTypeFile, kTypeObject64). | ||||||
| /// Note that for GetPosition() / SetPosition(), the locator type must correspond | ||||||
| /// (kTypeFile, kTypeObject64, kTypeMulti). | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For convenience we probably want to phrase it like
Suggested change
and not update it with every locator type
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will update this comment as well |
||||||
|
|
||||||
| template <typename T> | ||||||
| T GetPosition() const | ||||||
|
|
@@ -287,6 +316,7 @@ public: | |||||
|
|
||||||
| void SetPosition(std::uint64_t position); | ||||||
| void SetPosition(RNTupleLocatorObject64 position); | ||||||
| void SetPosition(RNTupleLocatorMulti position); | ||||||
| }; | ||||||
|
|
||||||
| namespace Internal { | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -511,6 +511,50 @@ | |
| return ROOT::RResult<void>::Success(); | ||
| } | ||
|
|
||
| std::uint32_t SerializeLocatorPayloadMulti(const ROOT::RNTupleLocator &locator, unsigned char *buffer) | ||
| { | ||
| const auto &data = locator.GetPosition<ROOT::RNTupleLocatorMulti>(); | ||
| const uint32_t sizeofNBytesOnStorage = (locator.GetNBytesOnStorage() > std::numeric_limits<std::uint32_t>::max()) | ||
| ? sizeof(std::uint64_t) | ||
| : sizeof(std::uint32_t); | ||
| if (buffer) { | ||
| if (sizeofNBytesOnStorage == sizeof(std::uint32_t)) { | ||
| RNTupleSerializer::SerializeUInt32(locator.GetNBytesOnStorage(), buffer); | ||
| } else { | ||
| RNTupleSerializer::SerializeUInt64(locator.GetNBytesOnStorage(), buffer); | ||
| } | ||
| RNTupleSerializer::SerializeUInt32(data.GetObjectId(), buffer + sizeofNBytesOnStorage); | ||
| RNTupleSerializer::SerializeUInt32(data.GetOffset(), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would use the pattern found in other serialize functions of: auto base = reinterpret_cast<unsigned char *>(buffer);
auto pos = base;
void **where = (buffer == nullptr) ? &buffer : reinterpret_cast<void **>(&pos);
pos += RNTupleSerializer::SerializeUInt32(..., *where);
pos += ...instead of manually calculating the write position |
||
| buffer + sizeofNBytesOnStorage + sizeof(std::uint32_t)); | ||
| } | ||
| return sizeofNBytesOnStorage + 2 * sizeof(std::uint32_t); | ||
| } | ||
|
|
||
| ROOT::RResult<void> DeserializeLocatorPayloadMulti(const unsigned char *buffer, std::uint32_t sizeofLocatorPayload, | ||
| ROOT::RNTupleLocator &locator) | ||
| { | ||
| std::uint32_t sizeofNBytesOnStorage; | ||
| if (sizeofLocatorPayload == 12) { | ||
| std::uint32_t nBytesOnStorage; | ||
| RNTupleSerializer::DeserializeUInt32(buffer, nBytesOnStorage); | ||
| locator.SetNBytesOnStorage(nBytesOnStorage); | ||
| sizeofNBytesOnStorage = sizeof(std::uint32_t); | ||
| } else if (sizeofLocatorPayload == 16) { | ||
| std::uint64_t nBytesOnStorage; | ||
| RNTupleSerializer::DeserializeUInt64(buffer, nBytesOnStorage); | ||
| locator.SetNBytesOnStorage(nBytesOnStorage); | ||
| sizeofNBytesOnStorage = sizeof(std::uint64_t); | ||
| } else { | ||
| return R__FAIL("invalid Multi locator payload size: " + std::to_string(sizeofLocatorPayload)); | ||
| } | ||
| std::uint32_t objectId; | ||
| std::uint32_t offset; | ||
| RNTupleSerializer::DeserializeUInt32(buffer + sizeofNBytesOnStorage, objectId); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like for the serialize: keep track of the buffer position with a variable and increment it with the Deserialize* return values instead of calculating the offset every time |
||
| RNTupleSerializer::DeserializeUInt32(buffer + sizeofNBytesOnStorage + sizeof(std::uint32_t), offset); | ||
| locator.SetPosition(ROOT::RNTupleLocatorMulti(objectId, offset)); | ||
| return ROOT::RResult<void>::Success(); | ||
| } | ||
|
|
||
| std::uint32_t SerializeAliasColumn(const ROOT::RColumnDescriptor &columnDesc, | ||
| const ROOT::Internal::RNTupleSerializer::RContext &context, void *buffer) | ||
| { | ||
|
|
@@ -1091,7 +1135,7 @@ | |
| locatorType = 0x02; | ||
| break; | ||
| case RNTupleLocator::kTypeMulti: | ||
| size += SerializeLocatorPayloadObject64(locator, payloadp); | ||
| size += SerializeLocatorPayloadMulti(locator, payloadp); | ||
| locatorType = 0x03; | ||
| break; | ||
| default: | ||
|
|
@@ -1144,7 +1188,7 @@ | |
| break; | ||
| case 0x03: | ||
| locator.SetType(RNTupleLocator::kTypeMulti); | ||
| DeserializeLocatorPayloadObject64(bytes, payloadSize, locator); | ||
| DeserializeLocatorPayloadMulti(bytes, payloadSize, locator); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Both for this and the Object64 case: we need to check the return value and forward RResult in case of failure. |
||
| break; | ||
| default: locator.SetType(RNTupleLocator::kTypeUnknown); | ||
| } | ||
|
|
@@ -1879,7 +1923,7 @@ | |
| continue; | ||
| // NOTE: this assumes all valid feature flags are consecutive, thus we can just check the highest one set. | ||
| unsigned int highestBitSet = 64 * i + (63 - ROOT::Internal::LeadingZeroes(featureFlags[i])); | ||
| if (highestBitSet >= ROOT::RNTupleDescriptor::kFeatureFlag_COUNT) | ||
| return R__FAIL("unsupported format feature: " + std::to_string(highestBitSet)); | ||
| } | ||
| return ROOT::RResult<void>::Success(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -402,7 +402,7 @@ TEST(RNTuple, SerializeLocator) | |||
| // Multi locator round-trip with 32-bit nBytesOnStorage | ||||
| locator = RNTupleLocator{}; | ||||
| locator.SetType(RNTupleLocator::kTypeMulti); | ||||
| locator.SetPosition(RNTupleLocatorObject64{42U}); | ||||
| locator.SetPosition(RNTupleLocatorMulti{7, 1024}); | ||||
| locator.SetNBytesOnStorage(1024U); | ||||
| locator.SetReserved(0); | ||||
| EXPECT_EQ(16u, RNTupleSerializer::SerializeLocator(locator, buffer).Unwrap()); | ||||
|
|
@@ -411,7 +411,9 @@ TEST(RNTuple, SerializeLocator) | |||
| EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeMulti); | ||||
| EXPECT_EQ(locator.GetNBytesOnStorage(), 1024U); | ||||
| EXPECT_EQ(locator.GetReserved(), 0); | ||||
| EXPECT_EQ(42U, locator.GetPosition<RNTupleLocatorObject64>().GetLocation()); | ||||
| auto multi = locator.GetPosition<RNTupleLocatorMulti>(); | ||||
| EXPECT_EQ(7U, multi.GetObjectId()); | ||||
| EXPECT_EQ(1024U, multi.GetOffset()); | ||||
|
|
||||
| // Multi locator round-trip with 64-bit nBytesOnStorage and reserved bit | ||||
| locator.SetNBytesOnStorage(static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1); | ||||
|
|
@@ -422,7 +424,9 @@ TEST(RNTuple, SerializeLocator) | |||
| EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeMulti); | ||||
| EXPECT_EQ(locator.GetNBytesOnStorage(), static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1); | ||||
| EXPECT_EQ(locator.GetReserved(), 1); | ||||
| EXPECT_EQ(42U, locator.GetPosition<RNTupleLocatorObject64>().GetLocation()); | ||||
| multi = locator.GetPosition<RNTupleLocatorMulti>(); | ||||
| EXPECT_EQ(7U, multi.GetObjectId()); | ||||
| EXPECT_EQ(1024U, multi.GetOffset()); | ||||
|
|
||||
| std::int32_t *head = reinterpret_cast<std::int32_t *>(buffer); | ||||
| #ifndef R__BYTESWAP | ||||
|
|
@@ -435,6 +439,70 @@ TEST(RNTuple, SerializeLocator) | |||
| EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeUnknown); | ||||
| } | ||||
|
|
||||
| TEST(RNTuple, RNTupleLocatorMultiFields) | ||||
| { | ||||
| // Default-constructed: both fields zero | ||||
| RNTupleLocatorMulti zero; | ||||
| EXPECT_EQ(0U, zero.GetObjectId()); | ||||
| EXPECT_EQ(0U, zero.GetOffset()); | ||||
|
|
||||
| // Non-trivial values | ||||
| RNTupleLocatorMulti m(0x12345, 0xABCDE); | ||||
| EXPECT_EQ(0x12345U, m.GetObjectId()); | ||||
| EXPECT_EQ(0xABCDEU, m.GetOffset()); | ||||
|
|
||||
| // Max 32-bit values: both fields use the full uint32 range | ||||
| RNTupleLocatorMulti maxVals(0xFFFFFFFFU, 0xFFFFFFFFU); | ||||
| EXPECT_EQ(0xFFFFFFFFU, maxVals.GetObjectId()); | ||||
| EXPECT_EQ(0xFFFFFFFFU, maxVals.GetOffset()); | ||||
|
|
||||
| // Object id only: no leak into offset | ||||
| RNTupleLocatorMulti idOnly(0xFFFFFFFFU, 0U); | ||||
| EXPECT_EQ(0xFFFFFFFFU, idOnly.GetObjectId()); | ||||
| EXPECT_EQ(0U, idOnly.GetOffset()); | ||||
|
|
||||
| // Offset only: no leak into object id | ||||
| RNTupleLocatorMulti offsetOnly(0U, 0xFFFFFFFFU); | ||||
| EXPECT_EQ(0U, offsetOnly.GetObjectId()); | ||||
| EXPECT_EQ(0xFFFFFFFFU, offsetOnly.GetOffset()); | ||||
|
|
||||
| // Equality semantics | ||||
| EXPECT_EQ(RNTupleLocatorMulti(1, 2), RNTupleLocatorMulti(1, 2)); | ||||
| EXPECT_FALSE(RNTupleLocatorMulti(1, 2) == RNTupleLocatorMulti(1, 3)); | ||||
| EXPECT_FALSE(RNTupleLocatorMulti(1, 2) == RNTupleLocatorMulti(2, 2)); | ||||
| } | ||||
|
|
||||
| TEST(RNTuple, RNTupleLocatorMultiTypeEnforcement) | ||||
| { | ||||
| RNTupleLocator locator; | ||||
|
|
||||
| // SetPosition(Multi) requires kTypeMulti. | ||||
| locator.SetType(RNTupleLocator::kTypeObject64); | ||||
| EXPECT_THROW(locator.SetPosition(RNTupleLocatorMulti(1, 2)), ROOT::RException); | ||||
| locator.SetType(RNTupleLocator::kTypeFile); | ||||
| EXPECT_THROW(locator.SetPosition(RNTupleLocatorMulti(1, 2)), ROOT::RException); | ||||
|
|
||||
| // SetPosition(Object64) no longer accepts kTypeMulti after the split. | ||||
| locator = RNTupleLocator{}; | ||||
| locator.SetType(RNTupleLocator::kTypeMulti); | ||||
| EXPECT_THROW(locator.SetPosition(RNTupleLocatorObject64{1}), ROOT::RException); | ||||
|
|
||||
| // Correct usage round-trip. | ||||
| locator.SetPosition(RNTupleLocatorMulti(1, 2)); | ||||
| auto m = locator.GetPosition<RNTupleLocatorMulti>(); | ||||
| EXPECT_EQ(1U, m.GetObjectId()); | ||||
| EXPECT_EQ(2U, m.GetOffset()); | ||||
|
|
||||
| // GetPosition<Object64>() no longer accepts kTypeMulti after the split. | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| EXPECT_THROW(locator.GetPosition<RNTupleLocatorObject64>(), ROOT::RException); | ||||
|
|
||||
| // GetPosition<Multi>() rejects non-Multi types. | ||||
| locator = RNTupleLocator{}; | ||||
| locator.SetType(RNTupleLocator::kTypeObject64); | ||||
| locator.SetPosition(RNTupleLocatorObject64{1}); | ||||
| EXPECT_THROW(locator.GetPosition<RNTupleLocatorMulti>(), ROOT::RException); | ||||
| } | ||||
|
|
||||
| TEST(RNTuple, SerializeEnvelopeLink) | ||||
| { | ||||
| RNTupleSerializer::REnvelopeLink link; | ||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The "wire format"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant the on disk format I will update the comment so its more clear