Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion tree/ntuple/inc/ROOT/RNTupleTypes.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,28 @@ public:
std::uint64_t GetLocation() const { return fLocation; }
};

/// RNTupleLocator payload for the kTypeMulti locator (type 0x03). Used by storage
/// backends that pack multiple pages into shared objects (e.g., S3 Mode A). The two
/// 32-bit fields are written to disk as separate integers, so the wire format is

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "wire format"?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant the on disk format I will update the comment so its more clear

/// directly interpretable without knowledge of any internal bit packing.
class RNTupleLocatorMulti {
private:
std::uint32_t fObjectId = 0;
std::uint32_t fOffset = 0;

public:
RNTupleLocatorMulti() = default;
RNTupleLocatorMulti(std::uint32_t objectId, std::uint32_t offset) : fObjectId(objectId), fOffset(offset) {}

bool operator==(const RNTupleLocatorMulti &other) const
{
return fObjectId == other.fObjectId && fOffset == other.fOffset;
}

std::uint32_t GetObjectId() const { return fObjectId; }
std::uint32_t GetOffset() const { return fOffset; }
};

// Workaround missing return type overloading
class RNTupleLocator;
namespace Internal {
Expand All @@ -216,6 +238,11 @@ template <>
struct RNTupleLocatorHelper<RNTupleLocatorObject64> {
static RNTupleLocatorObject64 Get(const RNTupleLocator &loc);
};

template <>
struct RNTupleLocatorHelper<RNTupleLocatorMulti> {
static RNTupleLocatorMulti Get(const RNTupleLocator &loc);
};
} // namespace Internal

/// Generic information about the physical location of data. Values depend on the concrete storage type. E.g.,
Expand All @@ -226,6 +253,7 @@ struct RNTupleLocatorHelper<RNTupleLocatorObject64> {
class RNTupleLocator {
friend struct Internal::RNTupleLocatorHelper<std::uint64_t>;
friend struct Internal::RNTupleLocatorHelper<RNTupleLocatorObject64>;
friend struct Internal::RNTupleLocatorHelper<RNTupleLocatorMulti>;

public:
/// Values for the _Type_ field in non-disk locators. Serializable types must have the MSb == 0; see
Expand Down Expand Up @@ -277,7 +305,8 @@ public:
void SetType(ELocatorType type);
void SetReserved(std::uint8_t reserved);

/// Note that for GetPosition() / SetPosition(), the locator type must correspond (kTypeFile, kTypeObject64).
/// Note that for GetPosition() / SetPosition(), the locator type must correspond
/// (kTypeFile, kTypeObject64, kTypeMulti).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For convenience we probably want to phrase it like

Suggested change
/// (kTypeFile, kTypeObject64, kTypeMulti).
/// (kTypeFile, kTypeObject64, ...).

and not update it with every locator type

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will update this comment as well


template <typename T>
T GetPosition() const
Expand All @@ -287,6 +316,7 @@ public:

void SetPosition(std::uint64_t position);
void SetPosition(RNTupleLocatorObject64 position);
void SetPosition(RNTupleLocatorMulti position);
};

namespace Internal {
Expand Down
48 changes: 46 additions & 2 deletions tree/ntuple/src/RNTupleSerialize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,50 @@
return ROOT::RResult<void>::Success();
}

std::uint32_t SerializeLocatorPayloadMulti(const ROOT::RNTupleLocator &locator, unsigned char *buffer)
{
const auto &data = locator.GetPosition<ROOT::RNTupleLocatorMulti>();
const uint32_t sizeofNBytesOnStorage = (locator.GetNBytesOnStorage() > std::numeric_limits<std::uint32_t>::max())
? sizeof(std::uint64_t)
: sizeof(std::uint32_t);
if (buffer) {
if (sizeofNBytesOnStorage == sizeof(std::uint32_t)) {
RNTupleSerializer::SerializeUInt32(locator.GetNBytesOnStorage(), buffer);
} else {
RNTupleSerializer::SerializeUInt64(locator.GetNBytesOnStorage(), buffer);
}
RNTupleSerializer::SerializeUInt32(data.GetObjectId(), buffer + sizeofNBytesOnStorage);
RNTupleSerializer::SerializeUInt32(data.GetOffset(),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use the pattern found in other serialize functions of:

   auto base = reinterpret_cast<unsigned char *>(buffer);
   auto pos = base;
   void **where = (buffer == nullptr) ? &buffer : reinterpret_cast<void **>(&pos);

   pos += RNTupleSerializer::SerializeUInt32(..., *where);
   pos += ...

instead of manually calculating the write position

buffer + sizeofNBytesOnStorage + sizeof(std::uint32_t));
}
return sizeofNBytesOnStorage + 2 * sizeof(std::uint32_t);
}

ROOT::RResult<void> DeserializeLocatorPayloadMulti(const unsigned char *buffer, std::uint32_t sizeofLocatorPayload,
ROOT::RNTupleLocator &locator)
{
std::uint32_t sizeofNBytesOnStorage;
if (sizeofLocatorPayload == 12) {
std::uint32_t nBytesOnStorage;
RNTupleSerializer::DeserializeUInt32(buffer, nBytesOnStorage);
locator.SetNBytesOnStorage(nBytesOnStorage);
sizeofNBytesOnStorage = sizeof(std::uint32_t);
} else if (sizeofLocatorPayload == 16) {
std::uint64_t nBytesOnStorage;
RNTupleSerializer::DeserializeUInt64(buffer, nBytesOnStorage);
locator.SetNBytesOnStorage(nBytesOnStorage);
sizeofNBytesOnStorage = sizeof(std::uint64_t);
} else {
return R__FAIL("invalid Multi locator payload size: " + std::to_string(sizeofLocatorPayload));
}
std::uint32_t objectId;
std::uint32_t offset;
RNTupleSerializer::DeserializeUInt32(buffer + sizeofNBytesOnStorage, objectId);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like for the serialize: keep track of the buffer position with a variable and increment it with the Deserialize* return values instead of calculating the offset every time

RNTupleSerializer::DeserializeUInt32(buffer + sizeofNBytesOnStorage + sizeof(std::uint32_t), offset);
locator.SetPosition(ROOT::RNTupleLocatorMulti(objectId, offset));
return ROOT::RResult<void>::Success();
}

std::uint32_t SerializeAliasColumn(const ROOT::RColumnDescriptor &columnDesc,
const ROOT::Internal::RNTupleSerializer::RContext &context, void *buffer)
{
Expand Down Expand Up @@ -1091,7 +1135,7 @@
locatorType = 0x02;
break;
case RNTupleLocator::kTypeMulti:
size += SerializeLocatorPayloadObject64(locator, payloadp);
size += SerializeLocatorPayloadMulti(locator, payloadp);
locatorType = 0x03;
break;
default:
Expand Down Expand Up @@ -1144,7 +1188,7 @@
break;
case 0x03:
locator.SetType(RNTupleLocator::kTypeMulti);
DeserializeLocatorPayloadObject64(bytes, payloadSize, locator);
DeserializeLocatorPayloadMulti(bytes, payloadSize, locator);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both for this and the Object64 case: we need to check the return value and forward RResult in case of failure.

break;
default: locator.SetType(RNTupleLocator::kTypeUnknown);
}
Expand Down Expand Up @@ -1879,7 +1923,7 @@
continue;
// NOTE: this assumes all valid feature flags are consecutive, thus we can just check the highest one set.
unsigned int highestBitSet = 64 * i + (63 - ROOT::Internal::LeadingZeroes(featureFlags[i]));
if (highestBitSet >= ROOT::RNTupleDescriptor::kFeatureFlag_COUNT)

Check warning on line 1926 in tree/ntuple/src/RNTupleSerialize.cxx

View workflow job for this annotation

GitHub Actions / alma8

comparison of unsigned expression >= 0 is always true [-Wtype-limits]
return R__FAIL("unsupported format feature: " + std::to_string(highestBitSet));
}
return ROOT::RResult<void>::Success();
Expand Down
21 changes: 19 additions & 2 deletions tree/ntuple/src/RNTupleTypes.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,18 @@ void ROOT::RNTupleLocator::SetPosition(std::uint64_t position)

void ROOT::RNTupleLocator::SetPosition(RNTupleLocatorObject64 position)
{
if (GetType() != kTypeObject64 && GetType() != kTypeMulti)
if (GetType() != kTypeObject64)
throw RException(R__FAIL("cannot set position as 64bit object for type " + std::to_string(GetType())));
fPosition = position.GetLocation();
}

void ROOT::RNTupleLocator::SetPosition(RNTupleLocatorMulti position)
{
if (GetType() != kTypeMulti)
throw RException(R__FAIL("cannot set position as Multi locator for type " + std::to_string(GetType())));
fPosition = (static_cast<std::uint64_t>(position.GetObjectId()) << 32) | position.GetOffset();
}

std::uint64_t ROOT::Internal::RNTupleLocatorHelper<std::uint64_t>::Get(const RNTupleLocator &loc)
{
if (loc.GetType() != ROOT::RNTupleLocator::kTypeFile)
Expand All @@ -95,7 +102,17 @@ std::uint64_t ROOT::Internal::RNTupleLocatorHelper<std::uint64_t>::Get(const RNT
ROOT::RNTupleLocatorObject64
ROOT::Internal::RNTupleLocatorHelper<ROOT::RNTupleLocatorObject64>::Get(const RNTupleLocator &loc)
{
if (loc.GetType() != ROOT::RNTupleLocator::kTypeObject64 && loc.GetType() != ROOT::RNTupleLocator::kTypeMulti)
if (loc.GetType() != ROOT::RNTupleLocator::kTypeObject64)
throw RException(R__FAIL("cannot retrieve position as 64bit object for type " + std::to_string(loc.GetType())));
return RNTupleLocatorObject64{loc.fPosition};
}

ROOT::RNTupleLocatorMulti
ROOT::Internal::RNTupleLocatorHelper<ROOT::RNTupleLocatorMulti>::Get(const RNTupleLocator &loc)
{
if (loc.GetType() != ROOT::RNTupleLocator::kTypeMulti)
throw RException(
R__FAIL("cannot retrieve position as Multi locator for type " + std::to_string(loc.GetType())));
return RNTupleLocatorMulti(static_cast<std::uint32_t>(loc.fPosition >> 32),
static_cast<std::uint32_t>(loc.fPosition));
}
74 changes: 71 additions & 3 deletions tree/ntuple/test/ntuple_serialize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ TEST(RNTuple, SerializeLocator)
// Multi locator round-trip with 32-bit nBytesOnStorage
locator = RNTupleLocator{};
locator.SetType(RNTupleLocator::kTypeMulti);
locator.SetPosition(RNTupleLocatorObject64{42U});
locator.SetPosition(RNTupleLocatorMulti{7, 1024});
locator.SetNBytesOnStorage(1024U);
locator.SetReserved(0);
EXPECT_EQ(16u, RNTupleSerializer::SerializeLocator(locator, buffer).Unwrap());
Expand All @@ -411,7 +411,9 @@ TEST(RNTuple, SerializeLocator)
EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeMulti);
EXPECT_EQ(locator.GetNBytesOnStorage(), 1024U);
EXPECT_EQ(locator.GetReserved(), 0);
EXPECT_EQ(42U, locator.GetPosition<RNTupleLocatorObject64>().GetLocation());
auto multi = locator.GetPosition<RNTupleLocatorMulti>();
EXPECT_EQ(7U, multi.GetObjectId());
EXPECT_EQ(1024U, multi.GetOffset());

// Multi locator round-trip with 64-bit nBytesOnStorage and reserved bit
locator.SetNBytesOnStorage(static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1);
Expand All @@ -422,7 +424,9 @@ TEST(RNTuple, SerializeLocator)
EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeMulti);
EXPECT_EQ(locator.GetNBytesOnStorage(), static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()) + 1);
EXPECT_EQ(locator.GetReserved(), 1);
EXPECT_EQ(42U, locator.GetPosition<RNTupleLocatorObject64>().GetLocation());
multi = locator.GetPosition<RNTupleLocatorMulti>();
EXPECT_EQ(7U, multi.GetObjectId());
EXPECT_EQ(1024U, multi.GetOffset());

std::int32_t *head = reinterpret_cast<std::int32_t *>(buffer);
#ifndef R__BYTESWAP
Expand All @@ -435,6 +439,70 @@ TEST(RNTuple, SerializeLocator)
EXPECT_EQ(locator.GetType(), RNTupleLocator::kTypeUnknown);
}

TEST(RNTuple, RNTupleLocatorMultiFields)
{
// Default-constructed: both fields zero
RNTupleLocatorMulti zero;
EXPECT_EQ(0U, zero.GetObjectId());
EXPECT_EQ(0U, zero.GetOffset());

// Non-trivial values
RNTupleLocatorMulti m(0x12345, 0xABCDE);
EXPECT_EQ(0x12345U, m.GetObjectId());
EXPECT_EQ(0xABCDEU, m.GetOffset());

// Max 32-bit values: both fields use the full uint32 range
RNTupleLocatorMulti maxVals(0xFFFFFFFFU, 0xFFFFFFFFU);
EXPECT_EQ(0xFFFFFFFFU, maxVals.GetObjectId());
EXPECT_EQ(0xFFFFFFFFU, maxVals.GetOffset());

// Object id only: no leak into offset
RNTupleLocatorMulti idOnly(0xFFFFFFFFU, 0U);
EXPECT_EQ(0xFFFFFFFFU, idOnly.GetObjectId());
EXPECT_EQ(0U, idOnly.GetOffset());

// Offset only: no leak into object id
RNTupleLocatorMulti offsetOnly(0U, 0xFFFFFFFFU);
EXPECT_EQ(0U, offsetOnly.GetObjectId());
EXPECT_EQ(0xFFFFFFFFU, offsetOnly.GetOffset());

// Equality semantics
EXPECT_EQ(RNTupleLocatorMulti(1, 2), RNTupleLocatorMulti(1, 2));
EXPECT_FALSE(RNTupleLocatorMulti(1, 2) == RNTupleLocatorMulti(1, 3));
EXPECT_FALSE(RNTupleLocatorMulti(1, 2) == RNTupleLocatorMulti(2, 2));
}

TEST(RNTuple, RNTupleLocatorMultiTypeEnforcement)
{
RNTupleLocator locator;

// SetPosition(Multi) requires kTypeMulti.
locator.SetType(RNTupleLocator::kTypeObject64);
EXPECT_THROW(locator.SetPosition(RNTupleLocatorMulti(1, 2)), ROOT::RException);
locator.SetType(RNTupleLocator::kTypeFile);
EXPECT_THROW(locator.SetPosition(RNTupleLocatorMulti(1, 2)), ROOT::RException);

// SetPosition(Object64) no longer accepts kTypeMulti after the split.
locator = RNTupleLocator{};
locator.SetType(RNTupleLocator::kTypeMulti);
EXPECT_THROW(locator.SetPosition(RNTupleLocatorObject64{1}), ROOT::RException);

// Correct usage round-trip.
locator.SetPosition(RNTupleLocatorMulti(1, 2));
auto m = locator.GetPosition<RNTupleLocatorMulti>();
EXPECT_EQ(1U, m.GetObjectId());
EXPECT_EQ(2U, m.GetOffset());

// GetPosition<Object64>() no longer accepts kTypeMulti after the split.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// GetPosition<Object64>() no longer accepts kTypeMulti after the split.

EXPECT_THROW(locator.GetPosition<RNTupleLocatorObject64>(), ROOT::RException);

// GetPosition<Multi>() rejects non-Multi types.
locator = RNTupleLocator{};
locator.SetType(RNTupleLocator::kTypeObject64);
locator.SetPosition(RNTupleLocatorObject64{1});
EXPECT_THROW(locator.GetPosition<RNTupleLocatorMulti>(), ROOT::RException);
}

TEST(RNTuple, SerializeEnvelopeLink)
{
RNTupleSerializer::REnvelopeLink link;
Expand Down
1 change: 1 addition & 0 deletions tree/ntuple/test/ntuple_test.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
using ROOT::EExtraTypeInfoIds;
using ROOT::RNTupleLocalIndex;
using ROOT::RNTupleLocator;
using ROOT::RNTupleLocatorMulti;
using ROOT::RNTupleLocatorObject64;
using ROOT::Internal::RColumnIndex;
using RClusterDescriptor = ROOT::RClusterDescriptor;
Expand Down
Loading