diff --git a/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx b/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
index 4f679532c1c87..ac10afcdf95df 100644
--- a/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
+++ b/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
@@ -50,6 +50,12 @@ enum class ENTupleInspectorHist {
    kUncompressedSize
 };
 
+/// Output formats accepted by RNTupleInspector::PrintSchemaProfile().
+enum class ESchemaProfileFormat {
+   /// https://www.speedscope.app/file-format-schema.json
+   kSpeedscopeJSON
+};
+
 // clang-format off
 /**
 \class ROOT::Experimental::RNTupleInspector
@@ -493,6 +499,15 @@ public:
    {
       PrintFieldTreeAsDot(GetDescriptor().GetFieldZero(), output);
    }
+
+   /////////////////////////////////////////////////////////////////////////////
+   /// \brief Print a string that represents the tree of the (sub)fields and columns of an RNTuple in a format which a
+   /// performance profile visualizer can render
+   ///
+   /// \param[in] format The output format for the flamegraph specification
+   /// \param[in] output Stream the specification is written to (defaults to std::cout)
+   ///
+   void PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output = std::cout) const;
 };
 } // namespace Experimental
 } // namespace ROOT
diff --git a/tree/ntupleutil/src/RNTupleInspector.cxx b/tree/ntupleutil/src/RNTupleInspector.cxx
index 1a5b192f6ad53..1e9c9e62e2c3a 100644
--- a/tree/ntupleutil/src/RNTupleInspector.cxx
+++ b/tree/ntupleutil/src/RNTupleInspector.cxx
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <functional>
 #include
 #include
 
@@ -565,3 +566,144 @@ void ROOT::Experimental::RNTupleInspector::PrintFieldTreeAsDot(const ROOT::RFiel
    if (isZeroField)
       output << "}";
 }
+
+/// One node of the schema profile (a field or a column), occupying [openingPosition, closingPosition) in bytes.
+struct SpeedscopeFrame {
+   std::string primaryString;         ///< Written as the frame's "name"
+   std::string secondaryString;       ///< Written after "Type: " in the frame's "file" entry
+   std::uint64_t openingPosition = 0; ///< Byte offset where the frame opens
+   std::uint64_t closingPosition = 0; ///< Byte offset where the frame closes
+};
+
+/// Write `frames` to `output` as a speedscope profile of type "evented".
+///
+/// The frames must be in pre-order (each frame directly followed by its descendants), which is the order in which
+/// PrintSchemaProfile() appends them. Nothing is written if `frames` is empty.
+static void PrintSpeedscopeFrames(const std::vector<SpeedscopeFrame> &frames, std::ostream &output)
+{
+   if (frames.empty())
+      return;
+
+   output << "{\n";
+   output << " \"$schema\":\"https://www.speedscope.app/file-format-schema.json\",\n";
+   output << " \"shared\":{\n";
+   output << " \"frames\":[\n";
+
+   for (std::size_t i = 0; i < frames.size(); ++i) {
+      output << " { \"name\":\"" << frames[i].primaryString
+             << "\", \"file\":\"Type: " << frames[i].secondaryString
+             << ", Size: " << frames[i].closingPosition - frames[i].openingPosition << "B\" }"
+             << (i + 1 < frames.size() ? ",\n" : "\n");
+   }
+
+   output << " ]\n";
+   output << " },\n";
+   output << " \"profiles\":[\n";
+   output << " {\n";
+   output << " \"type\":\"evented\",\n";
+   output << " \"name\":\"Flattened Timeline\",\n";
+   output << " \"unit\":\"bytes\",\n";
+   output << " \"startValue\":0,\n";
+   output << " \"endValue\":" << frames.back().closingPosition << ",\n";
+   output << " \"events\":[\n";
+
+   bool first = true;
+
+   // Emits the open ("O") and close ("C") events of frames[idx] and of every following frame that opens before
+   // `limit`. Descendants are emitted between a frame's two events by recursing with that frame's closing position.
+   // Parameter idx Index of the frame being processed
+   // Parameter limit
+   // - If the frame is not root: Closing Position of its father
+   // - If the frame is root: Closing Position of the last element of frames
+   // Returns index of the next frame to be processed
+   // `limit` is 64 bits wide like the frame positions; a 32-bit limit would truncate offsets of 4 GiB and above.
+   // NOTE(review): frames that open exactly at `limit` (zero-size frames at the end of their parent, children of a
+   // zero-size frame) fail the `<` test and are not nested under their parent -- confirm whether that can occur.
+   std::function<std::size_t(std::size_t, std::uint64_t)> processRecursive = [&](std::size_t idx,
+                                                                                 std::uint64_t limit) -> std::size_t {
+      while (idx < frames.size() && frames[idx].openingPosition < limit) {
+         const std::size_t currentIdx = idx;
+
+         if (!first)
+            output << ",\n";
+
+         output << " {\"type\":\"O\",\"frame\":" << currentIdx
+                << ",\"at\":" << frames[currentIdx].openingPosition << "}";
+         first = false;
+
+         idx = processRecursive(idx + 1, frames[currentIdx].closingPosition);
+
+         output << ",\n {\"type\":\"C\",\"frame\":" << currentIdx
+                << ",\"at\":" << frames[currentIdx].closingPosition << "}";
+      }
+      return idx;
+   };
+
+   processRecursive(0, frames.back().closingPosition);
+
+   output << "\n ]\n";
+   output << " }\n";
+   output << " ]\n";
+   output << "}\n";
+}
+
+void ROOT::Experimental::RNTupleInspector::PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output) const
+{
+   // There is only one format at the moment
+   assert(format == ESchemaProfileFormat::kSpeedscopeJSON);
+   (void)format; // only read by the assert, which is compiled out under NDEBUG
+
+   const auto &tupleDescriptor = GetDescriptor();
+   ROOT::DescriptorId_t rootId = tupleDescriptor.GetFieldZeroId();
+   const auto &rootFieldDescriptor = tupleDescriptor.GetFieldDescriptor(rootId);
+
+   std::vector<SpeedscopeFrame> frames;
+   // Running byte offset; columns are placed end to end in visiting order. 64 bits wide like the frame positions:
+   // a 32-bit cursor wraps once the accumulated compressed size reaches 4 GiB.
+   std::uint64_t positionCursor = 0;
+
+   // Appends a frame for the field, then visits its subfields, then appends one frame per column of the field.
+   // Returns size of the visited field
+   auto visitFieldsRecursive = [&](auto &self, const ROOT::RFieldDescriptor &fieldDescriptor) -> std::size_t {
+      SpeedscopeFrame fieldSpeedscopeFrame;
+      fieldSpeedscopeFrame.primaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId());
+      fieldSpeedscopeFrame.secondaryString = fieldDescriptor.GetTypeName();
+      fieldSpeedscopeFrame.openingPosition = positionCursor;
+      frames.push_back(fieldSpeedscopeFrame);
+
+      std::size_t fieldSpeedscopeFrameIndex = frames.size() - 1;
+
+      std::size_t subTreeSize = 0;
+      const auto &childIds = fieldDescriptor.GetLinkIds();
+
+      for (const auto &childFieldId : childIds) {
+         const auto &childFieldDescriptor = tupleDescriptor.GetFieldDescriptor(childFieldId);
+         subTreeSize += self(self, childFieldDescriptor);
+      }
+
+      for (const auto &columnDescriptor : tupleDescriptor.GetColumnIterable(fieldDescriptor.GetId())) {
+         const auto &columnInfo = GetColumnInspector(columnDescriptor.GetPhysicalId());
+         std::size_t columnSize = columnInfo.GetCompressedSize();
+
+         SpeedscopeFrame columnSpeedscopeFrame;
+         columnSpeedscopeFrame.primaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId()) +
+                                               " [col#" + std::to_string(columnDescriptor.GetPhysicalId()) + "]";
+         columnSpeedscopeFrame.secondaryString =
+            ROOT::Internal::RColumnElementBase::GetColumnTypeName(columnDescriptor.GetType());
+         columnSpeedscopeFrame.openingPosition = positionCursor;
+         positionCursor += columnSize;
+         columnSpeedscopeFrame.closingPosition = positionCursor;
+         frames.push_back(columnSpeedscopeFrame);
+         subTreeSize += columnSize;
+      }
+
+      // The field closes where its last descendant ends; index, not reference, since push_back may reallocate
+      frames[fieldSpeedscopeFrameIndex].closingPosition = positionCursor;
+
+      return subTreeSize;
+   };
+
+   visitFieldsRecursive(visitFieldsRecursive, rootFieldDescriptor);
+
+   PrintSpeedscopeFrames(frames, output);
+}
diff --git
a/tree/ntupleutil/test/ntuple_inspector.cxx b/tree/ntupleutil/test/ntuple_inspector.cxx
index 5812a926eb9ee..44231fb9b9ea5 100644
--- a/tree/ntupleutil/test/ntuple_inspector.cxx
+++ b/tree/ntupleutil/test/ntuple_inspector.cxx
@@ -862,3 +862,62 @@ TEST(RNTupleInspector, FieldTreeAsDot)
 "int

Type: std::int32_t

ID: 1

>]\n}";
    EXPECT_EQ(dot, expected);
 }
+
+// Checks the speedscope JSON printed for an RNTuple with one float and one int32 field, 10 entries each.
+// NOTE(review): the expected sizes equal 10 entries x 4 bytes per column, i.e. they assume both pages end up stored
+// uncompressed with the default write options -- confirm.
+TEST(RNTupleInspector, SchemaProfileSpecification)
+{
+   FileRaii fileGuard("test_schema_profile_specification.root");
+   {
+      auto model = RNTupleModel::Create();
+      auto fieldFloat1 = model->MakeField<float>("float1");
+      auto fieldInt = model->MakeField<std::int32_t>("int");
+      auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());
+
+      for (int i = 0; i < 10; ++i) {
+         *fieldFloat1 = 3.14f * i;
+         *fieldInt = 42 * i;
+         writer->Fill();
+      }
+   }
+   auto inspector = RNTupleInspector::Create("ntuple", fileGuard.GetPath());
+   std::ostringstream schemaProfileStream;
+   inspector->PrintSchemaProfile(ROOT::Experimental::ESchemaProfileFormat::kSpeedscopeJSON, schemaProfileStream);
+   const std::string schemaProfileSpecification = schemaProfileStream.str();
+   const std::string expected = R"({
+ "$schema":"https://www.speedscope.app/file-format-schema.json",
+ "shared":{
+ "frames":[
+ { "name":"", "file":"Type: , Size: 80B" },
+ { "name":"float1", "file":"Type: float, Size: 40B" },
+ { "name":"float1 [col#0]", "file":"Type: SplitReal32, Size: 40B" },
+ { "name":"int", "file":"Type: std::int32_t, Size: 40B" },
+ { "name":"int [col#1]", "file":"Type: SplitInt32, Size: 40B" }
+ ]
+ },
+ "profiles":[
+ {
+ "type":"evented",
+ "name":"Flattened Timeline",
+ "unit":"bytes",
+ "startValue":0,
+ "endValue":80,
+ "events":[
+ {"type":"O","frame":0,"at":0},
+ {"type":"O","frame":1,"at":0},
+ {"type":"O","frame":2,"at":0},
+ {"type":"C","frame":2,"at":40},
+ {"type":"C","frame":1,"at":40},
+ {"type":"O","frame":3,"at":40},
+ {"type":"O","frame":4,"at":40},
+ {"type":"C","frame":4,"at":80},
+ {"type":"C","frame":3,"at":80},
+ {"type":"C","frame":0,"at":80}
+ ]
+ }
+ ]
+}
+)";
+   EXPECT_EQ(schemaProfileSpecification, expected);
+}