Skip to content

Commit 4ebf849

Browse files
authored
DPL: improve handling of RNTuple (#13818)
- Support more integer types, including tests. - Add ability to support objects which are not grouped in a TDirectory
1 parent a206db4 commit 4ebf849

File tree

5 files changed

+53
-7
lines changed

5 files changed

+53
-7
lines changed

Framework/AnalysisSupport/src/RNTuplePlugin.cxx

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,31 @@ struct RootNTupleVisitor : public ROOT::Experimental::Detail::RFieldVisitor {
187187
this->datatype = arrow::int32();
188188
}
189189

190+
/// Visitor hook for std::int8_t fields: records arrow::int8() as the
/// Arrow datatype for the visited field. The field itself is not inspected.
void VisitInt8Field(const ROOT::Experimental::RField<std::int8_t>& /*field*/) override
{
  datatype = arrow::int8();
}
194+
195+
/// Visitor hook for std::int16_t fields: records arrow::int16() as the
/// Arrow datatype for the visited field. The field itself is not inspected.
void VisitInt16Field(const ROOT::Experimental::RField<std::int16_t>& /*field*/) override
{
  datatype = arrow::int16();
}
199+
200+
/// Visitor hook for std::uint32_t fields: records arrow::uint32() as the
/// Arrow datatype for the visited field. The field itself is not inspected.
void VisitUInt32Field(const ROOT::Experimental::RField<std::uint32_t>& /*field*/) override
{
  datatype = arrow::uint32();
}
204+
205+
/// Visitor hook for std::uint8_t fields: records arrow::uint8() as the
/// Arrow datatype for the visited field. The field itself is not inspected.
void VisitUInt8Field(const ROOT::Experimental::RField<std::uint8_t>& /*field*/) override
{
  datatype = arrow::uint8();
}
209+
210+
/// Visitor hook for std::uint16_t fields: records arrow::uint16() as the
/// Arrow datatype for the visited field.
///
/// The type must be the unsigned 16-bit Arrow type: using arrow::int16()
/// here would make values above 32767 be interpreted as negative.
void VisitUInt16Field(const ROOT::Experimental::RField<std::uint16_t>& field) override
{
  this->datatype = arrow::uint16();
}
214+
190215
void VisitBoolField(const ROOT::Experimental::RField<bool>& field) override
191216
{
192217
this->datatype = arrow::boolean();
@@ -240,6 +265,8 @@ std::unique_ptr<ROOT::Experimental::RFieldBase> rootFieldFromArrow(std::shared_p
240265
return std::make_unique<RField<float>>(name);
241266
case arrow::Type::DOUBLE:
242267
return std::make_unique<RField<double>>(name);
268+
case arrow::Type::STRING:
269+
return std::make_unique<RField<std::string>>(name);
243270
default:
244271
throw runtime_error("Unsupported arrow column type");
245272
}

Framework/Core/include/Framework/RootArrowFilesystem.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ struct RootArrowFactoryPlugin {
8383
struct RootObjectReadingCapability {
8484
// The unique name of this capability
8585
std::string name = "unknown";
86+
// Convert a logical filename to an actual object to be read
87+
// This can be used, e.g. to read an RNTuple stored in
88+
// a flat directory structure in a TFile vs a TTree stored inside
89+
// a TDirectory (e.g. /DF_1000/o2tracks).
90+
std::function<std::string(std::string)> lfn2objectPath;
8691
// Given a TFile, return the object which this capability supports
8792
// Use a void * in order not to expose the kind of object to the
8893
// generic reading code. This is also where we load the plugin

Framework/Core/src/Plugin.cxx

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,12 @@ struct ImplementationContext {
179179

180180
std::function<void*(TDirectoryFile*, std::string const&)> getHandleByClass(char const* classname)
181181
{
182-
return [classname](TDirectoryFile* file, std::string const& path) { return file->GetObjectChecked(path.c_str(), TClass::GetClass(classname)); };
182+
return [c = TClass::GetClass(classname)](TDirectoryFile* file, std::string const& path) { return file->GetObjectChecked(path.c_str(), c); };
183183
}
184184

185185
std::function<void*(TBufferFile*, std::string const&)> getBufferHandleByClass(char const* classname)
186186
{
187-
return [classname](TBufferFile* buffer, std::string const& path) { buffer->Reset(); return buffer->ReadObjectAny(TClass::GetClass(classname)); };
187+
return [c = TClass::GetClass(classname)](TBufferFile* buffer, std::string const& path) { buffer->Reset(); return buffer->ReadObjectAny(c); };
188188
}
189189

190190
void lazyLoadFactory(std::vector<RootArrowFactory>& implementations, char const* specs)
@@ -210,6 +210,13 @@ struct RNTupleObjectReadingCapability : o2::framework::RootObjectReadingCapabili
210210

211211
return new RootObjectReadingCapability{
212212
.name = "rntuple",
213+
// Turn a logical file name into the name of a top-level object: every '/'
// after the first character becomes '-', and the result always starts
// with a single leading '/'.
.lfn2objectPath = [](std::string s) {
  // Guard the empty name: `s.begin() + 1` would be past `s.end()`, which
  // makes the range handed to std::replace invalid (undefined behaviour).
  if (s.empty()) {
    return std::string{"/"};
  }
  // The first character is skipped so that a leading '/' is preserved.
  std::replace(s.begin() + 1, s.end(), '/', '-');
  if (s.starts_with("/")) {
    return s;
  }
  return "/" + s;
},
213220
.getHandle = getHandleByClass("ROOT::Experimental::RNTuple"),
214221
.getBufferHandle = getBufferHandleByClass("ROOT::Experimental::RNTuple"),
215222
.factory = [context]() -> RootArrowFactory& {
@@ -226,6 +233,7 @@ struct TTreeObjectReadingCapability : o2::framework::RootObjectReadingCapability
226233

227234
return new RootObjectReadingCapability{
228235
.name = "ttree",
236+
.lfn2objectPath = [](std::string s) { return s; },
229237
.getHandle = getHandleByClass("TTree"),
230238
.getBufferHandle = getBufferHandleByClass("TTree"),
231239
.factory = [context]() -> RootArrowFactory& {

Framework/Core/src/RootArrowFilesystem.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ std::shared_ptr<VirtualRootFileSystemBase> TFileFileSystem::GetSubFilesystem(arr
4747
// file, so that we can support TTree and RNTuple at the same time
4848
// without having to depend on both.
4949
for (auto& capability : mObjectFactory.capabilities) {
50-
void* handle = capability.getHandle(mFile, source.path());
50+
auto objectPath = capability.lfn2objectPath(source.path());
51+
void* handle = capability.getHandle(mFile, objectPath);
5152
if (!handle) {
5253
continue;
5354
}
@@ -238,6 +239,7 @@ std::shared_ptr<VirtualRootFileSystemBase> TBufferFileFS::GetSubFilesystem(arrow
238239
// file, so that we can support TTree and RNTuple at the same time
239240
// without having to depend on both.
240241
for (auto& capability : mObjectFactory.capabilities) {
242+
241243
void* handle = capability.getBufferHandle(mBuffer, source.path());
242244
if (handle) {
243245
mFilesystem = capability.factory().getSubFilesystem(handle);

Framework/Core/test/test_Root2ArrowTable.cxx

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ bool validateContents(std::shared_ptr<arrow::RecordBatch> batch)
369369

370370
bool validateSchema(std::shared_ptr<arrow::Schema> schema)
371371
{
372-
REQUIRE(schema->num_fields() == 10);
372+
REQUIRE(schema->num_fields() == 11);
373373
REQUIRE(schema->field(0)->type()->id() == arrow::float32()->id());
374374
REQUIRE(schema->field(1)->type()->id() == arrow::float32()->id());
375375
REQUIRE(schema->field(2)->type()->id() == arrow::float32()->id());
@@ -380,6 +380,7 @@ bool validateSchema(std::shared_ptr<arrow::Schema> schema)
380380
REQUIRE(schema->field(7)->type()->id() == arrow::boolean()->id());
381381
REQUIRE(schema->field(8)->type()->id() == arrow::fixed_size_list(arrow::boolean(), 2)->id());
382382
REQUIRE(schema->field(9)->type()->id() == arrow::list(arrow::int32())->id());
383+
REQUIRE(schema->field(10)->type()->id() == arrow::int8()->id());
383384
return true;
384385
}
385386

@@ -435,6 +436,7 @@ TEST_CASE("RootTree2Dataset")
435436
bool manyBool[2];
436437
int vla[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
437438
int vlaSize = 0;
439+
char byte;
438440

439441
t->Branch("px", &px, "px/F");
440442
t->Branch("py", &py, "py/F");
@@ -447,6 +449,7 @@ TEST_CASE("RootTree2Dataset")
447449
t->Branch("manyBools", &manyBool, "manyBools[2]/O");
448450
t->Branch("vla_size", &vlaSize, "vla_size/I");
449451
t->Branch("vla", vla, "vla[vla_size]/I");
452+
t->Branch("byte", &byte, "byte/B");
450453
// fill the tree
451454
for (Int_t i = 0; i < 100; i++) {
452455
xyz[0] = 1;
@@ -463,6 +466,7 @@ TEST_CASE("RootTree2Dataset")
463466
manyBool[0] = (i % 4 == 0);
464467
manyBool[1] = (i % 5 == 0);
465468
vlaSize = i % 10;
469+
byte = i;
466470
t->Fill();
467471
}
468472
}
@@ -512,7 +516,7 @@ TEST_CASE("RootTree2Dataset")
512516
auto batches = (*scanner)();
513517
auto result = batches.result();
514518
REQUIRE(result.ok());
515-
REQUIRE((*result)->columns().size() == 10);
519+
REQUIRE((*result)->columns().size() == 11);
516520
REQUIRE((*result)->num_rows() == 100);
517521
validateContents(*result);
518522

@@ -552,7 +556,7 @@ TEST_CASE("RootTree2Dataset")
552556
auto batchesWritten = (*scanner)();
553557
auto resultWritten = batches.result();
554558
REQUIRE(resultWritten.ok());
555-
REQUIRE((*resultWritten)->columns().size() == 10);
559+
REQUIRE((*resultWritten)->columns().size() == 11);
556560
REQUIRE((*resultWritten)->num_rows() == 100);
557561
validateContents(*resultWritten);
558562
}
@@ -586,7 +590,7 @@ TEST_CASE("RootTree2Dataset")
586590
auto rntupleBatchesWritten = (*rntupleScannerWritten)();
587591
auto rntupleResultWritten = rntupleBatchesWritten.result();
588592
REQUIRE(rntupleResultWritten.ok());
589-
REQUIRE((*rntupleResultWritten)->columns().size() == 10);
593+
REQUIRE((*rntupleResultWritten)->columns().size() == 11);
590594
REQUIRE(validateSchema((*rntupleResultWritten)->schema()));
591595
REQUIRE((*rntupleResultWritten)->num_rows() == 100);
592596
REQUIRE(validateContents(*rntupleResultWritten));

0 commit comments

Comments
 (0)