Skip to content

Commit edd0bad

Browse files
committed
DPL Analysis: add RNTuple arrow::Dataset support
1 parent 950b8b7 commit edd0bad

File tree

4 files changed

+813
-1
lines changed

4 files changed

+813
-1
lines changed

Framework/Core/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ o2_add_library(Framework
159159
FairMQ::FairMQ
160160
ROOT::Tree
161161
ROOT::Hist
162+
ROOT::ROOTNTuple
163+
ROOT::ROOTNTupleUtil
162164
O2::FrameworkFoundation
163165
O2::CommonConstants
164166
O2::Headers
@@ -298,6 +300,7 @@ add_executable(o2-test-framework-root
298300
target_link_libraries(o2-test-framework-root PRIVATE O2::Framework)
299301
target_link_libraries(o2-test-framework-root PRIVATE O2::Catch2)
300302
target_link_libraries(o2-test-framework-root PRIVATE ROOT::ROOTDataFrame)
303+
target_link_libraries(o2-test-framework-root PRIVATE ROOT::ROOTNTuple)
301304
set_property(TARGET o2-test-framework-root PROPERTY RUNTIME_OUTPUT_DIRECTORY ${outdir})
302305
add_test(NAME framework:root COMMAND o2-test-framework-root --skip-benchmarks)
303306
add_test(NAME framework:crash COMMAND sh -e -c "PATH=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}:$PATH ${CMAKE_CURRENT_LIST_DIR}/test/test_AllCrashTypes.sh")

Framework/Core/include/Framework/RootArrowFilesystem.h

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ class TTree;
2323
class TBufferFile;
2424
class TDirectoryFile;
2525

26+
namespace ROOT::Experimental
27+
{
28+
class RNTuple;
29+
} // namespace ROOT::Experimental
30+
2631
namespace o2::framework
2732
{
2833

@@ -35,6 +40,15 @@ class TTreeFileWriteOptions : public arrow::dataset::FileWriteOptions
3540
}
3641
};
3742

43+
class RNTupleFileWriteOptions : public arrow::dataset::FileWriteOptions
44+
{
45+
public:
46+
RNTupleFileWriteOptions(std::shared_ptr<arrow::dataset::FileFormat> format)
47+
: FileWriteOptions(format)
48+
{
49+
}
50+
};
51+
3852
// This is to avoid having to implement a bunch of unimplemented methods
3953
// for all the possible virtual filesystem we can invent on top of ROOT
4054
// data structures.
@@ -97,6 +111,19 @@ class TTreeFileSystem : public VirtualRootFileSystemBase
97111
virtual TTree* GetTree(arrow::dataset::FileSource source) = 0;
98112
};
99113

114+
// A filesystem which allows me to get a RNTuple
115+
class RNTupleFileSystem : public VirtualRootFileSystemBase
116+
{
117+
public:
118+
~RNTupleFileSystem() override;
119+
120+
std::shared_ptr<VirtualRootFileSystemBase> GetSubFilesystem(arrow::dataset::FileSource source) override
121+
{
122+
return std::dynamic_pointer_cast<VirtualRootFileSystemBase>(shared_from_this());
123+
};
124+
virtual ROOT::Experimental::RNTuple* GetRNTuple(arrow::dataset::FileSource source) = 0;
125+
};
126+
100127
class SingleTreeFileSystem : public TTreeFileSystem
101128
{
102129
public:
@@ -121,6 +148,30 @@ class SingleTreeFileSystem : public TTreeFileSystem
121148
TTree* mTree;
122149
};
123150

151+
class SingleRNTupleFileSystem : public RNTupleFileSystem
152+
{
153+
public:
154+
SingleRNTupleFileSystem(ROOT::Experimental::RNTuple* tuple)
155+
: RNTupleFileSystem(),
156+
mTuple(tuple)
157+
{
158+
}
159+
160+
std::string type_name() const override
161+
{
162+
return "rntuple";
163+
}
164+
165+
ROOT::Experimental::RNTuple* GetRNTuple(arrow::dataset::FileSource) override
166+
{
167+
// Simply return the only TTree we have
168+
return mTuple;
169+
}
170+
171+
private:
172+
ROOT::Experimental::RNTuple* mTuple;
173+
};
174+
124175
class TFileFileSystem : public VirtualRootFileSystemBase
125176
{
126177
public:
@@ -179,6 +230,70 @@ class TTreeFileFragment : public arrow::dataset::FileFragment
179230
}
180231
};
181232

233+
class RNTupleFileFragment : public arrow::dataset::FileFragment
234+
{
235+
public:
236+
RNTupleFileFragment(arrow::dataset::FileSource source,
237+
std::shared_ptr<arrow::dataset::FileFormat> format,
238+
arrow::compute::Expression partition_expression,
239+
std::shared_ptr<arrow::Schema> physical_schema)
240+
: FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema))
241+
{
242+
}
243+
};
244+
245+
class RNTupleFileFormat : public arrow::dataset::FileFormat
246+
{
247+
size_t& mTotCompressedSize;
248+
size_t& mTotUncompressedSize;
249+
250+
public:
251+
RNTupleFileFormat(size_t& totalCompressedSize, size_t& totalUncompressedSize)
252+
: FileFormat({}),
253+
mTotCompressedSize(totalCompressedSize),
254+
mTotUncompressedSize(totalUncompressedSize)
255+
{
256+
}
257+
258+
~RNTupleFileFormat() override = default;
259+
260+
std::string type_name() const override
261+
{
262+
return "rntuple";
263+
}
264+
265+
bool Equals(const FileFormat& other) const override
266+
{
267+
return other.type_name() == this->type_name();
268+
}
269+
270+
arrow::Result<bool> IsSupported(const arrow::dataset::FileSource& source) const override
271+
{
272+
auto fs = std::dynamic_pointer_cast<VirtualRootFileSystemBase>(source.filesystem());
273+
auto subFs = fs->GetSubFilesystem(source);
274+
if (std::dynamic_pointer_cast<RNTupleFileSystem>(subFs)) {
275+
return true;
276+
}
277+
return false;
278+
}
279+
280+
arrow::Result<std::shared_ptr<arrow::Schema>> Inspect(const arrow::dataset::FileSource& source) const override;
281+
282+
arrow::Result<arrow::RecordBatchGenerator> ScanBatchesAsync(
283+
const std::shared_ptr<arrow::dataset::ScanOptions>& options,
284+
const std::shared_ptr<arrow::dataset::FileFragment>& fragment) const override;
285+
286+
std::shared_ptr<arrow::dataset::FileWriteOptions> DefaultWriteOptions() override;
287+
288+
arrow::Result<std::shared_ptr<arrow::dataset::FileWriter>> MakeWriter(std::shared_ptr<arrow::io::OutputStream> destination,
289+
std::shared_ptr<arrow::Schema> schema,
290+
std::shared_ptr<arrow::dataset::FileWriteOptions> options,
291+
arrow::fs::FileLocator destination_locator) const override;
292+
arrow::Result<std::shared_ptr<arrow::dataset::FileFragment>> MakeFragment(
293+
arrow::dataset::FileSource source, arrow::compute::Expression partition_expression,
294+
std::shared_ptr<arrow::Schema> physical_schema) override;
295+
};
296+
182297
class TTreeFileFormat : public arrow::dataset::FileFormat
183298
{
184299
size_t& mTotCompressedSize;

0 commit comments

Comments
 (0)