1111#include " Framework/RootArrowFilesystem.h"
1212#include " Framework/Endian.h"
1313#include " Framework/RuntimeError.h"
14+ #include < Rtypes.h>
15+ #include < arrow/array/array_primitive.h>
1416#include < arrow/array/builder_nested.h>
1517#include < arrow/array/builder_primitive.h>
18+ #include < memory>
1619#include < stdexcept>
1720#include < TFile.h>
1821#include < TLeaf.h>
1922#include < TBufferFile.h>
2023#include < TTree.h>
2124#include < TDirectoryFile.h>
25+ #include < arrow/type.h>
26+ #include < arrow/type_fwd.h>
2227
2328namespace
2429{
@@ -73,35 +78,52 @@ namespace o2::framework
7378{
7479
7580TFileFileSystem::TFileFileSystem (TDirectoryFile* f, size_t readahead)
76- : TTreeFileSystem (),
81+ : VirtualRootFileSystemBase (),
7782 mFile (f)
7883{
7984 ((TFile*)mFile )->SetReadaheadSize (50 * 1024 * 1024 );
8085}
8186
82- TTree* TFileFileSystem::GetTree (arrow::dataset::FileSource source)
87+ std::shared_ptr<VirtualRootFileSystemBase> TFileFileSystem::GetSubFilesystem (arrow::dataset::FileSource source)
8388{
84- // Simply return the only TTree we have
85- return (TTree*)mFile ->Get (source.path ().c_str ());
89+ auto tree = (TTree*)mFile ->GetObjectChecked (source.path ().c_str (), TClass::GetClass<TTree>());
90+ if (tree) {
91+ return std::shared_ptr<VirtualRootFileSystemBase>(new SingleTreeFileSystem (tree));
92+ }
93+
94+
95+ auto directory = (TDirectoryFile*)mFile ->GetObjectChecked (source.path ().c_str (), TClass::GetClass<TDirectory>());
96+ if (directory) {
97+ return std::shared_ptr<VirtualRootFileSystemBase>(new TFileFileSystem (directory, 50 * 1024 * 1024 ));
98+ }
99+ throw runtime_error_f (" Unsupported file layout" );
86100}
87101
88- arrow::Result<arrow::fs::FileInfo> TTreeFileSystem ::GetFileInfo (const std::string& path)
102+ arrow::Result<arrow::fs::FileInfo> TFileFileSystem ::GetFileInfo (const std::string& path)
89103{
90104 arrow::fs::FileInfo result;
91105 result.set_type (arrow::fs::FileType::NotFound);
92106 result.set_path (path);
93107 arrow::dataset::FileSource source (path, shared_from_this ());
94108
95- for (auto branch : *GetTree (source)->GetListOfBranches ()) {
96- if (strncmp (branch->GetName (), result.path ().c_str (), path.size ()) == 0 ) {
97- result.set_type (arrow::fs::FileType::File);
98- return result;
99- }
109+ auto fs = GetSubFilesystem (source);
110+
111+ // For now we only support single trees.
112+ if (std::dynamic_pointer_cast<SingleTreeFileSystem>(fs)) {
113+ result.set_type (arrow::fs::FileType::File);
114+ return result;
100115 }
101116 return result;
102117}
103118
104- arrow::Result<arrow::fs::FileInfoVector> TTreeFileSystem::GetFileInfo (const arrow::fs::FileSelector& select)
119+ arrow::Result<arrow::fs::FileInfo> VirtualRootFileSystemBase::GetFileInfo (std::string const &)
120+ {
121+ arrow::fs::FileInfo result;
122+ result.set_type (arrow::fs::FileType::NotFound);
123+ return result;
124+ }
125+
126+ arrow::Result<arrow::fs::FileInfoVector> VirtualRootFileSystemBase::GetFileInfo (const arrow::fs::FileSelector& select)
105127{
106128 arrow::fs::FileInfoVector results;
107129 auto selected = this ->GetFileInfo (select.base_dir );
@@ -111,59 +133,59 @@ arrow::Result<arrow::fs::FileInfoVector> TTreeFileSystem::GetFileInfo(const arro
111133 return results;
112134}
113135
114- arrow::Status TTreeFileSystem ::CreateDir (const std::string& path, bool recursive)
136+ arrow::Status VirtualRootFileSystemBase ::CreateDir (const std::string& path, bool recursive)
115137{
116138 return arrow::Status::NotImplemented (" Read only filesystem" );
117139}
118140
119- arrow::Status TTreeFileSystem ::DeleteDir (const std::string& path)
141+ arrow::Status VirtualRootFileSystemBase ::DeleteDir (const std::string& path)
120142{
121143 return arrow::Status::NotImplemented (" Read only filesystem" );
122144}
123145
124- arrow::Status TTreeFileSystem ::CopyFile (const std::string& src, const std::string& dest)
146+ arrow::Status VirtualRootFileSystemBase ::CopyFile (const std::string& src, const std::string& dest)
125147{
126148 return arrow::Status::NotImplemented (" Read only filesystem" );
127149}
128150
129- arrow::Status TTreeFileSystem ::Move (const std::string& src, const std::string& dest)
151+ arrow::Status VirtualRootFileSystemBase ::Move (const std::string& src, const std::string& dest)
130152{
131153 return arrow::Status::NotImplemented (" Read only filesystem" );
132154}
133155
134- arrow::Status TTreeFileSystem ::DeleteDirContents (const std::string& path, bool missing_dir_ok)
156+ arrow::Status VirtualRootFileSystemBase ::DeleteDirContents (const std::string& path, bool missing_dir_ok)
135157{
136158 return arrow::Status::NotImplemented (" Read only filesystem" );
137159}
138160
139- arrow::Status TTreeFileSystem ::DeleteRootDirContents ()
161+ arrow::Status VirtualRootFileSystemBase ::DeleteRootDirContents ()
140162{
141163 return arrow::Status::NotImplemented (" Read only filesystem" );
142164}
143165
144- arrow::Status TTreeFileSystem ::DeleteFile (const std::string& path)
166+ arrow::Status VirtualRootFileSystemBase ::DeleteFile (const std::string& path)
145167{
146168 return arrow::Status::NotImplemented (" Read only filesystem" );
147169}
148170
149- arrow::Result<std::shared_ptr<arrow::io::InputStream>> TTreeFileSystem ::OpenInputStream (const std::string& path)
171+ arrow::Result<std::shared_ptr<arrow::io::InputStream>> VirtualRootFileSystemBase ::OpenInputStream (const std::string& path)
150172{
151173 return arrow::Status::NotImplemented (" Non streamable filesystem" );
152174}
153175
154- arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> TTreeFileSystem ::OpenInputFile (const std::string& path)
176+ arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> VirtualRootFileSystemBase ::OpenInputFile (const std::string& path)
155177{
156178 return arrow::Status::NotImplemented (" No random access file system" );
157179}
158180
159- arrow::Result<std::shared_ptr<arrow::io::OutputStream>> TTreeFileSystem ::OpenOutputStream (
181+ arrow::Result<std::shared_ptr<arrow::io::OutputStream>> VirtualRootFileSystemBase ::OpenOutputStream (
160182 const std::string& path,
161183 const std::shared_ptr<const arrow::KeyValueMetadata>& metadata)
162184{
163185 return arrow::Status::NotImplemented (" Non streamable filesystem" );
164186}
165187
166- arrow::Result<std::shared_ptr<arrow::io::OutputStream>> TTreeFileSystem ::OpenAppendStream (
188+ arrow::Result<std::shared_ptr<arrow::io::OutputStream>> VirtualRootFileSystemBase ::OpenAppendStream (
167189 const std::string& path,
168190 const std::shared_ptr<const arrow::KeyValueMetadata>& metadata)
169191{
@@ -173,8 +195,13 @@ arrow::Result<std::shared_ptr<arrow::io::OutputStream>> TTreeFileSystem::OpenApp
173195arrow::Result<std::shared_ptr<arrow::Schema>> TTreeFileFormat::Inspect (const arrow::dataset::FileSource& source) const
174196{
175197 arrow::Schema schema{{}};
176- auto fs = std::dynamic_pointer_cast<TTreeFileSystem>(source.filesystem ());
177- TTree* tree = fs->GetTree (source);
198+ auto fs = std::dynamic_pointer_cast<VirtualRootFileSystemBase>(source.filesystem ());
199+ // Actually get the TTree from the ROOT file.
200+ auto treeFs = std::dynamic_pointer_cast<TTreeFileSystem>(fs->GetSubFilesystem (source));
201+ if (!treeFs.get ()) {
202+ throw runtime_error_f (" Unknown filesystem %s\n " , source.filesystem ()->type_name ().c_str ());
203+ }
204+ TTree* tree = treeFs->GetTree (source);
178205
179206 auto branches = tree->GetListOfBranches ();
180207 auto n = branches->GetEntries ();
@@ -270,7 +297,9 @@ arrow::Result<arrow::RecordBatchGenerator> TTreeFileFormat::ScanBatchesAsync(
270297 auto physical_schema = *treeFragment->ReadPhysicalSchema ();
271298
272299 static TBufferFile buffer{TBuffer::EMode::kWrite , 4 * 1024 * 1024 };
273- auto fs = std::dynamic_pointer_cast<TTreeFileSystem>(treeFragment->source ().filesystem ());
300+ auto containerFS = std::dynamic_pointer_cast<VirtualRootFileSystemBase>(treeFragment->source ().filesystem ());
301+ auto fs = std::dynamic_pointer_cast<TTreeFileSystem>(containerFS->GetSubFilesystem (treeFragment->source ()));
302+
274303 int64_t rows = -1 ;
275304 TTree* tree = fs->GetTree (treeFragment->source ());
276305 for (auto & field : fields) {
@@ -446,9 +475,42 @@ arrow::Result<arrow::RecordBatchGenerator> TTreeFileFormat::ScanBatchesAsync(
446475}
447476
448477TBufferFileFS::TBufferFileFS (TBufferFile* f)
449- : TTreeFileSystem(),
450- mTree((TTree*)f->ReadObject(TTree::Class()))
478+ : VirtualRootFileSystemBase(),
479+ mBuffer(f),
480+ mFilesystem(nullptr )
481+ {
482+ }
483+
484+ TTreeFileSystem::~TTreeFileSystem () = default ;
485+
486+
487+ arrow::Result<arrow::fs::FileInfo> TBufferFileFS::GetFileInfo (const std::string& path)
488+ {
489+ arrow::fs::FileInfo result;
490+ result.set_type (arrow::fs::FileType::NotFound);
491+ result.set_path (path);
492+ arrow::dataset::FileSource source (path, shared_from_this ());
493+
494+ // Only once to avoid rereading the streamed tree.
495+ if (!mFilesystem .get ()) {
496+ return result;
497+ }
498+
499+ // For now we only support single trees.
500+ if (std::dynamic_pointer_cast<SingleTreeFileSystem>(mFilesystem )) {
501+ result.set_type (arrow::fs::FileType::File);
502+ return result;
503+ }
504+ return result;
505+ }
506+
507+ std::shared_ptr<VirtualRootFileSystemBase> TBufferFileFS::GetSubFilesystem (arrow::dataset::FileSource source)
451508{
509+ if (!mFilesystem .get ()) {
510+ auto tree = ((TTree*)mBuffer ->ReadObject (TTree::Class ()));
511+ mFilesystem = std::make_shared<SingleTreeFileSystem>(tree);
512+ }
513+ return mFilesystem ;
452514}
453515
454516} // namespace o2::framework
0 commit comments