Skip to content

Commit 7e92b47

Browse files
committed
No need to copy files locally in AbstractSingleCellHandler
1 parent e6225b0 commit 7e92b47

File tree

1 file changed

+16
-77
lines changed

1 file changed

+16
-77
lines changed

singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java

Lines changed: 16 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
3636
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
3737
import org.labkey.api.singlecell.CellHashingService;
3838
import org.labkey.api.singlecell.pipeline.AbstractSingleCellPipelineStep;
39-
import org.labkey.api.singlecell.pipeline.AbstractSingleCellStep;
4039
import org.labkey.api.singlecell.pipeline.SingleCellRawDataStep;
4140
import org.labkey.api.singlecell.pipeline.SingleCellStep;
4241
import org.labkey.api.util.FileUtil;
@@ -385,87 +384,27 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
385384

386385
List<SingleCellStep.SeuratObjectWrapper> currentFiles;
387386
Set<File> originalInputs = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toSet());
388-
Map<File, File> localCopyToOrig = new HashMap<>();
387+
Map<String, File> inputFileMap = new HashMap<>();
389388
if (_doProcessRawCounts)
390389
{
391390
currentFiles = processRawCounts(ctx, inputFiles, basename);
392391
}
393392
else
394393
{
395-
try
396-
{
397-
Set<String> distinctIds = new HashSet<>();
398-
Set<String> copiedFiles = new HashSet<>();
394+
Set<String> distinctIds = new HashSet<>();
399395

400-
currentFiles = new ArrayList<>();
401-
for (SequenceOutputFile so : inputFiles)
396+
currentFiles = new ArrayList<>();
397+
for (SequenceOutputFile so : inputFiles)
398+
{
399+
String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName());
400+
if (distinctIds.contains(datasetId))
402401
{
403-
String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName());
404-
if (distinctIds.contains(datasetId))
405-
{
406-
throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId);
407-
}
408-
distinctIds.add(datasetId);
409-
410-
//ensure local copy:
411-
if (copiedFiles.contains(so.getFile().getName()))
412-
{
413-
throw new PipelineJobException("Duplicate files names in input data: " + so.getFile().getName());
414-
}
415-
copiedFiles.add(so.getFile().getName());
416-
417-
File local = new File(ctx.getOutputDir(), so.getFile().getName());
418-
if (local.exists())
419-
{
420-
local.delete();
421-
}
422-
423-
FileUtils.copyFile(so.getFile(), local);
424-
_resumer.getFileManager().addIntermediateFile(local);
425-
426-
File cellBarcodes = CellHashingServiceImpl.get().getCellBarcodesFromSeurat(so.getFile(), false);
427-
if (cellBarcodes.exists())
428-
{
429-
ctx.getLogger().debug("Also making local copy of cellBarcodes TSV: " + cellBarcodes.getPath());
430-
File cellBarcodesLocal = new File(ctx.getOutputDir(), cellBarcodes.getName());
431-
if (cellBarcodesLocal.exists())
432-
{
433-
cellBarcodesLocal.delete();
434-
}
435-
436-
FileUtils.copyFile(cellBarcodes, cellBarcodesLocal);
437-
_resumer.getFileManager().addIntermediateFile(cellBarcodesLocal);
438-
}
439-
else
440-
{
441-
ctx.getLogger().debug("cellBarcodes TSV not found, expected: " + cellBarcodes.getPath());
442-
}
443-
444-
File metadataFile = CellHashingServiceImpl.get().getMetaTableFromSeurat(so.getFile(), false);
445-
if (metadataFile.exists())
446-
{
447-
ctx.getLogger().debug("Also making local copy of metadata TSV: " + metadataFile.getPath());
448-
File metadataFileLocal = new File(ctx.getOutputDir(), metadataFile.getName());
449-
if (metadataFileLocal.exists())
450-
{
451-
metadataFileLocal.delete();
452-
}
453-
454-
FileUtils.copyFile(metadataFile, metadataFileLocal);
455-
_resumer.getFileManager().addIntermediateFile(metadataFileLocal);
456-
}
457-
else
458-
{
459-
ctx.getLogger().warn("metadataFile TSV not found, expected: " + metadataFile.getPath());
460-
}
461-
462-
currentFiles.add(new SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, local, so));
463-
localCopyToOrig.put(local, so.getFile());
402+
throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId);
464403
}
465-
}
466-
catch (IOException e)
467-
{
468-
throw new PipelineJobException(e);
404+
distinctIds.add(datasetId);
405+
406+
currentFiles.add(new SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, so.getFile(), so));
407+
inputFileMap.put(so.getName(), so.getFile());
469408
}
470409
}
471410

@@ -671,14 +610,14 @@ else if (inputFiles.size() == 1)
671610

672611
//This indicates the job processed an input file, but did not create a new object (like running FindMarkers)
673612
boolean skipOutput = false;
674-
if (localCopyToOrig.containsKey(output.getFile()))
613+
if (inputFileMap.containsKey(output.getFile().getName()))
675614
{
676615
try
677616
{
678-
ctx.getLogger().debug("Comparing file context of output to determine if it matches input: "+ output.getFile().getName());
679-
ctx.getLogger().debug("Original file: " + localCopyToOrig.get(output.getFile()));
617+
ctx.getLogger().debug("Comparing file context of output to determine if it matches input: " + output.getFile().getName());
618+
ctx.getLogger().debug("Original file: " + inputFileMap.get(output.getFile().getName()));
680619
ctx.getLogger().debug("Pipeline output file: " + output.getFile());
681-
if (FileUtils.contentEquals(localCopyToOrig.get(output.getFile()), output.getFile()))
620+
if (FileUtils.contentEquals(inputFileMap.get(output.getFile().getName()), output.getFile()))
682621
{
683622
ctx.getLogger().info("Sequence output is the same as an input, will not re-create output for seurat object: " + output.getFile().getPath());
684623
skipOutput = true;

0 commit comments

Comments
 (0)