|
36 | 36 | import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; |
37 | 37 | import org.labkey.api.singlecell.CellHashingService; |
38 | 38 | import org.labkey.api.singlecell.pipeline.AbstractSingleCellPipelineStep; |
39 | | -import org.labkey.api.singlecell.pipeline.AbstractSingleCellStep; |
40 | 39 | import org.labkey.api.singlecell.pipeline.SingleCellRawDataStep; |
41 | 40 | import org.labkey.api.singlecell.pipeline.SingleCellStep; |
42 | 41 | import org.labkey.api.util.FileUtil; |
@@ -385,87 +384,27 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c |
385 | 384 |
|
386 | 385 | List<SingleCellStep.SeuratObjectWrapper> currentFiles; |
387 | 386 | Set<File> originalInputs = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toSet()); |
388 | | - Map<File, File> localCopyToOrig = new HashMap<>(); |
| 387 | + Map<String, File> inputFileMap = new HashMap<>(); |
389 | 388 | if (_doProcessRawCounts) |
390 | 389 | { |
391 | 390 | currentFiles = processRawCounts(ctx, inputFiles, basename); |
392 | 391 | } |
393 | 392 | else |
394 | 393 | { |
395 | | - try |
396 | | - { |
397 | | - Set<String> distinctIds = new HashSet<>(); |
398 | | - Set<String> copiedFiles = new HashSet<>(); |
| 394 | + Set<String> distinctIds = new HashSet<>(); |
399 | 395 |
|
400 | | - currentFiles = new ArrayList<>(); |
401 | | - for (SequenceOutputFile so : inputFiles) |
| 396 | + currentFiles = new ArrayList<>(); |
| 397 | + for (SequenceOutputFile so : inputFiles) |
| 398 | + { |
| 399 | + String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); |
| 400 | + if (distinctIds.contains(datasetId)) |
402 | 401 | { |
403 | | - String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); |
404 | | - if (distinctIds.contains(datasetId)) |
405 | | - { |
406 | | - throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId); |
407 | | - } |
408 | | - distinctIds.add(datasetId); |
409 | | - |
410 | | - //ensure local copy: |
411 | | - if (copiedFiles.contains(so.getFile().getName())) |
412 | | - { |
413 | | - throw new PipelineJobException("Duplicate files names in input data: " + so.getFile().getName()); |
414 | | - } |
415 | | - copiedFiles.add(so.getFile().getName()); |
416 | | - |
417 | | - File local = new File(ctx.getOutputDir(), so.getFile().getName()); |
418 | | - if (local.exists()) |
419 | | - { |
420 | | - local.delete(); |
421 | | - } |
422 | | - |
423 | | - FileUtils.copyFile(so.getFile(), local); |
424 | | - _resumer.getFileManager().addIntermediateFile(local); |
425 | | - |
426 | | - File cellBarcodes = CellHashingServiceImpl.get().getCellBarcodesFromSeurat(so.getFile(), false); |
427 | | - if (cellBarcodes.exists()) |
428 | | - { |
429 | | - ctx.getLogger().debug("Also making local copy of cellBarcodes TSV: " + cellBarcodes.getPath()); |
430 | | - File cellBarcodesLocal = new File(ctx.getOutputDir(), cellBarcodes.getName()); |
431 | | - if (cellBarcodesLocal.exists()) |
432 | | - { |
433 | | - cellBarcodesLocal.delete(); |
434 | | - } |
435 | | - |
436 | | - FileUtils.copyFile(cellBarcodes, cellBarcodesLocal); |
437 | | - _resumer.getFileManager().addIntermediateFile(cellBarcodesLocal); |
438 | | - } |
439 | | - else |
440 | | - { |
441 | | - ctx.getLogger().debug("cellBarcodes TSV not found, expected: " + cellBarcodes.getPath()); |
442 | | - } |
443 | | - |
444 | | - File metadataFile = CellHashingServiceImpl.get().getMetaTableFromSeurat(so.getFile(), false); |
445 | | - if (metadataFile.exists()) |
446 | | - { |
447 | | - ctx.getLogger().debug("Also making local copy of metadata TSV: " + metadataFile.getPath()); |
448 | | - File metadataFileLocal = new File(ctx.getOutputDir(), metadataFile.getName()); |
449 | | - if (metadataFileLocal.exists()) |
450 | | - { |
451 | | - metadataFileLocal.delete(); |
452 | | - } |
453 | | - |
454 | | - FileUtils.copyFile(metadataFile, metadataFileLocal); |
455 | | - _resumer.getFileManager().addIntermediateFile(metadataFileLocal); |
456 | | - } |
457 | | - else |
458 | | - { |
459 | | - ctx.getLogger().warn("metadataFile TSV not found, expected: " + metadataFile.getPath()); |
460 | | - } |
461 | | - |
462 | | - currentFiles.add(new SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, local, so)); |
463 | | - localCopyToOrig.put(local, so.getFile()); |
| 402 | + throw new PipelineJobException("Duplicate dataset Ids in input data: " + datasetId); |
464 | 403 | } |
465 | | - } |
466 | | - catch (IOException e) |
467 | | - { |
468 | | - throw new PipelineJobException(e); |
| 404 | + distinctIds.add(datasetId); |
| 405 | + |
| 406 | + currentFiles.add(new SingleCellStep.SeuratObjectWrapper(datasetId, datasetId, so.getFile(), so)); |
| 407 | + inputFileMap.put(so.getName(), so.getFile()); |
469 | 408 | } |
470 | 409 | } |
471 | 410 |
|
@@ -671,14 +610,14 @@ else if (inputFiles.size() == 1) |
671 | 610 |
|
672 | 611 | //This indicates the job processed an input file, but did not create a new object (like running FindMarkers) |
673 | 612 | boolean skipOutput = false; |
674 | | - if (localCopyToOrig.containsKey(output.getFile())) |
| 613 | + if (inputFileMap.containsKey(output.getFile().getName())) |
675 | 614 | { |
676 | 615 | try |
677 | 616 | { |
678 | | - ctx.getLogger().debug("Comparing file context of output to determine if it matches input: "+ output.getFile().getName()); |
679 | | - ctx.getLogger().debug("Original file: " + localCopyToOrig.get(output.getFile())); |
| 617 | + ctx.getLogger().debug("Comparing file context of output to determine if it matches input: " + output.getFile().getName()); |
| 618 | + ctx.getLogger().debug("Original file: " + inputFileMap.get(output.getFile().getName())); |
680 | 619 | ctx.getLogger().debug("Pipeline output file: " + output.getFile()); |
681 | | - if (FileUtils.contentEquals(localCopyToOrig.get(output.getFile()), output.getFile())) |
| 620 | + if (FileUtils.contentEquals(inputFileMap.get(output.getFile().getName()), output.getFile())) |
682 | 621 | { |
683 | 622 | ctx.getLogger().info("Sequence output is the same as an input, will not re-create output for seurat object: " + output.getFile().getPath()); |
684 | 623 | skipOutput = true; |
|
0 commit comments