|
27 | 27 | import org.labkey.api.pipeline.RecordedAction; |
28 | 28 | import org.labkey.api.pipeline.RecordedActionSet; |
29 | 29 | import org.labkey.api.pipeline.WorkDirectoryTask; |
| 30 | +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; |
30 | 31 | import org.labkey.api.sequenceanalysis.pipeline.TaskFileManager; |
31 | 32 | import org.labkey.api.util.Compress; |
32 | 33 | import org.labkey.api.util.FileType; |
|
45 | 46 | import java.util.Arrays; |
46 | 47 | import java.util.Collection; |
47 | 48 | import java.util.Collections; |
| 49 | +import java.util.HashMap; |
48 | 50 | import java.util.HashSet; |
49 | 51 | import java.util.List; |
| 52 | +import java.util.Map; |
50 | 53 | import java.util.Set; |
51 | 54 |
|
52 | 55 | /** |
@@ -163,6 +166,8 @@ public RecordedActionSet run() throws PipelineJobException |
163 | 166 | List<FileGroup> fileGroups = getHelper().getSettings().getFileGroups(getPipelineJob()); |
164 | 167 | List<SequenceReadsetImpl> readsets = getHelper().getSettings().getReadsets(getPipelineJob()); |
165 | 168 |
|
| 169 | + checkForDuplicateFileNames(readsets, fileGroups); |
| 170 | + |
166 | 171 | if (!SequenceNormalizationTask.shouldRunRemote(getJob())) |
167 | 172 | { |
168 | 173 | getJob().getLogger().info("No files required external normalization, processing inputs locally"); |
@@ -486,5 +491,61 @@ private static void moveInputToAnalysisDir(File input, SequenceJob job, Collecti |
486 | 491 | throw new PipelineJobException(e); |
487 | 492 | } |
488 | 493 | } |
| 494 | + |
| 495 | + private void checkForDuplicateFileNames(List<SequenceReadsetImpl> readsets, List<FileGroup> fileGroups) throws PipelineJobException |
| 496 | + { |
| 497 | + // check for duplicate filename between incoming and existing |
| 498 | + for (SequenceReadsetImpl r : readsets) |
| 499 | + { |
| 500 | + boolean readsetExists = r.getReadsetId() != null && r.getReadsetId() > 0; |
| 501 | + SequenceReadsetImpl existingReadset = readsetExists ? ((SequenceReadsetImpl) SequenceAnalysisService.get().getReadset(r.getReadsetId(), getJob().getUser())) : null; |
| 502 | + List<ReadDataImpl> preexistingReadData = readsetExists ? existingReadset.getReadDataImpl() : Collections.emptyList(); |
| 503 | + if (!preexistingReadData.isEmpty()) |
| 504 | + { |
| 505 | + Map<String, File> existingFileNames = new HashMap<>(); |
| 506 | + preexistingReadData.forEach(rd -> { |
| 507 | + existingFileNames.put(rd.getFile1().getName(), rd.getFile1()); |
| 508 | + if (rd.getFile2() != null) |
| 509 | + { |
| 510 | + existingFileNames.put(rd.getFile2().getName(), rd.getFile2()); |
| 511 | + } |
| 512 | + }); |
| 513 | + |
| 514 | + Map<String, File> sharedFns = new HashMap<>(); |
| 515 | + for (FileGroup fg : fileGroups) |
| 516 | + { |
| 517 | + if (r.getFileSetName() != null && r.getFileSetName().equals(fg.name)) |
| 518 | + { |
| 519 | + for (FileGroup.FilePair fp : fg.filePairs) |
| 520 | + { |
| 521 | + if (existingFileNames.containsKey(fp.file1.getName())) |
| 522 | + { |
| 523 | + sharedFns.put(fp.file1.getName(), fp.file1); |
| 524 | + } |
| 525 | + |
| 526 | + if (fp.file2 != null && existingFileNames.containsKey(fp.file2.getName())) |
| 527 | + { |
| 528 | + sharedFns.put(fp.file2.getName(), fp.file2); |
| 529 | + } |
| 530 | + } |
| 531 | + } |
| 532 | + } |
| 533 | + |
| 534 | + if (!sharedFns.isEmpty()) |
| 535 | + { |
| 536 | + getJob().getLogger().debug("Duplicate file names found between incoming and existing for: " + r.getName()); |
| 537 | + for (String newFile : sharedFns.keySet()) |
| 538 | + { |
| 539 | + long diff = Math.abs(sharedFns.get(newFile).length() - existingFileNames.get(newFile).length()); |
| 540 | + getJob().getLogger().debug("File name: " + newFile + ", with size difference: " + diff); |
| 541 | + if (diff < 100) |
| 542 | + { |
| 543 | + throw new PipelineJobException("Identical filenames with nearly identical size detected between existing and new files for readset: " + r.getName()); |
| 544 | + } |
| 545 | + } |
| 546 | + } |
| 547 | + } |
| 548 | + } |
| 549 | + } |
489 | 550 | } |
490 | 551 |
|
0 commit comments