Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ public int hashCode()
public JSONObject toJSON(Container container)
{
JSONObject jsonObject = super.toJSON(container);
if (isIdFile() && LibSourceFile.DIANN_REPORT_TSV_PLACEHOLDER.equals(getName()) && !found())
if (isIdFile() && LibSourceFile.DIANN_REPORT_PLACEHOLDER.equals(getName()) && !found())
{
jsonObject.put("statusDetails", "The DIA-NN TSV report must be in the same directory as the " +
jsonObject.put("statusDetails", "The DIA-NN report file (.parquet or .tsv) must be in the same directory as the " +
".speclib, and share some leading characters in the file name");
}
return jsonObject;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@

public class SpecLibValidator extends SpecLibValidation<ValidatorSkylineDocSpecLib>
{
private static final List<String> RAW_FILE_TYPES = List.of("raw", "wiff", "lcd", "d", "mzxml", "mzml");
private static final String TSV = "tsv";
private static final String PARQUET = "parquet";

private List<ValidatorSkylineDocSpecLib> _docsWithLibrary;
private SpecLibKeyWithSize _key;
private SpecLibInfo _specLibInfo;
Expand Down Expand Up @@ -188,20 +192,22 @@ private static List<LibSourceFile> getLibSources(SpecLibReader libReader, ISpect
}
else if (sourceFiles.stream().anyMatch(LibSourceFile::isDiannSearch))
{
// Building a library with DIA-NN results in Skyline requires a .speclib file and a report TSV file.
// The .blib file includes the name of .speclib but not the name of the report TSV file.
// Building a library without the TSV gives this error message in Skyline:
// "...the TSV report is required to read speclib files and must be in the same directory as the speclib
// and share some leading characters (e.g. somedata-tsv.speclib and somedata-report.tsv)..."
// Building a library with DIA-NN results in Skyline requires a .speclib file and a report file (.parquet or .tsv).
// The .blib file includes the name of .speclib but not the name of the report file.
// Building a library without the report file gives this error message in Skyline:
// "...the Parquet or TSV report is required to read speclib files and must be in the same directory as the speclib
// and share some leading characters (e.g. somedata-tsv.speclib and somedata-report.parquet)..."

// At some point Skyline may start including the names of all source files in the .blib SQLite file,
// so first check if any TSV files were listed as sources in the .blib
boolean hasTsvFiles = sourceFiles.stream()
.anyMatch(file -> file.hasIdFile() && file.getIdFile().toLowerCase().endsWith(".tsv"));
if (!hasTsvFiles)
// so first check if any Parquet or TSV files were listed as sources in the .blib
boolean hasReportFiles = sourceFiles.stream()
.anyMatch(file -> file.hasIdFile()
&& (TSV.equals(FileUtil.getExtension(file.getIdFile().toLowerCase()))
|| PARQUET.equals(FileUtil.getExtension(file.getIdFile().toLowerCase()))));
if (!hasReportFiles)
{
// If there is no TSV source listed in the .blib, then add a placeholder for the DIA-NN report file.
sourceFiles.add(new LibSourceFile(null, LibSourceFile.DIANN_REPORT_TSV_PLACEHOLDER, null));
// If there is no Parquet or TSV source listed in the .blib, then add a placeholder for the DIA-NN report file.
sourceFiles.add(new LibSourceFile(null, LibSourceFile.DIANN_REPORT_PLACEHOLDER, null));
}
}

Expand Down Expand Up @@ -254,7 +260,10 @@ private void validateLibrarySources(List<LibSourceFile> sources, FileContentServ
if (source.hasSpectrumSourceFile() && !checkedFiles.contains(ssf))
{
checkedFiles.add(ssf);
Path path = getPath(ssf, rawFilesDirPaths, source.isMaxQuantSearch(), fcs);
// Libraries built with MaxQuant or DIA-NN v2.0 results may only have the base raw file names (without extension)
// stored in the BLIB. If the library source is either MaxQuant or DIA-NN we will compare with base file names of valid raw files.
boolean allowBaseName = source.isMaxQuantSearch() || source.isDiannSearch();
Path path = getPath(ssf, rawFilesDirPaths, allowBaseName);
SpecLibSourceFile sourceFile = new SpecLibSourceFile(ssf, SPECTRUM);
sourceFile.setSpecLibValidationId(getId());
sourceFile.setPath(path != null ? path.toString() : DataFile.NOT_FOUND);
Expand All @@ -263,24 +272,24 @@ private void validateLibrarySources(List<LibSourceFile> sources, FileContentServ
String idFile = source.getIdFile();
if (source.hasIdFile() && !checkedFiles.contains(idFile))
{
if (LibSourceFile.DIANN_REPORT_TSV_PLACEHOLDER.equals(idFile)) continue; // We will look for this when we come to the .speclib file
if (LibSourceFile.DIANN_REPORT_PLACEHOLDER.equals(idFile)) continue; // We will look for this when we come to the .speclib file

checkedFiles.add(idFile);
Path path = getPath(idFile, rawFilesDirPaths, false, fcs);
Path path = getPath(idFile, rawFilesDirPaths, false);
SpecLibSourceFile sourceFile = new SpecLibSourceFile(idFile, PEPTIDE_ID);
sourceFile.setSpecLibValidationId(getId());
sourceFile.setPath(path != null ? path.toString() : DataFile.NOT_FOUND);
idFiles.add(sourceFile);

if (source.isDiannSearch())
{
// If this is a DIA-NN .speclib file, check for the required report TSV file.
// We are doing this because the .blib does not include the name of the report TSV file.
// We only know that: "the TSV report is required to read speclib files and must be in the
// If this is a DIA-NN .speclib file, check for the required report file (Parquet or TSV).
// We are doing this because the .blib does not include the name of the report file.
// We only know that: "the Parquet or TSV report is required to read speclib files and must be in the
// same directory as the speclib and share some leading characters
// (e.g. somedata-tsv.speclib and somedata-report.tsv)"
// (e.g. somedata-tsv.speclib and somedata-report.parquet)"
Path reportFilePath = sourceFile.found() ? getDiannReportFilePath(path) : null;
SpecLibSourceFile diannReportSourceFile = new SpecLibSourceFile(LibSourceFile.DIANN_REPORT_TSV_PLACEHOLDER, PEPTIDE_ID);
SpecLibSourceFile diannReportSourceFile = new SpecLibSourceFile(LibSourceFile.DIANN_REPORT_PLACEHOLDER, PEPTIDE_ID);
diannReportSourceFile.setSpecLibValidationId(getId());
diannReportSourceFile.setPath(reportFilePath != null ? reportFilePath.toString() : DataFile.NOT_FOUND);
idFiles.add(diannReportSourceFile);
Expand All @@ -292,11 +301,11 @@ private void validateLibrarySources(List<LibSourceFile> sources, FileContentServ
setIdFiles(idFiles);
}

private Path getPath(String name, Set<Path> rawFilesDirPaths, boolean isMaxquant, FileContentService fcs)
private Path getPath(String name, Set<Path> rawFilesDirPaths, boolean allowBaseName)
{
for (Path rawFilesDir: rawFilesDirPaths)
{
Path path = findInDirectoryTree(rawFilesDir, name, isMaxquant);
Path path = findInDirectoryTree(rawFilesDir, name, allowBaseName);
if (path != null)
{
return path;
Expand All @@ -321,7 +330,19 @@ private static Path getDiannReportFilePath(Path speclibFilePath)

private static Path getDiannReportFilePath(String specLibFileName, List<Path> candidateFiles)
{
Map<Path, Integer> prefixLengthMap = getCommonPrefixLengthsForTsvFiles(candidateFiles, specLibFileName);
// First look for a matching Parquet file
Map<Path, Integer> prefixLengthMap = getCommonPrefixLengthsForParquetFiles(candidateFiles, specLibFileName);
// Find the Parquet file with the longest common prefix
Path parquetFile = prefixLengthMap.entrySet().stream()
.sorted((entry1, entry2) -> Integer.compare(entry2.getValue(), entry1.getValue())) // Sort descending by matching prefix length
.map(Map.Entry::getKey) // File paths
.findFirst() // Get the first file that meets the conditions
.orElse(null);
if (parquetFile != null) return parquetFile;


// Look for a matching TSV file if we did not find a Parquet file
prefixLengthMap = getCommonPrefixLengthsForTsvFiles(candidateFiles, specLibFileName);

// Find the TSV file with the longest common prefix that also has the expected column headers in the first line
return prefixLengthMap.entrySet().stream()
Expand All @@ -332,12 +353,12 @@ private static Path getDiannReportFilePath(String specLibFileName, List<Path> ca
.orElse(null);
}

private static Map<Path, Integer> getCommonPrefixLengthsForTsvFiles(List<Path> files, String specLibFileName)
private static Map<Path, Integer> getCommonPrefixLengths(List<Path> files, String specLibFileName, String fileExtension)
{
String specLibFileBaseName = FileUtil.getBaseName(specLibFileName); // Remove file extension
Map<Path, Integer> prefixLengthMap = new HashMap<>();
files.stream()
.filter(file -> file.getFileName().toString().toLowerCase().endsWith(".tsv")) // Ensure it's a TSV file
.filter(file -> fileExtension.equals(FileUtil.getExtension(file.getFileName().toString().toLowerCase())))
.forEach(file -> {
// Get the longest common prefix length
int commonPrefixLength = commonPrefixLength(specLibFileBaseName, FileUtil.getBaseName(file.getFileName().toString()));
Expand All @@ -350,6 +371,16 @@ private static Map<Path, Integer> getCommonPrefixLengthsForTsvFiles(List<Path> f
return prefixLengthMap;
}

/**
 * TSV-specific convenience wrapper around {@code getCommonPrefixLengths}.
 *
 * @param files candidate files to consider
 * @param specLibFileName name of the .speclib file the candidates are compared against
 * @return the map produced by {@code getCommonPrefixLengths} when restricted to the {@code TSV} extension
 */
private static Map<Path, Integer> getCommonPrefixLengthsForTsvFiles(List<Path> files, String specLibFileName)
{
    final Map<Path, Integer> tsvPrefixLengths = getCommonPrefixLengths(files, specLibFileName, TSV);
    return tsvPrefixLengths;
}

/**
 * Parquet-specific convenience wrapper around {@code getCommonPrefixLengths}.
 *
 * @param files candidate files to consider
 * @param specLibFileName name of the .speclib file the candidates are compared against
 * @return the map produced by {@code getCommonPrefixLengths} when restricted to the {@code PARQUET} extension
 */
private static Map<Path, Integer> getCommonPrefixLengthsForParquetFiles(List<Path> files, String specLibFileName)
{
    final Map<Path, Integer> parquetPrefixLengths = getCommonPrefixLengths(files, specLibFileName, PARQUET);
    return parquetPrefixLengths;
}

private static int commonPrefixLength(String s1, String s2)
{
int maxLength = Math.min(s1.length(), s2.length());
Expand Down Expand Up @@ -418,7 +449,7 @@ private Path findInDirectoryTree(java.nio.file.Path rawFilesDirPath, String file
return filePath;
}

// Look for zip files
// Look for zip files, or raw files with matching base names if we are allowing basename matching.
try (Stream<Path> list = Files.list(rawFilesDirPath).filter(p -> FileUtil.getFileName(p).startsWith(fileName)))
{
for (Path path : list.collect(Collectors.toList()))
Expand All @@ -438,14 +469,27 @@ private static boolean accept(String fileName, String uploadedFileName)
return accept(fileName, uploadedFileName, false);
}

private static boolean accept(String fileName, String uploadedFileName, boolean allowBasenameOnly)
private static boolean accept(String fileName, String uploadedFileName, boolean allowBaseName)
{
// Accept QC_10.9.17.raw OR for QC_10.9.17.raw.zip
// 170428_DBS_cal_7a.d OR 170428_DBS_cal_7a.d.zip
String ext = FileUtil.getExtension(uploadedFileName).toLowerCase();
// If allowBaseName is set to true, accept
// B_240207_IO5x75_HeLa_400ng.raw (or another valid raw file extension) for B_240207_IO5x75_HeLa_400ng
String ext = FileUtil.getExtension(uploadedFileName);
ext = ext != null ? ext.toLowerCase() : "";
return fileName.equals(uploadedFileName)
|| ext.equals("zip") && fileName.equals(FileUtil.getBaseName(uploadedFileName))
|| (allowBasenameOnly && fileName.equals(FileUtil.getBaseName(uploadedFileName)));
|| (allowBaseName && fileName.equals(getUploadedRawFileBaseName(uploadedFileName)));
}

/**
 * Returns the base name of an uploaded file (as computed by {@code FileUtil.getBaseName}) when the
 * file's extension, compared case-insensitively, is one of {@code RAW_FILE_TYPES}.
 *
 * @param uploadedFileName name of the uploaded file, e.g. {@code B_240207_IO5x75_HeLa_400ng.raw}
 * @return the base name of {@code uploadedFileName}, or {@code null} if its extension is not a
 *         recognized raw file type
 */
private static String getUploadedRawFileBaseName(String uploadedFileName)
{
    // Lower-case with a fixed locale so the comparison does not depend on the JVM default locale
    // (under a Turkish locale "WIFF" lower-cases to "wıff", which would never match "wiff").
    String ext = FileUtil.getExtension(uploadedFileName.toLowerCase(java.util.Locale.ROOT));

    // Deliberately not RAW_FILE_TYPES.contains(ext): lists created with List.of throw a
    // NullPointerException on contains(null), and ext is presumably null when the name has no
    // extension (accept() guards against that case). type.equals(null) is simply false.
    boolean isRawFile = RAW_FILE_TYPES.stream().anyMatch(type -> type.equals(ext));
    return isRawFile ? FileUtil.getBaseName(uploadedFileName) : null;
}

public static class SpecLibKeyWithSize
Expand Down Expand Up @@ -567,6 +611,10 @@ public void testAccept()
// Accept 170428_DBS_cal_7a.d OR 170428_DBS_cal_7a.d.zip
assertTrue(accept("170428_DBS_cal_7a.d", "170428_DBS_cal_7a.d"));
assertTrue(accept("170428_DBS_cal_7a.d", "170428_DBS_cal_7a.d.zip"));

assertFalse(accept("B_240207_IO5x75_HeLa_400ng", "B_240207_IO5x75_HeLa_400ng.raw"));
assertTrue(accept("B_240207_IO5x75_HeLa_400ng", "B_240207_IO5x75_HeLa_400ng.raw", true));
assertFalse(accept("B_240207_IO5x75_HeLa_400ng", "B_240207_IO5x75_HeLa_400ng.txt", true));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public boolean isMaxQuantSearch()
return (hasIdFile() && getIdFile().endsWith("msms.txt")) || containsScoreType("MAXQUANT SCORE");
}

public static String DIANN_REPORT_TSV_PLACEHOLDER = "DIA-NN report file";
// Display name used for the DIA-NN report file (Parquet or TSV) when its actual name is not known.
// Declared final so that this shared constant cannot be reassigned at runtime.
public static final String DIANN_REPORT_PLACEHOLDER = "DIA-NN report file";

// These are some of the column headers that we expect to see in a DIA-NN report TSV file
public static final List<String> DIANN_REPORT_EXPECTED_HEADERS = List.of("File.Name", "Run", "Protein.Group", "Protein.Ids", "Protein.Names");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Dummy file for testing.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Dummy file for testing.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
Files downloaded from https://panoramaweb.org/QuickProt_datasets.url (Ranish lab)
- Files used to build the library test_diann_library.blib were downloaded from https://panoramaweb.org/QuickProt_datasets.url (Ranish lab).
- test_diann_V2_library.blib was built with test Skyline files downloaded from https://github.com/ProteoWizard/pwiz/tree/master/pwiz_tools/BiblioSpec/tests/inputs/diann2-synchro-pasef.
Raw files used for testing validation of test_diann_V2_library.blib are dummy files.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,23 @@ public void verifyLibrarySourceFileStatusDetails(String file, String libraryFile
);
}

/**
 * Verifies the path at which the file corresponding to a Peptide Id library source was found.
 * Expands the row for the given library in the spectral libraries panel, then looks up, in the
 * "lib-id-files-status" table, a row whose file-name cell is followed by a "FOUND" status cell
 * and then by a cell containing the expected path.
 *
 * @param peptideIdFile name of the peptide Id file as displayed in the table
 * @param pathFound path text expected in the cell following the status cell
 * @param libraryFileName name of the library whose row is expanded
 * @param librarySize size of the library, used together with the name to locate its row
 */
public void verifyPeptideIdFilePath(String peptideIdFile, String pathFound, String libraryFileName, String librarySize)
{
    var specLibsPanel = elementCache().specLibsPanel;
    scrollIntoView(specLibsPanel);
    expandLibraryRow(specLibsPanel, libraryFileName, librarySize);

    var idFilesTable = specLibsPanel.findElement(getFilesTableLocator(libraryFileName, "lib-id-files-status"));

    // Build the row locator piece by piece: file-name cell -> status cell with FOUND badge -> path cell
    var fileNameCell = Locator.tag("td").withText(peptideIdFile);
    var foundBadge = Locator.tag("span").withClass("pxv-valid").withText("FOUND");
    var pathCell = Locator.XPathLocator.tag("tbody").child("tr")
            .child(fileNameCell)
            .followingSibling("td")
            .child(foundBadge)
            .parent()
            .followingSibling("td")
            .withText(pathFound);

    // The lookup itself is the verification; this relies on findElement to signal a missing element.
    idFilesTable.findElement(pathCell);
}

public void verifySpectralLibraryStatus(String libraryFile, String fileSize, String statusText,
List<String> skylineDocNames,
List<String> spectrumFiles, List<String> spectrumFilesMissing,
Expand Down
Loading