-
Notifications
You must be signed in to change notification settings - Fork 39
Add similarity scores #720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7816271
6384942
fe45081
3074c53
e1af7ad
8eb0500
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,6 +64,11 @@ public static List<SpectrumPeak> GetNormalizedPeaks(List<SpectrumPeak> spectrum, | |
| return spectrum.Select(n => new SpectrumPeak { Mass = n.Mass, Intensity = Math.Pow(n.Intensity, powFactor) / maxIntensity * maxValue }).ToList(); | ||
| } | ||
|
|
||
| public static List<SpectrumPeak> GetNormalizedByTotalIntensityPeaks(List<SpectrumPeak> spectrum) { | ||
| var sumIntensity = spectrum.Sum(n => n.Intensity); | ||
| return spectrum.Select(n => new SpectrumPeak { Mass = n.Mass, Intensity = n.Intensity / sumIntensity }).ToList(); | ||
| } | ||
|
Comment on lines
+67
to
+70
|
||
|
|
||
| public static List<SpectrumPeak> GetBinnedSpectrum(List<SpectrumPeak> spectrum, double delta = 100, int maxPeaks = 12) { | ||
|
|
||
| var peaks = new List<SpectrumPeak>(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -792,11 +792,13 @@ | |||||||||||||||||||||||||||
| return new double[2] { (double)counter / (double)libCounter, counter }; | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| public static double GetSpetralEntropySimilarity(List<SpectrumPeak> peaks1, List<SpectrumPeak> peaks2, double bin) { | ||||||||||||||||||||||||||||
| var combinedSpectrum = SpectrumHandler.GetCombinedSpectrum(peaks1, peaks2, bin); | ||||||||||||||||||||||||||||
| public static double GetSpectralEntropySimilarity(List<SpectrumPeak> peaks1, List<SpectrumPeak> peaks2, double bin) { | ||||||||||||||||||||||||||||
| if (!IsComparedAvailable(peaks1, peaks2)) return -1d; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| var combinedSpectrum = SpectrumHandler.GetCombinedSpectrum(SpectrumHandler.GetNormalizedByTotalIntensityPeaks(peaks1), SpectrumHandler.GetNormalizedByTotalIntensityPeaks(peaks2), bin); | ||||||||||||||||||||||||||||
| var entropy12 = GetSpectralEntropy(combinedSpectrum); | ||||||||||||||||||||||||||||
| var entropy1 = GetSpectralEntropy(peaks1); | ||||||||||||||||||||||||||||
| var entropy2 = GetSpectralEntropy(peaks2); | ||||||||||||||||||||||||||||
| var entropy1 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks1, bin)); | ||||||||||||||||||||||||||||
| var entropy2 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks2, bin)); | ||||||||||||||||||||||||||||
|
Comment on lines
+800
to
+801
|
||||||||||||||||||||||||||||
| var entropy1 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks1, bin)); | |
| var entropy2 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks2, bin)); | |
| var entropy1 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks1, bin: bin)); | |
| var entropy2 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks2, bin: bin)); |
Copilot
AI
Mar 23, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GetSpectralEntropySimilarity() normalizes peaks via GetNormalizedByTotalIntensityPeaks(). If total intensity is 0, that normalization produces non-finite intensities and this method can return NaN. After adding a guard in the normalization helper, consider also handling degenerate/non-finite entropy results here (e.g., return -1 or 0).
Copilot
AI
Mar 23, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In GetEnhancedDotProduct(), penalty currently has no effect: when measuredMassBuffer[i].Intensity == 0d, the code adds measuredMassBuffer[i].Intensity * (1 - penalty) * ..., which is always 0. This contradicts the XML docs stating unmatched query peaks are weighted by penalty. Update the formula so penalty actually changes the score (or remove the parameter if it’s not intended).
| scalarM += measuredMassBuffer[i].Intensity * (1 - penalty) * measuredMassBuffer[i].FocusedMz; | |
| // Penalize unmatched query peaks by assigning them a reduced contribution | |
| // based on the corresponding reference peak intensity. | |
| scalarM += (1 - penalty) * referenceMassBuffer[i].Intensity * referenceMassBuffer[i].FocusedMz; |
Copilot
AI
Mar 23, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The penalty parameter currently has no effect in the measuredMassBuffer[i].Intensity == 0d branch because you're multiplying by measuredMassBuffer[i].Intensity (which is 0), so the penalized term is always 0. As written, the score is effectively identical to the reverse dot product for the bins you include. Revisit this branch so the penalty actually influences scalarM/the final score (e.g., apply the penalty to a non-zero term, or incorporate bins where the reference intensity is below cutoff if you intend to penalize unmatched query peaks).
| scalarM += measuredMassBuffer[i].Intensity * (1 - penalty) * measuredMassBuffer[i].FocusedMz; | |
| } | |
| else { | |
| scalarM += measuredMassBuffer[i].Intensity * measuredMassBuffer[i].FocusedMz; | |
| } | |
| scalarR += referenceMassBuffer[i].Intensity * referenceMassBuffer[i].FocusedMz; | |
| // Penalize unmatched reference peaks by reducing their contribution to scalarR. | |
| scalarR += referenceMassBuffer[i].Intensity * (1 - penalty) * referenceMassBuffer[i].FocusedMz; | |
| } | |
| else { | |
| scalarM += measuredMassBuffer[i].Intensity * measuredMassBuffer[i].FocusedMz; | |
| scalarR += referenceMassBuffer[i].Intensity * referenceMassBuffer[i].FocusedMz; | |
| } |
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / test
The variable 'sumReference' is assigned but its value is never used
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / test
The variable 'sumMeasure' is assigned but its value is never used
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (ubuntu-latest, net8, linux-x64)
The variable 'sumReference' is assigned but its value is never used
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (ubuntu-latest, net8, linux-x64)
The variable 'sumMeasure' is assigned but its value is never used
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (windows-latest, net48, win-x64)
The variable 'sumReference' is assigned but its value is never used
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (windows-latest, net48, win-x64)
The variable 'sumMeasure' is assigned but its value is never used
Check warning on line 4392 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5
The variable 'sumReference' is assigned but its value is never used
Check warning on line 4491 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (ubuntu-latest, net8, linux-x64)
The variable 'khi' is assigned but its value is never used
Check warning on line 4491 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (ubuntu-latest, net8, linux-x64)
The variable 'klo' is assigned but its value is never used
Check warning on line 4491 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (windows-latest, net48, win-x64)
The variable 'khi' is assigned but its value is never used
Check warning on line 4491 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5-console (windows-latest, net48, win-x64)
The variable 'klo' is assigned but its value is never used
Check warning on line 4491 in src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs
GitHub Actions / publish-msdial5
The variable 'khi' is assigned but its value is never used
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -40,6 +40,8 @@ public MsScanMatchResult CalculateScore(IMSIonProperty property, IMSScanProperty | |||||
| var sqweightedDotProduct = MsScanMatching.GetWeightedDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd); | ||||||
| var sqsimpleDotProduct = MsScanMatching.GetSimpleDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd); | ||||||
| var sqreverseDotProduct = MsScanMatching.GetReverseDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd); | ||||||
| var sqenhancedDotProduct = MsScanMatching.GetEnhancedDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd, .6d); | ||||||
| var spectrumEntropy = MsScanMatching.GetSpectralEntropySimilarity(scan.Spectrum, reference.Spectrum, parameter.Ms2Tolerance); | ||||||
|
Comment on lines
+43
to
+44
|
||||||
| var spectrumPenalty = reference.Spectrum != null && reference.Spectrum.Count == 1 ? true : false; | ||||||
| double[] matchedPeaksScores = null; | ||||||
| if (omics == TargetOmics.Lipidomics) { | ||||||
|
|
@@ -93,6 +95,8 @@ public MsScanMatchResult CalculateScore(IMSIonProperty property, IMSScanProperty | |||||
| MatchedPeaksCount = (float)matchedPeaksScores[1], | ||||||
| AcurateMassSimilarity = (float)ms1Similarity, | ||||||
| IsotopeSimilarity = (float)isotopeSimilarity, | ||||||
| EnhancedDotProduct = (float)Math.Sqrt(sqenhancedDotProduct), | ||||||
|
||||||
| EnhancedDotProduct = (float)Math.Sqrt(sqenhancedDotProduct), | |
| EnhancedDotProduct = sqenhancedDotProduct < 0 ? -1f : (float)Math.Sqrt(sqenhancedDotProduct), |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,8 @@ public class BaseAnalysisMetadataAccessorTests | |
| "Simple dot product", | ||
| "Weighted dot product", | ||
| "Reverse dot product", | ||
| "Enhanced dot product", | ||
| "Spectrum entropy", | ||
| "Matched peaks count", | ||
| "Matched peaks percentage", | ||
| "Total score", | ||
|
Comment on lines
39
to
46
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`GetNormalizedByTotalIntensityPeaks()` divides by `sumIntensity` without guarding against `sumIntensity == 0`. If a spectrum can contain only zero-intensity peaks, this will produce `NaN`/`Infinity` intensities and break downstream similarity calculations. Add a fast-path for `spectrum.Count == 0` and `sumIntensity <= 0` (e.g., return an empty list or the original list) to keep the normalization numerically safe.