LadnerLab · SeanGolez · Feb 18, 2025 · Aug 8, 2023 · Jun 10, 2024 · Jun 12, 2024
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,4 @@
 
 *~
 \#*#
+extensions/__pycache__/code.cpython-38.pyc
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -52,13 +52,24 @@ list( APPEND PepSIRF_LINK_LIBS
 )
 
 if(OpenMP_FOUND)
-  message( "OpenMP enabled" )
-  list( APPEND PepSIRF_LINK_LIBS OpenMP::OpenMP_CXX )
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp")
-  add_definitions( -DENABLE_OPENMP )
+  message("OpenMP enabled")
+
+  # Only use the Homebrew copy of libomp when it can actually be located.
+  set(PepSIRF_USE_BREW_LIBOMP FALSE)
+
+  if(APPLE)
+    # Get libomp filepath
+    execute_process(
+      COMMAND brew --prefix libomp
+      RESULT_VARIABLE BREW_RESULT
+      OUTPUT_VARIABLE BREW_PREFIX
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # If brew is missing or fails, BREW_PREFIX is empty and the paths below
+    # would degrade to "-I/include" and "/lib/libomp.dylib". Verify first.
+    if(BREW_RESULT EQUAL 0 AND EXISTS "${BREW_PREFIX}/lib/libomp.dylib")
+      set(PepSIRF_USE_BREW_LIBOMP TRUE)
+    else()
+      message(WARNING "Homebrew libomp not found; falling back to OpenMP::OpenMP_CXX.")
+    endif()
+  endif()
+
+  if(PepSIRF_USE_BREW_LIBOMP)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp -I${BREW_PREFIX}/include")
+    list(APPEND PepSIRF_LINK_LIBS "${BREW_PREFIX}/lib/libomp.dylib")
+  else()
+    list(APPEND PepSIRF_LINK_LIBS OpenMP::OpenMP_CXX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp")
+  endif()
+
+  # Define OpenMP macro
+  add_definitions(-DENABLE_OPENMP)
 
 else()
-  message( "WARNING: OpenMP not found, parallelism disabled." )
+  message("WARNING: OpenMP not found, parallelism disabled.")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-unused-value")
 endif()
 

diff --git a/docs/5-changelog.md b/docs/5-changelog.md
@@ -10,10 +10,14 @@ permalink: /changelog/
 
 ## 1.7.0 | 2024-10-3
 
-<strong>Docker: added new feature (Issue #254).</strong> Added the ability to run PepSIRF as a Docker image and added a page for instructions.
+### Bug Fixes:
 
 <strong>CMakeLists: bug fix (Issue #197).</strong> Resolved CMake not locating OpenMP on macOS. Tutorial for fix added to installation page.
 
+### New Features:
+
+<strong>Docker: added new feature (Issue #254).</strong> Added the ability to run PepSIRF as a Docker image and added a page for instructions.
+
 <strong>Subjoin: added new feature (Issue #236).</strong> Added functionality to the "-i" option in Subjoin so that it accepts a regex pattern instead of the name of a file containing sample/peptide names. The sample/peptide names taken from the score matrix file are filtered by whether they contain the regex pattern.
 
 <strong>Demux: added new feature (Issue #234).</strong> Added "--unmapped-reads-output" option to Demux, which writes all reads that have not been mapped to a sample/peptide to the specified filename.

diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock
@@ -1,23 +1,31 @@
 GEM
   remote: https://rubygems.org/
   specs:
-    activesupport (6.0.5)
+    activesupport (7.1.3.4)
+      base64
+      bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
-      i18n (>= 0.7, < 2)
-      minitest (~> 5.1)
-      tzinfo (~> 1.1)
-      zeitwerk (~> 2.2, >= 2.2.2)
+      connection_pool (>= 2.2.5)
+      drb
+      i18n (>= 1.6, < 2)
+      minitest (>= 5.1)
+      mutex_m
+      tzinfo (~> 2.0)
     addressable (2.8.0)
       public_suffix (>= 2.0.2, < 5.0)
+    base64 (0.2.0)
+    bigdecimal (3.1.8)
     coffee-script (2.4.1)
       coffee-script-source
       execjs
     coffee-script-source (1.11.1)
     colorator (1.1.0)
-    commonmarker (0.23.9)
-    concurrent-ruby (1.1.10)
+    commonmarker (0.23.10)
+    concurrent-ruby (1.3.3)
+    connection_pool (2.4.1)
     dnsruby (1.61.9)
       simpleidn (~> 0.1)
+    drb (2.2.1)
     em-websocket (0.5.3)
       eventmachine (>= 0.12.9)
       http_parser.rb (~> 0)
@@ -51,12 +59,12 @@ GEM
     ffi (1.15.5)
     forwardable-extended (2.6.0)
     gemoji (3.0.1)
-    github-pages (226)
+    github-pages (228)
       github-pages-health-check (= 1.17.9)
-      jekyll (= 3.9.2)
+      jekyll (= 3.9.3)
       jekyll-avatar (= 0.7.0)
       jekyll-coffeescript (= 1.1.1)
-      jekyll-commonmark-ghpages (= 0.2.0)
+      jekyll-commonmark-ghpages (= 0.4.0)
       jekyll-default-layout (= 0.1.4)
       jekyll-feed (= 0.15.1)
       jekyll-gist (= 1.5.0)
@@ -90,10 +98,10 @@ GEM
       jemoji (= 0.12.0)
       kramdown (= 2.3.2)
       kramdown-parser-gfm (= 1.1.0)
-      liquid (= 4.0.3)
+      liquid (= 4.0.4)
       mercenary (~> 0.3)
       minima (= 2.5.1)
-      nokogiri (>= 1.13.4, < 2.0)
+      nokogiri (>= 1.13.6, < 2.0)
       rouge (= 3.26.0)
       terminal-table (~> 1.4)
     github-pages-health-check (1.17.9)
@@ -106,13 +114,13 @@ GEM
       activesupport (>= 2)
       nokogiri (>= 1.4)
     http_parser.rb (0.8.0)
-    i18n (0.9.5)
+    i18n (1.14.5)
       concurrent-ruby (~> 1.0)
-    jekyll (3.9.2)
+    jekyll (3.9.3)
       addressable (~> 2.4)
       colorator (~> 1.0)
       em-websocket (~> 0.5)
-      i18n (~> 0.7)
+      i18n (>= 0.7, < 2)
       jekyll-sass-converter (~> 1.0)
       jekyll-watch (~> 2.0)
       kramdown (>= 1.17, < 3)
@@ -128,11 +136,11 @@ GEM
       coffee-script-source (~> 1.11.1)
     jekyll-commonmark (1.4.0)
       commonmarker (~> 0.22)
-    jekyll-commonmark-ghpages (0.2.0)
-      commonmarker (~> 0.23.4)
+    jekyll-commonmark-ghpages (0.4.0)
+      commonmarker (~> 0.23.7)
       jekyll (~> 3.9.0)
       jekyll-commonmark (~> 1.4.0)
-      rouge (>= 2.0, < 4.0)
+      rouge (>= 2.0, < 5.0)
     jekyll-default-layout (0.1.4)
       jekyll (~> 3.0)
     jekyll-feed (0.15.1)
@@ -220,7 +228,7 @@ GEM
       rexml
     kramdown-parser-gfm (1.1.0)
       kramdown (~> 2.0)
-    liquid (4.0.3)
+    liquid (4.0.4)
     listen (3.7.1)
       rb-fsevent (~> 0.10, >= 0.10.3)
       rb-inotify (~> 0.9, >= 0.9.10)
@@ -230,8 +238,9 @@ GEM
       jekyll (>= 3.5, < 5.0)
       jekyll-feed (~> 0.9)
       jekyll-seo-tag (~> 2.1)
-    minitest (5.15.0)
+    minitest (5.23.1)
     multipart-post (2.1.1)
+    mutex_m (0.2.0)
     nokogiri (1.14.3)
       mini_portile2 (~> 2.8.0)
       racc (~> 1.4)
@@ -245,7 +254,8 @@ GEM
     rb-fsevent (0.11.1)
     rb-inotify (0.10.1)
       ffi (~> 1.0)
-    rexml (3.2.5)
+    rexml (3.3.0)
+      strscan
     rouge (3.26.0)
     ruby2_keywords (0.0.5)
     rubyzip (2.3.2)
@@ -260,18 +270,17 @@ GEM
       faraday (> 0.8, < 2.0)
     simpleidn (0.2.1)
       unf (~> 0.1.4)
+    strscan (3.1.0)
     terminal-table (1.8.0)
       unicode-display_width (~> 1.1, >= 1.1.1)
-    thread_safe (0.3.6)
     typhoeus (1.4.0)
       ethon (>= 0.9.0)
-    tzinfo (1.2.9)
-      thread_safe (~> 0.1)
+    tzinfo (2.0.6)
+      concurrent-ruby (~> 1.0)
     unf (0.1.4)
       unf_ext
     unf_ext (0.0.8.1)
     unicode-display_width (1.8.0)
-    zeitwerk (2.5.4)
 
 PLATFORMS
   ruby

diff --git a/extensions/e_k_bias.py b/extensions/e_k_bias.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+import pandas as pd
+import argparse
+import fastatools as ft
+import numpy as np
+
+def main():
+	"""Parse CLI arguments, score each peptide's E/K content, and write a TSV report.
+
+	The output has one row per FASTA record with the columns CodeName,
+	e_k_Prop (rounded to 3 decimals) and e_k_Percentile (rounded to 2 decimals).
+	"""
+
+	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+	# the input is a single FASTA file: it is passed directly to ft.read_fasta_dict
+	parser.add_argument('-i', '--fasta-file',  help='FASTA file of peptide sequences for input', required=True)
+	parser.add_argument('-o', '--output-file', default="e_k_bias_out.tsv", help='Name of .tsv to output file with AA bias data')
+
+	args = parser.parse_args()
+
+	# get proportion of e's and k's for each peptide
+	e_k_props = get_e_k_props(args.fasta_file)
+
+	# get percentiles
+	percentile_dict = get_percentiles(e_k_props)
+
+	# create output df
+	out_data = [(name, round(prop, 3), round(percentile_dict[name], 2)) for name, prop in e_k_props.items()]
+
+	pd.DataFrame(out_data, columns=["CodeName", "e_k_Prop", "e_k_Percentile"]).to_csv(args.output_file, index=False, sep='\t')
+
+
+# get proportion of e's and k's for each peptide
+def get_e_k_props(fasta_file)->dict:
+	"""Return {record name: fraction of residues that are E or K} for a FASTA file.
+
+	Matching is case-insensitive. A record with an empty sequence gets 0.0
+	instead of raising ZeroDivisionError.
+	"""
+	e_k_props = dict()
+
+	# get props for peptide file; the result is iterated below as name -> sequence pairs
+	fasta_dict = ft.read_fasta_dict(fasta_file)
+
+	# iterate through each sequence
+	for name, seq in fasta_dict.items():
+		# an empty record has no residues to score
+		if len(seq) == 0:
+			e_k_props[name] = 0.0
+			continue
+
+		# count of E and K residues, ignoring letter case
+		e_k_count = sum(1 for aa in seq if aa.lower() in ('e', 'k'))
+
+		# add proportion to dict
+		e_k_props[name] = e_k_count / len(seq)
+
+	return e_k_props
+
+# get percentile of each peptide using its e and k proportion
+def get_percentiles(e_k_props)->dict:
+	"""Map every peptide name to a 0-100 rank based on its E/K proportion.
+
+	Ranks are spread evenly over the *distinct* proportions: the smallest
+	distinct value gets 0, the largest gets 100, and peptides sharing a
+	proportion share a rank.
+	"""
+	peptide_names = list(e_k_props)
+	prop_values = np.array([e_k_props[peptide] for peptide in peptide_names])
+
+	# distinct_props is sorted ascending; rank_index[i] is where prop_values[i] sits in it
+	distinct_props, rank_index = np.unique(prop_values, return_inverse=True)
+
+	# one evenly spaced percentile per distinct proportion
+	rank_scale = np.linspace(0, 100, len(distinct_props))
+
+	return {peptide: rank_scale[idx] for peptide, idx in zip(peptide_names, rank_index)}
+
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+	main()
diff --git a/extensions/findEpitopes.py b/extensions/findEpitopes.py
@@ -0,0 +1,105 @@
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+import argparse
+
+def main():
+    """Command-line entry point: pick one alignment per cluster, count probe coverage, and plot it."""
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('-i', '--input-dir',  help='Directory with alignment output files and files that contain the mapped location of peptides', required=True)
+    parser.add_argument('-o', '--output-dir', default="clust_align_visualizations", help='Name of directory to output line plots.')
+
+    args = parser.parse_args()
+
+    directory_path = args.input_dir
+    # alignment file name -> alignment length, one entry per cluster
+    alignment_to_use_dict = read_check_align_file(directory_path)
+    # alignment file name -> {position: count}
+    alignCountsD = process_probes(alignment_to_use_dict, directory_path)
+
+    # makedirs creates missing parent directories and tolerates an existing
+    # directory, avoiding the check-then-create race of exists() + mkdir()
+    os.makedirs(args.output_dir, exist_ok=True)
+    create_line_chart(alignCountsD, args.output_dir)
+
+
+def create_line_chart(alignCountsD, out_dir):
+    """Save one line plot (PNG) of count versus sequence position per alignment file.
+
+    Args:
+        alignCountsD: dict mapping an alignment file name to a dict of
+            {sequence position: count}.
+        out_dir: directory that receives the PNG files.
+
+    Each image is named "<token>_epitopes_lineplot.png", where <token> is the
+    second-to-last "_"-separated piece of the alignment file name.
+    """
+    for file, pos_dict in alignCountsD.items():
+        # nothing to plot; min()/max() below would raise on an empty sequence
+        if not pos_dict:
+            continue
+
+        x = list(pos_dict.keys())
+        y = list(pos_dict.values())
+
+        # width grows with the largest position; the floor of 1 avoids a zero-width figure
+        fig, ax = plt.subplots(figsize=(max(max(x) / 10, 1), 10), facecolor='w')
+        ax.plot(x, y, linestyle='-')
+        ax.set_xticks(np.arange(min(x), max(x)+5, 5))
+        ax.set_xlim(left=min(x))
+        ax.set_ylim(bottom=min(y))
+        ax.grid()
+        ax.set_xlabel("Sequence Position")
+        ax.set_ylabel("Count")
+        ax.set_title(file)
+        fig.savefig(os.path.join(out_dir, f"{file.split('_')[-2]}_epitopes_lineplot.png"), dpi=300, bbox_inches='tight')
+
+        # close the figure so figures do not accumulate across loop iterations
+        plt.close(fig)
+
+
+def find_smallest_value_with_substring(data_dict, substring):
+    """Return the (key, value) pair with the smallest value among keys containing `substring`.
+
+    Returns None when no key contains `substring`. On ties the pair that
+    appears first in the dictionary wins.
+    """
+    # Keep only the entries whose key mentions the substring
+    candidates = [(key, value) for key, value in data_dict.items() if substring in key]
+
+    # No matching key at all
+    if len(candidates) == 0:
+        return None
+
+    # Linear scan; strict "<" keeps the earliest entry when values tie
+    best_pair = candidates[0]
+    for pair in candidates[1:]:
+        if pair[1] < best_pair[1]:
+            best_pair = pair
+
+    return best_pair
+
+
+def read_check_align_file(directory):
+    """Pick the shortest alignment for every cluster listed in checkAlignLength.out.
+
+    The file is scanned line by line: a line containing "mafft" names an
+    alignment, and a following line containing "Alignment:" gives its length.
+    The cluster id is the second-to-last "_"-separated piece of the "mafft" line.
+
+    Args:
+        directory: directory that contains 'checkAlignLength.out'.
+
+    Returns:
+        dict mapping the chosen alignment name (one per cluster) to its length
+        as an int. Callers that apply int() to the value keep working.
+
+    Raises:
+        ValueError: if the text after "Alignment:" is not an integer.
+    """
+    data_dict = {}
+    clusters = set()
+
+    # Construct the full file path
+    filepath = os.path.join(directory, 'checkAlignLength.out')
+    # Read the file content
+    with open(filepath, 'r') as file:
+        alignedCluster = None
+        for line in file:
+            if "mafft" in line:
+                alignedCluster = line.strip()
+                clusters.add(line.split('_')[-2])
+            elif "Alignment:" in line and alignedCluster:
+                # store as int so lengths compare numerically; as text,
+                # "100" < "99" is True and the longer alignment would be chosen
+                data_dict[alignedCluster] = int(line.replace('Alignment:','').strip())
+
+    # Find alignment with shortest length for each cluster
+    # NOTE(review): matching is by substring, so a cluster id that is a substring
+    # of another id (e.g. "c1" vs "c10") shares candidates - confirm ids are distinct enough.
+    results = {}
+    for cluster in clusters:
+        result = find_smallest_value_with_substring(data_dict, cluster)
+        # a "mafft" line that never got an "Alignment:" line has no recorded length
+        if result is None:
+            continue
+        results[result[0]] = result[1]
+
+    return results
+
+
+def process_probes(probes_dict, directory_path):
+    """Count, for each alignment, how many times each position is listed in its probe file.
+
+    Args:
+        probes_dict: dict mapping an alignment file name to its alignment
+            length (anything int() accepts).
+        directory_path: directory holding the "*_probesAligned.txt" files; the
+            file name is derived by replacing '.fasta' in the alignment name.
+
+    Returns:
+        dict mapping each alignment file name to {position: count} for
+        positions 0..length inclusive.
+
+    Raises:
+        KeyError: if a listed position lies outside 0..length.
+        ValueError: if a position token is not an integer.
+    """
+    result = {}
+
+    for filename, data in probes_dict.items():
+        aligned_probes_file = filename.replace('.fasta', '_probesAligned.txt')
+        aligned_probes_path = os.path.join(directory_path, aligned_probes_file)
+
+        aligned_length = int(data)
+
+        # one counter per position, 0 through aligned_length inclusive
+        alignD = {key: 0 for key in range(aligned_length + 1)}
+
+        with open(aligned_probes_path, 'r') as file:
+            for line_count, line in enumerate(file):
+                # the first line is skipped (treated as a header)
+                if line_count == 0:
+                    continue
+                # blank lines (e.g. a trailing newline) carry no positions
+                if not line.strip():
+                    continue
+                # last tab-separated field holds "~"-joined positions
+                seq_positions = line.split('\t')[-1].split('~')
+                for pos in seq_positions:
+                    pos = pos.strip()
+                    # skip empty tokens such as those left by a trailing "~"
+                    if pos:
+                        alignD[int(pos)] += 1
+
+        result[filename] = alignD
+
+    return result
+
+# Run the command-line interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,3 +8,4 @@

		*~
		\#*#
		extensions/__pycache__/code.cpython-38.pyc