Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a295d14
Bump commonmarker from 0.23.9 to 0.23.10 in /docs
dependabot[bot] Aug 8, 2023
6511c3d
Create link_patternedKmers.py
jtladner Jun 10, 2024
d71913f
Merge pull request #211 from LadnerLab/dependabot/bundler/docs/common…
jtladner Jun 12, 2024
7426eba
Bump rexml from 3.2.5 to 3.3.0 in /docs
dependabot[bot] Jun 12, 2024
3ee4186
Bump activesupport from 6.0.5 to 7.1.3.4 in /docs
dependabot[bot] Jun 12, 2024
c242403
Merge pull request #242 from LadnerLab/dependabot/bundler/docs/active…
jtladner Jun 12, 2024
ff339f3
Merge pull request #240 from LadnerLab/dependabot/bundler/docs/rexml-…
jtladner Jun 12, 2024
a5e2f9d
Merge branch 'master' of https://github.com/LadnerLab/PepSIRF
jtladner Jun 18, 2024
20434a8
Create pepCountPoisson.py
jtladner Jul 9, 2024
91f4b66
Update .gitignore
jtladner Jul 10, 2024
d606cf1
Create findEpitopes.py
SeanGolez Jul 10, 2024
d0c4e4e
Created script for finding peptides that have a disproportionately hi…
SeanGolez Oct 7, 2024
7c3a1ed
Merge branch 'master' into develop
SeanGolez Oct 7, 2024
1111212
Fixed changelog
SeanGolez Oct 30, 2024
0e84645
Add option to output truncated sequence information
SeanGolez Dec 2, 2024
4a7d454
flip unique and nonunique outputs
SeanGolez Dec 3, 2024
40c5ab1
grammar fix
SeanGolez Dec 4, 2024
0700256
Add test for truncated sequence info
SeanGolez Dec 16, 2024
794e7a7
Update pepsirf_test.cpp
SeanGolez Dec 16, 2024
f14901e
Update pepsirf_test.cpp
SeanGolez Dec 16, 2024
ce8fd61
Merge pull request #259 from LadnerLab/demux-iss258
SeanGolez Dec 16, 2024
594a32d
Fix directory_iterator namespace error, update version, and change ma…
SeanGolez Feb 14, 2025
ddd9f9c
fix indentation
SeanGolez Feb 14, 2025
933a6e5
Merge pull request #260 from LadnerLab/fix-makefile
SeanGolez Feb 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@

*~
\#*#
extensions/__pycache__/code.cpython-38.pyc
21 changes: 16 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,24 @@ list( APPEND PepSIRF_LINK_LIBS
)

# Configure OpenMP support.
#   * OpenMP found     -> add the OpenMP compile flags, link the runtime and
#                         define ENABLE_OPENMP for the sources.
#   * OpenMP not found -> build without parallelism and silence the warnings
#                         that the now-unknown OpenMP pragmas would trigger.
if(OpenMP_FOUND)
    message("OpenMP enabled")

    # Set to TRUE only when a usable Homebrew libomp install has been located.
    set(PepSIRF_USE_BREW_LIBOMP FALSE)

    if(APPLE)
        # Get libomp filepath from Homebrew. The exit status is captured so a
        # missing `brew` or missing libomp formula does not silently produce
        # paths such as "/include" and "/lib/libomp.dylib".
        execute_process(
            COMMAND brew --prefix libomp
            RESULT_VARIABLE BREW_RESULT
            OUTPUT_VARIABLE BREW_PREFIX
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_QUIET
        )

        if("${BREW_RESULT}" STREQUAL "0" AND EXISTS "${BREW_PREFIX}/lib/libomp.dylib")
            set(PepSIRF_USE_BREW_LIBOMP TRUE)
        else()
            message("WARNING: Homebrew libomp not found, falling back to OpenMP::OpenMP_CXX.")
        endif()
    endif()

    if(PepSIRF_USE_BREW_LIBOMP)
        # Link the Homebrew runtime directly and expose its headers.
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp -I${BREW_PREFIX}/include")
        list(APPEND PepSIRF_LINK_LIBS "${BREW_PREFIX}/lib/libomp.dylib")
    else()
        # Link through the imported target provided by find_package(OpenMP).
        list(APPEND PepSIRF_LINK_LIBS OpenMP::OpenMP_CXX)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp")
    endif()

    # Define OpenMP macro
    add_definitions(-DENABLE_OPENMP)

else()
    message("WARNING: OpenMP not found, parallelism disabled.")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-unused-value")
endif()

Expand Down
6 changes: 5 additions & 1 deletion docs/5-changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@ permalink: /changelog/

## 1.7.0 | 2024-10-3

<strong>Docker: added new feature (Issue #254).</strong> Added the ability to run PepSIRF as a Docker image and added a page for instructions.
### Bug Fixes:

<strong>CMakeLists: bug fix (Issue #197).</strong> Resolved CMake not locating OpenMP on macOS. Tutorial for fix added to installation page.

### New Features:

<strong>Docker: added new feature (Issue #254).</strong> Added the ability to run PepSIRF as a Docker image and added a page for instructions.

<strong>Subjoin: added new feature (Issue #236).</strong> Added functionality to the "-i" option in Subjoin to accept a regex pattern instead of a filename which contains sample/peptide names. The sample/peptide names used from the score matrix file will be filtered by whether they contain the regex pattern.

<strong>Demux: added new feature (Issue #234).</strong> Added "--unmapped-reads-output" option to Demux, which writes all reads that have not been mapped to a sample/peptide to the specified filename.
Expand Down
59 changes: 34 additions & 25 deletions docs/Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
GEM
remote: https://rubygems.org/
specs:
activesupport (6.0.5)
activesupport (7.1.3.4)
base64
bigdecimal
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 0.7, < 2)
minitest (~> 5.1)
tzinfo (~> 1.1)
zeitwerk (~> 2.2, >= 2.2.2)
connection_pool (>= 2.2.5)
drb
i18n (>= 1.6, < 2)
minitest (>= 5.1)
mutex_m
tzinfo (~> 2.0)
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
base64 (0.2.0)
bigdecimal (3.1.8)
coffee-script (2.4.1)
coffee-script-source
execjs
coffee-script-source (1.11.1)
colorator (1.1.0)
commonmarker (0.23.9)
concurrent-ruby (1.1.10)
commonmarker (0.23.10)
concurrent-ruby (1.3.3)
connection_pool (2.4.1)
dnsruby (1.61.9)
simpleidn (~> 0.1)
drb (2.2.1)
em-websocket (0.5.3)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0)
Expand Down Expand Up @@ -51,12 +59,12 @@ GEM
ffi (1.15.5)
forwardable-extended (2.6.0)
gemoji (3.0.1)
github-pages (226)
github-pages (228)
github-pages-health-check (= 1.17.9)
jekyll (= 3.9.2)
jekyll (= 3.9.3)
jekyll-avatar (= 0.7.0)
jekyll-coffeescript (= 1.1.1)
jekyll-commonmark-ghpages (= 0.2.0)
jekyll-commonmark-ghpages (= 0.4.0)
jekyll-default-layout (= 0.1.4)
jekyll-feed (= 0.15.1)
jekyll-gist (= 1.5.0)
Expand Down Expand Up @@ -90,10 +98,10 @@ GEM
jemoji (= 0.12.0)
kramdown (= 2.3.2)
kramdown-parser-gfm (= 1.1.0)
liquid (= 4.0.3)
liquid (= 4.0.4)
mercenary (~> 0.3)
minima (= 2.5.1)
nokogiri (>= 1.13.4, < 2.0)
nokogiri (>= 1.13.6, < 2.0)
rouge (= 3.26.0)
terminal-table (~> 1.4)
github-pages-health-check (1.17.9)
Expand All @@ -106,13 +114,13 @@ GEM
activesupport (>= 2)
nokogiri (>= 1.4)
http_parser.rb (0.8.0)
i18n (0.9.5)
i18n (1.14.5)
concurrent-ruby (~> 1.0)
jekyll (3.9.2)
jekyll (3.9.3)
addressable (~> 2.4)
colorator (~> 1.0)
em-websocket (~> 0.5)
i18n (~> 0.7)
i18n (>= 0.7, < 2)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 2.0)
kramdown (>= 1.17, < 3)
Expand All @@ -128,11 +136,11 @@ GEM
coffee-script-source (~> 1.11.1)
jekyll-commonmark (1.4.0)
commonmarker (~> 0.22)
jekyll-commonmark-ghpages (0.2.0)
commonmarker (~> 0.23.4)
jekyll-commonmark-ghpages (0.4.0)
commonmarker (~> 0.23.7)
jekyll (~> 3.9.0)
jekyll-commonmark (~> 1.4.0)
rouge (>= 2.0, < 4.0)
rouge (>= 2.0, < 5.0)
jekyll-default-layout (0.1.4)
jekyll (~> 3.0)
jekyll-feed (0.15.1)
Expand Down Expand Up @@ -220,7 +228,7 @@ GEM
rexml
kramdown-parser-gfm (1.1.0)
kramdown (~> 2.0)
liquid (4.0.3)
liquid (4.0.4)
listen (3.7.1)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
Expand All @@ -230,8 +238,9 @@ GEM
jekyll (>= 3.5, < 5.0)
jekyll-feed (~> 0.9)
jekyll-seo-tag (~> 2.1)
minitest (5.15.0)
minitest (5.23.1)
multipart-post (2.1.1)
mutex_m (0.2.0)
nokogiri (1.14.3)
mini_portile2 (~> 2.8.0)
racc (~> 1.4)
Expand All @@ -245,7 +254,8 @@ GEM
rb-fsevent (0.11.1)
rb-inotify (0.10.1)
ffi (~> 1.0)
rexml (3.2.5)
rexml (3.3.0)
strscan
rouge (3.26.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
Expand All @@ -260,18 +270,17 @@ GEM
faraday (> 0.8, < 2.0)
simpleidn (0.2.1)
unf (~> 0.1.4)
strscan (3.1.0)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)
typhoeus (1.4.0)
ethon (>= 0.9.0)
tzinfo (1.2.9)
thread_safe (~> 0.1)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.1)
unicode-display_width (1.8.0)
zeitwerk (2.5.4)

PLATFORMS
ruby
Expand Down
69 changes: 69 additions & 0 deletions extensions/e_k_bias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

import pandas as pd
import argparse
import fastatools as ft
import numpy as np

def main():
    """Command-line entry point: score the E/K content of every peptide.

    Reads the fasta file given with ``-i/--fasta-file``, computes the
    proportion of E and K residues in each sequence, converts those
    proportions to percentile ranks and writes one tab-separated row per
    sequence (``CodeName``, ``e_k_Prop``, ``e_k_Percentile``) to the file
    given with ``-o/--output-file``.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # The input is a single fasta file (it is passed straight to
    # read_fasta_dict), so the help text must not describe it as a directory.
    parser.add_argument('-i', '--fasta-file', help='Fasta file of peptide sequences for input', required=True)
    parser.add_argument('-o', '--output-file', default="e_k_bias_out.tsv", help='Name of .tsv to output file with AA bias data')

    args = parser.parse_args()

    # get proportion of e's and k's for each peptide
    e_k_props = get_e_k_props(args.fasta_file)

    # get percentiles
    percentile_dict = get_percentiles(e_k_props)

    # create output df: proportions rounded to 3 decimals, percentiles to 2
    out_data = [(name, round(prop, 3), round(percentile_dict[name], 2)) for name, prop in e_k_props.items()]

    pd.DataFrame(out_data, columns=["CodeName", "e_k_Prop", "e_k_Percentile"]).to_csv(args.output_file, index=False, sep='\t')


# get proportion of e's and k's for each peptide
def get_e_k_props(fasta_file)->dict:
    """Return the proportion of E and K residues for each sequence in a fasta file.

    Args:
        fasta_file: Path passed to ``ft.read_fasta_dict``; presumably that
            helper returns a mapping of sequence name -> sequence string
            (verify against fastatools).

    Returns:
        dict mapping each sequence name to
        ``(number of E or K residues, case-insensitive) / len(sequence)``.
        An empty sequence gets a proportion of ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    e_k_props = dict()

    # get props for peptide file
    fasta_dict = ft.read_fasta_dict(fasta_file)

    # iterate through each sequence
    for name, seq in fasta_dict.items():
        # an empty record has no residues at all; avoid dividing by zero
        if len(seq) == 0:
            e_k_props[name] = 0.0
            continue

        # count of e and k residues, ignoring case
        e_k_count = sum(1 for aa in seq if aa.lower() in ('e', 'k'))

        # add proportion to dict
        e_k_props[name] = e_k_count / len(seq)

    return e_k_props

# get percentile of each peptide using its e and k proportion
def get_percentiles(e_k_props)->dict:
    """Rank every peptide's E/K proportion on a 0-100 scale.

    The distinct proportion values are sorted and spread evenly between 0 and
    100 (``np.linspace``); each peptide receives the rank of its own value, so
    peptides with equal proportions share the same percentile.

    Args:
        e_k_props: dict mapping peptide name -> E/K proportion.

    Returns:
        dict mapping peptide name -> percentile rank, in the same key order
        as ``e_k_props``.
    """
    prop_values = np.array(list(e_k_props.values()))

    # distinct_props is sorted ascending; rank_index[i] is the position of the
    # i-th original value inside distinct_props
    distinct_props, rank_index = np.unique(prop_values, return_inverse=True)

    # one evenly spaced rank per distinct proportion
    rank_scale = np.linspace(0, 100, len(distinct_props))

    # pair every name with the rank of its own proportion
    return {
        peptide: rank_scale[position]
        for peptide, position in zip(e_k_props.keys(), rank_index)
    }


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
main()
105 changes: 105 additions & 0 deletions extensions/findEpitopes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import os
import matplotlib.pyplot as plt
import numpy as np
import argparse

def main():
    """Command-line entry point: plot per-position probe counts for each cluster.

    Reads ``checkAlignLength.out`` from the input directory to pick one
    alignment per cluster, counts how many probes cover each alignment
    position, and writes one line plot per selected alignment into the output
    directory.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-i', '--input-dir', help='Directory with alignment output files and files that contain the mapped location of peptides', required=True)
    parser.add_argument('-o', '--output-dir', default="clust_align_visualizations", help='Name of directory to output line plots.')

    args = parser.parse_args()

    directory_path = args.input_dir
    alignment_to_use_dict = read_check_align_file(directory_path)
    alignCountsD = process_probes(alignment_to_use_dict, directory_path)

    # makedirs creates missing parent directories as well and does not fail
    # when the directory already exists
    os.makedirs(args.output_dir, exist_ok=True)
    create_line_chart(alignCountsD, args.output_dir)


def create_line_chart(alignCountsD, out_dir):
    """Save one line plot of count versus sequence position per alignment.

    Args:
        alignCountsD: dict mapping an alignment file name to a dict of
            ``position -> count``.
        out_dir: Existing directory the PNG files are written to.

    Each plot is saved as ``<token>_epitopes_lineplot.png`` where ``<token>``
    is the second-to-last underscore-separated piece of the alignment file
    name, so the name must contain at least one underscore.
    """
    for file, pos_dict in alignCountsD.items():
        x = list(pos_dict.keys())
        y = list(pos_dict.values())

        # figure width scales with the largest position
        fig, ax = plt.subplots(figsize=(max(x)/10, 10), facecolor='w')
        ax.plot(x, y, linestyle='-')
        ax.set_xticks(np.arange(min(x), max(x)+5, 5))
        ax.set_xlim(left=min(x))
        ax.set_ylim(bottom=min(y))
        plt.grid()
        plt.xlabel("Sequence Position")
        plt.ylabel("Count")
        plt.title(file)
        plt.savefig(os.path.join(out_dir, f"{file.split('_')[-2]}_epitopes_lineplot.png"), dpi=300, bbox_inches='tight')

        # release the figure; otherwise every plot stays alive in pyplot's
        # registry and memory grows with the number of alignments
        plt.close(fig)


def find_smallest_value_with_substring(data_dict, substring):
    """Return the ``(key, value)`` pair with the smallest value among matching keys.

    Args:
        data_dict: dict to search.
        substring: only keys containing this substring are considered.

    Returns:
        The ``(key, value)`` tuple whose value is smallest, the first such
        pair in dict order on ties, or ``None`` when no key contains
        ``substring``.
    """
    # Candidates, kept in the dictionary's own order
    candidates = [(key, value) for key, value in data_dict.items() if substring in key]

    # Nothing matched the substring
    if len(candidates) == 0:
        return None

    # Linear scan; a strict comparison keeps the earliest pair on ties
    winner = candidates[0]
    for pair in candidates[1:]:
        if pair[1] < winner[1]:
            winner = pair

    return winner


def read_check_align_file(directory):
    """Pick the shortest alignment of every cluster listed in ``checkAlignLength.out``.

    The file is read line by line: a line containing ``mafft`` names an
    alignment file, and a following line containing ``Alignment:`` gives that
    alignment's length. The cluster an alignment belongs to is the
    second-to-last underscore-separated token of its line.

    Args:
        directory: Directory that contains ``checkAlignLength.out``.

    Returns:
        dict mapping the name of the shortest alignment of each cluster to its
        length (kept as the string read from the file). Clusters for which no
        length was found are omitted.

    Raises:
        ValueError: if a recorded alignment length is not an integer.
    """
    # Construct the full file path
    filepath = os.path.join(directory, 'checkAlignLength.out')

    lengths = {}        # alignment name -> length string (last one read wins)
    cluster_names = {}  # cluster id -> alignment names, in first-seen order

    # Read the file content
    with open(filepath, 'r') as file:
        alignedCluster = None
        for line in file:
            if "mafft" in line:
                alignedCluster = line.strip()
                names = cluster_names.setdefault(line.split('_')[-2], [])
                if alignedCluster not in names:
                    names.append(alignedCluster)
            elif "Alignment:" in line and alignedCluster:
                lengths[alignedCluster] = line.replace('Alignment:', '').strip()

    # Find alignment with shortest length for each cluster. Alignments are
    # grouped by their exact cluster id (a substring test would let cluster
    # "1" also match files of cluster "10") and lengths are compared as
    # integers (as strings, "100" would sort before "95").
    results = {}
    for names in cluster_names.values():
        measured = [name for name in names if name in lengths]
        if not measured:
            # cluster was named but no alignment length followed it
            continue
        shortest = min(measured, key=lambda name: int(lengths[name]))
        results[shortest] = lengths[shortest]

    return results


def process_probes(probes_dict, directory_path):
    """Count how many probes cover each position of every selected alignment.

    For each alignment file name in ``probes_dict`` the companion file
    ``<name with '.fasta' replaced by '_probesAligned.txt'>`` is read from
    ``directory_path``. Its first line is skipped as a header; on every other
    line the last tab-separated field is a ``~``-separated list of positions.

    Args:
        probes_dict: dict mapping alignment file name -> alignment length
            (anything ``int()`` accepts).
        directory_path: Directory containing the ``_probesAligned.txt`` files.

    Returns:
        dict mapping each alignment file name to a dict of
        ``position -> count`` with keys ``0 .. alignment length`` inclusive.

    Raises:
        KeyError: if a listed position lies outside ``0 .. alignment length``.
        ValueError: if a non-empty position token is not an integer.
    """
    result = {}

    for filename, data in probes_dict.items():
        aligned_probes_file = filename.replace('.fasta', '_probesAligned.txt')
        aligned_probes_path = os.path.join(directory_path, aligned_probes_file)

        aligned_length = int(data)

        # one counter per position, both ends included
        alignD = {key: 0 for key in range(aligned_length + 1)}

        with open(aligned_probes_path, 'r') as file:
            # skip the header line (no-op on an empty file)
            next(file, None)
            for line in file:
                seq_positions = line.split('\t')[-1].split('~')
                for pos in seq_positions:
                    # blank lines and empty position fields carry no position
                    if not pos.strip():
                        continue
                    alignD[int(pos)] += 1

        result[filename] = alignD

    return result

# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
main()
Loading