xinyu-dev · OWissett · Oct 31, 2022
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/annotate_v/__init__.py b/annotate_v/__init__.py
@@ -0,0 +1 @@
+from .annotater import Annotater, AbNamingScheme
diff --git a/annotate_v/annotater.py b/annotate_v/annotater.py
@@ -0,0 +1,221 @@
+from typing import Literal
+import requests
+
+# Scheme names accepted by `Annotater` (all lowercase).
+AbNamingScheme = Literal["kabat", "chothia", "contact", "imgt"]
+# Chain identifiers: the first character of the first label returned by Abnum.
+ChainType = Literal["H", "L"]
+
+class Annotater():
+    """
+    Split an antibody variable-domain sequence into FR and CDR regions.
+
+    The sequence is numbered by the Abnum web service and then cut into the
+    seven regions FR1, CDR1, FR2, CDR2, FR3, CDR3 and FR4 according to the
+    chosen scheme.
+
+    :param aaseq: STRING: A single-letter amino acid sequence corresponding to the complete VH or VL chain.
+
+    :param scheme: STRING: "kabat", "chothia", "contact", or "imgt". Must be in lowercase
+
+    Public methods: `retrieve()` fetches the numbered sequence from Abnum,
+    passes it to `analyze()` to determine the FR and CDR regions, and then to
+    `output()`, which prints the result and returns a list of 2 dictionaries
+    (region:seq pairs and number:residue pairs).
+
+    """
+
+    # Valid scheme names, in the order used by the error message in `retrieve()`.
+    _SCHEMES = ("kabat", "chothia", "contact", "imgt")
+
+    # Region names in N- to C-terminal order.
+    _REGIONS = ("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4")
+
+    # For each chain and scheme: the position numbers at which CDR1, FR2,
+    # CDR2, FR3, CDR3 and FR4 begin. FR1 always starts at the first residue
+    # and FR4 runs to the end of the numbered list.
+    _BOUNDARIES = {
+        "L": {
+            "kabat": (24, 35, 50, 57, 89, 98),
+            "chothia": (24, 35, 50, 57, 89, 98),
+            "contact": (30, 37, 46, 56, 89, 97),
+            "imgt": (27, 33, 50, 52, 89, 98),
+        },
+        "H": {
+            "kabat": (31, 36, 50, 66, 95, 103),
+            "chothia": (26, 33, 52, 57, 95, 103),
+            "contact": (30, 36, 47, 59, 93, 102),
+            # NOTE: CDR2 covers 51-57 here (original author's note: "instead of 56").
+            "imgt": (26, 34, 51, 58, 93, 103),
+        },
+    }
+
+    def __init__(self, aaseq: str, scheme: AbNamingScheme):
+        self.aaseq = aaseq
+        self.scheme = scheme
+
+    def __repr__(self):
+        return "Annotation of VH or VL sequence using Kabat, Chothia, Contact, or IMGT scheme"
+
+    @staticmethod
+    def _chain_prefix(chain):
+        """Return "L" for a light chain; any other value is treated as heavy ("H")."""
+        return "L" if chain == "L" else "H"
+
+    def output(self, chain: ChainType, lst: list, regionlst: list):
+        """
+        Prints the FR and CDR regions and their corresponding seq. It returns a `list` of 2 `dict`.
+
+        :param chain: STRING, either "H" or "L" in uppercase
+        :param lst:  LIST, alternating position labels and residues, e.g. ["L1", "D", "L2", "I", ...]
+        :param regionlst: LIST, a list of 7 peptides, each corresponds to a FR or CDR region
+        :return: LIST, a list of 2 `dict`, The first dict consists of region: seq pairs. The second dict consists of number:residue pairs.
+
+        """
+
+        self.chain = chain
+        self.lst = lst
+        self.regionlst = regionlst
+        self.regiondict = {}
+        # Even indices hold the position label, odd indices the residue.
+        self.numberdict = {lst[i]: lst[i + 1] for i in range(0, len(lst), 2)}
+
+        print(f"Using {self.scheme} scheme:")
+
+        prefix = self._chain_prefix(chain)
+        for position, name in enumerate(self._REGIONS):
+            label = f"{prefix}-{name}"
+            # Pad "X-FRn:" to the width of "X-CDRn: " so sequences line up.
+            print(f"{label}:".ljust(8), regionlst[position])
+            self.regiondict[label] = regionlst[position]
+
+        return [self.regiondict, self.numberdict]
+
+    def analyze(self, chain, lst):
+        """
+        Define CDR and FR regions based on the numbered sequence returned from website
+
+        The regions are also stored on the instance as `L_FR1` ... `L_FR4` or
+        `H_FR1` ... `H_FR4`, depending on the chain.
+
+        :param chain: STRING, "H" or "L" in uppercase
+        :param lst: LIST, alternating position labels and residues, e.g. ["H1", "Q", "H2", "V", ...]
+        :return: LIST, a list of strings, where each string is a peptide corresponding to the a region, in the order of: FR1, CDR1, FR2, CDR2, FR3, CDR3, FR4
+
+        :raises: `AnnotareError` if any of the boundary positions is missing from `lst`
+
+        """
+
+        self.chain = chain
+        self.lst = lst
+
+        prefix = self._chain_prefix(chain)
+        by_scheme = self._BOUNDARIES[prefix]
+        # Any unrecognised scheme falls back to the IMGT boundaries.
+        scheme = self.scheme if self.scheme in self._SCHEMES else "imgt"
+        boundaries = by_scheme[scheme]
+
+        for name in self._REGIONS:
+            setattr(self, f"{prefix}_{name}", "")
+
+        try:
+            # list.index raises ValueError when a boundary label is absent.
+            cuts = [0] + [lst.index(f"{prefix}{pos}") for pos in boundaries] + [len(lst)]
+        except ValueError as err:
+            raise AnnotareError("Unable to retrieve complete V region. Make sure the sequence has complete V region") from err
+
+        regions = []
+        for name, start, stop in zip(self._REGIONS, cuts, cuts[1:]):
+            # Labels sit at i, their residues at i + 1.
+            peptide = "".join(lst[i + 1] for i in range(start, stop, 2))
+            setattr(self, f"{prefix}_{name}", peptide)
+            regions.append(peptide)
+
+        return regions
+
+    def retrieve(self, timeout=60.0):
+        """
+        Retrieve numbered residues from Abnum website
+
+        :param timeout: seconds to wait for the Abnum server, passed to `requests.get`; `None` waits indefinitely
+
+        :return: returns same object from the `output()` method, or `None` (after printing a hint) when Abnum returns no numbering.
+
+        :raises: `AbNamingSchemeError` if input scheme is not among "kabat", "chothia", "contact", and "imgt"
+        :raises: `AnnotareError` if the numbered sequence lacks a region boundary
+
+        """
+
+        self.url = "http://www.bioinf.org.uk/abs/abnum/abnum.cgi"
+
+        if self.scheme not in self._SCHEMES:
+            raise AbNamingSchemeError("Incorrect scheme mode. Must be one of the following (lowercase): kabat, chothia, contact, imgt")
+
+        # Only "kabat" requests "-k"; the other three schemes all request "-c"
+        # (presumably Kabat vs. Chothia numbering -- confirm against Abnum docs).
+        self.sche = "-k" if self.scheme == "kabat" else "-c"
+
+        self.d = {"plain": 1, "scheme": self.sche, "aaseq": self.aaseq}
+        self.myPage = requests.get(self.url, params=self.d, timeout=timeout)
+        self.text = self.myPage.text
+        self.lst = self.text.split()
+
+        if len(self.lst) > 1:
+            # The chain letter is the first character of the first label.
+            self.chain = self.lst[0][0]
+            self.result = self.output(self.chain, self.lst, self.analyze(self.chain, self.lst))
+            return self.result
+
+        print("No annotation retrieved. Did you enter the complete VH or VL sequence?")
+        return None
+
+
+class AbNamingSchemeError(Exception):
+    """Raised when the requested scheme is not one of the supported names."""
+
+
+class AnnotareError(Exception):
+    """Raised when the numbered sequence lacks a position needed to delimit a region."""
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,9 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "annotate_v"
+version = "0.0.1"
+description = "Annotates antibodies using Abnum"
+license = {text = "BSD 3-clause license"}
+# annotate_v/annotater.py uses typing.Literal, which was added in Python 3.8.
+requires-python = ">=3.8"
+# annotate_v/annotater.py imports requests to query the Abnum web service.
+dependencies = ["requests"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .annotater import Annotater, AbNamingScheme