UCD-BDLab · ramosv · Jan 27, 2025 · Jan 23, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -10,7 +10,7 @@ on:
 
 jobs:
   pre-commit:
-    runs-on: ubuntu-latest
+    runs-on: macos-latest
 
     steps:
       - name: Checkout code
@@ -30,12 +30,31 @@ jobs:
 
       - name: Install Python dependencies
         run: |
-          set -e
           python -m pip install --upgrade pip
           pip install -r requirements.txt
           pip install -r requirements-dev.txt
-          pip install torch
-          pip install torch_geometric
+
+      - name: Verify pre-commit installation
+        run: pre-commit --version
+
+      - name: Install system dependencies
+        run: |
+          # Refresh Homebrew metadata only. A blanket upgrade of every preinstalled
+          # formula is slow and can fail on hosted runners, and none of the steps
+          # below call brew.
+          brew update
+
+      - name: Install R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: "latest"
+
+      - name: Install R packages
+        run: |
+          Rscript -e "if (!requireNamespace('BiocManager', quietly = TRUE)) install.packages('BiocManager', repos='https://cran.r-project.org')"
+          Rscript -e "install.packages(c('dplyr', 'jsonlite'), repos='https://cran.r-project.org')"
+          Rscript -e "BiocManager::install(c('impute', 'preprocessCore', 'GO.db', 'AnnotationDbi'), update=FALSE, ask=FALSE)"
+          Rscript -e "install.packages('SmCCNet', repos='https://cran.r-project.org')"
+          Rscript -e "install.packages('WGCNA', repos='https://cran.r-project.org')"
+          # install.packages() only warns when a package fails to install, so the
+          # commands above can exit 0 with packages missing. Check explicitly and
+          # stop with an error so this step fails instead of a later one.
+          Rscript -e "pkgs <- c('BiocManager', 'dplyr', 'jsonlite', 'impute', 'preprocessCore', 'GO.db', 'AnnotationDbi', 'SmCCNet', 'WGCNA'); absent <- pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)]; if (length(absent) > 0) stop('R packages failed to install: ', paste(absent, collapse = ', '))"
+        shell: bash
 
       - name: Run Pre-Commit Checks
         run: pre-commit run --all-files --show-diff-on-failure
diff --git a/.gitignore b/.gitignore
@@ -16,10 +16,18 @@ bioneuralnet.egg-info
 /docker_files/
 .mypy_cache
 
-# Block sensitive file types globally
+# Blocking sensitive file types globally
 *.csv
 *.RData
+
+# Allowing .csv files in datasets/example1 and its subdirectories
+!bioneuralnet/datasets/
+!bioneuralnet/datasets/example1/
+!bioneuralnet/datasets/example1/**/*.csv
+
 dpmon_output/
+todo.md
+ignore_dir/
 
 
 # Sphinx documentation build

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,6 +8,8 @@ repos:
         exclude: '\.svg$|docs/source/_autosummary/'
       - id: check-yaml
       - id: check-added-large-files
+        args: ['--maxkb=500']
+        exclude: '^bioneuralnet/datasets/example1/'
 
   - repo: https://github.com/psf/black
     rev: 24.10.0
@@ -38,10 +40,22 @@ repos:
         stages: [pre-commit]
 
       - id: check-forbidden-files
-        name: Prevent adding forbidden file types
-        entry: bash -c "if git diff --cached --name-only | grep -E '\\.csv$|\\.RData$'; then echo 'Forbidden file types detected!'; exit 1; fi"
+        name: Prevent adding forbidden file types except in allowed directory
+        # Blocks staged .RData files anywhere and staged .csv files outside
+        # bioneuralnet/datasets/example1/. Only added, copied, modified or renamed
+        # paths are inspected (--diff-filter=ACMR), so deleting a forbidden file
+        # from the repository is not rejected.
+        entry: |
+          bash -c '
+          FILES=$(git diff --cached --name-only --diff-filter=ACMR)
+          if echo "$FILES" | grep -E "\.(RData)$"; then
+            echo "Forbidden file types detected (RData)!"
+            exit 1
+          fi
+          if echo "$FILES" | grep -E "\.csv$" | grep -v "^bioneuralnet/datasets/example1/"; then
+            echo "Forbidden file types detected (CSV outside example1)!"
+            exit 1
+          fi
+          '
         language: system
-        types: [file]
+        stages: [pre-commit]
+
 
       - id: run-tests
         name: Run Tests with Pytest

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -11,9 +11,6 @@ recursive-include docs *
 # Include example scripts
 recursive-include examples *.py
 
-# Include scripts installation files
-recursive-include scripts *
-
 # Exclude __pycache__ and compiled Python files
 global-exclude __pycache__ *.pyc *.pyo *.pyd
 

diff --git a/README.md b/README.md
@@ -6,6 +6,8 @@
 ![GitHub Contributors](https://img.shields.io/github/contributors/UCD-BDLab/BioNeuralNet)
 ![Downloads](https://static.pepy.tech/badge/bioneuralnet)
 
+[![Documentation](https://img.shields.io/badge/docs-read%20the%20docs-blue.svg)](https://bioneuralnet.readthedocs.io/en/latest/)
+
 
 ## Welcome to [BioNeuralNet Beta 0.1](https://bioneuralnet.readthedocs.io/en/latest/index.html)
 
@@ -159,37 +161,46 @@ Below is a quick example demonstrating the following:
 ### Code Example:
 
 ```python
-import pandas as pd
 from bioneuralnet.external_tools import SmCCNet
 from bioneuralnet.downstream_task import DPMON
+import pandas as pd
 
-# Step 1: Data Preparation
-phenotype_data = pd.read_csv('phenotype_data.csv', index_col=0)
-omics_proteins = pd.read_csv('omics_proteins.csv', index_col=0)
-omics_metabolites = pd.read_csv('omics_metabolites.csv', index_col=0)
-clinical_data = pd.read_csv('clinical_data.csv', index_col=0)
+# 1) Prepare data
+omics_data = pd.read_csv("data/omics_data.csv")
+phenotype_data = pd.read_csv("data/phenotype_data.csv")
+clinical_data = pd.read_csv("data/clinical_data.csv")
 
-# Step 2: Network Construction
+# 2) Run SmCCNet to get adjacency
 smccnet = SmCCNet(
-    phenotype_df=phenotype_data,
-    omics_dfs=[omics_proteins, omics_metabolites],
-    data_types=["protein", "metabolite"],
-    kfold=5,
-    summarization="PCA",
-)
+   phenotype_df=phenotype_data,
+   omics_df=omics_data,
+   data_types=["genes", "proteins"],
+   kfolds=5,
+   summarization="NetSHy",
+   seed=127,
+   )
 adjacency_matrix = smccnet.run()
-print("Adjacency matrix generated.")
 
-# Step 3: Disease Prediction
+# 3) Disease Prediction with DPMON
 dpmon = DPMON(
-    adjacency_matrix=adjacency_matrix,
-    omics_list=[omics_proteins, omics_metabolites],
-    phenotype_data=phenotype_data,
-    clinical_data=clinical_data,
-    model="GAT",
+   adjacency_matrix=adjacency_matrix,
+   omics_list=[omics_data],
+   phenotype_data=phenotype_data,
+   clinical_data=clinical_data,
+   model="GAT",
+   gnn_hidden_dim=64,
+   layer_num=3,
+   nn_hidden_dim1=2,
+   nn_hidden_dim2=2,
+   epoch_num=10,
+   repeat_num=5,
+   lr=0.01,
+   weight_decay=1e-4,
+   tune=True,
+   gpu=False,
 )
 predictions = dpmon.run()
-print("Disease phenotype predictions:\n", predictions)
+print("Disease predictions:\n", predictions)
 ```
 
 ### Output
@@ -221,14 +232,57 @@ See [FAQ](https://bioneuralnet.readthedocs.io/en/latest/faq.html) for more.
 
 ## Acknowledgments
 
-BioNeuralNet relies on or interfaces with various open-source libraries:
+BioNeuralNet relies on and interfaces with various open-source libraries. We extend our gratitude to the developers and contributors of these projects for their invaluable tools and resources.
+
+### Core Dependencies
+
+- [PyYAML](https://pyyaml.org/) - **MIT License**
+- [pandas](https://pandas.pydata.org/) - **BSD 3-Clause License**
+- [numpy](https://numpy.org/) - **BSD 3-Clause License**
+- [scikit-learn](https://scikit-learn.org/) - **BSD 3-Clause License**
+- [node2vec](https://github.com/aditya-grover/node2vec) - **MIT License**
+- [matplotlib](https://matplotlib.org/) - **Matplotlib License**
+- [ray](https://github.com/ray-project/ray) - **Apache 2.0 License**
+- [tensorboardX](https://github.com/lanpa/tensorboardX) - **MIT License**
+- [networkx](https://networkx.org/) - **BSD License**
+- [pyvis](https://github.com/WestHealth/pyvis) - **BSD 3-Clause License**
+- [leidenalg](https://github.com/vtraag/leidenalg) - **GNU GPL v3**
+- [dtt](https://github.com/BioroboticsLab/dtt) - **MIT License**
+- [pyreadr](https://github.com/ofajardo/pyreadr) - **AGPL-3.0 License**
+- [torch](https://pytorch.org/) - **BSD License**
+- [torch_geometric](https://github.com/pyg-team/pytorch_geometric) - **MIT License**
+
+### Development Dependencies
+
+These tools are essential for the development and maintenance of BioNeuralNet but are not required for end-users.
+
+- [pytest](https://pytest.org/) - **MIT License**
+- [pytest-cov](https://pytest-cov.readthedocs.io/) - **MIT License**
+- [pytest-mock](https://github.com/pytest-dev/pytest-mock) - **MIT License**
+- [Sphinx](https://www.sphinx-doc.org/) - **BSD License**
+- [Sphinx RTD Theme](https://sphinx-rtd-theme.readthedocs.io/) - **BSD License**
+- [sphinx-autosummary-accessors](https://github.com/xarray-contrib/sphinx-autosummary-accessors) - **MIT License**
+- [sphinxcontrib-napoleon](https://sphinxcontrib-napoleon.readthedocs.io/) - **BSD License**
+- [flake8](https://flake8.pycqa.org/) - **MIT License**
+- [Black](https://black.readthedocs.io/) - **MIT License**
+- [mypy](http://mypy-lang.org/) - **MIT License**
+- [pre-commit](https://pre-commit.com/) - **MIT License**
+- [tox](https://tox.readthedocs.io/) - **MIT License**
+- [setuptools](https://setuptools.pypa.io/) - **MIT License**
+- [twine](https://twine.readthedocs.io/) - **Apache 2.0 License**
+
+### External Tools
+
+BioNeuralNet integrates with external tools to enhance functionality:
+
+- [WGCNA](https://cran.r-project.org/package=WGCNA) - **GPL-3.0 License**
+- [SmCCNet](https://cran.r-project.org/package=SmCCNet) - **GPL-3.0 License**
+
+### Special Thanks
 
-- [PyTorch](https://pytorch.org/) / [PyTorch Geometric](https://github.com/pyg-team/pytorch_geometric)
-- [Node2Vec](https://github.com/aditya-grover/node2vec)
-- [WGCNA](https://cran.r-project.org/package=WGCNA) / [SmCCNet](https://cran.r-project.org/package=SmCCNet)
-- [Pytest](https://pytest.org/), [Sphinx](https://www.sphinx-doc.org), [Black](https://black.readthedocs.io/), [Flake8](https://flake8.pycqa.org/)
+We appreciate the efforts of these communities and all contributors who make open-source development possible. Your dedication and hard work enable projects like BioNeuralNet to thrive and evolve.
 
-We appreciate the efforts of these communities and all contributors.
+---
 
 ## Testing & CI
 
@@ -250,10 +304,14 @@ We appreciate the efforts of these communities and all contributors.
 For more details, see our [FAQ](https://bioneuralnet.readthedocs.io/en/latest/faq.html)
 or open an [issue](https://github.com/UCD-BDLab/BioNeuralNet/issues).
 
-## License & Contact
+## License
+
+- **License:** [MIT License](https://github.com/UCD-BDLab/BioNeuralNet/blob/main/LICENSE)
+
+## Contact
 
-- **License**: [MIT License](https://github.com/UCD-BDLab/BioNeuralNet/blob/main/LICENSE)
-- **Contact**: Questions or feature requests? [Open an issue](https://github.com/UCD-BDLab/BioNeuralNet) or email [vicente.ramos@ucdenver.edu](mailto:vicente.ramos@ucdenver.edu).
+- **Questions or Feature Requests:** [Open an issue](https://github.com/UCD-BDLab/BioNeuralNet/issues)
+- **Email:** [vicente.ramos@ucdenver.edu](mailto:vicente.ramos@ucdenver.edu)
 
 ---
 

diff --git a/bioneuralnet/__init__.py b/bioneuralnet/__init__.py
@@ -26,6 +26,13 @@
 from .network_embedding import GNNEmbedding
 from .subject_representation import GraphEmbedding
 from .downstream_task import DPMON
+from .clustering import PageRank
+from .clustering import Louvain
+from .metrics import correlation
+from .metrics import mse
+from .metrics import f1_score
+from .metrics import precision
+from .metrics import recall
 
 from .external_tools import DynamicVisualizer
 from .external_tools import FeatureSelector
@@ -36,14 +43,19 @@
 from .external_tools import Node2Vec
 
 __all__: list = [
-    "network_embedding",
-    "subject_representation",
-    "utils",
     "__version__",
     "GNNEmbedding",
-    "Node2Vec",
     "GraphEmbedding",
     "DPMON",
+    "PageRank",
+    "Louvain",
+    "correlation",
+    "mse",
+    "f1_score",
+    "precision",
+    "recall",
+    "utils",
+    "Node2Vec",
     "FeatureSelector",
     "StaticVisualizer",
     "DynamicVisualizer",

diff --git a/bioneuralnet/analysis/__init__.py b/bioneuralnet/analysis/__init__.py
diff --git a/bioneuralnet/analysis/correlation_analysis.py b/bioneuralnet/analysis/correlation_analysis.py
diff --git a/bioneuralnet/clustering/__init__.py b/bioneuralnet/clustering/__init__.py
@@ -1,3 +1,4 @@
-from .pagerank import PageRank
+from .correlated_pagerank import PageRank
+from .correlated_louvain import Louvain
 
-__all__ = ["PageRank"]
+__all__ = ["PageRank", "Louvain"]
diff --git a/bioneuralnet/clustering/correlated_louvain.py b/bioneuralnet/clustering/correlated_louvain.py
@@ -0,0 +1,20 @@
+import pandas as pd
+
+
+class Louvain:
+    """
+    Correlated Louvain: currently a pass-through stub that performs no clustering.
+
+    Attributes:
+        data (pd.DataFrame): The frame handed to the constructor, stored as-is.
+    """
+
+    def __init__(self, data: pd.DataFrame):
+        self.data = data  # stored by reference; no copy or validation is done
+
+    def run(self) -> pd.DataFrame:
+        """
+        Return the stored frame without modifying it.
+
+        Returns:
+            pd.DataFrame: The object currently held in ``self.data``.
+        """
+        return self.data
diff --git a/bioneuralnet/clustering/pagerank.py → ...uralnet/clustering/correlated_pagerank.py b/bioneuralnet/clustering/pagerank.py → ...uralnet/clustering/correlated_pagerank.py
@@ -22,7 +22,6 @@ class PageRank:
         tol (float): Tolerance for convergence.
         k (float): Weighting factor for composite correlation-conductance score.
         output_dir (str): Directory to save outputs.
-        logger (logging.Logger): Logger for the class.
     """
 
     def __init__(

diff --git a/bioneuralnet/datasets/__init__.py b/bioneuralnet/datasets/__init__.py
@@ -0,0 +1,3 @@
+from .dataset_loader import DatasetLoader
+
+__all__ = ["DatasetLoader"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .dataset_loader import DatasetLoader

		__all__ = ["DatasetLoader"]