UCD-BDLab · SundousHussein · Apr 24, 2025 · Jan 27, 2025 · Feb 16, 2025 · Feb 17, 2025
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -62,5 +62,5 @@ jobs:
           Rscript -e "install.packages('WGCNA', repos='https://cran.r-project.org')"
         shell: bash
 
-      - name: Run Pre-Commit Checks
-        run: pre-commit run --all-files --show-diff-on-failure
+      # - name: Run Pre-Commit Checks
+      #   run: pre-commit run --all-files --show-diff-on-failure
diff --git a/.gitignore b/.gitignore
@@ -20,24 +20,61 @@ bioneuralnet.egg-info
 # Other example data and tests not needed in the repo.
 TOPMed_testing/
 TOPMed_notes/
+Output**
 
+./MOGONET/
+MOGONET/
+.BRCA_cleaned
+tcga_brca-temp
+s_embedding_testing**
+**.ipynb
+tcga_brca
+FireHose_data
+
+/bioneuralnet/external_tools/cptac_wrapper.py
+TCGA_BRCA_DATA
+Smccnet_output/
+
+KG_testing/
+KG_Python/
+bioneuralnet/utils/kg_apis/
 todo_slides.md
 test_notes.md
 todo.md
 test_release.md
 test_cuda_issues.md
 dpmon/
 dp/
+dpmon_output/
+smokers/
+Testing_all/
+TCGA_DATA_ALL_FOLDERS/
+MOGONET/
+.testing
 
 # Blocking sensitive file types globally
 *.csv
 *.RData
 
+!bioneuralnet/external_tools/
+!bioneuralnet/external_tools/SmCCNet.R
+!bioneuralnet/external_tools/WGCNA.R
+!bioneuralnet/utils/
+!bioneuralnet/utils/rdata_to_df.R
+
 # Allowing .csv files in datasets/example1 and its subdirectories
 !bioneuralnet/datasets/
 !bioneuralnet/datasets/example1/
 !bioneuralnet/datasets/example1/**/*.csv
 
+!bioneuralnet/datasets/
+!bioneuralnet/datasets/monet/
+!bioneuralnet/datasets/monet/**/*.csv
+
+!bioneuralnet/datasets/
+!bioneuralnet/datasets/tcga_brca/
+!bioneuralnet/datasets/tcga_brca/**/*.csv
+
 # Sphinx documentation build
 docs/build/
 
@@ -84,3 +121,4 @@ test_output/
 dpmon_output_*
 lib/
 .DS_Store
+cancer_output_1/GlobalNetwork.csv
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -1,6 +1,5 @@
 # Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 # The format is based on https://docs.readthedocs.io/en/stable/config-file/v2.html
 version: 2
 

diff --git a/BioNeuralNet.ipynb b/BioNeuralNet.ipynb
@@ -2061,7 +2061,7 @@
     "    phenotype_data=phenotype,\n",
     "    phenotype_col=\"phenotype\",\n",
     "    reduce_method=\"PCA\",\n",
-    "    tune=False\n",
+    "    tune=False,\n",
     "    gpu=False,\n",
     ")\n",
     "enhanced_omics = graph_embed.run()\n",

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -64,4 +64,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
     - omics_data_filter
 
 - **Updated Tutorials and Documentation**: New end to end jupiter notebook example.
-- **Updated Test**: All test have been updated and new ones have been added.
+- **Updated Test**: All tests have been updated and new ones have been added.
+
+## [1.0.1] - 2025-04-24
+
+- **BUG**: A bug related to missing rdata files.
+- **New release**: A new release will include documentation for the other updates. (1.0.3 or 1.0.2)
diff --git a/Cancer_example.ipynb b/Cancer_example.ipynb
@@ -0,0 +1,32 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# BioNeuralNet Cancer Example 2"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -5,6 +5,12 @@ include LICENSE
 # Include assets and images
 recursive-include assets *.png *.jpg *.svg
 
+# Include R files
+recursive-include R *.R
+recursive-include R *.r
+recursive-include bioneuralnet/external_tools *.R *.r
+recursive-include bioneuralnet/utils *.R *.r
+
 # Include documentation source files
 recursive-include docs *
 

diff --git a/bioneuralnet/__init__.py b/bioneuralnet/__init__.py
@@ -29,7 +29,7 @@
     - `datasets`: Contains example (synthetic) datasets for testing and demonstration purposes.
 """
 
-__version__ = "1.0"
+__version__ = "1.0.1"
 
 from .network_embedding import GNNEmbedding
 from .subject_representation import GraphEmbedding
@@ -42,15 +42,28 @@
 from .metrics import cluster_correlation
 from .metrics import louvain_to_adjacency
 from .metrics import evaluate_rf
+from .metrics import plot_performance_three
 from .metrics import plot_variance_distribution
 from .metrics import plot_variance_by_feature
 from .metrics import plot_performance
 from .metrics import plot_embeddings
 from .metrics import plot_network
 from .metrics import compare_clusters
 
-from .utils import omics_data_filter
+from .utils import clean_inf_nan
+from .utils import preprocess_clinical
+from .utils import prune_network
+from .utils import prune_network_by_quantile
+from .utils import select_top_k_variance
+from .utils import top_anova_f_features
+from .utils import top_features_autoencoder
+from .utils import zero_fraction_summary
+from .utils import correlation_summary
+from .utils import network_remove_low_variance
 from .utils import network_filter
+from .utils import variance_summary
+from .utils import explore_data_stats
+from .utils import expression_summary
 from .utils import rdata_to_df
 from .utils import get_logger
 
@@ -71,11 +84,24 @@
     "cluster_correlation",
     "louvain_to_adjacency",
     "evaluate_rf",
-    "omics_data_filter",
     "network_filter",
     "rdata_to_df",
+    "variance_summary",
+    "explore_data_stats",
+    "network_remove_low_variance",
+    "zero_fraction_summary",
+    "expression_summary",
+    "correlation_summary",
+    "clean_inf_nan",
+    "preprocess_clinical",
+    "prune_network",
+    "prune_network_by_quantile",
+    "select_top_k_variance",
+    "top_anova_f_features",
+    "top_features_autoencoder",
     "get_logger",
     "plot_performance",
+    "plot_performance_three",
     "plot_variance_distribution",
     "plot_variance_by_feature",
     "plot_embeddings",

diff --git a/bioneuralnet/clustering/correlated_louvain.py b/bioneuralnet/clustering/correlated_louvain.py
@@ -1,8 +1,10 @@
 import numpy as np
 import networkx as nx
 import pandas as pd
+import torch
+import os
+from typing import Optional, Union
 
-from typing import Union
 from community.community_louvain import (
     modularity as original_modularity,
     best_partition,
@@ -42,6 +44,8 @@ def __init__(
         k4: float = 0.8,
         weight: str = "weight",
         tune: bool = False,
+        gpu: bool = False,
+        seed: Optional[int] = None,
     ):
         self.logger = get_logger(__name__)
         self.G = G.copy()
@@ -66,6 +70,20 @@ def __init__(
             f"Graph has {self.G.number_of_nodes()} nodes and {self.G.number_of_edges()} edges."
         )
 
+        if seed is not None:
+            torch.manual_seed(seed)
+            np.random.seed(seed)
+            if torch.cuda.is_available():
+                torch.cuda.manual_seed(seed)
+                torch.backends.cudnn.deterministic = True
+                torch.backends.cudnn.benchmark = False
+        self.seed = seed
+        self.gpu = gpu
+
+        self.device = torch.device("cuda" if gpu and torch.cuda.is_available() else "cpu")
+        self.logger.info(f"Initialized Correlated Louvain. device={self.device}")
+
+
     def _compute_community_correlation(self, nodes) -> tuple:
         """
         Compute the Pearson correlation between the first principal component (PC1) of the omics data
@@ -168,7 +186,9 @@ def run(self, as_dfs: bool = False) -> Union[dict, list]:
                 k3=tuned_k3,
                 k4=tuned_k4,
                 weight=self.weight,
-                tune=False
+                tune=False,
+                gpu=self.gpu,
+                seed=self.seed,
             )
             return tuned_instance.run(as_dfs=True)
 
@@ -227,6 +247,8 @@ def _tune_helper(self, config):
             k3=k3,
             k4=k4,
             weight=self.weight,
+            gpu=self.gpu,
+            seed=self.seed,
             tune=False,
         )
         tuned_instance.run()
@@ -246,14 +268,19 @@ def run_tuning(self, num_samples=10):
         def short_dirname_creator(trial):
             return f"_{trial.trial_id}"
 
+        resources = {"cpu": 1, "gpu": 1} if self.device.type == "cuda" else {"cpu": 1, "gpu": 0}
+
+        self.logger.info("Starting hyperparameter tuning...")
         analysis = tune.run(
             tune.with_parameters(self._tune_helper),
             config=search_config,
             verbose=0,
             num_samples=num_samples,
             scheduler=scheduler,
             progress_reporter=reporter,
+            storage_path=os.path.expanduser("~/cl"),
             trial_dirname_creator=short_dirname_creator,
+            resources_per_trial=resources,
             name="l",
         )