Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@ bioneuralnet.egg-info
/build/
/docker_files/
.mypy_cache
Quick Start.ipynb
Quick_Start_testing*.ipynb
TCGA-BRCA_Datatest.ipynb
DevNotes.md
TCGA-BRCA_Datatest_copy.ipynb
split
doc_examples_outdated
dpmon_output
.enviroment
TCGA-BRCA_Dataset_testing*.ipynb
# Other example data and tests not needed in the repo.

Output**
Expand Down
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ repos:
hooks:
- id: remove-pyc-and-pycache
name: Remove .pyc files and __pycache__ directories
entry: bash -c "find . \( -path './.venv' -o -path './docs' -o -path './node_modules' \) -prune -o -type f -name '*.pyc' -exec rm -f {} + -o -type d -name '__pycache__' -exec rm -rf {} +"
entry: bash -c 'find . \( -path "./.enviroment" -o -path "./.testing" -o -path "./.venv" -o -path "./docs" -o -path "./node_modules" \) -prune -o -type f -name "*.pyc" -exec rm -f {} + -o -type d -name "__pycache__" -exec rm -rf {} +'
pass_filenames: false
language: system
stages: [pre-commit]

Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# BioNeuralNet: Multi-Omics Integration with Graph Neural Networks

![License](https://img.shields.io/badge/license-MIT-blue.svg)
![PyPI](https://img.shields.io/pypi/v/bioneuralnet)
![GitHub Issues](https://img.shields.io/github/issues/UCD-BDLab/BioNeuralNet)
![GitHub Contributors](https://img.shields.io/github/contributors/UCD-BDLab/BioNeuralNet)
![Downloads](https://static.pepy.tech/badge/bioneuralnet)
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/UCD-BDLab/BioNeuralNet/blob/main/LICENSE)
[![PyPI](https://img.shields.io/pypi/v/bioneuralnet)](https://pypi.org/project/bioneuralnet/)
[![GitHub Issues](https://img.shields.io/github/issues/UCD-BDLab/BioNeuralNet)](https://github.com/UCD-BDLab/BioNeuralNet/issues)
[![GitHub Contributors](https://img.shields.io/github/contributors/UCD-BDLab/BioNeuralNet)](https://github.com/UCD-BDLab/BioNeuralNet/graphs/contributors)
[![Downloads](https://static.pepy.tech/badge/bioneuralnet)](https://pepy.tech/project/bioneuralnet)
[![Documentation](https://img.shields.io/badge/docs-read%20the%20docs-blue.svg)](https://bioneuralnet.readthedocs.io/en/latest/)


## Welcome to BioNeuralNet 1.0.7

![BioNeuralNet Logo](assets/LOGO_WB.png)
Expand Down
Binary file modified assets/BioNeuralNet.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/BioNeuralNet_old.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 11 additions & 5 deletions bioneuralnet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,26 @@
from .clustering import CorrelatedPageRank
from .clustering import CorrelatedLouvain
from .clustering import HybridLouvain
from .datasets import DatasetLoader
from .external_tools import SmCCNet

from .metrics import omics_correlation
from .metrics import cluster_correlation
from .metrics import louvain_to_adjacency
from .metrics import evaluate_rf
from .metrics import evaluate_model
from .metrics import evaluate_f1m
from .metrics import evaluate_f1w
from .metrics import plot_performance_three
from .metrics import plot_variance_distribution
from .metrics import plot_variance_by_feature
from .metrics import plot_performance
from .metrics import plot_embeddings
from .metrics import plot_network
from .metrics import plot_multiple_metrics
from .metrics import compare_clusters


from .utils import get_logger
from .utils import rdata_to_df
from .utils import variance_summary
Expand All @@ -75,8 +82,6 @@
from .utils import gen_mst_graph
from .utils import gen_snn_graph

from .datasets import DatasetLoader
from .external_tools import SmCCNet

__all__: list = [
"__version__",
Expand All @@ -97,6 +102,10 @@
"plot_embeddings",
"plot_network",
"compare_clusters",
"plot_multiple_metrics",
"evaluate_model",
"evaluate_f1m",
"evaluate_f1w",
"get_logger",
"rdata_to_df",
"variance_summary",
Expand All @@ -122,9 +131,6 @@
"gen_mst_graph",
"gen_snn_graph",
"DatasetLoader",
"NetworkLoader",
"SmCCNet",
"WGCNA",
"Node2Vec"
"evaluate_model",
]
2 changes: 1 addition & 1 deletion bioneuralnet/clustering/hybrid_louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(
Y: pd.DataFrame,
k3: float = 0.2,
k4: float = 0.8,
max_iter: int = 10,
max_iter: int = 3,
weight: str = "weight",
gpu: bool = False,
seed: Optional[int] = None,
Expand Down
41 changes: 19 additions & 22 deletions bioneuralnet/downstream_task/subject_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, r2_score
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
Expand Down Expand Up @@ -247,14 +247,16 @@ def _integrate_embeddings(self, reduced: pd.DataFrame, method="multiply", alpha=

With the default parameters (alpha = 2.0, beta = 0.5), each feature is updated as:

- enhanced = beta * raw + (1 - beta) * (alpha * normalized_weight * raw)
enhanced = beta * raw + (1 - beta) * (alpha * normalized_weight * raw)

For example, with alpha = 2.0 and beta = 0.5:

- If a features normalized weight is 1.0:
- enhanced = 0.5xraw + 0.5x(2.0x1.0xraw) = 0.5xraw + raw = 1.5xraw
- If a features normalized weight is 0.5:
- enhanced = 0.5xraw + 0.5x(2.0x0.5xraw) = 0.5xraw + 0.5xraw = raw
If a feature's normalized weight is 1.0:

enhanced = 0.5 * raw + 0.5 * (2.0 * 1.0 * raw) = 0.5 * raw + raw = 1.5 * raw
If a feature's normalized weight is 0.5:

enhanced = 0.5 * raw + 0.5 * (2.0 * 0.5 * raw) = 0.5 * raw + 0.5 * raw = raw

This is so at least 50% of the final output is influenced by the computed weight
"""
Expand Down Expand Up @@ -340,37 +342,32 @@ def _run_classification_tuning(self) -> Dict[str, Any]:
def tune_helper(config):
try:
method = config["method"].upper()
ae_params = config.get("ae_params", {
"epochs": 64,
"hidden_dim": 4,
"dropout": 0.2,
"lr": 1e-3,
"activation": "relu",
})
alpha = config.get("alpha", 2.0)
beta = config.get("beta", 0.5)
compressed_dim = config.get("compressed_dim", 2)
ae_params = config["ae_params"]
alpha = config["alpha"]
beta = config["beta"]
compressed_dim = config["compressed_dim"]

reduced = self._reduce_embeddings(method=method, compressed_dim=compressed_dim,ae_params=ae_params)
enhanced = self._integrate_embeddings(reduced, method="multiply", alpha=alpha, beta=beta)
enhanced = self._integrate_embeddings(reduced, method=config["integration_method"], alpha=alpha, beta=beta)
common_index = enhanced.index.intersection(self.phenotype_data.index)

X = enhanced.loc[common_index].values
y = self.phenotype_data.loc[common_index, self.phenotype_col]

is_classification = y.dtype != float and y.nunique() <= 20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=self.seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

if is_classification:
model = RandomForestClassifier(random_state=self.seed)
model = RandomForestClassifier()
model.fit(X_train, y_train.astype(int))
y_pred = model.predict(X_test)
score = accuracy_score(y_test, y_pred)
else:
model = RandomForestRegressor(random_state=self.seed)
model = RandomForestRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
score = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
score = -mse

tune.report({"score": score})

Expand All @@ -392,7 +389,7 @@ def short_dirname_creator(trial):
analysis = tune.run(
tune_helper,
config=search_config,
num_samples=10,
num_samples=20,
scheduler=scheduler,
progress_reporter=reporter,
storage_path=os.path.expanduser("~/sr"),
Expand Down
2 changes: 1 addition & 1 deletion bioneuralnet/external_tools/smccnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class SmCCNet:
omics_dfs (List[pd.DataFrame]): List of omics DataFrames.
data_types (List[str]): List of omics data type strings (e.g. ["Genes", "miRNA"]).
kfold (int): Number of folds for cross-validation. Default=5.
eval_method (str): e.g. 'accuracy', 'auc', 'f1', or 'Rsquared' (if you patch SmCCNet).
eval_method (str): e.g. 'accuracy', 'auc', 'f1', or 'Rsquared'.
subSampNum (int): # of subsamplings. Default=50.
summarization (str): 'NetSHy', 'PCA', or 'SVD'. Default='NetSHy'.
seed (int): Random seed. Default=123.
Expand Down
10 changes: 3 additions & 7 deletions bioneuralnet/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
from .correlation import omics_correlation, cluster_correlation, louvain_to_adjacency
from .plot import plot_variance_distribution, plot_variance_by_feature, plot_performance_three, plot_performance, plot_embeddings, plot_network, compare_clusters
from .evaluation import evaluate_model, evaluate_rf, evaluate_f1m, evaluate_f1w, plot_multiple_metrics, evaluate_single_run
from .evaluation import evaluate_model, evaluate_rf, evaluate_f1m, evaluate_f1w
from .plot import plot_variance_distribution, plot_variance_by_feature, plot_performance_three, plot_performance,plot_multiple_metrics, plot_embeddings, plot_network, compare_clusters

__all__ = ["omics_correlation", "cluster_correlation", "louvain_to_adjacency",
"plot_variance_distribution", "plot_variance_by_feature", "plot_performance_three",
"plot_performance", "plot_embeddings", "plot_network", "compare_clusters",
"evaluate_model", "evaluate_rf", "evaluate_single_run", "evaluate_f1m", "evaluate_f1w",
"plot_multiple_metrics"]
__all__ = ["omics_correlation", "cluster_correlation", "louvain_to_adjacency","evaluate_model", "evaluate_rf", "evaluate_f1m", "evaluate_f1w", "plot_variance_distribution", "plot_variance_by_feature", "plot_performance_three", "plot_performance", "plot_multiple_metrics", "plot_embeddings", "plot_network", "compare_clusters"]
Loading