Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,13 @@ jobs:
# Rscript -e "install.packages('WGCNA', repos='https://cran.r-project.org')"
# shell: bash

- name: Cache pre-commit hooks
uses: actions/cache@v3
with:
path: ~/.cache/pre-commit
key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
restore-keys: pre-commit-${{ runner.os }}-

- name: Run Pre-Commit Checks
if: matrix.os != 'ubuntu-latest'
run: pre-commit run --all-files --show-diff-on-failure
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ doc_examples_outdated
dpmon_output
.enviroment
TCGA-BRCA_Dataset_testing*.ipynb

# Other example data and tests not needed in the repo.

Output**
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ It provides tools for network construction, embedding generation, clustering, an

![BioNeuralNet Workflow](assets/BioNeuralNet.png)

## Documentation

**[BioNeuralNet Documentation & Examples](https://bioneuralnet.readthedocs.io/en/latest/)**

## Table of Contents

- [1. Installation](#1-installation)
Expand Down
26 changes: 11 additions & 15 deletions bioneuralnet/downstream_task/dpmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def setup_device(gpu, cuda):
def slice_omics_datasets(
omics_dataset: pd.DataFrame, adjacency_matrix: pd.DataFrame
) -> List[pd.DataFrame]:
logger.info("Slicing omics dataset based on network nodes.")
logger.debug("Slicing omics dataset based on network nodes.")
omics_network_nodes_names = adjacency_matrix.index.tolist()

# Clean omics dataset columns
Expand All @@ -272,7 +272,7 @@ def build_omics_networks_tg(
omics_datasets: List[pd.DataFrame],
clinical_data: pd.DataFrame,
) -> List[Data]:
logger.info("Building PyTorch Geometric Data object from adjacency matrix.")
logger.debug("Building PyTorch Geometric Data object from adjacency matrix.")
omics_network_nodes_names = adjacency_matrix.index.tolist()

G = nx.from_pandas_adjacency(adjacency_matrix)
Expand All @@ -285,7 +285,7 @@ def build_omics_networks_tg(

if clinical_data is not None and not clinical_data.empty:
clinical_vars = clinical_data.columns.tolist()
logger.info(f"Using clinical vars for node features: {clinical_vars}")
logger.debug(f"Using clinical vars for node features: {clinical_vars}")
omics_dataset = omics_datasets[0]
missing_nodes = set(omics_network_nodes_names) - set(omics_dataset.columns)
if missing_nodes:
Expand Down Expand Up @@ -385,12 +385,12 @@ def run_standard_training(dpmon_params, adjacency_matrix, combined_omics, clinic
avg_accuracy = sum(accuracies) / len(accuracies)
std_accuracy = statistics.stdev(accuracies) if len(accuracies) > 1 else 0.0
logger.info(f"Best Accuracy: {best_accuracy:.4f}")
logger.info(f"Average Accuracy: {avg_accuracy:.4f}")
logger.info(f"Standard Deviation of Accuracy: {std_accuracy:.4f}")
logger.info(f"Average Accuracy across {len(accuracies)} models: {avg_accuracy:.4f}")
logger.info(f"Standard Deviation across all models: {std_accuracy:.4f}")
logger.info(f"Returning best model predictions and average accuracy (predictions, avg_accuracy).")

return best_predictions_df, avg_accuracy


def run_hyperparameter_tuning(
dpmon_params, adjacency_matrix, combined_omics, clinical_data
):
Expand All @@ -408,7 +408,7 @@ def run_hyperparameter_tuning(
"weight_decay": tune.loguniform(1e-4, 1e-1),
"nn_hidden_dim1": tune.choice([4, 8, 16, 32, 64, 128]),
"nn_hidden_dim2": tune.choice([4, 8, 16, 32, 64, 128]),
"num_epochs": tune.choice([2, 16, 64, 512, 1024, 4096, 8192]),
"num_epochs": tune.choice([16, 64, 256, 512, 1024,2048, 4096, 8192]),
}

reporter = CLIReporter(metric_columns=["loss", "accuracy", "training_iteration"])
Expand Down Expand Up @@ -501,7 +501,7 @@ def short_dirname_creator(trial):

result = tune.run(
tune_train_n,
resources_per_trial={"cpu": 2, "gpu": gpu_resources},
resources_per_trial={"cpu": 1, "gpu": gpu_resources},
config=pipeline_configs,
num_samples=10,
verbose=0,
Expand All @@ -515,9 +515,7 @@ def short_dirname_creator(trial):
best_trial = result.get_best_trial("loss", "min", "last")
logger.info("Best trial config: {}".format(best_trial.config))
logger.info("Best trial final loss: {}".format(best_trial.last_result["loss"]))
logger.info(
"Best trial final accuracy: {}".format(best_trial.last_result["accuracy"])
)
logger.info("Best trial final accuracy: {}".format(best_trial.last_result["accuracy"]))
best_configs.append(best_trial.config)

best_configs_df = pd.DataFrame(best_configs)
Expand All @@ -534,15 +532,13 @@ def train_model(model, criterion, optimizer, train_data, train_labels, epoch_num
optimizer.step()

if (epoch + 1) % 10 == 0 or epoch == 0:
logger.info(f"Epoch [{epoch+1}/{epoch_num}], Loss: {loss.item():.4f}")
logger.debug(f"Epoch [{epoch+1}/{epoch_num}], Loss: {loss.item():.4f}")

model.eval()
with torch.no_grad():
predictions, _ = model(train_data, train_labels["omics_network"])
_, predicted = torch.max(predictions, 1)
accuracy = (predicted == train_labels["labels"]).sum().item() / len(
train_labels["labels"]
)
accuracy = (predicted == train_labels["labels"]).sum().item() / len(train_labels["labels"])
logger.info(f"Training Accuracy: {accuracy:.4f}")

return accuracy
Expand Down
9 changes: 7 additions & 2 deletions bioneuralnet/utils/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_selection import f_classif, f_regression
from statsmodels.stats.multitest import multipletests
from typing import Callable, TypeAlias, overload

from .logger import get_logger
logger = get_logger(__name__)
Expand Down Expand Up @@ -61,7 +60,7 @@ def preprocess_clinical(X: pd.DataFrame, y: pd.Series, top_k: int = 10, scale: b
else:
df_cat_encoded = pd.DataFrame(index=df_numeric_scaled.index)

df_combined = pd.concat([df_numeric_scaled, df_cat_encoded, df_ignore],axis=1,join="inner")
df_combined = pd.concat([df_numeric_scaled, df_cat_encoded],axis=1,join="inner")
df_features = df_combined.loc[:, df_combined.std(axis=0) > 0]

if y_series.nunique() <= 10:
Expand Down Expand Up @@ -94,6 +93,12 @@ def preprocess_clinical(X: pd.DataFrame, y: pd.Series, top_k: int = 10, scale: b
for idx in selected_idx:
selected_columns.append(feature_names[idx])

df_selected = df_features[selected_columns].copy()

for col in df_ignore.columns:
if col in selected_columns:
df_selected[col] = df_ignore[col]

return df_features[selected_columns]

def clean_inf_nan(df: pd.DataFrame) -> pd.DataFrame:
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading