Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@
^\.github$
^data-raw$
^vignettes/prerender_figures\.R$
^_pkgdown\.yml$
^docs$
^pkgdown$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ rsconnect/
.DS_Store
.quarto
inst/doc
docs
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ Depends:
LazyData: true
LazyDataCompression: xz
VignetteBuilder: knitr
URL: https://bigmindlab.github.io/OmicsKit

2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

export(add_annotations)
export(addgenesPA)
export(calc_jaccard)
export(detect_filter)
export(do_clust)
export(geneset_similarity)
export(get_annotations)
export(get_network_communities)
export(get_stars)
Expand Down
16 changes: 8 additions & 8 deletions R/dataclust_PA.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#' apoptosis & cell death, cell cycle & DNA damage, immune response &
#' inflammation, and metabolism. Gene set names follow standard database
#' conventions (`KEGG_`, `HALLMARK_`, `GO_`) and gene symbols are real human
#' genes. Designed to be used as input to [calc_jaccard()].
#' genes. Designed to be used as input to [geneset_similarity()].
#'
#' @format A named list of 40 elements. Each element is a character vector of
#' human gene symbols (HGNC) belonging to that gene set. Gene set sizes range
Expand All @@ -26,11 +26,11 @@
#' # Inspect one gene set
#' geneset_list[["KEGG_APOPTOSIS"]]
#'
#' # Use with calc_jaccard()
#' # Use with geneset_similarity()
#' data(camera_results)
#' jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05)
#' jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05)
#'
#' @seealso [calc_jaccard()], [camera_results]
#' @seealso [geneset_similarity()], [camera_results]
"geneset_list"


Expand All @@ -40,7 +40,7 @@
#' analysis, containing significance values for the 40 gene sets in
#' [geneset_list]. Approximately 60% of gene sets have FDR < 0.05, providing
#' enough significant sets for meaningful clustering. Designed to be used
#' alongside [geneset_list] as input to [calc_jaccard()].
#' alongside [geneset_list] as input to [geneset_similarity()].
#'
#' @format A data frame with 40 rows and 4 columns:
#' \describe{
Expand All @@ -63,9 +63,9 @@
#' # How many gene sets are significant?
#' sum(camera_results$FDR < 0.05)
#'
#' # Use with calc_jaccard()
#' # Use with geneset_similarity()
#' data(geneset_list)
#' jac <- calc_jaccard(geneset_list, camera_results, fdr_th = 0.05)
#' jac <- geneset_similarity(geneset_list, camera_results, fdr_th = 0.05)
#'
#' @seealso [calc_jaccard()], [geneset_list]
#' @seealso [geneset_similarity()], [geneset_list]
"camera_results"
26 changes: 13 additions & 13 deletions R/doclust_PA.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# community detection, and super-term generation.
#
# Functions:
# calc_jaccard — Compute Jaccard similarity & distance matrices
# geneset_similarity — Compute Jaccard similarity & distance matrices
# do_clust — Hierarchical clustering with silhouette selection
# get_superterm — TF-IDF super-term labels for gene set communities
# get_network_communities — Community detection + super-terms in one call
# =============================================================================

########################
# Function calc_jaccard #
# Function geneset_similarity #
########################

#' Compute Jaccard similarity and distance matrices for gene sets
Expand Down Expand Up @@ -51,7 +51,7 @@
#' )
#'
#' # Only the first three gene sets pass the FDR threshold
#' jac <- calc_jaccard(geneset_list, results, fdr_th = 0.05)
#' jac <- geneset_similarity(geneset_list, results, fdr_th = 0.05)
#'
#' jac$jaccard_sim # similarity matrix
#' jac$dist_mat # distance object (usable in UMAP, clustering, etc.)
Expand All @@ -63,7 +63,7 @@
#' @importFrom rlang .data
#' @export

calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
geneset_similarity <- function(geneset_list, results, fdr_th = 0.05) {

if (!is.list(geneset_list) || is.null(names(geneset_list))) {
stop("`geneset_list` must be a named list of character vectors.", call. = FALSE)
Expand Down Expand Up @@ -136,7 +136,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
#' returns cluster assignments, a silhouette ggplot2 object, and a
#' ComplexHeatmap with dendrogram.
#'
#' @param x A `JaccardResult` object (output of [calc_jaccard()]) or an
#' @param x A `JaccardResult` object (output of [geneset_similarity()]) or an
#' object of class `dist`.
#' @param method Agglomeration method passed to [stats::hclust()].
#' Default: `"ward.D2"`.
Expand Down Expand Up @@ -168,7 +168,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
#' FDR = c(0.01, 0.02, 0.03, 0.04, 0.01)
#' )
#'
#' jac <- calc_jaccard(geneset_list, results)
#' jac <- geneset_similarity(geneset_list, results)
#' clust <- do_clust(jac)
#'
#' clust$silhouette_plot # ggplot2 silhouette curve
Expand All @@ -177,7 +177,7 @@ calc_jaccard <- function(geneset_list, results, fdr_th = 0.05) {
#' clust$cluster_assignments # tibble: NAME | cluster
#' }
#'
#' @seealso [calc_jaccard()], [get_network_communities()],
#' @seealso [geneset_similarity()], [get_network_communities()],
#' [network_clust()], [network_clust_gg()]
#' @import ggplot2
#' @importFrom rlang .data
Expand All @@ -204,7 +204,7 @@ do_clust <- function(x, method = "ward.D2", max_k = NULL) {
jaccard_sim <- 1 - as.matrix(x)
} else {
stop(
"`x` must be a `JaccardResult` object (output of `calc_jaccard()`) ",
"`x` must be a `JaccardResult` object (output of `geneset_similarity()`) ",
"or an object of class `dist`.",
call. = FALSE
)
Expand Down Expand Up @@ -471,10 +471,10 @@ get_superterm <- function(geneset_names, community_membership,
#' Convenience wrapper that builds a binary adjacency network from a Jaccard
#' similarity matrix, runs a community-detection algorithm, and optionally
#' generates super-term labels for each community via [get_superterm()].
#' Designed to be the single step between [calc_jaccard()] and the network
#' Designed to be the single step between [geneset_similarity()] and the network
#' plotting functions [network_clust()] / [network_clust_gg()].
#'
#' @param x A `JaccardResult` object (output of [calc_jaccard()]).
#' @param x A `JaccardResult` object (output of [geneset_similarity()]).
#' @param threshold Numeric between 0 and 1. Gene set pairs with a Jaccard
#' similarity above this value are connected in the network. Default: `0.3`.
#' @param method Character. Community detection algorithm to use. One of:
Expand Down Expand Up @@ -506,7 +506,7 @@ get_superterm <- function(geneset_names, community_membership,
#' res <- read.csv("path/to/results.csv")
#'
#' # Full workflow
#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
#' clust <- do_clust(jac)
#' net <- get_network_communities(jac, threshold = 0.3, method = "louvain")
#'
Expand All @@ -524,7 +524,7 @@ get_superterm <- function(geneset_names, community_membership,
#' plots$combined
#' }
#'
#' @seealso [calc_jaccard()], [do_clust()], [get_superterm()],
#' @seealso [geneset_similarity()], [do_clust()], [get_superterm()],
#' [network_clust()], [network_clust_gg()]
#' @importFrom magrittr %>%
#' @export
Expand All @@ -542,7 +542,7 @@ get_network_communities <- function(x,
}
if (!inherits(x, "JaccardResult")) {
stop(
"`x` must be a `JaccardResult` object (output of `calc_jaccard()`).",
"`x` must be a `JaccardResult` object (output of `geneset_similarity()`).",
call. = FALSE
)
}
Expand Down
8 changes: 4 additions & 4 deletions R/list_gmts.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#'
#' Scans a directory for `.gmt` files, parses them, and returns a single named
#' list where each element is a character vector of gene symbols for one gene
#' set. The output is ready to be passed directly to [calc_jaccard()].
#' set. The output is ready to be passed directly to [geneset_similarity()].
#'
#' **GMT format:** each row contains the gene set name in column 1, an optional
#' description in column 2, and gene symbols from column 3 onward. Empty fields
Expand All @@ -31,11 +31,11 @@
#' names(geneset_list)[1:5] # first five gene set names
#' geneset_list[["KEGG_APOPTOSIS"]] # genes in a specific set
#'
#' # Pass directly to calc_jaccard
#' jac <- calc_jaccard(geneset_list, results_df, fdr_th = 0.05)
#' # Pass directly to geneset_similarity
#' jac <- geneset_similarity(geneset_list, results_df, fdr_th = 0.05)
#' }
#'
#' @seealso [calc_jaccard()]
#' @seealso [geneset_similarity()]
#' @export

list_gmts <- function(dir) {
Expand Down
16 changes: 8 additions & 8 deletions R/plotclust_PA.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' For a ggplot2-based version that returns plot objects instead of drawing
#' them, see [network_clust_gg()].
#'
#' @param x A `JaccardResult` object (output of [calc_jaccard()]).
#' @param x A `JaccardResult` object (output of [geneset_similarity()]).
#' @param clust_result A list returned by [do_clust()], used to color nodes by
#' hierarchical cluster assignment.
#' @param jaccard_threshold Numeric. Minimum Jaccard similarity required for an
Expand Down Expand Up @@ -61,7 +61,7 @@
#' gsl <- list_gmts("path/to/gmt_folder/")
#' res <- read.csv("path/to/results.csv")
#'
#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
#' clust <- do_clust(jac)
#' net <- get_network_communities(jac, threshold = 0.3)
#'
Expand Down Expand Up @@ -89,7 +89,7 @@
#' dev.off()
#' }
#'
#' @seealso [calc_jaccard()], [do_clust()], [get_network_communities()],
#' @seealso [geneset_similarity()], [do_clust()], [get_network_communities()],
#' [get_superterm()], [network_clust_gg()]
#' @importFrom magrittr %>%
#' @importFrom rlang .data
Expand All @@ -110,7 +110,7 @@ network_clust <- function(x, clust_result,
# --- Input validation ---------------------------------------------------
if (!inherits(x, "JaccardResult")) {
stop(
"`x` must be a `JaccardResult` object (output of `calc_jaccard()`).",
"`x` must be a `JaccardResult` object (output of `geneset_similarity()`).",
call. = FALSE
)
}
Expand Down Expand Up @@ -317,7 +317,7 @@ network_clust <- function(x, clust_result,
#' For a base R igraph version that draws directly to the active graphics
#' device, see [network_clust()].
#'
#' @param x A `JaccardResult` object (output of [calc_jaccard()]).
#' @param x A `JaccardResult` object (output of [geneset_similarity()]).
#' @param clust_result A list returned by [do_clust()], used to color nodes by
#' hierarchical cluster assignment.
#' @param jaccard_threshold Numeric. Minimum Jaccard similarity required for an
Expand Down Expand Up @@ -356,7 +356,7 @@ network_clust <- function(x, clust_result,
#' gsl <- list_gmts("path/to/gmt_folder/")
#' res <- read.csv("path/to/results.csv")
#'
#' jac <- calc_jaccard(gsl, res, fdr_th = 0.05)
#' jac <- geneset_similarity(gsl, res, fdr_th = 0.05)
#' clust <- do_clust(jac)
#' net <- get_network_communities(jac, threshold = 0.3)
#'
Expand Down Expand Up @@ -389,7 +389,7 @@ network_clust <- function(x, clust_result,
#' plots$clean + plots$superterms
#' }
#'
#' @seealso [calc_jaccard()], [do_clust()], [get_network_communities()],
#' @seealso [geneset_similarity()], [do_clust()], [get_network_communities()],
#' [get_superterm()], [network_clust()]
#' @import ggplot2
#' @importFrom magrittr %>%
Expand Down Expand Up @@ -417,7 +417,7 @@ network_clust_gg <- function(x, clust_result,
# --- Input validation ---------------------------------------------------
if (!inherits(x, "JaccardResult")) {
stop(
"`x` must be a `JaccardResult` object (output of `calc_jaccard()`).",
"`x` must be a `JaccardResult` object (output of `geneset_similarity()`).",
call. = FALSE
)
}
Expand Down
Loading
Loading