Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/memcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
branches:
- main
- master
- '**valgrind**'
paths:
- '.github/workflows/memcheck.yml'
- 'src/**'
Expand All @@ -30,7 +31,8 @@ name: mem-check
jobs:
mem-check:
runs-on: ubuntu-24.04
name: valgrind ${{ matrix.config.test }}, ubuntu, R release

name: valgrind ${{ matrix.config.test }}

strategy:
fail-fast: false
Expand All @@ -39,12 +41,13 @@ jobs:
- {test: 'tests'}
- {test: 'examples'}
- {test: 'vignettes'}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
_R_CHECK_FORCE_SUGGESTS_: false
RSPM: https://packagemanager.rstudio.com/cran/__linux__/noble/latest
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
ASAN_OPTIONS: verify_asan_link_order=0

steps:
- uses: ms609/actions/memcheck@main
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: TreeDist
Title: Calculate and Map Distances Between Phylogenetic Trees
Version: 2.13.0.9002
Version: 2.13.0.9003
Authors@R: c(person("Martin R.", "Smith",
email = "martin.smith@durham.ac.uk",
role = c("aut", "cre", "cph", "prg"),
Expand Down
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# TreeDist 2.13.0.9002
# TreeDist 2.13.0.9003

## New features

Expand All @@ -17,6 +17,12 @@
Information) C++ implementations are now exposed via `inst/include/TreeDist/`
headers, allowing downstream packages to use `LinkingTo: TreeDist`.

## Internals

- Stack-allocated split buffers have been replaced with dynamically sized
vectors, removing a hard dependency on the compile-time `SL_MAX_SPLITS`
constant. TreeDist now supports trees of any size permitted by TreeTools.

## Performance

- `RobinsonFoulds()` now uses a fast C++ batch path for cross-distance
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ cpp_mci_impl_score <- function(x, y, n_tips) {
.Call(`_TreeDist_cpp_mci_impl_score`, x, y, n_tips)
}

# Thin wrapper around the registered native routine
# `_TreeDist_cpp_sl_max_tips`: takes no arguments and returns whatever the
# native routine returns.
# NOTE(review): this looks like a generated RcppExports wrapper; manual
# edits may be overwritten when the exports are regenerated -- confirm.
cpp_sl_max_tips <- function() {
.Call(`_TreeDist_cpp_sl_max_tips`)
}

# Thin wrapper around the registered native routine
# `_TreeDist_cpp_robinson_foulds_distance`; forwards `x`, `y` and `nTip`
# unchanged and returns the native routine's result.
# The matching C++ prototype takes two raw matrices (`x`, `y`) and an
# integer vector (`nTip`), and returns a list.
cpp_robinson_foulds_distance <- function(x, y, nTip) {
.Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
}
Expand Down
4 changes: 2 additions & 2 deletions R/transfer_consensus.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ TransferConsensus <- function(trees,
if (nTip < 4L) {
return(StarTree(tipLabels))
}
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

# Convert each tree to a raw split matrix (TreeTools C++ internally).
# as.Splits() will error if a tree's tips don't match tipLabels.
Expand Down Expand Up @@ -114,7 +114,7 @@ tc_profile <- function(trees, scale = TRUE, greedy = "best",
tipLabels <- TipLabels(trees[[1]])
nTip <- length(tipLabels)
if (nTip < 4L) stop("Need at least 4 tips for profiling.")
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

splitsList <- lapply(trees, function(tr) unclass(as.Splits(tr, tipLabels)))

Expand Down
4 changes: 2 additions & 2 deletions R/tree_distance.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,
}
nTip <- length(tipLabels)
if (nTip < 4) return(NULL) # nocov
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

splits_list <- as.Splits(tree1, tipLabels = tipLabels)
n_threads <- as.integer(getOption("mc.cores", 1L))
Expand Down Expand Up @@ -203,7 +203,7 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,

nTip <- length(tipLabels1)
if (nTip < 4) return(NULL)
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

splits1 <- as.Splits(tree1, tipLabels = tipLabels1)
splits2 <- as.Splits(tree2, tipLabels = tipLabels1) # Use tipLabels1 to ensure order consistency
Expand Down
4 changes: 2 additions & 2 deletions R/tree_distance_transfer.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ TransferDistSplits <- function(splits1, splits2,
if (is.null(tipLabels)) return(NULL)
nTip <- length(tipLabels)
if (nTip < 4L) return(NULL)
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

# Check all trees share same tip set
allLabels <- TipLabels(tree1)
Expand Down Expand Up @@ -211,7 +211,7 @@ TransferDistSplits <- function(splits1, splits2,
if (is.null(tipLabels)) return(NULL)
nTip <- length(tipLabels)
if (nTip < 4L) return(NULL)
if (nTip > 32767L) stop("This many tips are not (yet) supported.")
.CheckMaxTips(nTip)

# Check all trees share same tip set
allLabels1 <- TipLabels(trees1)
Expand Down
27 changes: 18 additions & 9 deletions R/tree_distance_utilities.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# Validate that nTip does not exceed the compiled SL_MAX_TIPS limit.
# Called from distance entry points before any C++ work.
#
# nTip: number of tips to validate. `NA` (tip count not known) is accepted
#   without any check.
# Returns `NULL` invisibly when the count is acceptable; otherwise signals an
# error (with `call. = FALSE`) reporting the requested count and the limit
# held in `.SL_MAX_TIPS`.
.CheckMaxTips <- function(nTip) {
  if (!is.na(nTip) && nTip > .SL_MAX_TIPS) {
    # Default coercion renders doubles such as 1e5 as "1e+05"; format the
    # counts explicitly so the message always shows plain digits.
    ShowCount <- function(n) format(n, scientific = FALSE, trim = TRUE)
    stop(
      "Trees with ", ShowCount(nTip), " tips exceed the compiled limit of ",
      ShowCount(.SL_MAX_TIPS), " tips.",
      # Only suggest an upgrade when the limit is below 32768.
      if (.SL_MAX_TIPS < 32768L) {
        "\nUpdate TreeTools and reinstall TreeDist to support more tips."
      } else {
        ""
      },
      call. = FALSE
    )
  }
  invisible(NULL)
}

#' Wrapper for tree distance calculations
#'
#' Calls tree distance functions from trees or lists of trees
Expand Down Expand Up @@ -132,9 +147,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
# Fast paths: use OpenMP batch functions when all trees share the same tip
# set and no R-level cluster has been configured. Each branch mirrors the
# generic path exactly but avoids per-pair R overhead.
if (!is.na(nTip) && nTip > 32767L) {
stop("This many tips are not (yet) supported.")
}
.CheckMaxTips(nTip)
if (!is.na(nTip) && is.null(cluster)) {
.n_threads <- as.integer(getOption("mc.cores", 1L))
.batch_result <- if (identical(Func, MutualClusteringInfoSplits)) {
Expand Down Expand Up @@ -235,9 +248,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
#' @importFrom stats setNames
.SplitDistanceManyMany <- function(Func, splits1, splits2,
tipLabels, nTip = length(tipLabels), ...) {
if (!is.na(nTip) && nTip > 32767L) {
stop("This many tips are not (yet) supported.")
}
.CheckMaxTips(nTip)
if (is.na(nTip)) {
tipLabels <- union(unlist(tipLabels, use.names = FALSE),
unlist(TipLabels(splits2), use.names = FALSE))
Expand Down Expand Up @@ -408,9 +419,7 @@ CalculateTreeDistance <- function(Func, tree1, tree2 = NULL,
if (ncol(x) != ncol(y)) {
stop("Input splits must address same number of tips.")
}
if (nTip > 32767L) {
stop("This many tips are not (yet) supported.")
}
.CheckMaxTips(nTip)
}

.CheckLabelsSame <- function(labelList) {
Expand Down
6 changes: 6 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Tip limit used by .CheckMaxTips(); a NULL placeholder until .onLoad() runs.
.SL_MAX_TIPS <- NULL

# Load hook: store the value reported by cpp_sl_max_tips() in the
# `.SL_MAX_TIPS` binding of the environment in which this hook is defined.
.onLoad <- function(libname, pkgname) {
  assign(
    ".SL_MAX_TIPS",
    cpp_sl_max_tips(),
    envir = parent.env(environment())
  )
}

.onUnload <- function(libpath) {
StopParallel(quietly = TRUE)
library.dynam.unload("TreeDist", libpath)
Expand Down
4 changes: 0 additions & 4 deletions memcheck/all.R

This file was deleted.

5 changes: 2 additions & 3 deletions memcheck/tests.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Code to be run with
# R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R
# First build and install the package.
# Code to be run with
# R -d "valgrind --tool=memcheck --leak-check=full --error-exitcode=1" --vanilla < memcheck/thisfile.R
library("TreeDist")
devtools::test()
8 changes: 3 additions & 5 deletions memcheck/vignettes.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Code to be run with
# R -d "valgrind --tool=memcheck --leak-check=full" --vanilla < tests/thisfile.R
# First build and install the package.
library("TreeDist")
devtools::build_vignettes()
# Code to be run with
# R -d "valgrind --tool=memcheck --leak-check=full --error-exitcode=1" --vanilla < memcheck/thisfile.R
devtools::build_vignettes(install = FALSE)
11 changes: 11 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,16 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// cpp_sl_max_tips
// .Call entry point for cpp_sl_max_tips(): takes no arguments, calls the
// C++ function and returns its int result converted with Rcpp::wrap().
// NOTE(review): this appears to be generated Rcpp glue; manual edits may be
// lost when the exports are regenerated -- confirm before editing by hand.
int cpp_sl_max_tips();
RcppExport SEXP _TreeDist_cpp_sl_max_tips() {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
// NOTE(review): an RNG scope is opened although the wrapped function takes
// no inputs; presumably unnecessary if it draws no random numbers -- confirm.
Rcpp::RNGScope rcpp_rngScope_gen;
rcpp_result_gen = Rcpp::wrap(cpp_sl_max_tips());
return rcpp_result_gen;
END_RCPP
}
// cpp_robinson_foulds_distance
List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
Expand Down Expand Up @@ -780,6 +790,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_TreeDist_cpp_transfer_dist_all_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_all_pairs, 4},
{"_TreeDist_cpp_transfer_dist_cross_pairs", (DL_FUNC) &_TreeDist_cpp_transfer_dist_cross_pairs, 5},
{"_TreeDist_cpp_mci_impl_score", (DL_FUNC) &_TreeDist_cpp_mci_impl_score, 3},
{"_TreeDist_cpp_sl_max_tips", (DL_FUNC) &_TreeDist_cpp_sl_max_tips, 0},
{"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3},
{"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3},
{"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3},
Expand Down
14 changes: 14 additions & 0 deletions src/pairwise_distances.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ NumericVector cpp_mutual_clustering_all_pairs(
const int n_tip,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
if (N < 2) return NumericVector(0);

Expand Down Expand Up @@ -397,6 +398,7 @@ NumericVector cpp_rf_info_all_pairs(
const int n_tip,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
if (N < 2) return NumericVector(0);
const int n_pairs = N * (N - 1) / 2;
Expand Down Expand Up @@ -516,6 +518,7 @@ NumericVector cpp_msd_all_pairs(
const int n_tip,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
if (N < 2) return NumericVector(0);
const int n_pairs = N * (N - 1) / 2;
Expand Down Expand Up @@ -618,6 +621,7 @@ NumericVector cpp_msi_all_pairs(
const int n_tip,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
if (N < 2) return NumericVector(0);
const int n_pairs = N * (N - 1) / 2;
Expand Down Expand Up @@ -710,6 +714,7 @@ NumericVector cpp_shared_phylo_all_pairs(
const int n_tip,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
if (N < 2) return NumericVector(0);
const int n_pairs = N * (N - 1) / 2;
Expand Down Expand Up @@ -875,6 +880,7 @@ NumericVector cpp_jaccard_all_pairs(
const bool allow_conflict = true,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
if (N < 2) return NumericVector(0);
const int n_pairs = N * (N - 1) / 2;
Expand Down Expand Up @@ -944,6 +950,7 @@ NumericMatrix cpp_mutual_clustering_cross_pairs(
const List& splits_a, const List& splits_b,
const int n_tip, const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int nA = splits_a.size();
const int nB = splits_b.size();
if (nA == 0 || nB == 0) return NumericMatrix(nA, nB);
Expand Down Expand Up @@ -987,6 +994,7 @@ NumericMatrix cpp_rf_info_cross_pairs(
const List& splits_a, const List& splits_b,
const int n_tip, const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int nA = splits_a.size();
const int nB = splits_b.size();
if (nA == 0 || nB == 0) return NumericMatrix(nA, nB);
Expand Down Expand Up @@ -1027,6 +1035,7 @@ NumericMatrix cpp_msd_cross_pairs(
const List& splits_a, const List& splits_b,
const int n_tip, const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int nA = splits_a.size();
const int nB = splits_b.size();
if (nA == 0 || nB == 0) return NumericMatrix(nA, nB);
Expand Down Expand Up @@ -1070,6 +1079,7 @@ NumericMatrix cpp_msi_cross_pairs(
const List& splits_a, const List& splits_b,
const int n_tip, const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int nA = splits_a.size();
const int nB = splits_b.size();
if (nA == 0 || nB == 0) return NumericMatrix(nA, nB);
Expand Down Expand Up @@ -1110,6 +1120,7 @@ NumericMatrix cpp_shared_phylo_cross_pairs(
const List& splits_a, const List& splits_b,
const int n_tip, const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int nA = splits_a.size();
const int nB = splits_b.size();
if (nA == 0 || nB == 0) return NumericMatrix(nA, nB);
Expand Down Expand Up @@ -1153,6 +1164,7 @@ NumericMatrix cpp_jaccard_cross_pairs(
const bool allow_conflict = true,
const int n_threads = 1
) {
TreeDist::check_ntip(n_tip);
const int nA = splits_a.size();
const int nB = splits_b.size();
if (nA == 0 || nB == 0) return NumericMatrix(nA, nB);
Expand Down Expand Up @@ -1206,6 +1218,7 @@ NumericVector cpp_clustering_entropy_batch(
const List& splits_list,
const int n_tip
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
NumericVector result(N);
if (N == 0 || n_tip <= 0) return result;
Expand Down Expand Up @@ -1239,6 +1252,7 @@ NumericVector cpp_splitwise_info_batch(
const List& splits_list,
const int n_tip
) {
TreeDist::check_ntip(n_tip);
const int N = splits_list.size();
NumericVector result(N);
if (N == 0 || n_tip < 4) return result;
Expand Down
Loading
Loading