Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ Imports:
grDevices,
matrixStats,
Rdpack (>= 0.7),
Rfast,
stats,
TreeDist (> 2.2.0),
TreeTools (>= 1.9.1.9003),
Expand All @@ -58,7 +57,7 @@ Config/Needs/memcheck: devtools
Config/Needs/metadata: codemetar
Config/Needs/website: pkgdown
RdMacros: Rdpack
SystemRequirements: C99
SystemRequirements: C++17
ByteCompile: true
Encoding: UTF-8
Language: en-GB
Expand Down
8 changes: 4 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ export(RogueTaxa)
export(TipInstability)
export(TipVolatility)
importFrom(Rdpack,reprompt)
importFrom(Rfast,rowMads)
importFrom(Rfast,rowMedians)
importFrom(Rfast,rowVars)
importFrom(Rfast,rowmeans)
importFrom(TreeDist,ClusteringInfo)
importFrom(TreeDist,ConsensusInfo)
importFrom(TreeDist,PhylogeneticInfoDistance)
Expand All @@ -35,6 +31,10 @@ importFrom(cli,cli_progress_update)
importFrom(fastmatch,"%fin%")
importFrom(fastmatch,fmatch)
importFrom(grDevices,hcl.colors)
importFrom(matrixStats,rowMads)
importFrom(matrixStats,rowMeans2)
importFrom(matrixStats,rowMedians)
importFrom(matrixStats,rowSds)
importFrom(stats,cmdscale)
importFrom(stats,setNames)
importFrom(utils,capture.output)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Rogue v2.2.0.9000 (development)

- Improve performance of `TipInstability()` (and thence `ColByStability()`,
`QuickRogue()`).
- Remove `Rfast` dependency.


# Rogue v2.2.0 (2026-03-20)

## Performance
Expand Down
72 changes: 34 additions & 38 deletions R/stability.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ Cophenetic <- function(x, nTip = length(x$tip.label), log = FALSE,
#' calculate leaf stability.
#' @param checkTips Logical specifying whether to check that tips are numbered
#' consistently.
#' @param parallel Logical specifying whether parallel execution should take
#' place in C++.
#' @param parallel Logical, retained for backwards compatibility; the fused
#' C implementation is single-threaded, so this argument is now ignored.
#' @references
#' \insertAllCited{}
#' @examples
Expand All @@ -106,7 +106,7 @@ Cophenetic <- function(x, nTip = length(x$tip.label), log = FALSE,
#' plot(ConsensusWithout(trees, names(instab[instab > 0.2])))
#' @template MRS
#' @family tip instability functions
#' @importFrom Rfast rowmeans rowMads rowMedians rowVars
#' @importFrom matrixStats rowSds rowMads rowMeans2 rowMedians
#' @export
TipInstability <- function(trees, log = TRUE, average = "mean",
deviation = "sd",
Expand All @@ -131,17 +131,25 @@ TipInstability <- function(trees, log = TRUE, average = "mean",
nTip <- NTip(trees[[1]])
}

lt_idx <- which(lower.tri(matrix(TRUE, nTip, nTip)))
whichDev <- pmatch(tolower(deviation), c("sd", "mad"))
if (is.na(whichDev)) {
stop("`deviation` must be 'sd' or 'mad'")
}
whichAve <- pmatch(tolower(average), c("mean", "median"))
if (is.na(whichAve)) {
stop("`average` must be 'mean' or 'median'")
}

nEdge <- nrow(trees[[1]]$edge)
nNode <- trees[[1]]$Nnode
# Batch C path requires uniform tree dimensions and log = TRUE
# Fused C path requires uniform tree dimensions and log = TRUE
useBatch <- isTRUE(log) &&
all(vapply(trees, function(tr) nrow(tr$edge) == nEdge, logical(1)))

if (useBatch) {
# Batch C call: returns lower-triangle entries directly (nPairs × nTree)
# Ensure preorder (the per-tree GraphGeodesic path does this internally)
# A single C call computes the geodesics and reduces them to a per-leaf
# instability score, never materializing the nPairs × nTree distance
# matrix (nor the symmetric deviation matrix) as an R object.
trees <- lapply(trees, Preorder)
parent_all <- integer(nEdge * length(trees))
child_all <- integer(nEdge * length(trees))
Expand All @@ -150,50 +158,38 @@ TipInstability <- function(trees, log = TRUE, average = "mean",
parent_all[rng] <- trees[[k]]$edge[, 1] - 1L
child_all[rng] <- trees[[k]]$edge[, 2] - 1L
}
dists_lt <- matrix(
.Call(`LOG_GRAPH_GEODESIC_MULTI`,
n_tip = as.integer(nTip),
n_node = as.integer(nNode),
parent = as.integer(parent_all),
child = as.integer(child_all),
n_edge = as.integer(nEdge),
n_tree = as.integer(length(trees))),
ncol = length(trees)
)
} else {
dists <- vapply(trees, GraphGeodesic, double(nTip * nTip),
nTip = nTip, log = log, asMatrix = FALSE)
dists_lt <- dists[lt_idx, , drop = FALSE]
score <- .Call(`TIP_INSTABILITY`,
n_tip = as.integer(nTip),
n_node = as.integer(nNode),
parent = as.integer(parent_all),
child = as.integer(child_all),
n_edge = as.integer(nEdge),
n_tree = as.integer(length(trees)),
which_ave = as.integer(whichAve),
which_dev = as.integer(whichDev))
return(setNames(score, TipLabels(trees[[1]])))
}

# Auto-enable OpenMP parallelism for Rfast operations on large matrices
use_parallel <- parallel || nrow(dists_lt) > 1000L
# Fallback (log = FALSE, or trees of differing resolution): reduce in R.
lt_idx <- which(lower.tri(matrix(TRUE, nTip, nTip)))
dists <- vapply(trees, GraphGeodesic, double(nTip * nTip),
nTip = nTip, log = log, asMatrix = FALSE)
dists_lt <- dists[lt_idx, , drop = FALSE]

whichDev <- pmatch(tolower(deviation), c("sd", "mad"))
if (is.na(whichDev)) {
stop("`deviation` must be 'sd' or 'mad'")
}
devs_lt <- switch(whichDev,
rowVars(dists_lt, std = TRUE, parallel = use_parallel),
rowMads(dists_lt, parallel = use_parallel))
rowSds(dists_lt),
rowMads(dists_lt))
devs_lt[is.nan(devs_lt)] <- 0

whichAve <- pmatch(tolower(average), c("mean", "median"))
if (is.na(whichAve)) {
stop("`average` must be 'mean' or 'median'")
}
aves_lt <- switch(whichAve, rowmeans, Rfast::rowMedians)(dists_lt)
aves_lt <- switch(whichAve, rowMeans2(dists_lt), rowMedians(dists_lt))
meanAve <- mean(aves_lt)

# Reconstruct symmetric deviation matrix from lower triangle
devs <- matrix(0, nTip, nTip)
devs[lt_idx] <- devs_lt
devs <- devs + t(devs)

setNames(
Rfast::rowmeans(devs) / meanAve, # faster than Rfast::colmeans
TipLabels(trees[[1]])
)
setNames(rowMeans2(devs) / meanAve, TipLabels(trees[[1]]))
}

#' `ColByStability()` returns a colour reflecting the instability of each leaf.
Expand Down
56 changes: 56 additions & 0 deletions benchmark/ab_bench_final.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
.libPaths(c("C:/Users/pjjg18/GitHub/Rogue/.ab_dev",
"C:/Users/pjjg18/GitHub/Rogue/.ab_ref",
.libPaths()))
library(RogueRef)
library(RogueDev)
library(TreeTools)
library(bench)

trees150 <- ape::read.tree(system.file("example/150.bs", package="RogueRef",
lib.loc="C:/Users/pjjg18/GitHub/Rogue/.ab_ref"))
trees_med <- trees150[1:100]
trees_small <- AddTipEverywhere(BalancedTree(8), "Rogue")

# Warmup (includes OpenMP thread pool init)
invisible(RogueRef::TipInstability(trees_small))
invisible(RogueDev::TipInstability(trees_small))
invisible(RogueRef::QuickRogue(trees_small, info="phylogenetic"))
invisible(RogueDev::QuickRogue(trees_small, info="phylogenetic"))

cat("=== Canary: GraphGeodesic (single tree, should be neutral) ===\n")
b <- bench::mark(
ref = RogueRef::GraphGeodesic(trees_small[[1]], log=TRUE),
dev = RogueDev::GraphGeodesic(trees_small[[1]], log=TRUE),
min_iterations=50, check=FALSE)
print(b[, c("expression","min","median","n_itr")])

cat("\n=== TipInstability (100 trees x 150 tips) ===\n")
b <- bench::mark(
ref = RogueRef::TipInstability(trees_med, log=TRUE, average="median", deviation="mad"),
dev = RogueDev::TipInstability(trees_med, log=TRUE, average="median", deviation="mad"),
min_iterations=5, check=FALSE)
print(b[, c("expression","min","median","n_itr","mem_alloc")])

cat("\n=== QuickRogue (100 trees x 150 tips) ===\n")
b <- bench::mark(
ref = RogueRef::QuickRogue(trees_med, info="phylogenetic"),
dev = RogueDev::QuickRogue(trees_med, info="phylogenetic"),
min_iterations=3, check=FALSE)
print(b[, c("expression","min","median","n_itr","mem_alloc")])

cat("\n=== RogueTaxa fspic (100 trees x 150 tips) ===\n")
b <- bench::mark(
ref = RogueRef::RogueTaxa(trees_med, info="fspic"),
dev = RogueDev::RogueTaxa(trees_med, info="fspic"),
min_iterations=3, check=FALSE)
print(b[, c("expression","min","median","n_itr","mem_alloc")])

# Verify correctness
cat("\n=== Correctness check ===\n")
r_ref <- RogueRef::QuickRogue(trees_med, info="phylogenetic")
r_dev <- RogueDev::QuickRogue(trees_med, info="phylogenetic")
cat("Results match:", identical(r_ref, r_dev), "\n")

ti_ref <- RogueRef::TipInstability(trees_med, log=TRUE, average="median", deviation="mad")
ti_dev <- RogueDev::TipInstability(trees_med, log=TRUE, average="median", deviation="mad")
cat("TipInstability match:", all.equal(ti_ref, ti_dev), "\n")
19 changes: 19 additions & 0 deletions benchmark/vtune-driver.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# VTune driver: exercise the C hot paths in TipInstability
library(Rogue, lib.loc = ".vtune-lib")
library(TreeTools)

trees150 <- ape::read.tree(system.file("example/150.bs", package = "Rogue",
lib.loc = ".vtune-lib"))
trees_med <- trees150[1:100]

# Warmup
invisible(TipInstability(trees_med, log = TRUE, average = "median",
deviation = "mad"))

# Repeat enough times for meaningful VTune sampling (~30s target)
cat("Starting VTune workload...\n")
for (i in seq_len(50)) {
TipInstability(trees_med, log = TRUE, average = "median",
deviation = "mad", checkTips = FALSE)
}
cat("Done.\n")
4 changes: 2 additions & 2 deletions man/RogueTaxa.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/TipInstability.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions src/Makevars
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
SOURCES = graph_geodesic.c rnr/Array.c rnr/BitVector.c rnr/Dropset.c rnr/HashTable.c rnr/List.c rnr/Node.c rnr/ProfileElem.c rnr/RogueNaRok.c rnr/Tree.c rnr/common.c rnr/legacy.c rnr/newFunctions.c Rogue_init.c
OBJECTS = $(SOURCES:.c=.o)
CXX_STD = CXX17
C_SOURCES = graph_geodesic.c rnr/Array.c rnr/BitVector.c rnr/Dropset.c rnr/HashTable.c rnr/List.c rnr/Node.c rnr/ProfileElem.c rnr/RogueNaRok.c rnr/Tree.c rnr/common.c rnr/legacy.c rnr/newFunctions.c Rogue_init.c
CXX_SOURCES = tip_instability.cpp
SOURCES = $(C_SOURCES) $(CXX_SOURCES)
OBJECTS = $(C_SOURCES:.c=.o) $(CXX_SOURCES:.cpp=.o)
2 changes: 2 additions & 0 deletions src/Rogue_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ extern SEXP RogueNaRok(SEXP, SEXP, SEXP, SEXP, SEXP,
extern SEXP GRAPH_GEODESIC(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP LOG_GRAPH_GEODESIC(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP LOG_GRAPH_GEODESIC_MULTI(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP TIP_INSTABILITY(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);

static const R_CallMethodDef callMethods[] = {
{"GRAPH_GEODESIC", (DL_FUNC) &GRAPH_GEODESIC, 5},
{"LOG_GRAPH_GEODESIC", (DL_FUNC) &LOG_GRAPH_GEODESIC, 5},
{"LOG_GRAPH_GEODESIC_MULTI", (DL_FUNC) &LOG_GRAPH_GEODESIC_MULTI, 6},
{"TIP_INSTABILITY", (DL_FUNC) &TIP_INSTABILITY, 8},
{"RogueNaRok", (DL_FUNC) &RogueNaRok, 10},
{NULL, NULL, 0}
};
Expand Down
28 changes: 28 additions & 0 deletions src/geodesic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef ROGUE_GEODESIC_H
#define ROGUE_GEODESIC_H

/* Shared declarations so the C++ translation unit (tip_instability.cpp) can
* reuse the geodesic worker and log lookup table defined in graph_geodesic.c.
* Wrapped in extern "C" when included from C++ so the symbols match the
* (unmangled) C definitions. */

#ifdef __cplusplus
extern "C" {
#endif

/* Lookup table of natural logs, lg[k] == log(k); populated at load time by a
* constructor in graph_geodesic.c. */
extern double lg[];

/* Fill `ret` (an all_nodes * all_nodes int matrix) with pairwise node graph
* geodesics for one tree. See graph_geodesic.c for the algorithm. */
void graph_geodesic_phylo(const int *n_tip, const int *n_node,
const int *parent, const int *child,
const int *n_edge, const int *all_nodes,
int *ret);

#ifdef __cplusplus
}
#endif

#endif /* ROGUE_GEODESIC_H */
1 change: 1 addition & 0 deletions src/graph_geodesic.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <R.h>
#include <Rinternals.h>
#include <assert.h>
#include "geodesic.h"

#define GEOD_MAX 32768

Expand Down
Loading
Loading