The table below shows a collection of bioinformatics R packages and how they implement classic and phylogenetic alpha/beta ecology diversity metrics.
R Package | Classic alpha/beta | Phylogenetic alpha/beta |
---|---|---|
abdiv | Serial R | none |
adiv | Serial R | Serial R |
ampvis2 | vegan | Serial R |
animalcules | vegan | GUniFrac |
ecodist | Serial C/R | none |
ecodive | Parallel C | Parallel C |
entropart | Serial R | none |
GUniFrac | none | Serial C |
phyloseq | vegan | Parallel R |
mia | vegan | ecodive |
microbiome | vegan | phyloseq |
microeco | vegan | GUniFrac |
microViz | vegan | GUniFrac |
picante | vegan | Serial R |
phyloregion | vegan | Serial R |
phylosmith | vegan | none |
rbiom | ecodive | ecodive |
tidytacos | vegan | phyloseq |
vegan | Serial C | none |
Only ten packages - abdiv
, adiv
,
ampvis2
, ecodist
, entropart
,
GUniFrac
, phyloregion
, phyloseq
,
picante
, and vegan
- have their own
implementations of these algorithms. Other R packages import code from
ecodive
, GUniFrac
, phyloseq
,
and/or vegan
to handle alpha and beta diversity
computations. Therefore, only these ten packages will be
benchmarked.
We will use the bench
R package to track the runtime and
memory consumption of the diversity algorithms in each of the ten R
packages. The host system for these benchmarking runs has the following
specifications.
6-Core i5-9600K CPU @ 3.70GHz; 64.0 GB RAM
Windows 11 Pro x64 24H2 26100.4652
The bench::mark()
function also checks that the output
from all benchmarked expressions are equal.
We’ll use two datasets from the rbiom R package: hmp50
and gems
. hmp50
has 50 samples and a
phylogenetic tree. It will be used for benchmarking the UniFrac and
Faith phylogenetic metrics. The classic diversity metrics are much
faster to calculate. Therefore, we’ll use the 1,006-sample
gems
dataset for those.
The input and output formats for the six R packages are not identical, so the benchmarking code will transform data as needed. Whenever possible, these transformations will take place outside the timed block.
install.packages('pak')
pak::pkg_install(pkg = c(
'adiv', 'abdiv', 'bench', 'ecodive', 'entropart', 'GUniFrac', 'kasperskytte/ampvis2',
'phylocomr', 'phyloregion', 'phyloseq', 'picante', 'rbiom', 'vegan' ))
library(bench)
library(ggplot2)
library(ggrepel)
library(dplyr)
version$version.string
#> [1] "R version 4.5.1 (2025-06-13 ucrt)"
sapply(FUN = packageDescription, fields = 'Version', c(
'abdiv', 'adiv', 'ampvis2', 'ecodive', 'entropart', 'GUniFrac',
'phylocomr', 'phyloregion', 'phyloseq', 'picante', 'vegan' ))
#> abdiv adiv ampvis2 ecodive entropart GUniFrac phylocomr phyloregion phyloseq picante vegan
#> "0.2.0" "2.2.1" "2.8.9" "1.0.0" "1.6-16" "1.8" "0.3.4" "1.0.9" "1.52.0" "1.8.2" "2.7-1"
(n_cpus <- ecodive::n_cpus())
#> [1] 6
# abdiv only accepts two samples at a time
pairwise <- function (f, data, ...) {
pairs <- utils::combn(ncol(data), 2)
structure(
mapply(
FUN = function (i, j) f(data[,i], data[,j], ...),
i = pairs[1,], j = pairs[2,] ),
class = 'dist',
Labels = colnames(data),
Size = ncol(data),
Diag = FALSE,
Upper = FALSE )
}
# Remove any extraneous attributes from dist objects,
# allowing them to be compared with `all.equal()`.
cleanup <- function (x) {
attr(x, 'maxdist') <- NULL
attr(x, 'method') <- NULL
attr(x, 'call') <- NULL
return (x)
}
# HMP50 dataset has 50 Samples
hmp50 <- rbiom::hmp50
hmp50_phy <- rbiom::convert_to_phyloseq(hmp50)
hmp50_mtx <- as.matrix(hmp50)
hmp50_tmtx <- t(hmp50_mtx)
hmp50_pmtx <- t(t(hmp50_mtx) / colSums(hmp50_mtx))
hmp50_tree <- hmp50$tree
# GEMS dataset has 1006 Samples
gems_mtx <- as.matrix(rbiom::gems)
gems_tmtx <- t(gems_mtx)
Here we’ll compare the time and memory taken by the unweighted,
weighted, weight normalized, generalized, and variance adjusted UniFrac
functions from the abdiv
,ampvis2
ecodive
, GUniFrac
, phyloseq
,
phyloregion
and picante
R packages. Each of
the functions will be run 10 times to time them for speed, and 1 time to
analyze memory usage.
## Unweighted UniFrac
u_unifrac_res <- rbind(
local({
# cluster for phyloseq
cl <- parallel::makeCluster(n_cpus)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::unweighted_unifrac, hmp50_mtx, hmp50_tree)),
'ecodive' = cleanup(ecodive::unweighted_unifrac(hmp50_mtx, hmp50_tree)),
'GUniFrac' = cleanup(as.dist(GUniFrac::GUniFrac(hmp50_tmtx, hmp50_tree, alpha=1, verbose=FALSE)[[1]][,,2])),
'phyloregion' = cleanup(phyloregion::unifrac(hmp50_tmtx, hmp50_tree)),
'phyloseq' = cleanup(phyloseq::UniFrac(hmp50_phy, weighted=FALSE, normalized=FALSE, parallel=TRUE)),
'picante' = cleanup(picante::unifrac(hmp50_tmtx, hmp50_tree)) )
}),
# ampvis2 conflicts with phyloseq cluster, so run separately
bench::mark(
iterations = 10,
'ampvis2' = {
cleanup(ampvis2:::dist.unifrac(hmp50_mtx, hmp50_tree, weighted=FALSE, normalise=FALSE, num_threads=n_cpus))
doParallel::stopImplicitCluster() } )
)
u_unifrac_res[,1:9]
#> # A tibble: 5 × 13
#> expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
#> 1 abdiv 13.84s 14.38s 0.0676 20.1GB 1.87 10 277 2.46m
#> 2 ecodive 5.19ms 5.31ms 184. 770.5KB 0 10 0 54.23ms
#> 3 GUniFrac 77.89ms 80.4ms 11.7 92.1MB 1.17 10 1 858.32ms
#> 4 phyloseq 292.07ms 327.01ms 2.49 49.9MB 0 10 0 4.02s
#> 5 ampvis2 3.36s 3.44s 0.288 49.8MB 0.0320 9 1 31.29s
ggplot(u_unifrac_res, aes(x = median, y = mem_alloc)) +
geom_point() +
geom_label_repel(aes(label = as.character(expression))) +
labs(
title = 'Unweighted UniFrac Implementations',
subtitle = '50 sample all-vs-all benchmarking on six CPU cores',
x = 'Median Calculation Time (log scale; n=10)',
y = 'Memory Allocated\n(log scale)' ) +
theme_bw()
## Weighted UniFrac
w_unifrac_res <- rbind(
local({
# cluster for phyloseq
cl <- parallel::makeCluster(n_cpus)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::weighted_unifrac, hmp50_mtx, hmp50_tree)),
'ecodive' = cleanup(ecodive::weighted_unifrac(hmp50_mtx, hmp50_tree)),
'phyloseq' = cleanup(phyloseq::UniFrac(hmp50_phy, weighted=TRUE, normalized=FALSE, parallel=TRUE)) )
}),
# ampvis2 conflicts with phyloseq cluster, so run separately
bench::mark(
iterations = 10,
'ampvis2' = {
cleanup(ampvis2:::dist.unifrac(hmp50_mtx, hmp50_tree, weighted=TRUE, normalise=FALSE, num_threads=n_cpus))
doParallel::stopImplicitCluster() } )
)
## Weighted Normalized UniFrac
wn_unifrac_res <- rbind(
local({
# cluster for phyloseq
cl <- parallel::makeCluster(n_cpus)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::weighted_normalized_unifrac, hmp50_mtx, hmp50_tree)),
'ecodive' = cleanup(ecodive::weighted_normalized_unifrac(hmp50_mtx, hmp50_tree)),
'GUniFrac' = cleanup(as.dist(GUniFrac::GUniFrac(hmp50_tmtx, hmp50_tree, alpha=1, verbose=FALSE)[[1]][,,1])),
'phyloseq' = cleanup(phyloseq::UniFrac(hmp50_phy, weighted=TRUE, normalized=TRUE, parallel=TRUE)) )
}),
# ampvis2 conflicts with phyloseq cluster, so run separately
bench::mark(
iterations = 10,
'ampvis2' = {
cleanup(ampvis2:::dist.unifrac(hmp50_mtx, hmp50_tree, weighted=TRUE, normalise=TRUE, num_threads=n_cpus))
doParallel::stopImplicitCluster() } )
)
## Weighted Normalized UniFrac
g_unifrac_res <- rbind(
local({
# cluster for phyloseq
cl <- parallel::makeCluster(n_cpus)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::generalized_unifrac, hmp50_mtx, hmp50_tree, alpha=0.5)),
'ecodive' = cleanup(ecodive::generalized_unifrac(hmp50_mtx, hmp50_tree, alpha=0.5)),
'GUniFrac' = cleanup(as.dist(GUniFrac::GUniFrac(hmp50_tmtx, hmp50_tree, alpha=0.5, verbose=FALSE)[[1]][,,1])) )
})
)
## Variance Adjusted UniFrac
va_unifrac_res <- rbind(
local({
# cluster for phyloseq
cl <- parallel::makeCluster(n_cpus)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::variance_adjusted_unifrac, hmp50_mtx, hmp50_tree)),
'ecodive' = cleanup(ecodive::variance_adjusted_unifrac(hmp50_mtx, hmp50_tree)) )
})
)
unifrac_res <- bind_rows(
mutate(u_unifrac_res, `UniFrac Variant` = 'Unweighted'),
mutate(w_unifrac_res, `UniFrac Variant` = 'Weighted'),
mutate(wn_unifrac_res, `UniFrac Variant` = 'Weighted Normalized'),
mutate(g_unifrac_res, `UniFrac Variant` = 'Generalized'),
mutate(va_unifrac_res, `UniFrac Variant` = 'Variance Adjusted') ) %>%
mutate(Package = as.character(expression)) %>%
select(Package, `UniFrac Variant`, median, mem_alloc) %>%
arrange(Package)
print(unifrac_res, n = 21)
#> # A tibble: 21 × 4
#> Package `UniFrac Variant` median mem_alloc
#> <chr> <chr> <bch:tm> <bch:byt>
#> 1 GUniFrac Unweighted 77.43ms 113.4MB
#> 2 GUniFrac Weighted Normalized 78.29ms 92.15MB
#> 3 GUniFrac Generalized 75.25ms 92.15MB
#> 4 abdiv Unweighted 14.36s 20.05GB
#> 5 abdiv Weighted 13.61s 20.02GB
#> 6 abdiv Weighted Normalized 13.59s 20.03GB
#> 7 abdiv Generalized 13.72s 20.18GB
#> 8 abdiv Variance Adjusted 15.57s 24.46GB
#> 9 ampvis2 Unweighted 3.31s 53.44MB
#> 10 ampvis2 Weighted 3.25s 49.2MB
#> 11 ampvis2 Weighted Normalized 3.3s 49.34MB
#> 12 ecodive Unweighted 3.94ms 1.01MB
#> 13 ecodive Weighted 3.46ms 779.72KB
#> 14 ecodive Weighted Normalized 3.82ms 779.73KB
#> 15 ecodive Generalized 5.59ms 799.3KB
#> 16 ecodive Variance Adjusted 4.12ms 779.73KB
#> 17 phyloregion Unweighted 6.35ms 146.44MB
#> 18 phyloseq Unweighted 290.5ms 50.87MB
#> 19 phyloseq Weighted 287.27ms 49.4MB
#> 20 phyloseq Weighted Normalized 291.59ms 49.55MB
#> 21 picante Unweighted 2.43s 1.79GB
# How much faster and more memory efficient is ecodive?
plyr::ddply(unifrac_res, as.name('UniFrac Variant'), function (x) {
x[['median']] <- as.numeric(x[['median']])
x[['mem_alloc']] <- as.numeric(x[['mem_alloc']])
ecodive <- as.list(x[x[['Package']] == 'ecodive',])
x <- x[x[['Package']] != 'ecodive',]
data.frame(
speed = paste0(paste(collapse=' - ', round(range(x$median / ecodive$median))), 'x'),
memory = paste0(paste(collapse=' - ', round(range(x$mem_alloc / ecodive$mem_alloc))), 'x') )
})
#> UniFrac Variant speed memory
#> 1 Generalized 13 - 2454x 118 - 26475x
#> 2 Unweighted 2 - 3647x 50 - 20248x
#> 3 Variance Adjusted 3776 - 3776x 32891 - 32891x
#> 4 Weighted 83 - 3931x 65 - 26922x
#> 5 Weighted Normalized 20 - 3555x 65 - 26934x
ggplot(unifrac_res, aes(x = median, y = mem_alloc)) +
geom_point(aes(shape = `UniFrac Variant`), size = 2) +
geom_label_repel(
data = ~subset(., `UniFrac Variant` == 'Unweighted'),
mapping = aes(label = Package),
box.padding = 0.4,
min.segment.length = Inf ) +
scale_shape(solid = FALSE) +
scale_y_log10(labels = scales::label_bytes()) +
labs(
title = 'UniFrac Implementations',
subtitle = 'All-vs-all 50 sample benchmarking on six CPU cores',
x = 'Median Calculation Time (log scale; n=10)',
y = 'Memory Allocated\n(log scale; n=1)' ) +
theme_bw(base_size = 12) +
theme(axis.title.x = element_text(margin = margin(t = 10)))
ecodive
demonstrates substantial performance gains for
UniFrac, being 2 to 3,900x faster and using 50 - 32,000x less memory.
Here we’ll benchmark the Bray-Curtis, Euclidean, Jaccard, and
Manhattan classic beta diversity algorithms from the abdiv
,
ecodive
, ecodist
, and vegan
R
packages. Each of the functions will be run 10 times to time them for
speed, and 1 time to analyze memory usage.
bray_curtis_res <- bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::bray_curtis, gems_mtx)),
'ecodist' = cleanup(ecodist::bcdist(gems_tmtx)),
'ecodive' = cleanup(ecodive::bray_curtis(gems_mtx)),
'vegan' = cleanup(vegan::vegdist(gems_tmtx, 'bray')) )
jaccard_res <- bench::mark(
iterations = 10,
check = FALSE, # abdiv has incorrect output
'abdiv' = cleanup(pairwise(abdiv::jaccard, gems_mtx)),
'ecodist' = cleanup(ecodist::distance(gems_tmtx, 'jaccard')),
'ecodive' = cleanup(ecodive::jaccard(gems_mtx)),
'vegan' = cleanup(vegan::vegdist(gems_tmtx, 'jaccard')) )
manhattan_res <- bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::manhattan, gems_mtx)),
'ecodist' = cleanup(ecodist::distance(gems_tmtx, 'manhattan')),
'ecodive' = cleanup(ecodive::manhattan(gems_mtx)),
'vegan' = cleanup(vegan::vegdist(gems_tmtx, 'manhattan')) )
euclidean_res <- bench::mark(
iterations = 10,
'abdiv' = cleanup(pairwise(abdiv::euclidean, gems_mtx)),
'ecodist' = cleanup(ecodist::distance(gems_tmtx, 'euclidean')),
'ecodive' = cleanup(ecodive::euclidean(gems_mtx)),
'vegan' = cleanup(vegan::vegdist(gems_tmtx, 'euclidean')) )
bdiv_res <- bind_rows(
mutate(bray_curtis_res, Metric = 'Bray-Curtis'),
mutate(jaccard_res, Metric = 'Jaccard'),
mutate(manhattan_res, Metric = 'Manhattan'),
mutate(euclidean_res, Metric = 'Euclidean') ) %>%
mutate(Package = as.character(expression)) %>%
select(Package, Metric, median, mem_alloc) %>%
arrange(Package)
bdiv_res
#> # A tibble: 12 × 4
#> Package Metric median mem_alloc
#> <chr> <chr> <bch:tm> <bch:byt>
#> 1 abdiv Bray-Curtis 12.16s 14.7GB
#> 2 abdiv Jaccard 15.6s 22GB
#> 3 abdiv Manhattan 10.13s 13.2GB
#> 4 abdiv Euclidean 10.92s 16.1GB
#> 5 ecodive Bray-Curtis 72.8ms 26.3MB
#> 6 ecodive Jaccard 73.68ms 26.3MB
#> 7 ecodive Manhattan 73.47ms 26.3MB
#> 8 ecodive Euclidean 72.84ms 26.3MB
#> 9 vegan Bray-Curtis 1.75s 22.4MB
#> 10 vegan Jaccard 1.82s 22.3MB
#> 11 vegan Manhattan 1.68s 19.4MB
#> 12 vegan Euclidean 1.68s 19.4MB
# How much faster and more memory efficient is ecodive?
plyr::ddply(bdiv_res, 'Metric', function (x) {
x[['median']] <- as.numeric(x[['median']])
x[['mem_alloc']] <- as.numeric(x[['mem_alloc']])
ecodive <- as.list(x[x[['Package']] == 'ecodive',])
x <- x[x[['Package']] != 'ecodive',]
data.frame(
speed = paste0(paste(collapse=' - ', round(range(x$median / ecodive$median))), 'x'),
memory = paste0(paste(collapse=' - ', round(range(x$mem_alloc / ecodive$mem_alloc))), 'x') )
})
#> Metric speed memory
#> 1 Bray-Curtis 6 - 680x 1 - 571x
#> 2 Euclidean 24 - 2825x 1 - 849x
#> 3 Jaccard 25 - 3359x 1 - 1865x
#> 4 Manhattan 23 - 2340x 1 - 849x
ggplot(bdiv_res, aes(x = median, y = mem_alloc)) +
geom_point() +
facet_wrap('Metric', scales = 'fixed') +
geom_label_repel(aes(label = Package), direction = 'y') +
bench::scale_x_bench_time(limits = c(.05, 500), breaks = c(.1, 1, 10, 60, 300)) +
scale_y_log10(limits = 10^c(6,11.5), labels = scales::label_bytes()) +
labs(
title = 'Classic Beta Diversity Implementations',
subtitle = 'All-vs-all 1,006 sample benchmarking on six CPU cores',
x = 'Median Calculation Time (log scale; n=10)',
y = 'Memory Allocated\n(log scale; n=1)' ) +
theme_bw(base_size = 12) +
theme(axis.title.x = element_text(margin = margin(t = 10)))
ecodive
is 6 to 2,300x faster and uses 1 to 1,800x less
memory.
Last, we’ll compare the Shannon, Simpson, and Faith alpha diversity
implementations from the abdiv
, ecodive
,
entropart
, phyloregion
, and vegan
R packages. Faith’s phylogenetic diversity metric will be run on 50
samples, while Shannon and Simpson metrics will be run on 1,006
samples.
shannon_res <- bench::mark(
iterations = 10,
'abdiv' = apply(gems_mtx, 2L, abdiv::shannon),
'adiv' = adiv::speciesdiv(gems_tmtx, 'Shannon')[,1],
'ecodive' = ecodive::shannon(gems_mtx),
'entropart' = apply(gems_mtx, 2L, entropart::Shannon, CheckArguments = FALSE),
'vegan' = vegan::diversity(gems_tmtx, 'shannon') )
simpson_res <- bench::mark(
iterations = 10,
check = FALSE, # entropart is off by a small bit
'abdiv' = apply(gems_mtx, 2L, abdiv::simpson),
'adiv' = adiv::speciesdiv(gems_tmtx, 'GiniSimpson')[,1],
'ecodive' = ecodive::simpson(gems_mtx),
'entropart' = apply(gems_mtx, 2L, entropart::Simpson, CheckArguments = FALSE),
'vegan' = vegan::diversity(gems_tmtx, 'simpson') )
faith_res <- bench::mark(
iterations = 10,
check = FALSE, # entropart has incorrect output on non-ultrametric tree
'abdiv' = apply(hmp50_mtx, 2L, abdiv::faith_pd, hmp50_tree),
'adiv' = apply(hmp50_mtx, 2L, \(x) adiv::EH(hmp50_tree, rownames(hmp50_mtx)[x > 0])),
'ecodive' = ecodive::faith(hmp50_mtx, hmp50_tree),
'entropart' = apply(hmp50_pmtx, 2L, entropart::PDFD, hmp50_tree, CheckArguments = FALSE),
'phyloregion' = phyloregion::PD(hmp50_tmtx, hmp50_tree) )
adiv_res <- bind_rows(
mutate(shannon_res, Metric = 'Shannon x 1006'),
mutate(simpson_res, Metric = 'Simpson x 1006'),
mutate(faith_res, Metric = 'Faith PD x 50') ) %>%
mutate(Package = as.character(expression)) %>%
select(Package, Metric, median, mem_alloc) %>%
arrange(Package)
adiv_res
#> # A tibble: 15 × 4
#> Package Metric median mem_alloc
#> <chr> <chr> <bch:tm> <bch:byt>
#> 1 abdiv Shannon x 1006 67.06ms 89.5MB
#> 2 abdiv Simpson x 1006 14.77ms 26.75MB
#> 3 abdiv Faith PD x 50 490.64ms 651.37MB
#> 4 adiv Shannon x 1006 40.42ms 128.1MB
#> 5 adiv Simpson x 1006 34.19ms 53.18MB
#> 6 adiv Faith PD x 50 1.78s 488.78MB
#> 7 ecodive Shannon x 1006 7.53ms 14.74MB
#> 8 ecodive Simpson x 1006 7.48ms 14.72MB
#> 9 ecodive Faith PD x 50 684.95µs 749.23KB
#> 10 entropart Shannon x 1006 1.34s 296.1MB
#> 11 entropart Simpson x 1006 25.07ms 21.48MB
#> 12 entropart Faith PD x 50 29.48s 23.95GB
#> 13 phyloregion Faith PD x 50 2.91ms 1.93MB
#> 14 vegan Shannon x 1006 59.3ms 62.2MB
#> 15 vegan Simpson x 1006 39.69ms 56.27MB
# How much faster and more memory efficient is ecodive?
plyr::ddply(adiv_res, 'Metric', function (x) {
x[['median']] <- as.numeric(x[['median']])
x[['mem_alloc']] <- as.numeric(x[['mem_alloc']])
ecodive <- as.list(x[x[['Package']] == 'ecodive',])
x <- x[x[['Package']] != 'ecodive',]
data.frame(
speed = paste0(paste(collapse=' - ', round(range(x$median / ecodive$median))), 'x'),
memory = paste0(paste(collapse=' - ', round(range(x$mem_alloc / ecodive$mem_alloc))), 'x') )
})
#> Metric speed memory
#> 1 Faith PD x 50 4 - 43038x 3 - 33517x
#> 2 Shannon x 1006 5 - 178x 4 - 20x
#> 3 Simpson x 1006 2 - 5x 1 - 4x
ggplot(adiv_res, aes(x = median, y = mem_alloc)) +
geom_point(size = 2) +
geom_label_repel(aes(label = Package)) +
facet_wrap('Metric', nrow = 1, scales = 'free') +
scale_y_log10(labels = scales::label_bytes()) +
labs(
title = 'Alpha Diversity Implementations',
subtitle = '50 or 1,006 sample benchmarking on six CPU cores',
x = 'Median Calculation Time (log scale; n=10)',
y = 'Memory Allocated\n(log scale; n=1)' ) +
theme_bw(base_size = 12) +
theme(axis.title.x = element_text(margin = margin(t = 10)))
ecodive
is 2 to 43,000x faster and uses 1 to 33,000x
less memory.