Skip to content

Commit 0ac1312

Browse files
authored
Merge pull request #23 from ethanbass/improve_parallelization
v0.5.0
2 parents 562025e + 1c22b69 commit 0ac1312

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1252
-331
lines changed

.github/workflows/R-CMD-check.yaml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@ jobs:
1818
fail-fast: false
1919
matrix:
2020
config:
21-
- {os: macos-latest, r: 'release'}
21+
- {os: macos-latest, r: 'release', visual_tests: true}
2222
- {os: windows-latest, r: 'release'}
2323
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
2424
- {os: ubuntu-latest, r: 'release'}
2525
- {os: ubuntu-latest, r: 'oldrel-1'}
2626

2727
env:
28+
VISUAL_TESTS: ${{ matrix.config.visual_tests }}
2829
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
2930
R_KEEP_PKG_SOURCE: yes
3031

@@ -39,6 +40,11 @@ jobs:
3940
http-user-agent: ${{ matrix.config.http-user-agent }}
4041
use-public-rspm: true
4142

43+
- uses: r-lib/actions/setup-r-dependencies@v2
44+
with:
45+
extra-packages: any::rcmdcheck
46+
needs: check
47+
4248
- name: Set up Python 3.9
4349
uses: actions/setup-python@v3
4450
with:
@@ -51,12 +57,7 @@ jobs:
5157
Rscript -e "reticulate::conda_install('r-reticulate', 'python-kaleido')"
5258
Rscript -e "reticulate::conda_install('r-reticulate', 'plotly', channel = 'plotly')"
5359
Rscript -e "reticulate::use_miniconda('r-reticulate')"
54-
55-
- uses: r-lib/actions/setup-r-dependencies@v2
56-
with:
57-
extra-packages: any::rcmdcheck
58-
needs: check
59-
60+
6061
- uses: r-lib/actions/check-r-package@v2
6162
with:
6263
upload-snapshots: true

DESCRIPTION

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
Type: Package
22
Package: chromatographR
33
Title: Chromatographic Data Analysis Toolset
4-
Version: 0.4.8
5-
Authors@R:
4+
Version: 0.5.0
5+
Authors@R: c(
66
person("Ethan", "Bass", , "[email protected]", role = c("aut", "cre"),
7-
comment = c(ORCID = "0000-0002-6175-6739"))
7+
comment = c(ORCID = "0000-0002-6175-6739")),
8+
person(c("Hans","W"), "Borchers", role = c("ctb", "cph"),
9+
comment = c("Author of savgol and pinv functions bundled from pracma"))
10+
)
811
Maintainer: Ethan Bass <[email protected]>
912
Description: Tools for high-throughput analysis of HPLC-DAD/UV
1013
chromatograms (or similar data). Includes functions for preprocessing, alignment,
@@ -31,25 +34,30 @@ Imports:
3134
dynamicTreeCut,
3235
fastcluster,
3336
Formula,
37+
fs,
3438
graphics,
3539
grDevices,
3640
lattice,
3741
methods,
3842
minpack.lm,
3943
parallel,
4044
ptw,
45+
purrr,
4146
pvclust,
4247
scales,
4348
stats,
4449
utils,
4550
VPdtw
4651
Suggests:
52+
cowplot,
4753
ggplot2,
4854
knitr,
4955
openxlsx,
5056
pbapply,
5157
plotly,
58+
reticulate,
5259
rmarkdown,
60+
rsvg,
5361
spelling,
5462
testthat (>= 3.0.0),
5563
vdiffr

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ importFrom(lattice,panel.stripplot)
5959
importFrom(lattice,stripplot)
6060
importFrom(methods,new)
6161
importFrom(minpack.lm,nlsLM)
62+
importFrom(parallel,mclapply)
6263
importFrom(pvclust,pvclust)
6364
importFrom(pvclust,pvpick)
6465
importFrom(pvclust,pvrect)

NEWS.md

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,38 @@
1+
# chromatographR 0.5.0
2+
3+
#### New features
4+
5+
* Added `ggplot2` option to `plot_spectrum`, `plot.peak_table` and `plot_all_spectra` functions.
6+
* Reworked `write_chroms` for more sensible handling of paths and added `filename` argument.
7+
* Updated `get_purity` function to improve speed.
8+
* Added additional argument to `reshape_chroms` function for subsetting data by
9+
retention times (`rts`).
10+
* Added parallel processing through the `pbapply` package for the `correct_rt`,
11+
`get_peaks`, and `preprocess` functions by setting the `cl` argument.
12+
13+
#### Other changes
14+
15+
* Changed behavior of `preprocess` when inferring retention times so chromatograms are no longer rounded down to the largest integer.
16+
* In `preprocess`, spectral smoothing is no longer applied on 2D chromatograms, removing error message when preprocess is used with default settings.
17+
* Moved position of `...` argument to end in `plot.peak_table`.
18+
* Changed `progress_bar` argument to `show_progress` in `correct_rt`, `preprocess`
19+
and `get_peaks` to fix strange `pmatch` behavior with additional arguments to
20+
preprocess.
21+
* Changed orientation of "plotly" plots generated by `plot_spectrum` to match other
22+
plotting engines.
23+
* Deprecated the `mc.cores` argument in `correct_rt` is now deprecated in favor of the new
24+
`cl` argument.
25+
* Deprecated the `parallel` argument in `preprocess` in favor of just using `cl`.
26+
* Changed name of first argument in `mirror_plot` from `peak_table` to `x`. Otherwise the function has not changed.
27+
* Added additional tests, improving test coverage to 80%.
28+
* Updated `get_chrom_list` (internal) to allow parsing of subsetted lists.
29+
130
# chromatographR 0.4.8
231

3-
* Fixed `merge_peaks` function so it works properly (to combine 2 or more peaks in peak table).
32+
* Fixed bug in `merge_peaks` function so it works properly (to combine 2 or more
33+
peaks in a peak table).
434
* Fixed bugs in `plot_chroms` preventing plotting with `ggplot2` and plotting wrong chromatograms in base R.
5-
* Added additional tests for `plot_chroms`, reshape functions.
35+
* Added additional tests for `plot_chroms` and reshape functions.
636

737
# chromatographR 0.4.7
838

@@ -33,6 +63,7 @@
3363
# chromatographR 0.4.5
3464

3565
### New Features
66+
3667
* Added `reshape_chroms` function for converting chromatograms to "long" format.
3768
* Added `write_peaktable` function to easily write peak_table to `csv` or `xlsx`.
3869
* Added `get_purity` function for assessing peak purity.

R/attach_metadata.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ get_reference_spectra <- function(peak_table, chrom_list,
144144

145145
attach_ref_spectra <- function(peak_table, chrom_list, ref = c("max.cor","max.int")){
146146
check_peaktable(peak_table)
147-
peak_table$ref_spectra <- get_reference_spectra(peak_table, chrom_list, ref)
147+
peak_table$ref_spectra <- get_reference_spectra(peak_table, chrom_list, ref = ref)
148148
peak_table$args["reference_spectra"] <- ref
149149
return(peak_table)
150150
}
@@ -193,7 +193,7 @@ normalize_data <- function(peak_table, column, chrom_list,
193193
})))
194194
rownames(pktab) <- rownames(peak_table$tab)
195195
peak_table$tab <- pktab
196-
peak_table$args[c("normalized","normalization_by")] <- c(TRUE, column)
196+
peak_table$args[c("normalized", "normalization_by")] <- c(TRUE, column)
197197
return(peak_table)
198198
} else if (what == "chrom_list"){
199199
if (missing(chrom_list)){

R/correct_rt.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,13 @@
3535
#' @param maxshift Integer. Maximum allowable shift for \code{\link[VPdtw]{VPdtw}}.
3636
#' Defaults to 50.
3737
#' @param verbose Whether to print verbose output.
38-
#' @param progress_bar Logical. Whether to show progress bar. Defaults to
38+
#' @param show_progress Logical. Whether to show progress bar. Defaults to
3939
#' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed. Currently works
4040
#' only for \code{ptw} alignments.
41+
#' @param cl Argument to \code{\link[pbapply]{pblapply}} or \code{\link[parallel]{mclapply}}.
42+
#' Either an integer specifying the number of clusters to use for parallel
43+
#' processing or a cluster object created by \code{\link[parallel]{makeCluster}}.
44+
#' Defaults to 2. On Windows integer values will be ignored.
4145
#' @param \dots Optional arguments for the \code{\link[ptw:ptw]{ptw}} function.
4246
#' The only argument that cannot be changed is \code{warp.type}: this is always
4347
#' equal to \code{"global"}.
@@ -80,7 +84,7 @@ correct_rt <- function(chrom_list, lambdas, models = NULL, reference = 'best',
8084
init.coef = c(0, 1, 0), n.traces = NULL, n.zeros = 0,
8185
scale = FALSE, trwdth = 200, plot_it = FALSE,
8286
penalty = 5, maxshift = 50,
83-
verbose = FALSE, progress_bar, ...){
87+
verbose = FALSE, show_progress = NULL, cl = 2, ...){
8488
what <- match.arg(what, c("corrected.values", "models"))
8589
alg <- match.arg(alg, c("ptw", "vpdtw"))
8690

@@ -134,10 +138,7 @@ correct_rt <- function(chrom_list, lambdas, models = NULL, reference = 'best',
134138
penalty = penalty, maxshift = maxshift))
135139
if (alg == "ptw"){
136140
if (is.null(models)){
137-
if (missing(progress_bar)){
138-
progress_bar <- check_for_pkg("pbapply", return_boolean = TRUE)
139-
}
140-
laplee <- choose_apply_fnc(progress_bar)
141+
laplee <- choose_apply_fnc(show_progress, cl = cl)
141142
models <- laplee(seq_len(dim(allmats)[3]), function(ii){
142143
ptw(allmats.t[,, reference],
143144
allmats.t[,, ii], selected.traces = traces, init.coef = init.coef,

R/fit_peaks.R

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,12 @@
5656
find_peaks <- function(y, smooth_type=c("gaussian", "box", "savgol", "mva","tmva","none"),
5757
smooth_window = .001, slope_thresh = 0, amp_thresh = 0,
5858
bounds = TRUE){
59+
if (!is.vector(y)){
60+
stop("Please provide a vector to argument `y` to proceed.")
61+
}
5962
smooth_type <- match.arg(smooth_type, c("gaussian", "box","savgol", "mva","tmva","none"))
6063
if (smooth_window < 1){
61-
smooth_window <- length(y)*smooth_window
64+
smooth_window <- max(length(y) * smooth_window, 1)
6265
}
6366
# compute derivative (with or without smoothing)
6467
if (smooth_type == "savgol"){
@@ -71,7 +74,7 @@ find_peaks <- function(y, smooth_type=c("gaussian", "box", "savgol", "mva","tmva
7174
} else if (smooth_type == "box"){
7275
d <- diff(ksmooth(seq_along(y), y, kernel = "box", bandwidth = smooth_window)$y)
7376
} else if (smooth_type == "tmva"){
74-
d <- runmean(runmean(diff(y), k=smooth_window), k=smooth_window)
77+
d <- runmean(runmean(diff(y), k = smooth_window), k = smooth_window)
7578
} else{
7679
d <- diff(y)
7780
}
@@ -159,9 +162,10 @@ find_peaks <- function(y, smooth_type=c("gaussian", "box", "savgol", "mva","tmva
159162
#' good model for chromatographic peaks in isocratic HPLC? \emph{Chromatographia},
160163
#' /bold{26}: 285-296. \doi{10.1007/BF02268168}.
161164
#' @export fit_peaks
165+
162166
fit_peaks <- function (x, lambda, pos = NULL, sd.max = 50,
163167
fit = c("egh", "gaussian", "raw"), max.iter = 1000,
164-
estimate_purity = TRUE, noise_threshold=.001, ...){
168+
estimate_purity = TRUE, noise_threshold = .001, ...){
165169
y <- x[,lambda]
166170
fit <- match.arg(fit, c("egh", "gaussian", "raw"))
167171
if (is.null(pos)){
@@ -209,7 +213,7 @@ fit_peaks <- function (x, lambda, pos = NULL, sd.max = 50,
209213
#' Gaussian function
210214
#' @note: Adapted from \href{https://github.com/robertdouglasmorrison/DuffyTools/blob/master/R/gaussian.R}
211215
#' @noRd
212-
gaussian <- function(x, center=0, width=1, height=NULL, floor=0) {
216+
gaussian <- function(x, center = 0, width = 1, height=NULL, floor=0) {
213217

214218
# adapted from Earl F. Glynn; Stowers Institute for Medical Research, 2007
215219
twoVar <- 2 * width * width
@@ -315,7 +319,7 @@ fit_egh <- function(x1, y1, start.center=NULL, start.width=NULL, start.tau=NULL,
315319
if (is.null( start.floor)) start.floor <- quantile( y1, seq(0,1,0.1))[2]
316320
starts <- c(starts, "floor"=start.floor)
317321
nlsAns <- try(nlsLM(y1 ~ egh(x1, center, width, height, tau, floor),
318-
start=starts, control=controlList), silent=TRUE)
322+
start = starts, control = controlList), silent = TRUE)
319323
}
320324

321325
# package up the results to pass back
@@ -398,7 +402,7 @@ fitpk_raw <- function(x, pos, lambda, max.iter,
398402
#' @param fl Filter length (for instance fl = 51..151), has to be odd.
399403
#' @param forder filter order Filter order (2 = quadratic filter, 4 = quartic).
400404
#' @param dorder Derivative order (0 = smoothing, 1 = first derivative, etc.).
401-
#' @note This function is ported from \href{https://cran.r-project.org/web/packages/pracma/index.html}{pracma},
405+
#' @note This function is bundled from \href{https://cran.r-project.org/web/packages/pracma/index.html}{pracma},
402406
#' where it is licensed under GPL (>= 3).
403407
#' @importFrom stats convolve
404408
#' @noRd

R/get_peaks.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@
2929
#' @param estimate_purity Logical. Whether to estimate purity or not. Defaults
3030
#' to FALSE. (If TRUE, this will slow down the function significantly).
3131
#' @param noise_threshold Noise threshold. Argument to \code{get_purity}.
32-
#' @param progress_bar Logical. Whether to show progress bar. Defaults to
32+
#' @param show_progress Logical. Whether to show progress bar. Defaults to
3333
#' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.
34+
#' @param cl Argument to \code{\link[pbapply]{pblapply}} or \code{\link[parallel]{mclapply}}.
35+
#' Either an integer specifying the number of clusters to use for parallel
36+
#' processing or a cluster object created by \code{\link[parallel]{makeCluster}}.
37+
#' Defaults to 2. On Windows integer values will be ignored.
3438
#' @param \dots Additional arguments to \code{\link{find_peaks}}.
3539
#' @return The result is an S3 object of class \code{peak_list}, containing a nested
3640
#' list of data.frames containing information about the peaks fitted for each
@@ -69,7 +73,7 @@ get_peaks <- function(chrom_list, lambdas, fit = c("egh", "gaussian", "raw"),
6973
sd.max = 50, max.iter = 100,
7074
time.units = c("min", "s", "ms"),
7175
estimate_purity = FALSE, noise_threshold = .001,
72-
progress_bar, ...){
76+
show_progress = NULL, cl = 2, ...){
7377
time.units <- match.arg(time.units, c("min", "s", "ms"))
7478
tfac <- switch(time.units, "min" = 1, "s" = 60, "ms" = 60*1000)
7579
fit <- match.arg(fit, c("egh", "gaussian", "raw"))
@@ -90,10 +94,7 @@ get_peaks <- function(chrom_list, lambdas, fit = c("egh", "gaussian", "raw"),
9094
names(chrom_list) <- seq_along(chrom_list)
9195
}
9296
peaks<-list()
93-
if (missing(progress_bar)){
94-
progress_bar <- check_for_pkg("pbapply", return_boolean = TRUE)
95-
}
96-
laplee <- choose_apply_fnc(progress_bar)
97+
laplee <- choose_apply_fnc(show_progress, cl = cl)
9798
result <- laplee(seq_along(chrom_list), function(sample){
9899
suppressWarnings(ptable <- lapply(lambdas, function(lambda){
99100
cbind(sample = names(chrom_list)[sample], lambda,

R/get_peaktable.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989
#' get_peaktable(pks, response = "area")
9090
#' @seealso \code{\link{attach_ref_spectra}} \code{\link{attach_metadata}}
9191
#' @export get_peaktable
92-
#'
92+
9393
get_peaktable <- function(peak_list, chrom_list, response = c("area", "height"),
9494
use.cor = FALSE, hmax = 0.2, plot_it = FALSE,
9595
ask = plot_it, clust = c("rt","sp.rt"),
@@ -132,28 +132,28 @@ get_peaktable <- function(peak_list, chrom_list, response = c("area", "height"),
132132
}
133133
if (clust == 'sp.rt'){
134134
if (is.null(sigma.t)){
135-
sigma.t <- .5*mean(do.call(rbind,unlist(pkLst,recursive = FALSE))$end -
135+
sigma.t <- 0.5 * mean(do.call(rbind,unlist(pkLst,recursive = FALSE))$end -
136136
do.call(rbind,unlist(pkLst,recursive = FALSE))$start)
137137
}
138-
ts<- as.numeric(rownames(chrom_list[[1]]))
138+
ts <- as.numeric(rownames(chrom_list[[1]]))
139139
sp <- sapply(seq_along(pkcenters), function(i){
140140
rescale(t(chrom_list[[file.idx[i]]][
141141
which(elementwise.all.equal(ts, pkcenters[i])),]))
142142
}, simplify=TRUE)
143143
cor.matrix <- cor(sp, method = "pearson")
144144
mint <- abs(outer(unlist(pkcenters), unlist(pkcenters), FUN="-"))
145-
S <- (exp((-(1-abs(cor.matrix))^2)/(2*sigma.r^2)))*exp(-(mint^2)/(2*sigma.t^2))
146-
D <- 1-S
145+
S <- (exp((-(1 - abs(cor.matrix))^2)/(2*sigma.r^2)))*exp(-(mint^2)/(2*sigma.t^2))
146+
D <- 1 - S
147147
linkage <- "average"
148148
pkcenters.hcl <- hclust(as.dist(D), method = linkage)
149149
pkcenters.cl <- cutreeDynamicTree(pkcenters.hcl, maxTreeHeight = hmax,
150150
deepSplit = deepSplit, minModuleSize = 2)
151151
sing <- which(pkcenters.cl == 0)
152152
pkcenters.cl[sing] <- max(pkcenters.cl) + seq_along(sing)
153153
}
154-
cl.centers <- aggregate(cbind(rt,start,end,sd,tau,FWHM,r.squared,purity) ~
155-
pkcenters.cl, data=xx, "mean", na.rm=TRUE,
156-
na.action="na.pass")[,-1]
154+
vars <- c("rt", "start", "end", "sd", "tau", "FWHM", "r.squared", "purity")
155+
cl.centers <- aggregate(xx[,vars], by = list(pkcenters.cl), FUN = "mean",
156+
na.rm = TRUE, na.action = "na.pass")[,-1]
157157
ncl <- length(cl.centers$rt)
158158

159159
## re-order clusters from small to large rt

0 commit comments

Comments
 (0)