diff --git a/DESCRIPTION b/DESCRIPTION index 08419bc1..64c72957 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Depends: R (>= 3.1.0), qdapDictionaries (>= 1.0.2), qdapRegex (>= 0.1.2), qdapTo Imports: chron, dplyr (>= 0.3), gdata, gender (>= 0.5.1), ggplot2 (>= 2.1.0), grid, gridExtra, igraph, methods, NLP, openNLP (>= 0.2-1), parallel, plotrix, RCurl, reports, reshape2, scales, stringdist, tidyr, tm (>= 0.7.2), tools, venneuler, wordcloud, - xlsx, XML + xlsx, XML, mgsub Suggests: koRpus, knitr, lda, proxy, stringi, SnowballC, testthat LazyData: TRUE VignetteBuilder: knitr diff --git a/R/multigsub.R b/R/multigsub.R index 228f86ff..056b0341 100644 --- a/R/multigsub.R +++ b/R/multigsub.R @@ -19,6 +19,10 @@ #' \code{pattern} string is sorted by number of characters to prevent substrings #' replacing meta strings (e.g., \code{pattern = c("the", "then")} resorts to #' search for "then" first). +#' @param simultaneous logical. If \code{TRUE} then a slower method is used which +#' guarantees no conflicts in simultaneous string substitution (e.g., pattern = +#' c("hey", "ho"), replacement = c("ho", "hey"), text.var = "hey ho, let's go!" +#' will return "ho hey, let's go!"). #' @param \dots Additional arguments passed to \code{\link[base]{gsub}}. #' @rdname multigsub #' @return \code{multigsub} - Returns a vector with the pattern replaced. @@ -33,6 +37,7 @@ #' multigsub(c("it's", "I'm"), c("it is", "I am"), DATA$state) #' mgsub(c("it's", "I'm"), c("it is", "I am"), DATA$state) #' mgsub("[[:punct:]]", "PUNC", DATA$state, fixed = FALSE) +#' mgsub(c("hey", "ho"), c("ho", "hey"), "hey ho, let's go!", simultaneous = TRUE) #' #' ## ====================== #' ## `sub_holder` Function @@ -54,7 +59,7 @@ multigsub <- function (pattern, replacement, text.var, leadspace = FALSE, trailspace = FALSE, fixed = TRUE, trim = TRUE, order.pattern = fixed, - ...) { + simultaneous = FALSE, ...) 
{ if (leadspace | trailspace) replacement <- spaste(replacement, trailing = trailspace, leading = leadspace) @@ -64,9 +69,13 @@ function (pattern, replacement, text.var, leadspace = FALSE, if (length(replacement) != 1) replacement <- replacement[ord] } if (length(replacement) == 1) replacement <- rep(replacement, length(pattern)) - - for (i in seq_along(pattern)){ + + if (simultaneous) { + text.var <- mgsub::mgsub(text.var, pattern, replacement, fixed = fixed, ...) + } else { + for (i in seq_along(pattern)){ text.var <- gsub(pattern[i], replacement[i], text.var, fixed = fixed, ...) + } } if (trim) text.var <- gsub("\\s+", " ", gsub("^\\s+|\\s+$", "", text.var, perl=TRUE), perl=TRUE) diff --git a/inst/Rmd_vignette/qdap_vignette.Rmd b/inst/Rmd_vignette/qdap_vignette.Rmd index 9a58ed2f..f3a930c4 100644 --- a/inst/Rmd_vignette/qdap_vignette.Rmd +++ b/inst/Rmd_vignette/qdap_vignette.Rmd @@ -1045,7 +1045,9 @@ trans_cloud(text, c("greg", "bob"), target.words=list(obs), caps.list=obs,