From 659234397752c997cffa3c55b43c823c3b0d4ae8 Mon Sep 17 00:00:00 2001 From: Kohei Watanabe Date: Mon, 6 Apr 2026 15:32:53 +0900 Subject: [PATCH] Remove experimental functions --- NEWS.md | 2 +- R/predict.R | 23 +-------- man/predict.textmodel_lss.Rd | 8 +--- tests/testthat/test-textmodel_lss.R | 72 ----------------------------- 4 files changed, 4 insertions(+), 101 deletions(-) diff --git a/NEWS.md b/NEWS.md index a3f44b04..eeb820c2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ ## Changes in v1.5.2 * Add `nested_weight` to `textmodel_lss()` and `as.textmodel_lss()` to perform dictionary-like analysis. -* Remove `auto_weight` from `textmodel_lss()`. +* Remove `auto_weight` from `textmodel_lss()` and `cut` from `predict()`. ## Changes in v1.5.1 diff --git a/R/predict.R b/R/predict.R index 7420989e..e04e3e9e 100644 --- a/R/predict.R +++ b/R/predict.R @@ -5,9 +5,6 @@ #' @param newdata a dfm on which prediction should be made. #' @param se_fit if `TRUE`, returns standard error of document scores. #' @param density if `TRUE`, returns frequency of polarity words in documents. -#' @param cut a vector of one or two percentile values to dichotomized polarty -#' scores of words. When two values are given, words between them receive zero -#' polarity. #' @param rescale if `TRUE`, normalizes polarity scores using `scale()`. #' @param min_n set the minimum number of polarity words in documents. #' @param ... not used @@ -15,9 +12,7 @@ #' words weighted by their frequency. When `se_fit = TRUE`, this function #' returns the weighted means, their standard errors, and the number of #' polarity words in the documents. When `rescale = TRUE`, it converts the raw -#' polarity scores to z sores for easier interpretation. When `rescale = -#' FALSE` and `cut` is used, polarity scores of documents are bounded by -#' \[-1.0, 1.0\]. +#' polarity scores to z scores for easier interpretation. #' #' Documents tend to receive extreme polarity scores when they have only few #' polarity words. 
This is problematic when LSS is applied to short documents @@ -30,7 +25,7 @@ #' @export predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE, density = FALSE, rescale = TRUE, - cut = NULL, min_n = 0L, ...){ + min_n = 0L, ...){ (function(se.fit, recaling, ...) unused_dots(...))(...) # trap deprecated args @@ -45,11 +40,6 @@ predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE, } min_n <- check_integer(min_n, min = 0) - if (!is.null(cut)) { - cut <- check_double(cut, min = 0, max = 1, min_len = 1, max_len = 2) - object$beta <- cut_beta(object$beta, cut) - } - beta <- Matrix(object$beta, nrow = 1, sparse = TRUE, dimnames = list(NULL, names(object$beta))) @@ -101,13 +91,4 @@ predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE, } } -cut_beta <- function(x, p = 0.5) { - q <- c(-Inf, quantile(x, p, na.rm = TRUE), Inf) - v <- as.integer(cut(x, q)) - beta <- double(length(x)) - beta[v == min(v)] <- -1.0 - beta[v == max(v)] <- 1.0 - names(beta) <- names(x) - return(beta) -} diff --git a/man/predict.textmodel_lss.Rd b/man/predict.textmodel_lss.Rd index c726a395..2e2b51a0 100644 --- a/man/predict.textmodel_lss.Rd +++ b/man/predict.textmodel_lss.Rd @@ -10,7 +10,6 @@ se_fit = FALSE, density = FALSE, rescale = TRUE, - cut = NULL, min_n = 0L, ... ) @@ -26,10 +25,6 @@ \item{rescale}{if \code{TRUE}, normalizes polarity scores using \code{scale()}.} -\item{cut}{a vector of one or two percentile values to dichotomized polarty -scores of words. When two values are given, words between them receive zero -polarity.} - \item{min_n}{set the minimum number of polarity words in documents.} \item{...}{not used} @@ -42,8 +37,7 @@ Polarity scores of documents are the means of polarity scores of words weighted by their frequency. When \code{se_fit = TRUE}, this function returns the weighted means, their standard errors, and the number of polarity words in the documents. 
When \code{rescale = TRUE}, it converts the raw -polarity scores to z sores for easier interpretation. When \code{rescale = FALSE} and \code{cut} is used, polarity scores of documents are bounded by -[-1.0, 1.0]. +polarity scores to z scores for easier interpretation. Documents tend to receive extreme polarity scores when they have only few polarity words. This is problematic when LSS is applied to short documents diff --git a/tests/testthat/test-textmodel_lss.R b/tests/testthat/test-textmodel_lss.R index e4df311e..0ed4a546 100644 --- a/tests/testthat/test-textmodel_lss.R +++ b/tests/testthat/test-textmodel_lss.R @@ -399,78 +399,6 @@ test_that("se_fit is working", { expect_identical(pred1, pred2) }) -test_that("cut is working", { - - skip_on_cran() # takes to much time - - p0 <- predict(lss_test, rescale = TRUE, min_n = 10) - p1 <- predict(lss_test, cut = 0.5, rescale = TRUE) - expect_true(min(p1, na.rm = TRUE) < -1) - expect_true(max(p1, na.rm = TRUE) > 1) - expect_equal(cor(p0, p1, use = "pair"), 0.59, tolerance = 0.01) - - p2 <- predict(lss_test, cut = 0.5, rescale = FALSE) - expect_true(min(p2, na.rm = TRUE) >= -1) - expect_true(max(p2, na.rm = TRUE) <= 1) - expect_equal(cor(p0, p2, use = "pair"), 0.59, tolerance = 0.01) - - p3 <- predict(lss_test, cut = 0.5, rescale = FALSE, min_n = 10) - expect_true(min(p3, na.rm = TRUE) >= -1) - expect_true(max(p3, na.rm = TRUE) <= 1) - expect_equal(cor(p0, p3, use = "pair"), 0.73, tolerance = 0.01) - - p4 <- predict(lss_test, cut = 0.75, rescale = FALSE, min_n = 10) - expect_true(min(p4, na.rm = TRUE) >= -1) - expect_true(max(p4, na.rm = TRUE) <= 1) - expect_equal(cor(p0, p4, use = "pair"), 0.33, tolerance = 0.01) - - p5 <- predict(lss_test, cut = c(0.25, 0.75), rescale = FALSE, min_n = 10) - expect_true(min(p5, na.rm = TRUE) >= -1) - expect_true(max(p5, na.rm = TRUE) <= 1) - expect_equal(cor(p0, p5, use = "pair"), 0.77, tolerance = 0.01) - - p6 <- predict(lss_test, cut = c(0.75, 0.25), rescale = FALSE, min_n = 10) - 
expect_identical(p5, p6) - - expect_error( - predict(lss_test, cut = 1.5), - "The value of cut must be between 0 and 1" - ) - expect_error( - predict(lss_test, cut = -0.1), - "The value of cut must be between 0 and 1" - ) - expect_error( - predict(lss_test, cut = c(0.1, 0.5, 0.9)), - "The length of cut must be between 1 and 2" - ) - - expect_equal( - LSX:::cut_beta(c(1.1, -1.2, 0.5, 0.3, -0.2, -0.5)), - c(1, -1, 1, 1, -1, -1) - ) - expect_equal( - LSX:::cut_beta(c(1.1, -1.2, 0.5, 0.3, -0.2, -0.5), c(0.2, 0.8)), - c(1, -1, 0, 0, 0, -1) - ) - - beta <- rnorm(nfeat(dfmt_test), sd = 0.1) - names(beta) <- featnames(dfmt_test) - beta2 <- LSX:::cut_beta(beta, c(0.2, 0.8)) - - lss1 <- as.textmodel_lss(beta) - lss2 <- as.textmodel_lss(beta2) - expect_equal(names(lss1$beta), names(lss2$beta)) - - pred0 <- predict(lss1, dfmt_test, se_fit = TRUE) - pred1 <- predict(lss1, dfmt_test, cut = c(0.2, 0.8), se_fit = TRUE) - pred2 <- predict(lss2, dfmt_test, se_fit = TRUE) - - expect_equal(pred0$n, pred1$n) - expect_equal(pred0$n, pred2$n) - expect_equal(pred1$fit, pred2$fit) -}) - test_that("rescaling still works", { expect_warning({