Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Changes in v1.5.2

* Add `nested_weight` to `textmodel_lss()` and `as.textmodel_lss()` to perform dictionary-like analysis.
* Remove `auto_weight` from `textmodel_lss()`.
* Remove `auto_weight` from `textmodel_lss()` and `cut` from `predict()`.

## Changes in v1.5.1

Expand Down
23 changes: 2 additions & 21 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,14 @@
#' @param newdata a dfm on which prediction should be made.
#' @param se_fit if `TRUE`, returns standard error of document scores.
#' @param density if `TRUE`, returns frequency of polarity words in documents.
#' @param cut a vector of one or two percentile values to dichotomized polarty
#' scores of words. When two values are given, words between them receive zero
#' polarity.
#' @param rescale if `TRUE`, normalizes polarity scores using `scale()`.
#' @param min_n set the minimum number of polarity words in documents.
#' @param ... not used
#' @details Polarity scores of documents are the means of polarity scores of
#' words weighted by their frequency. When `se_fit = TRUE`, this function
#' returns the weighted means, their standard errors, and the number of
#' polarity words in the documents. When `rescale = TRUE`, it converts the raw
#' polarity scores to z sores for easier interpretation. When `rescale =
#' FALSE` and `cut` is used, polarity scores of documents are bounded by
#' \[-1.0, 1.0\].
#' polarity scores to z sores for easier interpretation.
#'
#' Documents tend to receive extreme polarity scores when they have only few
#' polarity words. This is problematic when LSS is applied to short documents
Expand All @@ -30,7 +25,7 @@
#' @export
predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE,
density = FALSE, rescale = TRUE,
cut = NULL, min_n = 0L, ...){
min_n = 0L, ...){


(function(se.fit, recaling, ...) unused_dots(...))(...) # trap deprecated args
Expand All @@ -45,11 +40,6 @@ predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE,
}
min_n <- check_integer(min_n, min = 0)

if (!is.null(cut)) {
cut <- check_double(cut, min = 0, max = 1, min_len = 1, max_len = 2)
object$beta <- cut_beta(object$beta, cut)
}

beta <- Matrix(object$beta, nrow = 1, sparse = TRUE,
dimnames = list(NULL, names(object$beta)))

Expand Down Expand Up @@ -101,13 +91,4 @@ predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE,
}
}

cut_beta <- function(x, p = 0.5) {
q <- c(-Inf, quantile(x, p, na.rm = TRUE), Inf)
v <- as.integer(cut(x, q))
beta <- double(length(x))
beta[v == min(v)] <- -1.0
beta[v == max(v)] <- 1.0
names(beta) <- names(x)
return(beta)
}

8 changes: 1 addition & 7 deletions man/predict.textmodel_lss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 0 additions & 72 deletions tests/testthat/test-textmodel_lss.R
Original file line number Diff line number Diff line change
Expand Up @@ -399,78 +399,6 @@ test_that("se_fit is working", {
expect_identical(pred1, pred2)
})

test_that("cut is working", {

skip_on_cran() # takes to much time

p0 <- predict(lss_test, rescale = TRUE, min_n = 10)
p1 <- predict(lss_test, cut = 0.5, rescale = TRUE)
expect_true(min(p1, na.rm = TRUE) < -1)
expect_true(max(p1, na.rm = TRUE) > 1)
expect_equal(cor(p0, p1, use = "pair"), 0.59, tolerance = 0.01)

p2 <- predict(lss_test, cut = 0.5, rescale = FALSE)
expect_true(min(p2, na.rm = TRUE) >= -1)
expect_true(max(p2, na.rm = TRUE) <= 1)
expect_equal(cor(p0, p2, use = "pair"), 0.59, tolerance = 0.01)

p3 <- predict(lss_test, cut = 0.5, rescale = FALSE, min_n = 10)
expect_true(min(p3, na.rm = TRUE) >= -1)
expect_true(max(p3, na.rm = TRUE) <= 1)
expect_equal(cor(p0, p3, use = "pair"), 0.73, tolerance = 0.01)

p4 <- predict(lss_test, cut = 0.75, rescale = FALSE, min_n = 10)
expect_true(min(p4, na.rm = TRUE) >= -1)
expect_true(max(p4, na.rm = TRUE) <= 1)
expect_equal(cor(p0, p4, use = "pair"), 0.33, tolerance = 0.01)

p5 <- predict(lss_test, cut = c(0.25, 0.75), rescale = FALSE, min_n = 10)
expect_true(min(p5, na.rm = TRUE) >= -1)
expect_true(max(p5, na.rm = TRUE) <= 1)
expect_equal(cor(p0, p5, use = "pair"), 0.77, tolerance = 0.01)

p6 <- predict(lss_test, cut = c(0.75, 0.25), rescale = FALSE, min_n = 10)
expect_identical(p5, p6)

expect_error(
predict(lss_test, cut = 1.5),
"The value of cut must be between 0 and 1"
)
expect_error(
predict(lss_test, cut = -0.1),
"The value of cut must be between 0 and 1"
)
expect_error(
predict(lss_test, cut = c(0.1, 0.5, 0.9)),
"The length of cut must be between 1 and 2"
)

expect_equal(
LSX:::cut_beta(c(1.1, -1.2, 0.5, 0.3, -0.2, -0.5)),
c(1, -1, 1, 1, -1, -1)
)
expect_equal(
LSX:::cut_beta(c(1.1, -1.2, 0.5, 0.3, -0.2, -0.5), c(0.2, 0.8)),
c(1, -1, 0, 0, 0, -1)
)

beta <- rnorm(nfeat(dfmt_test), sd = 0.1)
names(beta) <- featnames(dfmt_test)
beta2 <- LSX:::cut_beta(beta, c(0.2, 0.8))

lss1 <- as.textmodel_lss(beta)
lss2 <- as.textmodel_lss(beta2)
expect_equal(names(lss1$beta), names(lss2$beta))

pred0 <- predict(lss1, dfmt_test, se_fit = TRUE)
pred1 <- predict(lss1, dfmt_test, cut = c(0.2, 0.8), se_fit = TRUE)
pred2 <- predict(lss2, dfmt_test, se_fit = TRUE)

expect_equal(pred0$n, pred1$n)
expect_equal(pred0$n, pred2$n)
expect_equal(pred1$fit, pred2$fit)
})

test_that("rescaling still works", {

expect_warning({
Expand Down
Loading